In [125]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import seaborn as sns
%matplotlib inline
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import joblib
import warnings
warnings.simplefilter('ignore')
import os

def rollCollective(district,case='Confirmed',state=None):
    """Return the 4-row rolling mean of one district's series.

    Reads the module-level ``df`` and keeps only the trailing rows of the
    district's series, as many as there are rows whose ``case`` value is at
    least 10, before smoothing.

    Parameters
    ----------
    district : value matched against ``df['District']``.
    case : name of the column to smooth (default ``'Confirmed'``). The
        ``>= 10`` threshold is applied to this same column; the previous
        version always looked up ``'Confirmed'``, which raised ``KeyError``
        for any other column.
    state : optional value matched against ``df['State']``. When omitted the
        rows are selected by district name alone, as before.

    Returns
    -------
    DataFrame indexed by ``Date`` with the single column ``case`` holding the
    rolling mean; the incomplete leading windows are dropped. Empty when no
    row reaches the threshold.
    """
    rows = df['District'] == district
    if state is not None:
        rows = rows & (df['State'] == state)
    ts = df.loc[rows, ['Date', case]].set_index('Date')

    # Number of observations at or above the threshold; keep that many
    # trailing rows. Slicing from an explicit start position avoids the
    # ``ts[-0:]`` trap, which returns the whole series when the count is 0.
    n_active = int((ts[case] >= 10).sum())
    ts = ts.iloc[len(ts) - n_active:]

    # NOTE: the former ``ts.astype('int64')`` discarded its result and had no
    # effect, so it has been dropped rather than silently changing dtypes.
    return ts.rolling(window=4,center=False).mean().dropna()


def rollPlotCollective(district, case='Confirmed', state=None):
    """Plot one district's series with its 7-row rolling mean and rolling std.

    Reads the module-level ``df``, keeps only the trailing rows of the
    district's series (as many as there are rows whose ``case`` value is at
    least 10) and draws three lines on a new 16x6 figure: the rolling mean,
    the raw series and the rolling standard deviation.

    Parameters
    ----------
    district : value matched against ``df['District']``; also shown in the title.
    case : name of the column to plot (default ``'Confirmed'``). The
        ``>= 10`` threshold is applied to this same column; the previous
        version always looked up ``'Confirmed'``, which raised ``KeyError``
        for any other column.
    state : optional value matched against ``df['State']``. When omitted the
        rows are selected by district name alone, as before.

    Returns
    -------
    None. The figure is left as the current matplotlib figure.
    """
    rows = df['District'] == district
    if state is not None:
        rows = rows & (df['State'] == state)
    ts = df.loc[rows, ['Date', case]].set_index('Date')

    # Keep as many trailing rows as there are observations at or above the
    # threshold. An explicit start position avoids ``ts[-0:]`` returning the
    # whole series when nothing reaches the threshold.
    n_active = int((ts[case] >= 10).sum())
    ts = ts.iloc[len(ts) - n_active:]

    weekly = ts.rolling(window=7,center=False)
    plt.figure(figsize=(16,6))
    plt.plot(weekly.mean().dropna(),label='Rolling Mean')
    # Label the raw series so that it appears in the legend as well.
    plt.plot(ts[case],label=case)
    plt.plot(weekly.std(),label='Rolling std')
    plt.legend()
    plt.title('Cases distribution in %s with rolling mean and standard' %district)
    # Hide the x tick labels.
    plt.xticks([])
# Stationarity check of a time series using the Dickey-Fuller test
def stationarity(ts):
    """Run ``adfuller`` on ``ts`` (lag chosen by AIC) and print a summary.

    The printed Series lists the test statistic, p-value, number of lags used
    and number of observations used, followed by one 'Critical Value (...)'
    entry per key of the critical-value mapping returned by ``adfuller``.
    Nothing is returned.
    """
    print('Results of Dickey-Fuller Test:')
    adf_output = adfuller(ts, autolag='AIC')

    headline_labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
    report = dict(zip(headline_labels, adf_output[0:4]))
    # Append the critical values in the order the test reports them.
    report.update(
        ('Critical Value (%s)'%level, threshold)
        for level, threshold in adf_output[4].items()
    )
    print (pd.Series(report))
def corr(ts):
    """Plot the autocorrelation and partial autocorrelation of ``ts`` with ``lags=12``."""
    for draw, heading in ((plot_acf, "ACF"), (plot_pacf, "PACF")):
        draw(ts, lags=12, title=heading)
def mape(y2, y_pred): 
    """Compute, print and return the mean absolute percentage error.

    Parameters
    ----------
    y2 : array-like of actual values. Zeros here make the ratio infinite or
        NaN, because each error is divided by the actual value.
    y_pred : array-like of predicted values, same shape as ``y2``.

    Returns
    -------
    The error as a percentage (``mean(|y2 - y_pred| / |y2|) * 100``).
    """
    y2, y_pred = np.array(y2), np.array(y_pred)
    score = np.mean(np.abs((y2 - y_pred) / y2)) * 100
    # Format the finished percentage. ``'...%f' % x * 100`` binds as
    # ``('...%f' % x) * 100``, which printed the message 100 times and showed
    # the fraction instead of the percentage.
    print('\nMean absolute percentage error: %f' % score)
    return score

#Arima modeling for ts

def arima(tstemp,st,dis):
    """Grid-search an ARIMA order by AIC and save the best fitted model.

    Every (p, d, q) combination with each term in 0..5 is fitted to
    ``tstemp``; combinations whose fit raises are skipped. The fit with the
    lowest AIC is written with ``joblib.dump`` to a file named
    ``arimamodel_for_state_<st>_district_<dis>`` in the current directory,
    and that name is printed.

    Parameters
    ----------
    tstemp : the series passed to ``ARIMA`` as the endogenous data.
    st : state name, used only to build the file name.
    dis : district name, used only to build the file name.

    Returns
    -------
    The fitted result that was saved, or ``None`` when no order could be
    fitted (in which case nothing is written).
    """
    best_aic = float('inf')  # no arbitrary cap: any finite AIC can win
    best_result = None

    #Determining the best parameters
    for order in itertools.product(range(0,6), repeat=3):
        try:
            candidate = ARIMA(tstemp, order=order).fit()
            candidate_aic = candidate.aic
        except Exception:
            # Many orders are not estimable for a given series; skip them,
            # but let KeyboardInterrupt/SystemExit propagate.
            continue
        # A NaN AIC never compares as smaller, so such fits are ignored.
        if candidate_aic < best_aic:
            best_aic = candidate_aic
            best_result = candidate

    if best_result is None:
        return None

    ##Exporting model
    modelname="arimamodel_for_state_"+st+"_district_"+dis
    print(modelname)
    # Persist the fitted result (estimated parameters included), not the
    # unfitted model specification, so it can be used right after loading.
    joblib.dump(best_result,modelname)
    return best_result

def plotResults(pred,test):
    """Draw predictions (green) against real values (red) on a new figure."""
    _, axes = plt.subplots()
    layers = (
        (pred, 'green', 'predictions'),
        (test, 'red', 'real values'),
    )
    for values, colour, tag in layers:
        axes.plot(values, c=colour, label=tag)
    axes.legend()
    axes.set_title('True vs predicted values')



In [126]:
# Columns to read from the CSV. The selection is deliberately not named `list`:
# that would shadow the built-in for every later cell in the kernel session.
use_columns = ['Date', 'State', 'District', 'Confirmed']
df=pd.read_csv('output.csv',usecols=use_columns)

In [127]:
# Preview the loaded frame (the stored output shows the first and last rows).
df


Unnamed: 0,Date,State,District,Confirmed
0,2020-04-26,Andhra Pradesh,Anantapur,53
1,2020-04-27,Andhra Pradesh,Anantapur,53
2,2020-04-28,Andhra Pradesh,Anantapur,54
3,2020-04-29,Andhra Pradesh,Anantapur,58
4,2020-04-30,Andhra Pradesh,Anantapur,61
...,...,...,...,...
78105,2020-10-01,Meghalaya,South Garo Hills,40
78106,2020-10-02,Meghalaya,South Garo Hills,46
78107,2020-10-03,Meghalaya,South Garo Hills,46
78108,2020-10-04,Meghalaya,South Garo Hills,47


In [128]:
# Convert the 'Date' column with pd.to_datetime, replacing it in place.
df['Date'] = df['Date'].pipe(pd.to_datetime)

In [129]:
# Distinct (State, District) pairs present in the data.
state_dist = {
    (state_name, district_name)
    for state_name, district_name in zip(df['State'], df['District'])
}


In [130]:
# Number of leading observations per district used for fitting; the rest is held out.
TRAIN_SIZE = 120

for s, d in state_dist:
    modelname="arimamodel_for_state_"+s+"_district_"+d
    # Skip pairs whose model file already exists in the working directory
    # (a direct existence check instead of re-listing the directory each time).
    if os.path.exists(modelname):
        continue

    # Select on state AND district: the loop runs over (State, District) pairs,
    # so matching on the district name alone could mix rows from several states.
    # Sorting by date (stable) makes the head/tail split chronological
    # regardless of the row order in the file.
    dftemp = df[(df['State'] == s) & (df['District'] == d)].sort_values('Date', kind='mergesort')
    train = dftemp[:TRAIN_SIZE].set_index(['Date'])
    test = dftemp[TRAIN_SIZE:].set_index(['Date'])
    traintemp = train['Confirmed'].values
    testtemp = test['Confirmed'].values
    arima(traintemp, s, d)
    

arimamodel_for_state_Arunachal Pradesh_district_Tawang
arimamodel_for_state_Karnataka_district_Dharwad
arimamodel_for_state_West Bengal_district_Uttar Dinajpur
arimamodel_for_state_Odisha_district_Bargarh
arimamodel_for_state_Uttarakhand_district_Udham Singh Nagar
arimamodel_for_state_Mizoram_district_Lunglei
arimamodel_for_state_Tamil Nadu_district_Namakkal
arimamodel_for_state_Rajasthan_district_Churu
arimamodel_for_state_Uttar Pradesh_district_Balrampur
arimamodel_for_state_Uttar Pradesh_district_Kanpur Nagar
arimamodel_for_state_Odisha_district_Nuapada
arimamodel_for_state_Uttar Pradesh_district_Shrawasti
arimamodel_for_state_Odisha_district_Balangir
arimamodel_for_state_Madhya Pradesh_district_Jabalpur
arimamodel_for_state_Uttar Pradesh_district_Varanasi
arimamodel_for_state_Jammu and Kashmir_district_Kathua
arimamodel_for_state_Bihar_district_Muzaffarpur
arimamodel_for_state_Madhya Pradesh_district_Sidhi
arimamodel_for_state_Tamil Nadu_district_Chennai
arimamodel_for_state_Mizora

arimamodel_for_state_Rajasthan_district_Kota
arimamodel_for_state_Chhattisgarh_district_Uttar Bastar Kanker
arimamodel_for_state_Uttar Pradesh_district_Kushinagar
arimamodel_for_state_Karnataka_district_Davanagere
arimamodel_for_state_Madhya Pradesh_district_Singrauli
arimamodel_for_state_Bihar_district_Vaishali
arimamodel_for_state_Gujarat_district_Narmada
arimamodel_for_state_West Bengal_district_Bankura
arimamodel_for_state_Bihar_district_Kishanganj
arimamodel_for_state_Madhya Pradesh_district_Raisen
arimamodel_for_state_Gujarat_district_Anand
arimamodel_for_state_Maharashtra_district_Washim
arimamodel_for_state_Bihar_district_Patna
arimamodel_for_state_Arunachal Pradesh_district_Lower Subansiri
arimamodel_for_state_Madhya Pradesh_district_Rajgarh
arimamodel_for_state_Bihar_district_Bhojpur
arimamodel_for_state_Uttar Pradesh_district_Ambedkar Nagar
arimamodel_for_state_Kerala_district_Kannur
arimamodel_for_state_Puducherry_district_Mahe
arimamodel_for_state_Kerala_district_Kottayam


In [110]:
# Now that the models are generated, we can load them directly using joblib,
# forecast values for a given datetime,
# and compare the forecasted values with a test set in order to generate a MAPE score and compare with the plot.


list