In [None]:
# Enable IPython's autoreload extension so that edits to imported local modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules before executing each cell.
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime

from statsmodels.tsa.arima.model import ARIMA

from data.dataloader import Covid19IndiaLoader

In [None]:
# Instantiate the project's data loader and pull its dataframes.
# `dataframes` is indexed by name in the cells below (e.g. 'df_vaccine_state').
obj = Covid19IndiaLoader()
dataframes = obj.pull_dataframes()

In [None]:
# State-level vaccination table. The 'Updated On' column is renamed to 'date'
# (in place, on the frame held in `dataframes`) and parsed from day/month/year
# strings into pandas datetimes.
df_vaccine_state = dataframes['df_vaccine_state']
df_vaccine_state.rename(columns={'Updated On': 'date'}, inplace=True)
parsed_dates = pd.to_datetime(df_vaccine_state['date'], format='%d/%m/%Y')
df_vaccine_state['date'] = parsed_dates

In [None]:
# Subset of the vaccination table for a single state.
# NOTE: despite the `df_delhi` name, the filter selects 'Karnataka'; the name is
# kept because later cells reference it.
# `.copy()` makes this an independent frame, so the column assignments below do
# not write into a slice of `df_vaccine_state` (which raises
# SettingWithCopyWarning and may silently fail to update the intended object).
df_delhi = df_vaccine_state[df_vaccine_state['State'] == 'Karnataka'].copy()

# Row-over-row increments of the cumulative counters (daily increments, assuming
# one row per date in chronological order). The first row is NaN.
df_delhi['Daily Individuals Vaccinated'] = df_delhi['Total Individuals Vaccinated'].diff()
df_delhi['Daily Doses Administered'] = df_delhi['Total Doses Administered'].diff()

In [None]:
# Fit an ARIMA(1, 1, 1) model to the cumulative 'Total Individuals Vaccinated'
# series, restricted to observations dated 2021-05-01 or later. The series is
# passed as a plain NumPy array, so the model works with integer positions
# rather than dates.
train_mask = df_delhi['date'] >= '2021-05-01'
train_series = df_delhi.loc[train_mask, 'Total Individuals Vaccinated'].to_numpy()

model = ARIMA(train_series, order=(1, 1, 1))
res = model.fit()

In [None]:
# Display the fitted model's summary as the cell output.
res.summary()

In [None]:
# Preview the model's predictions for positions 1 through 91 of the series.
# Position 0 is the first training observation (dated 2021-05-01 per the filter
# used when fitting) -- presumably `end` is inclusive, giving 91 values; confirm
# against the statsmodels documentation.
res.predict(start=1, end=91)

In [None]:
# Projection table: one row per calendar day from 2021-05-02 to 2021-07-31,
# holding the ARIMA cumulative prediction for series positions 1..91 and its
# row-over-row difference (the implied daily increment; first row is NaN).
forecast_dates = pd.date_range(start='2021-05-02', end='2021-07-31')
arima_cumulative = res.predict(start=1, end=91)

df_projections = pd.DataFrame({
    'date': forecast_dates,
    'arima_forecast_cum': arima_cumulative,
})
df_projections['arima_forecast_inc'] = df_projections['arima_forecast_cum'].diff()

In [None]:
# Flat ("simple average") projection: extend the last observed cumulative total
# to 2021-07-31 at the mean daily rate of the final 15 observed days.
#
# The projection start and the averaging window are both derived from the last
# observed date instead of the literal '2021-05-15'. With data ending on
# 2021-05-15 this is identical to the hard-coded version; with any other end
# date the hard-coded mask selected a different number of rows than
# `total_days` and the assignment below failed with a length mismatch.
last_date = df_delhi['date'].iloc[-1]
total_days = (datetime.strptime('2021-07-31', '%Y-%m-%d') - last_date).days

# 15-day window ending on (and including) the last observed date.
window_start = last_date - pd.Timedelta(days=14)
in_window = (df_delhi['date'] >= window_start) & (df_delhi['date'] <= last_date)
last_15_days_mean = df_delhi.loc[in_window, 'Daily Individuals Vaccinated'].mean()

# Cumulative totals for day 1..total_days after the last observation.
simple_average_projections = (
    np.arange(1, total_days + 1) * last_15_days_mean
    + df_delhi['Total Individuals Vaccinated'].iloc[-1]
)

df_projections['simple_average_cum'] = np.nan
# Rows after the last observed date; their count equals `total_days` as long as
# the last observation is on or after 2021-05-01 (df_projections starts 05-02).
df_projections.loc[df_projections['date'] > last_date, 'simple_average_cum'] = simple_average_projections

In [None]:
# Scaled projection: the remainder of May continues at `last_15_days_mean`,
# while June and July use that rate multiplied by 10/8.5 and 15/8.5.
# NOTE(review): the 10, 15 and 8.5 constants are not explained anywhere in this
# notebook -- presumably expected monthly volumes relative to a May baseline of
# 8.5; confirm the source and units.
jun_mean = (last_15_days_mean*10)/8.5
jul_mean = (last_15_days_mean*15)/8.5

# Days left in May after the last observation (assumes the data ends in May).
last_date = df_delhi['date'].iloc[-1]
may_days = (datetime.strptime('2021-05-31', '%Y-%m-%d') - last_date).days

# Cumulative increments, chained month to month so each month starts where the
# previous one ended.
may_arr = np.arange(1, may_days+1)*last_15_days_mean
# When the data already runs through May 31 there are no May days to project;
# start June from zero instead of failing on `may_arr[-1]`.
may_total = may_arr[-1] if may_arr.size else 0.0
jun_arr = np.arange(1, 31)*jun_mean + may_total
jul_arr = np.arange(1, 32)*jul_mean + jun_arr[-1]
simple_avg_proj_scaled = np.concatenate((may_arr, jun_arr, jul_arr)) + df_delhi['Total Individuals Vaccinated'].iloc[-1]

df_projections['simple_average_scaled_cum'] = np.nan
# Mask on the last observed date (not the literal '2021-05-15') so the number of
# selected rows always matches may_days + 30 + 31 for data ending in May.
df_projections.loc[df_projections['date'] > last_date, 'simple_average_scaled_cum'] = simple_avg_proj_scaled

In [None]:
# Display the projection table (ARIMA, simple-average and scaled columns).
df_projections

In [None]:
# Observed cumulative series for the selected state (Karnataka, per the filter
# that builds `df_delhi`) overlaid with the three projections.
fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(df_delhi['date'], df_delhi['Total Individuals Vaccinated'], label='Total Individuals Vaccinated')
# ax.plot(df_delhi['date'], df_delhi['Total Individuals Vaccinated'] - df_delhi['Second Dose Administered'], label='Only 1 Dose')
# ax.plot(df_delhi['date'], df_delhi['Second Dose Administered'], label='Both Doses')
ax.plot(df_projections['date'], df_projections['arima_forecast_cum'], c='blue', ls='--', label='ARIMA')
ax.plot(df_projections['date'], df_projections['simple_average_cum'], c='midnightblue', ls='--', label='Simple Avg')
ax.plot(df_projections['date'], df_projections['simple_average_scaled_cum'], c='royalblue', ls='--', label='Simple Avg Scaled')

# Vertical markers: first projected date and last observed date. Positional
# `.iloc` access is used so the lookup does not depend on the index labels.
ax.axvline(df_projections['date'].iloc[0], ls=':', c='black')
ax.axvline(df_delhi['date'].iloc[-1], ls=':', c='black')

# Title and axis labels so the figure is readable on its own.
ax.set_title('Karnataka: individuals vaccinated, observed vs. projected')
ax.set_xlabel('Date')
ax.set_ylabel('Total individuals vaccinated (cumulative)')
ax.legend()
ax.grid(alpha=0.3)

In [None]:
# Sanity check of the flat projection's end point: mean of the last 15 daily
# increments times 77 days, added to the last observed cumulative total.
# The original expression ended in a dangling `+` (a SyntaxError); the missing
# operand is the last cumulative total, as in the other projection formulas.
# NOTE(review): 77 presumably counts the days from 2021-05-16 to 2021-07-31,
# i.e. it assumes the data ends on 2021-05-15 -- confirm.
df_delhi['Daily Individuals Vaccinated'].iloc[-15:].mean()*77 + df_delhi['Total Individuals Vaccinated'].iloc[-1]

In [None]:
# Sanity check of the scaled projection's end point: 16 remaining May days at
# the base rate, then 30 June days and 31 July days at their scaled rates, added
# to the last observed cumulative total.
# `mean` was never defined in this notebook (NameError on a fresh kernel); the
# base rate used everywhere else is `last_15_days_mean`.
last_15_days_mean*16 + jun_mean*30 + jul_mean*31 + df_delhi['Total Individuals Vaccinated'].iloc[-1]