In [None]:
# Shell commands that install extra packages into the notebook runtime.
# NOTE(review): pmdarima, shap, mxnet/gluonts and python-utils are not imported
# by any cell visible in this notebook -- confirm they are still required.
# NOTE(review): only plotly (==4.1.0) and mxnet (~=1.7) carry a version
# constraint; the other installs resolve to whatever is current at run time.
!pip install pmdarima
!pip install shap
!pip install --upgrade mxnet~=1.7 gluonts
!pip install python-utils
!pip install plotly==4.1.0
!pip install -U scikit-learn

In [None]:
# Mount Google Drive at /content/drive; the CSV files read later in the
# notebook are addressed through paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd 
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import MinMaxScaler 
import matplotlib.pyplot as plt
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import train_test_split
import datetime
warnings.filterwarnings("ignore")
np.random.seed(32)

In [None]:
# `data`: vaccination table. Later cells read its 'country', 'iso_code',
# 'date', 'vaccines', 'daily_vaccinations' and
# 'total_vaccinations_per_hundred' columns.
data=pd.read_csv('/content/drive/MyDrive/dataset/country_vaccinations.csv')
# `data2`: per-country aggregated table. Later cells read its 'Country',
# 'Date' and 'Deaths' columns.
data2=pd.read_csv('/content/drive/MyDrive/dataset/covid-19-main/data/countries-aggregated.csv')

In [None]:
# Preview the first rows of the vaccination table.
data.head()

In [None]:
# `size=` below reads daily_vaccinations, so rows lacking that value are
# removed first.  This mutates `data` in place: every later cell sees the
# filtered frame.
data.dropna(subset=['daily_vaccinations'],inplace=True)

# Split the 'date' string on '-' into three helper columns
# (assumes an ISO-like 'YYYY-MM-DD' layout -- TODO confirm).
s=data['date'].str.split('-',expand=True)
data['Year']=s[0]
data['Month']=s[1]
data['Date']=s[2]

# Animate over the full calendar date, in chronological order.  Animating over
# the day-of-month column ('Date') would put the same day of every month into
# one frame, which is not a progression "over time".  Plotly Express orders
# frames by first appearance, hence the explicit sort on a separate frame
# (the row order of `data` itself is left untouched).
map_frames=data.sort_values('date')
fig1=px.scatter_geo(map_frames,color='vaccines',locationmode="ISO-3",locations="iso_code",opacity=0.6,
                     hover_name="iso_code", size="daily_vaccinations",projection='conic equal area',animation_group ="iso_code",color_continuous_scale='blackbody',
                     animation_frame="date",scope='world',template="plotly_dark",title='Vaccination Count Over The World Over Time' )
# Per-frame duration (ms) used by the first button of the animation menu.
fig1.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 400

fig1.update_geos(
    landcolor="white",
    oceancolor="#006994",
    showocean=True,
    lakecolor="LightBlue"
)

# Detach the markers from the shared colour axis.
fig1.update_traces(
    marker_coloraxis=None
)
fig1.show()

In [None]:
# Flag every row of `data2` whose (country, date) pair also occurs in the
# vaccination table.
#
# The membership test is done per row of `data2`, so the flags line up with
# the frame regardless of its row order or of duplicated pairs (building the
# flags from a sorted, de-duplicated groupby index and assigning them
# positionally does not guarantee that).  Dates are normalised to datetimes on
# both sides so the cell also works when `data2['Date']` has already been
# converted by a later cell.
index1=pd.MultiIndex.from_arrays([data['country'],pd.to_datetime(data['date'])])
index2=pd.MultiIndex.from_arrays([data2['Country'],pd.to_datetime(data2['Date'])])
arr=index2.isin(index1).astype(int).tolist()  # 1 = pair present in `data`, 0 = absent
data2['Vaccine_is_there']=arr
data2.head()

In [None]:
# Per-country column sums; a zero total in 'Vaccine_is_there' means none of
# that country's rows was flagged.
t = data2.groupby(['Country']).sum()
never_flagged = t['Vaccine_is_there'].eq(0)
cons = t.index[never_flagged]
# `dan`: rows of data2 belonging to the remaining countries.
dan = data2.loc[~data2['Country'].isin(cons)]

In [None]:
# Number of distinct values in data2's 'Country' column.
len(data2['Country'].unique())

In [None]:
# One line trace per country showing its min-max scaled 'Deaths' column, with a
# dropdown that switches the visible trace and rewrites the title to say which
# raw value the scaled maximum (1) stands for.
data2['Date']=pd.to_datetime(data2['Date'])
dfs=list(data2.groupby("Country"))


def _deaths_title(country, peak):
    """Build the figure title for one country and its largest 'Deaths' value."""
    return str(country)+' '*30+'1 here represents :'+str(peak)


traces = []
buttons = []
for i,d in enumerate(dfs):
    name,frame=d
    # Visibility mask handed to the dropdown: only this country's trace is on.
    visible=[False]*len(dfs)
    visible[i]=True
    # `scale` is re-created on every pass, so after the loop it holds the
    # scaler fitted on the LAST country only.
    scale=MinMaxScaler()
    yp=scale.fit_transform(frame[['Deaths']])
    mm=frame['Deaths'].max()
    # Only the first country's trace is shown initially.
    traces.append(go.Scatter(x=frame['Date'],y=yp[:,0],visible=(i==0)))
    buttons.append(dict(label=name,
                        method="update",
                        args=[{"visible":visible},
                              {"title":_deaths_title(name,mm)}]))

# Initial title: derived from the first country's data instead of a
# hard-coded number, so it stays correct when the dataset changes.
first_title = _deaths_title(dfs[0][0],dfs[0][1]['Deaths'].max())

updatemenus = [{'active':0, "buttons":buttons}]
# Vertical reference line at 2020-12-20 spanning the scaled range 0..1.
shapes=[({'type': 'line',
               'xref': 'x',
               'yref': 'y',
               'x0': '2020-12-20' ,
               'y0': 0,
               'x1': '2020-12-20',
               'y1': 1})]
fig = go.Figure(data=traces,
                 layout=dict(updatemenus=updatemenus,shapes=shapes,template='plotly_dark'))
fig.update_layout(title=first_title, title_x=0.5)
fig.show()

In [None]:
# Animated scatter: total vaccinations per hundred (x) against daily
# vaccinations (y), one frame per date, one labelled marker per iso_code.
datan = (
    data[['country','iso_code','date','total_vaccinations_per_hundred','daily_vaccinations']]
    .sort_values('date')   # frames follow row order, so sort chronologically
    .dropna()              # keep only rows where every selected column is present
)
fig = px.scatter(
    datan,
    x="total_vaccinations_per_hundred",
    y='daily_vaccinations',
    animation_frame="date",
    animation_group="iso_code",
    hover_name="iso_code",
    text='iso_code',
    range_x=[-10,100],
    range_y=[-100000,1500000],
)

fig.update_traces(marker={'size': 32, 'color': 'DarkSlateGrey'})
fig.update_layout(template='plotly_dark')
fig.show()

In [None]:
# We are gonna use keras model with LSTM here to do some predictions :)
# Keep only rows whose 'vaccines' value is exactly this combination.  The
# explicit copy makes `datacd` an independent frame, so the in-place sort and
# dropna applied to it later do not operate on a slice of `data`.
datacd = data[data['vaccines']=='Sinopharm/Beijing, Sinopharm/Wuhan, Sinovac'].copy()

In [None]:
# Using simple LSTM code to do some time series predictions :)

# Column holding the series to model.
lp = 'daily_vaccinations'
# Chronological order, without rows that lack the target value.
datacd = datacd.sort_values('date').dropna(subset=[lp])
datac = datacd[[lp]]
dates = datacd['date'].to_numpy()
# Ordered split: the first 80% of the rows train the model, the last 20% test it.
# NOTE(review): the series is passed on in raw units; no scaler is fitted on it here.
train, test = train_test_split(datac, shuffle=False, test_size=0.2)
def create_test_train(dataset,look_back=1):
    """Pair every row of ``dataset`` with the row ``look_back`` positions later.

    Parameters
    ----------
    dataset : object exposing ``.values`` and ``len()`` (e.g. a DataFrame)
        Ordered observations, one row per step.
    look_back : int, default 1
        Non-negative offset between an input row and its target row.

    Returns
    -------
    (x, y) : tuple of arrays
        ``x[i]`` is row ``i`` and ``y[i]`` is row ``i + look_back``.  Both
        always have the same length, ``max(len(dataset) - look_back, 0)``.

    Raises
    ------
    ValueError
        If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError("look_back must be non-negative")
    values=dataset.values
    # Clamp at zero: with look_back > len(dataset) a negative slice end would
    # wrap around and leave x non-empty while y is empty.
    n_pairs=max(len(values)-look_back,0)
    x=values[:n_pairs]
    y=values[look_back:look_back+n_pairs]
    return x,y
# Offset (in rows) between each input and the value it has to predict.
look_back = 2
trainX, trainY = create_test_train(train, look_back)
testX, testY = create_test_train(test, look_back)
# reshape input to be [samples, time steps, features]: insert a length-1
# time-step axis between the sample axis and the feature axis.
trainX = np.expand_dims(trainX, axis=1)
testX = np.expand_dims(testX, axis=1)

# Making the model: one LSTM layer followed by three Dense layers, the last of
# which emits a single value per sample.
model = Sequential([
    LSTM(1000, input_shape=(1, 1), return_sequences=False),
    Dense(1000),
    Dense(100),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
# The object returned by fit() is kept: later cells read its `.history`
# (per-epoch losses) and `.model` attributes.  The test split doubles as the
# validation set.
fitted_model = model.fit(
    trainX,
    trainY,
    validation_data=(testX, testY),
    epochs=100,
    batch_size=8,
    verbose=2,
)

In [None]:
# Plotting the loss of the model: training loss first, validation loss second.
loss_curves = fitted_model.history
plt.plot(loss_curves['loss'])
plt.plot(loss_curves['val_loss'])

In [None]:
# make predictions
trainPredict = fitted_model.model.predict(trainX)
testPredict = fitted_model.model.predict(testX)

# Dates shared by both traces: the first len(trainPredict) rows of datacd.
train_dates = datacd['date'][:len(trainPredict)]

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces: (legend name, values, draw on secondary axis?)
# NOTE(review): "Original data" plots the model inputs (trainX), whereas the
# predictions estimate trainY, i.e. the rows `look_back` steps later, and the
# two traces use different y-axes -- confirm this comparison is intended.
overlay = (
    ("Original data", trainX, False),
    ("Predicted data", trainPredict, True),
)
for label, series, on_secondary in overlay:
    fig.add_trace(
        go.Scatter(x=train_dates, y=series.flatten(), name=label),
        secondary_y=on_secondary,
    )
fig.update_layout(template='plotly_dark')
fig.show()

In [None]:
# Let's predict for 10 days more :)
# The number of forecast steps is read from standard input (enter e.g. 10).
n=int(input())

# Roll the model forward: start from the last training input and feed each
# prediction back in as the next input.
l=[]
t=trainX[-1]
for i in range(n):  
    pred=fitted_model.model.predict(t.reshape(1,1,1))
    l.append(pred)
    t=pred
l=[i[0][0] for i in l]

# Dates for the forecast points.  The model was trained to map row i to row
# i + look_back, so the k-th forecast belongs to the row k * look_back
# positions after the last training input.  Positions that fall beyond the
# available rows have no date and are left out, which keeps x and y the same
# length.
last_input_pos=len(trainX)-1
positions=last_input_pos+look_back*np.arange(1,n+1)
positions=positions[positions<len(datacd)]
date_n=datacd['date'].iloc[positions]
forecast_values=l[:len(date_n)]

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=datacd['date'][:len(trainPredict)],y=trainX.flatten(), name="Original data"),
    secondary_y=False,
)

fig.add_trace(go.Scatter(x=date_n,y=forecast_values,name='Forecast'),secondary_y=False)

fig.update_layout(template='plotly_dark')
fig.show()

In [None]:
# invert predictions
trainPredict = scale.inverse_transform(trainPredict)
trainY = scale.inverse_transform(trainY)
testPredict = scale.inverse_transform(testPredict)
testY = scale.inverse_transform(testY)

In [None]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[:,0], trainPredict[:,0]))
print(trainY[0])
print(trainPredict[:,0])
print('Train Score: %.0f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[:,0], testPredict[:,0]))
print(testY[0])
print(testPredict[:,0])
print('Test Score: %.0f RMSE' % (testScore))

In [None]:
trainPredictPlot = np.empty_like(datac)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict

# shift test predictions for plotting
testPredictPlot = np.empty_like(datac)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back)+1:len(datac)-1, :] = testPredict

# plot baseline and predictions
plt.plot(scale.inverse_transform(datac), label='predict')
plt.plot(trainPredictPlot, label='actual')
plt.plot(testPredictPlot, label='forecast')
plt.legend()
plt.show()

In [None]:
# Mean squared error between test targets and test predictions (first column of each).
mean_squared_error(testY[:,0], testPredict[:,0])

In [None]:
# Average predicted value over the test split.
testPredict[:,0].mean()

In [None]:
# Mean absolute percentage error of the test predictions against the test targets.
from sklearn.metrics import mean_absolute_percentage_error
mean_absolute_percentage_error(testY[:,0], testPredict[:,0])

In [None]:
# R^2 score of the test predictions against the test targets.
from sklearn.metrics import r2_score
r2_score(testY[:,0], testPredict[:,0])

In [None]:
# Mean absolute error of the test predictions against the test targets.
from sklearn.metrics import mean_absolute_error
print(mean_absolute_error(testY[:,0], testPredict[:,0]))

In [None]:
# Median absolute error of the test predictions against the test targets.
from sklearn.metrics import median_absolute_error
print(median_absolute_error(testY[:,0], testPredict[:,0]))

In [None]:
# Root mean squared error on the test split (square root of the MSE of
# test targets vs. test predictions).
from math import sqrt
rmse = sqrt(mean_squared_error(testY[:,0], testPredict[:,0]))
print(rmse)

In [None]:
# Using simple LSTM code to do some time series predictions :)

# `lp` has to be the column *label*.  Assigning the selected column itself (a
# Series) makes both dropna(subset=[lp]) and datacd[[lp]] fail, because they
# expect labels.
lp='daily_vaccinations'
# Restrict to the vaccine combination of interest, order chronologically and
# drop rows that lack the target value.
datacd=(
    datacd[datacd['vaccines']=='Sinopharm/Beijing, Sinopharm/Wuhan, Sinovac']
    .sort_values('date')
    .dropna(subset=[lp])
)
datac = datacd[[lp]]
dates=datacd['date'].values
# Ordered split: first 80% of the rows for training, last 20% for testing.
train,test=train_test_split(datac,test_size=0.2,shuffle=False)
def create_test_train(dataset,look_back=1):
    """Pair every row of ``dataset`` with the row ``look_back`` positions later.

    Parameters
    ----------
    dataset : object exposing ``.values`` and ``len()`` (e.g. a DataFrame)
        Ordered observations, one row per step.
    look_back : int, default 1
        Non-negative offset between an input row and its target row.

    Returns
    -------
    (x, y) : tuple of arrays
        ``x[i]`` is row ``i`` and ``y[i]`` is row ``i + look_back``.  Both
        always have the same length, ``max(len(dataset) - look_back, 0)``.

    Raises
    ------
    ValueError
        If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError("look_back must be non-negative")
    values=dataset.values
    # Clamp at zero: with look_back > len(dataset) a negative slice end would
    # wrap around and leave x non-empty while y is empty.
    n_pairs=max(len(values)-look_back,0)
    x=values[:n_pairs]
    y=values[look_back:look_back+n_pairs]
    return x,y
# Offset (in rows) between each input and the value it has to predict.
look_back = 2
trainX, trainY = create_test_train(train, look_back)
testX, testY = create_test_train(test, look_back)
# reshape input to be [samples, time steps, features]: add a length-1
# time-step axis in the middle.
trainX = np.expand_dims(trainX, axis=1)
testX = np.expand_dims(testX, axis=1)

# Making the model: LSTM layer, then three Dense layers ending in one output.
model = Sequential([
    LSTM(1000, input_shape=(1, 1), return_sequences=False),
    Dense(1000),
    Dense(100),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
# Train with the test split as validation data; the returned object exposes
# the per-epoch losses through `.history`.
fitted_model = model.fit(
    trainX,
    trainY,
    validation_data=(testX, testY),
    epochs=100,
    batch_size=8,
    verbose=2,
)