<a href="https://colab.research.google.com/github/harshyadav1508/Covid19_prediction/blob/main/covid19_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import plotly.express as px
import plotly.offline as py
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [None]:
# Read the three time-series CSV exports (confirmed, deaths, recovered) from
# /content, in that order, into their respective DataFrames.
confirmed_df, deaths_df, recoveries_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/time_series_covid_19_confirmed.csv',
        '/content/time_series_covid_19_deaths.csv',
        '/content/time_series_covid_19_recovered.csv',
    )
)

In [None]:
# Inspect the raw confirmed-cases table: one row per location, one column per reporting date.
confirmed_df

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/20/21,5/21/21,5/22/21,5/23/21,5/24/21,5/25/21,5/26/21,5/27/21,5/28/21,5/29/21
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,64575,65080,65486,65728,66275,66903,67743,68366,69130,70111
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,132118,132153,132176,132209,132215,132229,132244,132264,132285,132297
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,126156,126434,126651,126860,127107,127361,127646,127926,128198,128456
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,13569,13569,13569,13569,13569,13664,13671,13682,13693,13693
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,31661,31909,32149,32441,32623,32933,33338,33607,33944,34180
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,,Vietnam,14.058324,108.277199,0,2,2,2,2,2,...,4809,4941,5119,5275,5404,5931,6086,6356,6396,6908
272,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,304532,304968,305201,305201,305777,306334,306795,306795,307569,307838
273,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,6613,6632,6649,6658,6662,6670,6688,6696,6723,6731
274,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,92754,92920,93106,93201,93279,93428,93627,93947,94430,94751


In [None]:
# Reshape the three wide tables (one column per date) into long form, join
# them into `full_df`, and aggregate the worldwide daily totals into `world_df`.
#
# NOTE: this cell overwrites confirmed_df / deaths_df / recoveries_df with
# their long-format versions, so it is not re-runnable on its own; re-run the
# loading cell first if it has to be executed again.

# Location identifiers used as id_vars by melt(). They must survive the
# NaN-column cleanup below, otherwise melt() raises a KeyError.
ID_COLUMNS = ['Country/Region', 'Province/State', 'Lat', 'Long']
# The date column headers look like '1/22/20': month/day/two-digit year.
DATE_FORMAT = '%m/%d/%y'

# Columns that are more than 99% missing in the confirmed table, excluding the
# identifier columns (those are required downstream even when mostly empty).
na_columns = confirmed_df.isna().mean() > 0.99
na_columns = na_columns[na_columns].drop(labels=ID_COLUMNS, errors='ignore')

# The mask was computed on confirmed_df only; errors='ignore' keeps the drop
# from failing if one of those columns does not exist in the other two tables.
confirmed_df = confirmed_df.drop(columns=na_columns.index, errors='ignore')
deaths_df = deaths_df.drop(columns=na_columns.index, errors='ignore')
recoveries_df = recoveries_df.drop(columns=na_columns.index, errors='ignore')


def _to_long(wide_df, value_name):
    """Melt a wide table into one row per (location, date) with a parsed 'date' column."""
    long_df = wide_df.melt(id_vars=ID_COLUMNS, var_name='date', value_name=value_name)
    # An explicit format avoids per-value format guessing and any day/month ambiguity.
    long_df['date'] = pd.to_datetime(long_df['date'], format=DATE_FORMAT)
    return long_df


confirmed_df = _to_long(confirmed_df, 'confirmed')
deaths_df = _to_long(deaths_df, 'deaths')
recoveries_df = _to_long(recoveries_df, 'recoveries')

# Same keys the implicit merge used (every shared column), now spelled out.
# NOTE(review): this is an inner join that includes the float Lat/Long columns,
# so a location/date that is missing from, or not exactly equal in, any one of
# the three tables is silently dropped from full_df - worth checking row counts.
merge_keys = ID_COLUMNS + ['date']
full_df = (
    confirmed_df
    .merge(recoveries_df, on=merge_keys, how='inner')
    .merge(deaths_df, on=merge_keys, how='inner')
    .rename(columns={'Country/Region': 'Country', 'date': 'Date', 'confirmed': "Confirmed", "recoveries": "Recoveries", "deaths": "Deaths"})
)

# Worldwide totals per day: one row per Date with the three summed metrics.
world_df = (
    full_df
    .groupby(['Date'])[['Confirmed', 'Recoveries', 'Deaths']]
    .sum()
    .reset_index()
)

def _safe_ratio(numerator, denominator):
    """Element-wise ratio where division by zero yields NaN instead of +/-inf."""
    return (numerator / denominator).replace([np.inf, -np.inf], np.nan)


def add_rates(df):
    """Add day-over-day change and rate columns to a frame of case totals.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Confirmed', 'Deaths' and 'Recoveries' columns, with rows
        already in chronological order (the differences are taken row to row).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; the new columns are added in place:
        'Confirmed Change', 'Mortality Rate', 'Recovery Rate', 'Growth Rate',
        'Growth Rate Change' and 'Growth Rate Accel'.

    Notes
    -----
    The change columns are forward differences (`diff().shift(-1)`): the value
    on a row is the change from that row to the next one, so the last row is
    NaN. Ratios whose denominator is zero are reported as NaN rather than
    +/-inf, consistent with the NaN that 0/0 already produces.
    """
    # Increase from this row to the next one.
    df['Confirmed Change'] = df['Confirmed'].diff().shift(-1)

    df['Mortality Rate'] = _safe_ratio(df['Deaths'], df['Confirmed'])
    df['Recovery Rate'] = _safe_ratio(df['Recoveries'], df['Confirmed'])
    df['Growth Rate'] = _safe_ratio(df['Confirmed Change'], df['Confirmed'])
    # Change in growth rate from this row to the next, and that change
    # relative to the current growth rate.
    df['Growth Rate Change'] = df['Growth Rate'].diff().shift(-1)
    df['Growth Rate Accel'] = _safe_ratio(df['Growth Rate Change'], df['Growth Rate'])
    return df

# Attach the derived change/rate columns to the worldwide totals.
world_df = world_df.pipe(add_rates)

In [None]:
# Inspect the worldwide daily totals together with the derived change/rate columns.
world_df

Unnamed: 0,Date,Confirmed,Recoveries,Deaths,Confirmed Change,Mortality Rate,Recovery Rate,Growth Rate,Growth Rate Change,Growth Rate Accel
0,2020-01-22,551,30,17,98.0,0.030853,0.054446,0.177858,0.255115,1.434373
1,2020-01-23,649,32,17,281.0,0.026194,0.049307,0.432974,0.064876,0.149837
2,2020-01-24,930,39,25,463.0,0.026882,0.041935,0.497849,-0.047024,-0.094454
3,2020-01-25,1393,42,41,628.0,0.029433,0.030151,0.450826,-0.075269,-0.166958
4,2020-01-26,2021,56,54,759.0,0.026719,0.027709,0.375557,0.557897,1.485519
...,...,...,...,...,...,...,...,...,...,...
489,2021-05-25,166372931,103428167,3457899,565378.0,0.020784,0.621665,0.003398,-0.000107,-0.031621
490,2021-05-26,166938309,103978840,3470614,549361.0,0.020790,0.622858,0.003291,-0.000326,-0.099027
491,2021-05-27,167487670,104667376,3483272,496588.0,0.020797,0.624926,0.002965,-0.000117,-0.039510
492,2021-05-28,167984258,105217015,3495056,478382.0,0.020806,0.626350,0.002848,,


In [None]:
def plot_aggregate_metrics(df, fig=None):
    """Plot the Confirmed, Deaths and Recoveries series of `df` against Date.

    Parameters
    ----------
    df : DataFrame with 'Date', 'Confirmed', 'Deaths' and 'Recoveries' columns.
    fig : optional plotly figure to draw onto; a new `go.Figure` is created
        when it is omitted.

    Returns
    -------
    The figure, with the title, dark template and the three traces applied.
    """
    figure = go.Figure() if fig is None else fig
    figure.update_layout(title='COVID-19 Worldwide Cases Analysis', template='plotly_dark')

    # (column, line colour) pairs in drawing order; the column name doubles as
    # the legend label.
    trace_specs = (
        ('Confirmed', 'Yellow'),
        ('Deaths', 'Red'),
        ('Recoveries', 'Green'),
    )
    for metric, colour in trace_specs:
        figure.add_trace(
            go.Scatter(
                x=df['Date'],
                y=df[metric],
                mode='lines+markers',
                name=metric,
                line={'color': colour, 'width': 2},
            )
        )
    return figure


plot_aggregate_metrics(world_df).show()


In [None]:
def plot_diff_metrics(df, fig=None):
    """Plot the mortality, recovery and growth rate series of `df` against Date.

    Parameters
    ----------
    df : DataFrame with 'Date', 'Mortality Rate', 'Recovery Rate' and
        'Growth Rate' columns.
    fig : optional plotly figure to draw onto; a new `go.Figure` is created
        when it is omitted.

    Returns
    -------
    The figure, with the title, dark template, the three traces and a
    percentage tick format on the y axis applied.
    """
    figure = fig if fig is not None else go.Figure()
    figure.update_layout(title='COVID-19 Worldwide Rates Analysis', template='plotly_dark')

    # (column, legend label, line colour) in drawing order.
    trace_specs = [
        ('Mortality Rate', 'Mortality rate', 'red'),
        ('Recovery Rate', 'Recovery rate', 'Green'),
        ('Growth Rate', 'Growth rate confirmed', 'Yellow'),
    ]
    for column, label, colour in trace_specs:
        figure.add_trace(
            go.Scatter(
                x=df['Date'],
                y=df[column],
                mode='lines+markers',
                name=label,
                line={'color': colour, 'width': 2},
            )
        )

    # Rates are fractions; render the axis ticks as percentages.
    figure.update_layout(yaxis={'tickformat': ".2%"})
    return figure


plot_diff_metrics(world_df).show()

In [None]:
# Log-scaled confirmed cases by country.
# Collapse the province-level rows into one row per (Date, Country).
# numeric_only=True keeps non-numeric columns such as 'Province/State'
# (presumably text) out of the aggregation: from pandas 2.0 on, groupby sums
# no longer skip such columns by default.
confirmed_by_country_df = (
    full_df
    .groupby(['Date', 'Country'])
    .sum(numeric_only=True)
    .reset_index()
)

fig = px.line(
    confirmed_by_country_df,
    x='Date',
    y='Confirmed',
    color='Country',
    line_group="Country",
    hover_name="Country",
    title='COVID-19 Confirmed Cases by Country (log scale)',
)
fig.update_layout(
    template='plotly_dark',
    yaxis_type="log"
)
fig.show()

In [None]:
# Display the worldwide table again; its contents are the same as when it was first shown after add_rates.
world_df

Unnamed: 0,Date,Confirmed,Recoveries,Deaths,Confirmed Change,Mortality Rate,Recovery Rate,Growth Rate,Growth Rate Change,Growth Rate Accel
0,2020-01-22,551,30,17,98.0,0.030853,0.054446,0.177858,0.255115,1.434373
1,2020-01-23,649,32,17,281.0,0.026194,0.049307,0.432974,0.064876,0.149837
2,2020-01-24,930,39,25,463.0,0.026882,0.041935,0.497849,-0.047024,-0.094454
3,2020-01-25,1393,42,41,628.0,0.029433,0.030151,0.450826,-0.075269,-0.166958
4,2020-01-26,2021,56,54,759.0,0.026719,0.027709,0.375557,0.557897,1.485519
...,...,...,...,...,...,...,...,...,...,...
489,2021-05-25,166372931,103428167,3457899,565378.0,0.020784,0.621665,0.003398,-0.000107,-0.031621
490,2021-05-26,166938309,103978840,3470614,549361.0,0.020790,0.622858,0.003291,-0.000326,-0.099027
491,2021-05-27,167487670,104667376,3483272,496588.0,0.020797,0.624926,0.002965,-0.000117,-0.039510
492,2021-05-28,167984258,105217015,3495056,478382.0,0.020806,0.626350,0.002848,,
