### Note
If you are running all cells in one go, please fill in the Mapbox and Chart Studio credentials further down first.

# Imports

In [None]:
# Additional Installations For Google Colab

!pip install TSErrors
!pip install chart_studio
!pip install plotly -U

Requirement already up-to-date: plotly in /usr/local/lib/python3.6/dist-packages (4.14.1)


In [None]:
import pandas as pd

import numpy as np

import plotly.express as px
import plotly.graph_objects as go
import chart_studio.plotly as py
import chart_studio

import requests

from collections import Counter
from TSErrors import FindErrors

from sklearn.model_selection import ParameterGrid

from keras.models import Sequential
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, Flatten

from datetime import datetime
from datetime import date

from math import log
from math import e

from itertools import chain

import warnings
warnings.simplefilter("ignore")

# Data Pre-Processing

## Getting The Data 

In [None]:
# Data from the Johns Hopkins University dataset on GitHub
# https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

# File names of the three global time series: confirmed, deaths, recovered
filenames = [
    'time_series_covid19_confirmed_global.csv',
    'time_series_covid19_deaths_global.csv',
    'time_series_covid19_recovered_global.csv',
]

# Base URL that each file name above is appended to
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'

# Main dataframes for the analysis, read in the same order as `filenames`
confirmed_global, deaths_global, recovered_global = [
    pd.read_csv(url + filename) for filename in filenames
]

# Per-country table read from the repository's web-data branch
country_cases = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv')

## Data Cleaning

In [None]:
# Simple Data Cleaning - Removing and renaming the Columns

# Drop the province and coordinate columns from the three time series, and the
# testing/hospitalisation columns from the per-country table.
# errors="ignore" makes the cell safe to re-run: the frames are modified in
# place, so on a second run (or if a column is absent from the downloaded
# file) a plain drop would raise KeyError.
confirmed_global.drop(columns=['Province/State', 'Lat', 'Long'], inplace=True, errors="ignore")
deaths_global.drop(columns=['Province/State', 'Lat', 'Long'], inplace=True, errors="ignore")
recovered_global.drop(columns=['Province/State', 'Lat', 'Long'], inplace=True, errors="ignore")
country_cases.drop(columns=["People_Tested", "People_Hospitalized"], inplace=True, errors="ignore")

# Renaming the columns for easier access (rename already ignores missing keys,
# so these calls are naturally idempotent)
confirmed_global.rename(columns={"Country/Region": "country"}, inplace=True)
deaths_global.rename(columns={"Country/Region": "country"}, inplace=True)
recovered_global.rename(columns={"Country/Region": "country"}, inplace=True)

country_cases.rename(columns={
    "Country_Region": "country",
    "Last_Update": "last",
    "Confirmed": "confirmed",
    "Deaths": "deaths",
    "Recovered": "recovered",
    "Active": "active",
    "Mortality_Rate": "mortality"
}, inplace=True)

In [None]:
# A country can appear on several rows; collapse them into a single row per
# country by summing every remaining column.
confirmed_global, deaths_global, recovered_global = [
    table.groupby(['country'], as_index=False).sum()
    for table in (confirmed_global, deaths_global, recovered_global)
]

In [None]:
# Order countries by confirmed count (largest first) and renumber the rows 0..n-1
country_cases_sorted = country_cases.sort_values(by="confirmed", ascending=False)
country_cases_sorted.index = list(range(len(country_cases_sorted)))

## Error Corrections

In [None]:
# Manual correction: overwrite a single value that was wrong in the original dataset.
# NOTE(review): 178 is a row label in the grouped table, so which country it
# addresses depends on the set of countries present when the data is downloaded.
# Confirm which country is intended and consider selecting the row by country
# name so the fix cannot land on the wrong row.
confirmed_global.at[178, '5/20/20'] = 251667

## DataFrames

In [None]:
confirmed_global.head()

Unnamed: 0,country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,...,11/19/20,11/20/20,11/21/20,11/22/20,11/23/20,11/24/20,11/25/20,11/26/20,11/27/20,11/28/20,11/29/20,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20,12/7/20,12/8/20,12/9/20,12/10/20,12/11/20,12/12/20,12/13/20,12/14/20,12/15/20,12/16/20,12/17/20,12/18/20,12/19/20,12/20/20,12/21/20,12/22/20,12/23/20,12/24/20,12/25/20,12/26/20,12/27/20,12/28/20
0,Afghanistan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,...,44177,44363,44503,44706,44988,45174,45384,45600,45723,45844,46116,46274,46516,46718,46837,46837,47072,47306,47516,47716,47851,48053,48116,48229,48527,48718,48952,49161,49378,49621,49681,49817,50013,50190,50433,50655,50810,50886,51039,51280
1,Albania,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,30623,31459,32196,32761,33556,34300,34944,35600,36245,36790,37625,38182,39014,39719,40501,41302,42148,42988,43683,44436,45188,46061,46863,47742,48530,49191,50000,50637,51424,52004,52542,53003,53425,53814,54317,54827,55380,55755,56254,56572
2,Algeria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,...,71652,72755,73774,74862,75867,77000,78025,79110,80168,81212,82221,83199,84152,85084,85927,86730,87502,88252,88825,89416,90014,90579,91121,91638,92102,92597,93065,93507,93933,94371,94781,95203,95659,96069,96549,97007,97441,97857,98249,98631
3,Andorra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,6066,6142,6207,6256,6304,6351,6428,6534,6610,6610,6712,6745,6790,6842,6904,6955,7005,7050,7084,7127,7162,7190,7236,7288,7338,7382,7382,7446,7466,7519,7560,7577,7602,7633,7669,7699,7756,7806,7821,7875
4,Angola,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,14134,14267,14413,14493,14634,14742,14821,14920,15008,15087,15103,15139,15251,15319,15361,15493,15536,15591,15648,15729,15804,15925,16061,16161,16188,16277,16362,16407,16484,16562,16626,16644,16686,16802,16931,17029,17099,17149,17240,17296


In [None]:
deaths_global.head()

Unnamed: 0,country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,...,11/19/20,11/20/20,11/21/20,11/22/20,11/23/20,11/24/20,11/25/20,11/26/20,11/27/20,11/28/20,11/29/20,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20,12/7/20,12/8/20,12/9/20,12/10/20,12/11/20,12/12/20,12/13/20,12/14/20,12/15/20,12/16/20,12/17/20,12/18/20,12/19/20,12/20/20,12/21/20,12/22/20,12/23/20,12/24/20,12/25/20,12/26/20,12/27/20,12/28/20
0,Afghanistan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1653,1666,1678,1690,1702,1715,1728,1737,1740,1752,1774,1795,1822,1841,1846,1846,1864,1874,1900,1906,1919,1935,1945,1956,1965,1969,1995,2011,2025,2030,2047,2067,2082,2096,2117,2126,2139,2149,2160,2174
1,Albania,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,657,672,685,699,716,735,743,753,771,787,798,810,822,839,852,870,889,905,922,936,951,965,977,989,1003,1016,1028,1040,1055,1066,1074,1088,1098,1111,1117,1125,1134,1143,1153,1164
2,Algeria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,2224,2236,2255,2272,2294,2309,2329,2352,2372,2393,2410,2431,2447,2464,2480,2492,2501,2516,2527,2539,2554,2564,2575,2584,2596,2609,2623,2631,2640,2647,2659,2666,2675,2687,2696,2705,2716,2722,2728,2737
3,Andorra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,76,76,76,76,76,76,76,76,76,76,76,76,76,76,77,77,78,78,78,78,78,78,78,78,79,79,79,79,79,80,80,81,81,82,82,83,83,83,83,83
4,Angola,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,333,334,336,337,337,338,340,341,342,345,346,348,350,351,352,353,354,354,354,355,358,362,365,366,371,372,372,379,382,384,386,387,390,393,393,393,396,399,399,403


In [None]:
recovered_global.head()

Unnamed: 0,country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,...,11/19/20,11/20/20,11/21/20,11/22/20,11/23/20,11/24/20,11/25/20,11/26/20,11/27/20,11/28/20,11/29/20,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20,12/7/20,12/8/20,12/9/20,12/10/20,12/11/20,12/12/20,12/13/20,12/14/20,12/15/20,12/16/20,12/17/20,12/18/20,12/19/20,12/20/20,12/21/20,12/22/20,12/23/20,12/24/20,12/25/20,12/26/20,12/27/20,12/28/20
0,Afghanistan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,35350,35370,35422,35934,35976,36122,36145,36232,36295,36709,36716,36831,36946,37218,37260,37260,37393,37685,37879,37920,38032,38099,38141,38200,38250,38252,38336,38475,38505,38540,38613,39006,39508,39585,39692,40359,40444,40784,41096,41441
1,Albania,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,14565,15055,15469,15842,16230,16666,17031,17352,17755,18152,18481,18849,19384,19912,20484,20974,21286,21617,22180,22527,23072,23609,24136,24520,24820,24820,25876,26381,26898,27426,27831,28121,28752,29249,29799,30276,30790,31181,31565,32122
2,Algeria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,46962,47581,48183,48794,49421,50070,50712,51334,51946,52568,53204,53809,54405,54990,55538,56079,56617,57146,57648,58146,58146,59135,59590,60028,60457,60888,61307,61700,62089,62487,62869,63260,63644,64020,64401,64777,65144,65505,65862,66214
3,Andorra,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,5127,5239,5290,5358,5405,5503,5542,5649,5710,5710,5794,5873,5940,5988,6066,6130,6171,6238,6293,6367,6452,6505,6598,6629,6629,6706,6706,6819,6875,6919,6963,6997,7028,7073,7106,7171,7203,7252,7288,7318
4,Angola,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,7062,7117,7273,7346,7351,7444,7517,7617,7697,7763,7763,7851,7932,8139,8244,8299,8335,8338,8353,8470,8579,8679,8798,8841,8898,8924,8990,9194,9266,9345,9518,9592,9695,9729,9729,9729,9921,9976,10354,10354


In [None]:
country_cases_sorted.head()

Unnamed: 0,country,last,Lat,Long_,confirmed,deaths,recovered,active,Incident_Rate,mortality,UID,ISO3
0,US,2020-12-30 03:54:35,40.0,-100.0,19556928.0,338561.0,,19218367.0,5935.942161,1.731156,840,USA
1,India,2020-12-30 03:54:35,20.593684,78.96288,10224303.0,148153.0,9807569.0,268581.0,740.889168,1.449028,356,IND
2,Brazil,2020-12-30 03:54:35,-14.235,-51.9253,7563551.0,192681.0,6754111.0,616759.0,3558.323311,2.547494,76,BRA
3,Russia,2020-12-30 03:54:35,61.524,105.3188,3073923.0,55107.0,2470783.0,548033.0,2106.372271,1.792725,643,RUS
4,France,2020-12-30 03:54:35,46.2276,2.2137,2631110.0,64204.0,197726.0,2369180.0,4030.900007,2.440187,250,FRA


# Data Visualization - General Graphs

## Timeseries

### Code

In [None]:
def get_new_cases(country):
    """Return one country's confirmed-case series in long format.

    Unpivots ``confirmed_global`` into (country, date, cases) rows, keeps the
    rows whose ``country`` equals ``country``, and returns the ``date`` and
    ``cases`` columns with the index renumbered from 0.
    """
    long_form = (
        confirmed_global
        .melt(id_vars=["country"], var_name="date", value_name="cases")
        .loc[lambda rows: rows["country"] == country]
        .drop(["country"], axis=1)
    )
    long_form.index = list(range(len(long_form)))
    return long_form

In [None]:
def get_new_deaths(country):
    """Return one country's deaths series in long format.

    Unpivots ``deaths_global`` into (country, date, cases) rows, keeps the rows
    whose ``country`` equals ``country``, and returns the ``date`` and
    ``cases`` columns with the index renumbered from 0. The value column is
    named ``cases`` even though it holds death counts.
    """
    long_form = (
        deaths_global
        .melt(id_vars=["country"], var_name="date", value_name="cases")
        .loc[lambda rows: rows["country"] == country]
        .drop(["country"], axis=1)
    )
    long_form.index = list(range(len(long_form)))
    return long_form

In [None]:
def get_new_recoveries(country):
    """Return one country's recoveries series in long format.

    Unpivots ``recovered_global`` into (country, date, cases) rows, keeps the
    rows whose ``country`` equals ``country``, and returns the ``date`` and
    ``cases`` columns with the index renumbered from 0. The value column is
    named ``cases`` even though it holds recovery counts.
    """
    long_form = (
        recovered_global
        .melt(id_vars=["country"], var_name="date", value_name="cases")
        .loc[lambda rows: rows["country"] == country]
        .drop(["country"], axis=1)
    )
    long_form.index = list(range(len(long_form)))
    return long_form

In [None]:
def get_plot(time_series,name):
    """Build a single-colour bar chart of ``cases`` against ``date``.

    The bar colour is picked from ``name``: red when it contains "deaths",
    blue when it contains "cases", green otherwise.
    """
    if "deaths" in name:
        bar_color = "#f54842"
    elif "cases" in name:
        bar_color = "#45a2ff"
    else:
        bar_color = "#42f587"
    palette = [bar_color] * len(time_series)
    return px.bar(time_series, x="date", y="cases", color_discrete_sequence=palette)

In [None]:
def plot_timeseries(country_name, func_name, title, n=-90, daily=False):
    """Plot the tail of a country's time series as a bar chart.

    Parameters
    ----------
    country_name : str
        Country passed to ``func_name``.
    func_name : callable
        One of the ``get_new_*`` loaders; it must return a frame with ``date``
        and ``cases`` columns.
    title : str
        Figure title. If it contains "deaths" (any letter case) the y axis is
        labelled as deaths, otherwise as new cases.
    n : int, default -90
        Slice start applied to the series, i.e. ``series[n:]``; the default
        keeps the last 90 rows.
    daily : bool, default False
        When True, plot day-over-day differences instead of the raw values.

    Returns
    -------
    plotly figure
    """
    series = func_name(country_name)
    if daily:
        # Build a new frame rather than writing into a slice of the loader's
        # result (which triggers pandas' SettingWithCopy behaviour). The first
        # row is dropped because it has no previous value to difference against.
        series = series.iloc[1:].assign(cases=series["cases"].diff().iloc[1:])
    window = series[n:]

    # get_plot chooses the colour from the loader's name ("deaths"/"cases")
    loader_name = getattr(func_name, "__name__", str(func_name))
    fig = get_plot(window, loader_name)
    fig.update_layout(
        template='plotly_dark',
        title=title,
        xaxis_title="Date",
        # Case-insensitive so that a title such as "Deaths" is labelled correctly
        yaxis_title=f'Number of {"deaths" if "deaths" in title.lower() else "new cases"}',
    )
    return fig

### Examples

#### US - Confirmed Cases

In [None]:
plot_timeseries("US", get_new_cases, "Confirmed Cases")

#### India - Recoveries (Daily: Last month)

In [None]:
plot_timeseries("India", get_new_recoveries, "Recoveries",n = -30,daily = True)

## Inter-Country : Line Plot

### Code

In [None]:
def unpivot(df):
    """Turn a wide table into long (country, variable, value) rows.

    Every column after the first is treated as a value column; ``country`` is
    kept as the identifier.
    """
    value_columns = df.columns[1:]
    long_form = pd.melt(df, id_vars=["country"], value_vars=value_columns)
    return long_form

In [None]:
def compare(df,*args):
    """Return the long-format rows of ``df`` for the countries named in ``args``."""
    wanted = list(args)
    long_form = unpivot(df)
    keep = long_form["country"].isin(wanted)
    return long_form[keep]

In [None]:
def create_data(df):
    """Keep every fifth date of a long-format frame, counting back from the newest.

    Parameters
    ----------
    df : DataFrame
        Long-format frame with ``country``, ``variable`` (date label) and
        ``value`` columns, as produced by ``unpivot``/``compare``.

    Returns
    -------
    DataFrame
        The rows of ``df`` for the sampled dates, with the columns renamed to
        ``Country``, ``Date`` and ``Cases``. ``df`` itself is not modified.
    """
    labels = list(set(df["variable"]))
    try:
        # The labels are M/D/YY strings; sorting them as plain text puts
        # "10/1/20" before "9/30/20", so order them by the date they denote.
        parsed = pd.to_datetime(labels, format="%m/%d/%y")
        ordered = [labels[i] for i in parsed.argsort()]
    except (ValueError, TypeError):
        # Labels that are not M/D/YY dates: fall back to their natural ordering
        ordered = sorted(labels)
    ordered.reverse()  # newest first, so the latest date is always kept

    sampled = df[df["variable"].isin(ordered[::5])]
    # rename returns a new frame; renaming a filtered slice in place is unreliable
    return sampled.rename(
        columns={"country": "Country", "variable": "Date", "value": "Cases"}
    )

In [None]:
def static_line(df,*args):
    """Draw one line per requested country from the sampled long-format data."""
    selected = compare(df, *args)
    plot_data = create_data(selected)
    upper = plot_data["Cases"].max()
    fig = px.line(
        plot_data,
        x="Date",
        y="Cases",
        color="Country",
        template="plotly_dark",
        range_y=[0, upper],
    )
    fig.layout.update(hovermode="x")
    return fig

### Example

#### Recoveries - India, New Zealand, US, Brazil

In [None]:
static_line(recovered_global,"India","New Zealand","US","Brazil")

## Intra-Country : All 3 Studies

### Code

In [None]:
def line_comparison(country):
    """Plot confirmed, deaths and recovered counts of one country on one figure.

    The x axis uses the column labels of ``confirmed_global`` after its first
    column; each trace takes the matching row of its table with the leading
    country cell removed.
    """
    studies = (
        ("confirmed", confirmed_global),
        ("deaths", deaths_global),
        ("recovered", recovered_global),
    )

    whole_df = pd.DataFrame()
    whole_df["dates"] = list(confirmed_global.columns[1:])
    for label, table in studies:
        row = table.loc[table['country'] == country]
        whole_df[label] = list(row.values.flatten()[1:])

    fig = go.Figure()
    for label, _ in studies:
        fig.add_trace(
            go.Scatter(x=whole_df["dates"], y=whole_df[label], mode="lines", name=label)
        )

    fig.update_layout(
        height=500,
        showlegend=True,
        template="plotly_dark",
        title_text=f"Analysis of {country.title()}",
        hovermode='x',
    )
    return fig

### Example

#### India

In [None]:
line_comparison("India")

# Data Visualization - Animations

## Top Ten Affected 

### Code

In [None]:
def unpivot(df):
    """Turn a wide table into long (country, variable, value) rows.

    Redefines the identical ``unpivot`` from the line-plot section so that this
    section can be run on its own.
    """
    value_columns = df.columns[1:]
    return pd.melt(df, id_vars=["country"], value_vars=value_columns)

In [None]:
def take_top10(df):
    """Keep only the ten countries with the largest value on the last row's date.

    The reference date is the ``variable`` of the final row of ``df``; all
    rows (every date) of the ten leading countries are returned.
    """
    last_label = df['variable'][df.index[-1]]
    latest = df[df["variable"] == last_label]
    ranked = latest.sort_values(by=['value'], ascending=False)
    leaders = list(ranked.head(10)["country"])
    return df[df['country'].isin(leaders)]

In [None]:
def create_data(df):
    """Reduce a long-format frame to its top-10 countries and every fifth date.

    Parameters
    ----------
    df : DataFrame
        Long-format frame with ``country``, ``variable`` (date label) and
        ``value`` columns.

    Returns
    -------
    DataFrame
        Rows of the ten leading countries (see ``take_top10``) for every fifth
        date counting back from the newest, with the columns renamed to
        ``Country``, ``Date`` and ``Cases``. ``df`` itself is not modified.
    """
    new = take_top10(df)
    labels = list(set(new["variable"]))
    try:
        # The labels are M/D/YY strings; sorting them as plain text puts
        # "10/1/20" before "9/30/20", so order them by the date they denote.
        parsed = pd.to_datetime(labels, format="%m/%d/%y")
        ordered = [labels[i] for i in parsed.argsort()]
    except (ValueError, TypeError):
        # Labels that are not M/D/YY dates: fall back to their natural ordering
        ordered = sorted(labels)
    ordered.reverse()  # newest first, so the latest date is always kept

    sampled = new[new["variable"].isin(ordered[::5])]
    # rename returns a new frame; renaming a filtered slice in place is unreliable
    return sampled.rename(
        columns={"country": "Country", "variable": "Date", "value": "Cases"}
    )

In [None]:
def plot_fig(ff,Color):
    """Build an animated bar chart of cases per country, one frame per date."""
    palette = [Color] * len(ff)
    y_limits = [0, ff["Cases"].max()]
    fig = px.bar(
        ff,
        x="Country",
        y="Cases",
        color_discrete_sequence=palette,
        template="plotly_dark",
        animation_frame="Date",
        animation_group="Country",
        range_y=y_limits,
    )
    fig.layout.update(showlegend=False)
    return fig

In [None]:
def animated_barchart(df,name):
    """Animate the top-10 countries of ``df`` over time.

    ``name`` selects the bar colour: "deaths" is red, "confirmed" is blue and
    anything else is green.
    """
    if name == "deaths":
        color = "#f54842"
    elif name == "confirmed":
        color = "#45a2ff"
    else:
        color = "#42f587"
    long_form = unpivot(df)
    leaders = take_top10(long_form)
    frames = create_data(leaders)
    return plot_fig(frames, color)

### Examples

#### Confirmed Cases

In [None]:
animated_barchart(confirmed_global,"confirmed")

#### Deaths

In [None]:
animated_barchart(deaths_global,"deaths")

#### Recoveries 

In [None]:
animated_barchart(recovered_global,"recovered")

## Comparison (User's Choice)

### Code

In [None]:
def compare(df,*args):
    """Return the long-format rows of ``df`` for the countries named in ``args``.

    Redefines the identical ``compare`` from the line-plot section so that this
    section can be run on its own.
    """
    wanted = list(args)
    long_form = unpivot(df)
    return long_form[long_form["country"].isin(wanted)]

In [None]:
def create_comparison_animation(df,name,*args):
    """Animate the countries named in ``args`` over time.

    ``name`` selects the bar colour: "deaths" is red, "confirmed" is blue and
    anything else is green.
    """
    selected = compare(df, *args)
    frames = create_data(selected)
    if name == "deaths":
        color = "#f54842"
    elif name == "confirmed":
        color = "#45a2ff"
    else:
        color = "#42f587"
    return plot_fig(frames, color)

### Examples

#### Confirmed Cases - India, US, Australia, Brazil

In [None]:
create_comparison_animation(confirmed_global,"confirmed","India","US","Australia","Brazil")

#### Recoveries - India, US, Australia, Brazil

In [None]:
create_comparison_animation(recovered_global,"recovered","India","US","Australia","Brazil")

#### Deaths - India, US, Brazil

In [None]:
create_comparison_animation(deaths_global,"deaths","India","US","Brazil")

# Data Visualization - Choropleths

## Code

### Setting Credentials (Mapbox and Chart Studio)

In [None]:
# Set your credentials before running this cell!!
#
# The values are read from environment variables so that secrets never have to
# be typed into (and accidentally saved or committed with) the notebook:
#   CHART_STUDIO_USERNAME, CHART_STUDIO_API_KEY, MAPBOX_ACCESS_TOKEN
# If a variable is not set, the value falls back to the empty string, which is
# what this cell used before; replace the "" defaults only for throwaway local runs.
import os

chart_studio.tools.set_credentials_file(
    username=os.environ.get("CHART_STUDIO_USERNAME", ""),
    api_key=os.environ.get("CHART_STUDIO_API_KEY", ""),
)
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN", "")

### Formatting Data

In [None]:
def chainer(s):
    """Split every string of ``s`` on commas and return all pieces as one flat list."""
    pieces = []
    for parts in s.str.split(","):
        pieces.extend(parts)
    return pieces

In [None]:
def convert_df(df, cols):
    """Reshape a wide per-location table into one row per (location, study).

    Parameters
    ----------
    df : DataFrame
        Wide table holding one row per location. It is NOT modified: rows with
        missing values are dropped from a copy, so the caller's frame (for
        example the shared per-country table) keeps all of its rows and does
        not gain any helper column.
    cols : sequence
        ``[name_column, [study_column, ...], latitude_column, longitude_column]``.

    Returns
    -------
    DataFrame
        Columns ``Country``, ``Lat``, ``Long_``, ``Count`` and ``Study``. Each
        complete input row produces ``len(cols[1])`` output rows, in the order
        of ``cols[1]``, and keeps the input row's index label. ``Count`` holds
        the value converted with ``str``.
    """
    name_col, study_cols = cols[0], list(cols[1])
    lat_col, lon_col = cols[-2], cols[-1]

    # dropna() returns a new frame; the previous in-place drop silently removed
    # rows from the frame the caller passed in.
    clean = df.dropna()
    n_studies = len(study_cols)

    # Row-major listing of the study values: all studies of the first location,
    # then all studies of the second, and so on.
    study_values = [clean[name].values for name in study_cols]
    counts = [
        str(values[row]) for row in range(len(clean)) for values in study_values
    ]

    return pd.DataFrame(
        {
            "Country": clean[name_col].repeat(n_studies),
            "Lat": clean[lat_col].repeat(n_studies),
            "Long_": clean[lon_col].repeat(n_studies),
            "Count": counts,
            "Study": study_cols * len(clean),
        }
    )

### Creating Trace

In [None]:
def create_hovertemplate(df, study, country):
    """Return the hover text for one country and study: an emoji plus the count.

    Parameters
    ----------
    df : DataFrame
        Long table with ``Study``, ``Country`` and ``Count`` columns.
    study : str
        Study to look up. "deaths" and "recovered" (compared case-insensitively)
        each get their own emoji; any other study gets the hospital emoji.
    country : str
        Country to look up.

    Returns
    -------
    str
        ``"<emoji>: <count>"`` with the count truncated to an integer and
        written with thousands separators.

    Raises
    ------
    TypeError
        If there is not exactly one matching row (the same exception type that
        converting a multi- or zero-element Series with ``float`` raised).
    ValueError
        If the stored count cannot be converted to a number.
    """
    study_key = study.lower()
    if study_key == "deaths":
        emoji = "💀"
    elif study_key == "recovered":
        emoji = "😷"
    else:
        emoji = "🏥"

    matches = df.loc[(df["Study"] == study) & (df["Country"] == country), "Count"]
    if len(matches) != 1:
        raise TypeError(
            f"expected exactly one {study!r} row for {country!r}, found {len(matches)}"
        )
    # Take the scalar explicitly: calling float() on a Series is deprecated in pandas
    count = int(float(matches.iloc[0]))
    return f"{emoji}: {format(count, ',d')}"

In [None]:
def create_data(df, study, color):
    """Build one ``scattermapbox`` trace per country for the given study.

    Parameters
    ----------
    df : DataFrame
        Long table with ``Country``, ``Study``, ``Count``, ``Lat`` and
        ``Long_`` columns. It is not modified; rows with missing values are
        ignored.
    study : str
        Value of the ``Study`` column to plot.
    color : str
        Marker colour.

    Returns
    -------
    list of dict
        One trace per country, in the order given by ``value_counts`` on the
        ``Country`` column. The marker size is the base-1.5 logarithm of the
        count. Countries without exactly one usable count (no row, several
        rows, a non-numeric or non-positive value) are skipped.
    """
    countries = list(df["Country"].value_counts().index)
    # Filter a copy instead of dropping rows from the caller's frame in place
    df = df.dropna()
    data = []

    for country in countries:
        selected = (df["Study"] == study) & (df["Country"] == country)
        counts = df.loc[selected, "Count"]
        if len(counts) != 1:
            continue
        try:
            size = log(float(counts.iloc[0]), 1.5)
            hover = create_hovertemplate(df, study, country)
        except (TypeError, ValueError, OverflowError):
            # Only data problems are skipped (bad number, log of a value <= 0);
            # anything else, such as an interrupt or a missing column, propagates.
            continue

        data.append(
            dict(
                lat=df.loc[selected, "Lat"],
                lon=df.loc[selected, "Long_"],
                name=f"{country}",
                marker={"size": size, "opacity": 0.5, "color": color},
                type="scattermapbox",
                hovertemplate=hover,
            )
        )

    return data

### Creating Layout

In [None]:
def create_basic_layout(latitude, longitude, zoom):
    """Return the base layout dict of a dark Mapbox figure centred on a point.

    Reads the module-level ``mapbox_access_token`` at call time.
    """
    mapbox = dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(lat=latitude, lon=longitude),
        pitch=0,
        zoom=zoom,
        style="dark",
    )
    return dict(
        height=700,
        margin=dict(t=0, b=0, l=0, r=0),
        font=dict(color="#FFFFFF", size=15),
        paper_bgcolor="#000000",
        showlegend=False,
        mapbox=mapbox,
    )

In [None]:
def update_layout(study, layout):
    """Add the title, corner annotation and hover-label style for ``study``.

    ``layout`` is modified in place and also returned.
    """
    heading = study.capitalize()
    caption = dict(
        text=f"{heading} Cases",
        font=dict(color="#FFFFFF", size=14),
        borderpad=10,
        x=0.05,
        y=0.05,
        xref="paper",
        yref="paper",
        align="left",
        showarrow=False,
        bgcolor="black",
    )
    layout.update(
        title=f"{heading}",
        annotations=[caption],
        hoverlabel={"font_size": 16, "font_family": "Rockwell", "font_color": "black"},
    )
    return layout

In [None]:
def get_lat_long(country, coord_df=country_cases_sorted):
    """Return ``(latitude, longitude)`` of a country as floats.

    Parameters
    ----------
    country : str
        Value to look up in the ``country`` column.
    coord_df : DataFrame
        Table with ``country``, ``Lat`` and ``Long_`` columns. The default is
        the ``country_cases_sorted`` object that exists when this function is
        defined.

    Raises
    ------
    TypeError
        If ``coord_df`` does not contain exactly one row for ``country`` (the
        same exception type that converting a multi- or zero-element Series
        with ``float`` raised).
    """
    rows = coord_df.loc[coord_df["country"] == country, ["Lat", "Long_"]]
    if len(rows) != 1:
        raise TypeError(
            f"expected exactly one coordinate row for {country!r}, found {len(rows)}"
        )
    # Read the scalars explicitly: calling float() on a Series is deprecated in pandas
    lat = float(rows["Lat"].iloc[0])
    long = float(rows["Long_"].iloc[0])
    return lat, long

### Getting Data For Country Plot

In [None]:
def get_country_wise_data(timeout=30):
    """Download the province-level JHU CSSE data and return the decoded JSON.

    Parameters
    ----------
    timeout : float, default 30
        Seconds to wait for the server. Without a timeout ``requests`` waits
        indefinitely, which would hang a "Run All" when the service is down.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, instead of failing later
        while decoding an error page as JSON.
    """
    response = requests.get("https://corona.lmao.ninja/v2/jhucsse", timeout=timeout)
    response.raise_for_status()
    return response.json()

In [None]:
def choose_country(array, country):
    """Return the entries of ``array`` whose ``"country"`` value equals ``country``."""
    return list(filter(lambda entry: entry["country"] == country, array))

In [None]:
def get_country_frame(country):
    """Turn a list of province records into a DataFrame.

    Each record supplies ``coordinates`` (latitude, longitude), ``stats``
    (confirmed, recovered, deaths) and ``province``. Rows whose province is
    "Unknown" are removed from the result.
    """
    coords = [record["coordinates"] for record in country]
    stats = [record["stats"] for record in country]
    names = [record["province"] for record in country]

    frame = pd.DataFrame()
    frame["Provinces"] = names
    # (output column, key inside the nested dict, list of nested dicts)
    layout = (
        ("lat", "latitude", coords),
        ("lon", "longitude", coords),
        ("Confirmed", "confirmed", stats),
        ("Recoveries", "recovered", stats),
        ("Deaths", "deaths", stats),
    )
    for column, key, source in layout:
        frame[column] = [item[key] for item in source]

    return frame[frame["Provinces"] != "Unknown"]

### Creating Figure Object

In [None]:
def interactive_map(data, layout):
    """Bundle traces and layout into the figure dict that plotly expects."""
    return dict(data=data, layout=layout)

### Final Function

#### Global Plot Function

In [None]:
def plot_study(
    starting_df,
    cols,
    study_dict,
    location="global",
    zoom=2,
    latitude=20.59,
    longitude=78.96,
):
    """Build the bubble-map figure dict for one study.

    ``study_dict`` supplies the ``"study"`` name and the marker ``"color"``;
    ``cols`` describes the columns of ``starting_df`` as expected by
    ``convert_df``. ``location`` is accepted but not used by this function.
    """
    marker_color, study_name = study_dict["color"], study_dict["study"]
    long_table = convert_df(starting_df, cols)
    traces = create_data(long_table, study_name, marker_color)
    base_layout = create_basic_layout(latitude, longitude, zoom)
    return interactive_map(traces, update_layout(study_name, base_layout))

#### Country Plot Function

In [None]:
def plot_country(Country, data, study):
    """Build the province-level bubble map of one country for one study.

    ``data`` is the list of province records; ``study`` is "Confirmed",
    "Deaths" or anything else (drawn in green). The map is centred on the
    coordinates returned by ``get_lat_long``.
    """
    provinces = choose_country(data, Country)
    frame = get_country_frame(provinces)
    columns = ["Provinces", ["Confirmed", "Recoveries", "Deaths"], "lat", "lon"]

    if study == "Confirmed":
        color = "#45a2ff"
    elif study == "Deaths":
        color = "#f54842"
    else:
        color = "#42f587"
    study_spec = {"study": study.title(), "color": color}

    centre_lat = get_lat_long(Country)[0]
    centre_lon = get_lat_long(Country)[1]
    return plot_study(
        frame,
        columns,
        study_spec,
        provinces,
        zoom=4.5,
        latitude=centre_lat,
        longitude=centre_lon,
    )

## Examples

### Global - Confirmed Cases

In [None]:
# Study name and marker colour of each of the three studies
confirmed = {"study": "confirmed", "color": "#45a2ff"}
recovered = {"study": "recovered", "color": "#42f587"}
deaths = {"study": "deaths", "color": "#f54842"}

# [name column, study columns, latitude column, longitude column]
columns = ["country", ["deaths", "confirmed", "recovered"], "Lat", "Long_"]

figure = plot_study(country_cases_sorted, columns, confirmed)
py.iplot(figure)

### Japan - Recovered Cases

In [None]:
figure= plot_country("Japan",get_country_wise_data(),"Recoveries")
py.iplot(figure)

# Working With The Latest Data - Of Individual Countries

## Getting Data

In [None]:
def get_today_data(timeout=30):
    """Download the global summary and the province-level data.

    Parameters
    ----------
    timeout : float, default 30
        Seconds to wait for each request. Without a timeout ``requests`` waits
        indefinitely, which would hang a "Run All" when the service is down.

    Returns
    -------
    tuple
        ``(today_data, today_country_data)``: the decoded JSON of the ``/v2/all``
        and ``/v2/jhucsse`` endpoints, in that order.

    Raises
    ------
    requests.HTTPError
        If either endpoint answers with an error status.
    """
    today_response = requests.get(
        "https://corona.lmao.ninja/v2/all?yesterday", timeout=timeout
    )
    today_response.raise_for_status()

    country_response = requests.get(
        "https://corona.lmao.ninja/v2/jhucsse", timeout=timeout
    )
    country_response.raise_for_status()

    return today_response.json(), country_response.json()

## Formatting Data 

In [None]:
def cases_object(array):
    """Sum the confirmed, deaths and recovered stats over all entries.

    Returns a dict with those three totals plus ``"updatedAt"``, the list of
    every entry's update timestamp.
    """
    totals = {}
    for study in ("confirmed", "deaths", "recovered"):
        totals[study] = sum(entry["stats"][study] for entry in array)
    totals["updatedAt"] = [entry["updatedAt"] for entry in array]
    return totals

In [None]:
def choose_country(array, country):
    """Return the entries of ``array`` whose ``"country"`` value equals ``country``.

    Redefines the identical ``choose_country`` from the map section so that
    this section can be run on its own.
    """
    matches = []
    for entry in array:
        if entry["country"] == country:
            matches.append(entry)
    return matches

In [None]:
def get_final_object(country, array):
    """Return the summed stats (see ``cases_object``) of one country's entries."""
    country_entries = choose_country(array, country)
    return cases_object(country_entries)

In [None]:
def get_country_frame(country):
    """Turn a list of province records into a DataFrame.

    Redefines the identical ``get_country_frame`` from the map section so that
    this section can be run on its own. Rows whose province is "Unknown" are
    removed from the result.
    """
    def pluck(key, records):
        # Collect records[i][key] for every record
        return [record[key] for record in records]

    coords = pluck("coordinates", country)
    stats = pluck("stats", country)
    names = pluck("province", country)

    frame = pd.DataFrame()
    frame["Provinces"] = names
    frame["lat"] = pluck("latitude", coords)
    frame["lon"] = pluck("longitude", coords)
    frame["Confirmed"] = pluck("confirmed", stats)
    frame["Recoveries"] = pluck("recovered", stats)
    frame["Deaths"] = pluck("deaths", stats)

    known = frame["Provinces"] != "Unknown"
    return frame[known]

## Visualizing The Data

In [None]:
# Fetch the latest global summary and province-level records (network calls)
today_data,today_country_data = get_today_data()
# Province-level table for India, reused by the bar-chart and table examples below
country_stats = get_country_frame(choose_country(today_country_data, "India"))

### Bar Chart

In [None]:
def plot_province(data, metric, metric_name):
    """Draw a bar chart of ``data[metric]`` per province.

    ``metric_name`` is accepted but not used: the y axis is always titled "Cases".
    """
    bars = go.Bar(x=data["Provinces"], y=data[metric])
    fig = go.Figure()
    fig.add_trace(bars)

    heading = dict(text="Province Details", y=0.9, x=0.5, xanchor="center", yanchor="top")
    fig.update_layout(
        title=heading,
        template="plotly_dark",
        xaxis_title="Province",
        yaxis_title="Cases",
    )
    return fig



#### Example - India

In [None]:
plot_province(country_stats, "Confirmed", "Confirmed Cases")

### Table Form

In [None]:
def table_province_data(data, metric):
    """Return a two-column table of provinces and a thousands-formatted metric.

    Returns ``None`` when the table would have at most one row.
    """
    table = pd.DataFrame(data={"Provinces": data["Provinces"], metric: data[metric]})
    table[metric] = table[metric].map(lambda value: format(value, ",d"))
    return table if len(table) > 1 else None

#### Example - India

In [None]:
table_province_data(country_stats, "Confirmed")

Unnamed: 0,Provinces,Confirmed
0,Andaman and Nicobar Islands,4929
1,Andhra Pradesh,881273
2,Arunachal Pradesh,16696
3,Assam,215997
4,Bihar,250390
5,Chandigarh,19551
6,Chhattisgarh,276337
7,Dadra and Nagar Haveli and Daman and Diu,3374
8,Delhi,623415
9,Goa,50772


# Predictive TimeSeries Model : CNN

## Code

### Formatting The Data

In [None]:
def get_data(confirmed=None, deaths=None, recovered=None):
    """Return date-indexed, country-per-column versions of the three tables.

    Parameters
    ----------
    confirmed, deaths, recovered : DataFrame, optional
        Wide tables with a ``country`` column followed by one column per date.
        When omitted, the current ``confirmed_global``, ``deaths_global`` and
        ``recovered_global`` are used. They are looked up at call time, so a
        re-downloaded or corrected table is picked up without re-running this
        cell (defaults bound in the signature would keep pointing at the old
        objects).

    Returns
    -------
    tuple of DataFrame
        ``(deaths, recovered, confirmed)``; note the order. Each has a
        datetime index and one column per country.
    """
    if confirmed is None:
        confirmed = confirmed_global
    if deaths is None:
        deaths = deaths_global
    if recovered is None:
        recovered = recovered_global

    def _by_date(table):
        # One column per country, one row per date, with real datetime labels.
        # The deprecated ``infer_datetime_format`` flag is not needed: pandas
        # parses the labels without it.
        by_date = table.groupby("country").sum().T
        by_date.index = pd.to_datetime(by_date.index)
        return by_date

    return _by_date(deaths), _by_date(recovered), _by_date(confirmed)

In [None]:
def create_data_frame(dataframe, country):
    """Return a country's cumulative series and its day-over-day differences.

    Parameters
    ----------
    dataframe : str
        Which study to use: "deaths", "recovered" or "confirmed".
    country : str
        Country column to extract.

    Returns
    -------
    tuple of DataFrame
        ``(data, data_diff)``. ``data`` has a single ``Total`` column indexed
        by date, with every zero row removed; ``data_diff`` is its first
        difference without the leading NaN row.

    Raises
    ------
    ValueError
        If ``dataframe`` is not one of the three study names (previously this
        surfaced as an UnboundLocalError further down).
    """
    deaths, recovered, confirmed = get_data()
    sources = {"deaths": deaths, "recovered": recovered, "confirmed": confirmed}
    if dataframe not in sources:
        raise ValueError(
            f"dataframe must be one of {sorted(sources)}, got {dataframe!r}"
        )
    source = sources[dataframe]

    data = pd.DataFrame(
        index=source.index, data=source[country].values, columns=["Total"]
    )

    # Keep only the rows whose total is non-zero
    data = data[(data != 0).all(axis=1)]

    # The first difference has no previous value (NaN after differencing), so drop it
    data_diff = data.diff()[1:]

    return data, data_diff

### Series Creation

In [None]:
def make_series(df_name, country, steps):
    """Cut the differenced series into sliding windows for supervised learning.

    Every window of ``steps`` consecutive differences becomes one row of ``X``
    and the value that follows it becomes the matching entry of ``y``.

    Returns ``(data, data_diff, X, y)`` where the first two come from
    ``create_data_frame`` and the last two are numpy arrays.
    """
    data, data_diff = create_data_frame(df_name, country)

    # Differenced totals as a plain array
    series = np.array(data_diff["Total"])

    # A window starting at i is usable only while its target index i + steps exists
    last_index = len(series) - 1
    starts = [i for i in range(len(series)) if i + steps <= last_index]

    X = [series[i:i + steps] for i in starts]
    y = [series[i + steps] for i in starts]

    return data, data_diff, np.array(X), np.array(y)

### Error : MASE

In [None]:
def mase(y_true, y_pred):
    """Return the MASE of ``y_pred`` against ``y_true`` as computed by ``FindErrors``."""
    return FindErrors(y_true, y_pred).mase()

### Parameter Grid

In [None]:
def create_param_grid():
    """Return the ``ParameterGrid`` of CNN hyperparameters to search.

    Filters, dense nodes and epochs each take the values 60 and 70; the two
    activation slots each take "swish", "relu" or "tanh".
    """
    sizes = (60, 70)
    activations = ("swish", "relu", "tanh")
    search_space = dict(
        filters=sizes,
        nodes=sizes,
        epochs=sizes,
        activation1=activations,
        activation2=activations,
    )
    return ParameterGrid(search_space)

### Compiling The Model

In [None]:

def compile_model(p):
    """Build and compile the 1-D CNN described by the parameter dict ``p``.

    ``p`` supplies ``filters`` and ``activation1`` for the convolution and
    ``nodes`` and ``activation2`` for the hidden dense layer. The network takes
    inputs of shape (14, 1) and is compiled with the Adam optimiser and MSE loss.
    """
    convolution = Conv1D(
        filters=p["filters"],
        kernel_size=2,
        activation=p["activation1"],
        input_shape=(14, 1),
    )
    hidden = Dense(p["nodes"], activation=p["activation2"])

    model = Sequential()
    for layer in (convolution, MaxPooling1D(pool_size=2), Flatten(), hidden, Dense(1)):
        model.add(layer)
    model.compile(optimizer="adam", loss="mse")

    return model

### Hyperparameter Tuning

In [None]:
def hyperparameter_tuning(grid, X_train, y_train):
    """Fit one model per parameter set and record its MASE on the training data.

    Parameters
    ----------
    grid : iterable of dict
        Parameter sets; each needs the keys used by ``compile_model`` plus
        ``epochs``.
    X_train : ndarray
        Two-dimensional array of input windows; a trailing axis of length 1 is
        added before fitting.
    y_train : ndarray
        Targets matching ``X_train``.

    Returns
    -------
    DataFrame
        Columns ``MASE`` and ``Parameters``, one row per parameter set in grid
        order. Note that the error is measured on the same data the model was
        fitted to.
    """
    # The model expects a trailing axis of length 1; reshape once, not per iteration
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

    # Collect plain records and build the frame once: DataFrame.append copied
    # the whole frame on every call and no longer exists in pandas 2.x.
    records = []
    for p in grid:
        model = compile_model(p)
        model.fit(X_train, y_train, epochs=p["epochs"], verbose=0)

        # Flatten the predictions to a 1-D array for the error computation
        predictions = model.predict(X_train, verbose=0).flatten()

        records.append({"MASE": mase(y_train, predictions), "Parameters": p})

    return pd.DataFrame(records, columns=["MASE", "Parameters"])

In [None]:
def get_best_params(parameters):
    """Return the parameter set of the row with the smallest MASE.

    After the index is moved into a column, the value is read from the third
    position of the best row, i.e. the ``Parameters`` column of a frame whose
    columns are ``MASE`` then ``Parameters``.
    """
    ranked = parameters.sort_values("MASE").reset_index()
    best_row = ranked.iloc[0]
    return best_row.values[2]

### Testing The Model

In [None]:
def test_model(p, X_train, X_test, y_train, y_test, data):
    """Fit a model on the training windows and score it on the held-out tail.

    Parameters
    ----------
    p : dict
        Parameter set for ``compile_model`` plus ``epochs``.
    X_train, X_test : ndarray
        Two-dimensional arrays of input windows; a trailing axis of length 1 is
        added before use.
    y_train, y_test : ndarray
        Targets (differences) matching the inputs.
    data : DataFrame
        Cumulative series with a ``Total`` column whose last ``len(y_test)``
        rows are the test period.

    Returns
    -------
    The MASE between the actual cumulative totals of the test period and the
    cumulative totals rebuilt from the predicted differences.
    """
    model = compile_model(p)

    # Reshape both sets to the input shape the model requires
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    model.fit(X_train, y_train, epochs=p["epochs"], verbose=0)

    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    predictions = model.predict(X_test, verbose=0).flatten()

    totals = data["Total"]
    # Last known total before the test period. Use .iloc: an integer inside []
    # on a date-indexed Series is a deprecated positional lookup.
    running_total = totals.iloc[-len(y_test) - 1]

    # Rebuild cumulative totals by adding the predicted differences step by step
    predictions_cumulative = []
    for step in predictions:
        running_total = running_total + step
        predictions_cumulative.append(running_total)

    # The actual cumulative values of the test period
    y_test_cumulative = totals.iloc[-len(y_test):]

    return mase(y_test_cumulative, predictions_cumulative)

### Fitting The Final Model

In [None]:
def make_final_model(p, X, y):
    """Build a model from ``p`` and fit it on the complete data set.

    ``p`` is handed to ``compile_model`` and supplies the number of epochs
    through ``p["epochs"]``. ``X`` is given a trailing axis of length one
    before fitting. The fitted model is returned.
    """
    n_samples, n_steps = X.shape[0], X.shape[1]

    # add the trailing axis required by the model's input shape
    X_3d = X.reshape(n_samples, n_steps, 1)

    final_model = compile_model(p)
    final_model.fit(X_3d, y, epochs=p["epochs"], verbose=0)
    return final_model

### Forecasting The Next 14 Days

In [None]:
def forecast(data_diff, data, n, model, window=14):
    """Forecast the next ``n`` cumulative totals one step at a time.

    Each step feeds the model the most recent ``window`` values, taken from
    ``data_diff["Total"]`` followed by the values predicted so far, and
    appends the model's output. The predicted values are then accumulated on
    top of the last ``data["Total"]`` value.

    Parameters
    ----------
    data_diff : pandas.DataFrame
        Frame with a ``"Total"`` column used as model input history.
    data : pandas.DataFrame
        Frame with a ``"Total"`` column; its last value is the starting point
        of the cumulative forecast.
    n : int
        Number of steps to forecast.
    model
        Object with ``predict(array, verbose=0)`` accepting an input of shape
        ``(1, window, 1)``.
    window : int, default 14
        Number of past values fed to the model at each step. Previously the
        input length was tied to ``n`` while the reshape was fixed at 14, so
        only ``n == 14`` worked; the default keeps that case unchanged.

    Returns
    -------
    list
        ``n`` cumulative forecast values.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1 or ``data_diff`` has fewer than
        ``window`` rows.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    history = list(data_diff["Total"].iloc[-window:])
    if len(history) < window:
        raise ValueError(
            "data_diff needs at least {} rows, got {}".format(window, len(history))
        )

    predicted_steps = []
    for _ in range(n):
        # most recent `window` values: observed ones first, then the values
        # predicted in earlier iterations
        recent = (history + predicted_steps)[-window:]
        inp = np.array(recent).reshape(1, window, 1)
        future = model.predict(inp, verbose=0)
        predicted_steps.append(future.flatten()[0])

    # accumulate the predicted steps on top of the last known total
    # (.iloc keeps the lookup positional regardless of the index type)
    forecast_cumulative = []
    running_total = data["Total"].iloc[-1]
    for step in predicted_steps:
        running_total = running_total + step
        forecast_cumulative.append(running_total)

    return forecast_cumulative

### Plotting The Forecast

In [None]:
def plot_graph(data, pred):
    """Plot the observed totals together with the predicted values.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``"Total"`` column; its index supplies the x axis and
        its last index value is the day before the first prediction.
    pred : sequence
        Predicted values, one per day following the last index value.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with one line for the observed data ("Up till now") and one
        for the predictions ("Predictions*"), using the dark template.
    """
    # date_range starts at the last observed date, so one extra period is
    # requested and dropped; the length follows pred instead of a fixed 14
    datelist = pd.date_range(data.index[-1], periods=len(pred) + 1).tolist()[1:]

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=data.index, y=data["Total"],
                   mode="lines", name="Up till now")
    )
    fig.add_trace(go.Scatter(x=datelist, y=pred,
                             mode="lines", name="Predictions*"))
    fig.update_layout(template="plotly_dark")

    return fig

### Flatline Check - Naive Forecast

In [None]:
def check_slope(x, y):
    """Tell whether the most frequent point-to-point slope is non-zero.

    The slope between consecutive points is ``diff(y) / diff(x)``. Returns
    ``False`` when the most common slope equals 0 (a flat line dominates)
    and ``True`` otherwise, including when there are no slopes at all.
    """
    slopes = np.diff(y) / np.diff(x)
    dominant = Counter(slopes).most_common(1)
    return all(slope != 0 for slope, _count in dominant)

In [None]:
def naive_forecast(study, country):
    """Build a flat 14-day forecast that repeats the last observed total.

    Parameters
    ----------
    study, country
        Passed unchanged to ``create_data_frame``, whose first return value
        is used as the data frame (it must have a ``"Total"`` column).

    Returns
    -------
    tuple
        ``(1, fig, predictions)``: the constant 1 that callers use as the
        score of this fallback, the figure produced by ``plot_graph``, and
        the list of 14 identical predicted values.
    """
    df, _ = create_data_frame(study, country)

    # .iloc keeps the lookup positional whatever the index type is
    predictions = [df["Total"].iloc[-1]] * 14

    # plot_graph draws the same two traces this function used to build inline
    fig = plot_graph(df, predictions)

    return 1, fig, predictions

### Final Function

In [None]:
def cnn_predict(df_name, country):
    """Run the whole pipeline for one study/country pair.

    Steps: build the series with a window of 14, split it chronologically,
    tune the parameters on the training part, score the best parameters on
    the test part, then either forecast with a model fitted on everything or
    fall back to ``naive_forecast``.

    Returns a tuple ``(predictions, MASE, fig)`` where ``predictions`` is a
    DataFrame with the columns ``"Date"`` (``dd/mm/YYYY`` strings) and
    ``"Cases"`` for the 7 days following the last observation.
    """
    data, data_diff, X, y = make_series(df_name, country, 14)

    # the first 17/20 (85%) of the rows form the training part
    split = len(data_diff) * 17 // 20
    X_train, y_train = X[:split], y[:split]
    X_test, y_test = X[split:], y[split:]

    tuning_results = hyperparameter_tuning(create_param_grid(), X_train, y_train)
    p = get_best_params(tuning_results)
    MASE = test_model(p, X_train, X_test, y_train, y_test, data).round(2)

    # The model is kept when it scores at most 1 or, failing that, when the
    # last five totals are not dominated by a zero slope.
    use_cnn = MASE <= 1 or check_slope([1, 2, 3, 4, 5], data.Total[-5:])
    if not use_cnn:
        MASE, fig, f = naive_forecast(df_name, country)
    else:
        cnn = make_final_model(p, X, y)
        f = [int(value) for value in forecast(data_diff, data, 14, cnn)]
        fig = plot_graph(data, f)

    # only the first week after the last observation goes into the table
    next_week = pd.date_range(data.index[-1], periods=8).tolist()[1:]
    date_labels = [day.strftime('%d/%m/%Y') for day in next_week]
    predictions = pd.DataFrame(data={"Date": date_labels, "Cases": f[:7]})

    return predictions, MASE, fig

## Examples

### India - Confirmed Cases

In [None]:
# Forecast India's confirmed cases; the MASE in the middle slot is not needed here
pred, _, figure = cnn_predict(df_name="confirmed", country="India")

In [None]:
# Forecast table (Date / Cases) returned by cnn_predict
pred

Unnamed: 0,Date,Cases
0,29/12/2020,10245509
1,30/12/2020,10266224
2,31/12/2020,10286461
3,01/01/2021,10306163
4,02/01/2021,10324129
5,03/01/2021,10340682
6,04/01/2021,10356696


In [None]:
# Render the figure returned by cnn_predict
figure.show()

### US - Deaths

In [None]:
# Forecast US deaths; the MASE in the middle slot is not needed here
pred, _, figure = cnn_predict(df_name="deaths", country="US")

In [None]:
# Forecast table (Date / Cases) returned by cnn_predict
pred

Unnamed: 0,Date,Cases
0,29/12/2020,337674
1,30/12/2020,340742
2,31/12/2020,343300
3,01/01/2021,345195
4,02/01/2021,346657
5,03/01/2021,347788
6,04/01/2021,349487


In [None]:
# Render the figure returned by cnn_predict
figure.show()

### Japan - Recoveries

In [None]:
# Forecast Japan's recoveries; the MASE in the middle slot is not needed here
pred, _, figure = cnn_predict(df_name="recovered", country="Japan")

In [None]:
# Forecast table (Date / Cases) returned by cnn_predict
pred

Unnamed: 0,Date,Cases
0,29/12/2020,186687
1,30/12/2020,189025
2,31/12/2020,191448
3,01/01/2021,193948
4,02/01/2021,196280
5,03/01/2021,198544
6,04/01/2021,200780


In [None]:
# Render the figure returned by cnn_predict
figure.show()