# 1. Import Dependencies and data

In [59]:
import pandas as pd
import os
import plotly.express as px
import plotly.graph_objects as go

Fetch the data from the **COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University**

In [60]:
# JHU CSSE global time series of confirmed cases (one column per reporting date).
CASES_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
cases = pd.read_csv(CASES_URL)

In [61]:
# JHU CSSE global time series of deaths (same layout as the confirmed-cases file).
DEATHS_URL = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
deaths = pd.read_csv(DEATHS_URL)

# 2. Preprocess the Cases Data

In [62]:
# Size check of the raw confirmed-cases frame as (rows, columns).
cases.shape

(274, 459)

In [63]:
# Display the raw frame: identifying columns first, then one column per date.
cases

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/11/21,4/12/21,4/13/21,4/14/21,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,57160,57242,57364,57492,57534,57612,57721,57793,57898,58037
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,128393,128518,128752,128959,129128,129307,129456,129594,129694,129842
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,118516,118645,118799,118975,119142,119323,119486,119642,119805,119992
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,12545,12581,12614,12641,12641,12712,12771,12805,12805,12874
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,23457,23549,23697,23841,23951,24122,24300,24389,24518,24661
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269,,Vietnam,14.058324,108.277199,0,2,2,2,2,2,...,2693,2705,2714,2733,2758,2772,2781,2785,2791,2800
270,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,268132,270856,272767,274690,276407,278135,279753,280741,282270,284280
271,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,5357,5446,5507,5582,5657,5715,5770,5812,5858,5918
272,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,90029,90064,90218,90389,90532,90750,90844,90918,90942,91042


In [64]:
# Compare the number of non-null country entries with the number of distinct
# country names to see whether any country occupies more than one row.
country_col = cases["Country/Region"]
countries = country_col.count()
unique_countries = country_col.nunique(dropna=True)
print("There are ", countries, "entries for the country column while unique no of countries is ", unique_countries)

There are  274 entries for the country column while unique no of countries is  192


This means some countries are repeated, because they are split into separate divisions in the Province/State column (like the US and Australia).

In [65]:
# Number of distinct province/state names, ignoring missing entries.
cases["Province/State"].dropna().nunique()

85

##### Drop the Province/State column, then group all rows by country.

In [115]:
# df1 is the working frame for the cases pipeline. The province column is
# removed so the remaining rows can be aggregated per country.
df1 = cases.drop(columns=['Province/State'])
df1.shape

(274, 458)

In [116]:
# Distinct country names; this is the row count expected after grouping.
df1["Country/Region"].dropna().nunique()

192

##### After grouping by Country/Region, the number of rows must equal the unique-country count shown above (192).

In [117]:
# Collapse the per-province rows into a single row per country by summing all
# remaining columns, then restore the group key as an ordinary column.
# Lat/Long are summed too, which is meaningless; they are dropped further down.
df1 = df1.groupby('Country/Region').sum().reset_index()
df1.shape

(192, 458)

##### Great, they match up, the groupby operation was successful

In [118]:
# Inspect the per-country aggregate produced by the groupby above.
df1

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,4/11/21,4/12/21,4/13/21,4/14/21,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21
0,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,0,...,57160,57242,57364,57492,57534,57612,57721,57793,57898,58037
1,Albania,41.153300,20.168300,0,0,0,0,0,0,0,...,128393,128518,128752,128959,129128,129307,129456,129594,129694,129842
2,Algeria,28.033900,1.659600,0,0,0,0,0,0,0,...,118516,118645,118799,118975,119142,119323,119486,119642,119805,119992
3,Andorra,42.506300,1.521800,0,0,0,0,0,0,0,...,12545,12581,12614,12641,12641,12712,12771,12805,12805,12874
4,Angola,-11.202700,17.873900,0,0,0,0,0,0,0,...,23457,23549,23697,23841,23951,24122,24300,24389,24518,24661
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Vietnam,14.058324,108.277199,0,2,2,2,2,2,2,...,2693,2705,2714,2733,2758,2772,2781,2785,2791,2800
188,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,0,...,268132,270856,272767,274690,276407,278135,279753,280741,282270,284280
189,Yemen,15.552727,48.516388,0,0,0,0,0,0,0,...,5357,5446,5507,5582,5657,5715,5770,5812,5858,5918
190,Zambia,-13.133897,27.849332,0,0,0,0,0,0,0,...,90029,90064,90218,90389,90532,90750,90844,90918,90942,91042


##### For the project, we don't need the Latitude and Longitude columns of our data.

In [119]:
# Coordinates are not used for the map (it is keyed on country codes).
df1 = df1.drop(columns=['Lat', 'Long'])

##### Obtain the country codes from the Plotly Express documentation. They're needed to plot the choropleth map.

In [120]:
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
# df

##### Drop the GDP column since we do not need it.

In [121]:
# df=df.drop('GDP (BILLIONS)',axis=1)
# df

In [122]:
# Inspect df1 after the coordinate columns were removed: country + date columns.
df1

Unnamed: 0,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,4/11/21,4/12/21,4/13/21,4/14/21,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,57160,57242,57364,57492,57534,57612,57721,57793,57898,58037
1,Albania,0,0,0,0,0,0,0,0,0,...,128393,128518,128752,128959,129128,129307,129456,129594,129694,129842
2,Algeria,0,0,0,0,0,0,0,0,0,...,118516,118645,118799,118975,119142,119323,119486,119642,119805,119992
3,Andorra,0,0,0,0,0,0,0,0,0,...,12545,12581,12614,12641,12641,12712,12771,12805,12805,12874
4,Angola,0,0,0,0,0,0,0,0,0,...,23457,23549,23697,23841,23951,24122,24300,24389,24518,24661
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Vietnam,0,2,2,2,2,2,2,2,2,...,2693,2705,2714,2733,2758,2772,2781,2785,2791,2800
188,West Bank and Gaza,0,0,0,0,0,0,0,0,0,...,268132,270856,272767,274690,276407,278135,279753,280741,282270,284280
189,Yemen,0,0,0,0,0,0,0,0,0,...,5357,5446,5507,5582,5657,5715,5770,5812,5858,5918
190,Zambia,0,0,0,0,0,0,0,0,0,...,90029,90064,90218,90389,90532,90750,90844,90918,90942,91042


In [123]:
# Persist the per-country cases time series for later use.
df1.to_excel(excel_writer="cases.xlsx", header=True, index=False)

##### Drop all columns from the first date to the second last.
Since we need only the total cases on the last day for our map.

In [124]:
# Keep only the country name (first column) and the most recent date (last
# column). The labels to drop are passed directly as an Index; the previous
# version materialised a whole sub-DataFrame just to name those columns.
cols = df1.columns[1:-1]
df2 = df1.drop(columns=cols)

In [125]:
# df2 is the sliced frame: country name plus the latest cumulative count only.
df2

Unnamed: 0,Country/Region,4/20/21
0,Afghanistan,58037
1,Albania,129842
2,Algeria,119992
3,Andorra,12874
4,Angola,24661
...,...,...
187,Vietnam,2800
188,West Bank and Gaza,284280
189,Yemen,5918
190,Zambia,91042


##### Do a left join on the Country/Region and COUNTRY columns

In [126]:
# df3=pd.merge(df2,df,how='left',left_on=['Country/Region'],right_on=['COUNTRY'])
# df3

##### Drop the COUNTRY column from the new dataframe (df3)

In [127]:
# df3=df3.drop('COUNTRY',axis=1)
# df3

##### Check whether there are any missing values due to mismatching Country names in the two datasets and missing codes 
##### for some countries in the left dataset.

In [128]:
# df3['CODE'].isnull().values.any()

In [129]:
# empty= df3[df3.isna().any(axis=1)]
# empty

##### Save the Plotly map codes to an Excel file, edit it to add all the missing codes, re-read it, and do an inner join using the final version.

In [130]:
# df.to_excel ("map_code_copy.xlsx", index = False, header=True)

In [131]:
# Load the edited country -> map-code lookup from the current working
# directory. os.path.join selects the separator for the host OS; the earlier
# hard-coded "\\" only resolved on Windows.
pwd = os.getcwd()
df4 = pd.read_excel(os.path.join(pwd, "map_code_copy.xlsx"))

In [132]:
# Inspect the lookup table: one COUNTRY name and its map CODE per row.
df4

Unnamed: 0,COUNTRY,CODE
0,Afghanistan,AFG
1,Albania,ALB
2,Algeria,DZA
3,American Samoa,ASM
4,Andorra,AND
...,...,...
217,Virgin Islands,VGB
218,West Bank and Gaza,WBG
219,Yemen,YEM
220,Zambia,ZMB


##### Save it as a new file for use in final script

In [133]:
# Write the lookup table back out under the name used by the final script.
df4.to_excel(excel_writer="map_code1.xlsx", header=True, index=False)

##### Perform inner join on the map code and cases(df2)
This step eliminates countries (Holy See, North Macedonia and Micronesia) and cruise ships (Diamond Princess and MS Zaandam) from the cases dataset, because the inner join keeps only names that also appear in the map-code file.

In [134]:
# Attach a map code to each country. The inner join keeps only names present
# in both frames; the redundant right-hand key column is removed afterwards.
df5 = df2.merge(df4, how='inner', left_on='Country/Region', right_on='COUNTRY')
df5 = df5.drop(columns='COUNTRY')
df5

Unnamed: 0,Country/Region,4/20/21,CODE
0,Afghanistan,58037,AFG
1,Albania,129842,ALB
2,Algeria,119992,DZA
3,Andorra,12874,AND
4,Angola,24661,AGO
...,...,...,...
182,Vietnam,2800,VNM
183,West Bank and Gaza,284280,WBG
184,Yemen,5918,YEM
185,Zambia,91042,ZMB


In [135]:
# Select every row that has at least one missing value.
has_missing = df5.isna().any(axis='columns')
empty = df5[has_missing]
empty

Unnamed: 0,Country/Region,4/20/21,CODE


In [136]:
# The second column is labelled with the latest date; give it a stable name.
latest_col = df5.columns[1]
casemapdata = df5.rename(columns={latest_col: 'Total Cases'})
casemapdata

Unnamed: 0,Country/Region,Total Cases,CODE
0,Afghanistan,58037,AFG
1,Albania,129842,ALB
2,Algeria,119992,DZA
3,Andorra,12874,AND
4,Angola,24661,AGO
...,...,...,...
182,Vietnam,2800,VNM
183,West Bank and Gaza,284280,WBG
184,Yemen,5918,YEM
185,Zambia,91042,ZMB


# 3. Fit the cases data to the Plotly Choropleth map

In [137]:

# World map shaded by each country's cumulative case count.
case_trace = go.Choropleth(
    locations=casemapdata['CODE'],
    z=casemapdata['Total Cases'],
    text=casemapdata['Country/Region'],
    colorscale='Blues',
    autocolorscale=False,
    reversescale=True,
    marker=dict(line=dict(color='darkgray', width=0.5)),
    colorbar=dict(title='Total Cases'),
)

# Data-source credit with a link, positioned in paper coordinates.
source_note = dict(
    x=0.55,
    y=0.1,
    xref='paper',
    yref='paper',
    text='Source: <a href="https://github.com/CSSEGISandData/COVID-19">\
            JHU CSSE COVID-19 Data</a>',
    showarrow=False,
)

fig = go.Figure(data=case_trace)
fig.update_layout(
    title_text='Cumulative Cases per Country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular',
    ),
    annotations=[source_note],
)

fig.show()

In [138]:
# Persist the map-ready cases table (country, total, code).
casemapdata.to_excel(excel_writer="casemapdata.xlsx", header=True, index=False)

# 4. Do Preprocessing on the Deaths Data

In [139]:
# Size check of the raw deaths frame as (rows, columns).
deaths.shape

(274, 459)

##### Drop the Province/State column, then group all rows by country.
##### df6 shall be used as the dataframe to do preprocessing on

In [169]:
# df6 is the working frame for the deaths pipeline; the province column is
# removed so rows can be aggregated per country.
df6 = deaths.drop(columns=['Province/State'])
df6.shape

(274, 458)

In [170]:
# Distinct country names; this is the row count expected after grouping.
df6["Country/Region"].dropna().nunique()

192

##### After grouping by Country/Region, the number of rows must equal the unique-country count shown above (192).

In [171]:
# Collapse the per-province rows into a single row per country by summing all
# remaining columns, then restore the group key as an ordinary column.
df6 = df6.groupby('Country/Region').sum().reset_index()
df6.shape

(192, 458)

##### Great, they match up, the groupby operation was successful

##### For the project, we don't need the Latitude and Longitude columns of our data.   

In [172]:
# Coordinates are not used for the map (it is keyed on country codes).
df6 = df6.drop(columns=['Lat', 'Long'])

In [173]:
# Persist the per-country deaths time series for later use.
df6.to_excel(excel_writer="deaths.xlsx", header=True, index=False)

In [174]:
# Keep only the country name (first column) and the most recent date (last
# column). The labels to drop are passed directly as an Index; the previous
# version materialised a whole sub-DataFrame just to name those columns.
cols2 = df6.columns[1:-1]
df7 = df6.drop(columns=cols2)
# df7 is the sliced frame: country name plus the latest cumulative deaths.
df7

Unnamed: 0,Country/Region,4/20/21
0,Afghanistan,2549
1,Albania,2353
2,Algeria,3165
3,Andorra,123
4,Angola,565
...,...,...
187,Vietnam,35
188,West Bank and Gaza,3078
189,Yemen,1138
190,Zambia,1236


In [175]:
# Attach a map code to each country. The inner join keeps only names present
# in both frames; the redundant right-hand key column is removed afterwards.
df8 = df7.merge(df4, how='inner', left_on='Country/Region', right_on='COUNTRY')
df8 = df8.drop(columns='COUNTRY')
df8

Unnamed: 0,Country/Region,4/20/21,CODE
0,Afghanistan,2549,AFG
1,Albania,2353,ALB
2,Algeria,3165,DZA
3,Andorra,123,AND
4,Angola,565,AGO
...,...,...,...
182,Vietnam,35,VNM
183,West Bank and Gaza,3078,WBG
184,Yemen,1138,YEM
185,Zambia,1236,ZMB


In [147]:
# Select every row that has at least one missing value.
has_missing = df8.isna().any(axis='columns')
empty = df8[has_missing]
empty

Unnamed: 0,Country/Region,4/20/21,CODE


In [148]:
# The second column is labelled with the latest date; rename it to a stable
# name for the deaths map.
latest_col = df8.columns[1]
deathmapdata = df8.rename(columns={latest_col: 'Total Deaths'})
deathmapdata

Unnamed: 0,Country/Region,Total Deaths,CODE
0,Afghanistan,2549,AFG
1,Albania,2353,ALB
2,Algeria,3165,DZA
3,Andorra,123,AND
4,Angola,565,AGO
...,...,...,...
182,Vietnam,35,VNM
183,West Bank and Gaza,3078,WBG
184,Yemen,1138,YEM
185,Zambia,1236,ZMB


# 5. Fit the deaths data to the Plotly Choropleth map

In [149]:
# World map shaded by each country's cumulative death count.
# locations, z and text are all read from deathmapdata so that every value is
# paired with its own country code and name. Previously the codes and names
# came from casemapdata, which was only correct while both frames happened to
# share the same row order.
fig = go.Figure(data=go.Choropleth(
    locations = deathmapdata['CODE'],
    z = deathmapdata['Total Deaths'],
    text = deathmapdata['Country/Region'],
    colorscale = 'Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title = 'Total Deaths',
))

fig.update_layout(
    title_text='Cumulative Deaths per Country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    # Data-source credit with a link, positioned in paper coordinates.
    annotations = [dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://github.com/CSSEGISandData/COVID-19">\
            JHU CSSE COVID-19 Data</a>',
        showarrow = False
    )]
)

fig.show()

In [150]:
# Persist the map-ready deaths table (country, total, code).
deathmapdata.to_excel(excel_writer="deathmapdata.xlsx", header=True, index=False)

# Pre-process Dash data for cases

In [151]:
# Flip the frame so dates run down the rows and countries across the columns.
# df1 is rebound here, so running this cell twice flips it back.
df1 = df1.T

In [152]:
#dff1 = df1.loc[df1['Country/Region'] == 'Angola']
#dff1

In [153]:
#dff1.columns[1:]
# Inspect the transposed frame: the first row still holds the country names.
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,182,183,184,185,186,187,188,189,190,191
Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1/22/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4/16/21,57612,129307,119323,12712,24122,1213,2658628,207103,29499,589299,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
4/17/21,57721,129456,119486,12771,24300,1216,2677747,207973,29519,591347,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
4/18/21,57793,129594,119642,12805,24389,1216,2694014,208520,29533,593423,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
4/19/21,57898,129694,119805,12805,24518,1217,2714475,208818,29556,595540,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [154]:
# Promote the first row (the country names) to column labels and keep the
# remaining rows as data. Re-running this cell would consume the first date
# row as a header, so it should be executed once per kernel session.
new_header = df1.iloc[0]
df1 = df1.iloc[1:]
df1.columns = new_header
df1

Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1/22/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/26/20,0,0,0,0,0,0,0,0,4,0,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4/16/21,57612,129307,119323,12712,24122,1213,2658628,207103,29499,589299,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
4/17/21,57721,129456,119486,12771,24300,1216,2677747,207973,29519,591347,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
4/18/21,57793,129594,119642,12805,24389,1216,2694014,208520,29533,593423,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
4/19/21,57898,129694,119805,12805,24518,1217,2714475,208818,29556,595540,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [155]:
# Confirm the column labels are now the country names.
df1.columns

Index(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria',
       ...
       'United Kingdom', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela',
       'Vietnam', 'West Bank and Gaza', 'Yemen', 'Zambia', 'Zimbabwe'],
      dtype='object', name='Country/Region', length=192)

In [156]:
# Move the date labels out of the index into an ordinary column named "index".
df1 = df1.reset_index(drop=False)
df1

Country/Region,index,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,4/16/21,57612,129307,119323,12712,24122,1213,2658628,207103,29499,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
451,4/17/21,57721,129456,119486,12771,24300,1216,2677747,207973,29519,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
452,4/18/21,57793,129594,119642,12805,24389,1216,2694014,208520,29533,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
453,4/19/21,57898,129694,119805,12805,24518,1217,2714475,208818,29556,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [157]:
# Re-display df1 after the index reset (dates are in the "index" column).
df1

Country/Region,index,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,4/16/21,57612,129307,119323,12712,24122,1213,2658628,207103,29499,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
451,4/17/21,57721,129456,119486,12771,24300,1216,2677747,207973,29519,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
452,4/18/21,57793,129594,119642,12805,24389,1216,2694014,208520,29533,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
453,4/19/21,57898,129694,119805,12805,24518,1217,2714475,208818,29556,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [164]:
# Give the date column a meaningful name. df2 is rebound here and no longer
# refers to the two-column frame built for the map earlier in the notebook.
df2 = df1.rename({'index': 'Date'}, axis='columns')
df2

Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,4/16/21,57612,129307,119323,12712,24122,1213,2658628,207103,29499,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
451,4/17/21,57721,129456,119486,12771,24300,1216,2677747,207973,29519,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
452,4/18/21,57793,129594,119642,12805,24389,1216,2694014,208520,29533,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
453,4/19/21,57898,129694,119805,12805,24518,1217,2714475,208818,29556,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [159]:
# Every column after "Date" holds counts; cast them to 64-bit integers.
count_cols = df2.columns[1:]
df2[count_cols] = df2[count_cols].astype('int64')
df2[count_cols]

Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,0,0,0,0,0,0,0,0,4,0,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,57612,129307,119323,12712,24122,1213,2658628,207103,29499,589299,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
451,57721,129456,119486,12771,24300,1216,2677747,207973,29519,591347,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
452,57793,129594,119642,12805,24389,1216,2694014,208520,29533,593423,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
453,57898,129694,119805,12805,24518,1217,2714475,208818,29556,595540,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [160]:
# Parse the m/d/yy date labels (first column) into datetimes. The format is
# given explicitly so parsing does not depend on pandas' format inference and
# fails loudly if the labels ever stop matching it.
date_convert = df2[df2.columns[0]]
date_converted = pd.to_datetime(date_convert, format='%m/%d/%y')
date_converted

0     2020-01-22
1     2020-01-23
2     2020-01-24
3     2020-01-25
4     2020-01-26
         ...    
450   2021-04-16
451   2021-04-17
452   2021-04-18
453   2021-04-19
454   2021-04-20
Name: Date, Length: 455, dtype: datetime64[ns]

In [161]:
# Replace the string dates with the parsed datetimes (column position is kept).
df2 = df2.assign(Date=date_converted)
df2


Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,2021-04-16,57612,129307,119323,12712,24122,1213,2658628,207103,29499,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
451,2021-04-17,57721,129456,119486,12771,24300,1216,2677747,207973,29519,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
452,2021-04-18,57793,129594,119642,12805,24389,1216,2694014,208520,29533,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
453,2021-04-19,57898,129694,119805,12805,24518,1217,2714475,208818,29556,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [163]:
# Clear the name of the columns axis ("Country/Region") for a clean header.
plot_df = df2.rename_axis(None, axis='columns')
plot_df

Unnamed: 0,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,2021-04-16,57612,129307,119323,12712,24122,1213,2658628,207103,29499,...,4398903,159569,86022,3,180609,2772,278135,5715,90750,37534
451,2021-04-17,57721,129456,119486,12771,24300,1216,2677747,207973,29519,...,4401176,162400,86338,3,181903,2781,279753,5770,90844,37699
452,2021-04-18,57793,129594,119642,12805,24389,1216,2694014,208520,29533,...,4403060,164744,86680,3,183190,2785,280741,5812,90918,37751
453,2021-04-19,57898,129694,119805,12805,24518,1217,2714475,208818,29556,...,4406114,167033,86982,3,184595,2791,282270,5858,90942,37859


In [194]:
# Test plot: cumulative cases over time for a single country.
# The series is referenced by column name so it is read from the same frame
# that is passed as data_frame (it used to be taken from df2), and the axis
# label is keyed on that column name instead of the implicit name "y".
fig = px.line(
    plot_df,
    x="Date",
    y="Kenya",
    hover_data=["Date"],
    title='Cases',
    labels={"Kenya": "No. of Cases"},
)
fig.show()

# Pre-process Dash data for deaths

In [176]:
# Flip the frame so dates run down the rows and countries across the columns.
# df6 is rebound here, so running this cell twice flips it back.
df6 = df6.T
df6

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,182,183,184,185,186,187,188,189,190,191
Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1/22/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4/16/21,2535,2337,3148,123,560,31,59084,3835,910,9843,...,127472,1788,635,0,1870,35,2955,1108,1233,1551
4/17/21,2539,2340,3152,123,561,31,59164,3859,910,9870,...,127508,1848,636,0,1888,35,2983,1119,1234,1552
4/18/21,2539,2342,3155,123,561,31,59228,3878,910,9898,...,127518,1908,638,0,1905,35,3017,1126,1235,1553
4/19/21,2546,2347,3160,123,563,31,59476,3895,910,9922,...,127524,1971,638,0,1925,35,3047,1132,1236,1553


In [177]:
# Promote the first row (the country names) to column labels, keep the rest
# as data, then turn the date index into a regular column named "Date".
new_header = df6.iloc[0]
df6 = df6.iloc[1:]
df6.columns = new_header
df6 = df6.reset_index(drop=False)
df7 = df6.rename({'index': 'Date'}, axis='columns')
df7

Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,4/16/21,2535,2337,3148,123,560,31,59084,3835,910,...,127472,1788,635,0,1870,35,2955,1108,1233,1551
451,4/17/21,2539,2340,3152,123,561,31,59164,3859,910,...,127508,1848,636,0,1888,35,2983,1119,1234,1552
452,4/18/21,2539,2342,3155,123,561,31,59228,3878,910,...,127518,1908,638,0,1905,35,3017,1126,1235,1553
453,4/19/21,2546,2347,3160,123,563,31,59476,3895,910,...,127524,1971,638,0,1925,35,3047,1132,1236,1553


In [178]:
# Every column after "Date" holds counts; cast them to 64-bit integers.
count_cols = df7.columns[1:]
df7[count_cols] = df7[count_cols].astype('int64')
df7

Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,4/16/21,2535,2337,3148,123,560,31,59084,3835,910,...,127472,1788,635,0,1870,35,2955,1108,1233,1551
451,4/17/21,2539,2340,3152,123,561,31,59164,3859,910,...,127508,1848,636,0,1888,35,2983,1119,1234,1552
452,4/18/21,2539,2342,3155,123,561,31,59228,3878,910,...,127518,1908,638,0,1905,35,3017,1126,1235,1553
453,4/19/21,2546,2347,3160,123,563,31,59476,3895,910,...,127524,1971,638,0,1925,35,3047,1132,1236,1553


In [179]:
# Parse the m/d/yy date labels (first column) into datetimes and store them
# back in "Date". The format is given explicitly so parsing does not depend
# on pandas' format inference and fails loudly if the labels ever change.
date_convert = df7[df7.columns[0]]
date_converted = pd.to_datetime(date_convert, format='%m/%d/%y')

df7['Date'] = date_converted
df7


Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,2021-04-16,2535,2337,3148,123,560,31,59084,3835,910,...,127472,1788,635,0,1870,35,2955,1108,1233,1551
451,2021-04-17,2539,2340,3152,123,561,31,59164,3859,910,...,127508,1848,636,0,1888,35,2983,1119,1234,1552
452,2021-04-18,2539,2342,3155,123,561,31,59228,3878,910,...,127518,1908,638,0,1905,35,3017,1126,1235,1553
453,2021-04-19,2546,2347,3160,123,563,31,59476,3895,910,...,127524,1971,638,0,1925,35,3047,1132,1236,1553


In [181]:
# Clear the name of the columns axis ("Country/Region") for a clean header.
plot1_df = df7.rename_axis(None, axis='columns')
plot1_df


Unnamed: 0,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,2021-04-16,2535,2337,3148,123,560,31,59084,3835,910,...,127472,1788,635,0,1870,35,2955,1108,1233,1551
451,2021-04-17,2539,2340,3152,123,561,31,59164,3859,910,...,127508,1848,636,0,1888,35,2983,1119,1234,1552
452,2021-04-18,2539,2342,3155,123,561,31,59228,3878,910,...,127518,1908,638,0,1905,35,3017,1126,1235,1553
453,2021-04-19,2546,2347,3160,123,563,31,59476,3895,910,...,127524,1971,638,0,1925,35,3047,1132,1236,1553


In [192]:
# Test plot: cumulative deaths over time for a single country.
# The series is referenced by column name so it is read from the same frame
# that is passed as data_frame (it used to be taken from df7), and the axis
# label is keyed on that column name instead of the implicit name "y".
fig = px.line(
    plot1_df,
    x="Date",
    y="Kenya",
    hover_data=["Date"],
    title='Deaths',
    labels={"Kenya": "No. of Deaths"},
)
fig.show()