# 1. Import Dependencies and data

In [73]:
import pandas as pd
import os
import plotly.express as px
import plotly.graph_objects as go

Fetch the data from the **COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University**

In [74]:
# Global confirmed-cases time series from the JHU CSSE repository (fetched over HTTP).
cases = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
)

In [75]:
# Global deaths time series from the JHU CSSE repository (fetched over HTTP).
deaths = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
)

# 2. Preprocess the Cases Data

In [76]:
cases.shape

(275, 463)

In [77]:
cases

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21,4/21/21,4/22/21,4/23/21,4/24/21
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,57534,57612,57721,57793,57898,58037,58214,58312,58542,58730
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,129128,129307,129456,129594,129694,129842,129980,130114,130270,130409
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,119142,119323,119486,119642,119805,119992,120174,120363,120562,120736
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,12641,12712,12771,12805,12805,12874,12917,12942,13007,13024
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,23951,24122,24300,24389,24518,24661,24883,25051,25279,25492
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
270,,Vietnam,14.058324,108.277199,0,2,2,2,2,2,...,2758,2772,2781,2785,2791,2800,2812,2824,2830,2833
271,,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,...,276407,278135,279753,280741,282270,284280,286028,287680,289120,290259
272,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,5657,5715,5770,5812,5858,5918,5960,6020,6056,6105
273,,Zambia,-13.133897,27.849332,0,0,0,0,0,0,...,90532,90750,90844,90918,90942,91042,91119,91189,91251,91317


In [78]:
# Compare the number of rows that carry a country name with the number of
# distinct country names.
countries = cases["Country/Region"].count()
unique_countries = cases["Country/Region"].nunique()
print(
    "There are ",
    countries,
    "entries for the country column while unique no of countries is ",
    unique_countries,
)

There are  275 entries for the country column while unique no of countries is  192


This means there are repeated countries due to the separate divisions for some countries in the Province/State column, like the US and Australia.

In [79]:
# Number of distinct, non-missing Province/State values.
cases["Province/State"].nunique()

86

##### Drop the Province/State column, then group all rows by country.

In [80]:
# Working copy of the cases table without the sub-national column; all further
# preprocessing of the cases data is done on df1.
df1 = cases.drop(columns='Province/State')
df1.shape

(275, 462)

In [81]:
# Number of distinct countries; the grouped frame should end up with this many rows.
df1["Country/Region"].nunique()

192

##### After grouping by Country/Region, we should obtain the same number of rows as in the output above

In [82]:
# Collapse the per-province rows into a single row per country by summing
# every remaining column (the daily counts, and also Lat/Long).
df1 = df1.groupby('Country/Region', as_index=False).sum()
df1.shape

(192, 462)

##### Great, they match up, the groupby operation was successful

In [83]:
df1

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21,4/21/21,4/22/21,4/23/21,4/24/21
0,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,0,...,57534,57612,57721,57793,57898,58037,58214,58312,58542,58730
1,Albania,41.153300,20.168300,0,0,0,0,0,0,0,...,129128,129307,129456,129594,129694,129842,129980,130114,130270,130409
2,Algeria,28.033900,1.659600,0,0,0,0,0,0,0,...,119142,119323,119486,119642,119805,119992,120174,120363,120562,120736
3,Andorra,42.506300,1.521800,0,0,0,0,0,0,0,...,12641,12712,12771,12805,12805,12874,12917,12942,13007,13024
4,Angola,-11.202700,17.873900,0,0,0,0,0,0,0,...,23951,24122,24300,24389,24518,24661,24883,25051,25279,25492
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Vietnam,14.058324,108.277199,0,2,2,2,2,2,2,...,2758,2772,2781,2785,2791,2800,2812,2824,2830,2833
188,West Bank and Gaza,31.952200,35.233200,0,0,0,0,0,0,0,...,276407,278135,279753,280741,282270,284280,286028,287680,289120,290259
189,Yemen,15.552727,48.516388,0,0,0,0,0,0,0,...,5657,5715,5770,5812,5858,5918,5960,6020,6056,6105
190,Zambia,-13.133897,27.849332,0,0,0,0,0,0,0,...,90532,90750,90844,90918,90942,91042,91119,91189,91251,91317


##### For the project, we don't need the Latitude and Longitude columns of our data.

In [84]:
# Remove the coordinate columns, leaving the country name plus one column per date.
df1 = df1.drop(columns=['Lat', 'Long'])

##### Obtain the country codes from the Plotly Express documentation. They're useful in plotting the choropleth map.

In [85]:
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
# df

##### Drop the GDP column since we do not need it.

In [86]:
# df=df.drop('GDP (BILLIONS)',axis=1)
# df

In [87]:
df1

Unnamed: 0,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21,4/21/21,4/22/21,4/23/21,4/24/21
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,57534,57612,57721,57793,57898,58037,58214,58312,58542,58730
1,Albania,0,0,0,0,0,0,0,0,0,...,129128,129307,129456,129594,129694,129842,129980,130114,130270,130409
2,Algeria,0,0,0,0,0,0,0,0,0,...,119142,119323,119486,119642,119805,119992,120174,120363,120562,120736
3,Andorra,0,0,0,0,0,0,0,0,0,...,12641,12712,12771,12805,12805,12874,12917,12942,13007,13024
4,Angola,0,0,0,0,0,0,0,0,0,...,23951,24122,24300,24389,24518,24661,24883,25051,25279,25492
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Vietnam,0,2,2,2,2,2,2,2,2,...,2758,2772,2781,2785,2791,2800,2812,2824,2830,2833
188,West Bank and Gaza,0,0,0,0,0,0,0,0,0,...,276407,278135,279753,280741,282270,284280,286028,287680,289120,290259
189,Yemen,0,0,0,0,0,0,0,0,0,...,5657,5715,5770,5812,5858,5918,5960,6020,6056,6105
190,Zambia,0,0,0,0,0,0,0,0,0,...,90532,90750,90844,90918,90942,91042,91119,91189,91251,91317


In [88]:
# Persist the per-country cases table to cases.xlsx for later use
# (header row written, row index omitted).
df1.to_excel("cases.xlsx", index=False, header=True)

##### Drop all columns from the first date to the second last.
Since we need only the total cases on the last day for our map.

In [89]:
# Keep only the country name (first column) and the latest date (last column).
# `cols` holds the labels of every column in between. The original passed a
# whole DataFrame slice to drop(), which only worked because iterating a
# DataFrame yields its column names, and copied every date column to do so.
cols = df1.columns[1:-1]
df2 = df1.drop(columns=cols)

In [90]:
#df2 will be the new sliced dataframe 
df2

Unnamed: 0,Country/Region,4/24/21
0,Afghanistan,58730
1,Albania,130409
2,Algeria,120736
3,Andorra,13024
4,Angola,25492
...,...,...
187,Vietnam,2833
188,West Bank and Gaza,290259
189,Yemen,6105
190,Zambia,91317


##### Do a left join on the Country/Region and COUNTRY columns

In [91]:
# df3=pd.merge(df2,df,how='left',left_on=['Country/Region'],right_on=['COUNTRY'])
# df3

##### Drop the COUNTRY column from the new dataframe (df3)

In [92]:
# df3=df3.drop('COUNTRY',axis=1)
# df3

##### Check whether there are any missing values due to mismatching Country names in the two datasets and missing codes 
##### for some countries in the left dataset.

In [93]:
# df3['CODE'].isnull().values.any()

In [94]:
# empty= df3[df3.isna().any(axis=1)]
# empty

##### Save the Plotly map codes to an Excel file, edit it to add all the missing codes, re-read it and do an inner join using the final version.

In [95]:
# df.to_excel ("map_code_copy.xlsx", index = False, header=True)

In [96]:
# Load the edited country -> map-code table from the current working directory.
# os.path.join builds the path with the platform's own separator; the original
# hard-coded "\\" only resolved correctly on Windows.
pwd = os.getcwd()
df4 = pd.read_excel(os.path.join(pwd, "map_code_copy.xlsx"))

In [97]:
df4

Unnamed: 0,COUNTRY,CODE
0,Afghanistan,AFG
1,Albania,ALB
2,Algeria,DZA
3,American Samoa,ASM
4,Andorra,AND
...,...,...
217,Virgin Islands,VGB
218,West Bank and Gaza,WBG
219,Yemen,YEM
220,Zambia,ZMB


##### Save it as a new file for use in final script

In [98]:
# Write the map-code table to map_code1.xlsx (header row written, row index omitted).
df4.to_excel("map_code1.xlsx", index=False, header=True)

##### Perform inner join on the map code and cases(df2)
This step eliminates countries (Holy See, North Macedonia and Micronesia) and cruise ships (Diamond Princess and MS Zaandam) from the cases dataset.

In [99]:
# Attach the map code to each country's latest case count. The inner join keeps
# only countries present in both tables; the redundant COUNTRY key column from
# the code table is dropped afterwards.
df5 = (
    df2.merge(df4, how='inner', left_on='Country/Region', right_on='COUNTRY')
       .drop(columns='COUNTRY')
)
df5

Unnamed: 0,Country/Region,4/24/21,CODE
0,Afghanistan,58730,AFG
1,Albania,130409,ALB
2,Algeria,120736,DZA
3,Andorra,13024,AND
4,Angola,25492,AGO
...,...,...,...
182,Vietnam,2833,VNM
183,West Bank and Gaza,290259,WBG
184,Yemen,6105,YEM
185,Zambia,91317,ZMB


In [100]:
# Rows of df5 that contain at least one missing value.
empty = df5.loc[df5.isna().any(axis=1)]
empty

Unnamed: 0,Country/Region,4/24/21,CODE


In [101]:
# The second column is labelled with the latest date; give it a fixed,
# descriptive name so the plotting code does not depend on the date.
casemapdata = df5.rename({df5.columns[1]: 'Total Cases'}, axis='columns')
casemapdata

Unnamed: 0,Country/Region,Total Cases,CODE
0,Afghanistan,58730,AFG
1,Albania,130409,ALB
2,Algeria,120736,DZA
3,Andorra,13024,AND
4,Angola,25492,AGO
...,...,...,...
182,Vietnam,2833,VNM
183,West Bank and Gaza,290259,WBG
184,Yemen,6105,YEM
185,Zambia,91317,ZMB


# 3. Fit the data to the Plotly Choropleth map

In [102]:

# World choropleth of the latest total cases per country, with locations
# keyed by casemapdata['CODE'].
fig = go.Figure(
    data=go.Choropleth(
        locations=casemapdata['CODE'],
        z=casemapdata['Total Cases'],
        text=casemapdata['Country/Region'],
        colorscale='Blues',
        autocolorscale=False,
        reversescale=True,
        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_title='Total Cases',
    )
)

# Title, map projection and a data-source credit placed in paper coordinates.
fig.update_layout(
    title_text='Cumulative Cases per Country',
    geo={
        'showframe': False,
        'showcoastlines': False,
        'projection_type': 'equirectangular',
    },
    annotations=[
        {
            'x': 0.55,
            'y': 0.1,
            'xref': 'paper',
            'yref': 'paper',
            'text': 'Source: <a href="https://github.com/CSSEGISandData/COVID-19">\
            JHU CSSE COVID-19 Data</a>',
            'showarrow': False,
        }
    ],
)

fig.show()

In [103]:
# Write the map-ready cases table to casemapdata.xlsx (header row written, row index omitted).
casemapdata.to_excel("casemapdata.xlsx", index=False, header=True)

# 4. Do Preprocessing on the Deaths Data

In [104]:
deaths.shape

(275, 463)

##### Drop the Province/State column, then group all rows by country.
##### df6 shall be used as the dataframe to do preprocessing on

In [105]:
# Working copy of the deaths table without the sub-national column.
df6 = deaths.drop(columns='Province/State')
df6.shape

(275, 462)

In [106]:
# Number of distinct countries; the grouped frame should end up with this many rows.
df6["Country/Region"].nunique()

192

###### After grouping by Country/Region, we should obtain the same number of rows as in the output above

In [107]:
# Collapse the per-province rows into a single row per country by summing
# every remaining column.
df6 = df6.groupby('Country/Region', as_index=False).sum()
df6.shape

(192, 462)

##### Great, they match up, the groupby operation was successful

##### For the project, we don't need the Latitude and Longitude columns of our data.   

In [108]:
# Remove the coordinate columns, leaving the country name plus one column per date.
df6 = df6.drop(columns=['Lat', 'Long'])

In [109]:
# Persist the per-country deaths table to deaths.xlsx for later use
# (header row written, row index omitted).
df6.to_excel("deaths.xlsx", index=False, header=True)

In [110]:
# Keep only the country name (first column) and the latest date (last column).
# `cols2` holds the labels of every column in between. The original passed a
# whole DataFrame slice to drop(), which only worked because iterating a
# DataFrame yields its column names, and copied every date column to do so.
cols2 = df6.columns[1:-1]
df7 = df6.drop(columns=cols2)
# df7 will be the new sliced dataframe
df7

Unnamed: 0,Country/Region,4/24/21
0,Afghanistan,2572
1,Albania,2372
2,Algeria,3198
3,Andorra,124
4,Angola,577
...,...,...
187,Vietnam,35
188,West Bank and Gaza,3151
189,Yemen,1175
190,Zambia,1245


In [111]:
# Attach the map code to each country's latest death count. The inner join
# keeps only countries present in both tables; the redundant COUNTRY key column
# from the code table is dropped afterwards.
df8 = (
    df7.merge(df4, how='inner', left_on='Country/Region', right_on='COUNTRY')
       .drop(columns='COUNTRY')
)
df8

Unnamed: 0,Country/Region,4/24/21,CODE
0,Afghanistan,2572,AFG
1,Albania,2372,ALB
2,Algeria,3198,DZA
3,Andorra,124,AND
4,Angola,577,AGO
...,...,...,...
182,Vietnam,35,VNM
183,West Bank and Gaza,3151,WBG
184,Yemen,1175,YEM
185,Zambia,1245,ZMB


In [112]:
# Rows of df8 that contain at least one missing value.
empty = df8.loc[df8.isna().any(axis=1)]
empty

Unnamed: 0,Country/Region,4/24/21,CODE


In [113]:
# The second column is labelled with the latest date; rename it to a fixed,
# descriptive name for the deaths map.
deathmapdata = df8.rename({df8.columns[1]: 'Total Deaths'}, axis='columns')
deathmapdata

Unnamed: 0,Country/Region,Total Deaths,CODE
0,Afghanistan,2572,AFG
1,Albania,2372,ALB
2,Algeria,3198,DZA
3,Andorra,124,AND
4,Angola,577,AGO
...,...,...,...
182,Vietnam,35,VNM
183,West Bank and Gaza,3151,WBG
184,Yemen,1175,YEM
185,Zambia,1245,ZMB


# 5. Fit the data to the Plotly Choropleth map

In [114]:
# World choropleth of the latest total deaths per country.
# locations, z and text are all read from deathmapdata. The original took the
# codes and country names from casemapdata and only the values from
# deathmapdata, which is correct only while both frames share row order.
fig = go.Figure(data=go.Choropleth(
    locations = deathmapdata['CODE'],
    z = deathmapdata['Total Deaths'],
    text = deathmapdata['Country/Region'],
    colorscale = 'Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title = 'Total Deaths',
))

# Title, map projection and a data-source credit placed in paper coordinates.
fig.update_layout(
    title_text='Cumulative Deaths per Country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    annotations = [dict(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://github.com/CSSEGISandData/COVID-19">\
            JHU CSSE COVID-19 Data</a>',
        showarrow = False
    )]
)

fig.show()

In [115]:
# Write the map-ready deaths table to deathmapdata.xlsx (header row written, row index omitted).
deathmapdata.to_excel("deathmapdata.xlsx", index=False, header=True)

# Pre-process Dash data for cases

In [116]:
# Flip the cases table so each date becomes a row and each country a column;
# the country names end up as the first row. Note this rebinds df1.
df1 = df1.T

In [117]:
#dff1 = df1.loc[df1['Country/Region'] == 'Angola']
#dff1

In [118]:
#dff1.columns[1:]
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,182,183,184,185,186,187,188,189,190,191
Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1/22/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4/20/21,58037,129842,119992,12874,24661,1217,2743620,209485,29576,597566,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
4/21/21,58214,129980,120174,12917,24883,1217,2769552,210518,29594,600089,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
4/22/21,58312,130114,120363,12942,25051,1217,2796768,211399,29638,602494,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
4/23/21,58542,130270,120562,13007,25279,1222,2824652,212114,29653,604823,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [119]:
# Promote the first row (the country names) to column labels and keep only the
# date rows beneath it.
new_header = df1.iloc[0]
df1 = df1.iloc[1:]
df1.columns = new_header
df1

Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1/22/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
1/26/20,0,0,0,0,0,0,0,0,4,0,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4/20/21,58037,129842,119992,12874,24661,1217,2743620,209485,29576,597566,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
4/21/21,58214,129980,120174,12917,24883,1217,2769552,210518,29594,600089,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
4/22/21,58312,130114,120363,12942,25051,1217,2796768,211399,29638,602494,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
4/23/21,58542,130270,120562,13007,25279,1222,2824652,212114,29653,604823,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [120]:
df1.columns

Index(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria',
       ...
       'United Kingdom', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela',
       'Vietnam', 'West Bank and Gaza', 'Yemen', 'Zambia', 'Zimbabwe'],
      dtype='object', name='Country/Region', length=192)

In [121]:
# Move the date labels out of the index into an ordinary column (named 'index').
df1 = df1.reset_index(drop=False)
df1

Country/Region,index,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,4/20/21,58037,129842,119992,12874,24661,1217,2743620,209485,29576,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
455,4/21/21,58214,129980,120174,12917,24883,1217,2769552,210518,29594,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
456,4/22/21,58312,130114,120363,12942,25051,1217,2796768,211399,29638,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
457,4/23/21,58542,130270,120562,13007,25279,1222,2824652,212114,29653,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [122]:
df1

Country/Region,index,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,4/20/21,58037,129842,119992,12874,24661,1217,2743620,209485,29576,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
455,4/21/21,58214,129980,120174,12917,24883,1217,2769552,210518,29594,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
456,4/22/21,58312,130114,120363,12942,25051,1217,2796768,211399,29638,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
457,4/23/21,58542,130270,120562,13007,25279,1222,2824652,212114,29653,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [123]:
# Name the date column 'Date'. This rebinds df2, which earlier in the notebook
# held the latest-day slice of the cases table.
df2 = df1.rename({'index': 'Date'}, axis='columns')
df2

Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,4/20/21,58037,129842,119992,12874,24661,1217,2743620,209485,29576,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
455,4/21/21,58214,129980,120174,12917,24883,1217,2769552,210518,29594,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
456,4/22/21,58312,130114,120363,12942,25051,1217,2796768,211399,29638,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
457,4/23/21,58542,130270,120562,13007,25279,1222,2824652,212114,29653,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [124]:
# Cast every country column (everything after 'Date') to int64, then show
# just those columns.
df2[df2.columns[1:]] = df2[df2.columns[1:]].astype('int64')
df2[df2.columns[1:]]

Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,0,0,0,0,0,0,0,0,4,0,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,58037,129842,119992,12874,24661,1217,2743620,209485,29576,597566,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
455,58214,129980,120174,12917,24883,1217,2769552,210518,29594,600089,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
456,58312,130114,120363,12942,25051,1217,2796768,211399,29638,602494,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
457,58542,130270,120562,13007,25279,1222,2824652,212114,29653,604823,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [125]:
# Parse the first column of df2 (the date strings) into datetime values.
date_convert = df2.iloc[:, 0]
date_converted = pd.to_datetime(date_convert)
date_converted

0     2020-01-22
1     2020-01-23
2     2020-01-24
3     2020-01-25
4     2020-01-26
         ...    
454   2021-04-20
455   2021-04-21
456   2021-04-22
457   2021-04-23
458   2021-04-24
Name: Date, Length: 459, dtype: datetime64[ns]

In [126]:
# Replace the string dates in df2 with the parsed datetime values.
df2['Date'] = date_converted
df2


Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,2021-04-20,58037,129842,119992,12874,24661,1217,2743620,209485,29576,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
455,2021-04-21,58214,129980,120174,12917,24883,1217,2769552,210518,29594,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
456,2021-04-22,58312,130114,120363,12942,25051,1217,2796768,211399,29638,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
457,2021-04-23,58542,130270,120562,13007,25279,1222,2824652,212114,29653,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [127]:
# Clear the name carried by the column axis ('Country/Region') so it no longer
# appears in the header of the plotting frame.
plot_df = df2.rename_axis(None, axis='columns')
plot_df

Unnamed: 0,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,4,...,0,0,0,0,0,2,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,2021-04-20,58037,129842,119992,12874,24661,1217,2743620,209485,29576,...,4408644,169327,87225,3,185736,2800,284280,5918,91042,37875
455,2021-04-21,58214,129980,120174,12917,24883,1217,2769552,210518,29594,...,4411068,172601,87551,4,186745,2812,286028,5960,91119,37980
456,2021-04-22,58312,130114,120363,12942,25051,1217,2796768,211399,29638,...,4413834,175891,87935,4,188063,2824,287680,6020,91189,38018
457,2021-04-23,58542,130270,120562,13007,25279,1222,2824652,212114,29653,...,4416588,179537,88280,4,189381,2830,289120,6056,91251,38045


In [128]:
# Test plot: cases over time for a single country.
# The country is given as a column name of plot_df rather than as a Series, so
# the axis label in `labels` can be keyed on that column name. hover_data is a
# list (the original passed a set, which is not one of the documented types).
fig = px.line(
    plot_df,
    x="Date",
    y="Kenya",
    hover_data=["Date"],
    title='Cases',
    labels={"Kenya": "No. of Cases"},
)
fig.show()

# Pre-process Dash data for deaths

In [57]:
# Flip the deaths table so each date becomes a row and each country a column;
# the country names end up as the first row. Note this rebinds df6.
df6 = df6.T
df6

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,182,183,184,185,186,187,188,189,190,191
Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
1/22/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/23/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/24/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1/25/20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4/20/21,2549,2353,3165,123,565,31,59792,3919,910,9959,...,127557,2022,639,0,1944,35,3078,1138,1236,1554
4/21/21,2557,2358,3172,123,570,31,60083,3944,910,9997,...,127577,2083,640,1,1965,35,3096,1147,1238,1555
4/22/21,2561,2364,3181,123,572,31,60620,3969,910,10026,...,127597,2160,640,1,1987,35,3115,1157,1240,1555
4/23/21,2565,2367,3190,123,574,31,61176,3984,910,10055,...,127638,2227,641,1,2009,35,3138,1165,1245,1556


In [58]:
# Promote the first row (the country names) to column labels, keep the date
# rows, move the dates from the index into a column and name that column 'Date'.
# This rebinds df7, which earlier held the latest-day slice of the deaths table.
new_header = df6.iloc[0]
df6 = df6.iloc[1:]
df6.columns = new_header
df6 = df6.reset_index()
df7 = df6.rename({'index': 'Date'}, axis='columns')
df7

Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,4/20/21,2549,2353,3165,123,565,31,59792,3919,910,...,127557,2022,639,0,1944,35,3078,1138,1236,1554
455,4/21/21,2557,2358,3172,123,570,31,60083,3944,910,...,127577,2083,640,1,1965,35,3096,1147,1238,1555
456,4/22/21,2561,2364,3181,123,572,31,60620,3969,910,...,127597,2160,640,1,1987,35,3115,1157,1240,1555
457,4/23/21,2565,2367,3190,123,574,31,61176,3984,910,...,127638,2227,641,1,2009,35,3138,1165,1245,1556


In [59]:
# Cast every country column (everything after 'Date') to int64.
df7[df7.columns[1:]] = df7[df7.columns[1:]].astype('int64')
df7

Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,4/20/21,2549,2353,3165,123,565,31,59792,3919,910,...,127557,2022,639,0,1944,35,3078,1138,1236,1554
455,4/21/21,2557,2358,3172,123,570,31,60083,3944,910,...,127577,2083,640,1,1965,35,3096,1147,1238,1555
456,4/22/21,2561,2364,3181,123,572,31,60620,3969,910,...,127597,2160,640,1,1987,35,3115,1157,1240,1555
457,4/23/21,2565,2367,3190,123,574,31,61176,3984,910,...,127638,2227,641,1,2009,35,3138,1165,1245,1556


In [60]:
# Parse the first column of df7 (the date strings) into datetime values and
# store them back in the 'Date' column.
date_convert = df7.iloc[:, 0]
date_converted = pd.to_datetime(date_convert)

df7['Date'] = date_converted
df7


Country/Region,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,2021-04-20,2549,2353,3165,123,565,31,59792,3919,910,...,127557,2022,639,0,1944,35,3078,1138,1236,1554
455,2021-04-21,2557,2358,3172,123,570,31,60083,3944,910,...,127577,2083,640,1,1965,35,3096,1147,1238,1555
456,2021-04-22,2561,2364,3181,123,572,31,60620,3969,910,...,127597,2160,640,1,1987,35,3115,1157,1240,1555
457,2021-04-23,2565,2367,3190,123,574,31,61176,3984,910,...,127638,2227,641,1,2009,35,3138,1165,1245,1556


In [61]:
# Clear the name carried by the column axis ('Country/Region') so it no longer
# appears in the header of the plotting frame.
plot1_df = df7.rename_axis(None, axis='columns')
plot1_df


Unnamed: 0,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,2021-04-20,2549,2353,3165,123,565,31,59792,3919,910,...,127557,2022,639,0,1944,35,3078,1138,1236,1554
455,2021-04-21,2557,2358,3172,123,570,31,60083,3944,910,...,127577,2083,640,1,1965,35,3096,1147,1238,1555
456,2021-04-22,2561,2364,3181,123,572,31,60620,3969,910,...,127597,2160,640,1,1987,35,3115,1157,1240,1555
457,2021-04-23,2565,2367,3190,123,574,31,61176,3984,910,...,127638,2227,641,1,2009,35,3138,1165,1245,1556


In [62]:
# Test plot: deaths over time for a single country.
# The y values are taken from plot1_df, the frame handed to px.line, by column
# name; the original pulled the Series from df7 instead. Keying `labels` on the
# column name makes the axis label apply to that column. hover_data is a list
# (the original passed a set, which is not one of the documented types).
fig = px.line(
    plot1_df,
    x="Date",
    y="Kenya",
    hover_data=["Date"],
    title='Deaths',
    labels={"Kenya": "No. of Deaths"},
)
fig.show()

TypeError: 'DataFrame' object is not callable

In [66]:
# Boolean mask marking which column of plot_df matches the selected country.
# `dropdown` is assigned here so the cell runs on a fresh kernel; in the
# original it was only defined in a cell further down the notebook.
dropdown = 'Kenya'
plot_df.columns == dropdown

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [129]:
df6

Unnamed: 0,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21,4/21/21,4/22/21,4/23/21,4/24/21
0,Afghanistan,0,0,0,0,0,0,0,0,0,...,2533,2535,2539,2539,2546,2549,2557,2561,2565,2572
1,Albania,0,0,0,0,0,0,0,0,0,...,2335,2337,2340,2342,2347,2353,2358,2364,2367,2372
2,Algeria,0,0,0,0,0,0,0,0,0,...,3144,3148,3152,3155,3160,3165,3172,3181,3190,3198
3,Andorra,0,0,0,0,0,0,0,0,0,...,121,123,123,123,123,123,123,123,123,124
4,Angola,0,0,0,0,0,0,0,0,0,...,557,560,561,561,563,565,570,572,574,577
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,Vietnam,0,0,0,0,0,0,0,0,0,...,35,35,35,35,35,35,35,35,35,35
188,West Bank and Gaza,0,0,0,0,0,0,0,0,0,...,2937,2955,2983,3017,3047,3078,3096,3115,3138,3151
189,Yemen,0,0,0,0,0,0,0,0,0,...,1097,1108,1119,1126,1132,1138,1147,1157,1165,1175
190,Zambia,0,0,0,0,0,0,0,0,0,...,1230,1233,1234,1235,1236,1236,1238,1240,1245,1245


In [132]:
dropdown='Kenya'
dropdown
#df6['Country/Region']== dropdown


'Kenya'

In [153]:
# Select the deaths row for the country held in `dropdown`.
# On a top-to-bottom run df6 has already been transposed by the
# "Pre-process Dash data for deaths" section and no longer has a
# 'Country/Region' column, so the per-country table is rebuilt here from the
# untouched `deaths` frame using the same steps as the preprocessing section.
deaths_by_country = (
    deaths.drop(columns=['Province/State', 'Lat', 'Long'])
          .groupby('Country/Region', as_index=False)
          .sum()
)
dffd = deaths_by_country.loc[deaths_by_country['Country/Region'] == dropdown]
dffd

Unnamed: 0,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21,4/21/21,4/22/21,4/23/21,4/24/21
90,Kenya,0,0,0,0,0,0,0,0,0,...,2424,2443,2463,2481,2501,2519,2540,2560,2583,2603


In [154]:
# Renumber the single selected row from 0 and strip the helper 'index' column
# and the country name, leaving only the per-date values.
dffd = (
    dffd.reset_index()
        .rename_axis(None, axis=1)
        .drop(columns=['index', 'Country/Region'])
)
dffd

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/15/21,4/16/21,4/17/21,4/18/21,4/19/21,4/20/21,4/21/21,4/22/21,4/23/21,4/24/21
0,0,0,0,0,0,0,0,0,0,0,...,2424,2443,2463,2481,2501,2519,2540,2560,2583,2603


In [None]:
dffd.loc[0]