In [279]:
from jupyter_dash import JupyterDash

In [362]:
import dash
from dash import dcc, html
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np

In [363]:
# Load COVID-19 database from Our World in Data
df = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv')

# Remove continents group from dataset
df = df[df['continent'].notnull()]

# Fill missing values with zeroes
df.fillna(0, inplace=True)

# Remove dates where COVID wasnt measured
df = df[df['date'] > '2020-02-23']

# Get a year-month column for further analysis
df['month'] = df['date'].str[:7]

In [364]:
countries = df[['location', 'continent']]
countries_clean = countries.drop_duplicates()
countries_clean.reset_index(inplace=True, drop=True)
countries_clean

Unnamed: 0,location,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa
...,...,...
219,Vietnam,Asia
220,Wallis and Futuna,Oceania
221,Yemen,Asia
222,Zambia,Africa


In [365]:
ind_strings = [i for i in df.columns.tolist() if df.dtypes[i] == 'O']
dict_strings = {i: 'max' for i in ind_strings if i not in ['location', 'month']}
dict_strings

{'iso_code': 'max', 'continent': 'max', 'date': 'max', 'tests_units': 'max'}

In [367]:
df_months.merge(countries_clean, how='left', on='location')

Unnamed: 0,location,month,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million,continent
0,Afghanistan,2020-02,5.0,0.0,0.126,0.000,0.00,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,Asia
1,Afghanistan,2020-03,166.0,4.0,4.167,0.100,1.51,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,Asia
2,Afghanistan,2020-04,1827.0,60.0,45.864,1.506,1.57,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,Asia
3,Afghanistan,2020-05,15180.0,254.0,381.068,6.376,1.61,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,Asia
4,Afghanistan,2020-06,31445.0,739.0,789.373,18.551,1.19,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4302,Zimbabwe,2021-07,108860.0,3532.0,7213.011,234.029,1.64,0.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,Africa
4303,Zimbabwe,2021-08,124773.0,4419.0,8267.399,292.801,0.81,0.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,Africa
4304,Zimbabwe,2021-09,130820.0,4623.0,8668.070,306.318,1.06,0.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,Africa
4305,Zimbabwe,2021-10,132977.0,4678.0,8810.992,309.962,0.85,0.0,0.0,0.0,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,Africa


In [366]:
ind_max = [i for i in df.columns.tolist() if (i[:3] != 'new') & (df.dtypes[i] != 'O')]
dict_max = {i: 'max' for i in ind_max}
df_months = df.groupby(['location', 'month']).agg(dict_max).reset_index()
df_months

Unnamed: 0,location,month,total_cases,total_deaths,total_cases_per_million,total_deaths_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,Afghanistan,2020-02,5.0,0.0,0.126,0.000,0.00,0.0,0.0,0.0,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
1,Afghanistan,2020-03,166.0,4.0,4.167,0.100,1.51,0.0,0.0,0.0,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
2,Afghanistan,2020-04,1827.0,60.0,45.864,1.506,1.57,0.0,0.0,0.0,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
3,Afghanistan,2020-05,15180.0,254.0,381.068,6.376,1.61,0.0,0.0,0.0,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
4,Afghanistan,2020-06,31445.0,739.0,789.373,18.551,1.19,0.0,0.0,0.0,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4302,Zimbabwe,2021-07,108860.0,3532.0,7213.011,234.029,1.64,0.0,0.0,0.0,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0
4303,Zimbabwe,2021-08,124773.0,4419.0,8267.399,292.801,0.81,0.0,0.0,0.0,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0
4304,Zimbabwe,2021-09,130820.0,4623.0,8668.070,306.318,1.06,0.0,0.0,0.0,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0
4305,Zimbabwe,2021-10,132977.0,4678.0,8810.992,309.962,0.85,0.0,0.0,0.0,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0


In [241]:
# Define key variables
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']


### Dash configuration
# Create dash app
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# Create server variable with Flask server object for use with gunicorn
server = app.server


### Data loading and cleaning
# Load COVID-19 database from Our World in Data
df = pd.read_csv(url)
# Remove continents group from dataset
df = df[df['continent'].notnull()]
# Fill missing values with zeroes
df.fillna(0, inplace=True)
# Remove dates where COVID wasnt measured
df = df[df['date'] > '2020-02-23']
# Get a year-month column for further analysis
df['month'] = df['date'].str[:7]


# Define dictionary for future aggregation
available_indicators = df.columns.tolist()[4:]
# Retrieve all indicators that supports the condition
ind_sum = [i for i in available_indicators if i[:3] == 'new']
ind_max = [i for i in available_indicators if (i[:3] != 'new') & (i  not in ['month', 'tests_units'])]
# Set aggregations for different dictionaries
dict_sum = {i: 'sum' for i in ind_sum}
dict_max = {i: 'max' for i in ind_max}
# Merge dictionaries
dict_agg = dict_sum | dict_max

In [242]:
available_indicators

['total_cases',
 'new_cases',
 'new_cases_smoothed',
 'total_deaths',
 'new_deaths',
 'new_deaths_smoothed',
 'total_cases_per_million',
 'new_cases_per_million',
 'new_cases_smoothed_per_million',
 'total_deaths_per_million',
 'new_deaths_per_million',
 'new_deaths_smoothed_per_million',
 'reproduction_rate',
 'icu_patients',
 'icu_patients_per_million',
 'hosp_patients',
 'hosp_patients_per_million',
 'weekly_icu_admissions',
 'weekly_icu_admissions_per_million',
 'weekly_hosp_admissions',
 'weekly_hosp_admissions_per_million',
 'new_tests',
 'total_tests',
 'total_tests_per_thousand',
 'new_tests_per_thousand',
 'new_tests_smoothed',
 'new_tests_smoothed_per_thousand',
 'positive_rate',
 'tests_per_case',
 'tests_units',
 'total_vaccinations',
 'people_vaccinated',
 'people_fully_vaccinated',
 'total_boosters',
 'new_vaccinations',
 'new_vaccinations_smoothed',
 'total_vaccinations_per_hundred',
 'people_vaccinated_per_hundred',
 'people_fully_vaccinated_per_hundred',
 'total_booste

In [250]:
df = df.groupby(['location', 'month']).agg(dict_agg).reset_index()

In [219]:
ind_sum = [i for i in available_indicators if i[:3] == 'new']
ind_max = [i for i in available_indicators if (i[:3] != 'new') & (i  not in ['month', 'tests_units'])]

In [221]:
dict_sum = {i: 'sum' for i in ind_sum}
dict_max = {i: 'max' for i in ind_max}

In [222]:
dict_agg = dict_sum | dict_max

In [231]:
df.groupby(['location', 'month']).agg(dict_agg).reset_index()

Unnamed: 0,location,month,new_cases,new_cases_smoothed,new_deaths,new_deaths_smoothed,new_cases_per_million,new_cases_smoothed_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,Afghanistan,2020-02,5.0,0.714,0.0,0.000,0.126,0.018,0.000,0.000,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
1,Afghanistan,2020-03,161.0,102.143,4.0,2.716,4.042,2.568,0.100,0.069,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
2,Afghanistan,2020-04,1661.0,1451.713,56.0,49.002,41.699,36.444,1.402,1.229,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
3,Afghanistan,2020-05,13353.0,11601.569,194.0,187.141,335.205,291.238,4.869,4.697,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
4,Afghanistan,2020-06,16265.0,17534.144,485.0,464.287,408.304,440.165,12.176,11.656,...,0.0,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4302,Zimbabwe,2021-07,58996.0,56779.715,1743.0,1591.429,3909.048,3762.196,115.492,105.444,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0
4303,Zimbabwe,2021-08,15913.0,20709.857,887.0,1051.999,1054.389,1372.225,58.771,69.704,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0
4304,Zimbabwe,2021-09,6047.0,5666.999,204.0,224.857,400.670,375.490,13.518,14.902,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0
4305,Zimbabwe,2021-10,2157.0,2881.285,55.0,65.286,142.922,190.912,3.646,4.327,...,1.6,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0


In [115]:
df2 = df.groupby(['location', 'month'])[['total_cases', 'total_deaths', 'continent']].max().reset_index()
df2

Unnamed: 0,location,month,total_cases,total_deaths,continent
0,Afghanistan,2020-02,5.0,0.0,Asia
1,Afghanistan,2020-03,166.0,4.0,Asia
2,Afghanistan,2020-04,1827.0,60.0,Asia
3,Afghanistan,2020-05,15180.0,254.0,Asia
4,Afghanistan,2020-06,31445.0,739.0,Asia
...,...,...,...,...,...
4302,Zimbabwe,2021-07,108860.0,3532.0,Africa
4303,Zimbabwe,2021-08,124773.0,4419.0,Africa
4304,Zimbabwe,2021-09,130820.0,4623.0,Africa
4305,Zimbabwe,2021-10,132977.0,4678.0,Africa


In [141]:
[{"label": i, "value": i} for i in available_indicators]

[{'label': 'total_cases', 'value': 'total_cases'},
 {'label': 'new_cases', 'value': 'new_cases'},
 {'label': 'new_cases_smoothed', 'value': 'new_cases_smoothed'},
 {'label': 'total_deaths', 'value': 'total_deaths'},
 {'label': 'new_deaths', 'value': 'new_deaths'},
 {'label': 'new_deaths_smoothed', 'value': 'new_deaths_smoothed'},
 {'label': 'total_cases_per_million', 'value': 'total_cases_per_million'},
 {'label': 'new_cases_per_million', 'value': 'new_cases_per_million'},
 {'label': 'new_cases_smoothed_per_million',
  'value': 'new_cases_smoothed_per_million'},
 {'label': 'total_deaths_per_million', 'value': 'total_deaths_per_million'},
 {'label': 'new_deaths_per_million', 'value': 'new_deaths_per_million'},
 {'label': 'new_deaths_smoothed_per_million',
  'value': 'new_deaths_smoothed_per_million'},
 {'label': 'reproduction_rate', 'value': 'reproduction_rate'},
 {'label': 'icu_patients', 'value': 'icu_patients'},
 {'label': 'icu_patients_per_million', 'value': 'icu_patients_per_millio

In [138]:
xaxis_column_name = available_indicators[0]
yaxis_column_name = available_indicators[3]

In [139]:
px.scatter(df2, x=xaxis_column_name, y=yaxis_column_name, hover_name='location')

In [117]:
px.scatter(df2, x='total_cases', y='total_deaths', hover_name='location')

In [114]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 124941 entries, 0 to 134669
Data columns (total 68 columns):
 #   Column                                      Non-Null Count   Dtype  
---  ------                                      --------------   -----  
 0   iso_code                                    124941 non-null  object 
 1   continent                                   124941 non-null  object 
 2   location                                    124941 non-null  object 
 3   date                                        124941 non-null  object 
 4   total_cases                                 124941 non-null  float64
 5   new_cases                                   124941 non-null  float64
 6   new_cases_smoothed                          124941 non-null  float64
 7   total_deaths                                124941 non-null  float64
 8   new_deaths                                  124941 non-null  float64
 9   new_deaths_smoothed                         124941 non-null  float64
 

In [22]:
fig = px.scatter(
    df,
    x='total_cases', 
    y='total_deaths',
    size='total_deaths',
    color='continent',
    animation_frame='month'
)

fig.show()

In [None]:
import plotly.express as px
import pandas as pd

url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'
df = pd.read_csv(url)

country = 'location'
fig = px.bar(
  df.sort_values('date'), 
  x=country, y='people_vaccinated_per_hundred',
  color=country,
  animation_frame='date',
  animation_group=country,
  hover_name=country,
  range_y=[0,50],
  range_x=[0,30]
)
fig.update_layout(
  template='plotly_dark',
  margin=dict(r=10, t=25, b=40, l=60)
)
fig.show()

In [1]:
import plotly.express as px
import pandas as pd
import io
import requests
# get OWID data
df = pd.read_csv(io.StringIO(
    requests.get("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv").text))

# used for filtering
dt = pd.to_datetime(df["date"])
# last 30 days and exclude regional aggregations
fig = px.choropleth(df.loc[dt.ge(dt.max()-pd.Timedelta(days=30))].dropna(subset=["continent"]), 
                    locations = 'location', 
                    locationmode = 'country names',
                    color = 'new_deaths',
                    hover_name = 'location', 
                    animation_frame = 'date')

fig.update_layout(margin={"t":0,"b":0,"l":0,"r":0})

In [34]:
df2 = px.data.iris()
df2.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [None]:
fig = px.choropleth(df, 
                    locations = 'location', 
                    locationmode = 'country names',
                    color = 'total_cases',
                    hover_name = 'location', 
                    animation_frame = 'month')


fig = px.scatter(
    df.groupby(['location', 'month'])[['total_cases', 'total_deaths', 'continent']].max().reset_index(),
    x='total_cases', 
    y='total_deaths',
    color='continent',
    hover_name='location',
    animation_frame='month'
)

fig.show()

In [35]:
df['month'].min()

'2020-02'

In [36]:
df3 = pd.read_csv('https://plotly.github.io/datasets/country_indicators.csv')

In [38]:
df3.head()

Unnamed: 0,Country Name,Indicator Name,Year,Value
0,Arab World,"Agriculture, value added (% of GDP)",1962,
1,Arab World,CO2 emissions (metric tons per capita),1962,0.760996
2,Arab World,Domestic credit provided by financial sector (...,1962,18.16869
3,Arab World,Electric power consumption (kWh per capita),1962,
4,Arab World,Energy use (kg of oil equivalent per capita),1962,


In [48]:
df3[df3['Indicator Name'] == 'Agriculture, value added (% of GDP)']['Country Name']

0                            Arab World
14               Caribbean small states
28       Central Europe and the Baltics
42           Early-demographic dividend
56                  East Asia & Pacific
                      ...              
36890                    Virgin Islands
36904                West Bank and Gaza
36918                       Yemen, Rep.
36932                            Zambia
36946                          Zimbabwe
Name: Country Name, Length: 2640, dtype: object

In [39]:
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million,month
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,0.0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-02
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-02
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-02
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-02
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-02


In [40]:
col = 'total_cases'
df[col]

0              5.0
1              5.0
2              5.0
3              5.0
4              5.0
            ...   
134454    133438.0
134455    133505.0
134456    133557.0
134457    133593.0
134458    133593.0
Name: total_cases, Length: 124743, dtype: float64

In [46]:
df[['location', col]]['location']

0         Afghanistan
1         Afghanistan
2         Afghanistan
3         Afghanistan
4         Afghanistan
             ...     
134454       Zimbabwe
134455       Zimbabwe
134456       Zimbabwe
134457       Zimbabwe
134458       Zimbabwe
Name: location, Length: 124743, dtype: object

In [53]:
df[['location', col]]

Unnamed: 0,location,total_cases
0,Afghanistan,5.0
1,Afghanistan,5.0
2,Afghanistan,5.0
3,Afghanistan,5.0
4,Afghanistan,5.0
...,...,...
134454,Zimbabwe,133438.0
134455,Zimbabwe,133505.0
134456,Zimbabwe,133557.0
134457,Zimbabwe,133593.0


In [54]:
df['timestamp'] = pd.to_datetime(df['date'])

In [60]:
df['timestamp'].min().month

2

In [65]:
marks={date.month: date.month for date in df['timestamp'].dt.date.unique()}

In [66]:
marks

{2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 1: 1}

In [73]:
slider_options = {d_key: d_val for d_key, d_val in enumerate(sorted(df['month'].unique()))}

In [80]:
min(slider_options.keys())

0

In [84]:
x = np.linspace(min(slider_options.keys()), max(slider_options.keys()), 10,dtype=int)
x = x.round(0)

In [143]:
df[df['month'] == slider_options[2]]

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million,month
37,AFG,Asia,Afghanistan,2020-04-01,192.0,26.0,16.857,4.0,0.0,0.429,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-04
38,AFG,Asia,Afghanistan,2020-04-02,235.0,43.0,22.143,4.0,0.0,0.286,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-04
39,AFG,Asia,Afghanistan,2020-04-03,269.0,34.0,25.429,5.0,1.0,0.429,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-04
40,AFG,Asia,Afghanistan,2020-04-04,270.0,1.0,23.429,5.0,0.0,0.429,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-04
41,AFG,Asia,Afghanistan,2020-04-05,299.0,29.0,26.429,7.0,2.0,0.429,...,0.0,37.746,0.5,64.83,0.511,0.0,0.0,0.0,0.0,2020-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134096,ZWE,Africa,Zimbabwe,2020-04-26,31.0,0.0,0.857,4.0,0.0,0.143,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,2020-04
134097,ZWE,Africa,Zimbabwe,2020-04-27,32.0,1.0,1.000,4.0,0.0,0.143,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,2020-04
134098,ZWE,Africa,Zimbabwe,2020-04-28,32.0,0.0,0.571,4.0,0.0,0.143,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,2020-04
134099,ZWE,Africa,Zimbabwe,2020-04-29,32.0,0.0,0.571,4.0,0.0,0.000,...,30.7,36.791,1.7,61.49,0.571,0.0,0.0,0.0,0.0,2020-04


In [145]:
slider_options[1]

'2020-03'

In [97]:
{i: slider_options[i] for i in slider_options}

{0: '2020-02',
 1: '2020-03',
 2: '2020-04',
 3: '2020-05',
 4: '2020-06',
 5: '2020-07',
 6: '2020-08',
 7: '2020-09',
 8: '2020-10',
 9: '2020-11',
 10: '2020-12',
 11: '2021-01',
 12: '2021-02',
 13: '2021-03',
 14: '2021-04',
 15: '2021-05',
 16: '2021-06',
 17: '2021-07',
 18: '2021-08',
 19: '2021-09',
 20: '2021-10',
 21: '2021-11'}

In [74]:
slider_options

{0: '2020-02',
 1: '2020-03',
 2: '2020-04',
 3: '2020-05',
 4: '2020-06',
 5: '2020-07',
 6: '2020-08',
 7: '2020-09',
 8: '2020-10',
 9: '2020-11',
 10: '2020-12',
 11: '2021-01',
 12: '2021-02',
 13: '2021-03',
 14: '2021-04',
 15: '2021-05',
 16: '2021-06',
 17: '2021-07',
 18: '2021-08',
 19: '2021-09',
 20: '2021-10',
 21: '2021-11'}

In [109]:
df.groupby(['location', 'month']).agg(new_cases=('new_cases', 'sum'), total_cases=('total_cases', 'max')).reset_index()

Unnamed: 0,location,month,new_cases,total_cases
0,Afghanistan,2020-02,5.0,5.0
1,Afghanistan,2020-03,161.0,166.0
2,Afghanistan,2020-04,1661.0,1827.0
3,Afghanistan,2020-05,13353.0,15180.0
4,Afghanistan,2020-06,16265.0,31445.0
...,...,...,...,...
4302,Zimbabwe,2021-07,58996.0,108860.0
4303,Zimbabwe,2021-08,15913.0,124773.0
4304,Zimbabwe,2021-09,6047.0,130820.0
4305,Zimbabwe,2021-10,2157.0,132977.0


In [261]:
{'points': [{'curveNumber': 0, 'pointNumber': 190, 'pointIndex': 190, 'x': 7.688, 'y': 55.4550243902, 'customdata': 'Niger', 'hovertext': 'Niger', 'bbox': {'x0': 889.65, 'x1': 895.65, 'y0': 371.85, 'y1': 377.85}}]}

{'points': [{'curveNumber': 0,
   'pointNumber': 190,
   'pointIndex': 190,
   'x': 7.688,
   'y': 55.4550243902,
   'customdata': 'Niger',
   'hovertext': 'Niger',
   'bbox': {'x0': 889.65, 'x1': 895.65, 'y0': 371.85, 'y1': 377.85}}]}

In [246]:
hoverData = {'points': [{'curveNumber': 0, 'pointNumber': 3949, 'pointIndex': 3949, 'x': 47421741, 'y': 767433, 'customdata': 'United States', 'hovertext': 'United States', 'bbox': {'x0': 877.42, 'x1': 879.42, 'y0': 555.33, 'y1': 557.33}}]}
hoverData

{'points': [{'curveNumber': 0,
   'pointNumber': 3949,
   'pointIndex': 3949,
   'x': 47421741,
   'y': 767433,
   'customdata': 'United States',
   'hovertext': 'United States',
   'bbox': {'x0': 877.42, 'x1': 879.42, 'y0': 555.33, 'y1': 557.33}}]}

In [252]:
country_name = hoverData['points'][0]['customdata']

In [255]:
dff = df[df['location'] == country_name]

In [266]:
dff.head()

Unnamed: 0,location,month,new_cases,new_cases_smoothed,new_deaths,new_deaths_smoothed,new_cases_per_million,new_cases_smoothed_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
4095,United States,2020-02,9.0,2.716,1.0,0.143,0.027,0.008,0.003,0.0,...,19.1,24.6,0.0,2.77,78.86,0.926,0.0,0.0,0.0,0.0
4096,United States,2020-03,192054.0,127052.999,5358.0,3097.572,576.886,381.637,16.093,9.304,...,19.1,24.6,0.0,2.77,78.86,0.926,0.0,0.0,9.91,0.0
4097,United States,2020-04,888895.0,873810.713,60795.0,56905.571,2670.034,2624.724,182.611,170.93,...,19.1,24.6,0.0,2.77,78.86,0.926,69906.6,6.94,39.21,209.983282
4098,United States,2020-05,718167.0,732772.571,41520.0,44838.284,2157.207,2201.081,124.719,134.684,...,19.1,24.6,0.0,2.77,78.86,0.926,121279.7,9.49,25.81,364.296211
4099,United States,2020-06,843338.0,778321.717,19691.0,21127.571,2533.19,2337.898,59.146,63.464,...,19.1,24.6,0.0,2.77,78.86,0.926,141673.7,9.51,10.48,425.555077


In [269]:
dff[['location', 'month', xaxis_column_name]].head()

Unnamed: 0,location,month,total_cases
4095,United States,2020-02,25.0
4096,United States,2020-03,192079.0
4097,United States,2020-04,1080974.0
4098,United States,2020-05,1799141.0
4099,United States,2020-06,2642479.0


In [263]:
     Country Name                            Indicator Name  Year  Value
2666         Niger  Fertility rate, total (births per woman)  1962  7.416
6362         Niger  Fertility rate, total (births per woman)  1967  7.345
10058        Niger  Fertility rate, total (births per woman)  1972  7.484
13754        Niger  Fertility rate, total (births per woman)  1977  7.618
17450        Niger  Fertility rate, total (births per woman)  1982  7.604
21146        Niger  Fertility rate, total (births per woman)  1987  7.673
24842        Niger  Fertility rate, total (births per woman)  1992  7.737
28538        Niger  Fertility rate, total (births per woman)  1997  7.748
32234        Niger  Fertility rate, total (births per woman)  2002  7.725
35930        Niger  Fertility rate, total (births per woman)  2007  7.688

SyntaxError: invalid syntax (Temp/ipykernel_5240/875433930.py, line 1)

In [272]:
dff.head()

Unnamed: 0,location,month,new_cases,new_cases_smoothed,new_deaths,new_deaths_smoothed,new_cases_per_million,new_cases_smoothed_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
4095,United States,2020-02,9.0,2.716,1.0,0.143,0.027,0.008,0.003,0.0,...,19.1,24.6,0.0,2.77,78.86,0.926,0.0,0.0,0.0,0.0
4096,United States,2020-03,192054.0,127052.999,5358.0,3097.572,576.886,381.637,16.093,9.304,...,19.1,24.6,0.0,2.77,78.86,0.926,0.0,0.0,9.91,0.0
4097,United States,2020-04,888895.0,873810.713,60795.0,56905.571,2670.034,2624.724,182.611,170.93,...,19.1,24.6,0.0,2.77,78.86,0.926,69906.6,6.94,39.21,209.983282
4098,United States,2020-05,718167.0,732772.571,41520.0,44838.284,2157.207,2201.081,124.719,134.684,...,19.1,24.6,0.0,2.77,78.86,0.926,121279.7,9.49,25.81,364.296211
4099,United States,2020-06,843338.0,778321.717,19691.0,21127.571,2533.19,2337.898,59.146,63.464,...,19.1,24.6,0.0,2.77,78.86,0.926,141673.7,9.51,10.48,425.555077


In [275]:
px.scatter(dff, x='month', y=columns[-1])

NameError: name 'columns' is not defined

In [276]:
import plotly.io as pio
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none']

In [278]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4307 entries, 0 to 4306
Data columns (total 64 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   location                                    4307 non-null   object 
 1   month                                       4307 non-null   object 
 2   new_cases                                   4307 non-null   float64
 3   new_cases_smoothed                          4307 non-null   float64
 4   new_deaths                                  4307 non-null   float64
 5   new_deaths_smoothed                         4307 non-null   float64
 6   new_cases_per_million                       4307 non-null   float64
 7   new_cases_smoothed_per_million              4307 non-null   float64
 8   new_deaths_per_million                      4307 non-null   float64
 9   new_deaths_smoothed_per_million             4307 non-null   float64
 10  new_tests   