# <center>Animated report about COVID-19 cases in the top 10 largest (by population) European countries </center>


## <center> Data Processing Part </center>


### Let's import pandas for data processing and create a Data Frame from the downloaded .csv file

In [None]:
import pandas as pd

# Our World in Data CSV holding one row per location and day.
csv_url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

# Download the file into a Data Frame and preview its first rows.
df = pd.read_csv(filepath_or_buffer=csv_url)
df.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,new_people_vaccinated_smoothed,new_people_vaccinated_smoothed_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,0.126,0.126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,


### Let's check the column names of the Data Frame and define an algorithm for the data cleaning and preparation for the visualization


In [None]:
# Display every column label of the raw dataset so we can decide which ones the report needs.
df.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       't


### After getting acquainted with the input data, we decided to proceed with the following steps:
- select all European countries present in the data set
- sort the result by population
- get names of top 20 countries with the highest population
- visualize the outputs



### Selecting all European countries present in the data set (entries with `Europe` as a continent)


In [None]:
# Keep only the rows whose continent is exactly 'Europe'.
europe_only_df = df.loc[df['continent'].eq('Europe')]

# Sanity check: 'Europe' should be the only continent left.
europe_only_df['continent'].unique()

array(['Europe'], dtype=object)


### Grouping the rows by country and keeping the maximum of every numeric column, including `population` ([read more about the groupby method](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html))


In [None]:
# Collapse the daily rows into one row per country, keeping the maximum of
# every numeric column (the `population` column is what the next steps use).
#
# The first positional argument of `GroupBy.max` is the boolean `numeric_only`,
# not a column name: the former `.max('population')` only worked because a
# non-empty string is truthy. Spelling the flag out gives the same result
# without suggesting that a single column is being selected.
grouped_df = europe_only_df.groupby('location').max(numeric_only=True)
grouped_df.head()

Unnamed: 0_level_0,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,new_people_vaccinated_smoothed,new_people_vaccinated_smoothed_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1
Albania,205549.0,1239.0,1111.857,3161.0,21.0,20.429,71546.718,431.266,387.011,1100.269,7.31,7.111,2.08,,,,,,,,,4562.0,737014.0,256.537,1.588,3847.0,1.339,0.4091,329.5,2226267.0,1111494.0,1001189.0,113584.0,23655.0,17565.0,77.49,38.69,34.85,3.95,6114.0,6436.0,0.224,89.81,2872934.0,104.871,38.0,13.188,8.643,11803.431,1.1,304.195,10.08,7.1,51.2,,2.89,78.57,0.795,10923.4,32.72,120.15,3802.175755
Andorra,20549.0,696.0,274.143,134.0,6.0,2.0,265648.835,8997.595,3544.004,1732.296,77.565,25.855,1.76,,,,,,,,,,247655.0,3201.58,,1185.0,15.319,0.2835,156.5,115709.0,56628.0,50551.0,8530.0,2829.0,1762.0,149.58,73.21,65.35,11.03,22778.0,854.0,1.104,65.74,77354.0,163.755,,,,,,109.135,7.97,29.0,37.8,,,83.73,0.868,89.6,46.81,145.9,1158.311141
Austria,1247399.0,15809.0,14139.571,13462.0,218.0,129.286,137939.74,1748.189,1563.581,1488.653,24.107,14.297,3.17,709.0,78.403,3985.0,440.669,,,,,2945871.0,116555934.0,12888.976,325.76,551494.0,60.985,0.0308,5439.8,15746176.0,6535083.0,6294779.0,3258182.0,159057.0,112054.0,174.12,72.27,69.61,36.03,12391.0,53656.0,0.593,82.41,9043072.0,106.749,44.4,19.202,13.748,45436.686,0.7,145.183,6.35,28.4,30.9,,7.37,81.54,0.922,13797.4,9.28,57.19,1525.74258
Belarus,683172.0,2097.0,2052.857,5371.0,29.0,16.571,72347.943,222.072,217.398,568.789,3.071,1.755,2.67,,,,,,,,,40232.0,10770753.0,1140.623,4.261,37839.0,4.007,0.1221,1424.2,7217727.0,4064247.0,3039334.0,114146.0,,77113.0,76.44,43.04,32.19,1.21,8166.0,35979.0,0.381,43.52,9442867.0,46.858,40.3,14.799,9.788,17167.967,,443.129,5.18,10.5,46.1,,11.0,74.79,0.823,31283.9,21.87,71.42,3312.966284
Belgium,1999764.0,47836.0,18622.857,27895.0,496.0,333.429,171914.252,4112.33,1600.956,2398.057,42.64,28.664,2.62,1474.0,126.716,7461.0,641.402,,,4777.0,410.677,142000.0,26249867.0,2256.629,12.207,120098.0,10.324,0.327,111.1,20351329.0,8879748.0,8759422.0,3134336.0,197294.0,154994.0,174.95,76.34,75.3,26.95,13324.0,100402.0,0.863,81.48,11632334.0,375.564,41.8,18.571,12.849,42658.576,0.2,114.898,4.29,25.1,31.4,,5.64,81.63,0.931,18090.9,15.68,105.35,1555.225288


### Sorting values by `population` in descending order


In [None]:
# Order the countries from the largest population to the smallest.
sorted_df = grouped_df.sort_values(by='population', ascending=False)


### Creating a sample with top 20 countries sorted by `population`


In [None]:
# The frame is already ordered by population, so the first 20 rows are the
# 20 most populous countries.
top_20_df = sorted_df.iloc[:20]
top_20_df.head(n=5)

Unnamed: 0_level_0,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,new_people_vaccinated_smoothed,new_people_vaccinated_smoothed_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1
Russia,10009866.0,40210.0,39354.286,290340.0,1222.0,1217.571,68602.065,275.577,269.712,1989.829,8.375,8.345,2.52,,,,,,,,,730274.0,234037966.0,1603.966,5.005,617847.0,4.234,0.0967,1796.6,138517750.0,71003850.0,62751255.0,5949687.0,2389472.0,1020075.0,94.93,48.66,43.01,4.08,6991.0,569281.0,0.39,87.04,145912022.0,8.823,39.6,14.178,9.393,24765.954,0.1,431.297,6.18,23.4,58.3,,8.05,72.58,0.824,984309.3,29.1,83.46,6745.909532
Germany,6813287.0,103018.0,58134.429,108238.0,1734.0,894.429,81206.779,1227.86,692.898,1290.076,20.667,10.661,3.13,5745.0,68.474,,,2426.0,28.915,12596.0,150.13,,89552725.0,1067.369,,277891.0,3.312,0.21,166.7,140915301.0,60888551.0,58411151.0,25171551.0,1577840.0,1030419.0,167.96,72.57,69.62,30.0,12281.0,593658.0,0.708,85.19,83900471.0,237.016,46.6,21.453,15.957,45229.245,,156.139,8.31,28.2,33.1,,8.0,81.33,0.947,71236.4,4.52,36.18,849.058404
United Kingdom,11301095.0,92713.0,72760.571,147283.0,1820.0,1248.857,165687.922,1359.286,1066.759,2159.35,26.683,18.31,3.46,4077.0,59.774,39254.0,575.512,,,28804.0,422.302,1868835.0,346709781.0,5083.191,27.399,1303126.0,19.105,0.348,568.1,124656103.0,51393664.0,46923788.0,26338651.0,937033.0,655652.0,182.76,75.35,68.8,38.62,9613.0,506394.0,0.742,87.96,68207114.0,272.898,40.8,18.517,12.527,39753.244,0.2,122.137,4.28,20.0,24.7,,2.54,81.32,0.932,131450.6,20.59,107.25,1927.227122
France,8622194.0,117900.0,61128.857,121480.0,1438.0,975.143,127614.735,1745.006,904.751,1797.992,21.283,14.433,3.03,7019.0,103.886,33466.0,495.321,4492.0,66.485,23760.0,351.665,1076979.0,,,15.94,806048.0,11.93,0.159,125.0,116208558.0,52430750.0,48496460.0,16338915.0,903540.0,697580.0,172.0,77.6,71.78,24.18,10325.0,409041.0,0.605,87.96,67564251.0,122.578,42.0,19.718,13.079,38605.671,,86.06,4.77,30.1,35.6,,5.98,82.66,0.901,72006.0,7.77,61.08,1065.741112
Italy,5364852.0,40902.0,35072.571,135544.0,993.0,814.286,88869.915,677.55,580.985,2245.315,16.449,13.489,3.58,4068.0,67.387,38507.0,637.877,1896.0,31.404,18445.0,305.542,776563.0,128435536.0,2127.562,12.864,603742.0,10.001,0.2684,272.2,104858805.0,47820980.0,44417400.0,14119633.0,634612.0,592773.0,173.7,79.22,73.58,23.39,9819.0,477101.0,0.79,93.52,60367471.0,205.859,47.9,23.021,16.24,35220.084,2.0,113.151,4.78,19.8,27.8,,3.18,83.51,0.892,146294.1,17.32,85.62,2423.39289


### Getting a list of all countries present in the previously created Data Frame `top_20_df`

In [None]:
# The grouped frame is indexed by country name, so its index is the list of
# selected countries.
list_of_countries = top_20_df.index.to_numpy()
list_of_countries

array(['Russia', 'Germany', 'United Kingdom', 'France', 'Italy', 'Spain',
       'Ukraine', 'Poland', 'Romania', 'Netherlands', 'Belgium',
       'Czechia', 'Greece', 'Portugal', 'Sweden', 'Hungary', 'Belarus',
       'Austria', 'Switzerland', 'Bulgaria'], dtype=object)

### Selecting all rows from an input dataset containing countries from `list_of_countries` in `location` column


In [None]:
# Keep only the daily rows that belong to one of the selected countries.
selected_countries_df = europe_only_df.loc[europe_only_df['location'].isin(list_of_countries)]

### Creating a mapping dictionary between found country names and unique RGB colors
```
    { 
        'country_name1': [1., 1., 1.],
        'country_name2': [0.5, 1., 1.],
        ...
    }
    
```

In [None]:
import seaborn as sns

# The default seaborn palette starts repeating after 10 colors, so asking it
# for 20 gives pairs of countries the same color. The 'husl' palette instead
# generates as many evenly spaced hues as requested, so every country gets
# its own color.
list_of_colors = sns.color_palette('husl', n_colors=len(list_of_countries))
assert len(set(list_of_colors)) == len(list_of_countries), "country colors must be unique"

# country name -> (r, g, b) tuple with components in [0, 1]
mapping_country_color = dict(zip(list_of_countries, list_of_colors))
mapping_country_color

{'Austria': (0.4980392156862745, 0.4980392156862745, 0.4980392156862745),
 'Belarus': (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),
 'Belgium': (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
 'Bulgaria': (0.09019607843137255, 0.7450980392156863, 0.8117647058823529),
 'Czechia': (1.0, 0.4980392156862745, 0.054901960784313725),
 'France': (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
 'Germany': (1.0, 0.4980392156862745, 0.054901960784313725),
 'Greece': (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),
 'Hungary': (0.5490196078431373, 0.33725490196078434, 0.29411764705882354),
 'Italy': (0.5803921568627451, 0.403921568627451, 0.7411764705882353),
 'Netherlands': (0.09019607843137255, 0.7450980392156863, 0.8117647058823529),
 'Poland': (0.4980392156862745, 0.4980392156862745, 0.4980392156862745),
 'Portugal': (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
 'Romania': (0.7372549019607844, 0.7411764705882353, 0.

## <center> Visualization part </center>

### Getting the first and last dates covered by the selected data; these values will be used later in the title of the bar animation


In [None]:
# The rows are ordered by country and only then by day, so the first/last row
# is not necessarily the earliest/latest day of the whole selection.
# The dates are ISO 'YYYY-MM-DD' strings, for which the lexicographic
# minimum/maximum is also the chronological one.
start_date = selected_countries_df['date'].min()
end_date = selected_countries_df['date'].max()

In [None]:
def animate(i):
    """Draw one frame of the bar-chart race: the state on the i-th day.

    The frame shows, for the i-th distinct date in ``selected_countries_df``,
    the (up to) 10 countries with the highest ``total_cases`` as horizontal
    bars, largest at the top, each labelled with the country name and the
    number of cases.

    Parameters
    ----------
    i : int
        Frame number supplied by ``FuncAnimation``. It is interpreted as an
        index into the chronologically sorted distinct dates; values past the
        last day wrap around so an unbounded frame counter never raises.

    Notes
    -----
    Reads the notebook-level names ``selected_countries_df``,
    ``mapping_country_color``, ``start_date``, ``end_date``, ``ax`` and
    ``mticker``; draws on ``ax`` and returns nothing.
    """
    # One frame per calendar day. Indexing the raw rows instead would replay
    # the dates of a single country, because every country contributes its
    # own row for each day.
    frame_dates = selected_countries_df['date'].drop_duplicates().sort_values().to_numpy()
    if len(frame_dates) == 0:
        return
    date = frame_dates[i % len(frame_dates)]

    # top 10 countries of that day, sorted by total_cases (largest last, so it
    # ends up at the top of the horizontal bar chart). Rows without a reported
    # total are dropped: NaN positions cannot be drawn and only trigger
    # "posx and posy should be finite values" warnings.
    single_day_df = selected_countries_df[selected_countries_df['date'].eq(date)]
    sorted_single_day_df = (
        single_day_df
        .dropna(subset=['total_cases'])
        .sort_values(by='total_cases', ascending=True)
        .tail(10)
    )

    # deleting previously created objects on the plot
    ax.clear()

    # one color per country shown in this frame
    colors = [mapping_country_color[country_name]
              for country_name in sorted_single_day_df['location'].values]

    ax.barh(sorted_single_day_df['location'], sorted_single_day_df['total_cases'], color=colors)

    # dynamically calculated offset between the end of a bar and its labels
    dx = single_day_df['total_cases'].max() / 20000

    # country name inside the bar, number of cases to the right of it
    for rank, (value, name) in enumerate(zip(sorted_single_day_df['total_cases'],
                                             sorted_single_day_df['location'])):
        ax.text(value - dx, rank - 0.2, name,
                size=10, weight=600,
                ha='right', va='bottom')

        ax.text(value + dx, rank, f'{value:,.0f}',
                size=14, ha='left', va='center')

    # date stamp in the lower right corner of the axes
    ax.text(1, 0, date, size=14,
            transform=ax.transAxes, ha='right')

    # axis cosmetics; ax.clear() resets them, so they are re-applied once per frame
    ax.xaxis.set_major_formatter(mticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.set_axisbelow(True)
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')

    # Use the axes' own methods so the frame is always drawn on the animated
    # axes, whatever pyplot currently considers the "current" figure. The
    # figure itself is shown by the cell that creates the animation, so no
    # plt.show() is needed per frame.
    ax.set_title(f"Registered COVID cases in Europe in period {start_date} - {end_date}", y=-0.1)
    ax.set_frame_on(False)

In [None]:
%matplotlib notebook


from matplotlib.animation import FuncAnimation
import matplotlib.ticker as mticker
import matplotlib.pyplot as plt


fig, ax = plt.subplots(figsize=(9, 5))

# Give the animation a finite length of one frame per distinct day.
# Without `frames`, FuncAnimation feeds an ever-growing counter to `animate`,
# so the live animation eventually runs past the data, and exports such as
# `to_html5_video()` only keep the default `save_count` number of frames.
anim = FuncAnimation(fig, animate,
                     frames=selected_countries_df['date'].nunique(),
                     interval=300)
plt.show()

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py", line 196, in process
    func(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/matplotlib/animation.py", line 1467, in _stop
    self.event_source.remove_callback(self._loop_delay)
AttributeError: 'NoneType' object has no attribute 'remove_callback'


<IPython.core.display.Javascript object>

In [None]:
# This notebook runs in Google Colab, where extra steps are needed to display
# an animation: it is rendered to an HTML5 video and embedded in the cell
# output. When running locally, the previous cell already produces the
# desired output.
from IPython.display import HTML
HTML(anim.to_html5_video())

posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
