# Big Data - level 1 - data cleaning and integration.

Author: Artur Szewczykowski

## 1. Additional description about dataset.
https://github.com/GoogleCloudPlatform/covid-19-open-data/blob/main/docs/table-by-sex.md

## 2. Imports and bigquery

In [1]:
import os
import pandas as pd
from google.cloud import bigquery

In [2]:
# Point Google's client libraries at a service-account key file.
# setdefault() is used so that a credentials path already exported in the
# environment (e.g. by the shell or a CI job) is respected instead of being
# silently overwritten by the notebook's placeholder path.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "XXX.json")

# BigQuery client used by every query cell in this notebook.
client = bigquery.Client()

## 3. Load data to DataFrame

In [3]:
# Pull a 10-row sample of the public COVID-19 table to inspect its layout.
query = 'select * from bigquery-public-data.covid19_open_data.covid19_open_data limit 10'

# Submit the job, wait for its result and convert it to a DataFrame in one chain.
df = client.query(query).result().to_dataframe()
df

Unnamed: 0,location_key,date,place_id,wikidata_id,datacommons_id,country_code,country_name,iso_3166_1_alpha_2,iso_3166_1_alpha_3,aggregation_level,...,cumulative_vaccine_doses_administered_pfizer,new_persons_fully_vaccinated_moderna,cumulative_persons_fully_vaccinated_moderna,new_vaccine_doses_administered_moderna,cumulative_vaccine_doses_administered_moderna,new_persons_fully_vaccinated_janssen,cumulative_persons_fully_vaccinated_janssen,new_vaccine_doses_administered_janssen,cumulative_vaccine_doses_administered_janssen,location_geometry
0,AR,2020-04-14,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
1,AR,2021-01-15,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
2,AR,2021-11-07,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
3,AR,2021-01-09,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
4,AR,2021-06-16,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
5,AR,2020-08-27,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
6,AR,2022-01-06,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
7,AR,2020-02-05,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
8,AR,2021-09-08,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
9,AR,2021-09-04,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)



## 4. Learning about the data.

### 4.1 Generate basic statistics about dataset.

In [4]:
# Preview the first rows of the sample frame `df`.
display(df.head())

Unnamed: 0,location_key,date,place_id,wikidata_id,datacommons_id,country_code,country_name,iso_3166_1_alpha_2,iso_3166_1_alpha_3,aggregation_level,...,cumulative_vaccine_doses_administered_pfizer,new_persons_fully_vaccinated_moderna,cumulative_persons_fully_vaccinated_moderna,new_vaccine_doses_administered_moderna,cumulative_vaccine_doses_administered_moderna,new_persons_fully_vaccinated_janssen,cumulative_persons_fully_vaccinated_janssen,new_vaccine_doses_administered_janssen,cumulative_vaccine_doses_administered_janssen,location_geometry
0,AR,2020-04-14,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
1,AR,2021-01-15,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
2,AR,2021-11-07,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
3,AR,2021-01-09,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)
4,AR,2021-06-16,ChIJZ8b99fXKvJURqA_wKpl3Lz0,Q414,country/ARG,AR,Argentina,AR,ARG,0,...,,,,,,,,,,POINT(-64 -34)


In [5]:
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only appended a stray "None" to the cell output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Columns: 701 entries, location_key to location_geometry
dtypes: Int64(217), dbdate(1), float64(441), object(42)
memory usage: 57.0+ KB


None

In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) for the sample frame.
display(df.describe())

Unnamed: 0,aggregation_level,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,cumulative_tested,new_persons_vaccinated,cumulative_persons_vaccinated,new_persons_fully_vaccinated,cumulative_persons_fully_vaccinated,...,new_vaccine_doses_administered_pfizer,cumulative_vaccine_doses_administered_pfizer,new_persons_fully_vaccinated_moderna,cumulative_persons_fully_vaccinated_moderna,new_vaccine_doses_administered_moderna,cumulative_vaccine_doses_administered_moderna,new_persons_fully_vaccinated_janssen,cumulative_persons_fully_vaccinated_janssen,new_vaccine_doses_administered_janssen,cumulative_vaccine_doses_administered_janssen
count,10.0,10.0,10.0,10.0,10.0,10.0,7.0,7.0,7.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,0.0,20417.8,134.5,3043363.1,66866.5,9916257.6,30379.571429,9050848.0,50280.0,6132022.428571,...,,,,,,,,,,
std,0.0,43681.21582,161.52554,2494158.193182,50625.527481,8668910.451621,29652.213885,6994222.653853,53423.73948,5779011.009321,...,,,,,,,,,,
min,0.0,0.0,0.0,76.0,0.0,433.0,4462.0,58537.0,0.0,15.0,...,,,,,,,,,,
25%,0.0,947.25,20.25,742024.75,21445.75,2085198.0,12132.5,3099351.0,11937.5,815687.0,...,,,,,,,,,,
50%,0.0,6245.5,71.0,3055970.0,71591.0,8739170.0,25262.0,12384879.0,31675.0,7274501.0,...,,,,,,,,,,
75%,0.0,11492.0,182.75,5245308.0,115191.75,17149064.25,33586.5,13953713.0,80181.0,9771534.0,...,,,,,,,,,,
max,0.0,142959.0,523.0,6354831.0,117946.0,22402744.0,91495.0,16806392.0,136048.0,14475199.0,...,,,,,,,,,,


### 4.2 How many countries are included in the dataset?

In [7]:
# Count the distinct country codes that appear anywhere in the table.
query = """
SELECT COUNT(DISTINCT country_code) AS num_countries
FROM bigquery-public-data.covid19_open_data.covid19_open_data

"""

number_of_country = client.query(query).result().to_dataframe()

# The result has a single row and a single column: pull out the scalar.
number_of_country["num_countries"].iloc[0]


246

The query returns 246 because the dataset also includes dependent territories such as "French Southern Territories" and "Guam".
For our purposes this is fine, although in reality there are 195 countries in the world.

### 4.3. How is daily information for countries recorded?

In [8]:
# Data is usually added once per day. A country can still have two entries for
# the same date because it is also reported at a different aggregation_level,
# so the query keeps only aggregation_level = 0 rows.
# Poland serves as the example country; rows are ordered by date so that the
# continuity of the daily series can be checked afterwards.
query = ("""
SELECT *
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE  aggregation_level = 0 AND country_name = 'Poland'
ORDER BY date ASC 
""")

country_all = client.query(query).to_dataframe()
# Show only the date column of the result.
country_all['date']

0      2020-01-01
1      2020-01-02
2      2020-01-03
3      2020-01-04
4      2020-01-05
          ...    
986    2022-09-13
987    2022-09-14
988    2022-09-15
989    2022-09-16
990    2022-09-17
Name: date, Length: 991, dtype: dbdate

In [9]:

# Gap between each row and the previous one (the first row has no predecessor,
# so its NaT difference is dropped).
day_steps = country_all['date'].diff().dropna()

# The series is gap-free only if every step is exactly one day.
is_continuous = day_steps.eq(pd.Timedelta(days=1)).all()

is_continuous


True

### 4.4. How are numerical values recorded?

In [10]:
# Count how many columns there are of each dtype.
country_all.dtypes.value_counts()

float64    441
Int64      217
object      42
dbdate       1
Name: count, dtype: int64

Examples of columns that should be of numeric type but are actually objects, i.e. strings of characters:

In [11]:
# Columns picked as examples of numeric-looking values stored with dtype object.
numeric_looking_columns = [
    'population_largest_city',
    'population_clustered',
    'human_capital_index',
    'area_rural_sq_km',
    'area_urban_sq_km',
    'life_expectancy',
]
display(country_all[numeric_looking_columns])

Unnamed: 0,population_largest_city,population_clustered,human_capital_index,area_rural_sq_km,area_urban_sq_km,life_expectancy
0,1775933,1775933,0.747,277840,30501,77.7
1,1775933,1775933,0.747,277840,30501,77.7
2,1775933,1775933,0.747,277840,30501,77.7
3,1775933,1775933,0.747,277840,30501,77.7
4,1775933,1775933,0.747,277840,30501,77.7
...,...,...,...,...,...,...
986,1775933,1775933,0.747,277840,30501,77.7
987,1775933,1775933,0.747,277840,30501,77.7
988,1775933,1775933,0.747,277840,30501,77.7
989,1775933,1775933,0.747,277840,30501,77.7


Examples of columns that use the `_per_` notation (a value expressed per some unit):

In [12]:
# Keep only the columns whose name contains the substring '_per_'.
display(country_all.filter(like='_per_'))

Unnamed: 0,gdp_per_capita_usd,nurses_per_1000,physicians_per_1000,hospital_beds_per_1000
0,15222,6.8926,2.3788,
1,15222,6.8926,2.3788,
2,15222,6.8926,2.3788,
3,15222,6.8926,2.3788,
4,15222,6.8926,2.3788,
...,...,...,...,...
986,15222,6.8926,2.3788,
987,15222,6.8926,2.3788,
988,15222,6.8926,2.3788,
989,15222,6.8926,2.3788,


###  4.5. What time frame is included in the data? What are the time frames for new cases, new deaths, and new vaccinated people?

In [13]:
def build_daily_total_query(metric: str) -> str:
    """Return SQL that sums ``metric`` per date over all country-level rows.

    Only dates whose total is strictly positive are returned, so the first and
    last dates of the result delimit the period in which the metric was
    actually reported.

    Args:
        metric: Name of the table column to aggregate. It is interpolated into
            the SQL text verbatim, so pass only trusted, hard-coded column names.

    Returns:
        The query text; its result is ordered by ascending date and has the
        columns ``date`` and ``daily_total``.
    """
    # "SUM(...) > 0" is not satisfied when the sum is NULL, so a separate
    # "IS NOT NULL" condition is unnecessary.
    return f"""
SELECT date, SUM({metric}) AS daily_total
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE aggregation_level=0
GROUP BY date
HAVING SUM({metric}) > 0
ORDER BY date ASC
"""


# One query per metric; the three differ only in the aggregated column.
query_new_confirmed = build_daily_total_query("new_confirmed")
query_new_deceased = build_daily_total_query("new_deceased")
query_new_person_vaccinated = build_daily_total_query("new_persons_vaccinated")

new_confirmed = client.query(query_new_confirmed).to_dataframe()
new_deceased = client.query(query_new_deceased).to_dataframe()
new_person_vaccinated = client.query(query_new_person_vaccinated).to_dataframe()

In [14]:
# Report the first and last date covered by the Poland sample ("data") and by
# each of the per-metric daily totals.
for label, frame in (
    ("data", country_all),
    ("new_confirmed", new_confirmed),
    ("new_deceased", new_deceased),
    ("new_person_vaccinated", new_person_vaccinated),
):
    first_day = frame['date'].min()
    last_day = frame['date'].max()
    print(f"The time frame of the {label} is from {first_day} to {last_day}.")


The time frame of the data is from 2020-01-01 to 2022-09-17.
The time frame of the new_confirmed is from 2020-01-01 to 2022-09-15.
The time frame of the new_deceased is from 2020-01-02 to 2022-09-15.
The time frame of the new_person_vaccinated is from 2020-12-07 to 2022-09-14.


Time intervals for each country separately

In [15]:
# For every country (aggregation_level = 0 rows only) find the first and last
# date on which each metric had a positive value. One SELECT per metric
# (new_confirmed, new_deceased, new_persons_vaccinated) is stacked with
# UNION ALL; the constant `metric` column records which metric a row describes.
# The final ORDER BY sorts the combined result by country and metric.
query = """
SELECT country_name, 
       MIN(date) AS start_date, 
       MAX(date) AS end_date,
       'new_confirmed' AS metric
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE new_confirmed IS NOT NULL AND new_confirmed > 0 AND aggregation_level=0
GROUP BY country_name

UNION ALL

SELECT country_name, 
       MIN(date) AS start_date, 
       MAX(date) AS end_date,
       'new_deceased' AS metric
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE new_deceased IS NOT NULL AND new_deceased > 0 AND aggregation_level=0
GROUP BY country_name

UNION ALL

SELECT country_name, 
       MIN(date) AS start_date, 
       MAX(date) AS end_date,
       'new_persons_vaccinated' AS metric
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE new_persons_vaccinated IS NOT NULL AND new_persons_vaccinated > 0 AND aggregation_level=0
GROUP BY country_name
ORDER BY country_name, metric
"""

# One row per (country, metric) pair that has at least one positive value.
dates_info = client.query(query).to_dataframe()
dates_info

Unnamed: 0,country_name,start_date,end_date,metric
0,Afghanistan,2020-02-22,2022-09-13,new_confirmed
1,Afghanistan,2020-03-21,2022-09-12,new_deceased
2,Afghanistan,2021-02-28,2022-09-12,new_persons_vaccinated
3,Albania,2020-03-07,2022-09-13,new_confirmed
4,Albania,2020-03-09,2022-09-11,new_deceased
...,...,...,...,...
663,Zambia,2020-01-02,2022-09-10,new_deceased
664,Zambia,2021-04-15,2022-08-07,new_persons_vaccinated
665,Zimbabwe,2020-01-02,2022-09-12,new_confirmed
666,Zimbabwe,2020-01-02,2022-08-29,new_deceased


###  4.6. Which countries provided data broken down by gender?


In [16]:
# Per-country totals of every sex-specific metric (country-level rows only).
# A country is listed when at least one of the *_male sums checked in HAVING
# (confirmed, deceased, tested, hospitalized, recovered) is non-NULL, i.e. the
# country reported that metric by sex at least once.
# NOTE(review): the intensive-care columns and all *_female columns are not
# part of the HAVING test - confirm that this is intended.
query = ("""
SELECT country_name AS `Country_name`, iso_3166_1_alpha_3 AS `3_letter_code`, SUM(new_confirmed_male) AS confirmed_male, SUM(new_confirmed_female) AS confirmed_female, SUM(new_deceased_male) AS deceased_male, SUM(new_deceased_female) as deceased_female, SUM(new_tested_male) AS tested_male, SUM(new_tested_female) AS tested_female, SUM(new_hospitalized_patients_male) AS hospitalized_male, SUM(new_hospitalized_patients_female) AS hospitalized_female, SUM(new_intensive_care_patients_male) AS cared_male, SUM(new_intensive_care_patients_female) AS cared_female, SUM(new_recovered_male) AS recovered_male, SUM(new_recovered_female) AS recovered_female
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE aggregation_level = 0 
GROUP BY country_name, iso_3166_1_alpha_3
HAVING SUM(new_confirmed_male) IS NOT NULL OR SUM(new_deceased_male) IS NOT NULL OR SUM(new_tested_male) IS NOT NULL OR SUM(new_hospitalized_patients_male) IS NOT NULL OR SUM(new_recovered_male) IS NOT NULL
ORDER BY country_name
""")

# Scratch frame used only for display in this cell.
df_temp= client.query(query).to_dataframe()
df_temp

Unnamed: 0,Country_name,3_letter_code,confirmed_male,confirmed_female,deceased_male,deceased_female,tested_male,tested_female,hospitalized_male,hospitalized_female,cared_male,cared_female,recovered_male,recovered_female
0,Argentina,ARG,4418859,4780139.0,72801.0,53930.0,13178569.0,14928395.0,248464.0,222232.0,48422.0,33209.0,,
1,Brazil,BRA,8875904,8368673.0,311308.0,244105.0,26382353.0,26921275.0,996777.0,792931.0,330847.0,245941.0,0.0,0.0
2,Colombia,COL,2697358,3042821.0,93611.0,62159.0,,,,,,,2542476.0,2898545.0
3,Czech Republic,CZE,1956993,2111418.0,23149.0,17783.0,,,,,,,1925067.0,2082491.0
4,Estonia,EST,272596,326685.0,,,1595674.0,1890648.0,,,,,,
5,Finland,FIN,526714,587525.0,,,,,,,,,,
6,Germany,DEU,6092181,6356325.0,64006.0,56007.0,,,,,,,4370388.0,4550358.0
7,Hong Kong,HKG,7417,,127.0,,,,6708.0,,,,,
8,India,IND,92449,51846.0,30.0,13.0,,,3431.0,1712.0,,,,
9,Malaysia,MYS,1676866,1323134.0,20855.0,15444.0,,,,,,,,


### 4.7 Which countries provided data broken down by age group?

In [17]:
# Per-country totals of the age-specific metrics (country-level rows only).
# Only the first age bucket (the *_age_0 columns) is inspected: a country is
# listed when at least one of the age_0 sums checked in HAVING is non-NULL.
# NOTE(review): this treats the age_0 bucket as a proxy for "reports data by
# age group" - confirm that no country fills only the other buckets.
query = ("""
SELECT country_name AS `Country_name`, iso_3166_1_alpha_3 AS `3_letter_code`, SUM(new_confirmed_age_0) AS confirmed_age_0, SUM(new_deceased_age_0) AS deceased_age_0, SUM(new_tested_age_0) AS tested_age_0, SUM(new_hospitalized_patients_age_0) as hospitalized_age_0, SUM(new_intensive_care_patients_age_0) AS cared_age_0, SUM(new_recovered_age_0) AS recovered_age_0 
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE aggregation_level = 0 
GROUP BY country_name, iso_3166_1_alpha_3
HAVING SUM(new_confirmed_age_0) IS NOT NULL OR SUM(new_deceased_age_0) IS NOT NULL OR SUM(new_tested_age_0) IS NOT NULL OR SUM(new_hospitalized_patients_age_0) IS NOT NULL OR SUM(new_recovered_age_0) IS NOT NULL
ORDER BY country_name
""")

# Scratch frame used only for display in this cell.
df_temp= client.query(query).to_dataframe()
df_temp

Unnamed: 0,Country_name,3_letter_code,confirmed_age_0,deceased_age_0,tested_age_0,hospitalized_age_0,cared_age_0,recovered_age_0
0,Argentina,ARG,224240,192.0,1303477.0,41095.0,3269.0,
1,Brazil,BRA,891485,1214.0,2726462.0,21195.0,5273.0,0.0
2,Colombia,COL,198102,360.0,,,,191387.0
3,Czech Republic,CZE,321365,7.0,,,,320168.0
4,Estonia,EST,44622,,208798.0,,,
5,Finland,FIN,85684,,,,,
6,France,FRA,2789829,,23039639.0,,,
7,Germany,DEU,437029,23.0,,,,290555.0
8,Hong Kong,HKG,666,0.0,,578.0,,
9,India,IND,4273,1.0,,78.0,,


### 4.8 Which countries provided data on vaccinations by vaccine type (company)?

In [18]:
# Per-country totals of fully vaccinated persons by vaccine (Moderna, Janssen,
# Pfizer) next to the overall number of vaccinated persons. A country is listed
# when at least one of the three per-vaccine sums is non-NULL.
query = ("""
SELECT country_name AS `Country_name`, iso_3166_1_alpha_3 AS `3_letter_code`, SUM(new_persons_fully_vaccinated_moderna) AS vaccinated_moderna, SUM(new_persons_fully_vaccinated_janssen) AS vaccinated_janssen, SUM(new_persons_fully_vaccinated_pfizer) AS vaccinated_pfizer, SUM(new_persons_vaccinated) AS total_vaccinated 
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE aggregation_level = 0 
GROUP BY country_name, iso_3166_1_alpha_3
HAVING SUM(new_persons_fully_vaccinated_moderna) IS NOT NULL OR SUM(new_persons_fully_vaccinated_janssen) IS NOT NULL OR SUM(new_persons_fully_vaccinated_pfizer) IS NOT NULL
ORDER BY country_name
""")

# Scratch frame used only for display in this cell.
df_temp= client.query(query).to_dataframe()
df_temp

Unnamed: 0,Country_name,3_letter_code,vaccinated_moderna,vaccinated_janssen,vaccinated_pfizer,total_vaccinated
0,Malaysia,MYS,,,15153452,27880068
1,United States of America,USA,63469471.0,16886719.0,112344792,263883327


### 4.9 What do the numerical values in the restriction columns mean? (school_closing, workplace_closing, cancel_public_events, restrictions_on_gatherings)

In [19]:
# Daily restriction levels per country (country-level rows only), with the four
# restriction columns cast to INT64.
# Grouping by every selected source column already removes exact duplicate
# rows, so the DISTINCT keyword is redundant here (kept as written).
query = ("""
SELECT DISTINCT date, iso_3166_1_alpha_3 AS `3_letter_code`, CAST(school_closing AS INT64) AS `school_closing`, CAST(workplace_closing  AS INT64) AS `workplace_closing`, CAST(cancel_public_events AS INT64) AS `cancel_public_events`, CAST(restrictions_on_gatherings AS INT64) AS `restrictions_on_gatherings`
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE aggregation_level = 0
GROUP BY iso_3166_1_alpha_3, date, school_closing, workplace_closing, cancel_public_events, restrictions_on_gatherings
ORDER BY date
""")
# Kept under its own name because it is merged into the combined frame later.
df_restrictions= client.query(query).to_dataframe()
df_restrictions

Unnamed: 0,date,3_letter_code,school_closing,workplace_closing,cancel_public_events,restrictions_on_gatherings
0,2020-01-01,RWA,0,0,0,0
1,2020-01-01,CUW,,,,
2,2020-01-01,GRC,0,0,0,0
3,2020-01-01,AUT,0,0,0,0
4,2020-01-01,TUV,,,,
...,...,...,...,...,...,...
243781,2022-09-17,GIB,,,,
243782,2022-09-17,BGD,,,,
243783,2022-09-17,BWA,,,,
243784,2022-09-17,NIC,,,,


<b>School closing:</b>
<br> 0 - no recommendations,
<br> 1 - recommended closure or introduction of restrictions,
<br> 2 - order to close some types of schools,
<br> 3 - order to close all types of schools. <br> <br>
<b>Workplace closing:</b>
<br> 0 - no recommendations,
<br> 1 - recommended closure or remote work, or introduction of restrictions for companies,
<br> 2 - order to close some sectors or mandatory remote work for some employees,
<br> 3 - order to close all workplaces except for essential activities (e.g. grocery stores, medical facilities). <br> <br>
<b>Cancel public events:</b>
<br> 0 - no recommendations,
<br> 1 - recommended avoidance/cancellation,
<br> 2 - ban on organization. <br> <br>
<b>Restrictions on gatherings:</b>
<br> 0 - no restrictions,
<br> 1 - restrictions for very large gatherings (over 1000 people),
<br> 2 - restrictions for gatherings of 101 to 1000 people,
<br> 3 - restrictions for gatherings of 11 to 100 people,
<br> 4 - restrictions for gatherings of 10 people or less.
<br>
via https://github.com/OxCGRT/covid-policy-dataset/blob/main/documentation_and_codebook.md <br>

## 5. Analysis of selected cases without processing
Take care of data cleanliness, including removing unnecessary empty values and duplicates and unifying the way information is provided. Save the ready data from the DataFrame objects in separate CSV files.


### 5.1 Basic data about all countries of the world that will be understandable to humans and universal and potentially future-proof for further processing.

In [20]:
# One row of static attributes per country (country-level rows only).
query = """
SELECT DISTINCT country_name AS `Country_name`, iso_3166_1_alpha_3 AS `3_letter_code`, population, area_sq_km, population_density, population_urban, gdp_per_capita_usd
FROM bigquery-public-data.covid19_open_data.covid19_open_data
WHERE aggregation_level = 0
GROUP BY country_name, iso_3166_1_alpha_3, population, area_sq_km, population_density, population_urban, gdp_per_capita_usd
"""

# Total GDP is derived as population x GDP per capita.
# NOTE(review): nothing in this cell shows that the source figures refer to
# 2022 - confirm the year used in the column name.
country_data = (
    client.query(query)
    .to_dataframe()
    .assign(GDP_2022=lambda frame: frame["population"] * frame["gdp_per_capita_usd"])
)
country_data

Unnamed: 0,Country_name,3_letter_code,population,area_sq_km,population_density,population_urban,gdp_per_capita_usd,GDP_2022
0,Estonia,EST,1324820,45340,30.400,916024,23246,30796765720
1,New Zealand,NZL,4822233,267710,18.314,4258860,42084,202938853572
2,Albania,ALB,2862427,28750,99.600,1747593,5352,15319709304
3,Antarctica,ATA,4400,14200000,,,,
4,American Samoa,ASM,55197,200,275.985,48203,11466,632888802
...,...,...,...,...,...,...,...,...
241,East Timor,TLS,1318442,14870,88.665,400182,1294,1706063948
242,Tanzania,TZA,59734213,947300,67.435,20011885,1122,67021786986
243,Ukraine,UKR,43733759,603550,75.492,30835699,3659,160021824181
244,Saint Vincent and the Grenadines,VCT,110947,390,284.479,58185,7463,827997461


### 5.2 COVID-19 case statistics around the world.

In [21]:
# NULL is deliberately not replaced with 0: a 0 would claim that nobody fell ill
# on that day, whereas NULL means that there is no data for that day.
query = """
SELECT date, iso_3166_1_alpha_3 AS `3_letter_code`, new_confirmed as `Total_confirmed_cases`,
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE aggregation_level = 0
GROUP BY iso_3166_1_alpha_3, date, new_confirmed
ORDER BY date
"""

# Rows without a case count carry no information for this table, so drop them.
covid_statistic = (
    client.query(query)
    .to_dataframe()
    .dropna(subset=['Total_confirmed_cases'])
)
covid_statistic

Unnamed: 0,date,3_letter_code,Total_confirmed_cases
0,2020-01-01,GUM,0
1,2020-01-01,USA,0
2,2020-01-01,BEN,0
3,2020-01-01,ARG,3
4,2020-01-01,IMN,0
...,...,...,...
243170,2022-09-15,TWN,45405
243186,2022-09-15,FRA,33263
243195,2022-09-15,THA,1125
243267,2022-09-15,MYS,2375


### 5.3 COVID-19 deceased statistics.

In [22]:
# Daily number of new deaths per country (country-level rows only).
query = """
SELECT date,iso_3166_1_alpha_3 AS `3_letter_code`, new_deceased as `Total_new_deceased` 
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE aggregation_level = 0
GROUP BY iso_3166_1_alpha_3, date, new_deceased
"""

# Keep only the days for which a death count was actually reported.
covid_deceased = (
    client.query(query)
    .to_dataframe()
    .dropna(subset=['Total_new_deceased'])
)
covid_deceased

Unnamed: 0,date,3_letter_code,Total_new_deceased
0,2021-03-31,ARG,160
1,2020-08-23,ARG,214
2,2020-06-27,COL,154
3,2022-01-15,CZE,25
4,2022-02-02,ESP,238
...,...,...,...
243781,2021-01-25,NGA,15
243782,2020-09-27,POL,15
243783,2021-10-05,SLV,15
243784,2022-02-22,BLR,15


### 5.4 Trends and relationships regarding COVID-19 vaccinations.

In [23]:
# Daily vaccination figures per country (country-level rows only).
query = """
SELECT date, iso_3166_1_alpha_3 AS `3_letter_code`,
new_persons_vaccinated as `Total_new_vaccinations`,
new_persons_fully_vaccinated as`New_persons_fully_vaccinated`
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE aggregation_level = 0
GROUP BY iso_3166_1_alpha_3, date, new_persons_vaccinated, new_persons_fully_vaccinated
"""

# how='all': a row is removed only when BOTH vaccination columns are missing.
vaccination_columns = ['Total_new_vaccinations', 'New_persons_fully_vaccinated']
covid_trend_analysis = (
    client.query(query)
    .to_dataframe()
    .dropna(subset=vaccination_columns, how='all')
)
covid_trend_analysis

Unnamed: 0,date,3_letter_code,Total_new_vaccinations,New_persons_fully_vaccinated
0,2021-11-26,COL,0,241945
1,2022-04-22,EST,79,103
2,2021-06-25,PER,92853,35045
3,2021-06-23,PER,41336,88105
4,2022-05-01,ROU,0,419
...,...,...,...,...
243781,2020-06-28,BRA,,0
243782,2022-01-06,FIN,0,0
243783,2022-08-21,CHE,0,0
243784,2021-07-05,COM,0,0


### 5.5 Number of tests performed.

In [24]:
# Daily number of tests and the vaccine-investment figure per country
# (country-level rows only), both cast to INT64.
query = """
SELECT date, iso_3166_1_alpha_3 AS `3_letter_code`, CAST(investment_in_vaccines AS INT64) as `Investment_in_vaccines`, CAST(new_tested AS INT64) as `Total_tested`
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE aggregation_level = 0
GROUP BY iso_3166_1_alpha_3, date, investment_in_vaccines, new_tested
ORDER BY date
"""

# Only rows with a reported test count are kept; the investment column may
# still be missing in the remaining rows.
investment_in_vaccines = (
    client.query(query)
    .to_dataframe()
    .dropna(subset=['Total_tested'], how='all')
)

display(investment_in_vaccines)

Unnamed: 0,date,3_letter_code,Investment_in_vaccines,Total_tested
31,2020-01-01,ARG,0,4
70,2020-01-01,MEX,0,72
239,2020-01-01,BRA,0,1154
337,2020-01-02,MEX,0,89
469,2020-01-02,BRA,0,2337
...,...,...,...,...
243106,2022-09-15,AUT,,67392
243149,2022-09-15,ITA,,146983
243183,2022-09-15,AUS,,4367
243267,2022-09-15,MYS,,0


In [25]:
# Show the restrictions frame again before it is merged with the other tables.
df_restrictions

Unnamed: 0,date,3_letter_code,school_closing,workplace_closing,cancel_public_events,restrictions_on_gatherings
0,2020-01-01,RWA,0,0,0,0
1,2020-01-01,CUW,,,,
2,2020-01-01,GRC,0,0,0,0
3,2020-01-01,AUT,0,0,0,0
4,2020-01-01,TUV,,,,
...,...,...,...,...,...,...
243781,2022-09-17,GIB,,,,
243782,2022-09-17,BGD,,,,
243783,2022-09-17,BWA,,,,
243784,2022-09-17,NIC,,,,


# 6. Saving all previously collected data into one DF/CSV.

In [27]:
# Combine the static country attributes with every daily table.
# merge() defaults to an inner join, so only (date, country) pairs that are
# present in ALL of the tables end up in the combined frame.
daily_keys = ['date', '3_letter_code']
df_data_from_4 = (
    country_data
    .merge(covid_statistic, on=['3_letter_code'])
    .merge(covid_deceased, on=daily_keys)
    .merge(covid_trend_analysis, on=daily_keys)
    .merge(investment_in_vaccines, on=daily_keys)
    .merge(df_restrictions, on=daily_keys)
)

# Persist the combined data set (without the positional index).
df_data_from_4.to_csv("../datasets/data_from_4.csv", index=False)
df_data_from_4

Unnamed: 0,Country_name,3_letter_code,population,area_sq_km,population_density,population_urban,gdp_per_capita_usd,GDP_2022,date,Total_confirmed_cases,Total_new_deceased,Total_new_vaccinations,New_persons_fully_vaccinated,Investment_in_vaccines,Total_tested,school_closing,workplace_closing,cancel_public_events,restrictions_on_gatherings
0,Estonia,EST,1324820,45340,30.400,916024,23246,30796765720,2020-12-28,476,13,318,9,0,4422,3,2,2,4
1,Estonia,EST,1324820,45340,30.400,916024,23246,30796765720,2020-12-29,965,3,367,14,0,6632,3,2,2,4
2,Estonia,EST,1324820,45340,30.400,916024,23246,30796765720,2020-12-30,732,5,1013,22,0,5813,3,2,2,4
3,Estonia,EST,1324820,45340,30.400,916024,23246,30796765720,2020-12-31,418,7,586,24,0,3559,3,2,2,4
4,Estonia,EST,1324820,45340,30.400,916024,23246,30796765720,2021-01-02,342,7,30,1,0,3509,3,2,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31435,South Africa,ZAF,59308690,1219090,48.891,39149717,6001,355911448690,2022-06-09,1591,20,16404,16781,0,20865,0,2,2,2
31436,South Africa,ZAF,59308690,1219090,48.891,39149717,6001,355911448690,2022-06-13,710,41,838,854,480000000,10493,0,2,2,2
31437,South Africa,ZAF,59308690,1219090,48.891,39149717,6001,355911448690,2022-06-14,1903,26,14113,13788,0,22417,0,2,2,2
31438,South Africa,ZAF,59308690,1219090,48.891,39149717,6001,355911448690,2022-06-20,1087,20,566,654,0,14018,0,2,2,2


# 7. Removing repeating columns from the set.


In [28]:
# Remove the columns this section treats as repeating.
# NOTE(review): presumably they duplicate the population/area/density columns of
# the world_countries.csv data merged afterwards - confirm.
#
# The frame is rebound instead of mutated in place, and errors='ignore' makes
# the cell idempotent: re-running it after the columns are already gone no
# longer raises a KeyError.
df_data_from_4 = df_data_from_4.drop(
    columns=['population', 'area_sq_km', 'population_density'],
    errors='ignore',
)

df_data_from_4

Unnamed: 0,Country_name,3_letter_code,population_urban,gdp_per_capita_usd,GDP_2022,date,Total_confirmed_cases,Total_new_deceased,Total_new_vaccinations,New_persons_fully_vaccinated,Investment_in_vaccines,Total_tested,school_closing,workplace_closing,cancel_public_events,restrictions_on_gatherings
0,Estonia,EST,916024,23246,30796765720,2020-12-28,476,13,318,9,0,4422,3,2,2,4
1,Estonia,EST,916024,23246,30796765720,2020-12-29,965,3,367,14,0,6632,3,2,2,4
2,Estonia,EST,916024,23246,30796765720,2020-12-30,732,5,1013,22,0,5813,3,2,2,4
3,Estonia,EST,916024,23246,30796765720,2020-12-31,418,7,586,24,0,3559,3,2,2,4
4,Estonia,EST,916024,23246,30796765720,2021-01-02,342,7,30,1,0,3509,3,2,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31435,South Africa,ZAF,39149717,6001,355911448690,2022-06-09,1591,20,16404,16781,0,20865,0,2,2,2
31436,South Africa,ZAF,39149717,6001,355911448690,2022-06-13,710,41,838,854,480000000,10493,0,2,2,2
31437,South Africa,ZAF,39149717,6001,355911448690,2022-06-14,1903,26,14113,13788,0,22417,0,2,2,2
31438,South Africa,ZAF,39149717,6001,355911448690,2022-06-20,1087,20,566,654,0,14018,0,2,2,2


In [29]:
# Per-country population history loaded from a local CSV file.
world_countries = pd.read_csv("world_countries.csv")

# Left join keeps every COVID row even when the CSV has no matching CCA3 code.
# The CSV's own name/code columns are dropped after the join, and the 2021
# population is estimated as the mean of the 2020 and 2022 values.
df_with_countries = (
    df_data_from_4
    .merge(world_countries, left_on=['3_letter_code'], right_on=['CCA3'], how="left")
    .drop(columns=['Country/Territory', 'CCA3'])
    .assign(**{
        "2021 Population": lambda frame: (frame["2020 Population"] + frame["2022 Population"]) / 2,
    })
)
df_with_countries

Unnamed: 0,Country_name,3_letter_code,population_urban,gdp_per_capita_usd,GDP_2022,date,Total_confirmed_cases,Total_new_deceased,Total_new_vaccinations,New_persons_fully_vaccinated,...,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage,2021 Population
0,Estonia,EST,916024,23246,30796765720,2020-12-28,476,13,318,9,...,1331535,1396877,1570674,1476983,1361999,45227,29.3201,0.9980,0.02,1327753.0
1,Estonia,EST,916024,23246,30796765720,2020-12-29,965,3,367,14,...,1331535,1396877,1570674,1476983,1361999,45227,29.3201,0.9980,0.02,1327753.0
2,Estonia,EST,916024,23246,30796765720,2020-12-30,732,5,1013,22,...,1331535,1396877,1570674,1476983,1361999,45227,29.3201,0.9980,0.02,1327753.0
3,Estonia,EST,916024,23246,30796765720,2020-12-31,418,7,586,24,...,1331535,1396877,1570674,1476983,1361999,45227,29.3201,0.9980,0.02,1327753.0
4,Estonia,EST,916024,23246,30796765720,2021-01-02,342,7,30,1,...,1331535,1396877,1570674,1476983,1361999,45227,29.3201,0.9980,0.02,1327753.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31435,South Africa,ZAF,39149717,6001,355911448690,2022-06-09,1591,20,16404,16781,...,51784921,46813266,39877570,29463549,22368306,1221037,49.0517,1.0084,0.75,59347906.0
31436,South Africa,ZAF,39149717,6001,355911448690,2022-06-13,710,41,838,854,...,51784921,46813266,39877570,29463549,22368306,1221037,49.0517,1.0084,0.75,59347906.0
31437,South Africa,ZAF,39149717,6001,355911448690,2022-06-14,1903,26,14113,13788,...,51784921,46813266,39877570,29463549,22368306,1221037,49.0517,1.0084,0.75,59347906.0
31438,South Africa,ZAF,39149717,6001,355911448690,2022-06-20,1087,20,566,654,...,51784921,46813266,39877570,29463549,22368306,1221037,49.0517,1.0084,0.75,59347906.0


In [30]:
# GDP in long format: one row per (country code, year).
df_gdp = pd.read_csv("gdp.csv")

# Reshape to one row per country with one GDP_<year> column per year.
df_gdp_pivot = df_gdp.pivot(index="Country Code", columns="Year", values="Value")
df_gdp_pivot.columns = [f"GDP_{int(year)}" for year in df_gdp_pivot.columns]
df_gdp_pivot = df_gdp_pivot.reset_index()

# Left join keeps every COVID row; the duplicate key column is dropped afterwards.
df_with_gdp = (
    df_with_countries
    .merge(df_gdp_pivot, left_on="3_letter_code", right_on="Country Code", how="left")
    .drop(columns=["Country Code"])
)
display(df_with_gdp)

Unnamed: 0,Country_name,3_letter_code,population_urban,gdp_per_capita_usd,GDP_2022,date,Total_confirmed_cases,Total_new_deceased,Total_new_vaccinations,New_persons_fully_vaccinated,...,GDP_2007,GDP_2008,GDP_2009,GDP_2010,GDP_2011,GDP_2012,GDP_2013,GDP_2014,GDP_2015,GDP_2016
0,Estonia,EST,916024,23246,30796765720,2020-12-28,476,13,318,9,...,2.223706e+10,2.419404e+10,1.965249e+10,1.949094e+10,2.317024e+10,2.304386e+10,2.513715e+10,2.622462e+10,2.256696e+10,2.333791e+10
1,Estonia,EST,916024,23246,30796765720,2020-12-29,965,3,367,14,...,2.223706e+10,2.419404e+10,1.965249e+10,1.949094e+10,2.317024e+10,2.304386e+10,2.513715e+10,2.622462e+10,2.256696e+10,2.333791e+10
2,Estonia,EST,916024,23246,30796765720,2020-12-30,732,5,1013,22,...,2.223706e+10,2.419404e+10,1.965249e+10,1.949094e+10,2.317024e+10,2.304386e+10,2.513715e+10,2.622462e+10,2.256696e+10,2.333791e+10
3,Estonia,EST,916024,23246,30796765720,2020-12-31,418,7,586,24,...,2.223706e+10,2.419404e+10,1.965249e+10,1.949094e+10,2.317024e+10,2.304386e+10,2.513715e+10,2.622462e+10,2.256696e+10,2.333791e+10
4,Estonia,EST,916024,23246,30796765720,2021-01-02,342,7,30,1,...,2.223706e+10,2.419404e+10,1.965249e+10,1.949094e+10,2.317024e+10,2.304386e+10,2.513715e+10,2.622462e+10,2.256696e+10,2.333791e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31435,South Africa,ZAF,39149717,6001,355911448690,2022-06-09,1591,20,16404,16781,...,2.990335e+11,2.871000e+11,2.972167e+11,3.752981e+11,4.168782e+11,3.963326e+11,3.668100e+11,3.511191e+11,3.176107e+11,2.954562e+11
31436,South Africa,ZAF,39149717,6001,355911448690,2022-06-13,710,41,838,854,...,2.990335e+11,2.871000e+11,2.972167e+11,3.752981e+11,4.168782e+11,3.963326e+11,3.668100e+11,3.511191e+11,3.176107e+11,2.954562e+11
31437,South Africa,ZAF,39149717,6001,355911448690,2022-06-14,1903,26,14113,13788,...,2.990335e+11,2.871000e+11,2.972167e+11,3.752981e+11,4.168782e+11,3.963326e+11,3.668100e+11,3.511191e+11,3.176107e+11,2.954562e+11
31438,South Africa,ZAF,39149717,6001,355911448690,2022-06-20,1087,20,566,654,...,2.990335e+11,2.871000e+11,2.972167e+11,3.752981e+11,4.168782e+11,3.963326e+11,3.668100e+11,3.511191e+11,3.176107e+11,2.954562e+11


# 8. Adding three additional datasets that I found on my own.

### 8.1 Data about inflation.

In [31]:
# https://www.worldbank.org/en/research/brief/inflation-database

# Of all the sheets in the workbook, "hcpi_a" was selected: the overall
# increase in inflation in the given years.
inflation_metadata_columns = ["IMF Country Code", "Indicator Type", "Series Name", "Note", "Country"]

# Drop the descriptive columns, go long (one row per country/year), discard
# missing values, then go wide again with one column per year.
df_inflation = (
    pd.read_excel("load_data/inflation_all.xlsx", sheet_name="hcpi_a")
    .drop(columns=inflation_metadata_columns)
    .melt(id_vars=["Country Code"], var_name="Year", value_name="Inflation")
    .dropna(subset=["Inflation"])
    .pivot(index="Country Code", columns="Year", values="Inflation")
)
df_inflation.columns = [f"Inflation_{int(year)}" for year in df_inflation.columns]
df_inflation = df_inflation.reset_index()

# Left join keeps every COVID row; the duplicate key column is dropped afterwards.
df_with_inflation = (
    df_with_gdp
    .merge(df_inflation, left_on="3_letter_code", right_on="Country Code", how="left")
    .drop(columns=["Country Code"])
)
display(df_with_inflation)

Unnamed: 0,Country_name,3_letter_code,population_urban,gdp_per_capita_usd,GDP_2022,date,Total_confirmed_cases,Total_new_deceased,Total_new_vaccinations,New_persons_fully_vaccinated,...,Inflation_2014,Inflation_2015,Inflation_2016,Inflation_2017,Inflation_2018,Inflation_2019,Inflation_2020,Inflation_2021,Inflation_2022,Inflation_2023
0,Estonia,EST,916024,23246,30796765720,2020-12-28,476,13,318,9,...,-0.10618,-0.492323,0.148682,3.651,3.44,2.268,-0.444531,4.653167,19.398260,9.452177
1,Estonia,EST,916024,23246,30796765720,2020-12-29,965,3,367,14,...,-0.10618,-0.492323,0.148682,3.651,3.44,2.268,-0.444531,4.653167,19.398260,9.452177
2,Estonia,EST,916024,23246,30796765720,2020-12-30,732,5,1013,22,...,-0.10618,-0.492323,0.148682,3.651,3.44,2.268,-0.444531,4.653167,19.398260,9.452177
3,Estonia,EST,916024,23246,30796765720,2020-12-31,418,7,586,24,...,-0.10618,-0.492323,0.148682,3.651,3.44,2.268,-0.444531,4.653167,19.398260,9.452177
4,Estonia,EST,916024,23246,30796765720,2021-01-02,342,7,30,1,...,-0.10618,-0.492323,0.148682,3.651,3.44,2.268,-0.444531,4.653167,19.398260,9.452177
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31435,South Africa,ZAF,39149717,6001,355911448690,2022-06-09,1591,20,16404,16781,...,6.13602,4.509208,6.594604,5.273,4.50,4.130,3.223885,4.609059,7.039727,6.073909
31436,South Africa,ZAF,39149717,6001,355911448690,2022-06-13,710,41,838,854,...,6.13602,4.509208,6.594604,5.273,4.50,4.130,3.223885,4.609059,7.039727,6.073909
31437,South Africa,ZAF,39149717,6001,355911448690,2022-06-14,1903,26,14113,13788,...,6.13602,4.509208,6.594604,5.273,4.50,4.130,3.223885,4.609059,7.039727,6.073909
31438,South Africa,ZAF,39149717,6001,355911448690,2022-06-20,1087,20,566,654,...,6.13602,4.509208,6.594604,5.273,4.50,4.130,3.223885,4.609059,7.039727,6.073909


### 8.2 Data about unemployment.

In [32]:
# Data source: https://data.worldbank.org/indicator/SL.UEM.TOTL.ZS?view=map&year=1991
#
# Reshape steps: drop the descriptive columns (errors="ignore" skips any that
# are absent from the file), go long, discard missing values, then go wide
# again so that every year becomes its own column keyed by "Country Code".
df_unemployment = (
    pd.read_csv("load_data/total_unemployment.csv")
    .drop(
        columns=["Country Name", "Indicator Name", "Indicator Code", "Unnamed: 68"],
        errors="ignore",
    )
    .melt(id_vars=["Country Code"], var_name="Year", value_name="Unemployment")
    .dropna(subset=["Unemployment"])
    .pivot(index="Country Code", columns="Year", values="Unemployment")
)

# Assign a plain list so the column labels become "Unemployment_<year>" and
# the "Year" name left on the column axis by pivot() is discarded.
df_unemployment.columns = [f"Unemployment_{int(col)}" for col in df_unemployment.columns]
df_unemployment = df_unemployment.reset_index()

# NOTE: from here on `df_unemployment` is rebound to the *merged* frame
# (df_with_inflation plus the unemployment columns); the next cell reads it
# under this name. Left join keeps every row of df_with_inflation.
df_unemployment = (
    df_with_inflation
    .merge(df_unemployment, left_on="3_letter_code", right_on="Country Code", how="left")
    .drop(columns=["Country Code"])
)
display(df_unemployment)

Unnamed: 0,Country_name,3_letter_code,population_urban,gdp_per_capita_usd,GDP_2022,date,Total_confirmed_cases,Total_new_deceased,Total_new_vaccinations,New_persons_fully_vaccinated,...,Unemployment_2014,Unemployment_2015,Unemployment_2016,Unemployment_2017,Unemployment_2018,Unemployment_2019,Unemployment_2020,Unemployment_2021,Unemployment_2022,Unemployment_2023
0,Estonia,EST,916024,23246,30796765720,2020-12-28,476,13,318,9,...,7.352,6.412,6.882,5.807,5.412,4.514,6.956,6.178,5.571,6.378
1,Estonia,EST,916024,23246,30796765720,2020-12-29,965,3,367,14,...,7.352,6.412,6.882,5.807,5.412,4.514,6.956,6.178,5.571,6.378
2,Estonia,EST,916024,23246,30796765720,2020-12-30,732,5,1013,22,...,7.352,6.412,6.882,5.807,5.412,4.514,6.956,6.178,5.571,6.378
3,Estonia,EST,916024,23246,30796765720,2020-12-31,418,7,586,24,...,7.352,6.412,6.882,5.807,5.412,4.514,6.956,6.178,5.571,6.378
4,Estonia,EST,916024,23246,30796765720,2021-01-02,342,7,30,1,...,7.352,6.412,6.882,5.807,5.412,4.514,6.956,6.178,5.571,6.378
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31435,South Africa,ZAF,39149717,6001,355911448690,2022-06-09,1591,20,16404,16781,...,24.890,25.149,26.536,27.035,26.906,28.468,29.217,34.007,33.268,32.098
31436,South Africa,ZAF,39149717,6001,355911448690,2022-06-13,710,41,838,854,...,24.890,25.149,26.536,27.035,26.906,28.468,29.217,34.007,33.268,32.098
31437,South Africa,ZAF,39149717,6001,355911448690,2022-06-14,1903,26,14113,13788,...,24.890,25.149,26.536,27.035,26.906,28.468,29.217,34.007,33.268,32.098
31438,South Africa,ZAF,39149717,6001,355911448690,2022-06-20,1087,20,566,654,...,24.890,25.149,26.536,27.035,26.906,28.468,29.217,34.007,33.268,32.098


### 8.3 Data about the murder rate per 100k people.

In [33]:
# Load the per-100k rate table and keep only its first 217 rows.
# NOTE(review): presumably the rows after that are not per-country records
# (e.g. aggregates or export footer lines) - TODO confirm against the file.
df_crime = (
    pd.read_csv("load_data/crime_rate_per_100k.csv")
    .iloc[:217]
    .drop(columns=["Series Name", "Series Code", "Country Name"], errors="ignore")
)

# Strip the " [YR<digits>]" suffix from the column labels so that only the
# bare year remains (labels without the suffix are left unchanged).
df_crime.columns = df_crime.columns.str.replace(r"\s*\[YR\d+\]", "", regex=True)

# Go long, coerce types (non-numeric rate values become NaN and are then
# dropped), and go wide again with one column per year.
df_crime = (
    df_crime
    .melt(id_vars=["Country Code"], var_name="Year", value_name="Crime Rate")
    .assign(
        **{
            "Year": lambda long: long["Year"].astype(int),
            "Crime Rate": lambda long: pd.to_numeric(long["Crime Rate"], errors="coerce"),
        }
    )
    .dropna(subset=["Crime Rate"])
    .pivot(index="Country Code", columns="Year", values="Crime Rate")
)

# Assign a plain list so the column labels become "Murders_Rate_<year>" and
# the "Year" name left on the column axis by pivot() is discarded.
df_crime.columns = [f"Murders_Rate_{int(col)}" for col in df_crime.columns]
df_crime = df_crime.reset_index()

# Left join onto the frame produced by the unemployment cell, drop the
# redundant right-hand join key, then persist and show the final dataset.
df_final = (
    df_unemployment
    .merge(df_crime, left_on="3_letter_code", right_on="Country Code", how="left")
    .drop(columns=["Country Code"])
)
df_final.to_csv("../datasets/final_data.csv",  index=False)
display(df_final)

Unnamed: 0,Country_name,3_letter_code,population_urban,gdp_per_capita_usd,GDP_2022,date,Total_confirmed_cases,Total_new_deceased,Total_new_vaccinations,New_persons_fully_vaccinated,...,Murders_Rate_2012,Murders_Rate_2013,Murders_Rate_2014,Murders_Rate_2015,Murders_Rate_2016,Murders_Rate_2017,Murders_Rate_2018,Murders_Rate_2019,Murders_Rate_2020,Murders_Rate_2021
0,Estonia,EST,916024,23246,30796765720,2020-12-28,476,13,318,9,...,4.763050,3.945427,3.118995,3.422947,2.507741,2.201057,2.117767,1.959250,3.159215,1.956798
1,Estonia,EST,916024,23246,30796765720,2020-12-29,965,3,367,14,...,4.763050,3.945427,3.118995,3.422947,2.507741,2.201057,2.117767,1.959250,3.159215,1.956798
2,Estonia,EST,916024,23246,30796765720,2020-12-30,732,5,1013,22,...,4.763050,3.945427,3.118995,3.422947,2.507741,2.201057,2.117767,1.959250,3.159215,1.956798
3,Estonia,EST,916024,23246,30796765720,2020-12-31,418,7,586,24,...,4.763050,3.945427,3.118995,3.422947,2.507741,2.201057,2.117767,1.959250,3.159215,1.956798
4,Estonia,EST,916024,23246,30796765720,2021-01-02,342,7,30,1,...,4.763050,3.945427,3.118995,3.422947,2.507741,2.201057,2.117767,1.959250,3.159215,1.956798
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31435,South Africa,ZAF,39149717,6001,355911448690,2022-06-09,1591,20,16404,16781,...,30.507084,31.598028,32.532699,33.418340,33.703002,35.903188,36.662249,36.712138,33.964874,41.865728
31436,South Africa,ZAF,39149717,6001,355911448690,2022-06-13,710,41,838,854,...,30.507084,31.598028,32.532699,33.418340,33.703002,35.903188,36.662249,36.712138,33.964874,41.865728
31437,South Africa,ZAF,39149717,6001,355911448690,2022-06-14,1903,26,14113,13788,...,30.507084,31.598028,32.532699,33.418340,33.703002,35.903188,36.662249,36.712138,33.964874,41.865728
31438,South Africa,ZAF,39149717,6001,355911448690,2022-06-20,1087,20,566,654,...,30.507084,31.598028,32.532699,33.418340,33.703002,35.903188,36.662249,36.712138,33.964874,41.865728
