In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta

In [3]:
df = pd.read_csv('covid-data.csv')

In [4]:
df['date'] = pd.to_datetime(df['date'])

In [5]:
pd.set_option('display.max_rows', 187)
pd.set_option('display.max_columns', 50)

# A1

In [6]:
date_greek_lockdown_started = df[(df['date'] == '2020-03-23') & (df['location'] != 'World')]['total_cases'].max()
location = df.loc[df['total_cases'] == date_greek_lockdown_started] #A1

#Variables to save name and number of cases
name = location.iloc[0,2]
total_cases = location.iloc[0,4]

print(f"{name} had the most total cases ({total_cases.astype(int)}) when Greece's lockdown started")

China had the most total cases (81484) when Greece's lockdown started


# A2

In [7]:
second_to_last_week_of_march = df[(df['date'] >= '2020-03-23') & (df['date'] <= '2020-03-29') & (df['location'] != 'World')]['new_cases'].max()
location = df.loc[df['new_cases'] == second_to_last_week_of_march] #A2 
#Needs output with name and number of new cases

name = location.iloc[0,2]
new_cases = location.iloc[0,5]

print(f"{name} had the most new cases ({new_cases.astype(int)}) during 23-29/03/2020")

United States had the most new cases (19979) during 23-29/03/2020


# A3

In [8]:
greece = (df['location'] == 'Greece')

dates_before_first_lockdown = df[greece & ((df['date'] >= '2020-03-16') & (df['date'] <= '2020-03-22'))] #A3
sum_of_new_cases_before_first_lockdown = dates_before_first_lockdown['new_cases'].sum()

dates_before_second_lockdown = df[greece & ((df['date'] >= '2020-10-31') & (df['date'] <= '2020-11-06'))] #A3 
sum_of_new_cases_before_second_lockdown = dates_before_second_lockdown['new_cases'].sum()

print(f'Before the first lockdown (16-22/3/2020) Greece had: {sum_of_new_cases_before_first_lockdown.astype(int)} cases')
print(f'Before the second lockdown (31/10/2020 - 6/11/2020) Greece had: {sum_of_new_cases_before_second_lockdown.astype(int)} cases')

Before the first lockdown (16-22/3/2020) Greece had: 302 cases
Before the second lockdown (31/10/2020 - 6/11/2020) Greece had: 14297 cases


# A4

In [9]:
max_total_deaths_per_million = df["total_deaths_per_million"].max()
location = df.loc[df['total_deaths_per_million'] == max_total_deaths_per_million]

name = location.iloc[0,2]
total_deaths_per_million = location.iloc[0,13]

print(f'{name} had the most total deaths per million: {total_deaths_per_million.round(decimals=2)}')

Belgium had the most total deaths per million: 1350.09


# A5

In [25]:
max_total_deaths = df[(df['location'] != 'World')]['total_deaths'].max()
location_of_max_total_deaths = df.loc[df['total_deaths'] == max_total_deaths]#USA

max_total_cases = df[(df['location'] != 'World')]['total_cases'].max()
location_of_max_total_cases = df.loc[df['total_cases'] == max_total_cases] #USA

name = location_of_max_total_deaths.iloc[0,2]

percentage_of_deaths_per_case = (max_total_deaths/max_total_cases)*100

print(f'{name} have the highest percentage of deaths per case ({percentage_of_deaths_per_case.round(decimals = 2)}%)')

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,total_tests,new_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,tests_per_case,positive_rate,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
55658,USA,North America,United States,2020-11-24,12420872.0,173963.0,173626.571,257701.0,919.0,1497.286,37524.993,525.564,524.547,778.547,2.776,4.523,,,,,,,,,,,,,,,,,,,,331002647.0,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.924


# B1

In [11]:
greece_new_deaths_and_cases = df[greece].groupby(df[greece]['date'].dt.month)[['new_deaths', 'new_cases']].agg(['median','std']).round(decimals = 2)
greece_new_deaths_and_cases

Unnamed: 0_level_0,new_deaths,new_deaths,new_cases,new_cases
Unnamed: 0_level_1,median,std,median,std
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.44
3,1.0,1.84,35.0,31.87
4,3.0,2.43,32.0,39.83
5,1.0,1.04,10.0,8.72
6,0.0,0.9,14.0,12.07
7,0.0,0.72,29.0,14.38
8,2.0,1.62,193.0,62.54
9,4.0,2.19,255.5,82.85
10,7.0,3.5,438.0,378.47


# B2

In [12]:
portugal = df['location'] == "Portugal"

portugal_new_deaths_and_cases = df[portugal].groupby(df[portugal]['date'].dt.month)[['new_deaths', 'new_cases']].agg(['median','std']).round(decimals = 2)
portugal_new_deaths_and_cases

Unnamed: 0_level_0,new_deaths,new_deaths,new_cases,new_cases
Unnamed: 0_level_1,median,std,median,std
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2,,,,
3,0.0,7.51,86.0,271.8
4,27.5,8.33,620.5,280.45
5,13.0,6.58,228.0,137.89
6,5.5,3.78,327.0,75.0
7,5.0,3.07,291.0,98.83
8,3.0,1.83,213.0,88.33
9,4.0,2.62,595.0,191.78
10,15.0,9.02,1876.0,1151.11
11,62.0,14.81,5667.0,1555.36


In [13]:
greece_new_deaths_and_cases.sum()

new_deaths  median      59.00
            std         42.88
new_cases   median    3409.00
            std       1252.21
dtype: float64

In [14]:
portugal_new_deaths_and_cases.sum()

new_deaths  median     135.00
            std         57.55
new_cases   median    9903.50
            std       3850.55
dtype: float64

In [15]:
print('Based on the above data Greece does better than Portugal')

Based on the above data Greece does better than Portugal


# A5

Timestamp('2020-11-24 00:00:00')

In [66]:
all_locations_latest_date = df[(df['date'] == df['date'].max()) & (df['location'] != 'World') & (df['location'] != 'International')]
all_locations_latest_date['total_deaths'].div(all_locations_latest_date['total_cases']).max()



#a = df.loc[df['total_cases'] == max_total_cases] #USA
#name = location_of_max_total_deaths.iloc[0,2]

0.28903654485049834