In [18]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
import statistics

# For Notebooks
init_notebook_mode(connected=True)
# For offline use
cf.go_offline()

df_covid19_cases_time= pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_time.csv")


Columns (11) have mixed types.Specify dtype option on import or set low_memory=False.



In [19]:
yesterdays_date = (datetime.today() - timedelta(days=1)).strftime('%#m/%#d/%y') 

european_countries_data = [
    {'Country': 'Austria', 'Close_Countries': ['Czechia','Slovakia','Slovenia','Hungary','Croatia', 'Liechtenstein', 'Germany']},
    {'Country': 'Belgium','Close_Countries': ['Netherlands', 'France', 'Luxembourg', 'Germany', 'Denmark']},
    {'Country': 'Czechia','Close_Countries': ['Slovakia','Poland','Hungary','Croatia', 'Austria', 'Germany']},
    {'Country': 'Denmark','Close_Countries': ['Sweden','Poland','Germany','Norway','Netherlands']},
    {'Country': 'Estonia','Close_Countries': ['Latvia','Lithuania','Sweden','Finland']},
    {'Country': 'Finland','Close_Countries': ['Latvia','Norway','Lithuania','Sweden','Estonia']},
    {'Country': 'France','Close_Countries': ['Spain','Italy','Germany','Belgium','Switzerland']},
    {'Country': 'Germany','Close_Countries': ['Poland','Czechia','Austria','Belgium','Switzerland','Netherlands']},
    {'Country': 'Hungary','Close_Countries': ['Slovakia','Czechia','Austria','Croatia', 'Ukraine']},
    {'Country': 'Italy','Close_Countries': ['Spain','France','Portugal','Croatia']},
    {'Country': 'Latvia','Close_Countries': ['Estonia','Lithuania','Belarus']},
    {'Country': 'Liechtenstein','Close_Countries': ['Switzerland','Luxembourg','Slovenia']},
    {'Country': 'Lithuania','Close_Countries': ['Estonia','Latvia','Belarus']},
    {'Country': 'Luxembourg','Close_Countries': ['Switzerland','Liechtenstein','Slovenia']},
    {'Country': 'Netherlands','Close_Countries': ['Belgium', 'France', 'Luxembourg', 'Germany', 'Denmark']},
    {'Country': 'Norway','Close_Countries': ['Sweden', 'Finland']},
    {'Country': 'Poland','Close_Countries': ['Germany','Czechia','Slovakia','Ukraine','Belarus']},
    {'Country': 'Portugal','Close_Countries': ['Spain','Italy']},
    {'Country': 'Slovakia','Close_Countries': ['Poland','Czechia','Austria','Hungary','Ukraine']},
    {'Country': 'Slovenia','Close_Countries': ['Switzerland','Liechtenstein','Croatia']},
    {'Country': 'Spain','Close_Countries': ['Portugal','Italy','France']},
    {'Country': 'Sweden','Close_Countries': ['Norway','Finland','Poland']},
    {'Country': 'Switzerland','Close_Countries': ['Liechtenstein','Luxembourg','Slovenia','Belgium','Austria']},
    {'Country': 'Ukraine',"Close_Countries": ["Belarus","Poland","Slovakia","Hungary","Romania"]},
    {'Country': 'Belarus','Close_Countries': ['Lithuania','Poland','Latvia','Ukraine']}
]
european_countries_data = pd.DataFrame(european_countries_data)


In [3]:
input_country = "Ukraine"

close_countries = european_countries_data[european_countries_data["Country"]==input_country]
close_countries = np.array(close_countries["Close_Countries"])[0]


close_countries_data = df_covid19_cases_time[df_covid19_cases_time["Country_Region"].isin(close_countries)]
close_countries_data = close_countries_data[close_countries_data["Last_Update"] == yesterdays_date].copy().drop(['Last_Update', 'Recovered', 'Active', 'Delta_Confirmed', 'Delta_Recovered', 'Incident_Rate', 'People_Tested', 'People_Hospitalized', 'Province_State', 'FIPS', 'UID', 'iso3', 'Report_Date_String'], axis=1)
close_countries_data.index = close_countries_data["Country_Region"]
close_countries_data = close_countries_data.drop(['Country_Region'], axis=1)

medical_rating = pd.read_csv("data/medical_system.csv")
medical_rating = medical_rating.copy().drop(['Health Distribution','Overall goal attainment', 'Health expenditure per capita in international dollars'],axis =1)
medical_rating.index = medical_rating["Country"]
medical_rating = medical_rating.drop(['Country'], axis=1)
# reversing medical coefficients
#edical_rating["Health Level"] = (500-medical_rating["Health Level"])/100

merged_data = pd.merge(close_countries_data, medical_rating, left_index=True, right_index=True)

population = pd.read_csv("data/population_by_country.csv")
input_country_population = population[population["Country"] == input_country]["Population"]
population = population.copy().drop(['Yearly Change', 'Net Change', 'Density (P/Km²)', 'Land Area (Km²)', 'Migrants (net)', 'Fert. Rate', 'Med. Age', 'Urban Pop %', 'World Share'],axis=1)
population.index = population["Country"]
population = population.drop(['Country'], axis=1)

merged_data = pd.merge(merged_data, population, left_index=True, right_index=True)


not_actual_population_countries_index = merged_data[(merged_data['Population'] > int(input_country_population * 3.5)) | (merged_data['Population'] < int(input_country_population * 0.2))].index

merged_data.drop(not_actual_population_countries_index , inplace=True)

#
merged_data["coef"] = merged_data["Deaths"] /  ( merged_data["Confirmed"] * merged_data["Health Level"] )
top_2_countries_by_health_level = merged_data.sort_values("Health Level").head(2)

average_coef_by_most_healthy_counties = sum(top_2_countries_by_health_level["coef"]) / len(top_2_countries_by_health_level["coef"]) 
max_coef = max(merged_data["coef"])
min_coef = min(merged_data["coef"])

merged_data["Most likely deaths"] = average_coef_by_most_healthy_counties * merged_data["Confirmed"] * merged_data["Health Level"]
merged_data["Falsification %"] = (1 - merged_data["Deaths"] / merged_data["Most likely deaths"]) * 100
merged_data["Max deaths (Worst-case scenario)"] = max_coef * merged_data["Confirmed"] * merged_data["Health Level"]
merged_data["Worst-case scenario falsification %"] = (1 - merged_data["Deaths"] / merged_data["Max deaths (Worst-case scenario)"]) * 100

# Just the country calculation
input_country_data = df_covid19_cases_time[df_covid19_cases_time["Country_Region"] == input_country]

input_country_data = input_country_data[input_country_data["Last_Update"] == yesterdays_date].copy().drop(['Last_Update', 'Recovered', 'Active', 'Delta_Confirmed', 'Delta_Recovered', 'Incident_Rate', 'People_Tested', 'People_Hospitalized', 'Province_State', 'FIPS', 'UID', 'iso3', 'Report_Date_String'], axis=1)
input_country_data.index = input_country_data["Country_Region"]
input_country_data = input_country_data.drop(['Country_Region'], axis=1)
input_country_data = pd.merge(input_country_data, medical_rating, left_index=True, right_index=True)

input_country_data

#medical_rating
input_country_data["Most likely deaths"] = average_coef_by_most_healthy_counties * input_country_data["Confirmed"] * input_country_data["Health Level"]
input_country_data["Falsification %"] = (1 - input_country_data["Deaths"] / input_country_data["Most likely deaths"]) * 100
input_country_data


# add parts of the country confirmed / died
# and than the average should be taken

Unnamed: 0,Confirmed,Deaths,Delta_Deaths,Health Level,On level of health,Overall health system performance,Most likely deaths,Falsification %
Ukraine,527808,9604,182.0,70,101,79,12372.305005,22.375014


In [22]:
input_country = "Belarus"

close_countries = european_countries_data[european_countries_data["Country"]==input_country]
close_countries = np.array(close_countries["Close_Countries"])[0]


close_countries_data = df_covid19_cases_time[df_covid19_cases_time["Country_Region"].isin(close_countries)]
close_countries_data = close_countries_data[close_countries_data["Last_Update"] == yesterdays_date].copy().drop(['Last_Update', 'Recovered', 'Active', 'Delta_Confirmed', 'Delta_Recovered', 'Incident_Rate', 'People_Tested', 'People_Hospitalized', 'Province_State', 'FIPS', 'UID', 'iso3', 'Report_Date_String'], axis=1)
close_countries_data.index = close_countries_data["Country_Region"]
close_countries_data = close_countries_data.drop(['Country_Region'], axis=1)

medical_rating = pd.read_csv("data/medical_system.csv")
medical_rating = medical_rating.copy().drop(['Health Distribution','Overall goal attainment', 'Health expenditure per capita in international dollars'],axis =1)
medical_rating.index = medical_rating["Country"]
medical_rating = medical_rating.drop(['Country'], axis=1)
# reversing medical coefficients
#edical_rating["Health Level"] = (500-medical_rating["Health Level"])/100

merged_data = pd.merge(close_countries_data, medical_rating, left_index=True, right_index=True)

population = pd.read_csv("data/population_by_country.csv")
input_country_population = population[population["Country"] == input_country]["Population"]
population = population.copy().drop(['Yearly Change', 'Net Change', 'Density (P/Km²)', 'Land Area (Km²)', 'Migrants (net)', 'Fert. Rate', 'Med. Age', 'Urban Pop %', 'World Share'],axis=1)
population.index = population["Country"]
population = population.drop(['Country'], axis=1)

merged_data = pd.merge(merged_data, population, left_index=True, right_index=True)


not_actual_population_countries_index = merged_data[(merged_data['Population'] > int(input_country_population * 3.5)) | (merged_data['Population'] < int(input_country_population * 0.2))].index

merged_data.drop(not_actual_population_countries_index , inplace=True)

#
merged_data["coef"] = merged_data["Deaths"] /  ( merged_data["Confirmed"] * merged_data["Overall health system performance"])
top_2_countries_by_health_level = merged_data.sort_values("Health Level").head(2)

average_coef_by_most_healthy_counties = sum(top_2_countries_by_health_level["coef"]) / len(top_2_countries_by_health_level["coef"]) 

max_coef = max(merged_data["coef"])
min_coef = min(merged_data["coef"])

"""
merged_data["Most likely deaths"] = average_coef_by_most_healthy_counties * merged_data["Confirmed"]
merged_data["Min deaths (best-case scenario)"] = average_coef_by_most_healthy_counties * merged_data["Confirmed"]
merged_data["Max deaths (Worst-case scenario)"] = max_coef * merged_data["Confirmed"]

merged_data["Falsification %"] = (1 - merged_data["Deaths"] / merged_data["Most likely deaths"]) * 100
merged_data["best-case scenario falsification %"] = (1 - merged_data["Deaths"] / merged_data["Min deaths (best-case scenario)"]) * 100
merged_data["worst-case scenario falsification %"] = (1 - merged_data["Deaths"] / merged_data["Max deaths (Worst-case scenario)"]) * 100
"""

# Just the country calculation
input_country_data = df_covid19_cases_time[df_covid19_cases_time["Country_Region"] == input_country]

input_country_data = input_country_data[input_country_data["Last_Update"] == yesterdays_date].copy().drop(['Last_Update', 'Recovered', 'Active', 'Delta_Confirmed', 'Delta_Recovered', 'Incident_Rate', 'People_Tested', 'People_Hospitalized', 'Province_State', 'FIPS', 'UID', 'iso3', 'Report_Date_String'], axis=1)
input_country_data.index = input_country_data["Country_Region"]
input_country_data = input_country_data.drop(['Country_Region'], axis=1)
input_country_data = pd.merge(input_country_data, medical_rating, left_index=True, right_index=True)


input_country_data["Most likely deaths"] = average_coef_by_most_healthy_counties * input_country_data["Confirmed"] * input_country_data["Overall health system performance"]
input_country_data["Min deaths (best-case scenario)"] = min_coef * input_country_data["Confirmed"] * input_country_data["Overall health system performance"]
input_country_data["Max deaths (Worst-case scenario)"] = max_coef * input_country_data["Confirmed"] * input_country_data["Overall health system performance"]

# general falsification
falsification = (1 - input_country_data["Deaths"] / input_country_data["Most likely deaths"]) * 100

if (float (falsification) < 0):
    falsification = 0
    
if (float (falsification) > 100):
    falsification = 100
    
input_country_data["Falsification %"] = falsification

# best case scenario falsification
best_case_scenario_falsification = (1 - input_country_data["Deaths"] / input_country_data["Min deaths (best-case scenario)"]) * 100

if (float (best_case_scenario_falsification) < 0):
    best_case_scenario_falsification = 0
    
input_country_data["best-case scenario falsification %"] = best_case_scenario_falsification

# worst case scenario falsification
worst_case_scenario_falsification = (1 - input_country_data["Deaths"] / input_country_data["Max deaths (Worst-case scenario)"]) * 100

if (float (worst_case_scenario_falsification) > 100):
    worst_case_scenario_falsification = 100

input_country_data["worst-case scenario falsification %"] = worst_case_scenario_falsification
input_country_data


Unnamed: 0,Confirmed,Deaths,Delta_Deaths,Health Level,On level of health,Overall health system performance,Most likely deaths,Min deaths (best-case scenario),Max deaths (Worst-case scenario),Falsification %,best-case scenario falsification %,worst-case scenario falsification %
Belarus,111622,1033,6.0,83,116,72,888.215305,873.753425,902.677186,0,0,-14.437367


In [None]:
# add parts of the country confirmed / died
# and than the average should be taken