In [None]:
import sys
from IPython.core.interactiveshell import InteractiveShell

# Make modules in the parent directory and in its code/ sub-directory importable.
sys.path.extend(['./../', './../code'])

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Inputs: cached time-series CSV, the sqlite database it is rebuilt from,
# and the per-country table.
csv_path = './../data/timeseries_by_country_old.csv'
db_path = './../data/latest.db'
csv_country_path = './../data/country_data.csv'

# Output: CSV written by the last cell of this notebook.
csv_new_path = './../data/timeseries_by_country.csv'

In [None]:
# If './../data/timeseries_by_country_old.csv' doesn't exist, extract data from sqlite3 .db file
import sqlite3
import pandas as pd

def _index_by_country_date(frame):
    """Return `frame` indexed by ('country', 'date').

    The column 'administrative_area_level_1' is renamed to 'country'; the
    rename is a no-op when the column is already called 'country' (as in the
    CSV cache written further down).
    """
    # NOTE(review): reset_index() on a freshly loaded frame keeps the old row
    # numbers as an extra data column. Left unchanged so the cached CSV layout
    # stays the same -- confirm whether that column is actually wanted.
    return (
        frame.reset_index()
        .rename(columns={'administrative_area_level_1': 'country'})
        .set_index(['country', 'date'])
    )


try:
    # Fast path: reuse the CSV cache if it exists.
    print(f'Loading from: {csv_path}')
    df = _index_by_country_date(pd.read_csv(csv_path))
except FileNotFoundError:
    try:
        # Slow path: load from the sqlite db file and rebuild the CSV cache.
        print(f'Loading from: {db_path}')
        # sqlite3's connection context manager only commits / rolls back the
        # transaction; it does not close the connection, so close it explicitly.
        con = sqlite3.connect(db_path)
        try:
            df = pd.read_sql_query("SELECT * FROM timeseries", con)
        finally:
            con.close()

        # Rename and set indexes
        df = _index_by_country_date(df)

        # Some countries are only identified by iso_alpha_3. Just skip them as
        # they are not all that relevant. A boolean mask on the first index
        # level keeps every row exactly once, even if index labels repeat.
        df = df[df.index.get_level_values(0).notna()]

        # Save to local file for the future and the other notebooks
        df.to_csv(csv_path)
    except MemoryError as err:
        print('A MemoryError happened while loading the file. Try freeing up some RAM (close Firefox and other programs).')
        print(f'MemoryError: {err}')
        # Re-raise: `df` is not defined at this point, so letting the notebook
        # continue would only fail later with a less helpful NameError.
        raise

# Top-level expression so the preview is actually rendered (expressions nested
# inside try/except are not auto-displayed by IPython).
df.head()


In [None]:
# Sanity check on the raw 'recovered' column: smallest, average and largest value.
df['recovered'].min()
df['recovered'].mean()
df['recovered'].max()

In [None]:
# Read the country table; the first CSV column becomes the row index.
df_country = pd.read_csv(
    csv_country_path,
    index_col=0,
)

In [None]:
# Display the country table read from csv_country_path.
df_country

In [None]:
# Create the study instance from the time-series frame `df`.
# downsampling=7 is meant to keep 1 in every 7 days, i.e. one value per week.
# NOTE(review): `study` is a project-local module, presumably resolved through
# the './../code' entry appended to sys.path -- confirm.
from study import CovidCountryStudy
st = CovidCountryStudy(df, downsampling=7)

In [None]:
# Keep only countries with population info, then express the selected columns
# per 100,000 inhabitants.
idx = pd.IndexSlice
cols = st.prepare_columns(['covid', 'health_system'])
countries_with_population = [country for country in st.countries if country in df_country.index]
st.data = st.data.loc[idx[countries_with_population, :], :]

# Iterate over the list computed above rather than re-reading st.countries:
# whether that attribute is refreshed after st.data is reassigned is not
# visible here, and a stale entry would raise KeyError on the population lookup.
for country in countries_with_population:
    country_rows = idx[country, :]
    population = df_country.loc[country, 'population']
    st.data.loc[country_rows, cols] = st.data.loc[country_rows, cols] / population * 100_000

# new_data is another name for the study's frame, not a copy.
new_data = st.data

In [None]:
# Display new_data (bound to st.data after the per-100,000 scaling).
new_data

In [None]:
# Same sanity check as on the raw data, now on the scaled 'recovered' column
# reached through the column hierarchy covid -> status -> recovered.
new_data['covid']['status']['recovered'].min()
new_data['covid']['status']['recovered'].mean()
new_data['covid']['status']['recovered'].max()

In [None]:
# Reduce columns to their last level to be able to save to csv correctly.
# set_axis returns a relabelled frame instead of renaming in place: new_data was
# bound to st.data, so assigning to `.columns` directly would (assuming both
# names still refer to the same object) also flatten the study's own frame.
# NOTE(review): keeping only the last level assumes the leaf column names are
# unique across groups -- confirm.
flat_columns = new_data.columns.get_level_values(-1)
new_data = new_data.set_axis(flat_columns, axis=1).reset_index()
new_data.to_csv(csv_new_path)