In [None]:
cd ../src

In [None]:
import ipynb
from ipynb.fs.full import international_covid_data as cdi
from ipynb.fs.full import covid_data as cd
import pandas as pd
import numpy as np

In [None]:
def normalize(x, population, per=10000000):
    """Scale a raw count to a rate per ``per`` inhabitants.

    Parameters
    ----------
    x : number or array-like
        Raw count(s) to scale. Anything supporting ``/`` and ``*`` works,
        including pandas Series.
    population : number or array-like
        Population the count refers to.
    per : number, optional
        Size of the reference population. Defaults to 10,000,000, the value
        that was previously hard-coded.

    Returns
    -------
    Same kind as ``x / population``
        ``(x / population) * per``.
    """
    return (x / population) * per

## World Data

#### CASES

In [None]:
#CONVERT FROM TOTAL CASES TO NEW CASES
df = cdi.get_international_cases()
date_column = df['date']
# Every column after the first is treated as one cumulative series; missing
# values count as zero before differencing.
cases = df.loc[:, df.columns[1:]].fillna(value=0)
new_cases = cases.diff().fillna(value=0).astype(int)
new_cases.insert(loc=0, column="Date", value=date_column)
# The first row has no predecessor to diff against, so drop it by position.
new_cases = new_cases.iloc[1:]

#GET POPULATION DATA
population_df = cdi.get_international_population()
population_df = population_df.rename(columns={"countriesAndTerritories": "country"})
new_cases = new_cases.rename(columns={"United States": "United States of America"})

#REPLACE SPACES WITH UNDERSCORE IN COLUMN NAMES IN CASES DATA
# regex=False: a literal replacement, independent of the pandas default.
new_cases.columns = new_cases.columns.str.replace(' ', '_', regex=False)

#CALCULATE NORMALIZED VALUES
countries_without_population = []
for country in new_cases.columns[1:]:
    population = population_df.loc[population_df['country'] == country, 'population']
    if population.empty:
        # No population figure for this column: remember it and drop it below.
        countries_without_population.append(country)
        continue
    # The lookup yields a Series; take the scalar so the division is a plain
    # column / number operation. Rounding and the int cast are applied to the
    # column itself, so the integer dtype is what actually reaches the CSV.
    new_cases[country] = (
        normalize(new_cases[country], population.iloc[0]).round(0).astype(int)
    )

# Keyword form: the positional axis argument of drop() no longer exists in pandas 2.
new_cases = new_cases.drop(columns=countries_without_population)

new_cases.to_csv('../data/Covid_International/new_cases_normalized.csv', mode='w', index = False, header=True)

#### DEATHS

In [None]:
#CONVERT FROM TOTAL DEATHS TO NEW DEATHS
df = cdi.get_international_deaths()
date_column = df['date']
# Every column after the first is treated as one cumulative series; missing
# values count as zero before differencing.
deaths = df.loc[:, df.columns[1:]].fillna(value=0)
new_deaths = deaths.diff().fillna(value=0).astype(int)
new_deaths.insert(loc=0, column="Date", value=date_column)
# The first row has no predecessor to diff against, so drop it by position.
new_deaths = new_deaths.iloc[1:]

#GET POPULATION DATA
population_df = cdi.get_international_population()
population_df = population_df.rename(columns={"countriesAndTerritories": "country"})
new_deaths = new_deaths.rename(columns={"United States": "United States of America"})

#REPLACE SPACES WITH UNDERSCORE IN COLUMN NAMES IN DEATHS DATA
# regex=False: a literal replacement, independent of the pandas default.
new_deaths.columns = new_deaths.columns.str.replace(' ', '_', regex=False)

#CALCULATE NORMALIZED VALUES
countries_without_population = []
for country in new_deaths.columns[1:]:
    population = population_df.loc[population_df['country'] == country, 'population']
    if population.empty:
        # No population figure for this column: remember it and drop it below.
        countries_without_population.append(country)
        continue
    # The lookup yields a Series; take the scalar so the division is a plain
    # column / number operation. Rounding and the int cast are applied to the
    # column itself, so the integer dtype is what actually reaches the CSV.
    new_deaths[country] = (
        normalize(new_deaths[country], population.iloc[0]).round(0).astype(int)
    )

# Keyword form: the positional axis argument of drop() no longer exists in pandas 2.
new_deaths = new_deaths.drop(columns=countries_without_population)

new_deaths.to_csv('../data/Covid_International/new_deaths_normalized.csv', mode='w', index = False, header=True)

## US Data

#### CASES

In [None]:
# Population per state: sum the county-level 'population' column within each
# 'State' group. Only that column is aggregated, since it is the only one used.
population_county = cd.get_county_population()
population_state = population_county.groupby("State")["population"].sum()

# The first CSV column arrives as "Unnamed: 0"; it is renamed and used as the
# row index.
cases_state = (
    pd.read_csv('../data/Daily/covid_daily_cases.csv')
    .rename(columns={"Unnamed: 0": "State"})
    .set_index("State")
)

# Fail early, naming the offending rows, when a state has no population entry.
missing_states = cases_state.index.difference(population_state.index)
if len(missing_states) > 0:
    raise KeyError(f"No population data for: {list(missing_states)}")

# Align the populations with the table's rows and normalize column by column
# (each column is divided element-wise by the matching state population).
# This avoids writing float rows back into integer columns one state at a time.
population_aligned = population_state.reindex(cases_state.index)
cases_state = cases_state.apply(normalize, args=(population_aligned,))

cases_state = cases_state.round(0).astype(int)

cases_state.to_csv('../data/Daily/daily_new_cases_normalized.csv', mode='w', index = True, header=True)

#### DEATHS

In [None]:
# Population per state: sum the county-level 'population' column within each
# 'State' group. Only that column is aggregated, since it is the only one used.
population_county = cd.get_county_population()
population_state = population_county.groupby("State")["population"].sum()

# The first CSV column arrives as "Unnamed: 0"; it is renamed and used as the
# row index.
deaths_state = (
    pd.read_csv('../data/Daily/covid_daily_deaths.csv')
    .rename(columns={"Unnamed: 0": "State"})
    .set_index("State")
)

# Fail early, naming the offending rows, when a state has no population entry.
missing_states = deaths_state.index.difference(population_state.index)
if len(missing_states) > 0:
    raise KeyError(f"No population data for: {list(missing_states)}")

# Align the populations with the table's rows and normalize column by column
# (each column is divided element-wise by the matching state population).
# This avoids writing float rows back into integer columns one state at a time.
population_aligned = population_state.reindex(deaths_state.index)
deaths_state = deaths_state.apply(normalize, args=(population_aligned,))

deaths_state = deaths_state.round(0).astype(int)

deaths_state.to_csv('../data/Daily/daily_new_deaths_normalized.csv', mode='w', index = True, header=True)