## <strong> Filename: </strong> <em>covid_19_sns_population_infection_rate.ipynb<em>
##### <strong>Description:</strong> <em>COVID-19 Infection rate in population per 100k individuas. World data using dataset provided by https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases</em>
##### <strong>Features:</strong> <em> Population Size | Confirmed Cases | Rate | Contagion Rate </em>
##### <strong>Author:</strong> <em> Carlos Eduardo Beluzo (cbeluzo@gmail.com)</em>
##### <strong>Creation:</strong> <em> Mar/2020 </em>
##### <strong>Version:</strong> <em> 2020.03-1 </em>

In [1]:
# References
#https://plot.ly/python/v3/line-and-scatter/#style-scatter-plots
#https://towardsdatascience.com/plotly-python-scatter-plots-2ea1b4885c90

import numpy as np
import pandas as pd
import seaborn as sns
import plotly.graph_objects as go
import plotly as plt
import plotly.express as px
import datetime as dt

# https://data.worldbank.org
dfPop = pd.read_csv('../data/world-population-2018.csv', sep=';') #TODO POP2019

df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
df['Province/State'] = df['Province/State'].fillna('n/a')
df = df.fillna(0)

dfA = df.drop(['Province/State','Lat','Long'], axis = 1)
dfA = dfA.groupby('Country/Region').sum().T
dfA = dfA.reset_index()
dfA = dfA.rename(columns = {'index':'Day'})
dfA[['Day']] = pd.to_datetime(dfA['Day'])

# Calculate rate
dfFinal = dfA.copy()

for country in dfA.columns[1:]:
    dfCountry = pd.DataFrame(dfA[[country]])
    dfAux = pd.DataFrame(dfCountry[country][1:])
    dfAux = dfAux.reset_index()
    dfAux = dfAux.drop(['index'], axis=1)
    dfAux = dfAux.append({country : 0} , ignore_index=True)
    dfCountry['next'] = dfAux
    dfAux['ori'] = dfCountry[country]
    dfCountry['rate'] = np.array((100 * dfAux[country].values / dfAux['ori'].values) - 100)
    dfCountry.loc[dfCountry['rate'] == -100] = 0
    dfCountry.loc[dfCountry['rate'] == np.inf] = 0
    dfCountry = dfCountry.fillna(0)
    rate = str('rate'+'_'+country)
    dfFinal[rate] = dfCountry['rate']

# Calculate Population Infection Rate per 100k individuas
df1 = pd.DataFrame({'Day':'','country':'','cases':int(),'rate':float(),'pop':int()}, index=[1])
df1 = df1.drop(df1.index)

for country in dfA.columns[1:]:
    rate = str('rate' + '_' + country)
    
    try:
        pop = dfPop[dfPop['country'] == country]['pop'].values[0]
    except:
        pop = 0
    
    dfAux = pd.DataFrame(dfFinal[['Day',country,country,rate,rate]])
    dfAux.columns = ['Day','country','cases','rate','pop']
    dfAux['country'] = country
    dfAux['pop'] = pop
    df1 = df1.append(dfAux)

df1 = df1.reset_index()
df1 = df1.drop(['index'], axis=1)
df1[['Day']] = df1['Day'].dt.strftime('%y%m%d')

# Contagion rate per 100k habitants
df1['infection_rate'] = 100000 * df1['cases']/df1['pop']

last_upt = dt.date.today() - dt.timedelta(days=1)

title = 'COVID-19 Population Incidence Rate per 100k individuals (PIR) (' + "{:%d, %b %Y}".format(last_upt) +')'

#df1 = df1.loc[df1['country'].isin(['Finland','Italy','Brazil','Korea, South','France','United Kingdom','Australia','Denmark'])]
graf = px.scatter(df1,
                  x="cases", y="infection_rate", 
                  animation_frame="Day", animation_group="country",
                  size='pop', color="country", 
                  text=np.array(df1.country),
                  width=1200, 
                  height=800,
                  labels={'text':'','country':'Country','cases':'Confirmed Cases', 'pop':'Population', 
                          'rate':'Increase daily rate (%)', 'infection_rate':'PIR (per 100k individuals)' },
                
)

graf.update_traces(textposition='top center', marker_line_width=0.5, marker_line_color="black", mode='lines+markers')

graf.update_layout(
    template='plotly',
    xaxis_type="log",
    yaxis_type="log",
    xaxis_title="Confirmed Cases (log)",
    yaxis_title='PIR (per 100k individuals) (log)',    
    #transition = {'duration': 100000},
    title_text=title)

#graf.show()
plt.offline.plot(graf, filename="../plotly/covid_19_sns_population_infection_rate.html")
df1.to_csv (r'../data/covid_19_sns_population_infection_rate.csv', index = False, header=True)


divide by zero encountered in true_divide


invalid value encountered in true_divide

