In [1]:
import sys
import io
import time
import requests
import datetime
import numpy as np
import pandas as pd

sys.path.append('../')
from src.util.functions import Functions

# Disable warning
pd.options.mode.chained_assignment = None 

level = 'regioni'

path_data = '../data'
path_csv = '{path_data}/csv'.format(path_data=path_data)

csv_out_name = '{path_csv}/covid19-{level}.csv'.format(path_csv=path_csv, level=level)
pop_regions_name = '{path_csv}/popolazione-regioni.csv'.format(path_csv=path_csv)
icu_regions_name = '{path_csv}/terapie-intensive.csv'.format(path_csv=path_csv)
regions_name = '{path_csv}/regioni.csv'.format(path_csv=path_csv)

In [2]:
url_cum = Functions.get_url_cum(level)
url_cum

'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv'

In [3]:
df_raw_r = pd.read_csv(url_cum)
df_raw_r

Unnamed: 0,data,stato,codice_regione,denominazione_regione,lat,long,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,...,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati,note
0,2020-02-24T18:00:00,ITA,13,Abruzzo,42.351222,13.398438,0,0,0,0,...,0,0,0,0,,,0,5,,
1,2020-02-24T18:00:00,ITA,17,Basilicata,40.639471,15.805148,0,0,0,0,...,0,0,0,0,,,0,0,,
2,2020-02-24T18:00:00,ITA,18,Calabria,38.905976,16.594402,0,0,0,0,...,0,0,0,0,,,0,1,,
3,2020-02-24T18:00:00,ITA,15,Campania,40.839566,14.250850,0,0,0,0,...,0,0,0,0,,,0,10,,
4,2020-02-24T18:00:00,ITA,8,Emilia-Romagna,44.494367,11.341721,10,2,12,6,...,0,18,0,0,,,18,148,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6022,2020-12-06T17:00:00,ITA,19,Sicilia,38.115697,13.362357,1367,213,1580,38166,...,206,1022,29984,1759,0.0,0.0,71489,1021846,692062.0,
6023,2020-12-06T17:00:00,ITA,9,Toscana,43.769231,11.255889,1360,252,1612,27587,...,-1345,753,76331,2867,0.0,0.0,108397,1645472,983103.0,
6024,2020-12-06T17:00:00,ITA,10,Umbria,43.106758,12.388247,332,60,392,5673,...,-84,234,18619,460,0.0,0.0,25144,438511,231538.0,
6025,2020-12-06T17:00:00,ITA,2,Valle d'Aosta,45.737503,7.320149,102,8,110,877,...,-69,34,5406,333,0.0,0.0,6726,55973,34644.0,


In [4]:
# Drop useless columns
cols_to_drop = ['stato', 'lat', 'long', 'note']
df_rd = df_raw_r.drop(cols_to_drop, axis=1)
df_rd

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-02-24T18:00:00,13,Abruzzo,0,0,0,0,0,0,0,0,0,,,0,5,
1,2020-02-24T18:00:00,17,Basilicata,0,0,0,0,0,0,0,0,0,,,0,0,
2,2020-02-24T18:00:00,18,Calabria,0,0,0,0,0,0,0,0,0,,,0,1,
3,2020-02-24T18:00:00,15,Campania,0,0,0,0,0,0,0,0,0,,,0,10,
4,2020-02-24T18:00:00,8,Emilia-Romagna,10,2,12,6,18,0,18,0,0,,,18,148,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6022,2020-12-06T17:00:00,19,Sicilia,1367,213,1580,38166,39746,206,1022,29984,1759,0.0,0.0,71489,1021846,692062.0
6023,2020-12-06T17:00:00,9,Toscana,1360,252,1612,27587,29199,-1345,753,76331,2867,0.0,0.0,108397,1645472,983103.0
6024,2020-12-06T17:00:00,10,Umbria,332,60,392,5673,6065,-84,234,18619,460,0.0,0.0,25144,438511,231538.0
6025,2020-12-06T17:00:00,2,Valle d'Aosta,102,8,110,877,987,-69,34,5406,333,0.0,0.0,6726,55973,34644.0


In [5]:
last_date_str = df_rd.iloc[-1].data.split('T')[0]
last_date = datetime.datetime.strptime(last_date_str, '%Y-%m-%d')
prev_date = Functions.get_previous_date(last_date)
prev_date_str = datetime.datetime.strftime(prev_date, '%Y-%m-%d')
last_date_str, prev_date_str

('2020-12-06', '2020-12-05')

In [6]:
# Current state S
df_raw_r_1 = df_rd[df_rd.data.str.startswith(last_date_str)].sort_values(by='codice_regione').reset_index(drop=True)
df_raw_r_1

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-12-06T17:00:00,1,Piemonte,4239,359,4598,61440,66038,-725,1269,105127,6623,0.0,0.0,177788,1665145,1000611.0
1,2020-12-06T17:00:00,2,Valle d'Aosta,102,8,110,877,987,-69,34,5406,333,0.0,0.0,6726,55973,34644.0
2,2020-12-06T17:00:00,3,Lombardia,6372,807,7179,109200,116379,623,2413,289706,23024,0.0,0.0,429109,4279332,2415099.0
3,2020-12-06T17:00:00,5,Veneto,2508,308,2816,73988,76804,1846,3444,84235,4210,0.0,0.0,165249,2885378,1090932.0
4,2020-12-06T17:00:00,6,Friuli Venezia Giulia,652,59,711,14320,15031,-33,702,19401,1035,0.0,0.0,35467,768285,296752.0
5,2020-12-06T17:00:00,7,Liguria,868,92,960,9456,10416,-102,304,40842,2521,0.0,0.0,53779,625828,298052.0
6,2020-12-06T17:00:00,8,Emilia-Romagna,2736,241,2977,65190,68167,-1462,1788,59432,6162,0.0,0.0,133761,2222471,1115823.0
7,2020-12-06T17:00:00,9,Toscana,1360,252,1612,27587,29199,-1345,753,76331,2867,0.0,0.0,108397,1645472,983103.0
8,2020-12-06T17:00:00,10,Umbria,332,60,392,5673,6065,-84,234,18619,460,0.0,0.0,25144,438511,231538.0
9,2020-12-06T17:00:00,11,Marche,518,82,600,19458,20058,360,443,11008,1327,0.0,0.0,32393,433239,253076.0


In [7]:
# State S - 1
df_raw_r_0 = df_rd[df_rd.data.str.startswith(prev_date_str)].sort_values(by='codice_regione').reset_index(drop=True)
df_raw_r_0

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-12-05T17:00:00,1,Piemonte,4276,366,4642,62121,66763,-1713,1456,103188,6568,0.0,0.0,176519,1655759,996658.0
1,2020-12-05T17:00:00,2,Valle d'Aosta,107,9,116,940,1056,-24,46,5306,330,0.0,0.0,6692,55696,34511.0
2,2020-12-05T17:00:00,3,Lombardia,6554,805,7359,108397,115756,-946,3148,288056,22884,0.0,0.0,426696,4253306,2407249.0
3,2020-12-05T17:00:00,5,Veneto,2489,312,2801,72157,74958,2029,3607,82674,4173,0.0,0.0,161805,2870960,1086461.0
4,2020-12-05T17:00:00,6,Friuli Venezia Giulia,640,58,698,14366,15064,-109,755,18693,1008,0.0,0.0,34765,762058,295536.0
5,2020-12-05T17:00:00,7,Liguria,897,93,990,9528,10518,-509,314,40456,2501,0.0,0.0,53475,622736,297122.0
6,2020-12-05T17:00:00,8,Emilia-Romagna,2707,243,2950,66679,69629,-427,1964,56270,6074,0.0,0.0,131973,2211300,1109844.0
7,2020-12-05T17:00:00,9,Toscana,1373,261,1634,28910,30544,-2735,769,74261,2839,0.0,0.0,107644,1633330,979295.0
8,2020-12-05T17:00:00,10,Umbria,330,61,391,5758,6149,-274,226,18310,451,0.0,0.0,24910,435623,230476.0
9,2020-12-05T17:00:00,11,Marche,506,84,590,19108,19698,337,418,10938,1314,0.0,0.0,31950,429447,250652.0


In [8]:
# New df regions state S with Trentino-Alto Adige
df_r_1 = Functions.replace_bt_with_taa(df_raw_r_1)
df_r_1.sort_values(by='codice_regione').reset_index(drop=True)

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-12-06T17:00:00,1,Piemonte,4239,359,4598,61440,66038,-725,1269,105127,6623,0.0,0.0,177788,1665145,1000611.0
1,2020-12-06T17:00:00,2,Valle d'Aosta,102,8,110,877,987,-69,34,5406,333,0.0,0.0,6726,55973,34644.0
2,2020-12-06T17:00:00,3,Lombardia,6372,807,7179,109200,116379,623,2413,289706,23024,0.0,0.0,429109,4279332,2415099.0
3,2020-12-06T17:00:00,4,Trentino-Alto Adige,824,80,904,12434,13338,-293,464,27857,1269,0.0,0.0,42464,719403,293255.0
4,2020-12-06T17:00:00,5,Veneto,2508,308,2816,73988,76804,1846,3444,84235,4210,0.0,0.0,165249,2885378,1090932.0
5,2020-12-06T17:00:00,6,Friuli Venezia Giulia,652,59,711,14320,15031,-33,702,19401,1035,0.0,0.0,35467,768285,296752.0
6,2020-12-06T17:00:00,7,Liguria,868,92,960,9456,10416,-102,304,40842,2521,0.0,0.0,53779,625828,298052.0
7,2020-12-06T17:00:00,8,Emilia-Romagna,2736,241,2977,65190,68167,-1462,1788,59432,6162,0.0,0.0,133761,2222471,1115823.0
8,2020-12-06T17:00:00,9,Toscana,1360,252,1612,27587,29199,-1345,753,76331,2867,0.0,0.0,108397,1645472,983103.0
9,2020-12-06T17:00:00,10,Umbria,332,60,392,5673,6065,-84,234,18619,460,0.0,0.0,25144,438511,231538.0


In [9]:
# New df regions state S - 1 with Trentino-Alto Adige
df_r_0 = Functions.replace_bt_with_taa(df_raw_r_0)
df_r_0.sort_values(by='codice_regione').reset_index()

Unnamed: 0,index,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,0,2020-12-05T17:00:00,1,Piemonte,4276,366,4642,62121,66763,-1713,1456,103188,6568,0.0,0.0,176519,1655759,996658.0
1,1,2020-12-05T17:00:00,2,Valle d'Aosta,107,9,116,940,1056,-24,46,5306,330,0.0,0.0,6692,55696,34511.0
2,2,2020-12-05T17:00:00,3,Lombardia,6554,805,7359,108397,115756,-946,3148,288056,22884,0.0,0.0,426696,4253306,2407249.0
3,19,2020-12-05T17:00:00,4,Trentino-Alto Adige,814,79,893,12738,13631,-61,492,27108,1261,0.0,0.0,42000,714036,291654.0
4,3,2020-12-05T17:00:00,5,Veneto,2489,312,2801,72157,74958,2029,3607,82674,4173,0.0,0.0,161805,2870960,1086461.0
5,4,2020-12-05T17:00:00,6,Friuli Venezia Giulia,640,58,698,14366,15064,-109,755,18693,1008,0.0,0.0,34765,762058,295536.0
6,5,2020-12-05T17:00:00,7,Liguria,897,93,990,9528,10518,-509,314,40456,2501,0.0,0.0,53475,622736,297122.0
7,6,2020-12-05T17:00:00,8,Emilia-Romagna,2707,243,2950,66679,69629,-427,1964,56270,6074,0.0,0.0,131973,2211300,1109844.0
8,7,2020-12-05T17:00:00,9,Toscana,1373,261,1634,28910,30544,-2735,769,74261,2839,0.0,0.0,107644,1633330,979295.0
9,8,2020-12-05T17:00:00,10,Umbria,330,61,391,5758,6149,-274,226,18310,451,0.0,0.0,24910,435623,230476.0


In [10]:
df_r = df_r_1.copy()
# Merge with population data
df_pop_r = pd.read_csv(pop_regions_name)
df_m_r_1 = df_r.merge(df_pop_r, on='denominazione_regione')
df_m_r_1.sort_values(by='codice_regione').reset_index(drop=True)

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati,popolazione
0,2020-12-06T17:00:00,1,Piemonte,4239,359,4598,61440,66038,-725,1269,105127,6623,0.0,0.0,177788,1665145,1000611.0,4341375
1,2020-12-06T17:00:00,2,Valle d'Aosta,102,8,110,877,987,-69,34,5406,333,0.0,0.0,6726,55973,34644.0,125501
2,2020-12-06T17:00:00,3,Lombardia,6372,807,7179,109200,116379,623,2413,289706,23024,0.0,0.0,429109,4279332,2415099.0,10103969
3,2020-12-06T17:00:00,4,Trentino-Alto Adige,824,80,904,12434,13338,-293,464,27857,1269,0.0,0.0,42464,719403,293255.0,1074819
4,2020-12-06T17:00:00,5,Veneto,2508,308,2816,73988,76804,1846,3444,84235,4210,0.0,0.0,165249,2885378,1090932.0,4907704
5,2020-12-06T17:00:00,6,Friuli Venezia Giulia,652,59,711,14320,15031,-33,702,19401,1035,0.0,0.0,35467,768285,296752.0,1211357
6,2020-12-06T17:00:00,7,Liguria,868,92,960,9456,10416,-102,304,40842,2521,0.0,0.0,53779,625828,298052.0,1543127
7,2020-12-06T17:00:00,8,Emilia-Romagna,2736,241,2977,65190,68167,-1462,1788,59432,6162,0.0,0.0,133761,2222471,1115823.0,4467118
8,2020-12-06T17:00:00,9,Toscana,1360,252,1612,27587,29199,-1345,753,76331,2867,0.0,0.0,108397,1645472,983103.0,3722729
9,2020-12-06T17:00:00,10,Umbria,332,60,392,5673,6065,-84,234,18619,460,0.0,0.0,25144,438511,231538.0,880285


In [11]:
# Merge with icu data
df_icu_r = pd.read_csv(icu_regions_name)
df_m_r_2 = df_m_r_1.merge(df_icu_r, on='denominazione_regione')
df_m_r_2.sort_values(by='codice_regione').reset_index(drop=True)

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati,popolazione,totale_posti_terapia_intensiva
0,2020-12-06T17:00:00,1,Piemonte,4239,359,4598,61440,66038,-725,1269,105127,6623,0.0,0.0,177788,1665145,1000611.0,4341375,575
1,2020-12-06T17:00:00,2,Valle d'Aosta,102,8,110,877,987,-69,34,5406,333,0.0,0.0,6726,55973,34644.0,125501,20
2,2020-12-06T17:00:00,3,Lombardia,6372,807,7179,109200,116379,623,2413,289706,23024,0.0,0.0,429109,4279332,2415099.0,10103969,1036
3,2020-12-06T17:00:00,4,Trentino-Alto Adige,824,80,904,12434,13338,-293,464,27857,1269,0.0,0.0,42464,719403,293255.0,1074819,106
4,2020-12-06T17:00:00,5,Veneto,2508,308,2816,73988,76804,1846,3444,84235,4210,0.0,0.0,165249,2885378,1090932.0,4907704,1016
5,2020-12-06T17:00:00,6,Friuli Venezia Giulia,652,59,711,14320,15031,-33,702,19401,1035,0.0,0.0,35467,768285,296752.0,1211357,180
6,2020-12-06T17:00:00,7,Liguria,868,92,960,9456,10416,-102,304,40842,2521,0.0,0.0,53779,625828,298052.0,1543127,209
7,2020-12-06T17:00:00,8,Emilia-Romagna,2736,241,2977,65190,68167,-1462,1788,59432,6162,0.0,0.0,133761,2222471,1115823.0,4467118,634
8,2020-12-06T17:00:00,9,Toscana,1360,252,1612,27587,29199,-1345,753,76331,2867,0.0,0.0,108397,1645472,983103.0,3722729,523
9,2020-12-06T17:00:00,10,Umbria,332,60,392,5673,6065,-84,234,18619,460,0.0,0.0,25144,438511,231538.0,880285,117


In [12]:
# Copy last merged df into df_r
df_r = df_m_r_2.copy()

In [13]:
# Add columns
df_r['variazione_ricoverati_con_sintomi'] = df_r_1['ricoverati_con_sintomi'] - df_r_0['ricoverati_con_sintomi']
df_r['variazione_terapia_intensiva'] = df_r_1['terapia_intensiva'] - df_r_0['terapia_intensiva']
df_r['variazione_totale_ospedalizzati'] = df_r_1['totale_ospedalizzati'] - df_r_0['totale_ospedalizzati']
df_r['posti_disponibili_terapia_intensiva'] = df_r['totale_posti_terapia_intensiva'] - df_r['terapia_intensiva']
df_r['saturazione_terapia_intensiva'] = ((df_r_1['terapia_intensiva'] / df_r['totale_posti_terapia_intensiva']) * 100).round(2)
df_r['variazione_isolamento_domiciliare'] = df_r_1['isolamento_domiciliare'] - df_r_0['isolamento_domiciliare']
df_r['variazione_nuovi_positivi'] = df_r_1['nuovi_positivi'] - df_r_0['nuovi_positivi']
df_r['nuovi_tamponi'] = df_r_1['tamponi'] - df_r_0['tamponi']
df_r['nuovi_casi_testati'] = df_r_1['casi_testati'] - df_r_0['casi_testati']
df_r['nuovi_deceduti'] = df_r_1['deceduti'] - df_r_0['deceduti']
df_r['nuovi_dimessi_guariti'] = df_r_1['dimessi_guariti'] - df_r_0['dimessi_guariti']
df_r['np_su_nt'] = ((df_r['nuovi_positivi'] / df_r['nuovi_tamponi']) * 100).round(2)
df_r['np_su_p'] = ((df_r['nuovi_positivi'] / df_r['popolazione']) * 100).round(5)
df_r['tc_su_p'] = ((df_r['totale_casi'] / df_r['popolazione']) * 100).round(2)

df_r = df_r.sort_values(by='nuovi_positivi', ascending=False).reset_index(drop=True)
df_r

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,...,saturazione_terapia_intensiva,variazione_isolamento_domiciliare,variazione_nuovi_positivi,nuovi_tamponi,nuovi_casi_testati,nuovi_deceduti,nuovi_dimessi_guariti,np_su_nt,np_su_p,tc_su_p
0,2020-12-06T17:00:00,5,Veneto,2508,308,2816,73988,76804,1846,3444,...,30.31,1831,-163,14418,4471.0,37,1561,23.89,0.07018,3.37
1,2020-12-06T17:00:00,3,Lombardia,6372,807,7179,109200,116379,623,2413,...,77.9,803,-735,26026,7850.0,140,1650,9.27,0.02388,4.25
2,2020-12-06T17:00:00,16,Puglia,1613,203,1816,44018,45834,1674,1789,...,55.01,1654,-95,7293,1915.0,24,91,24.53,0.04463,1.61
3,2020-12-06T17:00:00,8,Emilia-Romagna,2736,241,2977,65190,68167,-1462,1788,...,38.01,-1489,-176,11171,5979.0,88,3162,16.01,0.04003,2.99
4,2020-12-06T17:00:00,12,Lazio,3136,340,3476,90735,94211,732,1632,...,40.14,680,-151,19513,15000.0,14,886,8.36,0.02782,2.22
5,2020-12-06T17:00:00,15,Campania,1916,155,2071,99172,101243,-118,1552,...,30.69,-466,31,19313,9204.0,32,1638,8.04,0.02682,2.86
6,2020-12-06T17:00:00,1,Piemonte,4239,359,4598,61440,66038,-725,1269,...,62.43,-681,-187,9386,3953.0,55,1939,13.52,0.02923,4.1
7,2020-12-06T17:00:00,19,Sicilia,1367,213,1580,38166,39746,206,1022,...,36.22,241,-218,8132,5023.0,36,780,12.57,0.02057,1.44
8,2020-12-06T17:00:00,9,Toscana,1360,252,1612,27587,29199,-1345,753,...,48.18,-1323,-16,12142,3808.0,28,2070,6.2,0.02023,2.91
9,2020-12-06T17:00:00,6,Friuli Venezia Giulia,652,59,711,14320,15031,-33,702,...,32.78,-46,-53,6227,1216.0,27,708,11.27,0.05795,2.93


In [14]:
# Write pre-processed data
df_r.to_csv(csv_out_name, index=False)

In [15]:
# ICU Stats
df_r[['denominazione_regione', 'terapia_intensiva', 'posti_disponibili_terapia_intensiva', 'totale_posti_terapia_intensiva', 'saturazione_terapia_intensiva']] \
    .sort_values(by='saturazione_terapia_intensiva', ascending=False).reset_index(drop=True)

Unnamed: 0,denominazione_regione,terapia_intensiva,posti_disponibili_terapia_intensiva,totale_posti_terapia_intensiva,saturazione_terapia_intensiva
0,Lombardia,807,229,1036,77.9
1,Trentino-Alto Adige,80,26,106,75.47
2,Piemonte,359,216,575,62.43
3,Marche,82,61,143,57.34
4,Puglia,203,166,369,55.01
5,Umbria,60,57,117,51.28
6,Toscana,252,271,523,48.18
7,Abruzzo,71,77,148,47.97
8,Liguria,92,117,209,44.02
9,Lazio,340,507,847,40.14


In [16]:
def add_cols(df):
    df['variazione_terapia_intensiva'] = df['terapia_intensiva'].diff()
    df['variazione_ricoverati_con_sintomi'] = df['ricoverati_con_sintomi'].diff()
    df['variazione_totale_ospedalizzati'] = df['totale_ospedalizzati'].diff()
    df['nuovi_dimessi_guariti'] = df['dimessi_guariti'].diff()
    df['nuovi_deceduti'] = df['deceduti'].diff()
    df['nuovi_tamponi'] = df['tamponi'].diff()
    df['nuovi_casi_testati'] = df['casi_testati'].diff()
    df['variazione_isolamento_domiciliare'] = df['isolamento_domiciliare'].diff()
    df = df.iloc[1:]
    df['ti_su_to'] = ((df['terapia_intensiva'] / df['totale_ospedalizzati']).replace(np.inf, 0) * 100).round(2)
    df['to_su_tp'] = ((df['totale_ospedalizzati'] / df['totale_positivi']).replace(np.inf, 0) * 100).round(2)
    df['np_su_nt'] = ((df['nuovi_positivi'] / df['nuovi_tamponi']).replace(np.inf, 0) * 100).round(2)
    df['np_su_nct'] = ((df['nuovi_positivi'] / df['nuovi_casi_testati']).replace(np.inf, 0) * 100).round(2)
    df['var_ti_perc'] = ((df['variazione_terapia_intensiva'] / (df['terapia_intensiva'] - df['variazione_terapia_intensiva'])).replace(np.inf, 0) * 100).round(2)
    df['var_pos_perc'] = ((df['variazione_totale_positivi'] / (df['totale_positivi'] - df['variazione_totale_positivi'])).replace(np.inf, 0) * 100).round(2)
    return df.fillna(0)

In [17]:
# df_rd.sort_values(by='codice_regione').reset_index(drop=True)
df_rn = pd.read_csv(regions_name)
for region_code in df_rn.codice_regione:
    if region_code != 4: # TAA
        df_cur_region = df_rd[df_rd.codice_regione == region_code]
        region_name = df_cur_region.denominazione_regione.iloc[0].lower()
        csv_out_region_name = '{path_csv}/regioni/covid19-{region_name}.csv'.format(path_csv=path_csv, region_name=region_name)
        df_cur_region = add_cols(df_cur_region)
        df_cur_region.to_csv(csv_out_region_name, index=False)
        print('Aggiornato {file}'.format(file=csv_out_region_name))

Aggiornato ../data/csv/regioni/covid19-piemonte.csv
Aggiornato ../data/csv/regioni/covid19-valle d'aosta.csv
Aggiornato ../data/csv/regioni/covid19-lombardia.csv
Aggiornato ../data/csv/regioni/covid19-veneto.csv
Aggiornato ../data/csv/regioni/covid19-friuli venezia giulia.csv
Aggiornato ../data/csv/regioni/covid19-liguria.csv
Aggiornato ../data/csv/regioni/covid19-emilia-romagna.csv
Aggiornato ../data/csv/regioni/covid19-toscana.csv
Aggiornato ../data/csv/regioni/covid19-umbria.csv
Aggiornato ../data/csv/regioni/covid19-marche.csv
Aggiornato ../data/csv/regioni/covid19-lazio.csv
Aggiornato ../data/csv/regioni/covid19-abruzzo.csv
Aggiornato ../data/csv/regioni/covid19-molise.csv
Aggiornato ../data/csv/regioni/covid19-campania.csv
Aggiornato ../data/csv/regioni/covid19-puglia.csv
Aggiornato ../data/csv/regioni/covid19-basilicata.csv
Aggiornato ../data/csv/regioni/covid19-calabria.csv
Aggiornato ../data/csv/regioni/covid19-sicilia.csv
Aggiornato ../data/csv/regioni/covid19-sardegna.csv


In [18]:
df_tb = df_rd[df_rd.codice_regione > 20].reset_index(drop=True)
df_tb

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-02-24T18:00:00,21,P.A. Bolzano,0,0,0,0,0,0,0,0,0,,,0,1,
1,2020-02-24T18:00:00,22,P.A. Trento,0,0,0,0,0,0,0,0,0,,,0,3,
2,2020-02-25T18:00:00,21,P.A. Bolzano,1,0,1,0,1,1,1,0,0,,,1,1,
3,2020-02-25T18:00:00,22,P.A. Trento,0,0,0,0,0,0,0,0,0,,,0,3,
4,2020-02-26T18:00:00,21,P.A. Bolzano,1,0,1,0,1,0,0,0,0,,,1,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569,2020-12-04T17:00:00,22,P.A. Trento,415,47,462,2044,2506,-26,255,13475,680,0.0,0.0,16661,389049,139155.0
570,2020-12-05T17:00:00,21,P.A. Bolzano,410,31,441,10671,11112,-74,266,13429,572,0.0,0.0,25113,321387,151590.0
571,2020-12-05T17:00:00,22,P.A. Trento,404,48,452,2067,2519,13,226,13679,689,0.0,0.0,16887,392649,140064.0
572,2020-12-06T17:00:00,21,P.A. Bolzano,410,32,442,10479,10921,-191,248,13862,578,0.0,0.0,25361,323433,152109.0


In [19]:
# Initialize df_taa
df_taa = Functions.replace_bt_with_taa(df_tb[:2])
for i in range(2, df_tb.shape[0], 2):
    df_taa = df_taa.append(Functions.replace_bt_with_taa(df_tb[i:i+2]), ignore_index=True)
df_taa = add_cols(df_taa)
df_taa

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,...,nuovi_deceduti,nuovi_tamponi,nuovi_casi_testati,variazione_isolamento_domiciliare,ti_su_to,to_su_tp,np_su_nt,np_su_nct,var_ti_perc,var_pos_perc
1,2020-02-25T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,1,1,...,0.0,0.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.00
2,2020-02-26T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.00
3,2020-02-27T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,30.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.00
4,2020-02-28T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.00
5,2020-02-29T17:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,25.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282,2020-12-02T17:00:00,4,Trentino-Alto Adige,828,80,908,12880,13788,39,595,...,11.0,5716.0,1641.0,29.0,8.81,6.59,10.41,36.26,2.56,0.28
283,2020-12-03T17:00:00,4,Trentino-Alto Adige,832,80,912,12835,13747,-41,613,...,25.0,6293.0,1619.0,-45.0,8.77,6.63,9.74,37.86,0.00,-0.30
284,2020-12-04T17:00:00,4,Trentino-Alto Adige,833,79,912,12780,13692,-55,540,...,17.0,5940.0,1611.0,-55.0,8.66,6.66,9.09,33.52,-1.25,-0.40
285,2020-12-05T17:00:00,4,Trentino-Alto Adige,814,79,893,12738,13631,-61,492,...,12.0,5664.0,1454.0,-42.0,8.85,6.55,8.69,33.84,0.00,-0.45


In [20]:
region_name = df_taa.denominazione_regione.iloc[0].lower()
csv_out_region_name = '{path_csv}/regioni/covid19-{region_name}.csv'.format(path_csv=path_csv, region_name=region_name)
df_taa.to_csv(csv_out_region_name, index=False)
print('Aggiornato {file}'.format(file=csv_out_region_name))

Aggiornato ../data/csv/regioni/covid19-trentino-alto adige.csv
