In [1]:
import sys
import io
import time
import requests
import datetime
import numpy as np
import pandas as pd

sys.path.append('../')
from src.util.functions import Functions

# Disable warning
pd.options.mode.chained_assignment = None 

level = 'regioni'

path_data = '../data'
path_csv = '{path_data}/csv'.format(path_data=path_data)

csv_out_name = '{path_csv}/covid19-{level}.csv'.format(path_csv=path_csv, level=level)
pop_regions_name = '{path_csv}/popolazione-regioni.csv'.format(path_csv=path_csv)
icu_regions_name = '{path_csv}/terapie-intensive.csv'.format(path_csv=path_csv)
regions_name = '{path_csv}/regioni.csv'.format(path_csv=path_csv)

In [2]:
url_cum = Functions.get_url_cum(level)
url_cum

'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv'

In [3]:
df_raw_r = pd.read_csv(url_cum)
df_raw_r

Unnamed: 0,data,stato,codice_regione,denominazione_regione,lat,long,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,...,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati,note
0,2020-02-24T18:00:00,ITA,13,Abruzzo,42.351222,13.398438,0,0,0,0,...,0,0,0,0,,,0,5,,
1,2020-02-24T18:00:00,ITA,17,Basilicata,40.639471,15.805148,0,0,0,0,...,0,0,0,0,,,0,0,,
2,2020-02-24T18:00:00,ITA,18,Calabria,38.905976,16.594402,0,0,0,0,...,0,0,0,0,,,0,1,,
3,2020-02-24T18:00:00,ITA,15,Campania,40.839566,14.250850,0,0,0,0,...,0,0,0,0,,,0,10,,
4,2020-02-24T18:00:00,ITA,8,Emilia-Romagna,44.494367,11.341721,10,2,12,6,...,0,18,0,0,,,18,148,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5791,2020-11-25T17:00:00,ITA,19,Sicilia,38.115697,13.362357,1574,250,1824,36496,...,121,1317,17359,1322,34777.0,22224.0,57001,911444,625213.0,
5792,2020-11-25T17:00:00,ITA,9,Toscana,43.769231,11.255889,1754,286,2040,47240,...,-1426,986,46318,2378,75858.0,22118.0,97976,1494998,928848.0,effettuati 72404 tamponi rapidi antigenici di...
5793,2020-11-25T17:00:00,ITA,10,Umbria,43.106758,12.388247,361,70,431,9506,...,-260,386,12186,355,6314.0,16164.0,22478,399810,219242.0,
5794,2020-11-25T17:00:00,ITA,2,Valle d'Aosta,45.737503,7.320149,137,16,153,1530,...,-84,27,4219,297,5526.0,673.0,6199,56803,32389.0,


In [4]:
# Drop useless columns
cols_to_drop = ['stato', 'lat', 'long', 'note']
df_rd = df_raw_r.drop(cols_to_drop, axis=1)
df_rd

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-02-24T18:00:00,13,Abruzzo,0,0,0,0,0,0,0,0,0,,,0,5,
1,2020-02-24T18:00:00,17,Basilicata,0,0,0,0,0,0,0,0,0,,,0,0,
2,2020-02-24T18:00:00,18,Calabria,0,0,0,0,0,0,0,0,0,,,0,1,
3,2020-02-24T18:00:00,15,Campania,0,0,0,0,0,0,0,0,0,,,0,10,
4,2020-02-24T18:00:00,8,Emilia-Romagna,10,2,12,6,18,0,18,0,0,,,18,148,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5791,2020-11-25T17:00:00,19,Sicilia,1574,250,1824,36496,38320,121,1317,17359,1322,34777.0,22224.0,57001,911444,625213.0
5792,2020-11-25T17:00:00,9,Toscana,1754,286,2040,47240,49280,-1426,986,46318,2378,75858.0,22118.0,97976,1494998,928848.0
5793,2020-11-25T17:00:00,10,Umbria,361,70,431,9506,9937,-260,386,12186,355,6314.0,16164.0,22478,399810,219242.0
5794,2020-11-25T17:00:00,2,Valle d'Aosta,137,16,153,1530,1683,-84,27,4219,297,5526.0,673.0,6199,56803,32389.0


In [5]:
last_date_str = df_rd.iloc[-1].data.split('T')[0]
last_date = datetime.datetime.strptime(last_date_str, '%Y-%m-%d')
prev_date = Functions.get_previous_date(last_date)
prev_date_str = datetime.datetime.strftime(prev_date, '%Y-%m-%d')
last_date_str, prev_date_str

('2020-11-25', '2020-11-24')

In [6]:
# Current state S
df_raw_r_1 = df_rd[df_rd.data.str.startswith(last_date_str)].sort_values(by='codice_regione').reset_index(drop=True)
df_raw_r_1

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-11-25T17:00:00,1,Piemonte,5095,403,5498,72423,77921,846,2878,72529,5803,66552.0,89701.0,156253,1468297,923880.0
1,2020-11-25T17:00:00,2,Valle d'Aosta,137,16,153,1530,1683,-84,27,4219,297,5526.0,673.0,6199,56803,32389.0
2,2020-11-25T17:00:00,3,Lombardia,8114,942,9056,128973,138029,-10731,5173,227924,21005,286833.0,100125.0,386958,3904809,2289221.0
3,2020-11-25T17:00:00,5,Veneto,2422,305,2727,72411,75138,1784,2660,51509,3429,31612.0,98464.0,130076,2714890,1039770.0
4,2020-11-25T17:00:00,6,Friuli Venezia Giulia,588,54,642,12894,13536,189,696,12034,715,22310.0,3975.0,26285,679986,268850.0
5,2020-11-25T17:00:00,7,Liguria,1115,123,1238,12817,14055,-728,460,33018,2308,33844.0,15537.0,49381,578449,283560.0
6,2020-11-25T17:00:00,8,Emilia-Romagna,2763,249,3012,69514,72526,1182,2130,34681,5493,70428.0,42272.0,112700,2040254,1048727.0
7,2020-11-25T17:00:00,9,Toscana,1754,286,2040,47240,49280,-1426,986,46318,2378,75858.0,22118.0,97976,1494998,928848.0
8,2020-11-25T17:00:00,10,Umbria,361,70,431,9506,9937,-260,386,12186,355,6314.0,16164.0,22478,399810,219242.0
9,2020-11-25T17:00:00,11,Marche,578,94,672,15359,16031,314,448,10312,1211,21196.0,6358.0,27554,398356,231259.0


In [7]:
# State S - 1
df_raw_r_0 = df_rd[df_rd.data.str.startswith(prev_date_str)].sort_values(by='codice_regione').reset_index(drop=True)
df_raw_r_0

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-11-24T17:00:00,1,Piemonte,5116,404,5520,71555,77075,152,2070,70581,5719,65654.0,87721.0,153375,1446757,915304.0
1,2020-11-24T17:00:00,2,Valle d'Aosta,143,14,157,1610,1767,-82,73,4113,292,5501.0,671.0,6172,56259,32193.0
2,2020-11-24T17:00:00,3,Lombardia,8360,932,9292,139468,148760,-1158,4886,212175,20850,283494.0,98291.0,381785,3862746,2274365.0
3,2020-11-24T17:00:00,5,Veneto,2336,300,2636,70718,73354,-373,2194,50709,3353,31175.0,96241.0,127416,2698593,1035198.0
4,2020-11-24T17:00:00,6,Friuli Venezia Giulia,561,54,615,12732,13347,142,554,11543,699,21716.0,3873.0,25589,673076,267036.0
5,2020-11-24T17:00:00,7,Liguria,1179,121,1300,13483,14783,-538,509,31852,2286,33624.0,15297.0,48921,572618,281926.0
6,2020-11-24T17:00:00,8,Emilia-Romagna,2723,243,2966,68378,71344,1964,2501,33788,5439,69408.0,41163.0,110571,2021756,1041385.0
7,2020-11-24T17:00:00,9,Toscana,1771,296,2067,48639,50706,-1713,962,43973,2311,75233.0,21757.0,96990,1480155,922166.0
8,2020-11-24T17:00:00,10,Umbria,368,72,440,9757,10197,-286,218,11548,347,6227.0,15865.0,22092,395479,217537.0
9,2020-11-24T17:00:00,11,Marche,555,89,644,15073,15717,255,351,10188,1201,20798.0,6308.0,27106,394593,229538.0


In [8]:
# New df regions state S with Trentino-Alto Adige
df_r_1 = Functions.replace_bt_with_taa(df_raw_r_1)
df_r_1.sort_values(by='codice_regione').reset_index(drop=True)

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-11-25T17:00:00,1,Piemonte,5095,403,5498,72423,77921,846,2878,72529,5803,66552.0,89701.0,156253,1468297,923880.0
1,2020-11-25T17:00:00,2,Valle d'Aosta,137,16,153,1530,1683,-84,27,4219,297,5526.0,673.0,6199,56803,32389.0
2,2020-11-25T17:00:00,3,Lombardia,8114,942,9056,128973,138029,-10731,5173,227924,21005,286833.0,100125.0,386958,3904809,2289221.0
3,2020-11-25T17:00:00,4,Trentino-Alto Adige,863,82,945,13102,14047,-81,520,21357,1104,29444.0,7064.0,36508,659675,276171.0
4,2020-11-25T17:00:00,5,Veneto,2422,305,2727,72411,75138,1784,2660,51509,3429,31612.0,98464.0,130076,2714890,1039770.0
5,2020-11-25T17:00:00,6,Friuli Venezia Giulia,588,54,642,12894,13536,189,696,12034,715,22310.0,3975.0,26285,679986,268850.0
6,2020-11-25T17:00:00,7,Liguria,1115,123,1238,12817,14055,-728,460,33018,2308,33844.0,15537.0,49381,578449,283560.0
7,2020-11-25T17:00:00,8,Emilia-Romagna,2763,249,3012,69514,72526,1182,2130,34681,5493,70428.0,42272.0,112700,2040254,1048727.0
8,2020-11-25T17:00:00,9,Toscana,1754,286,2040,47240,49280,-1426,986,46318,2378,75858.0,22118.0,97976,1494998,928848.0
9,2020-11-25T17:00:00,10,Umbria,361,70,431,9506,9937,-260,386,12186,355,6314.0,16164.0,22478,399810,219242.0


In [9]:
# New df regions state S - 1 with Trentino-Alto Adige
df_r_0 = Functions.replace_bt_with_taa(df_raw_r_0)
df_r_0.sort_values(by='codice_regione').reset_index()

Unnamed: 0,index,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,0,2020-11-24T17:00:00,1,Piemonte,5116,404,5520,71555,77075,152,2070,70581,5719,65654.0,87721.0,153375,1446757,915304.0
1,1,2020-11-24T17:00:00,2,Valle d'Aosta,143,14,157,1610,1767,-82,73,4113,292,5501.0,671.0,6172,56259,32193.0
2,2,2020-11-24T17:00:00,3,Lombardia,8360,932,9292,139468,148760,-1158,4886,212175,20850,283494.0,98291.0,381785,3862746,2274365.0
3,19,2020-11-24T17:00:00,4,Trentino-Alto Adige,895,82,977,13151,14128,-5,303,20771,1089,29043.0,6945.0,35988,653540,274995.0
4,3,2020-11-24T17:00:00,5,Veneto,2336,300,2636,70718,73354,-373,2194,50709,3353,31175.0,96241.0,127416,2698593,1035198.0
5,4,2020-11-24T17:00:00,6,Friuli Venezia Giulia,561,54,615,12732,13347,142,554,11543,699,21716.0,3873.0,25589,673076,267036.0
6,5,2020-11-24T17:00:00,7,Liguria,1179,121,1300,13483,14783,-538,509,31852,2286,33624.0,15297.0,48921,572618,281926.0
7,6,2020-11-24T17:00:00,8,Emilia-Romagna,2723,243,2966,68378,71344,1964,2501,33788,5439,69408.0,41163.0,110571,2021756,1041385.0
8,7,2020-11-24T17:00:00,9,Toscana,1771,296,2067,48639,50706,-1713,962,43973,2311,75233.0,21757.0,96990,1480155,922166.0
9,8,2020-11-24T17:00:00,10,Umbria,368,72,440,9757,10197,-286,218,11548,347,6227.0,15865.0,22092,395479,217537.0


In [10]:
df_r = df_r_1.copy()
# Merge with population data
df_pop_r = pd.read_csv(pop_regions_name)
df_m_r_1 = df_r.merge(df_pop_r, on='denominazione_regione')
df_m_r_1.sort_values(by='codice_regione').reset_index(drop=True)

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati,popolazione
0,2020-11-25T17:00:00,1,Piemonte,5095,403,5498,72423,77921,846,2878,72529,5803,66552.0,89701.0,156253,1468297,923880.0,4341375
1,2020-11-25T17:00:00,2,Valle d'Aosta,137,16,153,1530,1683,-84,27,4219,297,5526.0,673.0,6199,56803,32389.0,125501
2,2020-11-25T17:00:00,3,Lombardia,8114,942,9056,128973,138029,-10731,5173,227924,21005,286833.0,100125.0,386958,3904809,2289221.0,10103969
3,2020-11-25T17:00:00,4,Trentino-Alto Adige,863,82,945,13102,14047,-81,520,21357,1104,29444.0,7064.0,36508,659675,276171.0,1074819
4,2020-11-25T17:00:00,5,Veneto,2422,305,2727,72411,75138,1784,2660,51509,3429,31612.0,98464.0,130076,2714890,1039770.0,4907704
5,2020-11-25T17:00:00,6,Friuli Venezia Giulia,588,54,642,12894,13536,189,696,12034,715,22310.0,3975.0,26285,679986,268850.0,1211357
6,2020-11-25T17:00:00,7,Liguria,1115,123,1238,12817,14055,-728,460,33018,2308,33844.0,15537.0,49381,578449,283560.0,1543127
7,2020-11-25T17:00:00,8,Emilia-Romagna,2763,249,3012,69514,72526,1182,2130,34681,5493,70428.0,42272.0,112700,2040254,1048727.0,4467118
8,2020-11-25T17:00:00,9,Toscana,1754,286,2040,47240,49280,-1426,986,46318,2378,75858.0,22118.0,97976,1494998,928848.0,3722729
9,2020-11-25T17:00:00,10,Umbria,361,70,431,9506,9937,-260,386,12186,355,6314.0,16164.0,22478,399810,219242.0,880285


In [11]:
# Merge with icu data
df_icu_r = pd.read_csv(icu_regions_name)
df_m_r_2 = df_m_r_1.merge(df_icu_r, on='denominazione_regione')
df_m_r_2.sort_values(by='codice_regione').reset_index(drop=True)

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati,popolazione,totale_posti_terapia_intensiva
0,2020-11-25T17:00:00,1,Piemonte,5095,403,5498,72423,77921,846,2878,72529,5803,66552.0,89701.0,156253,1468297,923880.0,4341375,575
1,2020-11-25T17:00:00,2,Valle d'Aosta,137,16,153,1530,1683,-84,27,4219,297,5526.0,673.0,6199,56803,32389.0,125501,20
2,2020-11-25T17:00:00,3,Lombardia,8114,942,9056,128973,138029,-10731,5173,227924,21005,286833.0,100125.0,386958,3904809,2289221.0,10103969,1036
3,2020-11-25T17:00:00,4,Trentino-Alto Adige,863,82,945,13102,14047,-81,520,21357,1104,29444.0,7064.0,36508,659675,276171.0,1074819,106
4,2020-11-25T17:00:00,5,Veneto,2422,305,2727,72411,75138,1784,2660,51509,3429,31612.0,98464.0,130076,2714890,1039770.0,4907704,1016
5,2020-11-25T17:00:00,6,Friuli Venezia Giulia,588,54,642,12894,13536,189,696,12034,715,22310.0,3975.0,26285,679986,268850.0,1211357,180
6,2020-11-25T17:00:00,7,Liguria,1115,123,1238,12817,14055,-728,460,33018,2308,33844.0,15537.0,49381,578449,283560.0,1543127,209
7,2020-11-25T17:00:00,8,Emilia-Romagna,2763,249,3012,69514,72526,1182,2130,34681,5493,70428.0,42272.0,112700,2040254,1048727.0,4467118,634
8,2020-11-25T17:00:00,9,Toscana,1754,286,2040,47240,49280,-1426,986,46318,2378,75858.0,22118.0,97976,1494998,928848.0,3722729,523
9,2020-11-25T17:00:00,10,Umbria,361,70,431,9506,9937,-260,386,12186,355,6314.0,16164.0,22478,399810,219242.0,880285,117


In [12]:
# Copy last merged df into df_r
df_r = df_m_r_2.copy()

In [13]:
# Add columns
df_r['variazione_ricoverati_con_sintomi'] = df_r_1['ricoverati_con_sintomi'] - df_r_0['ricoverati_con_sintomi']
df_r['variazione_terapia_intensiva'] = df_r_1['terapia_intensiva'] - df_r_0['terapia_intensiva']
df_r['variazione_totale_ospedalizzati'] = df_r_1['totale_ospedalizzati'] - df_r_0['totale_ospedalizzati']
df_r['posti_disponibili_terapia_intensiva'] = df_r['totale_posti_terapia_intensiva'] - df_r['terapia_intensiva']
df_r['saturazione_terapia_intensiva'] = ((df_r_1['terapia_intensiva'] / df_r['totale_posti_terapia_intensiva']) * 100).round(2)
df_r['variazione_isolamento_domiciliare'] = df_r_1['isolamento_domiciliare'] - df_r_0['isolamento_domiciliare']
df_r['variazione_nuovi_positivi'] = df_r_1['nuovi_positivi'] - df_r_0['nuovi_positivi']
df_r['nuovi_tamponi'] = df_r_1['tamponi'] - df_r_0['tamponi']
df_r['nuovi_casi_testati'] = df_r_1['casi_testati'] - df_r_0['casi_testati']
df_r['nuovi_deceduti'] = df_r_1['deceduti'] - df_r_0['deceduti']
df_r['nuovi_dimessi_guariti'] = df_r_1['dimessi_guariti'] - df_r_0['dimessi_guariti']
df_r['np_su_nt'] = ((df_r['nuovi_positivi'] / df_r['nuovi_tamponi']) * 100).round(2)
df_r['np_su_p'] = ((df_r['nuovi_positivi'] / df_r['popolazione']) * 100).round(5)
df_r['tc_su_p'] = ((df_r['totale_casi'] / df_r['popolazione']) * 100).round(2)

df_r = df_r.sort_values(by='nuovi_positivi', ascending=False).reset_index(drop=True)
df_r

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,...,saturazione_terapia_intensiva,variazione_isolamento_domiciliare,variazione_nuovi_positivi,nuovi_tamponi,nuovi_casi_testati,nuovi_deceduti,nuovi_dimessi_guariti,np_su_nt,np_su_p,tc_su_p
0,2020-11-25T17:00:00,3,Lombardia,8114,942,9056,128973,138029,-10731,5173,...,90.93,-10495,287,42063,14856.0,155,15749,12.3,0.0512,3.83
1,2020-11-25T17:00:00,1,Piemonte,5095,403,5498,72423,77921,846,2878,...,70.09,868,808,21540,8576.0,84,1948,13.36,0.06629,3.6
2,2020-11-25T17:00:00,15,Campania,2282,192,2474,99674,102148,-703,2815,...,38.02,-702,1051,23130,16937.0,47,3471,12.17,0.04865,2.47
3,2020-11-25T17:00:00,5,Veneto,2422,305,2727,72411,75138,1784,2660,...,30.02,1693,466,16297,4572.0,76,800,16.32,0.0542,2.65
4,2020-11-25T17:00:00,8,Emilia-Romagna,2763,249,3012,69514,72526,1182,2130,...,39.27,1136,-371,18498,7342.0,54,893,11.51,0.04768,2.52
5,2020-11-25T17:00:00,12,Lazio,3348,349,3697,82868,86565,769,2102,...,41.2,778,-407,29189,24719.0,58,1275,7.2,0.03584,1.87
6,2020-11-25T17:00:00,16,Puglia,1483,210,1693,32378,34071,1112,1511,...,56.91,1109,-56,9988,3398.0,30,369,15.13,0.0377,1.19
7,2020-11-25T17:00:00,19,Sicilia,1574,250,1824,36496,38320,121,1317,...,42.52,141,11,11433,7275.0,47,1149,11.52,0.02651,1.15
8,2020-11-25T17:00:00,9,Toscana,1754,286,2040,47240,49280,-1426,986,...,54.68,-1399,24,14843,6682.0,67,2345,6.64,0.02649,2.63
9,2020-11-25T17:00:00,6,Friuli Venezia Giulia,588,54,642,12894,13536,189,696,...,30.0,162,142,6910,1814.0,16,491,10.07,0.05746,2.17


In [14]:
# Write pre-processed data
df_r.to_csv(csv_out_name, index=False)

In [15]:
# ICU Stats
df_r[['denominazione_regione', 'terapia_intensiva', 'posti_disponibili_terapia_intensiva', 'totale_posti_terapia_intensiva', 'saturazione_terapia_intensiva']] \
    .sort_values(by='saturazione_terapia_intensiva', ascending=False).reset_index(drop=True)

Unnamed: 0,denominazione_regione,terapia_intensiva,posti_disponibili_terapia_intensiva,totale_posti_terapia_intensiva,saturazione_terapia_intensiva
0,Lombardia,942,94,1036,90.93
1,Valle d'Aosta,16,4,20,80.0
2,Trentino-Alto Adige,82,24,106,77.36
3,Piemonte,403,172,575,70.09
4,Marche,94,49,143,65.73
5,Umbria,70,47,117,59.83
6,Liguria,123,86,209,58.85
7,Puglia,210,159,369,56.91
8,Toscana,286,237,523,54.68
9,Abruzzo,72,76,148,48.65


In [16]:
def add_cols(df):
    df['variazione_terapia_intensiva'] = df['terapia_intensiva'].diff()
    df['variazione_ricoverati_con_sintomi'] = df['ricoverati_con_sintomi'].diff()
    df['variazione_totale_ospedalizzati'] = df['totale_ospedalizzati'].diff()
    df['nuovi_dimessi_guariti'] = df['dimessi_guariti'].diff()
    df['nuovi_deceduti'] = df['deceduti'].diff()
    df['nuovi_tamponi'] = df['tamponi'].diff()
    df['nuovi_casi_testati'] = df['casi_testati'].diff()
    df['variazione_isolamento_domiciliare'] = df['isolamento_domiciliare'].diff()
    df = df.iloc[1:]
    df['ti_su_to'] = ((df['terapia_intensiva'] / df['totale_ospedalizzati']).replace(np.inf, 0) * 100).round(2)
    df['to_su_tp'] = ((df['totale_ospedalizzati'] / df['totale_positivi']).replace(np.inf, 0) * 100).round(2)
    df['np_su_nt'] = ((df['nuovi_positivi'] / df['nuovi_tamponi']).replace(np.inf, 0) * 100).round(2)
    df['np_su_nct'] = ((df['nuovi_positivi'] / df['nuovi_casi_testati']).replace(np.inf, 0) * 100).round(2)
    df['var_ti_perc'] = ((df['variazione_terapia_intensiva'] / (df['terapia_intensiva'] - df['variazione_terapia_intensiva'])).replace(np.inf, 0) * 100).round(2)
    df['var_pos_perc'] = ((df['nuovi_positivi'] / (df['totale_positivi'] - df['nuovi_positivi'] + df['nuovi_dimessi_guariti'] + df['nuovi_deceduti'])).replace(np.inf, 0) * 100).round(3)
    return df.fillna(0)

In [17]:
# df_rd.sort_values(by='codice_regione').reset_index(drop=True)
df_rn = pd.read_csv(regions_name)
for region_code in df_rn.codice_regione:
    if region_code != 4: # TAA
        df_cur_region = df_rd[df_rd.codice_regione == region_code]
        region_name = df_cur_region.denominazione_regione.iloc[0].lower()
        csv_out_region_name = '{path_csv}/regioni/covid19-{region_name}.csv'.format(path_csv=path_csv, region_name=region_name)
        df_cur_region = add_cols(df_cur_region)
        df_cur_region.to_csv(csv_out_region_name, index=False)
        print('Aggiornato {file}'.format(file=csv_out_region_name))

Aggiornato ../data/csv/regioni/covid19-piemonte.csv
Aggiornato ../data/csv/regioni/covid19-valle d'aosta.csv
Aggiornato ../data/csv/regioni/covid19-lombardia.csv
Aggiornato ../data/csv/regioni/covid19-veneto.csv
Aggiornato ../data/csv/regioni/covid19-friuli venezia giulia.csv
Aggiornato ../data/csv/regioni/covid19-liguria.csv
Aggiornato ../data/csv/regioni/covid19-emilia-romagna.csv
Aggiornato ../data/csv/regioni/covid19-toscana.csv
Aggiornato ../data/csv/regioni/covid19-umbria.csv
Aggiornato ../data/csv/regioni/covid19-marche.csv
Aggiornato ../data/csv/regioni/covid19-lazio.csv
Aggiornato ../data/csv/regioni/covid19-abruzzo.csv
Aggiornato ../data/csv/regioni/covid19-molise.csv
Aggiornato ../data/csv/regioni/covid19-campania.csv
Aggiornato ../data/csv/regioni/covid19-puglia.csv
Aggiornato ../data/csv/regioni/covid19-basilicata.csv
Aggiornato ../data/csv/regioni/covid19-calabria.csv
Aggiornato ../data/csv/regioni/covid19-sicilia.csv
Aggiornato ../data/csv/regioni/covid19-sardegna.csv


In [18]:
df_tb = df_rd[df_rd.codice_regione > 20].reset_index(drop=True)
df_tb

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,dimessi_guariti,deceduti,casi_da_sospetto_diagnostico,casi_da_screening,totale_casi,tamponi,casi_testati
0,2020-02-24T18:00:00,21,P.A. Bolzano,0,0,0,0,0,0,0,0,0,,,0,1,
1,2020-02-24T18:00:00,22,P.A. Trento,0,0,0,0,0,0,0,0,0,,,0,3,
2,2020-02-25T18:00:00,21,P.A. Bolzano,1,0,1,0,1,1,1,0,0,,,1,1,
3,2020-02-25T18:00:00,22,P.A. Trento,0,0,0,0,0,0,0,0,0,,,0,3,
4,2020-02-26T18:00:00,21,P.A. Bolzano,1,0,1,0,1,0,0,0,0,,,1,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547,2020-11-23T17:00:00,22,P.A. Trento,433,43,476,2042,2518,-24,93,11122,585,7360.0,6865.0,14225,353917,128773.0
548,2020-11-24T17:00:00,21,P.A. Bolzano,459,40,499,11157,11656,41,160,9474,490,21620.0,0.0,21620,297388,145395.0
549,2020-11-24T17:00:00,22,P.A. Trento,436,42,478,1994,2472,-46,143,11297,599,7423.0,6945.0,14368,356152,129600.0
550,2020-11-25T17:00:00,21,P.A. Bolzano,442,41,483,11117,11600,-56,341,9863,498,21961.0,0.0,21961,300122,146006.0


In [19]:
# Initialize df_taa
df_taa = Functions.replace_bt_with_taa(df_tb[:2])
for i in range(2, df_tb.shape[0], 2):
    df_taa = df_taa.append(Functions.replace_bt_with_taa(df_tb[i:i+2]), ignore_index=True)
df_taa = add_cols(df_taa)
df_taa

Unnamed: 0,data,codice_regione,denominazione_regione,ricoverati_con_sintomi,terapia_intensiva,totale_ospedalizzati,isolamento_domiciliare,totale_positivi,variazione_totale_positivi,nuovi_positivi,...,nuovi_deceduti,nuovi_tamponi,nuovi_casi_testati,variazione_isolamento_domiciliare,ti_su_to,to_su_tp,np_su_nt,np_su_nct,var_ti_perc,var_pos_perc
1,2020-02-25T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,1,1,...,0.0,0.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.000
2,2020-02-26T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.000
3,2020-02-27T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,30.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.000
4,2020-02-28T18:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.000
5,2020-02-29T17:00:00,4,Trentino-Alto Adige,1,0,1,0,1,0,0,...,0.0,25.0,0.0,0.0,0.00,100.00,0.00,0.00,0.00,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,2020-11-21T17:00:00,4,Trentino-Alto Adige,863,79,942,13435,14377,-138,760,...,30.0,7784.0,2663.0,-119.0,8.39,6.55,9.76,28.54,3.95,5.236
272,2020-11-22T17:00:00,4,Trentino-Alto Adige,873,79,952,13214,14166,-211,694,...,8.0,6424.0,1844.0,-221.0,8.30,6.72,10.80,37.64,0.00,4.827
273,2020-11-23T17:00:00,4,Trentino-Alto Adige,906,81,987,13146,14133,-33,353,...,12.0,2947.0,960.0,-68.0,8.21,6.98,11.98,36.77,2.53,2.492
274,2020-11-24T17:00:00,4,Trentino-Alto Adige,895,82,977,13151,14128,-5,303,...,30.0,3649.0,1169.0,5.0,8.39,6.92,8.30,25.92,1.23,2.144


In [20]:
region_name = df_taa.denominazione_regione.iloc[0].lower()
csv_out_region_name = '{path_csv}/regioni/covid19-{region_name}.csv'.format(path_csv=path_csv, region_name=region_name)
df_taa.to_csv(csv_out_region_name, index=False)
print('Aggiornato {file}'.format(file=csv_out_region_name))

Aggiornato ../data/csv/regioni/covid19-trentino-alto adige.csv
