In [1]:
import sys
import time
import requests
import datetime
import pandas as pd

sys.path.append('../')
from src.util.functions import Functions

# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session
pd.options.mode.chained_assignment = None

# Dataset level: selects the source URL and is embedded in the output file name
level = 'province'

# Output locations, each one derived from the data root
path_data = '../data'
path_csv = f'{path_data}/csv'
csv_out_name = f'{path_csv}/covid19-{level}.csv'

In [2]:
# Ask the project helper for the source URL that corresponds to `level`;
# the bare name on the last line echoes the resolved URL as the cell output.
url_cum = Functions.get_url_cum(level)
url_cum

'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv'

In [3]:
# Load the raw per-province dataset directly from the URL.
# pandas' default NA list contains the literal string "NA", which is also the
# two-letter code of Napoli in `sigla_provincia`; with the defaults that code is
# parsed as NaN and then written out empty. Only empty fields are treated as
# missing here, so the code survives the round trip.
df_raw_p = pd.read_csv(url_cum, keep_default_na=False, na_values=[''])
df_raw_p.columns

Index(['data', 'stato', 'codice_regione', 'denominazione_regione',
       'codice_provincia', 'denominazione_provincia', 'sigla_provincia', 'lat',
       'long', 'totale_casi', 'note'],
      dtype='object')

In [4]:
# Discard the columns listed below and keep everything else
cols_to_drop = ['stato', 'lat', 'long', 'note']
df_pd = df_raw_p.drop(columns=cols_to_drop)
df_pd

Unnamed: 0,data,codice_regione,denominazione_regione,codice_provincia,denominazione_provincia,sigla_provincia,totale_casi
0,2020-02-24T18:00:00,13,Abruzzo,66,L'Aquila,AQ,0
1,2020-02-24T18:00:00,13,Abruzzo,67,Teramo,TE,0
2,2020-02-24T18:00:00,13,Abruzzo,68,Pescara,PE,0
3,2020-02-24T18:00:00,13,Abruzzo,69,Chieti,CH,0
4,2020-02-24T18:00:00,13,Abruzzo,979,In fase di definizione/aggiornamento,,0
...,...,...,...,...,...,...,...
37514,2020-11-18T17:00:00,5,Veneto,27,Venezia,VE,15902
37515,2020-11-18T17:00:00,5,Veneto,28,Padova,PD,20510
37516,2020-11-18T17:00:00,5,Veneto,29,Rovigo,RO,2902
37517,2020-11-18T17:00:00,5,Veneto,899,Fuori Regione / Provincia Autonoma,,1929


In [5]:
# Determine the two reporting days to compare: the newest day present in the
# data and the day returned by the project helper as its predecessor.
#
# - The `data` column is read with bracket access: attribute access (`.data`)
#   can be shadowed by a same-named pandas attribute (older pandas exposed a
#   `Series.data` property).
# - The newest timestamp is taken with max() instead of trusting the last row;
#   the timestamps are ISO-8601 strings, so lexical order is chronological order.
last_date_str = df_pd['data'].max().split('T')[0]
last_date = datetime.datetime.strptime(last_date_str, '%Y-%m-%d')
prev_date = Functions.get_previous_date(last_date)
prev_date_str = prev_date.strftime('%Y-%m-%d')
last_date_str, prev_date_str

('2020-11-18', '2020-11-17')

In [6]:
# Select the rows whose timestamp falls on the latest day
is_last_day = df_pd['data'].str.startswith(last_date_str)
df_raw_p_1 = df_pd[is_last_day]
df_raw_p_1

Unnamed: 0,data,codice_regione,denominazione_regione,codice_provincia,denominazione_provincia,sigla_provincia,totale_casi
37370,2020-11-18T17:00:00,13,Abruzzo,66,L'Aquila,AQ,6714
37371,2020-11-18T17:00:00,13,Abruzzo,67,Teramo,TE,5634
37372,2020-11-18T17:00:00,13,Abruzzo,68,Pescara,PE,4324
37373,2020-11-18T17:00:00,13,Abruzzo,69,Chieti,CH,4092
37374,2020-11-18T17:00:00,13,Abruzzo,879,Fuori Regione / Provincia Autonoma,,207
...,...,...,...,...,...,...,...
37514,2020-11-18T17:00:00,5,Veneto,27,Venezia,VE,15902
37515,2020-11-18T17:00:00,5,Veneto,28,Padova,PD,20510
37516,2020-11-18T17:00:00,5,Veneto,29,Rovigo,RO,2902
37517,2020-11-18T17:00:00,5,Veneto,899,Fuori Regione / Provincia Autonoma,,1929


In [7]:
# Select the rows whose timestamp falls on the previous day
is_prev_day = df_pd['data'].str.startswith(prev_date_str)
df_raw_p_0 = df_pd[is_prev_day]
df_raw_p_0

Unnamed: 0,data,codice_regione,denominazione_regione,codice_provincia,denominazione_provincia,sigla_provincia,totale_casi
37221,2020-11-17T17:00:00,13,Abruzzo,66,L'Aquila,AQ,6454
37222,2020-11-17T17:00:00,13,Abruzzo,67,Teramo,TE,5511
37223,2020-11-17T17:00:00,13,Abruzzo,68,Pescara,PE,4200
37224,2020-11-17T17:00:00,13,Abruzzo,69,Chieti,CH,3988
37225,2020-11-17T17:00:00,13,Abruzzo,879,Fuori Regione / Provincia Autonoma,,208
...,...,...,...,...,...,...,...
37365,2020-11-17T17:00:00,5,Veneto,27,Venezia,VE,15521
37366,2020-11-17T17:00:00,5,Veneto,28,Padova,PD,20097
37367,2020-11-17T17:00:00,5,Veneto,29,Rovigo,RO,2827
37368,2020-11-17T17:00:00,5,Veneto,899,Fuori Regione / Provincia Autonoma,,1857


In [8]:
# State S: latest-day rows ordered by province code, renumbered from 0
df_raw_p_1 = (
    df_raw_p_1
    .sort_values(by='codice_provincia')
    .reset_index(drop=True)
)
df_raw_p_1

Unnamed: 0,data,codice_regione,denominazione_regione,codice_provincia,denominazione_provincia,sigla_provincia,totale_casi
0,2020-11-18T17:00:00,1,Piemonte,1,Torino,TO,73694
1,2020-11-18T17:00:00,1,Piemonte,2,Vercelli,VC,5077
2,2020-11-18T17:00:00,1,Piemonte,3,Novara,NO,9932
3,2020-11-18T17:00:00,1,Piemonte,4,Cuneo,CN,17278
4,2020-11-18T17:00:00,1,Piemonte,5,Asti,AT,6223
...,...,...,...,...,...,...,...
144,2020-11-18T17:00:00,9,Toscana,995,In fase di definizione/aggiornamento,,0
145,2020-11-18T17:00:00,4,P.A. Trento,996,In fase di definizione/aggiornamento,,0
146,2020-11-18T17:00:00,10,Umbria,997,In fase di definizione/aggiornamento,,0
147,2020-11-18T17:00:00,2,Valle d'Aosta,998,In fase di definizione/aggiornamento,,0


In [9]:
# State S - 1: previous-day rows ordered by province code, renumbered from 0
df_raw_p_0 = (
    df_raw_p_0
    .sort_values(by='codice_provincia')
    .reset_index(drop=True)
)
df_raw_p_0

Unnamed: 0,data,codice_regione,denominazione_regione,codice_provincia,denominazione_provincia,sigla_provincia,totale_casi
0,2020-11-17T17:00:00,1,Piemonte,1,Torino,TO,71980
1,2020-11-17T17:00:00,1,Piemonte,2,Vercelli,VC,4863
2,2020-11-17T17:00:00,1,Piemonte,3,Novara,NO,9664
3,2020-11-17T17:00:00,1,Piemonte,4,Cuneo,CN,17014
4,2020-11-17T17:00:00,1,Piemonte,5,Asti,AT,6004
...,...,...,...,...,...,...,...
144,2020-11-17T17:00:00,9,Toscana,995,In fase di definizione/aggiornamento,,0
145,2020-11-17T17:00:00,4,P.A. Trento,996,In fase di definizione/aggiornamento,,0
146,2020-11-17T17:00:00,10,Umbria,997,In fase di definizione/aggiornamento,,0
147,2020-11-17T17:00:00,2,Valle d'Aosta,998,In fase di definizione/aggiornamento,,0


In [10]:
# Daily change in total cases per province (latest day minus previous day).
# The previous-day totals are looked up by `codice_provincia` instead of being
# subtracted row-by-row: positional subtraction silently pairs the wrong
# provinces whenever the two days do not contain exactly the same codes.
assert df_raw_p_0['codice_provincia'].is_unique, 'duplicate codice_provincia in previous-day data'
prev_totals = df_raw_p_0.set_index('codice_provincia')['totale_casi']

df_p = df_raw_p_1.copy()
# A province with no previous-day row gets NaN rather than a misaligned difference
df_p['variazione_totale_casi'] = df_p['totale_casi'] - df_p['codice_provincia'].map(prev_totals)
df_p

Unnamed: 0,data,codice_regione,denominazione_regione,codice_provincia,denominazione_provincia,sigla_provincia,totale_casi,variazione_totale_casi
0,2020-11-18T17:00:00,1,Piemonte,1,Torino,TO,73694,1714
1,2020-11-18T17:00:00,1,Piemonte,2,Vercelli,VC,5077,214
2,2020-11-18T17:00:00,1,Piemonte,3,Novara,NO,9932,268
3,2020-11-18T17:00:00,1,Piemonte,4,Cuneo,CN,17278,264
4,2020-11-18T17:00:00,1,Piemonte,5,Asti,AT,6223,219
...,...,...,...,...,...,...,...,...
144,2020-11-18T17:00:00,9,Toscana,995,In fase di definizione/aggiornamento,,0,0
145,2020-11-18T17:00:00,4,P.A. Trento,996,In fase di definizione/aggiornamento,,0,0
146,2020-11-18T17:00:00,10,Umbria,997,In fase di definizione/aggiornamento,,0,0
147,2020-11-18T17:00:00,2,Valle d'Aosta,998,In fase di definizione/aggiornamento,,0,0


In [11]:
# Persist the result as CSV at the configured output path, without the row index
df_p.to_csv(path_or_buf=csv_out_name, index=False)