In [1]:
# Shell escape: list the CSV files available under dati/.
! ls dati/*.csv

dati/anagrafica-vaccini-summary-latest.csv
dati/consegne-vaccini-latest.csv
dati/punti-somministrazione-latest.csv
dati/punti-somministrazione-tipologia.csv
dati/somministrazioni-vaccini-latest.csv
dati/somministrazioni-vaccini-summary-latest.csv
dati/vaccini-summary-latest.csv


In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import os

# Make sure the DW/ output folder exists before any CSV is written into it.
# exist_ok=True removes the check-then-create race of isdir() + mkdir() and is
# a no-op when the folder is already there.
os.makedirs('DW', exist_ok=True)

In [2]:
# mapping columns names
def createNameMappingDict(df):
    '''Build a rename mapping for the columns of ``df``.

    Every existing column name becomes a key; its value is the same name with
    underscores replaced by spaces and ``str.title()`` applied
    (e.g. ``'nome_area'`` -> ``'Nome Area'``). ``df`` itself is not modified.
    '''
    nameMappingDict = {}
    for rawName in df.columns:
        nameMappingDict[rawName] = rawName.replace('_', ' ').title()

    return nameMappingDict

In [3]:
def ETL_anagraficaVacciniSummaryLatest():
    '''Load the registry vaccination summary, enrich it and export it to DW/.

    Reads ``dati/anagrafica-vaccini-summary-latest.csv``, title-cases the column
    names through ``createNameMappingDict``, drops the last column (after parsing
    the date stored in its first row), adds the ``Platea`` column plus three
    percentage columns, binds the result to the module-level ``anaVacSumLat``
    and writes it to ``DW/anagraficaVacciniSummaryLatest.csv``.

    Returns nothing.
    '''
    global anaVacSumLat

    summary = pd.read_csv('dati/anagrafica-vaccini-summary-latest.csv')
    summary = summary.rename(columns=createNameMappingDict(summary))

    # The first row of the last column must hold a "%Y-%m-%d" date (strptime
    # raises otherwise). The reformatted value is not used any further here.
    rawUpdateDate = summary.iloc[0, -1]
    lastUpdateLabel = datetime.strptime(rawUpdateDate, "%Y-%m-%d").strftime("%d/%m/%Y")

    # Discard that last column once its date has been read.
    summary = summary.iloc[:, :-1]

    # -----NEW COLUMNS-----
    secondDoses = summary['Seconda Dose']
    allDoses = summary['Totale']
    summary['% Seconda Dose Sul Totale'] = (100 * secondDoses / allDoses).round(2)

    # Hard-coded values, one per row in file order, so the frame must have
    # exactly nine rows. Presumably the reference population of each row --
    # TODO confirm the source of these figures.
    summary['Platea'] = [
        2298846,
        6084382,
        6854632,
        8937229,
        9414195,
        7364364,
        5968373,
        3628160,
        613523,
    ]
    audience = summary['Platea']
    summary['% Seconda Dose Assoluta'] = (secondDoses / audience * 100).round(2)
    summary['% Totale Assoluto'] = (allDoses / audience * 100).round(2)
    # ---------------------

    anaVacSumLat = summary
    anaVacSumLat.to_csv('DW/anagraficaVacciniSummaryLatest.csv')
    

In [4]:
def ETL_consegneVacciniLatest():
    '''Load the vaccine deliveries file, trim it and export it to DW/.

    Reads ``dati/consegne-vaccini-latest.csv``, title-cases the column names
    through ``createNameMappingDict``, renames ``Nome Area`` to ``Regione``,
    keeps only the columns at positions 1, 2, 3 and 7, converts
    ``Data Consegna`` to datetimes, binds the result to the module-level
    ``consVacciniLat`` and writes it to ``DW/consegneVacciniLatest.csv``.

    Returns nothing.
    '''
    global consVacciniLat

    deliveries = pd.read_csv('dati/consegne-vaccini-latest.csv')
    titleCasedNames = createNameMappingDict(deliveries)
    deliveries = deliveries.rename(columns=titleCasedNames)
    deliveries = deliveries.rename(columns={'Nome Area': 'Regione'})

    # Positional selection: relies on the column order of the input file.
    keptPositions = [1, 2, 3, 7]
    deliveries = deliveries.iloc[:, keptPositions]
    deliveries['Data Consegna'] = pd.to_datetime(deliveries['Data Consegna'])

    consVacciniLat = deliveries
    consVacciniLat.to_csv('DW/consegneVacciniLatest.csv')

In [53]:
def ETL_somministrazioniVacciniSummaryLatest():
    '''Load the administrations summary, clean it and export it to DW/.

    Reads ``dati/somministrazioni-vaccini-summary-latest.csv``, title-cases the
    column names through ``createNameMappingDict``, renames ``Nome Area`` to
    ``Regione``, drops the area/NUTS/ISTAT code columns, reduces
    ``Data Somministrazione`` to plain dates, sets ``Totale`` to first plus
    second doses, sorts by date and region with a fresh 0..n-1 index, binds the
    result to the module-level ``somVacciniSumLat`` and writes it to
    ``DW/somministrazioniVacciniSummaryLatest.csv``.

    Returns nothing.
    '''
    global somVacciniSumLat

    doses = pd.read_csv('dati/somministrazioni-vaccini-summary-latest.csv')
    doses = doses.rename(columns=createNameMappingDict(doses))
    doses = doses.rename(columns={'Nome Area': 'Regione'})

    codeColumns = ['Area', 'Codice Nuts1', 'Codice Nuts2', 'Codice Regione Istat']
    doses = doses.drop(columns=codeColumns)

    administeredOn = pd.to_datetime(doses['Data Somministrazione'])
    doses['Data Somministrazione'] = administeredOn.dt.date

    # -----NEW COLUMNS-----
    doses['Totale'] = doses['Prima Dose'] + doses['Seconda Dose']
    # ---------------------

    # drop=True discards the old index instead of materialising it as an
    # 'index' column that would have to be removed afterwards.
    doses = doses.sort_values(['Data Somministrazione', 'Regione']).reset_index(drop=True)

    somVacciniSumLat = doses
    somVacciniSumLat.to_csv('DW/somministrazioniVacciniSummaryLatest.csv')

In [55]:
# Run the three ETL steps defined above; each one rebinds its module-level
# DataFrame and rewrites its CSV under DW/.
ETL_anagraficaVacciniSummaryLatest()
ETL_consegneVacciniLatest()
ETL_somministrazioniVacciniSummaryLatest()

In [56]:
# Show the frame bound by ETL_somministrazioniVacciniSummaryLatest() as the cell output.
somVacciniSumLat

Unnamed: 0,Data Somministrazione,Totale,Sesso Maschile,Sesso Femminile,Categoria Operatori Sanitari Sociosanitari,Categoria Personale Non Sanitario,Categoria Ospiti Rsa,Categoria Over80,Categoria Forze Armate,Categoria Personale Scolastico,Prima Dose,Seconda Dose,Regione
0,2020-12-27,37,19,18,34,1,2,0,0,0,37,0,Abruzzo
1,2020-12-27,106,41,65,104,2,0,0,0,0,106,0,Basilicata
2,2020-12-27,275,150,125,274,1,0,0,0,0,275,0,Calabria
3,2020-12-27,720,422,298,703,17,0,0,0,0,720,0,Campania
4,2020-12-27,968,372,596,824,118,26,0,0,0,968,0,Emilia-Romagna
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1647,2021-03-18,14102,6994,7108,1280,6805,597,5331,55,34,8395,5707,Sicilia
1648,2021-03-18,8584,3436,5148,422,1402,138,6614,6,2,5140,3444,Toscana
1649,2021-03-18,2723,1058,1665,940,142,168,1459,3,11,1552,1171,Umbria
1650,2021-03-18,134,43,91,4,4,25,100,1,0,98,36,Valle d'Aosta / Vallée d'Aoste
