In [1]:
import sys
import os
from pathlib import Path
sys.path.append(os.path.abspath(''))
import numpy as np
import pandas as pd
import geopandas as gpd
from geo import COM, PROV, REG, DEMOG

import matplotlib.pyplot as plt
%matplotlib notebook

In [2]:
CWD = Path(os.path.abspath(''))

data_dir = CWD / 'protezione_civile/COVID-19'
prov_dir = data_dir /  'dati-province'

In [3]:
DEMOG[['prov_name', 'population']].groupby('prov_name').sum().query('prov_name=="roma"')
DEMOG.query('prov_name=="roma" and Sesso=="totale"')
pop = DEMOG.pivot_table(index=['prov_name'], columns=['Sesso', 'Stato civile', 'Età'])['population']['totale']['totale'][['totale']].reset_index()
prov = PROV.merge(pop, on='prov_name')
prov.plot('totale', legend=True)
plt.title('Residenti al primo Gennaio 2020')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Residenti al primo Gennaio 2020')

# Dati della Protezione Civile
Data from [github.com/pcm-dpc/COVID-19](https://github.com/pcm-dpc/COVID-19).

Questi dati sono da aggiornare giornalmente coi comandi:
```
cd covid-19/protezione_civile/COVID-19
git fetch
git merge
```

In [4]:
df = []
prec = lambda s: f'{s:.4f}'
for f in sorted(prov_dir.glob('dpc-covid19-ita-province-2020*.csv')):
    tmp_df = pd.read_csv(f, encoding='latin-1')
    tmp_df['day'] = str(f).split('/')[-1].split('-')[-1].rstrip('.csv')
    tmp_df['geometry'] = gpd.points_from_xy(tmp_df['long'], tmp_df['lat'])
    tmp_df['lat_long'] = tmp_df['lat'].apply(prec).astype(str) + '_' + tmp_df['long'].apply(prec).astype(str)
    df.append(tmp_df)
    del tmp_df
DF = pd.concat(df)

df = DF.pivot_table(index='sigla_provincia', columns='day', values='totale_casi')
df['totale_casi'] = df.sum(1)
df['nuovi_casi'] = df.iloc[:,-2] - df.iloc[:,-3]

In [5]:
m = prov.merge(df, left_on='prov_acr', right_on='sigla_provincia')
m['perc_casi'] = m['totale_casi'] / m['totale'] * 100 
m['perc_nuovi_casi'] = m['nuovi_casi'] / m['totale'] * 100

In [19]:
m.plot(column='nuovi_casi', legend=True)
plt.title(f'Nuovi casi Covid-19 \n aggiornamento: {df.columns[-3]}')

m.plot(column='perc_nuovi_casi', legend=True)
plt.title(f'Percentuale nuovi casirispetto alla popolazione provinciale\n aggiornamento: {df.columns[-3]}')

m.plot(column='perc_casi', legend=True)
plt.title(f'Percentuale casi totali rispetto alla popolazione provinciale\n aggiornamento: {df.columns[-3]}')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Percentuale casi totali rispetto alla popolazione provinciale\n aggiornamento: 2020-03-19 17:00:00')

# Andamento per Regioni

In [7]:
df = DF.pivot_table(index='denominazione_regione', columns='data', values='totale_casi', aggfunc=np.sum)
df['incremento'] = df[df.columns[-1]] - df[df.columns[-2]]
df = df.sort_values(['incremento', df.columns[-1]], ascending=[0, 0])
display(df[df.columns[-5:]])

data,2020-03-17 17:00:00,2020-03-18 17:00:00,2020-03-19 17:00:00,2020-03-20 17:00:00,incremento
denominazione_regione,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lombardia,16220.0,17713.0,19884.0,22264.0,2380.0
Emilia Romagna,3931.0,4525.0,5214.0,5968.0,754.0
Veneto,2704.0,3214.0,3484.0,4031.0,547.0
Piemonte,1897.0,2341.0,2932.0,3461.0,529.0
Toscana,1053.0,1330.0,1482.0,1793.0,311.0
Marche,1371.0,1568.0,1737.0,1981.0,244.0
Lazio,607.0,724.0,823.0,1008.0,185.0
Liguria,778.0,887.0,1059.0,1221.0,162.0
P.A. Trento,385.0,455.0,523.0,642.0,119.0
P.A. Bolzano,291.0,376.0,436.0,548.0,112.0


# Andamento per Province

In [17]:
df = DF.pivot_table(index=['sigla_provincia', 'denominazione_provincia'], columns='data', values='totale_casi', aggfunc=np.sum)
df['incremento'] = (df.iloc[:, -1] - df.iloc[:, -2])
df = df.sort_values(['incremento', df.columns[-2]], ascending=[0, 0]).reset_index()
df.index.rename('rank', inplace=True)
df.columns.rename('', inplace=True)
BRi = df.query('sigla_provincia=="BR"').index.values[0]
display(df.iloc[[1,2,3,4,5,6,7,BRi,-3,-2,-1], [0,1,-3,-2,-1]])
df.shape

Unnamed: 0_level_0,sigla_provincia,denominazione_provincia,2020-03-19 17:00:00,2020-03-20 17:00:00,incremento
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,TO,Torino,1042.0,1556.0,514.0
2,BG,Bergamo,4645.0,5154.0,509.0
3,BS,Brescia,4247.0,4648.0,401.0
4,MB,Monza e della Brianza,495.0,816.0,321.0
5,AL,Alessandria,374.0,595.0,221.0
6,RE,Reggio nell'Emilia,608.0,773.0,165.0
7,PC,Piacenza,1428.0,1575.0,147.0
76,BR,Brindisi,75.0,84.0,9.0
103,VV,Vibo Valentia,8.0,8.0,0.0
104,RG,Ragusa,7.0,7.0,0.0


(106, 29)

# Dati Johns Hopkins
Data from [github.com/CSSEGISandData/COVID-19](https://github.com/CSSEGISandData/COVID-19)

In [9]:
df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv')
# df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv')
# df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv')

In [10]:
countries = ['Italy','France','Germany','Spain','Belgium', 'US', 'UK', 'Denmark', 'Netherlands']
dates = [c for c in df.columns if '/20' in c]
df.groupby('Country/Region').sum().loc[countries][dates[-10:]].T.plot(logy=True, title='Confirmed Cases')

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  This is separate from the ipykernel package so we can avoid doing imports until


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f9d9a8c98d0>

# Andamento Globale

In [11]:
df = df.groupby('Country/Region').sum().loc[countries][dates[-10:]]
df['increment'] = df.iloc[:, -1] - df.iloc[:, -2]
df = df.sort_values(['increment', df.columns[-1]], ascending=[0, 0])
display(df)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,increment
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
US,959.0,1281.0,1663.0,2179.0,2727.0,3499.0,4632.0,6421.0,7783.0,13677.0,5894.0
Italy,10149.0,12462.0,12462.0,17660.0,21157.0,24747.0,27980.0,31506.0,35713.0,41035.0,5322.0
Spain,1695.0,2277.0,2277.0,5232.0,6391.0,7798.0,9942.0,11748.0,13910.0,17963.0,4053.0
Germany,1457.0,1908.0,2078.0,3675.0,4585.0,5795.0,7272.0,9257.0,12327.0,15320.0,2993.0
France,1792.0,2290.0,2290.0,3678.0,4487.0,4523.0,6668.0,7699.0,9105.0,10947.0,1842.0
Netherlands,382.0,503.0,503.0,806.0,962.0,1138.0,1416.0,1711.0,2058.0,2467.0,409.0
Belgium,267.0,314.0,314.0,559.0,689.0,886.0,1058.0,1243.0,1486.0,1795.0,309.0
Denmark,264.0,444.0,617.0,804.0,836.0,875.0,933.0,1025.0,1116.0,1225.0,109.0
UK,,,,,,,,,,,
