In [1]:
import sys
import os
from pathlib import Path
sys.path.append(os.path.abspath(''))
import numpy as np
import pandas as pd
import geopandas as gpd
from geo import COM, PROV, REG, DEMOG

import matplotlib.pyplot as plt
%matplotlib notebook

In [2]:
CWD = Path(os.path.abspath(''))

data_dir = CWD / 'protezione_civile/COVID-19'
prov_dir = data_dir /  'dati-province'

In [3]:
DEMOG[['prov_name', 'population']].groupby('prov_name').sum().query('prov_name=="roma"')
DEMOG.query('prov_name=="roma" and Sesso=="totale"')
pop = DEMOG.pivot_table(index=['prov_name'], columns=['Sesso', 'Stato civile', 'Età'])['population']['totale']['totale'][['totale']].reset_index()
prov = PROV.merge(pop, on='prov_name')
prov.plot('totale', legend=True)
plt.title('Residenti al primo Gennaio 2020')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Residenti al primo Gennaio 2020')

# Dati della Protezione Civile
Data from [github.com/pcm-dpc/COVID-19](https://github.com/pcm-dpc/COVID-19).

Questi dati sono da aggiornare giornalmente coi comandi:
```
cd covid-19/protezione_civile/COVID-19
git fetch
git merge
```

In [4]:
df = []
prec = lambda s: f'{s:.4f}'
for f in sorted(prov_dir.glob('dpc-covid19-ita-province-2020*.csv')):
    tmp_df = pd.read_csv(f, encoding='latin-1')
    tmp_df['day'] = str(f).split('/')[-1].split('-')[-1].rstrip('.csv')
    tmp_df['geometry'] = gpd.points_from_xy(tmp_df['long'], tmp_df['lat'])
    tmp_df['lat_long'] = tmp_df['lat'].apply(prec).astype(str) + '_' + tmp_df['long'].apply(prec).astype(str)
    df.append(tmp_df)
    del tmp_df
DF = pd.concat(df)

df = DF.pivot_table(index='sigla_provincia', columns='day', values='totale_casi')
df['totale_casi'] = df.sum(1)
df['nuovi_casi'] = df.iloc[:,-2] - df.iloc[:,-3]

In [5]:
m = prov.merge(df, left_on='prov_acr', right_on='sigla_provincia')
m['perc_casi'] = m['totale_casi'] / m['totale'] * 100
m['perc_casi'] = m['totale_casi'] / m['totale'] * 100 
m['perc_nuovi_casi'] = m['nuovi_casi'] / m['totale'] * 100

In [6]:
m.plot(column='nuovi_casi', legend=True)
plt.title(f'Nuovi casi Covid-19 \n aggiornamento: {df.columns[-3]}')

m.plot(column='perc_casi', legend=True)
plt.title(f'Percentuale casi totali rispetto alla popolazione \n aggiornamento: {df.columns[-3]}')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Percentuale casi totali rispetto alla popolazione \n aggiornamento: 20200319')

# Andamento per Regioni

In [7]:
df = DF.pivot_table(index='denominazione_regione', columns='data', values='totale_casi', aggfunc=np.sum)
df['incremento'] = df[df.columns[-1]] - df[df.columns[-2]]
df = df.sort_values(['incremento', df.columns[-1]], ascending=[0, 0])
display(df[df.columns[-5:]])

data,2020-03-16 17:00:00,2020-03-17 17:00:00,2020-03-18 17:00:00,2020-03-19 17:00:00,incremento
denominazione_regione,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lombardia,14649.0,16220.0,17713.0,19884.0,2171.0
Emilia Romagna,3522.0,3931.0,4525.0,5214.0,689.0
Piemonte,1516.0,1897.0,2341.0,2932.0,591.0
Veneto,2473.0,2704.0,3214.0,3484.0,270.0
Campania,400.0,460.0,460.0,652.0,192.0
Liguria,667.0,778.0,887.0,1059.0,172.0
Marche,1242.0,1371.0,1568.0,1737.0,169.0
Toscana,866.0,1053.0,1330.0,1482.0,152.0
Friuli Venezia Giulia,386.0,394.0,462.0,599.0,137.0
Abruzzo,176.0,229.0,263.0,385.0,122.0


# Andamento per Province

In [8]:
df = DF.pivot_table(index=['sigla_provincia', 'denominazione_provincia'], columns='data', values='totale_casi', aggfunc=np.sum)
df['incremento'] = (df.iloc[:, -1] - df.iloc[:, -2])
df = df.sort_values(['incremento', df.columns[-2]], ascending=[0, 0])
display(df[df.columns[-5:]].head(20))

Unnamed: 0_level_0,data,2020-03-16 17:00:00,2020-03-17 17:00:00,2020-03-18 17:00:00,2020-03-19 17:00:00,incremento
sigla_provincia,denominazione_provincia,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MI,Milano,1983.0,2326.0,2644.0,3278.0,634.0
BS,Brescia,2918.0,3300.0,3784.0,4247.0,463.0
BG,Bergamo,3760.0,3993.0,4305.0,4645.0,340.0
RE,Reggio nell'Emilia,241.0,299.0,414.0,608.0,194.0
MN,Mantova,382.0,465.0,514.0,636.0,122.0
CR,Cremona,1881.0,2073.0,2167.0,2286.0,119.0
GE,Genova,274.0,330.0,378.0,488.0,110.0
MB,Monza e della Brianza,346.0,376.0,401.0,495.0,94.0
PC,Piacenza,1073.0,1204.0,1340.0,1428.0,88.0
RM,Roma,412.0,486.0,590.0,678.0,88.0


# Provincia di Brindisi

In [9]:
BRi = df.index.get_level_values(1).tolist().index('Brindisi')
print(f'Ranking Nazionale: {BRi}')
df.iloc[[BRi]][df.columns[-4:]]

Ranking Nazionale: 63


Unnamed: 0_level_0,data,2020-03-17 17:00:00,2020-03-18 17:00:00,2020-03-19 17:00:00,incremento
sigla_provincia,denominazione_provincia,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BR,Brindisi,52.0,65.0,75.0,10.0


# Dati Johns Hopkins
Data from [github.com/CSSEGISandData/COVID-19](https://github.com/CSSEGISandData/COVID-19)

In [10]:
df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv')
# df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv')
# df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv')

In [11]:
countries = ['Italy','France','Germany','Spain','Belgium', 'US', 'UK', 'Denmark', 'Netherlands']
dates = [c for c in df.columns if '/20' in c]
df.groupby('Country/Region').sum().loc[countries][dates[-10:]].T.plot(logy=True, title='Confirmed Cases')

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  This is separate from the ipykernel package so we can avoid doing imports until


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7fce290ef190>

# Andamento Globale

In [12]:
df = df.groupby('Country/Region').sum().loc[countries][dates[-10:]]
df['increment'] = df.iloc[:, -1] - df.iloc[:, -2]
df = df.sort_values(['increment', df.columns[-1]], ascending=[0, 0])
display(df)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,increment
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Italy,9172.0,10149.0,12462.0,12462.0,17660.0,21157.0,24747.0,27980.0,31506.0,35713.0,4207.0
Germany,1176.0,1457.0,1908.0,2078.0,3675.0,4585.0,5795.0,7272.0,9257.0,12327.0,3070.0
Spain,1073.0,1695.0,2277.0,2277.0,5232.0,6391.0,7798.0,9942.0,11748.0,13910.0,2162.0
France,1217.0,1792.0,2290.0,2290.0,3678.0,4487.0,4523.0,6668.0,7699.0,9105.0,1406.0
US,583.0,959.0,1281.0,1663.0,2179.0,2727.0,3499.0,4632.0,6421.0,7783.0,1362.0
Netherlands,321.0,382.0,503.0,503.0,806.0,962.0,1138.0,1416.0,1711.0,2058.0,347.0
Belgium,239.0,267.0,314.0,314.0,559.0,689.0,886.0,1058.0,1243.0,1486.0,243.0
Denmark,92.0,264.0,444.0,617.0,804.0,836.0,875.0,932.0,1024.0,1115.0,91.0
UK,,,,,,,,,,,
