## Data wrangling to reproduce Istat-based KPIs

In [2]:
from enum import Enum
from os import path, listdir

import numpy as np
import pandas as pd
import geopandas as gpd
import geopy, geopy.distance
import shapely
from sklearn import gaussian_process

from matplotlib import pyplot as plt 
import seaborn as sns
plt.rcParams['figure.figsize']= (20,14)

In [3]:
## TODO: find way to put this into some global settings
import os
import sys
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

from references import common_cfg
from references import istat_kpi

In [4]:
selectedCity = common_cfg.cityList[0]
print(selectedCity)

Milano


In [5]:
# load sezione di censimento data for the selected city
istatData = common_cfg.get_istat_cpa_data(selectedCity)

In [6]:
istatData.head()

Unnamed: 0_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,E25,E26,E27,E28,E29,E30,E31,quartiere,IDquartiere,geometry
SEZ2011,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
151460000236,49010,15146,236,1,8061.468253,449.226698,70.0,1.0,3,5.0,...,2,0,51,5,2,0,0,DUOMO,1,"POLYGON ((1514122.149438965 5034191.777491422,..."
151460000237,49011,15146,237,1,5416.911543,344.834372,173.0,1.0,12,7.79,...,5,1,74,0,7,0,0,DUOMO,1,"POLYGON ((1514166.339374326 5034198.582483572,..."
151460000241,49015,15146,241,1,12107.858114,510.055011,160.0,1.0,13,9.15,...,3,1,105,14,0,0,0,DUOMO,1,"POLYGON ((1514365.509413606 5034211.737496569,..."
151460000244,49018,15146,244,1,11178.70356,421.080703,105.0,1.0,22,27.16,...,1,2,67,6,1,0,0,DUOMO,1,"POLYGON ((1514508.984403429 5034317.407504656,..."
151460000151,49241,15146,151,1,2727.769331,262.767785,6.0,1.0,1,25.0,...,0,0,3,1,0,0,0,DUOMO,1,"POLYGON ((1515504.809281959 5035162.942410328,..."


In [7]:
quartiereData = istatData.groupby([common_cfg.IdQuartiereColName,istatData.quartiere]).sum()
quartiereData.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,E22,E23,E24,E25,E26,E27,E28,E29,E30,E31
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,93,130,279,297,237,12074,699,346,58,2
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,35,71,182,219,284,11828,445,348,35,0
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0,0,0,1,1,28,2,1,0,0
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,21,72,154,247,226,10163,463,281,24,0
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,24,33,70,149,203,7922,194,273,43,11


In [8]:
metaData = common_cfg.get_istat_metadata()
metaData.head()

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
0,CODREG,Codice numerico che identifica univocamente la...
1,REGIONE,Denominazione della regione
2,CODPRO,Codice numerico che identifica univocamente la...
3,PROVINCIA,Denominazione della provincia
4,CODCOM,Codice numerico che identifica univocamente il...


## Indici

1. Percentuale residenti stranieri.
    - Sul totale dei residenti
    
2. Percentuali di provenienza degli stranieri (*per ogni continente la percentuale è rispetto la popolazione residente straniera*).
    - Europa
    - Africa
    - Asia
    - America
    - Oceania
    
3. Indice di vecchiaia: *rapporto tra il numero di residenti di età > 64 e numero residenti che hanno meno di 14 anni.*
    - Sul totale popolazione
    - Tra gli uomini
    - Tra le donne
    
4. Indice di popolazione attiva: *rapporto tra il numero di residenti con età compresa tra i 39 ed i 64 anni e quelli tra i 15 ed i 39.*
    - Sul totale popolazione
    - Tra gli uomini
    - Tra le donne
    
5. Popolazione residente che svolge la propria giornata fuori dal comune di residenza sul totale della popolazione residente.

6. Popolazione residente che svolge la propria giornata nel comune ma fuori dal proprio quartiere

7. Popolazione residente che svolge la propria giornata nel proprio quartiere

#### 1. Percentuale residenti stranieri

In [9]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'P1') | (metaData['NOME_CAMPO'] == 'ST15')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
12,P1,Popolazione residente - totale
102,ST15,Stranieri residenti in Italia - totale


In [10]:
# Creazione indice residenti stranieri
istat_kpi.new_index(quartiereData, ['ST15'], ['P1'], 'indice_stranieri').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,E23,E24,E25,E26,E27,E28,E29,E30,E31,indice_stranieri
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,130,279,297,237,12074,699,346,58,2,0.108609
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,71,182,219,284,11828,445,348,35,0,0.102618
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0,0,1,1,28,2,1,0,0,0.185185
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,72,154,247,226,10163,463,281,24,0,0.102723
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,33,70,149,203,7922,194,273,43,11,0.082385


#### 2. Percentuali di provenienza degli stranieri

In [11]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'ST9') | 
         (metaData['NOME_CAMPO'] == 'ST10')|
         (metaData['NOME_CAMPO'] == 'ST11')|
         (metaData['NOME_CAMPO'] == 'ST12')|
         (metaData['NOME_CAMPO'] == 'ST13')|
         (metaData['NOME_CAMPO'] == 'ST15')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
96,ST9,Stranieri residenti in Italia - Europa
97,ST10,Stranieri residenti in Italia - Africa
98,ST11,Stranieri residenti in Italia - America
99,ST12,Stranieri residenti in Italia - Asia
100,ST13,Stranieri residenti in Italia - Oceania
102,ST15,Stranieri residenti in Italia - totale


In [12]:
# Lista di nomi di variabili che voglio creare 
list_columns_continenti = ['perc_europei', 'perc_africa', 'perc_america', 'perc_asia', 'perc_oceania']

# Lista colonne utili dal df di istat
list_attr_continenti = ['ST9', 'ST10', 'ST11', 'ST12', 'ST13']

In [13]:
# Calcolare l'indice per ogni continente
for idx, continente in enumerate(list_columns_continenti):
    istat_kpi.new_index(quartiereData, [list_attr_continenti[idx]], ['ST15'], list_columns_continenti[idx])

In [14]:
quartiereData.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,E28,E29,E30,E31,indice_stranieri,perc_europei,perc_africa,perc_america,perc_asia,perc_oceania
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,699,346,58,2,0.108609,0.305572,0.088077,0.175554,0.429599,0.001198
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,445,348,35,0,0.102618,0.346041,0.06393,0.171848,0.412903,0.005279
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,2,1,0,0,0.185185,0.1,0.0,0.2,0.7,0.0
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,463,281,24,0,0.102723,0.275503,0.044414,0.205413,0.473282,0.001388
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,194,273,43,11,0.082385,0.309179,0.103382,0.199034,0.386473,0.001932


#### 3. Indice vecchiaia

##### 3.1. Totale popolazione residente

In [15]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'P27') | 
         (metaData['NOME_CAMPO'] == 'P28')|
         (metaData['NOME_CAMPO'] == 'P29')|
         (metaData['NOME_CAMPO'] == 'P14')|
         (metaData['NOME_CAMPO'] == 'P15')|
         (metaData['NOME_CAMPO'] == 'P16')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
25,P14,Popolazione residente - età < 5 anni
26,P15,Popolazione residente - età 5 - 9 anni
27,P16,Popolazione residente - età 10 - 14 anni
38,P27,Popolazione residente - età 65 - 69 anni
39,P28,Popolazione residente - età 70 - 74 anni
40,P29,Popolazione residente - età > 74 anni


In [16]:
# Creo la nuova colonna
istat_kpi.new_index(quartiereData, ['P27','P28','P29'], ['P14','P15','P16'], 'indice_vecchiaia').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,E29,E30,E31,indice_stranieri,perc_europei,perc_africa,perc_america,perc_asia,perc_oceania,indice_vecchiaia
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,346,58,2,0.108609,0.305572,0.088077,0.175554,0.429599,0.001198,1.677846
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,348,35,0,0.102618,0.346041,0.06393,0.171848,0.412903,0.005279,1.793528
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,1,0,0,0.185185,0.1,0.0,0.2,0.7,0.0,0.454545
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,281,24,0,0.102723,0.275503,0.044414,0.205413,0.473282,0.001388,1.925208
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,273,43,11,0.082385,0.309179,0.103382,0.199034,0.386473,0.001932,1.873557


##### 3.2. Totale popolazione residente maschile

In [17]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'P43') | 
         (metaData['NOME_CAMPO'] == 'P44')|
         (metaData['NOME_CAMPO'] == 'P45')|
         (metaData['NOME_CAMPO'] == 'P30')|
         (metaData['NOME_CAMPO'] == 'P31')|
         (metaData['NOME_CAMPO'] == 'P32')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
41,P30,Popolazione residente - maschi - età < 5 anni
42,P31,Popolazione residente - maschi - età 5 - 9 anni
43,P32,Popolazione residente - maschi - età 10 - 14 anni
54,P43,Popolazione residente - maschi - età 65 - 69 anni
55,P44,Popolazione residente - maschi - età 70 - 74 anni
56,P45,Popolazione residente - maschi - età > 74 anni


In [18]:
# Creo la nuova colonna
istat_kpi.new_index(quartiereData, ['P43','P44','P45'], ['P30','P31','P32'], 'indice_vecchiaia_uomo').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,E30,E31,indice_stranieri,perc_europei,perc_africa,perc_america,perc_asia,perc_oceania,indice_vecchiaia,indice_vecchiaia_uomo
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,58,2,0.108609,0.305572,0.088077,0.175554,0.429599,0.001198,1.677846,1.408425
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,35,0,0.102618,0.346041,0.06393,0.171848,0.412903,0.005279,1.793528,1.457093
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0,0,0.185185,0.1,0.0,0.2,0.7,0.0,0.454545,0.4
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,24,0,0.102723,0.275503,0.044414,0.205413,0.473282,0.001388,1.925208,1.57361
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,43,11,0.082385,0.309179,0.103382,0.199034,0.386473,0.001932,1.873557,1.435216


##### 3.3. Totale popolazione residente femminile
Il numero della popolazione femminile non è espresso esplicitamente nel dataset. Pertanto è ottenuto facendo la differenza tra la popolazione residente totale e quella maschile.

Le colonne utilizzate sono descritte nelle due sezioni precedenti (`3.1` e `3.2`).

In [19]:
# Calcola numeratore e denominatore
donne_anziane = quartiereData[['P27','P28','P29']].sum(axis=1)-(quartiereData[['P43','P44','P45']].sum(axis=1))
donne_giovani = quartiereData[['P14','P15','P16']].sum(axis=1)-quartiereData[['P30','P31','P32']].sum(axis=1)

# Quindi, il nuovo indice
quartiereData['indice_vecchiaia_donna'] = donne_anziane/donne_giovani

quartiereData.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,E31,indice_stranieri,perc_europei,perc_africa,perc_america,perc_asia,perc_oceania,indice_vecchiaia,indice_vecchiaia_uomo,indice_vecchiaia_donna
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,2,0.108609,0.305572,0.088077,0.175554,0.429599,0.001198,1.677846,1.408425,1.964878
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,0,0.102618,0.346041,0.06393,0.171848,0.412903,0.005279,1.793528,1.457093,2.158745
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0,0.185185,0.1,0.0,0.2,0.7,0.0,0.454545,0.4,0.5
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,0,0.102723,0.275503,0.044414,0.205413,0.473282,0.001388,1.925208,1.57361,2.288288
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,11,0.082385,0.309179,0.103382,0.199034,0.386473,0.001932,1.873557,1.435216,2.351025


#### 4. Indice popolazione attiva

##### 4.1. Totale popolazione residente


In [20]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'P22')| 
         (metaData['NOME_CAMPO'] == 'P23')|
         (metaData['NOME_CAMPO'] == 'P24')|
         (metaData['NOME_CAMPO'] == 'P25')|
         (metaData['NOME_CAMPO'] == 'P26')|
         (metaData['NOME_CAMPO'] == 'P17')|
         (metaData['NOME_CAMPO'] == 'P18')|
         (metaData['NOME_CAMPO'] == 'P19')|
         (metaData['NOME_CAMPO'] == 'P20')|
         (metaData['NOME_CAMPO'] == 'P21')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
28,P17,Popolazione residente - età 15 - 19 anni
29,P18,Popolazione residente - età 20 - 24 anni
30,P19,Popolazione residente - età 25 - 29 anni
31,P20,Popolazione residente - età 30 - 34 anni
32,P21,Popolazione residente - età 35 - 39 anni
33,P22,Popolazione residente - età 40 - 44 anni
34,P23,Popolazione residente - età 45 - 49 anni
35,P24,Popolazione residente - età 50 - 54 anni
36,P25,Popolazione residente - età 55 - 59 anni
37,P26,Popolazione residente - età 60 - 64 anni


In [21]:
# Creo la nuova colonna
istat_kpi.new_index(quartiereData, ['P22','P23','P24','P25','P26'], ['P17','P18','P19','P20','P21'], 'indice_pop_attiva').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,indice_stranieri,perc_europei,perc_africa,perc_america,perc_asia,perc_oceania,indice_vecchiaia,indice_vecchiaia_uomo,indice_vecchiaia_donna,indice_pop_attiva
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,0.108609,0.305572,0.088077,0.175554,0.429599,0.001198,1.677846,1.408425,1.964878,1.439135
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,0.102618,0.346041,0.06393,0.171848,0.412903,0.005279,1.793528,1.457093,2.158745,1.499047
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0.185185,0.1,0.0,0.2,0.7,0.0,0.454545,0.4,0.5,0.809524
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,0.102723,0.275503,0.044414,0.205413,0.473282,0.001388,1.925208,1.57361,2.288288,1.462838
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,0.082385,0.309179,0.103382,0.199034,0.386473,0.001932,1.873557,1.435216,2.351025,1.455016


##### 4.2. Popolazione residente maschile

In [22]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'P38')| 
         (metaData['NOME_CAMPO'] == 'P39')|
         (metaData['NOME_CAMPO'] == 'P40')|
         (metaData['NOME_CAMPO'] == 'P41')|
         (metaData['NOME_CAMPO'] == 'P42')|
         (metaData['NOME_CAMPO'] == 'P33')|
         (metaData['NOME_CAMPO'] == 'P34')|
         (metaData['NOME_CAMPO'] == 'P35')|
         (metaData['NOME_CAMPO'] == 'P36')|
         (metaData['NOME_CAMPO'] == 'P37')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
44,P33,Popolazione residente - maschi - età 15 - 19 anni
45,P34,Popolazione residente - maschi - età 20 - 24 anni
46,P35,Popolazione residente - maschi - età 25 - 29 anni
47,P36,Popolazione residente - maschi - età 30 - 34 anni
48,P37,Popolazione residente - maschi - età 35 - 39 anni
49,P38,Popolazione residente - maschi - età 40 - 44 anni
50,P39,Popolazione residente - maschi - età 45 - 49 anni
51,P40,Popolazione residente - maschi - età 50 - 54 anni
52,P41,Popolazione residente - maschi - età 55 - 59 anni
53,P42,Popolazione residente - maschi - età 60 - 64 anni


In [23]:
# Creo la nuova colonna
istat_kpi.new_index(quartiereData, ['P38','P39','P40','P41','P42'], ['P33','P34','P35','P36','P37'], 'indice_pop_attiva').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,indice_stranieri,perc_europei,perc_africa,perc_america,perc_asia,perc_oceania,indice_vecchiaia,indice_vecchiaia_uomo,indice_vecchiaia_donna,indice_pop_attiva
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,0.108609,0.305572,0.088077,0.175554,0.429599,0.001198,1.677846,1.408425,1.964878,1.412698
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,0.102618,0.346041,0.06393,0.171848,0.412903,0.005279,1.793528,1.457093,2.158745,1.449253
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0.185185,0.1,0.0,0.2,0.7,0.0,0.454545,0.4,0.5,0.5
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,0.102723,0.275503,0.044414,0.205413,0.473282,0.001388,1.925208,1.57361,2.288288,1.365452
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,0.082385,0.309179,0.103382,0.199034,0.386473,0.001932,1.873557,1.435216,2.351025,1.387879


##### 4.3. Popolazione residente femminile

Anche in questo caso, i numeri sulle donne sono ricavati dalla differenza tra il totale e la popolazione maschile.

In [24]:
# Calcolo numeratore e denominatore
numeratore_donne = (quartiereData[['P22','P23','P24','P25','P26']].sum(axis=1)) - (quartiereData[['P38','P39','P40','P41','P42']].sum(axis=1))
denominatore_donne = (quartiereData[['P17','P18','P19','P20','P21']].sum(axis=1))-(quartiereData[['P33','P34','P35','P36','P37']].sum(axis=1))

# Creazione colonna
quartiereData['indice_pop_attiva_donna'] = numeratore_donne/denominatore_donne

#### 5.  Popolazione residente che svolge la propria giornata fuori dal comune di residenza sul totale della popolazione residente

In [25]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'P138')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
85,P138,Popolazione residente che si sposta giornalmen...


In [26]:
# Creo la nuova colonna
istat_kpi.new_index(quartiereData, ['P138'], ['P1'], 'indice_pop_pendolare').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,perc_africa,perc_america,perc_asia,perc_oceania,indice_vecchiaia,indice_vecchiaia_uomo,indice_vecchiaia_donna,indice_pop_attiva,indice_pop_attiva_donna,indice_pop_pendolare
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,0.088077,0.175554,0.429599,0.001198,1.677846,1.408425,1.964878,1.412698,1.463087,0.044641
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,0.06393,0.171848,0.412903,0.005279,1.793528,1.457093,2.158745,1.449253,1.541907,0.047668
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0.0,0.2,0.7,0.0,0.454545,0.4,0.5,0.5,1.222222,0.055556
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,0.044414,0.205413,0.473282,0.001388,1.925208,1.57361,2.288288,1.365452,1.552432,0.048474
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,0.103382,0.199034,0.386473,0.001932,1.873557,1.435216,2.351025,1.387879,1.517134,0.05763


#### 6.  Popolazione residente che svolge la propria giornata nel comune di residenza ma al di fuori del proprio quartiere sul totale della popolazione residente.

In [27]:
# Colonne ISTAT utilizzate
metaData[(metaData['NOME_CAMPO'] == 'P137')]

Unnamed: 0,NOME_CAMPO,DEFINIZIONE
84,P137,Popolazione residente che si sposta giornalmen...


In [28]:
# Creo la nuova colonna
istat_kpi.new_index(quartiereData, ['P137'], ['P1'], 'indice_pop_non_pend_esterna_quartiere').head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,perc_america,perc_asia,perc_oceania,indice_vecchiaia,indice_vecchiaia_uomo,indice_vecchiaia_donna,indice_pop_attiva,indice_pop_attiva_donna,indice_pop_pendolare,indice_pop_non_pend_esterna_quartiere
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,0.175554,0.429599,0.001198,1.677846,1.408425,1.964878,1.412698,1.463087,0.044641,0.478037
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,0.171848,0.412903,0.005279,1.793528,1.457093,2.158745,1.449253,1.541907,0.047668,0.453867
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0.2,0.7,0.0,0.454545,0.4,0.5,0.5,1.222222,0.055556,0.537037
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,0.205413,0.473282,0.001388,1.925208,1.57361,2.288288,1.365452,1.552432,0.048474,0.470987
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,0.199034,0.386473,0.001932,1.873557,1.435216,2.351025,1.387879,1.517134,0.05763,0.475683


#### 7.  Popolazione residente che svolge la propria giornata nel proprio quartiere sul totale della popolazione residente.

Il valore assoluto della popolazione che non svolge la giornata nel proprio quartiere è pari alla differenza tra `P1` e la somma di `P137` e `P138`.

In [29]:
giornata_dentro_quartiere = quartiereData['P1']-(quartiereData[['P137','P138']].sum(axis=1))

quartiereData['indice_pop_non_pend_interna_quartiere'] = giornata_dentro_quartiere/quartiereData['P1']

In [30]:
quartiereData.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,PRO_COM,SEZ,TIPO_LOC,SHAPE_AREA,SHAPE_LEN,POP_2010,ACE_x,mappa2,mappa2bis,...,perc_asia,perc_oceania,indice_vecchiaia,indice_vecchiaia_uomo,indice_vecchiaia_donna,indice_pop_attiva,indice_pop_attiva_donna,indice_pop_pendolare,indice_pop_non_pend_esterna_quartiere,indice_pop_non_pend_interna_quartiere
IDquartiere,quartiere,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,DUOMO,10544416,3120076,32223,206,2335388.0,93251.843798,17156.0,206.0,1721,2108.11,...,0.429599,0.001198,1.677846,1.408425,1.964878,1.412698,1.463087,0.044641,0.478037,0.477322
2,BRERA,6952820,2029564,40018,134,1637361.0,64133.122955,18049.0,322.0,1738,1443.85,...,0.412903,0.005279,1.793528,1.457093,2.158745,1.449253,1.541907,0.047668,0.453867,0.498465
3,GIARDINI PORTA VENEZIA,155765,45438,1477,3,249641.1,3471.485065,48.0,18.0,10,18.52,...,0.7,0.0,0.454545,0.4,0.5,0.5,1.222222,0.055556,0.537037,0.407407
4,GUASTALLA,5456083,1620622,52463,107,1547985.0,56412.832222,15076.0,642.0,1471,1140.69,...,0.473282,0.001388,1.925208,1.57361,2.288288,1.365452,1.552432,0.048474,0.470987,0.480539
5,VIGENTINA,3261127,969344,103460,64,1065016.0,36666.261686,13613.0,654.0,1054,518.73,...,0.386473,0.001932,1.873557,1.435216,2.351025,1.387879,1.517134,0.05763,0.475683,0.466688


### Preview del dataset degli indici

In [None]:
quartiereData[list(quartiereData.columns)[-11:]].head()

# Salva df in csv
quartiereData[list(quartiereData.columns)[-11:]].to_csv('../data/output/kpi_istat_cpa2011.csv', sep = ';')