# Case Study - Homeless

In [2]:
import pandas as pd
df = pd.read_csv('source/homeless_by_regions_2021.csv', sep=';')
df

Unnamed: 0,ITTER107,Territorio,TIPO_DATO_CENS_POP,Tipo dato,SEXISTAT1,Sesso,ETA1,Classe di età,CITTADINANZA,Cittadinanza,TIME,Seleziona periodo,Value,Flag Codes,Flags
0,ITC1,Piemonte,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,M,maschi,TOTAL,totale,ITL,italiano-a,2021,2021,4218,,
1,ITC1,Piemonte,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,F,femmine,TOTAL,totale,ITL,italiano-a,2021,2021,1496,,
2,ITC1,Piemonte,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,T,totale,TOTAL,totale,ITL,italiano-a,2021,2021,5714,,
3,ITC2,Valle d'Aosta / Vallée d'Aoste,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,M,maschi,TOTAL,totale,ITL,italiano-a,2021,2021,41,,
4,ITC2,Valle d'Aosta / Vallée d'Aoste,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,F,femmine,TOTAL,totale,ITL,italiano-a,2021,2021,17,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
972,ITE1,Toscana,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,F,femmine,Y_GE55,55 anni e più,FRGAPO,straniero-a/apolide,2021,2021,79,,
973,ITD4,Friuli-Venezia Giulia,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,T,totale,Y_GE55,55 anni e più,FRGAPO,straniero-a/apolide,2021,2021,45,,
974,ITC2,Valle d'Aosta / Vallée d'Aoste,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,F,femmine,Y35-54,35-54 anni,FRGAPO,straniero-a/apolide,2021,2021,1,,
975,ITD5,Emilia-Romagna,PR_SEC_HOMELESS_PERS,persone senza tetto e senza fissa dimora,M,maschi,Y35-54,35-54 anni,FRGAPO,straniero-a/apolide,2021,2021,466,,


Select only some fields

In [5]:
df = df[['ITTER107','Territorio','SEXISTAT1', 'ETA1', 'CITTADINANZA', 'Value']]
df['Territorio'] = df['Territorio'].str.replace('Trentino Alto Adige / Südtirol', 'Trentino-Alto Adige/Südtirol')

Focus on total age, total sex and total citizenship

In [6]:
df_tot = df[(df['ETA1'] == 'TOTAL') & (df['SEXISTAT1'] == 'T') &  (df['CITTADINANZA'] == 'TOTAL')]

## Draw a preliminary map

In [11]:
import altair as alt

# Load the TopoJSON file by URL
url = "https://raw.githubusercontent.com/openpolis/geojson-italy/master/topojson/limits_IT_regions.topo.json"


source = alt.topo_feature(url, "regions")

alt.Chart(source).mark_geoshape().encode(
    tooltip='properties.reg_name:N',
    color=alt.Color('Value:Q')
).project('mercator').properties(
    width=500,
    height=500
).transform_lookup(
    lookup='properties.reg_name',
    from_=alt.LookupData(df_tot, 'Territorio', ['Territorio', 'Value'])
).properties(title='Homeless in Italy in 2021')


Normalize by population

In [12]:
df2 = pd.read_csv('source/population_by_regions_2021.csv')
df2

Unnamed: 0,ITTER107,Territorio,TIPO_DATO_CENS_POP,Tipo dato,SEXISTAT1,Sesso,ETA1,Classe di età,TIME,Seleziona periodo,Value,Flag Codes,Flags
0,ITD4,Friuli-Venezia Giulia,RESPOP_AV,popolazione residente,M,maschi,Y_UN4,fino a 4 anni,2021,2021,19794,,
1,ITE1,Toscana,RESPOP_AV,popolazione residente,M,maschi,Y_UN4,fino a 4 anni,2021,2021,61789,,
2,ITF6,Calabria,RESPOP_AV,popolazione residente,M,maschi,Y_UN4,fino a 4 anni,2021,2021,36842,,
3,ITD2,Provincia Autonoma Trento,RESPOP_AV,popolazione residente,T,totale,Y5-9,5-9 anni,2021,2021,25101,,
4,ITD3,Veneto,RESPOP_AV,popolazione residente,M,maschi,Y5-9,5-9 anni,2021,2021,105344,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1447,ITDA,Trentino Alto Adige / Südtirol,RESPOP_AV,popolazione residente,F,femmine,Y30-34,30-34 anni,2021,2021,29518,,
1448,ITDA,Trentino Alto Adige / Südtirol,RESPOP_AV,popolazione residente,T,totale,Y20-24,20-24 anni,2021,2021,59099,,
1449,ITDA,Trentino Alto Adige / Südtirol,RESPOP_AV,popolazione residente,M,maschi,Y70-74,70-74 anni,2021,2021,25405,,
1450,ITDA,Trentino Alto Adige / Südtirol,RESPOP_AV,popolazione residente,M,maschi,Y80-84,80-84 anni,2021,2021,15882,,


In [13]:
df2_tot = df2[(df2['ETA1'] == 'TOTAL') & (df2['Sesso'] == 'totale')]
df2_tot['Territorio'] = df2_tot['Territorio'].str.replace('Trentino Alto Adige / Südtirol', 'Trentino-Alto Adige/Südtirol')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2_tot['Territorio'] = df2_tot['Territorio'].str.replace('Trentino Alto Adige / Südtirol', 'Trentino-Alto Adige/Südtirol')


In [15]:
len(df_tot), len(df2_tot)

(22, 22)

Join the two datasets

In [16]:
df_tot = df_tot[['Value', 'ITTER107']]
df2_tot = df2_tot[['Value', 'ITTER107','Territorio']]

In [26]:
df3 = df2_tot.set_index('ITTER107').join(df_tot.set_index('ITTER107'),lsuffix='_pop', rsuffix='_hom').reset_index()

Calculate ratio: number of homeless people over 1000 inhabitants

In [29]:
df3['Ratio'] = df3['Value_hom']/df3['Value_pop']*1000

In [30]:
# Load the TopoJSON file by URL
url = "https://raw.githubusercontent.com/openpolis/geojson-italy/master/topojson/limits_IT_regions.topo.json"


source = alt.topo_feature(url, "regions")

alt.Chart(source).mark_geoshape().encode(
    tooltip='properties.reg_name:N',
    color=alt.Color('Ratio:Q')
).project('mercator').properties(
    width=500,
    height=500
).transform_lookup(
    lookup='properties.reg_name',
    from_=alt.LookupData(df3, 'Territorio', ['Territorio', 'Ratio'])
).properties(title='Homeless in Italy in 2021')


Drop Trentino Provinces

In [31]:
df3.drop(labels=[11,1], axis=0,inplace=True)

In [32]:
alt.Chart(df3).mark_bar().encode(
    y = alt.Y('Territorio', sort='-x'),
    x = alt.X('Ratio')
   
).properties(width=500)

  for col_name, dtype in df.dtypes.iteritems():


In [34]:
alt.Chart(df3).mark_bar().encode(
    y = alt.Y('Territorio', sort='-x'),
    x = alt.X('Value_hom'),
   
).properties(width=500)

In [47]:
alt.Chart(df3).mark_bar().encode(
    y = alt.Y('Territorio', sort='-x', axis=alt.Axis(title='')),
    x = alt.X('Ratio', axis=alt.Axis(tickCount=4,title='')),
    color=alt.condition(alt.datum.Ratio > 2, alt.value('darkred'), alt.value('lightgray'))
   
).properties(width=500,title='Number of homeless people in a population of 1,000')