# Case Study - Homeless

In [13]:
import pandas as pd
df = pd.read_csv('source/homeless.csv')
df.head()

Unnamed: 0,ITTER107,Territory,Sex,Age,Citizenship,Value
0,ITC1,Piemonte,M,TOTAL,ITL,4218
1,ITC1,Piemonte,F,TOTAL,ITL,1496
2,ITC1,Piemonte,T,TOTAL,ITL,5714
3,ITC2,Valle d'Aosta / Vallée d'Aoste,M,TOTAL,ITL,41
4,ITC2,Valle d'Aosta / Vallée d'Aoste,F,TOTAL,ITL,17


Select only some fields

In [14]:
df['Territory'] = df['Territory'].str.replace('Trentino Alto Adige / Südtirol', 'Trentino-Alto Adige/Südtirol')

Focus on total age, total sex and total citizenship

In [15]:
df_tot = df[(df['Age'] == 'TOTAL') & (df['Sex'] == 'T') &  (df['Citizenship'] == 'TOTAL')]

## Draw a preliminary map

In [16]:
import altair as alt

# Load the TopoJSON file by URL
url = "https://raw.githubusercontent.com/openpolis/geojson-italy/master/topojson/limits_IT_regions.topo.json"


source = alt.topo_feature(url, "regions")

alt.Chart(source).mark_geoshape().encode(
    tooltip='properties.reg_name:N',
    color=alt.Color('Value:Q')
).project('mercator').properties(
    width=500,
    height=500
).transform_lookup(
    lookup='properties.reg_name',
    from_=alt.LookupData(df_tot, 'Territory', ['Territory', 'Value'])
).properties(title='Homeless in Italy in 2021')


Normalize by population

In [20]:
population = pd.read_csv('source/population.csv')
population.head()

Unnamed: 0,ITTER107,Territory,Sex,Age,Value
0,ITD4,Friuli-Venezia Giulia,M,Y_UN4,19794
1,ITE1,Toscana,M,Y_UN4,61789
2,ITF6,Calabria,M,Y_UN4,36842
3,ITD2,Provincia Autonoma Trento,T,Y5-9,25101
4,ITD3,Veneto,M,Y5-9,105344


In [23]:
pop_tot = population[(population['Age'] == 'TOTAL') & (population['Sex'] == 'T')]
pop_tot['Territory'] = pop_tot['Territory'].str.replace('Trentino Alto Adige / Südtirol', 'Trentino-Alto Adige/Südtirol')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pop_tot['Territory'] = pop_tot['Territory'].str.replace('Trentino Alto Adige / Südtirol', 'Trentino-Alto Adige/Südtirol')


In [24]:
len(df_tot), len(pop_tot)

(22, 22)

Join the two datasets

In [25]:
df_tot = df_tot[['Value', 'ITTER107']]
pop_tot = pop_tot[['Value', 'ITTER107','Territory']]

In [26]:
df3 = pop_tot.set_index('ITTER107').join(df_tot.set_index('ITTER107'),lsuffix='_pop', rsuffix='_hom').reset_index()

Calculate ratio: number of homeless people over 1000 inhabitants

In [27]:
df3['Ratio'] = df3['Value_hom']/df3['Value_pop']*1000

In [28]:
# Load the TopoJSON file by URL
url = "https://raw.githubusercontent.com/openpolis/geojson-italy/master/topojson/limits_IT_regions.topo.json"


source = alt.topo_feature(url, "regions")

alt.Chart(source).mark_geoshape().encode(
    tooltip='properties.reg_name:N',
    color=alt.Color('Ratio:Q')
).project('mercator').properties(
    width=500,
    height=500
).transform_lookup(
    lookup='properties.reg_name',
    from_=alt.LookupData(df3, 'Territory', ['Territory', 'Ratio'])
).properties(title='Homeless in Italy in 2021')


Drop Trentino Provinces

In [29]:
df3.drop(labels=[11,1], axis=0,inplace=True)

In [30]:
alt.Chart(df3).mark_bar().encode(
    y = alt.Y('Territory', sort='-x'),
    x = alt.X('Ratio')
   
).properties(width=500)

In [31]:
alt.Chart(df3).mark_bar().encode(
    y = alt.Y('Territory', sort='-x'),
    x = alt.X('Value_hom'),
   
).properties(width=500)

In [32]:
alt.Chart(df3).mark_bar().encode(
    y = alt.Y('Territory', sort='-x', axis=alt.Axis(title='')),
    x = alt.X('Ratio', axis=alt.Axis(tickCount=4,title='')),
    color=alt.condition(alt.datum.Ratio > 2, alt.value('darkred'), alt.value('lightgray'))
   
).properties(width=500,title='Number of homeless people in a population of 1,000')