# Visualization of Census data at district level

We want to create visualizations that answer the following questions:
* District-level decrease or increase in the Spanish/foreign population for the age ranges 0-24, 25-39, 40-64, 65-84 and 85-100
* Overall cumulative increase or decrease ratios of the population for each age range, comparing Spanish vs. foreign residents

In [15]:
import pandas as pd
import numpy as np
import folium
from sklearn.preprocessing import MinMaxScaler
from folium.features import DivIcon
from IPython.display import HTML

### 1. Load, select segments and aggregate data by age range

In [2]:
# Census variation matrices (already cleaned and scaled). The first CSV column
# becomes the index; it is later used as the district key in the maps.
variation_positive = pd.read_csv('variation_matrix_positive.csv', index_col=0)
variation_negative = pd.read_csv('variation_matrix_negative.csv', index_col=0)

# Parameters for aggregation by age range and segment.
# Each key is the label of an aggregated age range; each value lists the
# substrings used to pick the source columns that belong to that range.
# NOTE(review): the leading underscore in '_0 - 4' and '_5 - 9' presumably keeps
# these substring patterns from also matching columns such as '40 - 44' or
# '95 - 99' -- confirm against the column names of the variation matrices.
age_ranges = {'0-24': ['_0 - 4','_5 - 9','10 - 14','15 - 19','20 - 24'],
              '25-39': ['25 - 29','30 - 34','35 - 39'],
              '40-64': ['40 - 44','45 - 49','50 - 54','55 - 59','60 - 64'],
              '65-84': ['65 - 69','70 - 74','75 - 79','80 - 84'],
              '85-100': ['85 - 89','90 - 94','95 - 99','100 o más']}
# Substrings identifying the two population segments (order matters: the
# aggregation returns one frame per segment in this order).
segments = ['foreign_total', 'spanish_total']

# Districts data
district_dict = pd.read_csv('district_dict.csv')
# Read the GeoJSON inside a context manager so the file handle is closed, and
# decode it explicitly as UTF-8 (the encoding GeoJSON requires) instead of
# relying on the platform default.
with open('districts_madrid.geojson', encoding='utf-8') as geojson_file:
    districts_geo = geojson_file.read()

In [3]:
def select_columns_with_string_pattern(list_columns, pattern):
    """Return the entries of ``list_columns`` that contain ``pattern``.

    The check is a plain substring test and the input order is preserved.
    """
    matching = []
    for name in list_columns:
        if pattern in name:
            matching.append(name)
    return matching
    
    
def select_columns_math_list_string_pattern(list_columns, list_patterns):
    """Return the entries of ``list_columns`` containing any of ``list_patterns``.

    Each pattern is tested as a substring. A column is reported once, in its
    original position, as soon as the first matching pattern is found.
    """
    selected = []
    for name in list_columns:
        for fragment in list_patterns:
            if fragment in name:
                selected.append(name)
                break  # one hit is enough; avoid adding the column twice
    return selected
    

def _aggregate_segment_by_age_ranges(data, segment, age_ranges):
    """Aggregate one population segment into one 0-10 scaled column per age range.

    Parameters
    ----------
    data : pandas.DataFrame
        Variation matrix whose column names contain both the segment name and
        an age-bracket substring.
    segment : str
        Substring identifying the columns of the segment to aggregate.
    age_ranges : dict
        Maps an output column label to the list of age-bracket substrings that
        belong to that range.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``data``; an ``index`` column holding the original
        index values followed by one column per key of ``age_ranges``.
    """
    scaler = MinMaxScaler(feature_range=(0, 10))

    # Keep only the columns of the requested segment.
    segment_columns = select_columns_with_string_pattern(data.columns.tolist(), segment)
    segment_data = data.loc[:, segment_columns]

    scaled_ranges = []
    for range_label, age_patterns in age_ranges.items():
        # Columns of this segment whose name contains any bracket of the range.
        range_columns = select_columns_math_list_string_pattern(
            segment_data.columns.tolist(), age_patterns)

        # Row-wise median over the brackets of the range, then min-max scaling
        # of the resulting column to the interval [0, 10].
        range_median = segment_data.loc[:, range_columns].median(axis=1)
        scaled = scaler.fit_transform(range_median.values.reshape(-1, 1))

        # Passing the index as a plain list drops its name, so reset_index()
        # below always produces a column called 'index'.
        scaled_ranges.append(pd.DataFrame(scaled,
                                          index=range_median.index.tolist(),
                                          columns=[range_label]))

    return pd.concat(scaled_ranges, axis=1).reset_index()


def aggregate_by_age_ranges(data, age_ranges, segments):
    """Aggregate a variation matrix by age range for two population segments.

    For each of the first two entries of ``segments`` the matching columns are
    selected, grouped by the age ranges in ``age_ranges``, reduced with a
    row-wise median and rescaled to [0, 10] independently per age range.

    Parameters
    ----------
    data : pandas.DataFrame
        Variation matrix; it is not modified.
    age_ranges : dict
        Maps an age-range label to the list of column-name substrings that
        make up that range.
    segments : sequence of str
        Segment substrings; only ``segments[0]`` and ``segments[1]`` are used.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(frame_for_segments[0], frame_for_segments[1])``; each frame has an
        ``index`` column plus one column per age-range label.
    """
    # Each segment is processed on its own, so its frame is always indexed by
    # its own aggregated series (the original reused segment a's index for b).
    aggregated_a = _aggregate_segment_by_age_ranges(data, segments[0], age_ranges)
    aggregated_b = _aggregate_segment_by_age_ranges(data, segments[1], age_ranges)
    return aggregated_a, aggregated_b

In [4]:
# Aggregate the positive (increase) and negative (decrease) variation matrices.
# Each call returns one frame per entry of `segments`, in that order.
var_positive_foreign, var_positive_spanish = aggregate_by_age_ranges(
    data=variation_positive, age_ranges=age_ranges, segments=segments)
var_negative_foreign, var_negative_spanish = aggregate_by_age_ranges(
    data=variation_negative, age_ranges=age_ranges, segments=segments)

### 2. Visualize Spanish vs. foreign population for each age range and type of variation

In [25]:
 f = folium.Figure()
f.html.add_child(folium.Element("<h4>Cummulative ratio of variation in population with respect to 2004 between 0-24 years in the period 2005-2017</h4>"))

center_madrid = [40.4161778,-3.7128473]
m = folium.Map(location=center_madrid, zoom_start=13, width=1000, height=1000)
#m.choropleth(
#    geo_data=districts_geo,
#    data=var_positive_spanish,
#    threshold_scale=[0,2,3,5,7,10],
#    columns=['index', '0-24'],
#    key_on='feature.properties.name',
#    fill_color='YlGn', fill_opacity=0.7, line_opacity=0.2,
#    legend_name='Spanish Increase Variation'
#    )
#m.choropleth(
#    geo_data=districts_geo,
#    data=var_negative_spanish,
#    threshold_scale=[0,2,3,5,7,10],
#    columns=['index', '0-24'],
#    key_on='feature.properties.name',
#    fill_color='BuPu', fill_opacity=0.7, line_opacity=0.2,
#    legend_name='Spanish Decrease Variation'
#    )
m.choropleth(
    geo_data=districts_geo,
    data=var_positive_foreign,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '0-24'],
    key_on='feature.properties.name',
    fill_color='OrRd', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Home location density by districts'
    )
#m.choropleth(
#    geo_data=districts_geo,
#    data=var_negative_foreign,
#    threshold_scale=[0,2,3,5,7,10],
#    columns=['index', '0-24'],
#    key_on='feature.properties.name',
#    fill_color='PuBuGn', fill_opacity=0.7, line_opacity=0.2,
#    legend_name='Foreign Decrease Variation'
#    )
folium.LayerControl().add_to(m)
folium.Marker([40.3812771,-3.6645649], popup="Centro Comercial Madrid Sur").add_to(m)
f.add_child(m)
f.html.add_child(folium.Element("<footer>Change layers for different age ranges by checking differnt boxes in the layer icon.</footer>"))
m.save('cummulative_variation_ratio_by_districts_0_24_years.html')
m

In [234]:
# Map of the 25-39 age range: one choropleth layer per segment and direction.
age_column = '25-39'
center_madrid = [40.4161778,-3.7128473]

f = folium.Figure()
f.html.add_child(folium.Element("<h4>Cummulative ratio of variation in population with respect to 2004 between 25-39 years in the period 2005-2017</h4>"))

m = folium.Map(location=center_madrid, zoom_start=13, width=1000,height=1000)

# (data frame, fill colour, legend) for each layer, in drawing order.
choropleth_layers = [
    (var_positive_spanish, 'YlGn', 'Spanish Increase Variation'),
    (var_negative_spanish, 'BuPu', 'Spanish Decrease Variation'),
    (var_positive_foreign, 'OrRd', 'Foreign Increase Variation'),
    (var_negative_foreign, 'PuBuGn', 'Foreign Decrease Variation'),
]
for layer_data, layer_color, layer_legend in choropleth_layers:
    m.choropleth(
        geo_data=districts_geo,
        data=layer_data,
        threshold_scale=[0,2,3,5,7,10],
        # 'index' holds the district key, age_column the value to colour by
        columns=['index', age_column],
        key_on='feature.properties.name',
        fill_color=layer_color, fill_opacity=0.7, line_opacity=0.2,
        legend_name=layer_legend
        )

folium.LayerControl().add_to(m)
f.add_child(m)
f.html.add_child(folium.Element("<footer>Change layers for different age ranges by checking differnt boxes in the layer icon.</footer>"))
m.save('cummulative_variation_ratio_by_districts_25_39_years.html')
# Bare expression so the notebook renders the map as the cell output.
m

In [235]:
 f = folium.Figure()
f.html.add_child(folium.Element("<h4>Cummulative ratio of variation in population with respect to 2004 between 40-64 years in the period 2005-2017</h4>"))

center_madrid = [40.4161778,-3.7128473]
m = folium.Map(location=center_madrid, zoom_start=13, width=1000,height=1000)
m.choropleth(
    geo_data=districts_geo,
    data=var_positive_spanish,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '40-64'],
    key_on='feature.properties.name',
    fill_color='YlGn', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Spanish Increase Variation'
    )
m.choropleth(
    geo_data=districts_geo,
    data=var_negative_spanish,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '40-64'],
    key_on='feature.properties.name',
    fill_color='BuPu', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Spanish Decrease Variation'
    )
m.choropleth(
    geo_data=districts_geo,
    data=var_positive_foreign,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '40-64'],
    key_on='feature.properties.name',
    fill_color='OrRd', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Foreign Increase Variation'
    )
m.choropleth(
    geo_data=districts_geo,
    data=var_negative_foreign,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '40-64'],
    key_on='feature.properties.name',
    fill_color='PuBuGn', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Foreign Decrease Variation'
    )
folium.LayerControl().add_to(m)
f.add_child(m)
f.html.add_child(folium.Element("<footer>Change layers for different age ranges by checking differnt boxes in the layer icon.</footer>"))
m.save('cummulative_variation_ratio_by_districts_40_64_years.html')
m

In [236]:
 f = folium.Figure()
f.html.add_child(folium.Element("<h4>Cummulative ratio of variation in population with respect to 2004 between 65-84 years in the period 2005-2017</h4>"))

center_madrid = [40.4161778,-3.7128473]
m = folium.Map(location=center_madrid, zoom_start=13, width=1000,height=1000)
m.choropleth(
    geo_data=districts_geo,
    data=var_positive_spanish,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '65-84'],
    key_on='feature.properties.name',
    fill_color='YlGn', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Spanish Increase Variation',
    reset=True
    )
m.choropleth(
    geo_data=districts_geo,
    data=var_negative_spanish,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '65-84'],
    key_on='feature.properties.name',
    fill_color='BuPu', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Spanish Decrease Variation',
    reset=True
    )
m.choropleth(
    geo_data=districts_geo,
    data=var_positive_foreign,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '65-84'],
    key_on='feature.properties.name',
    fill_color='OrRd', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Foreign Increase Variation',
    reset=True
    )
m.choropleth(
    geo_data=districts_geo,
    data=var_negative_foreign,
    threshold_scale=[0,2,3,5,7,10],
    columns=['index', '65-84'],
    key_on='feature.properties.name',
    fill_color='PuBuGn', fill_opacity=0.7, line_opacity=0.2,
    legend_name='Foreign Decrease Variation',
    reset=True
    )
folium.LayerControl().add_to(m)
f.add_child(m)
f.html.add_child(folium.Element("<footer>Change layers for different age ranges by checking differnt boxes in the layer icon.</footer>"))
m.save('cummulative_variation_ratio_by_districts_65_84_years.html')
m

In [237]:
# Map of the 85-100 age range: one choropleth layer per segment and direction.
age_column = '85-100'
center_madrid = [40.4161778,-3.7128473]

f = folium.Figure()
f.html.add_child(folium.Element("<h4>Cummulative ratio of variation in population with respect to 2004 between 85-100 years in the period 2005-2017</h4>"))

m = folium.Map(location=center_madrid, zoom_start=13, width=1000,height=1000)

# (data frame, fill colour, legend) for each layer, in drawing order.
choropleth_layers = [
    (var_positive_spanish, 'YlGn', 'Spanish Increase Variation'),
    (var_negative_spanish, 'BuPu', 'Spanish Decrease Variation'),
    (var_positive_foreign, 'OrRd', 'Foreign Increase Variation'),
    (var_negative_foreign, 'PuBuGn', 'Foreign Decrease Variation'),
]
for layer_data, layer_color, layer_legend in choropleth_layers:
    m.choropleth(
        geo_data=districts_geo,
        data=layer_data,
        threshold_scale=[0,2,3,5,7,10],
        # 'index' holds the district key, age_column the value to colour by
        columns=['index', age_column],
        key_on='feature.properties.name',
        fill_color=layer_color, fill_opacity=0.7, line_opacity=0.2,
        legend_name=layer_legend,
        reset=True
        )

folium.LayerControl().add_to(m)
f.add_child(m)
f.html.add_child(folium.Element("<footer>Change layers for different age ranges by checking differnt boxes in the layer icon.</footer>"))
m.save('cummulative_variation_ratio_by_districts_85_100_years.html')
# Bare expression so the notebook renders the map as the cell output.
m