# Visualization of Census data at district level

We want to create visualizations that answer the following questions:
* District-level decrease or increase in the Spanish/foreign population for the age ranges 0-24, 25-39, 40-64, 65-84 and 85-100
* Overall population increase for each age range, comparing the Spanish vs. the foreign population

In [65]:
import pandas as pd
import numpy as np
import folium
from IPython.display import HTML

### 1. Load, select segments and aggregate data by age range

In [67]:
# Census cleaned and scaled data.
# The first CSV column becomes the row index; later cells join that index
# against district names.
variation_positive = pd.read_csv('variation_matrix_positive.csv', index_col=0)
variation_negative = pd.read_csv('variation_matrix_negative.csv', index_col=0)

# Parameters for aggregation by age range and segment.
# Each key names an aggregated age range; its value lists the age labels
# that belong to that range.
age_ranges = {'0-24': ['0 - 4','5 - 9','10 - 14','15 - 19','20 - 24'],
              '25-39': ['25 - 29','30 - 34','35 - 39'],
              '40-64': ['40 - 44','45 - 49','50 - 54','55 - 59','60 - 64'],
              '65-84': ['65 - 69','70 - 74','75 - 79','80 - 84'],
              '85-100': ['85 - 89','90 - 94','95 - 99','100 o más']}
segments = ['foreign_total', 'spanish_total']

# Districts data
district_dict = pd.read_csv('district_dict.csv')
# Read the GeoJSON through a context manager so the file handle is closed,
# and decode it explicitly as UTF-8: district names contain accented
# characters and are matched against the data by name later on.
with open('districts_madrid.geojson', encoding='utf-8') as geojson_file:
    districts_geo = geojson_file.read()

In [100]:
def select_columns_with_string_pattern(list_columns, list_pattens):
    """Return the column names that contain at least one of the patterns.

    Args:
        list_columns: iterable of column names (strings).
        list_pattens: a single substring, or an iterable of substrings.
            A bare string is treated as ONE pattern; without this guard it
            would be iterated character by character and match almost
            every column.

    Returns:
        A list with the matching column names, in their original order.
    """
    if isinstance(list_pattens, str):
        patterns = [list_pattens]
    else:
        # Materialise once so a one-shot iterator is not exhausted after
        # the first column has been checked.
        patterns = list(list_pattens)
    return [col for col in list_columns if any(pat in col for pat in patterns)]
    

def _columns_for_age_labels(columns, age_labels):
    """Return the columns whose name contains one of *age_labels* as a whole label."""
    import re  # local import so this cell does not depend on the import cell

    # The digit guards keep '0 - 4' from also matching '40 - 44', and
    # '5 - 9' from matching '55 - 59' or '95 - 99'.
    matchers = [re.compile(r'(?<!\d)' + re.escape(label) + r'(?!\d)')
                for label in age_labels]
    return [col for col in columns
            if any(matcher.search(col) for matcher in matchers)]


def aggregate_by_age_ranges(data, age_ranges, segments):
    """Compute per-column medians for two segments, grouped by age range.

    Args:
        data: DataFrame whose column names contain a segment name and an
            age label (e.g. 'spanish_men_0 - 4' appears elsewhere in this
            notebook).
        age_ranges: mapping from an age-range name to the list of age
            labels that belong to it, e.g. {'0-24': ['0 - 4', '5 - 9']}.
        segments: sequence with two segment names; segments[0] and
            segments[1] are each matched as a substring of the column name.

    Returns:
        A tuple (agg_a, agg_b) of Series, one per segment. Each Series is
        indexed by column name and holds that column's median over the
        rows of *data*, concatenated in the iteration order of *age_ranges*.

    NOTE(review): an earlier comment here described "averaging within the
    age range and rescaling from 1 to 10"; the code has only ever taken
    the per-column median (axis=0). That output shape is kept unchanged;
    confirm whether a per-row aggregate was intended.
    """
    all_columns = data.columns.tolist()

    # Select segments. Each name is wrapped in a list so it is matched as a
    # whole substring instead of character by character.
    data_a = data.loc[:, select_columns_with_string_pattern(all_columns, [segments[0]])]
    data_b = data.loc[:, select_columns_with_string_pattern(all_columns, [segments[1]])]

    # Aggregate by age ranges, using the age labels stored under each key
    # (the key itself, e.g. '0-24', never appears in a column name).
    agg_a = []
    agg_b = []
    for age_labels in age_ranges.values():
        range_columns_a = _columns_for_age_labels(data_a.columns.tolist(), age_labels)
        range_columns_b = _columns_for_age_labels(data_b.columns.tolist(), age_labels)

        agg_a.append(data_a.loc[:, range_columns_a].median(axis=0))
        agg_b.append(data_b.loc[:, range_columns_b].median(axis=0))

    return pd.concat(agg_a), pd.concat(agg_b)

In [101]:
# Aggregate the positive-variation matrix; one result per entry in `segments`
a, b = aggregate_by_age_ranges(
    data=variation_positive,
    age_ranges=age_ranges,
    segments=segments,
)

In [96]:
# Keep a single variation column, with the row labels exposed as 'index'
a = variation_negative.reset_index()[['index', 'spanish_men_0 - 4']]
# Attach the district lookup table by matching 'index' to 'district_name'
b = pd.merge(a, district_dict, left_on='index', right_on='district_name')

In [97]:
# Preview the merged table: the selected variation column plus the district lookup columns
b

Unnamed: 0,index,spanish_men_0 - 4,district_name,district_id
0,Arganzuela,0.012458,Arganzuela,2
1,Barajas,-0.720671,Barajas,21
2,Carabanchel,-0.904557,Carabanchel,11
3,Centro,1.320445,Centro,1
4,Chamartín,-1.057425,Chamartín,5
5,Chamberí,0.904565,Chamberí,7
6,Ciudad Lineal,1.169294,Ciudad Lineal,15
7,Fuencarral - El Pardo,-0.675371,Fuencarral - El Pardo,8
8,Hortaleza,-0.731869,Hortaleza,16
9,Latina,0.511563,Latina,10


In [99]:
# Madrid districts' polygons, coloured by one variation column
center_madrid = [40.4161778, -3.7128473]
m = folium.Map(location=center_madrid, zoom_start=13, width=500, height=500)

# Options shared by both folium choropleth entry points.
choropleth_options = dict(
    geo_data=districts_geo,
    data=b,
    columns=['index', 'spanish_men_0 - 4'],
    key_on='feature.properties.name',
    fill_color='YlGn', fill_opacity=0.7, line_opacity=0.2,
    legend_name='blablabla',
)
color_bins = [-2, -1, 0, 1, 2]

if hasattr(folium, 'Choropleth'):
    # Map.choropleth is deprecated in favour of the folium.Choropleth layer
    # and is missing from recent folium releases; the class calls the
    # colour thresholds `bins`.
    folium.Choropleth(bins=color_bins, **choropleth_options).add_to(m)
else:
    # Older folium releases only offer the Map.choropleth method.
    m.choropleth(threshold_scale=color_bins, **choropleth_options)
m

In [35]:
# Plain base map centred on Madrid, with no data layer
# NOTE(review): recent folium releases may no longer bundle the Stamen tile sets — confirm
center_madrid = [40.4161778, -3.7128473]
map_osm = folium.Map(
    location=center_madrid,
    tiles='Stamen Toner',
    zoom_start=13,
    width=300,
    height=300,
)
map_osm