## Imports

In [120]:
# !pip install geopandas
# !pip install pyreadstat
# !pip install folium

In [121]:
import pandas as pd
import geopandas as gpd
import pyreadstat

import folium
from branca.colormap import LinearColormap
from branca.element import Figure, Element, Div

from IPython.core.display import HTML

## Data preparation

#### Importing the data

In [122]:
# Raw survey answers plus the SPSS metadata (value labels, etc.).
# Value formats are not applied here, so answers stay as numeric codes.
df, meta = pyreadstat.read_sav('./data/survey/ENCAVI_2015.sav')

# Region name -> GID lookup, as a Series indexed by region name.
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0;
# squeezing the single remaining column afterwards works on every version.
regions_gid = pd.read_csv('./data/survey/region_to_GID.csv', sep=';', index_col='region_name').squeeze('columns')

# Per-question configuration, indexed by the question code used in the survey file.
question_format = pd.read_csv('./data/survey/question_format.csv', sep=';', index_col='question')

#### Value changes

In [123]:
df['Region'] = df.Region.map(meta.variable_value_labels['Region'])  # Using region names instead of codes

df_f = df[['Region', *question_format.index.values]].copy()  # Creating a filtered copy

df_f['GID_1'] = df_f.Region.map(regions_gid)  # Assigning GID to each region

df_f = df_f.rename(columns=question_format.var_name.to_dict())  # Renaming columns from file

# 9, 8 = does not know / does not answer.
# Replace them with NaN rather than None: a None replacement can leave the
# affected columns as object dtype in recent pandas, which breaks the numeric
# operations applied later (scale reversal, mean, correlation).
df_f = df_f.replace({9: float('nan'), 8: float('nan')})

#### Fixing the meaning of scales
In the survey, some scales had ascending codes (larger = more/better) and others had descending codes (larger = less/worse). This code reverses the descending scales so that every scale has the same meaning.

In [124]:
# Questions flagged in the configuration file as having a descending scale
is_descending = question_format.ascending_scale == False
descending_scale_columns = question_format.loc[is_descending, 'var_name'].values

# Flip each of those columns by subtracting every answer from the column maximum
for column in descending_scale_columns:
    column_max = df_f[column].max()
    df_f[column] = column_max - df_f[column]

#### Calculating metrics

In [149]:
# Average value of each question per region (GID_1), rounded to 3 decimals.
# numeric_only=True skips text columns such as Region; without it pandas >= 2.0
# raises a TypeError instead of silently dropping them.
df_reg = df_f.groupby('GID_1').mean(numeric_only=True).round(3)

## Map creation

In [150]:
chilean_map = gpd.read_file('./data/map/chilean_map.json', encoding='latin-1')

# When the survey was conducted Ñuble was part of Biobío, so Ñuble's shape reuses Biobío's GID
gid_dict_mapping = {regions_gid['Ñuble']: regions_gid['Biobío']}

# Assign the result back to the column: an in-place replace on `chilean_map.GID_1`
# is a chained operation that warns in pandas 2.x and does not modify the frame
# when Copy-on-Write is enabled.
chilean_map['GID_1'] = chilean_map['GID_1'].replace(gid_dict_mapping)

# Attach the regional averages to each shape
chilean_map = chilean_map.merge(df_reg, on='GID_1')

In [151]:
def create_map(gdf, color_column, tooltip_columns=None):
    """Build a folium choropleth map of ``gdf`` coloured by ``color_column``.

    Parameters
    ----------
    gdf : GeoDataFrame
        Shapes to draw; must contain ``color_column`` and every tooltip column.
    color_column : str
        Column whose value sets the fill colour of each shape. It must appear
        in ``question_format.var_name``.
    tooltip_columns : list of str, optional
        Columns shown in the hover tooltip. Defaults to
        ``['NAME_1', color_column]``.

    Returns
    -------
    folium.Map
        Map with the coloured GeoJson layer added.

    Raises
    ------
    IndexError
        If ``color_column`` has no row in ``question_format``.
    """
    tooltip_columns = tooltip_columns if tooltip_columns else ['NAME_1', color_column]

    # Good indicators go yellow -> green, bad ones yellow -> red
    is_good_indicator = question_format[question_format.var_name == color_column].good_indicator.iloc[0]
    high_color = 'green' if is_good_indicator else 'red'

    # Scale bounds come from the frame being drawn (the `gdf` argument), not from
    # the global `chilean_map`, so the function also works for other frames.
    color_scale = LinearColormap(
        ['yellow', high_color],
        vmin=gdf[color_column].min(),
        vmax=gdf[color_column].max(),
    )

    m = folium.Map(
        location=[-39.5, -60],
        zoom_start=4,
    )

    folium.GeoJson(
        data=gdf.to_json(),
        style_function=lambda feature: {
            'fillColor': color_scale(feature['properties'][color_column]),
            'fillOpacity': 0.65,
            'color': 'black',
            'weight': 1.5,
        },
        highlight_function=lambda x: {"weight": 1, 'fillOpacity': 1},
        tooltip=folium.features.GeoJsonTooltip(fields=tooltip_columns, labels=False, sticky=False)
    ).add_to(m)

    return m

#### Plotting two maps side-by-side

In [152]:
def plot_maps(map_1, map_2, title_1, title_2, correlation=None):
    """Display two maps side by side under a header with their titles.

    Parameters
    ----------
    map_1, map_2 : folium.Map
        Maps shown on the left and on the right, respectively.
    title_1, title_2 : str
        Titles shown above the left and the right map.
    correlation : float, optional
        If given, it is shown below the titles with two decimals.

    Returns
    -------
    None
        The two figures are rendered with ``display``.
    """
    # Header figure: both titles plus the optional correlation line
    fig_txt = Figure(height="70px")

    fig_txt.header.add_child(Element("<style>div {float: left; width: 49%; margin-right: 1%}</style>"))
    fig_txt.header.add_child(Element("<style>p {margin-bottom: 0px}</style>"))

    # Use the title arguments (the original read the globals variable_1/variable_2)
    # and keep the markup well-formed (no stray ';' after the style attribute).
    title_template = '<div style="font-size: 20px" align=center>{}</div>'
    for title in (title_1, title_2):
        fig_txt.html.add_child(Element(title_template.format(title)))

    # `is not None` so that a correlation of exactly 0 is still reported
    correlation_txt = "(correlation between variables {0:.2f})".format(correlation) if correlation is not None else ""
    fig_txt.html.add_child(Element('<p style="margin-bottom: -50px" align=center>{}</p>'.format(correlation_txt)))

    display(fig_txt)

    # Body figure: each map in its own half-width floating div
    fig_maps = Figure()
    fig_maps.header.add_child(Element("<style>div {float: left; margin-right: 1%}</style>"))

    div_map_1 = Div(width="49%")
    div_map_1.add_child(map_1)

    div_map_2 = Div(width="49%")
    div_map_2.add_child(map_2)

    fig_maps.add_child(div_map_1)
    fig_maps.add_child(div_map_2)
    display(fig_maps)

### Suggesting variables

In [153]:
def get_most_correlated(df, original_variable, correlation='positive'):
    """Return the column of ``df`` most correlated with ``original_variable``.

    Parameters
    ----------
    df : DataFrame
        Data to analyse. Only numeric columns take part in the correlation.
    original_variable : str
        Name of the numeric reference column.
    correlation : {'positive', 'negative', 'absolute'}, default 'positive'
        'positive' picks the highest correlation, 'negative' the lowest and
        'absolute' the largest in absolute value.

    Returns
    -------
    str
        Name of the selected column (never ``original_variable`` itself).

    Raises
    ------
    ValueError
        If ``correlation`` is not one of the accepted selectors.
    """
    # Restrict to numeric columns: calling corr() on a frame that still holds text
    # columns (e.g. region names) raises in pandas >= 2.0.
    numeric_df = df.select_dtypes(include='number')
    corr_values = numeric_df.corr()[original_variable].drop(original_variable)

    if correlation == 'positive':
        return corr_values.idxmax()
    elif correlation == 'negative':
        return corr_values.idxmin()
    elif correlation == 'absolute':
        return corr_values.abs().idxmax()
    else:
        raise ValueError('Invalid correlation selector.')

## Visualizing

In [154]:
# Reference variable and the variable most negatively correlated with it
variable_1 = 'has_hypertension'
variable_2 = get_most_correlated(df_f, variable_1, correlation='negative')

# One map per variable, both built from the same regional frame
map_1, map_2 = [create_map(chilean_map, variable) for variable in (variable_1, variable_2)]

# Correlation between the two variables, read from the 2x2 correlation matrix
pair_corr = df_f[[variable_1, variable_2]].corr()
correlation = pair_corr.loc[variable_2, variable_1]

plot_maps(map_1, map_2, variable_1, variable_2, correlation)