# Composite Indicator

In [1]:
#importing libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo

%load_ext extensions.skip

# Global plotting look: a two-colour palette (blue, orange) kept in colors_arr
# and seaborn's 'darkgrid' style applied to every figure.
colors_arr = [
    '#1a96d0',
    '#fe9c39',
]
sns.set(style='darkgrid')



In [2]:
# Spatial unit used throughout the notebook: 'neighborhood' or 'hdu'.
spatial_unit = 'hdu'

# Area label matching the chosen unit; any value other than 'hdu'
# falls back to 'neighborhood'.
if spatial_unit == 'hdu':
    spatial_area_label = 'hdu_id'
else:
    spatial_area_label = 'neighborhood'

## 1. Loading the data

The provided dataset is the output of the data-cleaning stage described in the article *"A composite indicator of liveability based on sociodemographic and Uber quality service dimensions: a data-driven approach"*.

In [3]:
# Load the CSV that matches the selected spatial unit; the file's first
# column is used as the DataFrame index.
df = pd.read_csv(
    "./data/uber_{}.csv".format(spatial_unit),
    index_col=0,
)

In [10]:
# Cast the HDU identifier to string so it is handled as a label rather than a
# number (the numeric-column selection further down only picks int64/float64
# columns, so a string id is left out of the standardization).
# Bracket indexing is used instead of attribute assignment, and the cast is
# guarded so the cell does not raise when the loaded dataset has no 'hdu_id'
# column (presumably the case for the 'neighborhood' file -- TODO confirm).
if 'hdu_id' in df.columns:
    df['hdu_id'] = df['hdu_id'].astype(str)

### 1.1. Preparing the data

In [4]:
# Derived variables, computed only for the neighborhood-level dataset.
# Both are expressed per 100,000 of the 'population' column.
if spatial_unit == 'neighborhood':
    # car theft + car robbery counts, scaled by population
    df['car_crimes_rate'] = (
        100000 * (df['car_theft'] + df['car_robbery']) / df['population']
    )
    # education + health + security unit counts, scaled by population
    df['urban_services_ratio'] = (
        100000
        * (df['education_units'] + df['health_units'] + df['security_units'])
        / df['population']
    )

In [11]:
# Names of the float64/int64 columns of df
numeric_cols = df.select_dtypes(['float64', 'int64']).columns.tolist()

# Standardize those columns with StandardScaler; fit_transform returns a
# plain array, so the column names are re-attached afterwards.
scaler = StandardScaler()
numerical_data = scaler.fit_transform(df[numeric_cols])

# Note: the new frame gets a default integer index (df's index is not passed on).
df_numerical = pd.DataFrame(numerical_data, columns=numeric_cols)
df_numerical.shape

(58, 15)

In [12]:
def highlight_small_corr(s, threshold=0.3):
    """Return the CSS style used to flag a weak correlation value.

    Intended to be applied element-wise to a correlation matrix through the
    pandas Styler.

    Parameters
    ----------
    s : scalar
        A single cell value (one correlation coefficient).
    threshold : float, default 0.3
        Cut-off on the absolute value; cells strictly below it are highlighted.
        The default reproduces the previously hard-coded limit.

    Returns
    -------
    str
        'background-color: yellow' when abs(s) < threshold, otherwise an
        empty string (no styling). NaN compares as False and is not styled.
    """
    if np.abs(s) < threshold:
        return 'background-color: yellow'
    return ''

# Correlation matrix of the standardized variables, rendered with the cells
# flagged by highlight_small_corr shown on a yellow background.
(
    df_numerical
    .corr()
    .style
    .applymap(highlight_small_corr)
)

Unnamed: 0,population,gini,life_exp,r_dep,rdpc,r_age,mhdi,mhdi_e,mhdi_l,mhdi_i,ETA_S_mean,ETA_X_mean,ETA_S_std,ETA_X_std,car_robbery
population,1.0,-0.054675,-0.036575,-0.001223,-0.05628,-0.087513,0.006503,0.058314,-0.036797,-0.036152,0.227976,0.135404,-0.231206,-0.20756,0.857882
gini,-0.054675,1.0,0.783095,-0.483854,0.771103,0.389367,0.793999,0.748193,0.784186,0.825146,-0.438105,-0.423091,-0.087591,-0.259206,0.125121
life_exp,-0.036575,0.783095,1.0,-0.679339,0.793736,0.5577,0.988821,0.980832,0.99999,0.955811,-0.588441,-0.575795,-0.220995,-0.347302,0.193502
r_dep,-0.001223,-0.483854,-0.679339,1.0,-0.57208,-0.264301,-0.677171,-0.67232,-0.679756,-0.658771,0.63552,0.634393,0.203944,0.332797,-0.187276
rdpc,-0.05628,0.771103,0.793736,-0.57208,1.0,0.394841,0.855947,0.798065,0.795233,0.932761,-0.456604,-0.427639,-0.214708,-0.360056,0.14021
r_age,-0.087513,0.389367,0.5577,-0.264301,0.394841,1.0,0.540936,0.530758,0.557432,0.524489,-0.532644,-0.522792,-0.18392,-0.312639,0.070729
mhdi,0.006503,0.793999,0.988821,-0.677171,0.855947,0.540936,1.0,0.99072,0.988995,0.981111,-0.549532,-0.541864,-0.22419,-0.351983,0.222118
mhdi_e,0.058314,0.748193,0.980832,-0.67232,0.798065,0.530758,0.99072,1.0,0.980716,0.948517,-0.511835,-0.516941,-0.204392,-0.318231,0.252659
mhdi_l,-0.036797,0.784186,0.99999,-0.679756,0.795233,0.557432,0.988995,0.980716,1.0,0.956462,-0.589221,-0.576528,-0.220822,-0.347566,0.193667
mhdi_i,-0.036152,0.825146,0.955811,-0.658771,0.932761,0.524489,0.981111,0.948517,0.956462,1.0,-0.555725,-0.535311,-0.241224,-0.383321,0.19113
