In [32]:
import os
import conda
import pandas as pd
import numpy as np
import json
from scipy.optimize import curve_fit
# from shapely.geometry import Polygon as Poly
import unidecode


# Widen pandas' display limits so wide/long frames are rendered without truncation.
pd.set_option('display.max_columns', 250)
pd.set_option('display.max_rows', 2500)


# Set PROJ_LIB to <conda prefix>/share/proj, deriving the prefix from the
# location of the installed `conda` package.
# The prefix is everything before the LAST "lib" path component. Matching the
# whole component (os.sep + 'lib' + os.sep) instead of the bare substring 'lib'
# keeps prefixes such as /home/libby/... or /usr/lib/... intact.
conda_file_dir = conda.__file__
conda_dir = conda_file_dir.rsplit(os.sep + 'lib' + os.sep, 1)[0] + os.sep
proj_lib = os.path.join(conda_dir, 'share', 'proj')
os.environ["PROJ_LIB"] = proj_lib

import matplotlib.pyplot as plt
import matplotlib.cm
import matplotlib.colors as colors

import geopandas
from geopandas.tools import sjoin
import geoplot as gplt
import geoplot.crs as gcrs

import folium

### Health index 

In [70]:
# Lower/upper bounds used to rescale life expectancy into the health index.
LifeExpectancy_Min = 20
LifeExpectancy_Max = 85

# Load the per-province table and rename the columns used below.
LifeExpectancyProvince_df = (
    pd.read_csv('LifeExpectancy_Province.csv', sep=',', header=0, encoding='utf-8')
    .rename(columns={'Total': 'Life Expectancy', 'Provincias': 'Provincia'})
)

# Health index = (life expectancy - min) / (max - min)
life_expectancy_above_min = LifeExpectancyProvince_df['Life Expectancy'] - LifeExpectancy_Min
LifeExpectancyProvince_df['Health index'] = life_expectancy_above_min / (LifeExpectancy_Max - LifeExpectancy_Min)

# Working frame for the HDI components: an independent copy indexed by province.
HDIProvince_df = LifeExpectancyProvince_df.copy().set_index('Provincia')
HDIProvince_df

Unnamed: 0_level_0,Sexo,Periodo,Life Expectancy,Health index
Provincia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Albacete,Ambos sexos,2019,83.3,0.973846
Alicante,Ambos sexos,2019,83.19,0.972154
Almeria,Ambos sexos,2019,82.06,0.954769
Alava,Ambos sexos,2019,84.79,0.996769
Asturias,Ambos sexos,2019,82.82,0.966462
Avila,Ambos sexos,2019,84.02,0.984923
Badajoz,Ambos sexos,2019,82.36,0.959385
Illes Balears,Ambos sexos,2019,83.59,0.978308
Barcelona,Ambos sexos,2019,84.08,0.985846
Bizkaia,Ambos sexos,2019,83.61,0.978615


### Merge CCAA

In [73]:
# Province -> CCAA lookup table; accents are stripped from both of its columns
# with unidecode.
CCAA_Province_df = pd.read_csv('CCAA_Province.csv', sep=',', header=0, encoding='utf-8')
CCAA_Province_df['CCAA'] = CCAA_Province_df['CCAA'].apply(unidecode.unidecode)
CCAA_Province_df['Provincia'] = CCAA_Province_df['Provincia'].astype(str)
CCAA_Province_df['Provincia'] = CCAA_Province_df['Provincia'].apply(unidecode.unidecode)

# Make the cell safe to re-run: expose 'Provincia' as a regular column and drop
# a 'CCAA' column left by a previous run, so merging again does not produce
# CCAA_x / CCAA_y.
if HDIProvince_df.index.name == 'Provincia':
    HDIProvince_df = HDIProvince_df.reset_index()
HDIProvince_df = HDIProvince_df.drop(columns=['CCAA'], errors='ignore')

# The merge below is an inner join, so any province whose name has no exact
# match in the lookup table is dropped. Report those rows instead of losing
# them silently.
unmatched_provinces = HDIProvince_df.loc[
    ~HDIProvince_df['Provincia'].isin(CCAA_Province_df['Provincia']), 'Provincia'
].tolist()
if unmatched_provinces:
    print('Provinces without a CCAA match (dropped by the merge):', unmatched_provinces)

HDIProvince_df = HDIProvince_df.merge(CCAA_Province_df, on='Provincia')
HDIProvince_df

Unnamed: 0,Provincia,Sexo,Periodo,Life Expectancy,Health index,CCAA
0,Albacete,Ambos sexos,2019,83.3,0.973846,Castilla-La Mancha
1,Alicante,Ambos sexos,2019,83.19,0.972154,Comunitat Valenciana
2,Almeria,Ambos sexos,2019,82.06,0.954769,Andalucia
3,Alava,Ambos sexos,2019,84.79,0.996769,Pais Vasco
4,Asturias,Ambos sexos,2019,82.82,0.966462,Principado de Asturias
5,Avila,Ambos sexos,2019,84.02,0.984923,Castilla y Leon
6,Badajoz,Ambos sexos,2019,82.36,0.959385,Extremadura
7,Illes Balears,Ambos sexos,2019,83.59,0.978308,Illes Balears
8,Barcelona,Ambos sexos,2019,84.08,0.985846,Cataluna
9,Bizkaia,Ambos sexos,2019,83.61,0.978615,Pais Vasco


### Expected years of schooling index

In [43]:
# Normalisation bounds for expected years of schooling, plus the single
# Spain-wide figure that is applied to every row.
ExpectedYearsOfSchooling_Min = 0
ExpectedYearsOfSchooling_Max = 18
ExpectedYearsOfSchooling_Spain_Average = 17.9

# The index is one scalar computed from the constants above; assigning it
# broadcasts the same value to all rows of the frame.
expected_schooling_index = (
    (ExpectedYearsOfSchooling_Spain_Average - ExpectedYearsOfSchooling_Min)
    / (ExpectedYearsOfSchooling_Max - ExpectedYearsOfSchooling_Min)
)
HDIProvince_df['Expected years of schooling index'] = expected_schooling_index
HDIProvince_df

Unnamed: 0,Provincia,Sexo,Periodo,Life Expectancy,Health index,CCAA,Expected years of schooling index
0,Albacete,Ambos sexos,2019,83.3,0.973846,Castilla-La Mancha,0.994444
1,Alicante,Ambos sexos,2019,83.19,0.972154,Comunitat Valenciana,0.994444
2,Almeria,Ambos sexos,2019,82.06,0.954769,Andalucia,0.994444
3,Alava,Ambos sexos,2019,84.79,0.996769,Pais Vasco,0.994444
4,Avila,Ambos sexos,2019,84.02,0.984923,Castilla y Leon,0.994444
5,Badajoz,Ambos sexos,2019,82.36,0.959385,Extremadura,0.994444
6,Illes Balears,Ambos sexos,2019,83.59,0.978308,Illes Balears,0.994444
7,Barcelona,Ambos sexos,2019,84.08,0.985846,Cataluna,0.994444
8,Bizkaia,Ambos sexos,2019,83.61,0.978615,Pais Vasco,0.994444
9,Burgos,Ambos sexos,2019,84.49,0.992154,Castilla y Leon,0.994444


### Mean years of schooling index

In [87]:
# Years of schooling credited to each attainment level.
Years_Abandono = 10/2  # relative to ISCED level 3 (CINE 3)
Years_ESO = 10
Years_Bachillerato = 12
Years_EducacionSuperior = 16


# Attainment shares and early school-leaving rate, joined on 'CCAA'.
NivelDeFormacion_df = pd.read_csv('NivelDeFormacion.csv', sep=',', header=0, encoding='utf-8')
AbandonoEducacion_df = pd.read_csv('AbandonoEducacion.csv', sep=',', header=0, encoding='utf-8')  # relative to ISCED level 3 (CINE 3)

NivelDeFormacion_df = NivelDeFormacion_df.merge(AbandonoEducacion_df, on='CCAA')

# Share that completed ESO: the below-Bachillerato share minus the early
# school-leaving rate.
NivelDeFormacion_df['ESO'] = NivelDeFormacion_df['Inferior a Bachillerato'] - NivelDeFormacion_df['Tasa de abandono escolar']

# Source: "Exploitation of the educational variables of the Labour Force
# Survey (EPA), year 2019 - summary note":
# Of the 48.5% of the population aged 25 to 29 that has attained Higher
# Education, 14.9 points correspond to non-university higher education and
# 33.6 points to a university degree or equivalent.
# NOTE(review): this column is currently a plain copy of 'Educacion Superior';
# the split described above is not applied, and the column is not used in the
# mean computed below.
NivelDeFormacion_df['Educacion Superior no Uni'] = NivelDeFormacion_df['Educacion Superior']


# Weighted sum: each share (in percent) times the years credited to its level.
NivelDeFormacion_df['Mean years of schooling'] = (NivelDeFormacion_df['Tasa de abandono escolar']/100*Years_Abandono +
                                                  NivelDeFormacion_df['ESO']/100*Years_ESO +
                                                  NivelDeFormacion_df['Bachillerato']/100*Years_Bachillerato+
                                                  NivelDeFormacion_df['Educacion Superior']/100*Years_EducacionSuperior)

# Only the last expression of a cell is displayed, so print the column mean
# explicitly; as a bare mid-cell expression its value was discarded.
print('Mean years of schooling (unweighted mean over rows):',
      NivelDeFormacion_df['Mean years of schooling'].mean())

NivelDeFormacion_df

Unnamed: 0,CCAA,Inferior a Bachillerato,Bachillerato,Educacion Superior,Tasa de abandono escolar,ESO,Mean years of schooling
0,Andalucia,47.8,21.0,31.2,21.627,26.173,11.21065
1,Aragon,34.3,26.5,39.3,14.6142,19.6858,12.16729
2,Principado de Asturias,33.1,23.6,43.3,12.4198,20.6802,12.44901
3,Illes Balears,41.4,26.7,31.9,24.1616,17.2384,11.23992
4,Canarias,42.7,24.5,32.8,20.7838,21.9162,11.41881
5,Cantabria,31.3,26.7,42.0,12.7723,18.5277,12.415385
6,Castilla y Leon,38.6,23.0,38.4,14.3017,24.2983,12.048915
7,Castilla-La Mancha,48.5,21.5,30.0,20.2494,28.2506,11.21753
8,Cataluna,36.3,21.3,42.4,19.0215,17.2785,12.018925
9,Comunitat Valenciana,39.9,23.2,36.9,16.4292,23.4708,11.85654
