### Modeling Suicide Rates in Mexico
#### Final Project
Team: Victor González | Felipe Murillo | Marcela A. Vasquez
    
TEC Data Bootcamp | 2020

This Jupyter notebook collects Mexico's INEGI suicide and population data to construct a dataset
classified by Mexico's 32 entities for further visualization and analysis.

---

### Import Dependencies

In [1]:
#import shapely.geometry
import geopandas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Specify File Locations

In [2]:
# Input files, given relative to the notebook's working directory.
# Excel workbook with the suicide counts (read into `mxSuicides` below).
mxFile = "../Data/Mexico Research Data/Mental_06.xlsx"
# Presumably a hex-map layout of Mexico; it is not read in any cell shown here.
mxHex = "../Data/Mexico Research Data/hex_map_mexico.csv"
# CSV with the population figures (read into `population` below).
# NOTE(review): the name `mxPop` is later rebound to the grouped population
# DataFrame, so re-running the read_csv cell after that point will fail.
mxPop = "../Data/Mexico Research Data/pob_ini_proyecciones.csv"

### Pull in Suicide Data

In [3]:
# Load the suicide workbook; the displayed frame is wide, with one
# <year>_Total / <year>_Hombres / <year>_Mujeres column triple per year.
mxSuicides = pd.read_excel(mxFile)

In [4]:
# Preview the first rows to check the column layout (Estado, Edad, per-year counts).
mxSuicides.head(20)

Unnamed: 0,Estado,Edad,2010_Total,2010_Hombres,2010_Mujeres,2011_Total,2011_Hombres,2011_Mujeres,2012_Total,2012_Hombres,...,2015_Mujeres,2016_Total,2016_Hombres,2016_Mujeres,2017_Total,2017_Hombres,2017_Mujeres,2018_Total,2018_Hombres,2018_Mujeres
0,Total nacional,Total,5012,4091,921,5718,4621,1095,5549,4470,...,1280,6370,5181,1187,6559,5323,1233,6808,5540,1265
1,Total nacional,10 a 14 años,187,111,76,249,141,108,208,126,...,96,228,113,115,209,115,94,229,121,108
2,Total nacional,15 a 19 años,685,475,210,859,600,259,826,589,...,281,774,535,239,795,571,224,800,566,234
3,Total nacional,20 a 24 años,761,614,147,916,729,187,877,694,...,218,1035,866,169,1027,840,187,1035,831,204
4,Total nacional,25 a 29 años,608,507,101,716,575,141,662,560,...,143,816,677,139,868,711,157,890,741,149
5,Total nacional,30 a 34 años,528,448,80,559,470,89,592,479,...,129,675,574,101,746,640,106,810,678,132
6,Total nacional,35 a 39 años,549,461,88,521,443,78,535,451,...,118,628,533,95,652,535,116,690,571,119
7,Total nacional,40 a 44 años,352,298,54,448,385,63,438,365,...,86,552,451,101,551,447,104,559,488,71
8,Total nacional,45 a 49 años,320,257,63,346,296,49,332,265,...,61,414,342,72,428,365,63,472,400,72
9,Total nacional,50 a 54 años,244,215,29,271,234,37,253,214,...,45,320,275,45,352,301,51,376,318,58


In [5]:
# English gender labels, listed in the same order as the per-year source
# columns (<year>_Total, <year>_Hombres, <year>_Mujeres); read by pullYear.
gender = ["Total","Male","Female"]
# Years 2010 through 2018 inclusive (np.arange excludes the stop value).
years = np.arange(2010,2019)

In [6]:
def pullYear(year, mxYEAR, genders=None):
    """Reshape one year's wide suicide counts into long form.

    Parameters
    ----------
    year : int
        Year stamped on every output row.
    mxYEAR : pandas.DataFrame
        Slice holding the columns "Estado" and "Edad" followed by one count
        column per gender label. The count columns are read by position
        (third column onwards), so their order must match ``genders``.
        The index must be integer-valued because the output index is
        derived from it.
    genders : sequence of str, optional
        Labels for the count columns, in column order. When omitted, the
        notebook-level ``gender`` list is used.

    Returns
    -------
    pandas.DataFrame
        Columns "State", "Year", "Age", "Gender", "noSuicides", with one row
        per (input row, gender) pair in input-row order. The index is
        ``len(genders) * input_index_label + gender_position``.
    """
    if genders is None:
        genders = gender
    nGenders = len(genders)
    nRows = len(mxYEAR)

    # Every lookup below is positional, so a frame whose index is not
    # 0..n-1 (e.g. a filtered slice) is still read row by row correctly.
    counts = mxYEAR.iloc[:, 2:2 + nGenders].to_numpy()
    rowLabels = mxYEAR.index.to_numpy()

    # Build all rows in one shot instead of concatenating one-row frames.
    # repeat() holds each input row for nGenders consecutive output rows,
    # tile() cycles through the gender labels within each of those groups.
    return pd.DataFrame(
        {
            "State": np.repeat(mxYEAR["Estado"].to_numpy(), nGenders),
            "Year": year,
            "Age": np.repeat(mxYEAR["Edad"].to_numpy(), nGenders),
            "Gender": np.tile(np.asarray(genders, dtype=object), nRows),
            # reshape(-1) flattens row by row: Total, Male, Female, Total, ...
            "noSuicides": counts.reshape(-1),
        },
        index=np.repeat(rowLabels, nGenders) * nGenders
        + np.tile(np.arange(nGenders), nRows),
    )

In [7]:
# Reshape each year's <year>_Total / <year>_Hombres / <year>_Mujeres columns
# with pullYear and stack the results into a single long-format frame.
yearFrames = []
for yr in years:
    # Column order matters: pullYear reads the three count columns by position.
    yearCols = ["Estado", "Edad"] + [
        "{}_{}".format(yr, suffix) for suffix in ("Total", "Hombres", "Mujeres")
    ]
    yearFrames.append(pullYear(int(yr), mxSuicides[yearCols]))

# ignore_index gives the combined frame a fresh 0..n-1 index.
mxFinal = pd.concat(yearFrames, ignore_index=True, sort=False)

In [17]:
# Cast the year and the suicide count to integers in a single pass.
mxFinal = mxFinal.astype({"Year": int, "noSuicides": int})

In [18]:
# Save the long-format counts. At this point the frame still carries the
# Spanish "Age" labels and the "Total" rows; both are changed in later cells.
# The DataFrame index is written too (to_csv default), as the first column.
mxFinal.to_csv("../Data/Mexico Research Data/mexico_suicides.csv")

In [19]:
# List the distinct age labels so the translation below can cover each one.
mxFinal["Age"].unique()

array(['Total', '10 a 14 años', '15 a 19 años', '20 a 24 años',
       '25 a 29 años', '30 a 34 años', '35 a 39 años', '40 a 44 años',
       '45 a 49 años', '50 a 54 años', '55 a 59 años', '60 años y más',
       'No especificado'], dtype=object)

In [20]:
# Translate the Spanish age-bracket labels into compact English ones.
ageLabels = {
    "10 a 14 años": "10-14yrs",
    "15 a 19 años": "15-19yrs",
    "20 a 24 años": "20-24yrs",
    "25 a 29 años": "25-29yrs",
    "30 a 34 años": "30-34yrs",
    "35 a 39 años": "35-39yrs",
    "40 a 44 años": "40-44yrs",
    "45 a 49 años": "45-49yrs",
    "50 a 54 años": "50-54yrs",
    "55 a 59 años": "55-59yrs",
    "60 años y más": "60+yrs",
    "Total": "Total",
}
# map() leaves NaN for any label missing from the dict (e.g. "No especificado");
# those become "Undefined". The result is a plain list, one entry per row.
newAgeGrps = mxFinal["Age"].map(ageLabels).fillna("Undefined").tolist()

In [21]:
# Swap the Spanish "Age" column for the translated "Age Grp" labels
# (the new column is appended last, then the old one is removed).
mxFinal = mxFinal.assign(**{"Age Grp": newAgeGrps}).drop(columns=["Age"])

In [22]:
# Remove the rows whose Gender is "Total", keeping only Male/Female rows.
isGenderTotal = (mxFinal["Gender"] == "Total").to_numpy()
indexes = mxFinal.index[isGenderTotal]
mxFinal = mxFinal.drop(index=indexes)
mxFinal

Unnamed: 0,State,Year,Gender,noSuicides,Age Grp
1,Total nacional,2010,Male,4091,Total
2,Total nacional,2010,Female,921,Total
4,Total nacional,2010,Male,111,10-14yrs
5,Total nacional,2010,Female,76,10-14yrs
7,Total nacional,2010,Male,475,15-19yrs
...,...,...,...,...,...
11576,Zacatecas,2018,Female,1,55-59yrs
11578,Zacatecas,2018,Male,11,60+yrs
11579,Zacatecas,2018,Female,0,60+yrs
11581,Zacatecas,2018,Male,0,Undefined


In [23]:
# Remove the rows whose age group is the all-ages "Total".
isAgeTotal = (mxFinal["Age Grp"] == "Total").to_numpy()
indexes2 = mxFinal.index[isAgeTotal]
mxFinal = mxFinal.drop(index=indexes2)
mxFinal

Unnamed: 0,State,Year,Gender,noSuicides,Age Grp
4,Total nacional,2010,Male,111,10-14yrs
5,Total nacional,2010,Female,76,10-14yrs
7,Total nacional,2010,Male,475,15-19yrs
8,Total nacional,2010,Female,210,15-19yrs
10,Total nacional,2010,Male,614,20-24yrs
...,...,...,...,...,...
11576,Zacatecas,2018,Female,1,55-59yrs
11578,Zacatecas,2018,Male,11,60+yrs
11579,Zacatecas,2018,Female,0,60+yrs
11581,Zacatecas,2018,Male,0,Undefined


In [24]:
# Load the population table; `mxPop` is still the CSV path at this point.
# The encoding is given explicitly — presumably because the file holds
# accented Spanish text that is not stored as UTF-8 (TODO confirm).
population = pd.read_csv(mxPop, encoding = "ISO-8859-1")
population

Unnamed: 0,RENGLON,AÑO,ENTIDAD,CVE_GEO,EDAD,SEXO,POBLACION
0,1,1950,República Mexicana,0,0,Hombres,564510
1,2,1950,República Mexicana,0,0,Mujeres,552054
2,3,1950,República Mexicana,0,1,Hombres,505655
3,4,1950,República Mexicana,0,1,Mujeres,496559
4,5,1950,República Mexicana,0,2,Hombres,470139
...,...,...,...,...,...,...,...
599715,599716,2051,Zacatecas,32,107,Mujeres,4
599716,599717,2051,Zacatecas,32,108,Hombres,1
599717,599718,2051,Zacatecas,32,108,Mujeres,2
599718,599719,2051,Zacatecas,32,109,Hombres,0


In [25]:
# Keep only the years 2010-2018, then drop unwanted columns.
inStudyYears = population["AÑO"].gt(2009) & population["AÑO"].lt(2019)
population = population.loc[inStudyYears].drop(columns=["CVE_GEO", "RENGLON"])
population

Unnamed: 0,AÑO,ENTIDAD,EDAD,SEXO,POBLACION
13200,2010,República Mexicana,0,Hombres,1140967
13201,2010,República Mexicana,0,Mujeres,1100958
13202,2010,República Mexicana,1,Hombres,1140835
13203,2010,República Mexicana,1,Mujeres,1102867
13204,2010,República Mexicana,2,Hombres,1141969
...,...,...,...,...,...
592455,2018,Zacatecas,107,Mujeres,1
592456,2018,Zacatecas,108,Hombres,0
592457,2018,Zacatecas,108,Mujeres,0
592458,2018,Zacatecas,109,Hombres,0


In [28]:
# Map a single age in years to its age-group label
def designate_ageGrp(age):
    """Return the age-group label for ``age``.

    Ages of 60 and over map to "60+yrs"; ages from 10 up to (but not
    including) 60 map to their five-year bracket; anything else — including
    values below 10 and values that compare false to every bound, such as
    NaN — maps to "0-9yrs".
    """
    # Lower bounds in descending order: the first bound that `age` reaches
    # identifies its bracket, so no explicit upper bound is needed.
    brackets = (
        (60, "60+yrs"),
        (55, "55-59yrs"),
        (50, "50-54yrs"),
        (45, "45-49yrs"),
        (40, "40-44yrs"),
        (35, "35-39yrs"),
        (30, "30-34yrs"),
        (25, "25-29yrs"),
        (20, "20-24yrs"),
        (15, "15-19yrs"),
        (10, "10-14yrs"),
    )
    for lowerBound, label in brackets:
        if age >= lowerBound:
            return label
    return "0-9yrs"

In [29]:
# Create an age group list: one label per population row, in row order.
# Series.map applies designate_ageGrp to each age directly instead of
# building a full row object for every single lookup.
ageGrp = population["EDAD"].map(designate_ageGrp).tolist()

In [30]:
# Append the age group, keep the columns of interest in their final order,
# and rename them to the English names used by the suicide data.
population = (
    population
    .assign(**{"Age Grp": ageGrp})
    .loc[:, ["ENTIDAD", "AÑO", "Age Grp", "SEXO", "POBLACION"]]
    .rename(columns={
        "ENTIDAD": "State",
        "AÑO": "Year",
        "SEXO": "Gender",
        "POBLACION": "Population (Gender)",
    })
)

In [33]:
# Sum the single-year-of-age rows into one row per state, year, age group
# and gender, then turn the group keys back into ordinary columns.
# Note: this rebinds `mxPop`, which until now held the population CSV path.
mxPop = (
    population
    .groupby(by=["State", "Year", "Age Grp", "Gender"])
    .sum()
    .reset_index()
)
mxPop

Unnamed: 0,State,Year,Age Grp,Gender,Population (Gender)
0,Aguascalientes,2010,0-9yrs,Hombres,130058
1,Aguascalientes,2010,0-9yrs,Mujeres,124996
2,Aguascalientes,2010,10-14yrs,Hombres,63218
3,Aguascalientes,2010,10-14yrs,Mujeres,61490
4,Aguascalientes,2010,15-19yrs,Hombres,60365
...,...,...,...,...,...
7123,Zacatecas,2018,50-54yrs,Mujeres,41463
7124,Zacatecas,2018,55-59yrs,Hombres,32392
7125,Zacatecas,2018,55-59yrs,Mujeres,34681
7126,Zacatecas,2018,60+yrs,Hombres,84591


In [35]:
# Grab population for the whole age group (independent of gender)
ageGrpKeys = ["State", "Year", "Age Grp"]
# Select the numeric column explicitly so the sum never touches "Gender".
temPop = mxPop.groupby(ageGrpKeys)[["Population (Gender)"]].sum()
# transform("sum") returns the group total once per original row, aligned
# with mxPop's own rows, so the result does not rely on every group having
# exactly two gender rows in a fixed order.
popAge = mxPop.groupby(ageGrpKeys)["Population (Gender)"].transform("sum").tolist()

# Add age group population to dataframe
mxPop["Population (AgeGrp)"] = popAge

In [37]:
# Before attempting to join ensure state names match:
# list the state names used by the population data.
mxPop.State.unique()

array(['Aguascalientes', 'Baja California', 'Baja California Sur',
       'Campeche', 'Chiapas', 'Chihuahua', 'Ciudad de México', 'Coahuila',
       'Colima', 'Durango', 'Guanajuato', 'Guerrero', 'Hidalgo',
       'Jalisco', 'Michoacán', 'Morelos', 'México', 'Nayarit',
       'Nuevo León', 'Oaxaca', 'Puebla', 'Querétaro', 'Quintana Roo',
       'República Mexicana', 'San Luis Potosí', 'Sinaloa', 'Sonora',
       'Tabasco', 'Tamaulipas', 'Tlaxcala', 'Veracruz', 'Yucatán',
       'Zacatecas'], dtype=object)

In [38]:
# List the state names used by the suicide data, for comparison with the
# population data's names.
mxFinal.State.unique()

array(['Total nacional', 'Aguascalientes', 'Baja California',
       'Baja California Sur', 'Campeche', 'Coahuila de Zaragoza',
       'Colima', 'Chiapas', 'Chihuahua', 'Ciudad de México', 'Durango',
       'Guanajuato', 'Guerrero', 'Hidalgo', 'Jalisco', 'México',
       'Michoacán de Ocampo', 'Morelos', 'Nayarit', 'Nuevo León',
       'Oaxaca', 'Puebla', 'Querétaro', 'Quintana Roo', 'San Luis Potosí',
       'Sinaloa', 'Sonora', 'Tabasco', 'Tamaulipas', 'Tlaxcala',
       'Veracruz de Ignacio de la Llave', 'Yucatán', 'Zacatecas'],
      dtype=object)

In [39]:
# Rename state & gender names so that we can join cleanly.
# The suicide data adopts the population data's label for the national total.
mxFinal["State"] = mxFinal["State"].replace({"Total nacional": "República Mexicana"})
# The population data adopts the suicide data's long-form state names.
mxPop["State"] = mxPop["State"].replace({
    "Michoacán": "Michoacán de Ocampo",
    "Coahuila": "Coahuila de Zaragoza",
    "Veracruz": "Veracruz de Ignacio de la Llave",
})
# Gender labels are switched to the English ones used in mxFinal.
mxPop["Gender"] = mxPop["Gender"].replace({"Hombres": "Male", "Mujeres": "Female"})

In [40]:
# Spot-check the national rows from 2016 onwards.
isNational = mxPop["State"] == "República Mexicana"
isAfter2015 = mxPop["Year"] > 2015
mxPop.loc[isNational & isAfter2015]

Unnamed: 0,State,Year,Age Grp,Gender,Population (Gender),Population (AgeGrp)
5112,República Mexicana,2016,0-9yrs,Male,11318753,22247500
5113,República Mexicana,2016,0-9yrs,Female,10928747,22247500
5114,República Mexicana,2016,10-14yrs,Male,5693453,11195122
5115,República Mexicana,2016,10-14yrs,Female,5501669,11195122
5116,República Mexicana,2016,15-19yrs,Male,5596310,11047951
...,...,...,...,...,...,...
5179,República Mexicana,2018,50-54yrs,Female,3379060,6476506
5180,República Mexicana,2018,55-59yrs,Male,2588689,5432670
5181,República Mexicana,2018,55-59yrs,Female,2843981,5432670
5182,República Mexicana,2018,60+yrs,Male,6043451,13180639


In [41]:
# Merge population data with suicide data.
# Left join: every suicide row is kept, and rows without a population match
# get missing values in the population columns.
joinKeys = ["State", "Year", "Age Grp", "Gender"]
mxSuicideAge = mxFinal.merge(mxPop, on=joinKeys, how="left")
mxSuicideAge

Unnamed: 0,State,Year,Gender,noSuicides,Age Grp,Population (Gender),Population (AgeGrp)
0,República Mexicana,2010,Male,111,10-14yrs,5666711.0,11170946.0
1,República Mexicana,2010,Female,76,10-14yrs,5504235.0,11170946.0
2,República Mexicana,2010,Male,475,15-19yrs,5552644.0,11026036.0
3,República Mexicana,2010,Female,210,15-19yrs,5473392.0,11026036.0
4,República Mexicana,2010,Male,614,20-24yrs,4991850.0,10168795.0
...,...,...,...,...,...,...,...
7123,Zacatecas,2018,Female,1,55-59yrs,34681.0,67073.0
7124,Zacatecas,2018,Male,11,60+yrs,84591.0,177581.0
7125,Zacatecas,2018,Female,0,60+yrs,92990.0,177581.0
7126,Zacatecas,2018,Male,0,Undefined,,


In [43]:
# Calculate the suicide rate per 100k individuals of the age group.
# NOTE(review): the denominator is the age group's population for both
# genders combined, so this is an age-specific rate, not an age-standardized
# one (no standard-population weighting is applied) — confirm this is intended.
rateCol = 'Suicide Rate (per 100K)'
suicidesPerPerson = mxSuicideAge["noSuicides"].div(mxSuicideAge["Population (AgeGrp)"])
mxSuicideAge[rateCol] = suicidesPerPerson.mul(100000)
mxSuicideAge

Unnamed: 0,State,Year,Gender,noSuicides,Age Grp,Population (Gender),Population (AgeGrp),Suicide Rate (per 100K)
0,República Mexicana,2010,Male,111,10-14yrs,5666711.0,11170946.0,0.993649
1,República Mexicana,2010,Female,76,10-14yrs,5504235.0,11170946.0,0.680336
2,República Mexicana,2010,Male,475,15-19yrs,5552644.0,11026036.0,4.307985
3,República Mexicana,2010,Female,210,15-19yrs,5473392.0,11026036.0,1.904583
4,República Mexicana,2010,Male,614,20-24yrs,4991850.0,10168795.0,6.038080
...,...,...,...,...,...,...,...,...
7123,Zacatecas,2018,Female,1,55-59yrs,34681.0,67073.0,1.490913
7124,Zacatecas,2018,Male,11,60+yrs,84591.0,177581.0,6.194356
7125,Zacatecas,2018,Female,0,60+yrs,92990.0,177581.0,0.000000
7126,Zacatecas,2018,Male,0,Undefined,,,


In [44]:
# Show the 20 highest rates; rows with a missing rate sort last by default.
mxSuicideAge.sort_values(by="Suicide Rate (per 100K)",ascending=False).head(20)

Unnamed: 0,State,Year,Gender,noSuicides,Age Grp,Population (Gender),Population (AgeGrp),Suicide Rate (per 100K)
3280,Campeche,2014,Male,9,50-54yrs,20568.0,41714.0,21.57549
4790,Aguascalientes,2016,Male,16,45-49yrs,35285.0,74938.0,21.350983
4042,Baja California Sur,2015,Male,11,35-39yrs,28201.0,55605.0,19.782394
4070,Campeche,2015,Male,10,45-49yrs,25038.0,50902.0,19.645593
894,Campeche,2011,Male,14,25-29yrs,34434.0,71671.0,19.533703
566,Quintana Roo,2010,Male,13,45-49yrs,35281.0,67809.0,19.171496
562,Quintana Roo,2010,Male,20,35-39yrs,54175.0,106297.0,18.815206
4116,Colima,2015,Male,9,40-44yrs,23291.0,47968.0,18.762508
4948,Chihuahua,2016,Male,60,20-24yrs,163254.0,321402.0,18.66821
102,Campeche,2010,Male,13,25-29yrs,33539.0,70127.0,18.537796


In [45]:
# Save the merged table with the computed rates. Rows in the "Undefined" age
# group are included and have no population or rate value.
mxSuicideAge.to_csv("../Data/Mexico Research Data/mxAgeStandardSuicideRates.csv")

In [46]:
# List the age groups present in the final table.
mxSuicideAge["Age Grp"].unique()

array(['10-14yrs', '15-19yrs', '20-24yrs', '25-29yrs', '30-34yrs',
       '35-39yrs', '40-44yrs', '45-49yrs', '50-54yrs', '55-59yrs',
       '60+yrs', 'Undefined'], dtype=object)