In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

In [2]:
# Open the current crime database and keep first-degree homicides
# ('Homicidio doloso') from years before 2020.
crimes = pd.read_csv('data/IDM_NM_ene2020.csv', encoding='latin')
crimes.columns = [name.lower() for name in crimes.columns]

# Translate the (lower-cased) Spanish column names to English.
crimes = crimes.rename(columns={'año': 'year',
                                'clave_ent': 'state_key',
                                'entidad': 'state',
                                'cve. municipio': 'key',
                                'municipio': 'municipality',
                                'bien jurídico afectado': 'affected_good',
                                'tipo de delito': 'crime_type',
                                'subtipo de delito': 'crime_subtype',
                                'modalidad': 'modality',
                                'enero': 'January',
                                'febrero': 'February',
                                'marzo': 'March',
                                'abril': 'April',
                                'mayo': 'May',
                                'junio': 'June',
                                'julio': 'July',
                                'agosto': 'August',
                                'septiembre': 'September',
                                'octubre': 'October',
                                'noviembre': 'November',
                                'diciembre': 'December'})

# Filter the rows, then drop the classification columns that are no longer
# needed. drop() returns a new frame rather than mutating the filtered slice
# in place, which pandas can flag with SettingWithCopyWarning.
crimes = (
    crimes[(crimes.crime_subtype == 'Homicidio doloso') & (crimes.year < 2020)]
    .drop(columns=['state_key', 'affected_good', 'crime_type', 'crime_subtype', 'modality'])
)


In [3]:
# Open the old crime database (described as 2011-2014; the filter below keeps
# every year before 2015) and keep first-degree homicides.

crimes_old = pd.read_csv('data/IDM_oct19.csv', encoding='latin')
crimes_old.columns = [name.lower() for name in crimes_old.columns]

# The row filter runs on the original Spanish column names, i.e. before the rename.
keep_rows = (
    (crimes_old.año < 2015)
    & (crimes_old.modalidad == 'HOMICIDIOS')
    & (crimes_old.tipo == 'DOLOSOS')
)

old_column_names = {
    'año': 'year',
    'inegi': 'key',
    'municipio': 'municipality',
    'enero': 'January',
    'febrero': 'February',
    'marzo': 'March',
    'abril': 'April',
    'mayo': 'May',
    'junio': 'June',
    'julio': 'July',
    'agosto': 'August',
    'septiembre': 'September',
    'octubre': 'October',
    'noviembre': 'November',
    'diciembre': 'December',
}

# Municipality and state names are taken from the current database so both
# sources share the same spelling; the old file's own name column is dropped.
municipality_names = crimes[['key', 'municipality', 'state']].drop_duplicates()

crimes_old = (
    crimes_old[keep_rows]
    .rename(columns=old_column_names)
    .drop(columns=['entidad', 'modalidad', 'tipo', 'subtipo', 'municipality'])
    .merge(municipality_names, how='left', on='key')
)

In [4]:
# Append databases
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
crimes = pd.concat([crimes, crimes_old], sort=False)

# Aggregate homicides by year: reshape the twelve month columns to long form,
# then add the monthly counts up per municipality and year.
month_columns = ['January', 'February', 'March', 'April', 'May', 'June',
                 'July', 'August', 'September', 'October', 'November', 'December']

crimes = crimes.melt(id_vars=['key', 'municipality', 'state', 'year'],
                     value_vars=month_columns,
                     var_name='month', value_name='homicides')

# Missing values are replaced with 0 in every column before grouping. This also
# fills missing municipality/state names (possible after the left merge of the
# old database), so those rows are not lost as NaN group keys.
crimes = crimes.fillna(0)

# Sum only the homicide counts; the text 'month' column must not be aggregated.
crimes = crimes.groupby(['key', 'municipality', 'state', 'year'], as_index=False)['homicides'].sum()

In [5]:
# Spread the yearly totals into one column per year (wide format); municipality
# and year combinations with no rows get 0.
municipality_id = ['key', 'municipality', 'state']
crimes = pd.pivot_table(crimes, values='homicides', index=municipality_id,
                        columns='year', fill_value=0)
crimes = crimes.reset_index()


In [6]:
# Open population estimates (delivered as two CSV files) and stack them.
pop = pd.read_csv('data/base_municipios_final_datos_01-1.csv', encoding='latin')
pop2 = pd.read_csv('data/base_municipios_final_datos_02-1.csv', encoding='latin')
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
pop = pd.concat([pop, pop2])

pop.columns = [name.lower() for name in pop.columns]

pop = pop.rename(columns={'año': 'year',
                          'clave': 'key'})

# Keep years before 2020 and drop the descriptive columns. Non-inplace calls
# avoid mutating a filtered slice.
pop = pop[pop.year < 2020].drop(
    columns=['renglon', 'clave_ent', 'nom_ent', 'mun', 'sexo', 'edad_quin'])

# Total per municipality key and year.
# NOTE(review): the dropped 'sexo' / 'edad_quin' columns suggest one row per sex
# and age group, which this sum collapses - confirm against the source file.
pop = pop.groupby(['key', 'year'], as_index=False).sum()


In [7]:
# Pivot population to one column per year, named 'pop<year>' (e.g. 'pop2015')
pop['year'] = 'pop' + pop['year'].astype(str)
pop = pd.pivot_table(pop, values='pob', index='key', columns='year', fill_value=0)
pop = pop.reset_index()

In [8]:
# Get file of 2010 population. Source: INEGI, Censo de Población y Vivienda 2010.
pop3 = pd.read_csv('data/INEGI_Exporta_20200229165754.csv', encoding='latin')
pop3 = pop3.rename(columns={'Total': 'pop2010'})

# Strip whitespace from the key and thousands separators from the total, then
# cast both to integers. The cleaned columns are assigned back explicitly: an
# inplace replace() on a column accessed through the frame is chained
# assignment, which warns in pandas 2.x and has no effect under Copy-on-Write.
pop3['key'] = pop3['key'].replace('\\s', value='', regex=True).astype(np.int64)
pop3['pop2010'] = pop3['pop2010'].replace('\\,', value='', regex=True).astype(np.int64)

pop3 = pop3.drop(columns=['mun', 'Hombre', 'Mujer'])

# NOTE(review): presumably this removes rows that are not municipalities
# (e.g. state-level totals with short keys) - confirm against the source file.
pop3 = pop3[pop3.key > 1000]

In [9]:
# Combine the yearly population columns with the 2010 census column; the inner
# join keeps only keys present in both tables.
pop = pd.merge(pop, pop3, how='inner', on='key')

In [10]:
# Join homicide counts with population on the municipality key; the inner join
# keeps only municipalities present in both tables.
homicides = pd.merge(crimes, pop, how='inner', on='key')

In [11]:
# Create homicide rate variables: homicides per 100,000 inhabitants,
# stored as 'homrate<year>' for 2011-2019.

for y in range(2011, 2020):
    # Years before 2015 use the 2010 population column; later years use their
    # own 'pop<year>' column. The name is kept in `pop_col` so the population
    # DataFrame `pop` is not overwritten with a string.
    pop_col = 'pop2010' if y < 2015 else 'pop' + str(y)

    # A population of 0 produces inf (or NaN for 0/0) here.
    homicides['homrate' + str(y)] = homicides[y] / (homicides[pop_col]/100000)

In [12]:
# Create region variable

# Shorten the three official long state names so they match the region lists.
short_state_names = {
    'Coahuila de Zaragoza': 'Coahuila',
    'Michoacán de Ocampo': 'Michoacán',
    'Veracruz de Ignacio de la Llave': 'Veracruz',
}
for long_name, short_name in short_state_names.items():
    homicides.loc[homicides.state == long_name, 'state'] = short_name

# States belonging to each region.
nw = ['Baja California', 'Baja California Sur', 'Chihuahua', 'Sinaloa', 'Sonora']
ne = ['Coahuila', 'Durango', 'Nuevo León', 'San Luis Potosí', 'Tamaulipas']
w = ['Aguascalientes', 'Colima', 'Guanajuato', 'Jalisco', 'Michoacán', 'Nayarit', 'Querétaro', 'Zacatecas']
c = ['Ciudad de México', 'México', 'Guerrero', 'Hidalgo', 'Morelos', 'Puebla', 'Tlaxcala']
se = ['Campeche', 'Chiapas', 'Oaxaca', 'Quintana Roo', 'Tabasco', 'Veracruz', 'Yucatán']

# The first matching region wins. `se` is never tested explicitly: any state not
# found in the four lists below (including an unexpected or missing name) is
# labelled 'Southeast'.
region_members = [(nw, 'Northwest'), (ne, 'Northeast'), (w, 'West'), (c, 'Central')]
homicides['region'] = np.select(
    [homicides.state.isin(members) for members, label in region_members],
    [label for members, label in region_members],
    default='Southeast',
)


In [13]:
# Replace NaN with 0 (fillna does not touch infinite values)
homicides = homicides.fillna(0)

# Write the table to CSV (the row index is written as the first column)
homicides.to_csv('data/homicides.csv')

In [14]:
# Open the states shapefile and lower-case its column names
states = gpd.read_file('data/00ent.shp')
states.columns = [name.lower() for name in states.columns]

In [15]:
# Simplify geometry. This runs before reprojection, so the tolerance is in the
# units of the source coordinates.
# NOTE(review): presumably metres (EPSG:6362 is assumed below) - confirm.
states.geometry = states.geometry.simplify(tolerance=500)

# Transform crs: declare the source CRS as EPSG:6362, overriding whatever was
# read from the file, then reproject to WGS84 longitude/latitude (EPSG:4326).
# set_crs/to_crs with `epsg=` replace the deprecated {'init': 'epsg:...'} dicts.
states = states.set_crs(epsg=6362, allow_override=True)
states = states.to_crs(epsg=4326)

  "`from_crs` is deprecated and will be removed in 2.2.0. "


In [16]:
# Quick visual check of the simplified, reprojected state polygons
states.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x11b165978>

In [17]:
# Create region variable

# Shorten the three official long state names so they match the region lists.
short_state_names = {
    'Coahuila de Zaragoza': 'Coahuila',
    'Michoacán de Ocampo': 'Michoacán',
    'Veracruz de Ignacio de la Llave': 'Veracruz',
}
for long_name, short_name in short_state_names.items():
    states.loc[states.nomgeo == long_name, 'nomgeo'] = short_name

# Uses the nw / ne / w / c state lists defined earlier in the notebook. The first
# matching region wins; any state not found in those four lists is labelled
# 'Southeast'.
region_members = [(nw, 'Northwest'), (ne, 'Northeast'), (w, 'West'), (c, 'Central')]
states['region'] = np.select(
    [states.nomgeo.isin(members) for members, label in region_members],
    [label for members, label in region_members],
    default='Southeast',
)

In [18]:
# Write the state polygons (including the region column) to GeoJSON
states.to_file('data/states.geojson', driver='GeoJSON')

In [19]:
# Open the municipalities shapefile and lower-case its column names
mun = gpd.read_file('data/00mun.shp')
mun.columns = [name.lower() for name in mun.columns]

In [20]:
# Change geometry for centroids. The centroid is computed before reprojection,
# i.e. on the source coordinates rather than on longitude/latitude.
mun.geometry = mun.geometry.centroid

# Transform crs: declare the source CRS as EPSG:6362, overriding whatever was
# read from the file, then reproject to WGS84 longitude/latitude (EPSG:4326).
# set_crs/to_crs with `epsg=` replace the deprecated {'init': 'epsg:...'} dicts.
mun = mun.set_crs(epsg=6362, allow_override=True)
mun = mun.to_crs(epsg=4326)

  "`from_crs` is deprecated and will be removed in 2.2.0. "


In [21]:
# Name the municipality code column 'key'
mun = mun.rename(columns={'cvegeo': 'key'})

# Convert the code to a number and back to text. If the code is read as text,
# this removes leading zeros (e.g. '01001' becomes '1001').
# NOTE(review): the key ends up as a string here, whereas the population key is
# cast to int64 elsewhere in the notebook - confirm what downstream joins expect.
mun['key'] = pd.to_numeric(mun['key']).astype(str)


In [25]:
# Extract the point coordinates into plain columns: y is stored as 'lat' and
# x as 'lon'.

mun = mun.assign(lat=mun.geometry.y, lon=mun.geometry.x)

In [28]:
# Inspect the longitude column taken from the geometry's x coordinate
mun.lon

0      -102.295872
1      -102.045590
2      -102.704911
3      -102.297038
4      -102.445700
           ...    
2460   -101.714324
2461   -102.851571
2462   -102.678055
2463   -102.312009
2464   -103.343882
Name: lon, Length: 2465, dtype: float64

In [23]:
# Write the municipality table (geometry plus its attribute columns) to GeoJSON
mun.to_file('data/municipalities.geojson', driver='GeoJSON')

# Read the states GeoJSON back into `test`.
# NOTE(review): `test` is not used in the visible part of the notebook, and this
# reads the states file rather than the municipalities file written just above -
# confirm whether this is a leftover check.
test = gpd.read_file('data/states.geojson')