####  Datos de shapes descargados de: 
https://www.inegi.org.mx/app/mapas/
http://internet.contenidos.inegi.org.mx/contenidos/Productos/prod_serv/contenidos/espanol/bvinegi/productos/geografia/CGURA_Junio2016/Nuevo_Leon/702825218867_s.zip

## Importar data frames

In [1]:
import pandas as pd
from functools import partial
from pyproj import Proj, transform

In [2]:
# Load the crime records CSV into the working DataFrame `df`.
df = pd.read_csv("data/crime_inegi.csv")

In [3]:
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,year,date,lat,lon,news,cassualties,url,neighborhood,point,clave_mun,nom_mun,clave_loc,nom_loc,ageb
0,2015,2015-10-12,25.617672,-100.259318,Persiguen y ejecutan a hombre,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,EJIDAL LOS REMATES,"[2673990.068792671, 1508739.2403134028]",39.0,Monterrey,1.0,Monterrey,3729
1,2015,2015-10-02,25.632075,-100.286088,Ejecutan a uno en plaza en Garza Sada,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,REVOLUCION PROLETARIA,"[2671295.4132258967, 1510294.803596502]",39.0,Monterrey,1.0,Monterrey,4727
2,2015,2015-10-01,25.675784,-100.475788,Lo ejecutan frente a su casa,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,HDA SANTA CATARINA (FOMERREY 29),"[2652285.5673206407, 1514899.6189773784]",48.0,Santa Catarina,1.0,Ciudad Santa Catarina,545
3,2015,2015-09-24,25.657586,-100.322029,Lo esperan para ejecutarlo,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,INDEPENDENCIA,"[2667670.5901743174, 1513064.839283476]",39.0,Monterrey,1.0,Monterrey,1968
4,2015,2015-09-24,25.683207,-100.30565,Matan a 2 clientes en ataque 13 a bar,2.0,http://www.elnorte.com/aplicaciones/articulo/d...,CENTRO DE MONTERREY,"[2669273.8077375377, 1515908.7290589423]",39.0,Monterrey,1.0,Monterrey,1281


In [4]:
# (rows, columns) of the loaded frame.
print(df.shape)

(2114, 14)


In [5]:
# Column dtypes; `point` is read as text (object) and is parsed in the next cell.
df.dtypes

year              int64
date             object
lat             float64
lon             float64
news             object
cassualties     float64
url              object
neighborhood     object
point            object
clave_mun       float64
nom_mun          object
clave_loc       float64
nom_loc          object
ageb             object
dtype: object

In [6]:
# Split the "[x, y]" text stored in `point` into two float columns.
# `point` itself is left untouched so this cell can be re-run safely, and the
# brackets are removed with a literal strip instead of a replace pattern whose
# regex interpretation depends on the pandas version.
point_xy = df["point"].str.strip("[]").str.split(",", expand=True)
df["x"] = point_xy[0].astype(float)
df["y"] = point_xy[1].astype(float)

## Mapear a shapes de INEGI

In [7]:
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
import shapefile as shp

#### Los archivos que terminan en `a` se refieren a: Área geoestadística básica (AGEB). Aquí se carga el archivo `19l`, que contiene los polígonos de localidades.

In [8]:
# Open the "19l" shapefile; attribute text is decoded as latin1.
sf = shp.Reader("data/inegi_NL_2016/conjunto_de_datos/19l", encoding="latin1")

In [9]:
# Reader summary: number and type of shapes, number of records and fields.
print(sf)

shapefile Reader
    945 shapes (type 'POLYGON')
    945 records (7 fields)


#### Son 945 Localidades

In [10]:
# Bounding box of the first shape, rendered with three decimals.
s = sf.shape(0)
[format(coord, ".3f") for coord in s.bbox]

['2674785.542', '1275446.771', '2675346.485', '1276102.915']

In [11]:
# Attribute field descriptors. The list starts with DeletionFlag, which has no
# counterpart in the record shown by sf.record(0), so record positions are
# shifted by one relative to this list.
sf.fields

[('DeletionFlag', 'C', 1, 0),
 ['CVE_ENT', 'C', 2, 0],
 ['CVE_MUN', 'C', 3, 0],
 ['CVE_LOC', 'C', 4, 0],
 ['NOMLOC', 'C', 110, 0],
 ['TIPO', 'C', 6, 0],
 ['CONDICION', 'C', 11, 0]]

In [12]:
# Attribute values of the first shape, as a sample of the record layout.
sf.record(0)

Record #0: ['19', '036', '0021', 'San José de Cuatro Caminos', 'R', 'H']

In [13]:
# Build one matplotlib Path per shapefile polygon, in reader order, so that a
# position in `paths` can be used directly as a record index for sf.record().
# Iterating sf.shapes() once avoids loading all shapes just to count them and
# then re-reading each one by index.
# NOTE(review): a shape made of several parts (rings) is still passed as one
# vertex sequence, exactly as before -- confirm whether multi-part polygons
# occur in this layer.
paths = [Path(shape.points) for shape in sf.shapes()]

In [14]:
# Sanity check of the point-in-polygon test against the first path,
# using a hard-coded sample coordinate.
paths[0].contains_point((2690417.3087897752, 1512217.7368954867))

False

In [15]:
def point_in_path(row, shape_paths=None):
    """Return the index of the first polygon path that contains the row's point.

    Parameters
    ----------
    row : mapping-like
        Must expose the point coordinates under the keys ``"x"`` and ``"y"``
        (for example a DataFrame row passed by ``df.apply(..., axis=1)``).
    shape_paths : sequence, optional
        Objects exposing ``contains_point((x, y))``. When omitted, the
        notebook-level ``paths`` list is used.

    Returns
    -------
    int or None
        Position of the first path containing the point, or ``None`` when no
        path contains it.
    """
    if shape_paths is None:
        shape_paths = paths
    point = (row["x"], row["y"])
    for index, candidate in enumerate(shape_paths):
        if candidate.contains_point(point):
            return index
    # Explicit "no match" result; callers treat it as a missing value.
    return None

In [16]:
def getRecord(r, j, reader=None):
    """Look up field ``j`` of shapefile record ``r``.

    Parameters
    ----------
    r : number or missing
        Record index (as produced by ``point_in_path``). ``None`` and NaN are
        both treated as "no record".
    j : int
        Position of the wanted value inside the record.
    reader : object, optional
        Object exposing ``record(index)``. When omitted, the notebook-level
        shapefile reader ``sf`` is used.

    Returns
    -------
    int, str or float
        NaN when ``r`` is missing. For ``j == 3`` the stored value is returned
        unchanged (in the sample record shown in this notebook that position
        holds the locality name); any other position is converted to ``int``.
    """
    # pd.isna accepts None as well as NaN, and keeps the function independent
    # of the numpy import that only happens later in the notebook.
    if pd.isna(r):
        return float("nan")
    if reader is None:
        reader = sf
    value = reader.record(int(r))[j]
    return value if j == 3 else int(value)

#### Ahora sí con df

In [17]:
# For every crime row, store the index of the first polygon containing its
# (x, y) point; rows that fall in no polygon end up as missing values.
df["r"] = df.apply(point_in_path, axis = 1)

In [18]:
# Check the new x, y and r columns.
df.head()

Unnamed: 0,year,date,lat,lon,news,cassualties,url,neighborhood,point,clave_mun,nom_mun,clave_loc,nom_loc,ageb,x,y,r
0,2015,2015-10-12,25.617672,-100.259318,Persiguen y ejecutan a hombre,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,EJIDAL LOS REMATES,"[2673990.068792671, 1508739.2403134028]",39.0,Monterrey,1.0,Monterrey,3729,2673990.0,1508739.0,75.0
1,2015,2015-10-02,25.632075,-100.286088,Ejecutan a uno en plaza en Garza Sada,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,REVOLUCION PROLETARIA,"[2671295.4132258967, 1510294.803596502]",39.0,Monterrey,1.0,Monterrey,4727,2671295.0,1510295.0,75.0
2,2015,2015-10-01,25.675784,-100.475788,Lo ejecutan frente a su casa,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,HDA SANTA CATARINA (FOMERREY 29),"[2652285.5673206407, 1514899.6189773784]",48.0,Santa Catarina,1.0,Ciudad Santa Catarina,545,2652286.0,1514900.0,70.0
3,2015,2015-09-24,25.657586,-100.322029,Lo esperan para ejecutarlo,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,INDEPENDENCIA,"[2667670.5901743174, 1513064.839283476]",39.0,Monterrey,1.0,Monterrey,1968,2667671.0,1513065.0,75.0
4,2015,2015-09-24,25.683207,-100.30565,Matan a 2 clientes en ataque 13 a bar,2.0,http://www.elnorte.com/aplicaciones/articulo/d...,CENTRO DE MONTERREY,"[2669273.8077375377, 1515908.7290589423]",39.0,Monterrey,1.0,Monterrey,1281,2669274.0,1515909.0,75.0


In [19]:
# Shape of the subset of rows that have no neighborhood.
df.loc[df["neighborhood"].isnull(), :].shape

(254, 17)

In [20]:
# Shape of the subset of rows that matched no polygon.
df.loc[df["r"].isnull(), :].shape

(175, 17)

In [21]:
import numpy as np

In [22]:
# Pull the municipality key, locality key and locality name out of the matched
# shapefile record. `nom_loc` already exists in df and is overwritten here.
for column, field_index in (("cve_mun", 1), ("cve_loc", 2), ("nom_loc", 3)):
    df[column] = df["r"].apply(getRecord, j=field_index)

In [23]:
# Inspect a larger sample, including rows where no polygon matched.
df.head(50)

Unnamed: 0,year,date,lat,lon,news,cassualties,url,neighborhood,point,clave_mun,nom_mun,clave_loc,nom_loc,ageb,x,y,r,cve_mun,cve_loc
0,2015,2015-10-12,25.617672,-100.259318,Persiguen y ejecutan a hombre,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,EJIDAL LOS REMATES,"[2673990.068792671, 1508739.2403134028]",39.0,Monterrey,1.0,Monterrey,3729,2673990.0,1508739.0,75.0,39.0,1.0
1,2015,2015-10-02,25.632075,-100.286088,Ejecutan a uno en plaza en Garza Sada,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,REVOLUCION PROLETARIA,"[2671295.4132258967, 1510294.803596502]",39.0,Monterrey,1.0,Monterrey,4727,2671295.0,1510295.0,75.0,39.0,1.0
2,2015,2015-10-01,25.675784,-100.475788,Lo ejecutan frente a su casa,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,HDA SANTA CATARINA (FOMERREY 29),"[2652285.5673206407, 1514899.6189773784]",48.0,Santa Catarina,1.0,Ciudad Santa Catarina,0545,2652286.0,1514900.0,70.0,48.0,1.0
3,2015,2015-09-24,25.657586,-100.322029,Lo esperan para ejecutarlo,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,INDEPENDENCIA,"[2667670.5901743174, 1513064.839283476]",39.0,Monterrey,1.0,Monterrey,1968,2667671.0,1513065.0,75.0,39.0,1.0
4,2015,2015-09-24,25.683207,-100.30565,Matan a 2 clientes en ataque 13 a bar,2.0,http://www.elnorte.com/aplicaciones/articulo/d...,CENTRO DE MONTERREY,"[2669273.8077375377, 1515908.7290589423]",39.0,Monterrey,1.0,Monterrey,1281,2669274.0,1515909.0,75.0,39.0,1.0
5,2015,2015-09-23,25.777561,-100.410404,Asesinan a ex reo de tres balazos,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,LA ALIANZA,"[2658693.510923911, 1526191.7772835381]",39.0,Monterrey,1.0,Monterrey,5015,2658694.0,1526192.0,75.0,39.0,1.0
6,2015,2015-09-18,25.702387,-100.330845,Ejecutan a uno en la Col. Garza Nieto,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,PEDRO LOZANO,"[2666732.2379076034, 1517993.7998131234]",39.0,Monterrey,1.0,Monterrey,0921,2666732.0,1517994.0,75.0,39.0,1.0
7,2015,2015-09-05,25.584773,-100.23548,Ejecutan a joven tras persecuci&#243;n,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,NOGALES DE LA SIERRA,"[2676417.3226552834, 1505141.5785776984]",39.0,Monterrey,1.0,Monterrey,2970,2676417.0,1505142.0,75.0,39.0,1.0
8,2015,2015-09-05,25.691347,-100.280345,Lo acribillan desde un veh&#237;culo,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,NUEVA MADERO,"[2671791.048815214, 1516836.2648395365]",39.0,Monterrey,1.0,Monterrey,1347,2671791.0,1516836.0,75.0,39.0,1.0
9,2015,2015-09-07,25.801002,-100.053654,Torturan y ejecutan a dos en Pesquer&#237;a,2.0,http://www.elnorte.com/aplicaciones/articulo/d...,,"[2694271.730618012, 1529215.312199961]",,,,,,2694272.0,1529215.0,,,


### No están todos los shapes :(
### pero no faltan tantos :)

In [24]:
# Load the AGEEML catalogue CSV; it is used below to look up municipality names.
ageeml = pd.read_csv("data/AGEEML_NL.csv")

In [25]:
# Preview the catalogue columns.
ageeml.head()

Unnamed: 0,Mapa,Cve_Ent,Nom_Ent,Nom_Abr,Cve_Mun,Nom_Mun,Cve_Loc,Nom_Loc,Ámbito,Latitud,Longitud,Lat_Decimal,Lon_Decimal,Altitud,Cve_Carta,Pob_Total,Pob_Masculina,Pob_Femenina,Total De Viviendas Habitadas
0,190010001,19,Nuevo León,NL,1,Abasolo,1,Abasolo,U,"25°56´43.215N""","100°23´59.958W""",25.945338,-100.399988,502,G14C15,1976,1005,971,529
1,190010008,19,Nuevo León,NL,1,Abasolo,8,La Gloria,R,"25°56´14.103N""","100°22´48.748W""",25.937251,-100.380208,499,G14C15,7,0,0,1
2,190010009,19,Nuevo León,NL,1,Abasolo,9,Los Ligeros,R,"25°55´57.079N""","100°23´39.742W""",25.932522,-100.394373,504,G14C15,3,0,0,1
3,190010011,19,Nuevo León,NL,1,Abasolo,11,La Muralla,R,"25°56´18.560N""","100°23´52.699W""",25.938489,-100.397972,500,G14C15,4,0,0,2
4,190010012,19,Nuevo León,NL,1,Abasolo,12,Seis de Enero (Las Bugambilias),R,"25°56´59.312N""","100°24´26.446W""",25.949809,-100.407346,509,G14C15,0,0,0,0


In [26]:
# Cve_Mun and Cve_Loc are integers here, while df's cve_mun/cve_loc are floats
# (they contain NaN); the merge below joins on these pairs.
ageeml.dtypes

Mapa                              int64
Cve_Ent                           int64
Nom_Ent                          object
Nom_Abr                          object
Cve_Mun                           int64
Nom_Mun                          object
Cve_Loc                           int64
Nom_Loc                          object
Ámbito                           object
Latitud                          object
Longitud                         object
Lat_Decimal                     float64
Lon_Decimal                     float64
Altitud                           int64
Cve_Carta                        object
Pob_Total                         int64
Pob_Masculina                     int64
Pob_Femenina                      int64
Total De Viviendas Habitadas      int64
dtype: object

In [46]:
# Attach the municipality name from the AGEEML catalogue using the
# (municipality key, locality key) pair; rows without a match keep NaN.
merged = df.merge(
    ageeml[["Cve_Mun", "Nom_Mun", "Cve_Loc"]],
    how="left",
    left_on=["cve_mun", "cve_loc"],
    right_on=["Cve_Mun", "Cve_Loc"],
)
# A left join on a unique key must not add rows; if this fails, the catalogue
# contains duplicated (Cve_Mun, Cve_Loc) pairs and crimes would be double counted.
assert len(merged) == len(df), "merge duplicated rows: catalogue keys are not unique"

In [47]:
# The row count should equal df's: the left join only adds columns.
print(merged.shape)

(2114, 22)


In [48]:
# Compare the shapefile-derived keys (cve_mun, cve_loc) with the catalogue ones.
merged.head(7)

Unnamed: 0,year,date,lat,lon,news,cassualties,url,neighborhood,point,clave_mun,...,nom_loc,ageb,x,y,r,cve_mun,cve_loc,Cve_Mun,Nom_Mun,Cve_Loc
0,2015,2015-10-12,25.617672,-100.259318,Persiguen y ejecutan a hombre,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,EJIDAL LOS REMATES,"[2673990.068792671, 1508739.2403134028]",39.0,...,Monterrey,3729,2673990.0,1508739.0,75.0,39.0,1.0,39.0,Monterrey,1.0
1,2015,2015-10-02,25.632075,-100.286088,Ejecutan a uno en plaza en Garza Sada,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,REVOLUCION PROLETARIA,"[2671295.4132258967, 1510294.803596502]",39.0,...,Monterrey,4727,2671295.0,1510295.0,75.0,39.0,1.0,39.0,Monterrey,1.0
2,2015,2015-10-01,25.675784,-100.475788,Lo ejecutan frente a su casa,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,HDA SANTA CATARINA (FOMERREY 29),"[2652285.5673206407, 1514899.6189773784]",48.0,...,Ciudad Santa Catarina,545,2652286.0,1514900.0,70.0,48.0,1.0,48.0,Santa Catarina,1.0
3,2015,2015-09-24,25.657586,-100.322029,Lo esperan para ejecutarlo,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,INDEPENDENCIA,"[2667670.5901743174, 1513064.839283476]",39.0,...,Monterrey,1968,2667671.0,1513065.0,75.0,39.0,1.0,39.0,Monterrey,1.0
4,2015,2015-09-24,25.683207,-100.30565,Matan a 2 clientes en ataque 13 a bar,2.0,http://www.elnorte.com/aplicaciones/articulo/d...,CENTRO DE MONTERREY,"[2669273.8077375377, 1515908.7290589423]",39.0,...,Monterrey,1281,2669274.0,1515909.0,75.0,39.0,1.0,39.0,Monterrey,1.0
5,2015,2015-09-23,25.777561,-100.410404,Asesinan a ex reo de tres balazos,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,LA ALIANZA,"[2658693.510923911, 1526191.7772835381]",39.0,...,Monterrey,5015,2658694.0,1526192.0,75.0,39.0,1.0,39.0,Monterrey,1.0
6,2015,2015-09-18,25.702387,-100.330845,Ejecutan a uno en la Col. Garza Nieto,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,PEDRO LOZANO,"[2666732.2379076034, 1517993.7998131234]",39.0,...,Monterrey,921,2666732.0,1517994.0,75.0,39.0,1.0,39.0,Monterrey,1.0


In [49]:
# Remove helper columns, the duplicated catalogue keys and the original
# location columns that are replaced by the shapefile/catalogue values.
merged = merged.drop(
    columns=[
        "r",
        "Cve_Mun",
        "Cve_Loc",
        "point",
        "clave_loc",
        "clave_mun",
        "ageb",
        "nom_mun",
    ]
)

In [50]:
# Remaining columns after the drop; only Nom_Mun still uses catalogue casing.
merged.columns

Index(['year', 'date', 'lat', 'lon', 'news', 'cassualties', 'url',
       'neighborhood', 'nom_loc', 'x', 'y', 'cve_mun', 'cve_loc', 'Nom_Mun'],
      dtype='object')

In [53]:
# NOTE(review): this cell carries execution count 53, i.e. it was run after the
# rename (51) and reorder (52) cells that appear below it; the output shown
# reflects that later state.
merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2114 entries, 0 to 2113
Data columns (total 14 columns):
year            2114 non-null int64
date            2114 non-null object
lat             2114 non-null float64
lon             2114 non-null float64
news            2114 non-null object
cassualties     2114 non-null float64
url             2114 non-null object
neighborhood    1860 non-null object
x               2114 non-null float64
y               2114 non-null float64
cve_mun         1939 non-null float64
nom_mun         1939 non-null object
cve_loc         1939 non-null float64
nom_loc         1939 non-null object
dtypes: float64(7), int64(1), object(6)
memory usage: 247.7+ KB


In [51]:
# Only the catalogue column needs a new name. Renaming by name (rather than
# assigning the full list positionally) cannot mislabel columns if their order
# changes, and it is a no-op when the cell is re-run.
merged = merged.rename(columns={"Nom_Mun": "nom_mun"})

In [52]:
# Put each key column next to its name column for the exported file.
merged = merged.loc[
    :,
    [
        'year', 'date', 'lat', 'lon', 'news', 'cassualties', 'url',
        'neighborhood', 'x', 'y',
        'cve_mun', 'nom_mun',
        'cve_loc', 'nom_loc',
    ],
]

In [54]:
# Turn the float municipality key into text without the decimal part
# (e.g. 39.0 -> "39"); missing keys become empty strings.
merged["cve_mun"] = (
    merged["cve_mun"]
    .fillna('')
    .astype(str)
    .str.split('.')
    .str[0]
)

In [55]:
# Turn the float locality key into text without the decimal part
# (e.g. 1.0 -> "1"); missing keys become empty strings.
merged["cve_loc"] = (
    merged["cve_loc"]
    .fillna('')
    .astype(str)
    .str.split('.')
    .str[0]
)

In [56]:
# Write the enriched crime table to disk without the DataFrame index.
merged.to_csv("data/crime_inegi_localidades.csv", index=False)