In [1]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

In [2]:
import networkx as nx

In [3]:
from os.path import join

In [4]:
# Load the crime events table; the `date` column is parsed into datetimes
# and the file is decoded with the "latin" codec.
df = pd.read_csv(
    "../data/crime.csv",
    encoding="latin",
    parse_dates=["date"],
)

In [5]:
# Preview the first five rows of the events table.
df.head(n=5)

Unnamed: 0,year,date,lat,lon,news,cassualties,url,object_id,postal_code,mun_name,neighborhood_name,neighborhood_type,area
0,2015,2015-10-12,25.617672,-100.259318,Persiguen y ejecutan a hombre,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,35483,64878.0,MONTERREY,EJIDAL LOS REMATES,COLONIA,4.2e-05
1,2015,2015-10-02,25.632075,-100.286088,Ejecutan a uno en plaza en Garza Sada,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,35424,64788.0,MONTERREY,REVOLUCION PROLETARIA,COLONIA,9e-06
2,2015,2015-10-01,25.675784,-100.475788,Lo ejecutan frente a su casa,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,36310,66257.0,SANTA CATARINA,HDA SANTA CATARINA (FOMERREY 29),COLONIA,2.3e-05
3,2015,2015-09-24,25.657586,-100.322029,Lo esperan para ejecutarlo,1.0,http://www.elnorte.com/aplicaciones/articulo/d...,35376,64720.0,MONTERREY,INDEPENDENCIA,FRACCIONAMIENTO,0.000222
4,2015,2015-09-24,25.683207,-100.30565,Matan a 2 clientes en ataque 13 a bar,2.0,http://www.elnorte.com/aplicaciones/articulo/d...,34955,64000.0,MONTERREY,CENTRO DE MONTERREY,COLONIA,0.000717


In [None]:
# Directory that holds the shapefile. The original machine-specific location
# is kept as the fallback so existing setups keep working; set the
# CRIMEN_NL_SHAPE_DIR environment variable to run the notebook elsewhere.
PATH = os.environ.get("CRIMEN_NL_SHAPE_DIR", "/home/dianae/Workspace/crimenNL/shape")

fp = join(PATH, 'nuevo_leon.shp')

# Read the shapefile geometries; attributes are decoded with the "latin" codec.
map_df = gpd.read_file(fp, encoding='latin')

# Draw the event locations and the shapefile polygons on one shared axes.
fig, ax = plt.subplots(figsize=(20, 20))
scatter = df.plot.scatter(x='lon', y='lat', ax=ax, alpha=0.4, c='Red', s = 2)

map_df.plot(ax=ax, alpha=0.5)

In [None]:
# Table of adjacent neighbourhoods; it is used further down as the edge list
# of the graph.
vecinos = pd.read_csv(filepath_or_buffer="../data/adjacent_neighborhoods.csv")

In [None]:
def _as_xy(point):
    """Return the coordinates of a point geometry as an ``(x, y)`` tuple."""
    return (point.x, point.y)


# Store each geometry's centroid as a plain (x, y) tuple; the graph-drawing
# cell reads this column as the node positions.
map_df["CENTROID"] = map_df.geometry.centroid.apply(_as_xy)

In [None]:
# Flag every polygon whose OBJECTID appears in the events table.
# `isin` performs a single vectorised membership test instead of rebuilding
# `df.OBJECTID.tolist()` and scanning it linearly for every row of `map_df`.
# NOTE(review): the `df.head()` output earlier in the notebook lists this
# column as `object_id`, not `OBJECTID` — confirm which name the current CSV uses.
map_df["WITH_EVENT"] = map_df.OBJECTID.isin(df.OBJECTID)

In [None]:
# Per-node attribute mapping of the form {OBJECTID: {column: value, ...}},
# which is the layout nx.set_node_attributes accepts.
node_columns = ["OBJECTID", "SETT_NAME", "POSTALCODE", "MUN_NAME",
                "SETT_TYPE", "CENTROID", "WITH_EVENT"]
nodeData = map_df[node_columns].set_index("OBJECTID").to_dict('index')

In [None]:
# Edges come from the adjacency table, read through networkx's default
# "source" / "target" column names.
G = nx.from_pandas_edgelist(vecinos, source="source", target="target")
# Also register every OBJECTID from the shapefile, so polygons that never
# appear in the adjacency table are still nodes of the graph.
G.add_nodes_from(map_df.OBJECTID)
# Attach the per-node attribute dictionaries, keyed by OBJECTID.
nx.set_node_attributes(G, values=nodeData)

In [None]:
# Draw the graph with each node placed at its stored centroid;
# nodes flagged WITH_EVENT are red, all others blue.
plt.figure(figsize=(20, 60))
pos = nx.get_node_attributes(G, "CENTROID")
with_event = nx.get_node_attributes(G, "WITH_EVENT")
color = ["red" if flagged else "blue" for flagged in with_event.values()]
nx.draw_networkx(
    G,
    pos=pos,
    node_color=color,
    with_labels=False,
    node_size=20,
    alpha=0.4,
)

In [None]:
# Export the node table (attributes plus centroid coordinates rounded to
# three decimals) as a CSV file.
cytoscape_columns = ['OBJECTID', 'POSTALCODE', 'ST_NAME', 'MUN_NAME',
                     'SETT_NAME', 'SETT_TYPE', 'WITH_EVENT']
centroid_points = map_df.geometry.centroid

df_cytoscape = map_df.loc[:, cytoscape_columns]
df_cytoscape["x_centroid"] = centroid_points.x.round(3)
df_cytoscape["y_centroid"] = centroid_points.y.round(3)
df_cytoscape.to_csv("../data/map_df_all.csv")

### Ahora separemos la red por fechas

In [None]:
# Index the events by (OBJECTID, date) so the aggregation can group on the
# index levels.
# The guard keeps the cell idempotent: once the columns have been moved into
# the index, calling set_index again would raise KeyError on a re-run.
event_index = ["OBJECTID", "date"]
if list(df.index.names) != event_index:
    df.set_index(event_index, inplace=True)
# Shortcut for fetching the values of a single index level.
level_values = df.index.get_level_values

In [None]:
# Group the events by the first index level and by week (left-closed weekly
# bins taken from the last index level, i.e. the date).
weekly_bins = pd.Grouper(freq='W', closed='left', level=-1)

# Casualties get both a total and a row count; every other column keeps the
# first value seen in its group.
agg_spec = {
    "cassualties": ["sum", "count"],
    "year": "first",
    "lat": "first",
    "lon": "first",
    "POSTALCODE" : "first",
    "MUN_NAME" : "first",
    "SETT_NAME" : "first",
    "SETT_TYPE" : "first",
}

df_agg = df.groupby([level_values(0), weekly_bins]).agg(agg_spec)

In [None]:
# Flatten the two-level column index produced by the aggregation down to its
# top level. A pandas Index is immutable, so item assignment on it raises
# TypeError; convert to a plain list before renaming.
cols = df_agg.columns.droplevel(1).tolist()
# The second column is the "count" of cassualties, i.e. the number of events.
cols[1] = "events"
