In [None]:
#importar librerías
import pandas as pd
import numpy as np

import os

import folium
from folium import plugins

from wwo_hist import retrieve_hist_data

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

In [None]:
def leer_data(ruta='../Data/carpetas-de-investigacion-pgj-de-la-ciudad-de-mexico.csv', sep=';'):
    """Load the investigation-folder CSV into a DataFrame.

    Parameters
    ----------
    ruta : str or path-like, optional
        Location of the CSV file. Defaults to the project file under
        ``../Data`` (resolved against the current working directory).
    sep : str, optional
        Field delimiter; the project file is semicolon-separated.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pandas.read_csv``.
    """
    return pd.read_csv(ruta, sep=sep)

In [None]:
def transformar_data(df=None):
    """Clean the raw investigation-folder frame in place and return it.

    Steps, in order:
      1. drop the columns that are not used downstream,
      2. parse ``fecha_hechos`` as datetime (unparseable values become NaT),
      3. cast ``longitud`` and ``latitud`` to float,
      4. drop every row that still has a missing value.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame to clean. When omitted, the notebook-level ``data`` frame is
        used, which keeps the original zero-argument call working.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (or ``data``), mutated in place.
    """
    if df is None:
        # Backward-compatible path: operate on the notebook-level frame.
        df = data

    descartar = ['ao_hechos', 'mes_hechos', 'calle_hechos2', 'geopoint',
                 'ao_inicio', 'mes_inicio', 'fecha_inicio']
    # errors='ignore' lets the cell be re-run on an already-cleaned frame
    # without raising KeyError for columns that are gone.
    df.drop(columns=descartar, inplace=True, errors='ignore')

    df['fecha_hechos'] = pd.to_datetime(df['fecha_hechos'], errors='coerce')

    # astype() has no in-place mode: the converted Series must be assigned
    # back, otherwise the cast is silently lost (or raises TypeError on
    # pandas versions that reject the unknown `inplace` keyword).
    df['longitud'] = df['longitud'].astype('float')
    df['latitud'] = df['latitud'].astype('float')

    df.dropna(inplace=True)
    return df

In [None]:
def get_clima(api_key=None):
    """Download historical weather for Mexico City and return it as a frame.

    The function changes the working directory to ``../Data``, calls
    ``wwo_hist.retrieve_hist_data`` for the period 01-DEC-2014 to
    30-AUG-2019, reads ``mexico_city.csv`` back from disk, drops the unused
    columns and renames the remaining ones to Spanish names.

    Parameters
    ----------
    api_key : str, optional
        API key for the weather service. When omitted, the ``WWO_API_KEY``
        environment variable is used; if that is unset too, the legacy key
        embedded below is used so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Weather observations whose ``fecha_hechos`` column is datetime, so
        the frame can be used directly as the right side of
        ``pd.merge_asof(..., on='fecha_hechos')``.

    Notes
    -----
    Side effect: the process working directory is left at ``../Data``
    (relative to where it was on entry).
    """
    if api_key is None:
        # FIXME(security): a credential should not live in source control.
        # Rotate this key and delete the fallback once WWO_API_KEY is set
        # wherever the notebook runs.
        api_key = os.environ.get('WWO_API_KEY', '28f7f02aa28d4afe9dc215223190509')

    # NOTE(review): retrieve_hist_data(export_csv=True) presumably writes
    # '<location>.csv' into the current directory, hence the chdir — confirm
    # against the wwo_hist documentation.
    os.chdir("../Data")
    frequency = 1
    start_date = '01-DEC-2014'
    end_date = '30-AUG-2019'
    location_list = ['mexico_city']
    # The returned value is not needed: the data is re-read from the CSV.
    retrieve_hist_data(api_key, location_list, start_date, end_date, frequency,
                       location_label=False, export_csv=True, store_df=True)

    clima = pd.read_csv('../Data/mexico_city.csv')
    clima = clima.drop(columns=['maxtempC', 'mintempC', 'totalSnow_cm', 'sunHour', 'uvIndex.1',
                                'moonrise', 'moonset', 'sunrise', 'sunset', 'HeatIndexC',
                                'WindChillC', 'WindGustKmph'])
    # Positional rename: relies on the order of the columns that remain.
    clima.columns = ['fecha_hechos', 'uv', 'ilu_luna', 'punto_rocio', 'temp_sentir', 'nubosidad',
                     'humedad', 'precipitacion', 'presion', 'temperatura', 'visibilidad',
                     'dir_viento', 'vel_viento']
    # read_csv leaves the timestamp as text; merge_asof needs a datetime key
    # of the same dtype as the crime data's `fecha_hechos`.
    clima['fecha_hechos'] = pd.to_datetime(clima['fecha_hechos'])
    return clima

In [None]:
def get_colonia_delito(delitos=None, clima_df=None, alcaldia='CUAUHTEMOC', colonia='ROMA NORTE',
                       categoria='ROBO A TRANSEUNTE EN VÍA PÚBLICA CON Y SIN VIOLENCIA'):
    """Select one crime category in one neighbourhood and attach weather.

    Rows matching the borough, neighbourhood and crime category are sorted
    by ``fecha_hechos`` and joined with ``pd.merge_asof`` to the most recent
    weather observation at or before each event. A ``nombre_dia`` column
    with the weekday number (Monday = 0) is added.

    Parameters
    ----------
    delitos : pandas.DataFrame, optional
        Cleaned crime frame with a datetime ``fecha_hechos`` column.
        Defaults to the notebook-level ``data``.
    clima_df : pandas.DataFrame, optional
        Weather frame with a ``fecha_hechos`` column. Defaults to the
        notebook-level ``clima``.
    alcaldia, colonia, categoria : str, optional
        Values matched against ``alcaldia_hechos``, ``colonia_hechos`` and
        ``categoria_delito``. The defaults reproduce the original filter.

    Returns
    -------
    pandas.DataFrame
        Filtered crime rows in chronological order with the weather columns
        and ``nombre_dia`` appended. Neither input frame is modified.
    """
    if delitos is None:
        delitos = data
    if clima_df is None:
        clima_df = clima

    filtro = (
        (delitos['alcaldia_hechos'] == alcaldia)
        & (delitos['colonia_hechos'] == colonia)
        & (delitos['categoria_delito'] == categoria)
    )
    # sort_values without inplace returns a new frame, so the filtered
    # slice is never mutated (no SettingWithCopyWarning).
    seleccion = delitos.loc[filtro].sort_values(by='fecha_hechos', ascending=True)

    # merge_asof requires both keys to be sorted and of the same dtype; the
    # weather timestamps may still be text if they came straight from a CSV.
    clima_ordenado = (
        clima_df
        .assign(fecha_hechos=pd.to_datetime(clima_df['fecha_hechos']))
        .sort_values(by='fecha_hechos')
    )

    roma_n = pd.merge_asof(seleccion, clima_ordenado, on='fecha_hechos')
    roma_n['nombre_dia'] = roma_n['fecha_hechos'].dt.weekday
    return roma_n

In [None]:
def mapear(puntos=None, archivo='mapa.html'):
    """Render the incidents as circle markers plus a heat map and save it.

    Parameters
    ----------
    puntos : pandas.DataFrame, optional
        Frame with ``latitud`` and ``longitud`` columns. Defaults to the
        notebook-level ``roma_n``.
    archivo : str, optional
        Name of the HTML file written inside ``../Images``.

    Returns
    -------
    None

    Notes
    -----
    Side effect: the process working directory is changed to ``../Images``
    (relative to where it was on entry) and stays there.
    """
    if puntos is None:
        puntos = roma_n

    os.chdir("../Images")
    mapa = folium.Map(location=[19.41641111, -99.16169722], zoom_start=15)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on all
    # versions and tolist() yields plain Python floats for folium.
    coordenadas = puntos[['latitud', 'longitud']].values.tolist()

    for latitud, longitud in coordenadas:
        folium.CircleMarker([latitud, longitud], radius=1, fill_color="#3db7e4").add_to(mapa)

    # add_to() replaces the deprecated Map.add_children() and matches how
    # the markers above are attached.
    plugins.HeatMap(coordenadas, radius=15).add_to(mapa)
    mapa.save(archivo)

In [None]:
def preparar_prediccion(df=None):
    """Build the feature matrix and the target frame for the regressor.

    Features: every column except the administrative ones, the timestamp
    and the coordinates; ``delito``, ``categoria_delito`` and
    ``calle_hechos`` are label-encoded, the event year is added as ``año``
    and the whole matrix is standardised with ``StandardScaler``.

    Targets: ``latitud``, ``longitud`` and the day, month, hour and minute
    extracted from ``fecha_hechos``.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Output of ``get_colonia_delito``. Defaults to the notebook-level
        ``roma_n``. The input frame is not modified.

    Returns
    -------
    X : numpy.ndarray
        Standardised feature matrix.
    y : pandas.DataFrame
        Columns ``latitud, longitud, dia, mese, hora, minuto`` in that order
        (``prediccion`` relies on this order when naming its output).
    """
    if df is None:
        df = roma_n

    # drop() returns a new frame, so the caller's data is left untouched.
    prueba = df.drop(columns=['fiscalia', 'agencia', 'unidad_investigacion',
                              'alcaldia_hechos', 'colonia_hechos'])
    fechas = prueba['fecha_hechos']

    X = prueba.drop(columns=['fecha_hechos', 'latitud', 'longitud'])
    for columna in ('delito', 'categoria_delito', 'calle_hechos'):
        # A fresh encoder per column; the fitted encoders are not kept.
        X[columna] = LabelEncoder().fit_transform(X[columna])
    X['año'] = fechas.dt.year
    X = StandardScaler().fit_transform(X)

    # Built with assign() on a new frame instead of mutating a column slice,
    # which is what triggered SettingWithCopyWarning before.
    # NOTE(review): 'mese' looks like a typo for 'mes'; kept as-is because it
    # is part of the returned schema.
    y = prueba[['latitud', 'longitud']].assign(
        dia=fechas.dt.day,
        mese=fechas.dt.month,
        hora=fechas.dt.hour,
        minuto=fechas.dt.minute,
    )
    return X, y

In [None]:
def prediccion(features=None, targets=None, test_size=0.2, n_estimators=500, random_state=None):
    """Train a random forest and predict location and time for held-out rows.

    The data is split into train and test parts; the model is fitted on the
    training part only and the returned predictions are for the test part.
    Previously the split was computed but ignored, so the model was fitted
    and evaluated on the same rows.

    Parameters
    ----------
    features : array-like, optional
        Feature matrix. Defaults to the notebook-level ``X``.
    targets : pandas.DataFrame, optional
        Target frame with six columns ordered as latitude, longitude, day,
        month, hour, minute. Defaults to the notebook-level ``y``.
    test_size : float, optional
        Fraction of rows held out for prediction.
    n_estimators : int, optional
        Number of trees in the forest.
    random_state : int or None, optional
        Seed passed to both the split and the forest. ``None`` keeps the
        original non-deterministic behaviour; pass an int for reproducible
        results.

    Returns
    -------
    pandas.DataFrame
        One row per held-out sample with columns ``latitud``, ``longitud``
        and ``fecha_hechos`` (predicted day/month/hour/minute placed in the
        year 2020; impossible dates become NaT).
    """
    if features is None:
        features = X
    if targets is None:
        targets = y

    X_train, X_test, y_train, _ = train_test_split(
        features, targets, test_size=test_size, random_state=random_state
    )
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)
    pre = model.predict(X_test)

    partes = ['day', 'month', 'hour', 'minute']
    prediction = pd.DataFrame(pre, columns=['latitud', 'longitud'] + partes)
    # The forest outputs averages, so date parts come back fractional.
    prediction[partes] = prediction[partes].round().astype('int')
    prediction['year'] = 2020
    # errors='coerce' turns impossible combinations (e.g. 30 February) into NaT.
    prediction['fecha_hechos'] = pd.to_datetime(
        prediction[['year', 'month', 'day', 'hour', 'minute']], errors='coerce'
    )
    return prediction.drop(columns=['year'] + partes)

In [None]:
def mapear_prediccion(puntos=None, archivo='prediccion.html'):
    """Render the predicted incidents as markers plus a heat map and save it.

    Parameters
    ----------
    puntos : pandas.DataFrame, optional
        Frame with ``latitud`` and ``longitud`` columns. Defaults to the
        notebook-level ``prediction``.
    archivo : str, optional
        Name of the HTML file written inside ``../Images``.

    Returns
    -------
    None

    Notes
    -----
    Side effect: the process working directory is changed to ``../Images``
    (relative to where it was on entry) and stays there.
    """
    if puntos is None:
        puntos = prediction

    os.chdir("../Images")
    mapa_prediccion = folium.Map(location=[19.41641111, -99.16169722], zoom_start=15)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on all
    # versions and tolist() yields plain Python floats for folium.
    coordenadas = puntos[['latitud', 'longitud']].values.tolist()

    for latitud, longitud in coordenadas:
        folium.CircleMarker([latitud, longitud], radius=1, fill_color="#3db7e4").add_to(mapa_prediccion)

    # add_to() replaces the deprecated Map.add_children() and matches how
    # the markers above are attached.
    plugins.HeatMap(coordenadas, radius=15).add_to(mapa_prediccion)
    mapa_prediccion.save(archivo)