In [46]:
import numpy as np
import pandas as pd
import math
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.formula.api import glm
import collections
from statsmodels.sandbox.stats.multicomp import multipletests
import time
from folium import plugins
import folium
from folium.plugins import HeatMap

In [47]:
# Load the accident records and discard the first CSV column
# (presumably a saved index column — TODO confirm against the file).
df=pd.read_csv('Final_poisson.csv').iloc[:,1:]

In [49]:
def conteo_perf(newdf=df):
    """Aggregate accident records into one row per ``LAT_LON`` location.

    Steps:
      1. Count the records per (``LAT_LON``, ``SITIO_EXACTO_ACCIDENTE``) pair.
      2. Per ``LAT_LON``, take the total of those counts and the largest
         single count.
      3. Keep only the locations where largest / total is greater than 0.3.
      4. Attach the attributes of the first record seen at each kept location.

    Parameters
    ----------
    newdf : pandas.DataFrame
        Accident records. Must contain ``LAT_LON``,
        ``SITIO_EXACTO_ACCIDENTE`` and every column listed in the output
        below. The default is the module-level ``df`` as it was bound when
        this function was defined.

    Returns
    -------
    pandas.DataFrame
        One row per kept location with a fresh 0..n-1 index. ``NACCIDENTS``
        is the total count from step 2; the remaining columns come from the
        first record at that location.
    """
    # First record per location supplies the descriptive columns.
    primera_fila = newdf.drop_duplicates(subset='LAT_LON')

    # Records per (location, exact-site label) pair.
    por_sitio = (
        newdf[['LAT_LON', 'SITIO_EXACTO_ACCIDENTE']]
        .groupby(['LAT_LON', 'SITIO_EXACTO_ACCIDENTE'])
        .size()
        .reset_index(name='count')
    )

    # Total and largest per-label count for every location.
    resumen = por_sitio.groupby('LAT_LON')['count'].agg(['sum', 'max'])
    resumen['sum_rate'] = resumen['max'] / resumen['sum']

    # Keep only the total for locations passing the 0.3 share threshold.
    conservados = resumen.loc[resumen['sum_rate'] > 0.3, ['sum']]
    conservados = conservados.rename(columns={'sum': 'NACCIDENTS'})

    columnas_salida = [
        'NACCIDENTS', 'AÑO_ACCIDENTE', 'MES_ACCIDENTE',
        'CLASE_ACCIDENTE', 'LONGITUD', 'LATITUD',
        'POT', 'PIEZA_URBANA', 'BUS_SUBIDA', 'BUS_BAJADA',
        'TRAFICO', 'ES_FESTIVO', 'SOLO_HORA', 'DIA_ACCIDENTE', 'JUNIOR_JUGO',
    ]
    fusion = pd.merge(conservados, primera_fila, on='LAT_LON', how='inner')
    return fusion[columnas_salida].reset_index(drop=True)
# Build the per-location counts from the full data set (default argument).
# NOTE(review): `b` is not referenced by any other cell visible here.
b=conteo_perf()

In [50]:
# Load the lights table (traffic lights, judging by the file and variable
# names) and keep only its two coordinate columns.
semaforos=pd.read_csv('lights_clean.csv')[['LATITUD','LONGITUD']]

In [53]:
def distancia_semaforos_mas_rapido(final_df,lights=semaforos):
    """Add light-proximity features to ``final_df`` and return it.

    For every row of ``final_df`` the distance to every row of ``lights`` is
    computed with a flat-earth approximation: latitude differences are scaled
    by 40000/360 and longitude differences additionally by the cosine of the
    mean latitude. Assuming the coordinates are decimal degrees and 40000 is
    the Earth's circumference in km, distances are in km — TODO confirm.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Must contain ``LATITUD`` and ``LONGITUD``. It is modified in place
        (four columns are added) and the same object is returned.
    lights : pandas.DataFrame
        Must contain ``LATITUD`` and ``LONGITUD``. The default is the
        module-level ``semaforos`` as bound when this function was defined.

    Returns
    -------
    pandas.DataFrame
        ``final_df`` with these columns added:
        ``NLIGHTS``       – lights with distance < 0.1
        ``NLIGHTS2``      – lights with 0.1 <= distance < 0.5
        ``NLIGHTS3``      – lights with 0.5 <= distance < 2
        ``CLOSEST_LIGHT`` – smallest distance to any light

    Raises
    ------
    ValueError
        If ``lights`` has no rows while ``final_df`` has at least one (the
        minimum of an empty array is undefined).
    """
    # Read coordinates by position. The previous ``final_df['LATITUD'][i]``
    # was a label lookup, which raises KeyError (or silently picks another
    # row) whenever the frame's index is not exactly 0..n-1, e.g. after
    # filtering without reset_index.
    corner_lats = np.asarray(final_df['LATITUD'])
    corner_lons = np.asarray(final_df['LONGITUD'])

    # Loop-invariant: convert the light coordinates once, not per row.
    light_lat = np.asarray(lights['LATITUD'])
    light_lon = np.asarray(lights['LONGITUD'])

    n_corners = final_df.shape[0]
    closest = np.zeros(n_corners)
    numclose = np.zeros(n_corners)
    numclose2 = np.zeros(n_corners)
    numclose3 = np.zeros(n_corners)

    for i in range(n_corners):
        corner_lat = corner_lats[i]
        corner_lon = corner_lons[i]
        # (a + b) * pi / 360 is the mean of the two latitudes in radians.
        dx = (light_lon-corner_lon)*40000*np.cos((light_lat+corner_lat)*math.pi/360)/360
        dy = (light_lat-corner_lat)*40000/360
        distance = np.sqrt(dx**2+dy**2)
        closest[i] = distance.min()
        numclose[i] = np.sum(distance < 0.1)
        numclose2[i] = np.sum((distance >= 0.1) & (distance < 0.5))
        numclose3[i] = np.sum((distance >= 0.5) & (distance < 2))

    # Arrays are assigned positionally, matching the positional reads above.
    final_df['NLIGHTS'] = numclose
    final_df['NLIGHTS2'] = numclose2
    final_df['NLIGHTS3'] = numclose3
    final_df['CLOSEST_LIGHT'] = closest
    return final_df

In [58]:
def modelo_poisson(junior=False,dia=False,festivo=False,mes=False, df=df,lights=semaforos):
    """Fit a Poisson GLM of per-location accident counts and return the data with fitted values.

    Parameters
    ----------
    junior, dia, festivo, mes
        Optional equality filters on ``JUNIOR_JUGO``, ``DIA_ACCIDENTE``,
        ``ES_FESTIVO`` and ``MES_ACCIDENTE`` respectively. A value that
        compares equal to ``False`` (the default, and therefore also ``0``)
        disables that filter; any other value keeps only the rows equal to it.
        Active filters are applied cumulatively.
        NOTE(review): the original author's note says these filters (it also
        mentions a year filter, which is not a parameter here) are meant to be
        mutually exclusive — set at most one per call. This is not enforced.
        NOTE(review): because ``False``/``0`` mean "no filter", rows where a
        column equals ``False``/``0`` cannot be selected through this function.
    df : pandas.DataFrame
        Accident records passed on to ``conteo_perf``. The default is the
        module-level ``df`` as bound when this function was defined.
    lights : pandas.DataFrame
        Light coordinates passed on to ``distancia_semaforos_mas_rapido``.
        The default is the module-level ``semaforos`` bound at definition time.

    Returns
    -------
    pandas.DataFrame
        The aggregated frame with the light-proximity columns and an extra
        ``PREDICHOS`` column holding the model's fitted values. Rows that the
        formula interface excluded from the fit (missing values in a model
        column) get ``NaN`` in ``PREDICHOS``.
    """
    filtros = (
        ('MES_ACCIDENTE', mes),
        ('ES_FESTIVO', festivo),
        ('DIA_ACCIDENTE', dia),
        ('JUNIOR_JUGO', junior),
    )
    for columna, valor in filtros:
        # `!= False` (not `is not False`) is intentional: it preserves the
        # existing contract that 0 also means "no filter".
        if valor != False:  # noqa: E712
            df = df[df[columna] == valor]

    data = conteo_perf(newdf=df)
    data = distancia_semaforos_mas_rapido(final_df=data, lights=lights)

    # NLIGHTS3 is computed by the helper but is not part of the formula.
    formula = 'NACCIDENTS~BUS_SUBIDA+BUS_BAJADA+POT+PIEZA_URBANA+NLIGHTS+NLIGHTS2+TRAFICO+CLOSEST_LIGHT'
    model = glm(formula=formula, data=data, family=sm.families.Poisson()).fit()

    # `model.predict()` returns a bare array with one entry per *fitted* row,
    # so assigning it fails with a length mismatch as soon as any row was
    # dropped for missing values. `fittedvalues` carries the row labels of
    # the fitted rows, so the assignment aligns on the index instead.
    data['PREDICHOS'] = model.fittedvalues
    return data
# Fit the model on the records whose DIA_ACCIDENTE equals 'Mié'.
modelo=modelo_poisson(dia='Mié')

In [60]:
def _mapa_calor(data, columna_peso):
    """Build a folium map with a heat layer weighted by ``columna_peso``.

    The map is centred on [10.980706, -74.807636] at zoom 13 with
    OpenStreetMap tiles (the original variable name indicates Barranquilla).
    Rows with a missing latitude, longitude or weight are skipped, and the
    points are handed to ``HeatMap`` as plain ``[lat, lon, weight]`` lists
    rather than as a DataFrame.
    """
    puntos = data[['LATITUD', 'LONGITUD', columna_peso]].dropna().values.tolist()
    mapa_barranquilla = folium.Map(location=[10.980706, -74.807636], zoom_start=13, tiles="OpenStreetMap")
    mapa_barranquilla.add_child(HeatMap(puntos, radius=15))
    return mapa_barranquilla


def mapas_calor_predichos(data):
    """Return a heat map of ``data`` weighted by the ``PREDICHOS`` column.

    ``data`` must contain ``LATITUD``, ``LONGITUD`` and ``PREDICHOS``.
    """
    return _mapa_calor(data, 'PREDICHOS')


def mapas_calor_reales(data):
    """Return a heat map of ``data`` weighted by the ``NACCIDENTS`` column.

    ``data`` must contain ``LATITUD``, ``LONGITUD`` and ``NACCIDENTS``.
    """
    return _mapa_calor(data, 'NACCIDENTS')
    

In [59]:
# Display the heat map weighted by the fitted values (PREDICHOS column).
mapas_calor_predichos(modelo)

In [61]:
# Display the heat map weighted by the observed counts (NACCIDENTS column).
mapas_calor_reales(modelo)