# Usando Google Maps para predecir crimenes

**Ciudad: San Francisco**

In [1]:
import warnings
# Install a global 'ignore' filter: every warning raised from here on is suppressed.
# NOTE(review): this also hides any pandas/matplotlib warnings raised by later cells.
warnings.simplefilter('ignore')

In [2]:
# librerias

import numpy as np 
import pandas as pd 
import datetime
import urllib
import matplotlib.pyplot as plt

In [3]:
## Crime data
# Read the local CSV copy of the incident data. If the file is not there yet
# (fresh checkout), download it once from data.sfgov.org and cache it at the same
# path, so the notebook can be run top to bottom without manual steps.
import os

DATA_URL = 'https://data.sfgov.org/api/views/q6gg-sa2p/rows.csv?accessType=DOWNLOAD'
DATA_PATH = 'data/raw_data_crimes.csv'

try:
    df_raw = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    df_raw = pd.read_csv(DATA_URL)
    # Make sure the target folder exists before writing the cache file.
    os.makedirs(os.path.dirname(DATA_PATH), exist_ok=True)
    df_raw.to_csv(DATA_PATH, index=False)

df_raw.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location
0,110308742,MISSING PERSON,FOUND PERSON,Friday,04/15/2011,16:00,TARAVAL,LOCATED,0 Block of CRESPI DR,-122.476328,37.719687,"(37.7196874952717, -122.476327815126)"
1,90316914,WARRANTS,WARRANT ARREST,Thursday,03/26/2009,12:28,SOUTHERN,"ARREST, BOOKED",1000 Block of MARKET ST,-122.41134,37.781271,"(37.7812707434494, -122.411339562557)"
2,130132311,SECONDARY CODES,DOMESTIC VIOLENCE,Friday,02/15/2013,02:00,PARK,"ARREST, BOOKED",CASTRO ST / MARKET ST,-122.435188,37.76267,"(37.7626702770872, -122.435187699349)"
3,80052833,OTHER OFFENSES,"DRIVERS LICENSE, SUSPENDED OR REVOKED",Tuesday,01/15/2008,20:13,MISSION,"ARREST, CITED",VALENCIA ST / 16TH ST,-122.421886,37.764918,"(37.7649178909424, -122.421886357981)"
4,70535770,SECONDARY CODES,DOMESTIC VIOLENCE,Saturday,05/26/2007,18:10,MISSION,"ARREST, BOOKED",2800 Block of FOLSOM ST,-122.413935,37.751693,"(37.7516932889916, -122.413935404044)"


In [4]:
#df_raw.to_csv('data/raw_data_crimes.csv', index=False)

In [5]:
# Column names of the raw incident table.
df_raw.columns

Index(['IncidntNum', 'Category', 'Descript', 'DayOfWeek', 'Date', 'Time',
       'PdDistrict', 'Resolution', 'Address', 'X', 'Y', 'Location'],
      dtype='object')

In [6]:
# (rows, columns) of the raw incident table.
df_raw.shape

(2215024, 12)

In [7]:
# Number of missing values in each column.
df_raw.isna().sum()

IncidntNum    0
Category      0
Descript      0
DayOfWeek     0
Date          0
Time          0
PdDistrict    1
Resolution    0
Address       0
X             0
Y             0
Location      0
dtype: int64

In [8]:
# Summary statistics of the numeric columns (IncidntNum, X, Y).
# In the recorded output the maxima (X = -120.5, Y = 90.0) lie far from the quartiles.
df_raw.describe()

Unnamed: 0,IncidntNum,X,Y
count,2215024.0,2215024.0,2215024.0
mean,104072800.0,-122.4229,37.77076
std,46128270.0,0.02983459,0.4203596
min,3979.0,-122.5136,37.70788
25%,61241700.0,-122.4332,37.75302
50%,101155800.0,-122.4166,37.77542
75%,140925600.0,-122.4069,37.78448
max,991582400.0,-120.5,90.0


In [None]:
# Per-column dtypes and non-null counts; memory_usage='deep' asks pandas to measure
# the real size of object (string) columns instead of estimating it.
df_raw.info(memory_usage='deep')

In [None]:
# Data selection and cleaning:
# keep only the fields used below, rename the coordinates
# (X -> Longitude, Y -> Latitude) and drop rows with missing values.
columnas = ['Category', 'DayOfWeek', 'Date', 'Time', 'X', 'Y']

df = (
    df_raw[columnas]
    .rename(columns={'X': 'Longitude', 'Y': 'Latitude'})
    .dropna()
)
df.head()

In [None]:
# Exploration: number of incidents per original crime category.

df.Category.value_counts()

In [None]:
# Preview the first rows of the cleaned frame.
df.head()

### Reclasificacion en 4 grupos

In [None]:
# Original crime categories assigned to each of the four super-groups.
# They stay plain lists because they are concatenated with `+` later on.
THEFT = [
    'LARCENY/THEFT',
    'VEHICLE THEFT',
    'BURGLARY',
    'ROBBERY',
    'STOLEN PROPERTY',
]

IMPAIRED = [
    'DRUNKENNESS',
    'DRIVING UNDER THE INFLUENCE',
    'LIQUOR LAWS',
    'DISORDERLY CONDUCT',
    'DRUG/NARCOTIC',
    'LOITERING',
]

VIOLENCE = [
    'ASSAULT',
    'VANDALISM',
    'SUSPICIOUS OCC',
    'TRESPASS',
    'SEX OFFENSES, FORCIBLE',
    'SEX OFFENSES, NON FORCIBLE',
]

OTHER = [
    'OTHER OFFENSES',
    'NON-CRIMINAL',
]

In [None]:
# Every original category that belongs to one of the four groups.
CATEGORIAS=THEFT+IMPAIRED+VIOLENCE+OTHER

# Keep only incidents in those categories. The explicit copy makes `df` an
# independent frame, so new columns assigned to it are not writes on a slice
# of the previous frame.
df=df[df.Category.isin(CATEGORIAS)].copy()

In [None]:
# Create the super-group column `CAT`.
# `grupos` maps each super-group label to its list of original categories;
# dict order fixes the order of both the labels and the boolean masks.
grupos = {'THEFT': THEFT, 'IMPAIRED': IMPAIRED, 'VIOLENCE': VIOLENCE, 'OTHER': OTHER}

nuevas_cat = list(grupos)
selecciones = [df.Category.isin(miembros) for miembros in grupos.values()]

# First matching mask wins; rows matching none fall back to 'OTHER'.
df['CAT'] = np.select(selecciones, nuevas_cat, default='OTHER')

df['CAT'].value_counts()

## Plots

### por dia de la semana

In [None]:
# Incidents per weekday, one panel per crime group.
# Bars are placed in a fixed Monday..Sunday order: with value_counts(sort=False) the
# order depended on each subset, so the unlabeled top row could not be read against
# the labeled bottom row.
dias_semana = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
colores = ['r','g','b','c','m','y','violet']

plt.figure(figsize=(12,10))
for i, categoria in enumerate(nuevas_cat, start=1):
    plt.subplot(2,2,i)
    conteo = (df.loc[df.CAT==categoria, 'DayOfWeek']
                .value_counts()
                .reindex(dias_semana, fill_value=0))  # missing weekdays show as 0

    conteo.plot(kind='bar', color=colores)
    plt.title('Categoria: {}'.format(categoria))
    if i<=2:
        # Top row: tick labels omitted; the bar order matches the bottom row.
        plt.xticks([])
    else:
        plt.xticks(rotation=45)

### por dia del mes y mes del año

In [None]:
# Dates are 'MM/DD/YYYY' strings: split them once and derive both features.
partes_fecha = df.Date.str.split('/', expand=True)

df['Day_of_month'] = partes_fecha[1].astype('int64')

df['Month_of_year'] = partes_fecha[0].astype('int64')

# Incidents per month, ordered by month number.
# The column is named explicitly because pandas >= 2.0 calls the value_counts()
# result 'count', which would break later access through `df_por_mes.Month_of_year`.
df_por_mes = (df.Month_of_year
                .value_counts()
                .sort_index()
                .rename_axis(None)
                .rename('Month_of_year')
                .to_frame())
df_por_mes

In [None]:
# Month labels, index 0 = January ('May' was previously misspelled as 'Ma').
meses=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sept','Oct','Nov','Dec']

# Read the counts positionally so the plot does not depend on how the
# value_counts() column happens to be named by the installed pandas version.
conteo_mes = df_por_mes.iloc[:, 0]

plt.figure(figsize=(14,8))
plt.barh(conteo_mes.index, conteo_mes.values, align='center', alpha=0.5)
# Label each bar from its own month number, so a missing month cannot shift labels.
plt.yticks(conteo_mes.index, [meses[m-1] for m in conteo_mes.index])
plt.xlabel('Crimenes')
plt.title('Crimenes totales por mes');

In [None]:
# Incidents per month, one panel per crime group.
# The counts are taken from the rows of the current group only; previously the
# filtered frame was discarded and every panel showed the totals of the whole data set.
plt.figure(figsize=(12,10))
for i, categoria in enumerate(nuevas_cat, start=1):
    plt.subplot(2,2,i)
    conteo_cat = (df.loc[df.CAT==categoria, 'Month_of_year']
                    .value_counts()
                    .reindex(range(1, 13), fill_value=0))  # always 12 bars, Jan..Dec

    plt.barh(conteo_cat.index, conteo_cat.values, align='center', alpha=0.5, color=['r','g','b','c','m','y','violet'])
    plt.title('Categoria: {}'.format(categoria))
    plt.yticks(conteo_cat.index, meses)
    # Each group has its own count scale, so the x ticks are kept on every panel.
    plt.xticks(rotation=45)

### geomapa

In [None]:
# Remove edge coordinates / outliers: keep rows strictly below the 95th percentile,
# first on Longitude, then on Latitude. The Latitude threshold is computed on the
# frame that has already been trimmed by Longitude.
for columna in ('Longitude', 'Latitude'):
    q = df[columna].quantile(0.95)
    df = df[df[columna] < q]

df['Latitude'].describe()

In [None]:
# Scatter of the first 50,000 incident coordinates (markers only, no connecting line).
muestra = df.head(50000)
plt.plot(muestra.Longitude, muestra.Latitude, linestyle='none', marker='.')

plt.suptitle('Crimenes por Latitud y Longitud')
plt.xlabel('Longitud')
plt.ylabel('Latitud');

In [None]:
# Round both coordinates to 3 decimal places.
for eje in ('Longitude', 'Latitude'):
    df[eje] = df[eje].round(3)

# Histogram of the first 2,000 rounded longitudes.
df['Longitude'].head(2000).hist();

### segmentos temporales (AM, PM, noche)

In [None]:
# Build the part-of-day feature `Day_Segment`: 0 = AM, 1 = AFT (afternoon), 2 = NIT (night).
# `Hour` is the integer hour taken from the 'HH:MM' time string.
df['Hour']=[int(e.split(':')[0]) for e in df.Time]


# One boolean mask per segment. The night mask must be an OR: the previous
# `(Hour>18) & (Hour<=5)` could never be true and night rows were only labeled
# correctly because they fell through to the default value.
selecciones=[(df.Hour>5) & (df.Hour<=13),    # hours 6..13
             (df.Hour>13) & (df.Hour<=19),   # hours 14..19
             (df.Hour>19) | (df.Hour<=5)]    # hours 20..23 and 0..5


n_cat=[0, 1, 2] # ['AM', 'AFT', 'NIT']


# The masks now cover every hour 0..23; the default is kept as a safety net.
df['Day_Segment']=np.select(selecciones, n_cat, default=2)
df['Day_Segment'].value_counts()

In [None]:
# Labels for Day_Segment codes 0, 1, 2.
segmentos=['AM','AFT','NIT']

# Incidents per part of day, one panel per crime group.
# Counts come from the rows of the current group only; previously the filtered
# frame was overwritten with counts from the whole data set, so all panels matched.
plt.figure(figsize=(16,10))
for i, categoria in enumerate(nuevas_cat, start=1):
    plt.subplot(2,2,i)
    df_por_seg = (df.loc[df.CAT==categoria, 'Day_Segment']
                    .value_counts()
                    .reindex(range(len(segmentos)), fill_value=0))  # always 3 bars

    plt.barh(df_por_seg.index, df_por_seg.values, align='center', alpha=0.5, color=['r','g','b','c','m','y','violet'])
    plt.title('Categoria: {}'.format(categoria))
    plt.yticks(df_por_seg.index, segmentos)

    # Each group has its own count scale, so ticks and label are shown on every panel.
    plt.xticks(rotation=45)
    plt.xlabel('Crimenes')

### Caracteristicas finales

In [None]:
# Year is the third '/'-separated field of the date string, stored as an integer.
df['Year'] = df.Date.str.split('/').str[2].astype('int64')

In [None]:
# Keep only the final feature columns, in this order.
columnas_finales = ['CAT', 'Day_of_month', 'Month_of_year', 'Day_Segment',
                    'Longitude', 'Latitude', 'Year']
df = df.loc[:, columnas_finales]

df.head()

In [None]:
# Add a column named 'Count', initialised to 0 on every row.
df['Count']=0

df.head()

In [None]:
# One row per distinct combination of the feature columns;
# `Count` holds how many incidents share that combination.
claves = ['CAT', 'Day_of_month', 'Month_of_year', 'Day_Segment',
          'Longitude', 'Latitude', 'Year']
df_agg = df.groupby(claves).size().reset_index(name='Count')

#df_agg.to_csv('data/sf-crime-horizon2.csv', index=None)

df_agg.head()

# Mapa Folium

In [None]:
import folium
import folium.plugins as plugins

In [None]:
# Frame for the map: date, time and coordinates from the raw data,
# renamed to short lowercase names, with incomplete rows dropped.
df_fo = (
    df_raw[['Date', 'Time', 'X', 'Y']]
    .rename(columns={'Date': 'date', 'Time': 'time', 'X': 'long', 'Y': 'lat'})
    .dropna()
)
df_fo.head()

In [None]:
# Order incidents chronologically.
# `date` is an 'MM/DD/YYYY' string, so sorting the raw text groups rows by month
# before year. Build a real timestamp from date + time, sort on it, then drop it so
# the resulting columns are unchanged. Unparseable rows become NaT and sort last.
df_folium = (
    df_fo
    .assign(_ts=pd.to_datetime(df_fo['date'] + ' ' + df_fo['time'],
                               format='%m/%d/%Y %H:%M', errors='coerce'))
    .sort_values(by='_ts', kind='stable')
    .drop(columns='_ts')
    .reset_index(drop=True)
)

df_folium.head()

In [None]:
# Dtypes and non-null counts of the frame used for the map.
df_folium.info()

In [None]:
# Heat-map input: a list with one frame that holds [lat, long, weight] for the
# first 50 rows (weight fixed at 1).
# Only the 50-row sample is converted to Python lists; the previous version
# re-converted the full lat/long columns to lists on every iteration, and raised
# IndexError if the frame had fewer than 50 rows.
muestra = df_folium.head(50)

data=[[[lat, lon, 1] for lat, lon in zip(muestra['lat'].tolist(), muestra['long'].tolist())]]

data

In [None]:
# Base map centred on latitude 37.747146, longitude -122.421899, zoom level 6,
# using the 'stamentoner' tile set.
# NOTE(review): recent folium releases reportedly no longer bundle the Stamen tile
# sets — confirm this still renders with the installed version.
mapa=folium.Map([37.747146, -122.421899], tiles='stamentoner', zoom_start=6)

# Time-animated heat-map layer built from `data` (a list of frames of [lat, long, weight]).
heat=plugins.HeatMapWithTime(data)

# Attach the layer to the map.
heat.add_to(mapa)

# Last expression of the cell: renders the map as the cell output.
mapa