In [None]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import sys, os
import matplotlib.pyplot as plt
%matplotlib inline

# Make the parent of the working directory importable so that the project's
# `lib` package can be found from this notebook.
cd = os.path.dirname(os.getcwd())
if sys.path.count(cd) == 0:
    # Only register the directory once, even if this cell is run again.
    sys.path.append(cd)

from lib import noaa, bexarcrime

In [None]:
# Load the weather data through the project's noaa helper.
# NOTE(review): later cells use dfw as a pandas DataFrame (column indexing,
# .drop, .groupby) rather than as a database handle — confirm what the helper returns.
dfw = noaa.noaa_from_web_small()

In [None]:
# Load the crime data through the project's bexarcrime helper.
# NOTE(review): later cells use c as a pandas DataFrame (column indexing,
# .drop, .groupby) rather than as a database handle — confirm what the helper returns.
c = bexarcrime.crime_from_web_small()

In [None]:
# Derive the violent-crime data from the full crime data. This runs before c is
# cleaned, so v still carries the same columns at this point.
# NOTE(review): presumably a row filter on offense type — verify in lib.bexarcrime.
v = bexarcrime.get_violent_crime(c)

Drop all useless columns from the crime dataframe

In [None]:
# Columns that are not needed for the per-day counts. The full crime frame and the
# violent-crime frame both lose exactly the same set.
_unused_columns = [
    'CASE-CAUSE-NBR', 'FULL-NAME',
    'ADDR-CITY', 'ADDR-STATE', 'ADDR-ZIP-CODE',
    'OFFENSE-DESC', 'OFFENSE-TYPE',
    'REDUCED-OFFENSE-CODE', 'REDUCED-OFFENSE-DESC', 'REDUCED-OFFENSE-TYPE',
    'LOCATION', 'CUSTODY-DATE',
    'BOND-DATE', 'BOND-STATUS', 'BOND-AMOUNT',
]
c = c.drop(_unused_columns, axis=1)
v = v.drop(_unused_columns, axis=1)

Group the offenses by day

In [None]:
# Parse the offense dates of BOTH frames the same way. errors='coerce' turns
# unparseable values into NaT, and groupby leaves those missing keys out of the counts.
# NOTE(review): v was derived from c before c's dates were parsed, so its column is
# presumably still unparsed unless get_violent_crime converts it. Parsing an
# already-datetime column is a no-op, so doing it here is safe either way.
all_dates = pd.to_datetime(c['OFFENSE-DATE'], errors='coerce')
violent_dates = pd.to_datetime(v['OFFENSE-DATE'], errors='coerce')

# Number of offenses per calendar day, indexed by (year, month, day).
# Note: c and v are rebound from frames to count Series, so this cell cannot be re-run
# without re-running the cells above it first.
c = c.groupby([all_dates.dt.year, all_dates.dt.month, all_dates.dt.day]).size()
v = v.groupby([violent_dates.dt.year, violent_dates.dt.month, violent_dates.dt.day]).size()

Ratio of $\frac{\text{violent crimes}}{\text{total crimes}}$ per day

In [None]:
# Daily share of violent crimes. v only has an entry for days with at least one
# violent offense, so a plain divide would align the indexes and yield NaN for every
# day that had crimes but no violent ones. fill_value=0 treats the missing violent
# count as 0, giving a ratio of 0 for those days.
# NOTE(review): assumes every day present in v is also present in c (v is derived
# from c); a day missing from c would be filled with 0 and divide to inf.
ratio = v.divide(c, fill_value=0)

Drop the RHPeriod column from the weather dataframe

In [None]:
# Remove the RHPeriod column; the remaining weather columns are kept as they are.
dfw = dfw.drop(columns=['RHPeriod'])

Replace all values used to represent NULL

In [None]:
# Placeholder codes that stand in for a missing reading, per weather column.
_missing_codes = {
    'Temperature': 9999,
    'Pressure': 99999,
    'Humidity': 999,
    'Sky': [9, 99],
}

# Swap each placeholder for NaN so it is ignored by later averages.
for _column, _codes in _missing_codes.items():
    dfw[_column] = dfw[_column].replace(_codes, np.nan)

Scale the data

In [None]:
# Undo the scale factors of the raw readings with vectorized division instead of a
# per-element Python lambda (same results, no Python-level loop).
# NOTE(review): the divisors (10, 10, 8) are taken from the original cell; the units
# they produce are not stated here — presumably tenths of a unit and eighths of sky
# cover, confirm against the data source.
# Caution: the columns are overwritten in place, so running this cell twice scales twice.
dfw['Temperature'] = dfw['Temperature'] / 10
dfw['Pressure'] = dfw['Pressure'] / 10
dfw['Sky'] = dfw['Sky'] / 8

Convert the Temperature to Fahrenheit

In [None]:
# Celsius -> Fahrenheit (x * 9/5 + 32), done as one vectorized expression rather than
# a per-element lambda. The operation order matches the original, so results are identical.
# Caution: the column is overwritten in place, so running this cell twice converts twice.
dfw['Temperature'] = dfw['Temperature'] * 9 / 5 + 32

# Show a small preview rather than echoing the whole frame.
dfw.head()

Average the weather data for each day

In [None]:
# Mean of every numeric weather column per calendar day, indexed by (year, month, day).
# numeric_only=True is spelled out because the default handling of non-numeric columns
# (such as the Date column used for grouping) differs between pandas versions; only the
# numeric measurements should be averaged.
# NOTE(review): assumes dfw.Date is already a datetime column — confirm in lib.noaa.
dailyaverage = dfw.groupby(
    [dfw.Date.dt.year, dfw.Date.dt.month, dfw.Date.dt.day]
).mean(numeric_only=True)

Integrate daily crime data with weather data

In [None]:
# Attach the daily violent-crime ratio as a new column. pandas aligns `ratio` to
# dailyaverage's index, so days without a matching ratio entry end up NaN.
# NOTE(review): relies on both indexes being the (year, month, day) groupings built
# in the earlier cells — confirm they line up as intended.
dailyaverage['Crimes'] = ratio

In [None]:
# One subplot per column on a shared x axis. The trailing semicolon keeps the returned
# array of Axes objects from being echoed above the figure.
dailyaverage.plot(subplots=True, sharex=True, figsize=(10, 10));