## Building SQL database for Illinois traffic data

In [1]:
from glob import glob
import sets
import datetime
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd
import numpy as np

  from ipykernel import kernelapp as app


In [2]:
import math
# The usual preamble
%matplotlib inline
%pylab inline
import matplotlib.pyplot as plt
plt.rcParams['axes.color_cycle'] = ['r', 'g', 'b', 'c']
plt.rcParams['lines.color'] = 'r'
plt.rcParams['figure.figsize'] = (15, 5)


Populating the interactive namespace from numpy and matplotlib




In [3]:
dbname = 'traffic_stops_il'
username = 'along528'
pswd = 'password'
## 'engine' is a connection to a database
## Here, we're using postgres, but sqlalchemy can connect to other things too.
engine = create_engine('postgresql://%s:%s@localhost/%s'%(username,pswd,dbname))
print engine.url

postgresql://along528:password@localhost/traffic_stops_il


# 2004 

In [4]:
year = '2004'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Statewide*/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df = df.reset_index(drop=True)
df

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,ReasonForStop,MovingViolationType,...,Passenger2SearchType,Passenger3SearchType,Passenger4SearchType,Passenger5SearchType,Passenger6SearchType,ContrabandFound,DrugsFound,WeaponFound,StolenPropertyFound,OtherContrabandFound
0,Abingdon Police Department,13462,1/15/2004 7:20:00 AM,chevrolet,2002,1955,Male,Caucasian,Moving Violation,Speed,...,,,,,,,,,,
1,Abingdon Police Department,13462,1/17/2004 9:20:00 PM,Buick,1992,1972,Male,Caucasian,Equipment,,...,,,,,,,,,,
2,Abingdon Police Department,13462,1/19/2004 12:58:00 AM,Chevrolet,2001,1985,Male,Caucasian,License Plate/Registration,,...,,,,,,,,,,
3,Abingdon Police Department,13462,1/19/2004 2:13:00 AM,Honda,1989,1981,Male,Caucasian,Equipment,,...,,,,,,,,,,
4,Abingdon Police Department,13462,1/21/2004 9:52:00 PM,Mercury,1988,1984,Male,Caucasian,Equipment,,...,,,,,,,,,,
5,Abingdon Police Department,13462,1/10/2004 7:40:00 PM,Chrysler,1994,1976,Male,Caucasian,License Plate/Registration,,...,,,,,,,,,,
6,Abingdon Police Department,13462,1/11/2004 1:50:00 AM,Toyota,1989,1953,Male,Caucasian,Equipment,,...,,,,,,,,,,
7,Abingdon Police Department,13462,1/3/2004 8:30:00 PM,Ford,1998,1982,Male,Caucasian,Moving Violation,Speed,...,,,,,,,,,,
8,Abingdon Police Department,13462,1/3/2004 11:43:00 PM,Saturn,1999,1982,Male,Caucasian,Moving Violation,Speed,...,,,,,,,,,,
9,Abingdon Police Department,13462,1/2/2004 10:40:00 PM,Chevrolet,1976,1979,Male,Caucasian,License Plate/Registration,,...,,,,,,No,,,,


In [5]:
def race_map(x):
    #if type(x)!='object': return x
    x = x.replace('Caucasian','white')
    x = x.replace('African American','black')
    x = x.replace('Hispanic','hispanic')
    x = x.replace('Asian/Pacific Islander','asian')
    x = x.replace('Native American/Alaskan','native_american')
    return x
def process_2004(df):
    #change race reporting format
    tmpdf = pd.DataFrame(df)
    tmprace = tmpdf['Race']
    tmprace = tmprace.fillna('other')
    tmprace = tmprace.map(race_map)
    tmpdf['Race'] = tmprace
    #parse datese
    #tmpdf['DateAndTimeOfStop'] = pd.to_datetime(tmpdf['DateAndTimeOfStop'])
    #tmpdf['year'] = tmpdf['DateAndTimeOfStop'].map(lambda x: x.year)
    #tmpdf['month'] = tmpdf['DateAndTimeOfStop'].map(lambda x: x.month)
    #fillna
    #pluck out columns of interest
    tmpdf =  tmpdf[['AgencyName',
                       'AgencyCode',
                       'DriverYearOfBirth',
                       'DriverSex',
                       'Race',
                       'SearchConducted',
                       'ContrabandFound']]#, 'year', #'month']]
    #format agency title
    tmpdf = tmpdf.fillna('No')
    tmpdf['AgencyName'] = tmpdf['AgencyName'].map(str.title)
    #format column names
    tmpdf.rename(columns=str.lower,inplace=True)
    return tmpdf


In [6]:
tmpdf = process_2004(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Abingdon Police Department,13462,1955,Male,white,No,No
1,Abingdon Police Department,13462,1972,Male,white,No,No
2,Abingdon Police Department,13462,1985,Male,white,No,No
3,Abingdon Police Department,13462,1981,Male,white,No,No
4,Abingdon Police Department,13462,1984,Male,white,No,No
5,Abingdon Police Department,13462,1976,Male,white,No,No
6,Abingdon Police Department,13462,1953,Male,white,No,No
7,Abingdon Police Department,13462,1982,Male,white,No,No
8,Abingdon Police Department,13462,1982,Male,white,No,No
9,Abingdon Police Department,13462,1979,Male,white,Yes,No


In [8]:
tmpdf['year'] = year
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound,2004,year
0,Abingdon Police Department,13462,1955,Male,white,No,No,2004,2004
1,Abingdon Police Department,13462,1972,Male,white,No,No,2004,2004
2,Abingdon Police Department,13462,1985,Male,white,No,No,2004,2004
3,Abingdon Police Department,13462,1981,Male,white,No,No,2004,2004
4,Abingdon Police Department,13462,1984,Male,white,No,No,2004,2004
5,Abingdon Police Department,13462,1976,Male,white,No,No,2004,2004
6,Abingdon Police Department,13462,1953,Male,white,No,No,2004,2004
7,Abingdon Police Department,13462,1982,Male,white,No,No,2004,2004
8,Abingdon Police Department,13462,1982,Male,white,No,No,2004,2004
9,Abingdon Police Department,13462,1979,Male,white,Yes,No,2004,2004


In [None]:
tmpdf['year'] = year

In [None]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

# 2005 

In [69]:
year = '2005'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Statewide*/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,ReasonForStop,MovingViolationType,...,Passenger2SearchType,Passenger3SearchType,Passenger4SearchType,Passenger5SearchType,Passenger6SearchType,ContrabandFound,DrugsFound,WeaponFound,StolenPropertyFound,OtherContrabandFound
0,NAPERVILLE PARK DISTRICT POLICE,14079,1/17/2005 5:45:00 PM,Nissan,1995,1959,Female,Caucasian,Moving Violation,Traffic Sign or Signal,...,,,,,,No,No,No,No,No
1,NAPERVILLE PARK DISTRICT POLICE,14079,7/31/2005 5:30:00 PM,Chevrolet,2000,1988,Male,Hispanic,License Plate/Registration,,...,,,,,,No,No,No,No,No
2,NAPERVILLE PARK DISTRICT POLICE,14079,11/13/2005 7:17:00 PM,GMC,2002,1980,Male,Caucasian,Moving Violation,Lane Violation,...,,,,,,No,No,No,No,No
3,NAPERVILLE PARK DISTRICT POLICE,14079,6/7/2005 3:03:00 PM,Jeep,1996,1989,Female,Caucasian,Moving Violation,Speed,...,,,,,,No,No,No,No,No
4,GOLCONDA POLICE,14078,2/11/2005 5:27:00 PM,TOYOTA,1997,1981,Female,Caucasian,Moving Violation,Speed,...,,,,,,No,No,No,No,No
5,GOLCONDA POLICE,14078,2/20/2005 9:36:00 PM,HONDA,1989,1977,Male,Caucasian,Moving Violation,Other,...,,,,,,No,Yes,No,No,No
6,GOLCONDA POLICE,14078,3/14/2005 1:40:00 PM,FORD,2002,1951,Female,Caucasian,Moving Violation,Lane Violation,...,,,,,,No,No,No,No,No
7,GOLCONDA POLICE,14078,10/2/2005 4:15:00 PM,GMC,1997,1972,Male,Caucasian,License Plate/Registration,,...,,,,,,No,No,No,No,No
8,GOLCONDA POLICE,14078,10/5/2005 8:10:00 PM,TOYOTA,1992,1957,Male,Caucasian,Moving Violation,Other,...,,,,,,No,No,No,No,No
9,GOLCONDA POLICE,14078,10/1/2005 10:15:00 PM,CHEV,1989,1958,Male,Caucasian,Moving Violation,Other,...,,,,,,No,No,No,No,No


In [10]:
process_2005 = process_2004

In [70]:
tmpdf = process_2005(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Naperville Park District Police,14079,1959,Female,white,No,No
1,Naperville Park District Police,14079,1988,Male,hispanic,No,No
2,Naperville Park District Police,14079,1980,Male,white,No,No
3,Naperville Park District Police,14079,1989,Female,white,No,No
4,Golconda Police,14078,1981,Female,white,No,No
5,Golconda Police,14078,1977,Male,white,Yes,No
6,Golconda Police,14078,1951,Female,white,No,No
7,Golconda Police,14078,1972,Male,white,No,No
8,Golconda Police,14078,1957,Male,white,No,No
9,Golconda Police,14078,1958,Male,white,No,No


In [None]:
tmpdf['year'] = year


In [72]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
293868,Chicago Police,13194,1900,Male,black,No,No
293869,Chicago Police,13194,1900,Female,black,No,No
293870,Chicago Police,13194,1900,Male,black,No,No
293871,Chicago Police,13194,1900,Female,white,No,No
293872,Chicago Police,13194,1900,Female,black,No,No
293873,Chicago Police,13194,1900,Female,black,No,No
293874,Chicago Police,13194,1900,Female,black,No,No
293875,Chicago Police,13194,1900,Female,hispanic,No,No
293876,Chicago Police,13194,1900,Male,hispanic,Yes,No
293877,Chicago Police,13194,1900,Male,native_american,No,No


# 2006

In [71]:
year = '2006'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Statewide*/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,ReasonForStop,MovingViolationType,...,Passenger2SearchType,Passenger3SearchType,Passenger4SearchType,Passenger5SearchType,Passenger6SearchType,ContrabandFound,DrugsFound,WeaponFound,StolenPropertyFound,OtherContrabandFound
0,SALEM POLICE,13628,1/6/2006 3:30:00 AM,JEEP,1985,64,Female,Caucasian,Equipment,,...,,,,,,,,,,
1,SALEM POLICE,13628,1/6/2006 2:07:00 PM,FORD,1995,63,Male,Caucasian,License Plate/Registration,,...,,,,,,,,,,
2,LISLE POLICE,13262,2/5/2006 5:20:00 PM,Ford Mustang,2002,1976,Female,Caucasian,License Plate/Registration,,...,,,,,,,,,,
3,VILLA GROVE POLICE,13238,1/18/2006 6:20:00 PM,PONTIAC,1997,1987,Female,Caucasian,Moving Violation,Traffic Sign or Signal,...,,,,,,,,,,
4,COLLINSVILLE POLICE,13624,2/6/2006 4:08:00 PM,chev,1983,1986,Male,Caucasian,Moving Violation,Speed,...,,,,,,,,,,
5,MONMOUTH POLICE,13903,7/24/2006 4:15:00 PM,BUICK,1994,1987,Female,Caucasian,Moving Violation,Traffic Sign or Signal,...,,,,,,,,,,
6,HEYWORTH POLICE,13575,11/6/2006 4:30:00 PM,TOYOTA,2005,1966,Male,Caucasian,Moving Violation,Speed,...,,,,,,,,,,
7,RICHTON PARK POLICE,13001,11/24/2006 7:42:00 PM,CHRYSLER,2003,1959,Male,African American,Moving Violation,Traffic Sign or Signal,...,,,,,,,,,,
8,RICHTON PARK POLICE,13001,11/25/2006 5:40:00 PM,SATURN,2002,1955,Male,Caucasian,License Plate/Registration,,...,,,,,,No,,,,
9,SHELBYVILLE POLICE,13839,11/26/2006 2:55:00 PM,Ford,2000,1952,Female,Caucasian,Moving Violation,Speed,...,,,,,,,,,,


In [17]:
process_2006 = process_2004

In [67]:
tmpdf = process_2006(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Salem Police,13628,64,Female,white,No,No
1,Salem Police,13628,63,Male,white,No,No
2,Lisle Police,13262,1976,Female,white,No,No
3,Villa Grove Police,13238,1987,Female,white,No,No
4,Collinsville Police,13624,1986,Male,white,No,No
5,Monmouth Police,13903,1987,Female,white,No,No
6,Heyworth Police,13575,1966,Male,white,No,No
7,Richton Park Police,13001,1959,Male,black,No,No
8,Richton Park Police,13001,1955,Male,white,Yes,No
9,Shelbyville Police,13839,1952,Female,white,No,No


In [None]:
tmpdf['year'] = year


In [68]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
2313411,Chicago Police,13194,1948,Male,white,No,No
2313412,Chicago Police,13194,1978,Female,black,No,No
2313413,Chicago Police,13194,1971,Male,white,Yes,No
2313414,Chicago Police,13194,1987,Male,black,Yes,No
2313415,Chicago Police,13194,1970,Male,black,Yes,No
2313416,Chicago Police,13194,1956,Male,asian,Yes,No
2313417,Chicago Police,13194,1982,Male,hispanic,No,No
2313418,Chicago Police,13194,1989,Male,hispanic,No,No
2313419,Chicago Police,13194,1961,Male,black,Yes,No
2313420,Chicago Police,13194,1980,Male,black,Yes,No


# 2007

In [111]:
year = '2007'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Statewide*/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df = df.reset_index(drop=True)
df.rename(columns=lambda x: x.replace('YearofBirth','YearOfBirth'),inplace=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,DurationOfStop,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,ReasonForStop,...,WasConsentGranted,WasConsentSearchPerformed,WasConsentContrabandFound,ConsentDrugsFound,ConsentParaphernaliaFound,ConsentAlcoholFound,ConsentWeaponFound,ConsentStolenPropertyFound,ConsentOtherContrabandFound,ConsentDrugQuantity
0,ADAMS COUNTY SHERIFF,13054,1/1/2007 10:49:00 AM,7,FORD,2007,1959,Male,Caucasian,Equipment,...,,,,,,,,,,
1,ADAMS COUNTY SHERIFF,13054,1/1/2007 10:35:00 PM,15,FORD,2000,1983,Male,Caucasian,Moving Violation,...,,,,,,,,,,
2,ADAMS COUNTY SHERIFF,13054,1/1/2007 10:58:00 PM,15,FORD,2000,1988,Male,African American,Moving Violation,...,,,,,,,,,,
3,ADAMS COUNTY SHERIFF,13054,1/1/2007 11:01:00 PM,15,JEEP,2004,1977,Male,Caucasian,Moving Violation,...,,,,,,,,,,
4,ADAMS COUNTY SHERIFF,13054,1/2/2007 8:04:00 PM,6,FORD,2002,1970,Female,African American,Moving Violation,...,,,,,,,,,,
5,ADAMS COUNTY SHERIFF,13054,1/2/2007 8:19:00 PM,15,CHEVY,2000,1958,Female,Caucasian,Moving Violation,...,,,,,,,,,,
6,ADAMS COUNTY SHERIFF,13054,1/2/2007 9:34:00 PM,15,FORD,1992,1972,Male,Caucasian,Moving Violation,...,,,,,,,,,,
7,ADAMS COUNTY SHERIFF,13054,1/2/2007 9:46:00 PM,14,BUICK,1997,1979,Female,Caucasian,Moving Violation,...,,,,,,,,,,
8,ADAMS COUNTY SHERIFF,13054,1/2/2007 9:51:00 PM,15,DODGE,1997,1988,Male,Caucasian,Moving Violation,...,,,,,,,,,,
9,ADAMS COUNTY SHERIFF,13054,1/3/2007 7:40:00 PM,10,DODGE,1993,1981,Male,Caucasian,Equipment,...,,,,,,,,,,


In [112]:
process_2007 = process_2004

In [113]:
tmpdf = process_2007(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Adams County Sheriff,13054,1959,Male,white,No,No
1,Adams County Sheriff,13054,1983,Male,white,No,No
2,Adams County Sheriff,13054,1988,Male,black,No,No
3,Adams County Sheriff,13054,1977,Male,white,No,No
4,Adams County Sheriff,13054,1970,Female,black,No,No
5,Adams County Sheriff,13054,1958,Female,white,No,No
6,Adams County Sheriff,13054,1972,Male,white,No,No
7,Adams County Sheriff,13054,1979,Female,white,No,No
8,Adams County Sheriff,13054,1988,Male,white,No,No
9,Adams County Sheriff,13054,1981,Male,white,No,No


Chicago data is split into 3 separate files with their own formats. The first file doesn't have the results of the search and they have most of the data from Jan to March. 
Probably should just omit 2007 as a whole. At least initially

In [82]:
year = '2007'
tmpdfs = []

for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Chicago*/*1.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df_chi1 = pd.concat(tmpdfs)
df_chi1 = df_chi1.reset_index(drop=True)
df_chi1.rename(columns=lambda x: x.replace(' ',''),inplace=True)

df_chi1.rename(columns=lambda x: x.replace('andTimeofStop','AndTimeOfStop'),inplace=True)
df_chi1.rename(columns=lambda x: x.replace('YearofBirth','YearOfBirth'),inplace=True)
#df_chi1 = process_2007(df_chi1)
df_chi1

Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,ReasonforStop,MovingViolationType,...,BeatLocationStop,SearchConducted,VehicleSearchType,DriverSearchType,Pass1SearchType,Pass2SearchType,Pass3SearchType,Pass4SearchType,Pass5SearchType,Pass6SearchType
0,CHICAGO POLICE,13194,01/14/2007 05:57,HONDA,2003,,Female,Hispanic,Moving Violation,Traffic Sign or Signal,...,2323,No,,,,,,,,
1,CHICAGO POLICE,13194,01/14/2007 06:20,HONDA,2000,,Male,Caucasian,Moving Violation,Speed,...,1933,No,,,,,,,,
2,CHICAGO POLICE,13194,02/20/2007 21:45,HONDA,1994,,Male,Caucasian,Moving Violation,Traffic Sign or Signal,...,2324,No,,,,,,,,
3,CHICAGO POLICE,13194,02/20/2007 21:45,FORD,1992,,Male,African American,Moving Violation,Traffic Sign or Signal,...,313,No,,,,,,,,
4,CHICAGO POLICE,13194,02/20/2007 21:45,DODGE,1996,,Female,Hispanic,Moving Violation,Traffic Sign or Signal,...,823,No,,,,,,,,
5,CHICAGO POLICE,13194,02/20/2007 21:45,MITSUBISHI,1992,,Male,African American,Moving Violation,SeatBelt,...,421,No,,,,,,,,
6,CHICAGO POLICE,13194,02/20/2007 21:45,ISUZU,1994,,Male,Hispanic,Moving Violation,Speed,...,813,No,,,,,,,,
7,CHICAGO POLICE,13194,02/20/2007 21:48,HONDA,1999,,Female,Caucasian,Moving Violation,Other,...,3100,No,,,,,,,,
8,CHICAGO POLICE,13194,01/01/2007 00:00,CHEVROLET,1995,,Male,Hispanic,License Plate/Registration,,...,1031,No,,,,,,,,
9,CHICAGO POLICE,13194,01/01/2007 00:03,DODGE,1991,,Male,African American,Moving Violation,SeatBelt,...,1213,No,,,,,,,,


similar format to rest of data

In [109]:
year = '2007'
tmpdfs = []

for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Chicago*/*2.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df_chi2 = pd.concat(tmpdfs)
df_chi2 = df_chi2.reset_index(drop=True)
df_chi2.rename(columns=lambda x: x.replace(' ',''),inplace=True)
df_chi2.rename(columns=lambda x: x.replace('YearofBirth','YearOfBirth'),inplace=True)
df_chi2.rename(columns=lambda x: x.replace('andTimeofStop','AndTimeOfStop'),inplace=True)
df_chi2 = process_2007(df_chi2)
df_chi2

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Chicago Police,13194,No,Male,white,No,No
1,Chicago Police,13194,No,Male,white,No,No
2,Chicago Police,13194,No,Male,hispanic,No,No
3,Chicago Police,13194,No,Male,white,No,No
4,Chicago Police,13194,No,Male,black,No,No
5,Chicago Police,13194,No,Male,hispanic,No,No
6,Chicago Police,13194,No,Male,hispanic,No,No
7,Chicago Police,13194,No,Male,hispanic,No,No
8,Chicago Police,13194,No,Female,black,No,No
9,Chicago Police,13194,No,Male,white,No,No


In [110]:
year = '2007'
tmpdfs = []

for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Chicago*/*3.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df_chi3 = pd.concat(tmpdfs)
df_chi3 = df_chi3.reset_index(drop=True)
df_chi3.rename(columns=lambda x: x.replace(' ',''),inplace=True)
df_chi3.rename(columns=lambda x: x.replace('YrBirth','DriverYearOfBirth'),inplace=True)
df_chi3.rename(columns=lambda x: x.replace('DateStopTimeStop','DateAndTimeOfStop'),inplace=True)
df_chi3 = process_2007(df_chi3)
df_chi3

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Chicago Police,13194,1982,Male,white,No,No
1,Chicago Police,13194,1968,Male,black,No,No
2,Chicago Police,13194,1974,Male,black,No,No
3,Chicago Police,13194,1983,Male,white,No,No
4,Chicago Police,13194,1972,Male,hispanic,No,No
5,Chicago Police,13194,1980,Male,black,Yes,No
6,Chicago Police,13194,1981,Male,hispanic,No,No
7,Chicago Police,13194,1977,Male,hispanic,No,No
8,Chicago Police,13194,1987,Male,white,No,No
9,Chicago Police,13194,1981,Male,black,No,No


Combine state and last 2/3 of chicago data

In [114]:
tmpdf = pd.concat([tmpdf,df_chi2,df_chi3])
tmpdf

In [None]:
tmpdf['year'] = year


In [115]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Adams County Sheriff,13054,1959,Male,white,No,No
1,Adams County Sheriff,13054,1983,Male,white,No,No
2,Adams County Sheriff,13054,1988,Male,black,No,No
3,Adams County Sheriff,13054,1977,Male,white,No,No
4,Adams County Sheriff,13054,1970,Female,black,No,No
5,Adams County Sheriff,13054,1958,Female,white,No,No
6,Adams County Sheriff,13054,1972,Male,white,No,No
7,Adams County Sheriff,13054,1979,Female,white,No,No
8,Adams County Sheriff,13054,1988,Male,white,No,No
9,Adams County Sheriff,13054,1981,Male,white,No,No


# 2008

In [119]:
year = '2008'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Statewide*/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df.rename(columns=lambda x: x.replace('YearofBirth','YearOfBirth'),inplace=True)
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,DurationOfStop,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,ReasonForStop,...,WasConsentGranted,WasConsentSearchPerformed,WasConsentContrabandFound,ConsentDrugsFound,ConsentAlcoholFound,ConsentParaphernaliaFound,ConsentWeaponFound,ConsentStolenPropertyFound,ConsentOtherContrabandFound,ConsentDrugQuantity
0,Abingdon Police Department,13462,01/04/2008 19:00,8,Mercury,1996,1986,Female,Caucasian,Equipment,...,,,,,,,,,,
1,Abingdon Police Department,13462,01/06/2008 00:00,10,Jeep,2006,1945,Female,Caucasian,Moving Violation,...,,,,,,,,,,
2,Abingdon Police Department,13462,01/06/2008 16:30,18,Oldsmobile,1992,1990,Male,Caucasian,Moving Violation,...,,,,,,,,,,
3,Abingdon Police Department,13462,01/06/2008 18:49,20,Chevrolet,1994,1958,Female,Caucasian,Equipment,...,,,,,,,,,,
4,Abingdon Police Department,13462,01/07/2008 17:35,16,GMC,1987,1989,Female,Caucasian,Equipment,...,,,,,,,,,,
5,Abingdon Police Department,13462,01/07/2008 20:40,45,Jeep,1991,1964,Male,Caucasian,Moving Violation,...,,,,,,,,,,
6,Abingdon Police Department,13462,01/09/2008 15:30,4,Chrysler,2001,1991,Female,Caucasian,Equipment,...,,,,,,,,,,
7,Abingdon Police Department,13462,01/09/2008 15:34,10,Dodge,2005,1975,Male,Caucasian,License Plate/Registration,...,,,,,,,,,,
8,Abingdon Police Department,13462,01/09/2008 16:03,10,Land Rover,1997,1981,Female,Caucasian,License Plate/Registration,...,,,,,,,,,,
9,Abingdon Police Department,13462,01/09/2008 16:20,12,Ford,2004,1980,Female,Caucasian,Moving Violation,...,,,,,,,,,,


In [120]:
process_2008 = process_2004

In [121]:
tmpdf = process_2008(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Abingdon Police Department,13462,1986,Female,white,No,No
1,Abingdon Police Department,13462,1945,Female,white,No,No
2,Abingdon Police Department,13462,1990,Male,white,No,No
3,Abingdon Police Department,13462,1958,Female,white,No,No
4,Abingdon Police Department,13462,1989,Female,white,No,No
5,Abingdon Police Department,13462,1964,Male,white,Yes,No
6,Abingdon Police Department,13462,1991,Female,white,No,No
7,Abingdon Police Department,13462,1975,Male,white,No,No
8,Abingdon Police Department,13462,1981,Female,white,No,No
9,Abingdon Police Department,13462,1980,Female,white,No,No


In [None]:
tmpdf['year'] = year


In [None]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

# 2009

In [122]:
year = '2009'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*Statewide*/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df.rename(columns=lambda x: x.replace('YearofBirth','YearOfBirth'),inplace=True)
df = df.reset_index(drop=True)
df

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,DurationOfStop,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,ReasonForStop,...,WasConsentGranted,WasConsentSearchPerformed,WasConsentContrabandFound,ConsentDrugsFound,ConsentAlcoholFound,ConsentParaphernaliaFound,ConsentWeaponFound,ConsentStolenPropertyFound,ConsentOtherContrabandFound,ConsentDrugQuantity
0,Abingdon Police Department,13462,1/2/2009 3:58:00 AM,14,Mitsubishi,2007,1973,Male,Caucasian,Equipment,...,,,,,,,,,,
1,Abingdon Police Department,13462,1/2/2009 10:23:00 AM,15,Ford,1997,1964,Male,Caucasian,Equipment,...,,,,,,,,,,
2,Abingdon Police Department,13462,1/2/2009 7:47:00 PM,65,Chevrolet,1983,1980,Male,Caucasian,Equipment,...,,,,,,,,,,
3,Abingdon Police Department,13462,1/2/2009 8:51:00 PM,13,Chevrolet,2001,1969,Female,Caucasian,Moving Violation,...,,,,,,,,,,
4,Abingdon Police Department,13462,1/3/2009 1:00:00 AM,20,Dodge,2004,1986,Female,Caucasian,Moving Violation,...,Yes,Yes,No,,,,,,,
5,Abingdon Police Department,13462,1/3/2009 1:46:00 AM,20,Chevrolet,1997,1982,Female,Caucasian,Moving Violation,...,,,,,,,,,,
6,Abingdon Police Department,13462,1/3/2009 4:10:00 AM,12,Pontiac,1997,1952,Male,Caucasian,Moving Violation,...,,,,,,,,,,
7,Abingdon Police Department,13462,1/3/2009 8:49:00 PM,9,Plymouth,1993,1991,Male,Caucasian,Equipment,...,,,,,,,,,,
8,Abingdon Police Department,13462,1/3/2009 9:10:00 PM,10,Chevrolet,1996,1985,Female,African American,Equipment,...,,,,,,,,,,
9,Abingdon Police Department,13462,1/4/2009 12:42:00 AM,18,Oldsmobile,1990,1976,Female,Caucasian,Equipment,...,,,,,,,,,,


In [123]:
process_2009 = process_2004

In [124]:
tmpdf = process_2009(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Abingdon Police Department,13462,1973,Male,white,No,No
1,Abingdon Police Department,13462,1964,Male,white,No,No
2,Abingdon Police Department,13462,1980,Male,white,Yes,No
3,Abingdon Police Department,13462,1969,Female,white,No,No
4,Abingdon Police Department,13462,1986,Female,white,No,No
5,Abingdon Police Department,13462,1982,Female,white,Yes,No
6,Abingdon Police Department,13462,1952,Male,white,No,No
7,Abingdon Police Department,13462,1991,Male,white,No,No
8,Abingdon Police Department,13462,1985,Female,black,No,No
9,Abingdon Police Department,13462,1976,Female,white,No,No


In [None]:
tmpdf['year'] = year


In [None]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

# 2010

In [141]:
year = '2010'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df.rename(columns=lambda x: x.replace('YearofBirth','YearOfBirth'),inplace=True)
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateAndTimeOfStop,DurationOfStop,ZIPCode,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,Race,...,WasConsentGranted,WasConsentSearchPerformed,WasConsentContrabandFound,ConsentDrugsFound,ConsentAlcoholFound,ConsentParaphernaliaFound,ConsentWeaponFound,ConsentStolenPropertyFound,ConsentOtherContrabandFound,ConsentDrugQuantity
0,SOUTH SUBURBAN COLLEGE POLICE,12987,03/19/2010 17:14,17,60438,OLDSMOBILE,1997,1991,1,2,...,0,0,0,0,0,0,0,0,0,0
1,SOUTH SUBURBAN COLLEGE POLICE,12987,12/03/2010 09:42,12,60827,FORD,1998,1939,1,2,...,0,0,0,0,0,0,0,0,0,0
2,SOUTH SUBURBAN COLLEGE POLICE,12987,01/25/2010 15:40,5,60411,CHEVROLET,2005,1963,1,2,...,0,0,0,0,0,0,0,0,0,0
3,SOUTH SUBURBAN COLLEGE POLICE,12987,01/25/2010 15:50,87,60827,DODGE,1996,1988,2,2,...,0,0,0,0,0,0,0,0,0,0
4,SOUTH SUBURBAN COLLEGE POLICE,12987,01/27/2010 19:15,3,60651,HYUNDAI,2010,1990,1,2,...,0,0,0,0,0,0,0,0,0,0
5,SOUTH SUBURBAN COLLEGE POLICE,12987,02/03/2010 08:30,5,60620,CHEVROLET,1993,1989,1,2,...,0,0,0,0,0,0,0,0,0,0
6,SOUTH SUBURBAN COLLEGE POLICE,12987,03/18/2010 17:02,4,60478,CHEVROLET,1998,1987,2,2,...,0,0,0,0,0,0,0,0,0,0
7,SOUTH SUBURBAN COLLEGE POLICE,12987,03/24/2010 11:48,6,60473,CHEVROLET,2007,1990,1,2,...,0,0,0,0,0,0,0,0,0,0
8,SOUTH SUBURBAN COLLEGE POLICE,12987,03/24/2010 14:08,7,60633,PONTIAC,2004,1988,1,2,...,0,0,0,0,0,0,0,0,0,0
9,SOUTH SUBURBAN COLLEGE POLICE,12987,03/31/2010 08:30,10,60473,OLDSMOBILE,2002,1990,2,2,...,0,0,0,0,0,0,0,0,0,0


In [142]:

def process_2010(df):
    #change race reporting format
    tmpdf = pd.DataFrame(df)
    tmprace = tmpdf['Race']
    tmprace = tmprace.map({1:'white',
                           2:'black',
                           3:'native_american',
                           4:'hispanic',
                           5:'asian'})
    tmpdf['Race'] = tmprace
    #map gender
    tmpsex = tmpdf['DriverSex']
    tmpsex = tmpsex.map({1:'Male',2:'Female'})
    tmpdf['DriverSex'] = tmpsex
    #map search conducted
    tmpsearch = tmpdf['SearchConducted']
    tmpsearch = tmpsearch.map({1:'Yes',2:'No'})
    tmpdf['SearchConducted'] = tmpsearch
    #map contraband found 
    tmpcontra = tmpdf['ContrabandFound']
    tmpcontra = tmpcontra.map({0:'No',1:'Yes',2:'No'})
    tmpdf['ContrabandFound'] = tmpcontra
    #parse datese
    #tmpdf['DateAndTimeOfStop'] = pd.to_datetime(tmpdf['DateAndTimeOfStop'])
    #tmpdf['year'] = tmpdf['DateAndTimeOfStop'].map(lambda x: x.year)
    #tmpdf['month'] = tmpdf['DateAndTimeOfStop'].map(lambda x: x.month)
    #fillna
    #pluck out columns of interest
    tmpdf =  tmpdf[['AgencyName',
                       'AgencyCode',
                       'DriverYearOfBirth',
                       'DriverSex',
                       'Race',
                       'SearchConducted',
                       'ContrabandFound']]#, 'year', #'month']]
    #format agency title
    tmpdf['AgencyName'] = tmpdf['AgencyName'].map(str.title)
    #format column names
    tmpdf.rename(columns=str.lower,inplace=True)
    return tmpdf


In [143]:
tmpdf = process_2010(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,South Suburban College Police,12987,1991,Male,black,No,No
1,South Suburban College Police,12987,1939,Male,black,No,No
2,South Suburban College Police,12987,1963,Male,black,No,No
3,South Suburban College Police,12987,1988,Female,black,No,No
4,South Suburban College Police,12987,1990,Male,black,No,No
5,South Suburban College Police,12987,1989,Male,black,No,No
6,South Suburban College Police,12987,1987,Female,black,No,No
7,South Suburban College Police,12987,1990,Male,black,No,No
8,South Suburban College Police,12987,1988,Male,black,No,No
9,South Suburban College Police,12987,1990,Female,black,No,No


In [None]:
tmpdf['year'] = year


In [None]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

# 2011

In [150]:
year = '2011'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df.rename(columns=lambda x: x.replace('YearofBirth','YearOfBirth'),inplace=True)
df.rename(columns=lambda x: x.replace(' ',''),inplace=True)
#splits data and time of stop
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateOfStop,TimeOfStop,DurationOfStop,ZIPCode,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,...,WasConsentGranted,WasConsentSearchPerformed,WasConsentContrabandFound,ConsentDrugsFound,ConsentAlcoholFound,ConsentParaphernaliaFound,ConsentWeaponFound,ConsentStolenPropertyFound,ConsentOtherContrabandFound,ConsentDrugQuantity
0,Abingdon Police Department,13462,01/01/2011,00:36,20,61410,FORD,1999,1987,1,...,0,0,0,0,0,0,0,0,0,0
1,Abingdon Police Department,13462,01/02/2011,22:00,13,61488,PONTIAC,2009,1992,1,...,0,0,0,0,0,0,0,0,0,0
2,Abingdon Police Department,13462,01/02/2011,11:10,6,61422,PLYMOUTH,1998,1950,1,...,0,0,0,0,0,0,0,0,0,0
3,Abingdon Police Department,13462,01/01/2011,19:01,14,61410,NISSAN,2006,1979,2,...,0,0,0,0,0,0,0,0,0,0
4,Abingdon Police Department,13462,01/01/2011,19:30,9,61401,BUICK,2003,1951,2,...,0,0,0,0,0,0,0,0,0,0
5,Abingdon Police Department,13462,01/02/2011,00:43,10,61410,FORD,2006,1988,1,...,0,0,0,0,0,0,0,0,0,0
6,Abingdon Police Department,13462,01/01/2011,22:27,10,61401,JEEP,1991,1994,1,...,0,0,0,0,0,0,0,0,0,0
7,Abingdon Police Department,13462,01/01/2011,19:31,10,61474,CHEVROLET,1997,1984,1,...,0,0,0,0,0,0,0,0,0,0
8,Abingdon Police Department,13462,01/04/2011,22:10,10,61401,MAZDA,2002,1951,2,...,0,0,0,0,0,0,0,0,0,0
9,Abingdon Police Department,13462,01/04/2011,21:46,20,61410,CHRYSLER,2004,1968,2,...,0,0,0,0,0,0,0,0,0,0


In [151]:
process_2011 = process_2010
tmpdf = process_2011(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Abingdon Police Department,13462,1987,Male,white,Yes,No
1,Abingdon Police Department,13462,1992,Male,white,No,No
2,Abingdon Police Department,13462,1950,Male,white,No,No
3,Abingdon Police Department,13462,1979,Female,white,No,No
4,Abingdon Police Department,13462,1951,Female,white,No,No
5,Abingdon Police Department,13462,1988,Male,white,No,No
6,Abingdon Police Department,13462,1994,Male,white,No,No
7,Abingdon Police Department,13462,1984,Male,white,No,No
8,Abingdon Police Department,13462,1951,Female,white,No,No
9,Abingdon Police Department,13462,1968,Female,white,No,No


In [None]:
tmpdf['year'] = year


In [None]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

# 2012

In [188]:
year = '2012'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df.rename(columns=lambda x: x.replace('DriversYearofBirth','DriverYearOfBirth'),inplace=True)
df.rename(columns=lambda x: x.replace('DriverRace','Race'),inplace=True)

df.rename(columns=lambda x: x.replace(' ',''),inplace=True)
#splits data and time of stop
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateOfStop,TimeOfStop,DurationOfStop,ZIP,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,...,PoliceDogAlertIfSniffed,PoliceDogVehicleSearched,PoliceDogContrabandFound,PoliceDogDrugsFound,PoliceDogDrugParaphernaliaFound,PoliceDogAlcoholFound,PoliceDogWeaponFound,PoliceDogStolenPropertyFound,PoliceDogOtherContrabandFound,PoliceDogDrugAmount
0,CHICAGO POLICE,13194,09/15/2012,10:20,30,60641,CHRYSLER,2004,1957,1,...,0,0,0,0,0,0,0,0,0,0
1,CHICAGO POLICE,13194,09/15/2012,10:20,14,60189,LEXUS,1999,1991,2,...,0,0,0,0,0,0,0,0,0,0
2,CHICAGO POLICE,13194,09/15/2012,10:20,10,60638,WHITE GMC (BEGIN. MODEL YEAR 1988),2010,1971,2,...,0,0,0,0,0,0,0,0,0,0
3,CHICAGO POLICE,13194,09/15/2012,10:20,10,60638,CHEVROLET,2008,1961,1,...,0,0,0,0,0,0,0,0,0,0
4,CHICAGO POLICE,13194,09/15/2012,10:21,1,,DODGE,2007,1950,1,...,0,0,0,0,0,0,0,0,0,0
5,CHICAGO POLICE,13194,09/15/2012,10:30,10,60632,CHEVROLET,2000,1981,1,...,0,0,0,0,0,0,0,0,0,0
6,CHICAGO POLICE,13194,09/15/2012,10:33,12,60625,VOLKSWAGEN,2012,1981,1,...,0,0,0,0,0,0,0,0,0,0
7,CHICAGO POLICE,13194,09/15/2012,10:35,10,60639,JEEP,1996,1969,1,...,0,0,0,0,0,0,0,0,0,0
8,CHICAGO POLICE,13194,09/15/2012,10:35,10,60638,VOLVO,2006,1972,2,...,0,0,0,0,0,0,0,0,0,0
9,CHICAGO POLICE,13194,09/15/2012,10:37,13,60623,WHITE GMC (BEGIN. MODEL YEAR 1988),2000,1940,1,...,0,0,0,0,0,0,0,0,0,0


In [187]:

def process_2012(df):
    #change race reporting format
    tmpdf = pd.DataFrame(df)
    tmprace = tmpdf['Race']
    tmprace = tmprace.map({1:'white',
                           2:'black',
                           3:'native_american',
                           4:'hispanic',
                           5:'asian'})
    tmpdf['Race'] = tmprace
    tmpdf['Race'] = tmpdf['Race'].fillna('other')
    #map gender
    tmpsex = tmpdf['DriverSex']
    tmpsex = tmpsex.map({1:'Male',2:'Female'})
    tmpdf['DriverSex'] = tmpsex
    #compute search conducted from OR of vehicle, driver and passenger
    tmpdf['SearchConducted'] = ((tmpdf['VehicleSearchConducted']==1) |\
                           (tmpdf['DriverSearchConducted']==1) |\
                           (tmpdf['PassengerSearchConducted']==1)).map(lambda x: "Yes" if x else "No")

    #compute contraband found  from OR of vehicle, driver and passenger
    tmpdf['ContrabandFound'] = ((tmpdf['VehicleContrabandFound']==1) |\
                           (tmpdf['DriverPassengerContrabandFound']==1)).map(lambda x: "Yes" if x else "No")
    #parse datese
    #tmpdf['DateAndTimeOfStop'] = pd.to_datetime(tmpdf['DateAndTimeOfStop'])
    #tmpdf['year'] = tmpdf['DateAndTimeOfStop'].map(lambda x: x.year)
    #tmpdf['month'] = tmpdf['DateAndTimeOfStop'].map(lambda x: x.month)
    #fillna
    #pluck out columns of interest
    tmpdf =  tmpdf[['AgencyName',
                       'AgencyCode',
                       'DriverYearOfBirth',
                       'DriverSex',
                       'Race',
                       'SearchConducted',
                       'ContrabandFound']]#, 'year', #'month']]
    #format agency title
    tmpdf['AgencyName'] = tmpdf['AgencyName'].map(str.title)
    #format column names
    tmpdf.rename(columns=str.lower,inplace=True)
    return tmpdf


In [189]:
tmpdf = process_2012(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Chicago Police,13194,1957,Male,hispanic,No,No
1,Chicago Police,13194,1991,Female,white,No,No
2,Chicago Police,13194,1971,Female,hispanic,No,No
3,Chicago Police,13194,1961,Male,white,No,No
4,Chicago Police,13194,1950,Male,black,No,No
5,Chicago Police,13194,1981,Male,hispanic,No,No
6,Chicago Police,13194,1981,Male,white,No,No
7,Chicago Police,13194,1969,Male,hispanic,No,No
8,Chicago Police,13194,1972,Female,white,No,No
9,Chicago Police,13194,1940,Male,hispanic,No,No


In [None]:
tmpdf['year'] = year


In [192]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

No     2033837
Yes      98169
Name: searchconducted, dtype: int64

# 2013

In [193]:
year = '2013'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df.rename(columns=lambda x: x.replace('DriversYearofBirth','DriverYearOfBirth'),inplace=True)
df.rename(columns=lambda x: x.replace('DriverRace','Race'),inplace=True)

df.rename(columns=lambda x: x.replace(' ',''),inplace=True)
#splits data and time of stop
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateOfStop,TimeOfStop,DurationOfStop,ZIP,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,...,PoliceDogAlertIfSniffed,PoliceDogVehicleSearched,PoliceDogContrabandFound,PoliceDogDrugsFound,PoliceDogDrugParaphernaliaFound,PoliceDogAlcoholFound,PoliceDogWeaponFound,PoliceDogStolenPropertyFound,PoliceDogOtherContrabandFound,PoliceDogDrugAmount
0,ABINGDON POLICE,13462,2013-12-07 00:00:00,17:47,10,61410,Chevrolet,1995,1962,1,...,0,0,0,0,0,0,0,0,0,0
1,ABINGDON POLICE,13462,2013-12-16 00:00:00,18:06,10,61401,Oldsmobile,2002,1955,2,...,0,0,0,0,0,0,0,0,0,0
2,ABINGDON POLICE,13462,2013-12-17 00:00:00,20:16,10,61469,Chevrolet,1996,1994,1,...,0,0,0,0,0,0,0,0,0,0
3,ABINGDON POLICE,13462,2013-12-18 00:00:00,00:45,20,61413,Buick,1991,1994,1,...,0,0,0,0,0,0,0,0,0,0
4,ABINGDON POLICE,13462,2013-12-18 00:00:00,22:00,13,61410,Toyota,1988,1995,1,...,0,0,0,0,0,0,0,0,0,0
5,ABINGDON POLICE,13462,2013-12-18 00:00:00,22:27,45,61448,Buick,1999,1989,1,...,0,0,0,0,0,0,0,0,0,0
6,ABINGDON POLICE,13462,2013-12-17 00:00:00,02:20,10,61415,Pontiac,2004,1976,1,...,0,0,0,0,0,0,0,0,0,0
7,ABINGDON POLICE,13462,2013-12-21 00:00:00,01:17,18,55112,Ford,1998,1977,2,...,0,0,0,0,0,0,0,0,0,0
8,ABINGDON POLICE,13462,2013-12-20 00:00:00,01:38,16,61462,Chevrolet,2002,1990,1,...,0,0,0,0,0,0,0,0,0,0
9,ABINGDON POLICE,13462,2013-12-18 00:00:00,01:35,15,61401,Toyota,2004,1992,1,...,0,0,0,0,0,0,0,0,0,0


In [194]:
process_2013 = process_2012
tmpdf = process_2013(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Abingdon Police,13462,1962,Male,white,No,No
1,Abingdon Police,13462,1955,Female,white,No,No
2,Abingdon Police,13462,1994,Male,white,No,No
3,Abingdon Police,13462,1994,Male,white,No,No
4,Abingdon Police,13462,1995,Male,white,No,No
5,Abingdon Police,13462,1989,Male,white,Yes,Yes
6,Abingdon Police,13462,1976,Male,white,No,No
7,Abingdon Police,13462,1977,Female,other,No,No
8,Abingdon Police,13462,1990,Male,white,No,No
9,Abingdon Police,13462,1992,Male,white,No,No


In [None]:
tmpdf['year'] = year


In [195]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')

white              322667
black              128929
hispanic            91430
asian               18573
other                1595
native_american      1563
Name: race, dtype: int64

# 2014

In [199]:
year = '2014'
tmpdfs = []
for filename in glob('/Users/along528/Dropbox/insight/project/datasets/traffic_il/'+year+'*Data/*.txt'):
    tmpdfs.append(pd.read_csv(filename,delimiter='~',parse_dates='DateAndTimeOfStop'))
df = pd.concat(tmpdfs)
df.rename(columns=lambda x: x.replace('DriversYearofBirth','DriverYearOfBirth'),inplace=True)
df.rename(columns=lambda x: x.replace('DriverRace','Race'),inplace=True)
df.rename(columns=lambda x: x.replace(' ',''),inplace=True)
#splits data and time of stop
df = df.reset_index(drop=True)
df

Unnamed: 0,AgencyName,AgencyCode,DateOfStop,TimeOfStop,DurationOfStop,ZIP,VehicleMake,VehicleYear,DriverYearOfBirth,DriverSex,...,PoliceDogAlertIfSniffed,PoliceDogVehicleSearched,PoliceDogContrabandFound,PoliceDogDrugsFound,PoliceDogDrugParaphernaliaFound,PoliceDogAlcoholFound,PoliceDogWeaponFound,PoliceDogStolenPropertyFound,PoliceDogOtherContrabandFound,PoliceDogDrugAmount
0,ABINGDON POLICE,13462,06/18/2014,01:30,16,61465,Chevrolet,2005,1970,1,...,0,0,0,0,0,0,0,0,0,0
1,ABINGDON POLICE,13462,06/20/2014,01:50,15,61410,Chevrolet,1998,1982,1,...,0,0,0,0,0,0,0,0,0,0
2,ABINGDON POLICE,13462,06/21/2014,02:58,16,61410,Dodge,2005,1978,1,...,0,0,0,0,0,0,0,0,0,0
3,ABINGDON POLICE,13462,06/22/2014,00:42,13,61436,Chevrolet,2002,1973,1,...,0,0,0,0,0,0,0,0,0,0
4,ABINGDON POLICE,13462,06/28/2014,00:00,13,61410,Pontiac,1994,1990,2,...,0,0,0,0,0,0,0,0,0,0
5,ABINGDON POLICE,13462,01/24/2014,01:58,13,61410,Ford,1998,1990,2,...,0,0,0,0,0,0,0,0,0,0
6,ABINGDON POLICE,13462,01/11/2014,02:26,13,61410,Buick,2006,1987,2,...,0,0,0,0,0,0,0,0,0,0
7,ABINGDON POLICE,13462,01/11/2014,21:51,13,61520,Nissan,2009,1968,1,...,0,0,0,0,0,0,0,0,0,0
8,ABINGDON POLICE,13462,01/11/2014,22:30,13,61410,Chevrolet,1997,1970,2,...,0,0,0,0,0,0,0,0,0,0
9,ABINGDON POLICE,13462,01/11/2014,22:47,13,61410,Dodge,2012,1980,1,...,0,0,0,0,0,0,0,0,0,0


In [200]:
process_2014 = process_2012
tmpdf = process_2014(df)
tmpdf

Unnamed: 0,agencyname,agencycode,driveryearofbirth,driversex,race,searchconducted,contrabandfound
0,Abingdon Police,13462,1970,Male,white,No,No
1,Abingdon Police,13462,1982,Male,white,No,No
2,Abingdon Police,13462,1978,Male,black,No,No
3,Abingdon Police,13462,1973,Male,white,No,No
4,Abingdon Police,13462,1990,Female,white,No,No
5,Abingdon Police,13462,1990,Female,white,No,No
6,Abingdon Police,13462,1987,Female,white,No,No
7,Abingdon Police,13462,1968,Male,white,No,No
8,Abingdon Police,13462,1970,Female,white,No,No
9,Abingdon Police,13462,1980,Male,white,No,No


In [None]:
tmpdf['year'] = year


In [None]:
tmpdf.to_sql('traffic_stops_'+year, engine, if_exists='replace')