Collecting & Cleaning Data:

In [28]:
import pandas as pd
import googlemaps
import re
import os

In [57]:
API_KEY = (hidden)

In [58]:
googmap = googlemaps.Client(key=API_KEY)

---

Package Theft Addresses:

In [368]:
# File of reported package thefts from Irvine PD 
addresses = pd.read_csv('PRA Packages Thefts.csv')

In [369]:
addresses.head(1)

Unnamed: 0,DR,date_rept,monthstamp,yearstamp,Block
0,1607426,5/12/2016,5,2016,100 Blk ESPLANADE


In [370]:
addresses = addresses.drop(['monthstamp'], axis=1)

In [371]:
addresses.date_rept = pd.to_datetime(addresses.date_rept).dt.date

In [372]:
addresses = addresses.sort_values(['date_rept', 'DR']).reset_index(drop=True)

In [373]:
addresses.head(1)

Unnamed: 0,DR,date_rept,yearstamp,Block
0,1100325,2011-01-07,2011,100 Blk ROCKVIEW


In [374]:
addresses.tail(1)

Unnamed: 0,DR,date_rept,yearstamp,Block
968,1980370,2019-05-26,2019,200 Blk CULTIVATE


In [375]:
addresses.yearstamp.value_counts()

2018    176
2017    168
2016    151
2015    148
2019     93
2014     68
2013     61
2011     60
2012     44
Name: yearstamp, dtype: int64

In [376]:
addresses.head()

Unnamed: 0,DR,date_rept,yearstamp,Block
0,1100325,2011-01-07,2011,100 Blk ROCKVIEW
1,1102024,2011-02-10,2011,900 Blk SOMERVILLE
2,1105426,2011-04-19,2011,400 Blk ORANGE BLOSSOM
3,1107360,2011-05-26,2011,0 Blk WEATHERSFIELD
4,1109744,2011-06-26,2011,0 Blk EL CAJON


In [377]:
addresses['Street'] = addresses.Block.str.extract('([^\d+].+$)')
remove_block = [name[4:] for name in addresses.Street]
addresses['Street'] = remove_block

In [378]:
addresses['Block'] = addresses.Block.str.extract('(\d+)')

In [401]:
addresses.head()

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
0,1100325,2011-01-07,2011,100,ROCKVIEW,"100 ROCKVIEW, Irvine, CA",33.652512,-117.81985
1,1102024,2011-02-10,2011,900,SOMERVILLE,"900 SOMERVILLE, Irvine, CA",33.728637,-117.760803
2,1105426,2011-04-19,2011,400,ORANGE BLOSSOM,"400 ORANGE BLOSSOM, Irvine, CA",33.672377,-117.774126
3,1107360,2011-05-26,2011,0,WEATHERSFIELD,"0 WEATHERSFIELD, Irvine, CA",33.729731,-117.778779
4,1109744,2011-06-26,2011,0,EL CAJON,"0 EL CAJON, Irvine, CA",33.734975,-117.769954


In [380]:
addresses['Address'] = addresses.Block + addresses.Street + ', Irvine, CA'

In [381]:
addresses.head()

Unnamed: 0,DR,date_rept,yearstamp,Block,Street,Address
0,1100325,2011-01-07,2011,100,ROCKVIEW,"100 ROCKVIEW, Irvine, CA"
1,1102024,2011-02-10,2011,900,SOMERVILLE,"900 SOMERVILLE, Irvine, CA"
2,1105426,2011-04-19,2011,400,ORANGE BLOSSOM,"400 ORANGE BLOSSOM, Irvine, CA"
3,1107360,2011-05-26,2011,0,WEATHERSFIELD,"0 WEATHERSFIELD, Irvine, CA"
4,1109744,2011-06-26,2011,0,EL CAJON,"0 EL CAJON, Irvine, CA"


In [262]:
# This took about 6 minutes to complete
lats = []
lons = []

for address in addresses.Address:
    geocode = googmap.geocode(address)
    try:
        lats.append(float(geocode[0]['geometry']['location']['lat']))
        lons.append(float(geocode[0]['geometry']['location']['lng']))
    except:
        lats.append(None)
        lons.append(None)

In [382]:
addresses['Lat'] = lats
addresses['Lon'] = lons

In [383]:
addresses = addresses.rename({'date_rept':'Date', 'yearstamp':'Year'}, axis=1)

In [384]:
addresses.head()

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
0,1100325,2011-01-07,2011,100,ROCKVIEW,"100 ROCKVIEW, Irvine, CA",33.652512,-117.81985
1,1102024,2011-02-10,2011,900,SOMERVILLE,"900 SOMERVILLE, Irvine, CA",33.728637,-117.760803
2,1105426,2011-04-19,2011,400,ORANGE BLOSSOM,"400 ORANGE BLOSSOM, Irvine, CA",33.672377,-117.774126
3,1107360,2011-05-26,2011,0,WEATHERSFIELD,"0 WEATHERSFIELD, Irvine, CA",33.729731,-117.778779
4,1109744,2011-06-26,2011,0,EL CAJON,"0 EL CAJON, Irvine, CA",33.734975,-117.769954


In [385]:
addresses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 969 entries, 0 to 968
Data columns (total 8 columns):
DR         969 non-null int64
Date       969 non-null object
Year       969 non-null int64
Block      967 non-null object
Street     969 non-null object
Address    967 non-null object
Lat        967 non-null float64
Lon        967 non-null float64
dtypes: float64(2), int64(2), object(4)
memory usage: 60.6+ KB


In [386]:
addresses[addresses.Address.isna()]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
117,1303395,2013-03-18,2013,,UE!,,33.791638,-84.389488
387,1600542,2016-01-11,2016,,UE!,,33.791638,-84.389488


In [387]:
addresses[addresses.Lat.isna()]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
502,1616423,2016-10-26,2016,0,LEMON GROVE,"0 LEMON GROVE, Irvine, CA",,
879,1900223,2019-01-05,2019,0,LEMON GROVE,"0 LEMON GROVE, Irvine, CA",,


In [388]:
# Removing 2 records w/ missing addresses
addresses = addresses.dropna(subset=['Address'])

In [389]:
len(addresses)

967

In [390]:
# Editing lat/lon of address w/ missing lat/lon
addresses[addresses.Address == '0 LEMON GROVE, Irvine, CA']

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
502,1616423,2016-10-26,2016,0,LEMON GROVE,"0 LEMON GROVE, Irvine, CA",,
879,1900223,2019-01-05,2019,0,LEMON GROVE,"0 LEMON GROVE, Irvine, CA",,


In [391]:
# Changing address from 0 to 1 Lemon Grove (since no address corresponds to '0 Lemon Grove' in Google Maps)
addresses.loc[addresses.Address == '0 LEMON GROVE, Irvine, CA', 'Address'] = '1 LEMON GROVE, Irvine, CA'
addresses.loc[addresses.Address == '1 LEMON GROVE, Irvine, CA', 'Block'] = 1

In [392]:
addresses[addresses.Lat.isna()]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
502,1616423,2016-10-26,2016,0,LEMON GROVE,"1 LEMON GROVE, Irvine, CA",,
879,1900223,2019-01-05,2019,0,LEMON GROVE,"1 LEMON GROVE, Irvine, CA",,


In [393]:
# Maually inserting Lat & Lon values for 1 Lemon Grove
geocode = googmap.geocode('1 LEMON GROVE, Irvine, CA')
addresses.loc[addresses.Address == '1 LEMON GROVE, Irvine, CA', 'Lat'] = float(geocode[0]['geometry']['location']['lat'])
addresses.loc[addresses.Address == '1 LEMON GROVE, Irvine, CA', 'Lon'] = float(geocode[0]['geometry']['location']['lng'])

In [396]:
addresses[addresses.Address == '1 LEMON GROVE, Irvine, CA']

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
502,1616423,2016-10-26,2016,1,LEMON GROVE,"1 LEMON GROVE, Irvine, CA",33.672522,-117.771004
879,1900223,2019-01-05,2019,1,LEMON GROVE,"1 LEMON GROVE, Irvine, CA",33.672522,-117.771004


In [413]:
addresses[addresses.Address.str.contains('EL CAMINO REAL')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
191,1409643,2014-08-01,2014,4600,EL CAMINO REAL,"4600 EL CAMINO REAL, Irvine, CA",35.496727,-120.681378
495,1614520,2016-09-22,2016,4300,EL CAMINO REAL,"4300 EL CAMINO REAL, Irvine, CA",37.405366,-122.119734


In [414]:
# Editing address for El Camino Real entries, based on google maps results (adding 'North')
addresses.loc[addresses.Address == '4600 EL CAMINO REAL, Irvine, CA', 'Address'] = '4600 EL CAMINO REAL NORTH, Irvine, CA'
addresses.loc[addresses.Address == '4300 EL CAMINO REAL, Irvine, CA', 'Address'] = '4300 EL CAMINO REAL NORTH, Irvine, CA'

In [415]:
addresses[addresses.Address.str.contains('EL CAMINO REAL')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
191,1409643,2014-08-01,2014,4600,EL CAMINO REAL,"4600 EL CAMINO REAL NORTH, Irvine, CA",35.496727,-120.681378
495,1614520,2016-09-22,2016,4300,EL CAMINO REAL,"4300 EL CAMINO REAL NORTH, Irvine, CA",37.405366,-122.119734


In [416]:
# Maually editing Lat & Lon values for 4600 EL CAMINO REAL NORTH
geocode = googmap.geocode('4600 EL CAMINO REAL NORTH, Irvine, CA')
addresses.loc[addresses.Address == '4600 EL CAMINO REAL NORTH, Irvine, CA', 'Lat'] = float(geocode[0]['geometry']['location']['lat'])
addresses.loc[addresses.Address == '4600 EL CAMINO REAL NORTH, Irvine, CA', 'Lon'] = float(geocode[0]['geometry']['location']['lng'])

In [417]:
# Maually editing Lat & Lon values for 4300 EL CAMINO REAL NORTH
geocode = googmap.geocode('4300 EL CAMINO REAL NORTH, Irvine, CA')
addresses.loc[addresses.Address == '4300 EL CAMINO REAL NORTH, Irvine, CA', 'Lat'] = float(geocode[0]['geometry']['location']['lat'])
addresses.loc[addresses.Address == '4300 EL CAMINO REAL NORTH, Irvine, CA', 'Lon'] = float(geocode[0]['geometry']['location']['lng'])

In [418]:
addresses[addresses.Address.str.contains('EL CAMINO REAL')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
191,1409643,2014-08-01,2014,4600,EL CAMINO REAL,"4600 EL CAMINO REAL NORTH, Irvine, CA",33.718494,-117.783739
495,1614520,2016-09-22,2016,4300,EL CAMINO REAL,"4300 EL CAMINO REAL NORTH, Irvine, CA",33.718494,-117.783739


In [424]:
addresses[addresses.Address.str.contains('ARBORETUM')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
40,1115622,2011-11-10,2011,0,ARBORETUM,"0 ARBORETUM, Irvine, CA",33.746032,-117.808931
64,1202079,2012-02-12,2012,0,ARBORETUM,"0 ARBORETUM, Irvine, CA",33.746032,-117.808931
65,1202407,2012-02-12,2012,0,ARBORETUM,"0 ARBORETUM, Irvine, CA",33.746032,-117.808931
791,1810122,2018-07-24,2018,0,ARBORETUM,"0 ARBORETUM, Irvine, CA",33.746032,-117.808931


In [425]:
addresses[addresses.Address.str.contains('BUCKEYE')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
110,1301884,2013-02-14,2013,0,BUCKEYE,"0 BUCKEYE, Irvine, CA",33.638995,-117.872792


In [434]:
addresses[addresses.Address.str.contains('KNOLLWOOD')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
520,1618990,2016-12-15,2016,0,KNOLLWOOD,"0 KNOLLWOOD, Irvine, CA",33.746032,-117.808931


In [426]:
# Editing address for ARBORETUM, BUCKEYE, & KNOLLWOOD (replacing 0 w/ 1, since 0 isn't registered in google maps)
# The original addresses were linked to similarly-named addresses outside of Irvine
addresses.loc[addresses.Address == '0 ARBORETUM, Irvine, CA', 'Address'] = '1 ARBORETUM, Irvine, CA'
addresses.loc[addresses.Address == '0 BUCKEYE, Irvine, CA', 'Address'] = '1 BUCKEYE, Irvine, CA'
addresses.loc[addresses.Address == '0 KNOLLWOOD, Irvine, CA', 'Address'] = '1 KNOLLWOOD, Irvine, CA'

In [427]:
# Maually editing Lat & Lon values for 4300 EL CAMINO REAL NORTH
geocode = googmap.geocode('1 ARBORETUM, Irvine, CA')
addresses.loc[addresses.Address == '1 ARBORETUM, Irvine, CA', 'Lat'] = float(geocode[0]['geometry']['location']['lat'])
addresses.loc[addresses.Address == '1 ARBORETUM, Irvine, CA', 'Lon'] = float(geocode[0]['geometry']['location']['lng'])

In [428]:
# Maually editing Lat & Lon values for 4300 EL CAMINO REAL NORTH
geocode = googmap.geocode('1 BUCKEYE, Irvine, CA')
addresses.loc[addresses.Address == '1 BUCKEYE, Irvine, CA', 'Lat'] = float(geocode[0]['geometry']['location']['lat'])
addresses.loc[addresses.Address == '1 BUCKEYE, Irvine, CA', 'Lon'] = float(geocode[0]['geometry']['location']['lng'])

In [436]:
# Maually editing Lat & Lon values for 4300 EL CAMINO REAL NORTH
geocode = googmap.geocode('1 KNOLLWOOD, Irvine, CA')
addresses.loc[addresses.Address == '1 KNOLLWOOD, Irvine, CA', 'Lat'] = float(geocode[0]['geometry']['location']['lat'])
addresses.loc[addresses.Address == '1 KNOLLWOOD, Irvine, CA', 'Lon'] = float(geocode[0]['geometry']['location']['lng'])

In [429]:
addresses[addresses.Address.str.contains('BUCKEYE')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
110,1301884,2013-02-14,2013,0,BUCKEYE,"1 BUCKEYE, Irvine, CA",33.70313,-117.783765


In [430]:
addresses[addresses.Address.str.contains('ARBORETUM')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
40,1115622,2011-11-10,2011,0,ARBORETUM,"1 ARBORETUM, Irvine, CA",33.698452,-117.747519
64,1202079,2012-02-12,2012,0,ARBORETUM,"1 ARBORETUM, Irvine, CA",33.698452,-117.747519
65,1202407,2012-02-12,2012,0,ARBORETUM,"1 ARBORETUM, Irvine, CA",33.698452,-117.747519
791,1810122,2018-07-24,2018,0,ARBORETUM,"1 ARBORETUM, Irvine, CA",33.698452,-117.747519


In [437]:
addresses[addresses.Address.str.contains('KNOLLWOOD')]

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
520,1618990,2016-12-15,2016,0,KNOLLWOOD,"1 KNOLLWOOD, Irvine, CA",33.73225,-117.776461


In [431]:
addresses.head()

Unnamed: 0,DR,Date,Year,Block,Street,Address,Lat,Lon
0,1100325,2011-01-07,2011,100,ROCKVIEW,"100 ROCKVIEW, Irvine, CA",33.652512,-117.81985
1,1102024,2011-02-10,2011,900,SOMERVILLE,"900 SOMERVILLE, Irvine, CA",33.728637,-117.760803
2,1105426,2011-04-19,2011,400,ORANGE BLOSSOM,"400 ORANGE BLOSSOM, Irvine, CA",33.672377,-117.774126
3,1107360,2011-05-26,2011,0,WEATHERSFIELD,"0 WEATHERSFIELD, Irvine, CA",33.729731,-117.778779
4,1109744,2011-06-26,2011,0,EL CAJON,"0 EL CAJON, Irvine, CA",33.734975,-117.769954


In [438]:
addresses.to_csv('irvine_theft_lat_lon', index=False, encoding='utf-8', )