In [1]:
# Import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps
import pyproj

In [2]:
# Load Sampled Data
df=pd.read_csv("Simplified_Citation_After_2015July_to_2019.csv",low_memory=False)
df.head(5)

Unnamed: 0.1,Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Marked Time,RP State Plate,VIN,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,28,4361326712,2019/09/17 12:00:00 AM,1205.0,,,CA,,TOYT,PA,GY,309 WINDWARD AVE,163,51.0,80.69BS,NO PARK/STREET CLEAN,73.0,6419487.0,1818861.0
1,112,4361286823,2019/09/17 12:00:00 AM,853.0,,,CA,,TOYT,PA,WT,934 84TH ST W,553,55.0,80.69BS,NO PARK/STREET CLEAN,73.0,6473690.0,1808839.0
2,224,4361159515,2019/09/17 12:00:00 AM,833.0,,,CA,,NISS,PA,WT,2601 MONMOUTH AVE,536,55.0,80.69BS,NO PARK/STREET CLEAN,73.0,6475329.0,1834194.0
3,473,4361237226,2019/09/17 12:00:00 AM,1015.0,,,CA,,HOND,PA,RD,615 HAMPTON DR,133,51.0,80.69BS,NO PARK/STREET CLEAN,73.0,6417752.0,1820642.0
4,592,4360129196,2019/09/17 12:00:00 AM,1007.0,,,CA,,VOLV,PA,SL,1346 MCCADDEN PL N,487,54.0,80.69BS,NO PARK/STREET CLEAN,73.0,6459495.0,1857205.0


In [3]:
# Identify bad Longitude/Latitude values
df['Longitude'].value_counts()

9.999900e+04    11416
1.819688e+06       98
1.859071e+06       87
1.882602e+06       64
1.849114e+06       49
                ...  
1.843271e+06        1
1.845315e+06        1
1.909664e+06        1
1.835485e+06        1
1.857892e+06        1
Name: Longitude, Length: 50929, dtype: int64

In [4]:
# Fraction of records with bad Lat/Lon values
len(df[(df['Latitude'] == 99999) | (df['Longitude'] == 99999)])/len(df)

0.13038810334193754

In [5]:
# Filter and create cleaned dataset
df_cleaned = df[(df['Latitude'] != 99999) | (df['Longitude'] != 99999)].copy()

In [6]:
# New dataset statistics
df_cleaned[['Latitude', 'Longitude']].describe()

Unnamed: 0,Latitude,Longitude
count,76138.0,76138.0
mean,6455758.0,1848533.0
std,27349.72,28669.01
min,6360390.0,1715644.0
25%,6431560.0,1837057.0
50%,6458808.0,1845113.0
75%,6478079.0,1859516.0
max,6513150.0,1941866.0


In [9]:
# Conversion using pyproj module
inProj = pyproj.Proj({'init': 'epsg:2229'}, preserve_units=True) # make sure unit preserve 
outProj = pyproj.Proj({'init': 'epsg:4326'})
y_1,x_1 = pyproj.transform(inProj,outProj,df_cleaned["Latitude"].values,df_cleaned["Longitude"].values)

In [10]:
# Add on the converted coordinates
df_cleaned["Latitude_new"] = y_1
df_cleaned["Longitude_new"] = x_1
df_cleaned.head()

Unnamed: 0.1,Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Marked Time,RP State Plate,VIN,Make,Body Style,...,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude,Latitude_new,Longitude_new
0,28,4361326712,2019/09/17 12:00:00 AM,1205.0,,,CA,,TOYT,PA,...,309 WINDWARD AVE,163,51.0,80.69BS,NO PARK/STREET CLEAN,73.0,6419487.0,1818861.0,-118.469024,33.989412
1,112,4361286823,2019/09/17 12:00:00 AM,853.0,,,CA,,TOYT,PA,...,934 84TH ST W,553,55.0,80.69BS,NO PARK/STREET CLEAN,73.0,6473690.0,1808839.0,-118.290126,33.962435
2,224,4361159515,2019/09/17 12:00:00 AM,833.0,,,CA,,NISS,PA,...,2601 MONMOUTH AVE,536,55.0,80.69BS,NO PARK/STREET CLEAN,73.0,6475329.0,1834194.0,-118.284957,34.03212
3,473,4361237226,2019/09/17 12:00:00 AM,1015.0,,,CA,,HOND,PA,...,615 HAMPTON DR,133,51.0,80.69BS,NO PARK/STREET CLEAN,73.0,6417752.0,1820642.0,-118.474775,33.994284
4,592,4360129196,2019/09/17 12:00:00 AM,1007.0,,,CA,,VOLV,PA,...,1346 MCCADDEN PL N,487,54.0,80.69BS,NO PARK/STREET CLEAN,73.0,6459495.0,1857205.0,-118.337472,34.095217


In [12]:
# Check for null values
df_cleaned.isnull().sum()

Unnamed: 0                   0
Ticket number                0
Issue Date                   0
Issue time                  11
Meter Id                 54920
Marked Time              73582
RP State Plate               1
VIN                      76138
Make                        31
Body Style                  40
Color                       12
Location                     0
Route                      331
Agency                       0
Violation code               0
Violation Description        2
Fine amount                 23
Latitude                     0
Longitude                    0
Latitude_new                 0
Longitude_new                0
dtype: int64

In [10]:
#picked 100000 samples since my computer cannot process more data than that 
gmaps.configure(api_key='') # type in your own API 
morning_time = (df_cleaned["Issue time"] > 600) & (df_cleaned["Issue time"] <= 1200) & (df_cleaned.iloc[:100000]["Violation Description"] == "RED ZONE")
morning_time_df = df_cleaned[morning_time][["Latitude_new","Longitude_new"]]

afternoon_time = (df_cleaned["Issue time"] > 1200) & (df_cleaned["Issue time"] <= 1800) & (df_cleaned.iloc[:100000]["Violation Description"] == "RED ZONE")
afternoon_time_df = df_cleaned[afternoon_time][["Latitude_new","Longitude_new"]]

evening_time = (df_cleaned["Issue time"] > 1800)  & (df_cleaned.iloc[:100000]["Violation Description"] == "RED ZONE")
evening_time_df = df_cleaned[evening_time][["Latitude_new","Longitude_new"]]

fig = gmaps.figure()
morning_layer = gmaps.symbol_layer(morning_time_df, fill_color = "red", stroke_color = "red", scale = 1)
afternoon_layer = gmaps.symbol_layer(afternoon_time_df, fill_color = "green", stroke_color = "green", scale = 1)
evening_layer = gmaps.symbol_layer(evening_time_df, fill_color = "blue", stroke_color = "blue", scale = 1)
fig.add_layer(morning_layer)
fig.add_layer(afternoon_layer)
fig.add_layer(evening_layer)
fig


Figure(layout=FigureLayout(height='420px'))