In [1]:
# import dependencies
import pandas as pd
import numpy as np
from pandas import DataFrame
import json
import csv
from pyproj import Proj, transform
# from sqlalchemy import create_engine

In [11]:
# read csv into dataframe
csv_file = "parking_citations_raw.csv"
la_parking_citations_df = pd.read_csv(csv_file)
la_parking_citations_df.head()

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Marked Time,RP State Plate,Plate Expiry Date,VIN,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,4272349605,12/30/2015,2201.0,,,CA,201605.0,,OLDS,PA,GN,3069 SAN MARINO ST,403,54.0,80.56E4+,RED ZONE,93.0,6471840.7,1842349.7
1,4272349616,12/30/2015,2205.0,,,CA,201508.0,,HOND,PA,WT,2936 8TH ST W,403,54.0,80.56E1,WHITE ZONE,58.0,6473823.2,1843512.0
2,4272821512,12/30/2015,1725.0,,,CA,10.0,,TOYT,PA,SL,301 LAUREL AV N,401,54.0,5204A-,DISPLAY OF TABS,25.0,6451207.5,1850273.2
3,4272821523,12/30/2015,1738.0,WF74,,CA,2.0,,RROV,PA,BK,8321 3RD ST W,401,54.0,88.13B+,METER EXP.,63.0,6449387.2,1849063.5
4,4272821534,12/30/2015,1807.0,13,,CA,1.0,,FORD,PA,GN,121 CROFT AVE,401,54.0,80.58L,PREFERENTIAL PARKING,68.0,6448347.2,1849662.2


In [3]:
# count number of values in each column
la_parking_citations_df.count()

Ticket number            9477527
Issue Date               9476959
Issue time               9474602
Meter Id                 2444382
Marked Time               312215
RP State Plate           9476762
Plate Expiry Date        8611025
VIN                        16309
Make                     9468006
Body Style               9467597
Color                    9473004
Location                 9476589
Route                    9406744
Agency                   9476949
Violation code           9477527
Violation Description    9476516
Fine amount              9470399
Latitude                 9477523
Longitude                9477523
dtype: int64

## Data Clean Up

#### STEP 1 - Drop the following columns:
* Marked Time
* RP State Plate
* Plate Expiry Date
* VIN

In [12]:
# delete columns
la_parking_citations_df.drop(['Marked Time','RP State Plate','Plate Expiry Date','VIN'], axis = 1, inplace = True)

In [13]:
# load data into new dataframe
parking_citations_new = pd.DataFrame(la_parking_citations_df)
parking_citations_new.head(5)

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,4272349605,12/30/2015,2201.0,,OLDS,PA,GN,3069 SAN MARINO ST,403,54.0,80.56E4+,RED ZONE,93.0,6471840.7,1842349.7
1,4272349616,12/30/2015,2205.0,,HOND,PA,WT,2936 8TH ST W,403,54.0,80.56E1,WHITE ZONE,58.0,6473823.2,1843512.0
2,4272821512,12/30/2015,1725.0,,TOYT,PA,SL,301 LAUREL AV N,401,54.0,5204A-,DISPLAY OF TABS,25.0,6451207.5,1850273.2
3,4272821523,12/30/2015,1738.0,WF74,RROV,PA,BK,8321 3RD ST W,401,54.0,88.13B+,METER EXP.,63.0,6449387.2,1849063.5
4,4272821534,12/30/2015,1807.0,13,FORD,PA,GN,121 CROFT AVE,401,54.0,80.58L,PREFERENTIAL PARKING,68.0,6448347.2,1849662.2


#### STEP 2 - Convert Latitude and Longitude Values

Latitude and longitude in the dataset are given in accordance with [ESRI:102645 NAD 1983 StatePlane California V FIPS 0405 Feet](https://epsg.io/102645) format. 
<br>
In order to use those coordinates for our map visualization, we would need to transform geospatial coordinates from one coordinate reference system to another. 
<br>
Python offers [PROJ](https://proj.org/index.html) library for performing conversions between cartographic projections.
<br>
1,401,193 (approximately 15% of total) parking tickets have latitude and longitude with 9.999900e+04 values.
<br>
Conversion of those values gives us location in the Pacific Ocean, thus we cannot use them for our visualization.
<br>
We have drop those parking tickets for the conversion of coordinates and save them in a separate csv file.

In [6]:
# sort parking tickets by latitude and longitude with 9.999900e+04 values
# load them into a new dataframe and save into a separate csv file
parking_citations_lat_long_na = parking_citations_new[parking_citations_new["Longitude"]==9.999900e+04]
parking_citations_lat_long_na.to_csv("parking_citations_lat_long_na.csv", encoding='utf-8', index=False)
parking_citations_lat_long_na.head(5)

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
5,4272889353,12/30/2015,514.0,,DODG,PA,GY,4939 ECHO ST,00611,56.0,80.69BS,NO PARK/STREET CLEAN,73.0,99999.0,99999.0
7,1112509506,12/21/2015,1255.0,,CIMC,TL,BL,PENNINGTON/I ST,1A27,4.0,000,80691a,,99999.0,99999.0
8,1112509510,12/21/2015,1305.0,,CIMC,TL,BL,PENNINGTON/I ST,1A27,4.0,000,80691a,,99999.0,99999.0
9,1112509926,12/21/2015,1340.0,,HYTR,TL,BK,CANNERY ST/WAY ST,L59,3.0,000,80691a,,99999.0,99999.0
10,1112510291,12/25/2015,1255.0,,HOND,,SI,2500 NAVY WAY,M71,3.0,000,8709b,,99999.0,99999.0


In [14]:
# drop parking tickets with latitude / longitude values of 99999.0
parking_citations_v2 = parking_citations_new[parking_citations_new['Latitude'] != 99999.0]

In [15]:
parking_citations_v2

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,4272349605,12/30/2015,2201.0,,OLDS,PA,GN,3069 SAN MARINO ST,00403,54.0,80.56E4+,RED ZONE,93.0,6.471841e+06,1.842350e+06
1,4272349616,12/30/2015,2205.0,,HOND,PA,WT,2936 8TH ST W,00403,54.0,80.56E1,WHITE ZONE,58.0,6.473823e+06,1.843512e+06
2,4272821512,12/30/2015,1725.0,,TOYT,PA,SL,301 LAUREL AV N,00401,54.0,5204A-,DISPLAY OF TABS,25.0,6.451208e+06,1.850273e+06
3,4272821523,12/30/2015,1738.0,WF74,RROV,PA,BK,8321 3RD ST W,00401,54.0,88.13B+,METER EXP.,63.0,6.449387e+06,1.849064e+06
4,4272821534,12/30/2015,1807.0,13,FORD,PA,GN,121 CROFT AVE,00401,54.0,80.58L,PREFERENTIAL PARKING,68.0,6.448347e+06,1.849662e+06
6,4274301964,01/04/2016,1205.0,,OTHR,TR,WT,2650 12TH ST E,6T8,56.0,80.69B,NO PARKING,73.0,6.494776e+06,1.830623e+06
11,4273433511,12/30/2015,1805.0,83,TOYT,PA,GY,4603 MAUBERT AVE,00403,54.0,80.58L,PREFERENTIAL PARKING,68.0,6.473985e+06,1.858570e+06
21,4273826944,12/30/2015,1924.0,53,AUDI,PA,BK,736 DETROIT ST N,00402,54.0,80.58L,PREFERENTIAL PARKING,68.0,6.457156e+06,1.853174e+06
22,4273826955,12/30/2015,1931.0,41,HYUN,PA,BL,166 POINSETTIA PL N,00402,54.0,80.58L,PREFERENTIAL PARKING,68.0,6.456089e+06,1.848716e+06
24,4273857486,12/30/2015,1224.0,,VOLK,PA,GY,3330 SEPULVEDA BLVD,00184,51.0,80.69BS,NO PARK/STREET CLEAN,73.0,6.433118e+06,1.830581e+06


In [16]:
# assign coordinate transformation engine
pm = '+proj=lcc +lat_1=34.03333333333333 +lat_2=35.46666666666667 +lat_0=33.5 +lon_0=-118 +x_0=2000000 ' \
     '+y_0=500000.0000000002 +ellps=GRS80 +datum=NAD83 +to_meter=0.3048006096012192 +no_defs'

# convert latitude and longitude to geographic coordinates
x_in,y_in = parking_citations_v2['Latitude'].values, parking_citations_v2['Longitude'].values
parking_citations_v2['Longitude'],parking_citations_v2['Latitude'] = transform(Proj(pm, preserve_units = True), Proj("+init=epsg:4326"), x_in,y_in)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [17]:
parking_citations_v2

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,4272349605,12/30/2015,2201.0,,OLDS,PA,GN,3069 SAN MARINO ST,00403,54.0,80.56E4+,RED ZONE,93.0,34.054504,-118.296550
1,4272349616,12/30/2015,2205.0,,HOND,PA,WT,2936 8TH ST W,00403,54.0,80.56E1,WHITE ZONE,58.0,34.057714,-118.290016
2,4272821512,12/30/2015,1725.0,,TOYT,PA,SL,301 LAUREL AV N,00401,54.0,5204A-,DISPLAY OF TABS,25.0,34.076090,-118.364762
3,4272821523,12/30/2015,1738.0,WF74,RROV,PA,BK,8321 3RD ST W,00401,54.0,88.13B+,METER EXP.,63.0,34.072748,-118.370758
4,4272821534,12/30/2015,1807.0,13,FORD,PA,GN,121 CROFT AVE,00401,54.0,80.58L,PREFERENTIAL PARKING,68.0,34.074382,-118.374200
6,4274301964,01/04/2016,1205.0,,OTHR,TR,WT,2650 12TH ST E,6T8,56.0,80.69B,NO PARKING,73.0,34.022442,-118.220747
11,4273433511,12/30/2015,1805.0,83,TOYT,PA,GY,4603 MAUBERT AVE,00403,54.0,80.58L,PREFERENTIAL PARKING,68.0,34.099092,-118.289626
21,4273826944,12/30/2015,1924.0,53,AUDI,PA,BK,736 DETROIT ST N,00402,54.0,80.58L,PREFERENTIAL PARKING,68.0,34.084118,-118.345151
22,4273826955,12/30/2015,1931.0,41,HYUN,PA,BL,166 POINSETTIA PL N,00402,54.0,80.58L,PREFERENTIAL PARKING,68.0,34.071859,-118.348626
24,4273857486,12/30/2015,1224.0,,VOLK,PA,GY,3330 SEPULVEDA BLVD,00184,51.0,80.69BS,NO PARK/STREET CLEAN,73.0,34.021785,-118.424223


In [18]:
parking_citations_v2.to_csv("parking_citations_final.csv", encoding='utf-8', index=False)
parking_citations_v2.head(5)

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,4272349605,12/30/2015,2201.0,,OLDS,PA,GN,3069 SAN MARINO ST,403,54.0,80.56E4+,RED ZONE,93.0,34.054504,-118.29655
1,4272349616,12/30/2015,2205.0,,HOND,PA,WT,2936 8TH ST W,403,54.0,80.56E1,WHITE ZONE,58.0,34.057714,-118.290016
2,4272821512,12/30/2015,1725.0,,TOYT,PA,SL,301 LAUREL AV N,401,54.0,5204A-,DISPLAY OF TABS,25.0,34.07609,-118.364762
3,4272821523,12/30/2015,1738.0,WF74,RROV,PA,BK,8321 3RD ST W,401,54.0,88.13B+,METER EXP.,63.0,34.072748,-118.370758
4,4272821534,12/30/2015,1807.0,13,FORD,PA,GN,121 CROFT AVE,401,54.0,80.58L,PREFERENTIAL PARKING,68.0,34.074382,-118.3742


In [2]:
csv_file = "parking_citations_final.csv"
la_parking_citations_df = pd.read_csv(csv_file)
la_parking_citations_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,4272349605,12/30/2015,2201.0,,OLDS,PA,GN,3069 SAN MARINO ST,403,54.0,80.56E4+,RED ZONE,93.0,34.054504,-118.29655
1,4272349616,12/30/2015,2205.0,,HOND,PA,WT,2936 8TH ST W,403,54.0,80.56E1,WHITE ZONE,58.0,34.057714,-118.290016
2,4272821512,12/30/2015,1725.0,,TOYT,PA,SL,301 LAUREL AV N,401,54.0,5204A-,DISPLAY OF TABS,25.0,34.07609,-118.364762
3,4272821523,12/30/2015,1738.0,WF74,RROV,PA,BK,8321 3RD ST W,401,54.0,88.13B+,METER EXP.,63.0,34.072748,-118.370758
4,4272821534,12/30/2015,1807.0,13,FORD,PA,GN,121 CROFT AVE,401,54.0,80.58L,PREFERENTIAL PARKING,68.0,34.074382,-118.3742


In [3]:
la_parking_citations_df["Make"].value_counts()

TOYT    1365173
HOND     907738
FORD     689924
NISS     576495
CHEV     537312
BMW      377689
MERZ     339796
VOLK     277374
HYUN     254988
LEXS     238081
DODG     231856
KIA      194316
JEEP     191521
AUDI     162432
MAZD     149594
OTHR     139360
GMC      113260
INFI     105803
CHRY     102141
ACUR      96421
SUBA      91888
VOLV      76373
MITS      66799
CADI      62081
MNNI      59797
LNDR      48966
TOYO      47928
PORS      43291
FRHT      38549
UNK       38137
         ...   
DAVI          1
OPNR          1
LEGR          1
LNK           1
HNDA          1
CRYL          1
BNDE          1
OSCK          1
CHNO          1
SEAR          1
CHSS          1
ELOM          1
BRAU          1
SNCR          1
GNSI          1
HACK          1
WHT           1
PTRS          1
ELO           1
ODHM          1
AVON          1
HELD          1
LAZD          1
WNBA          1
AUDB          1
MECU          1
KONA          1
ANMN          1
HUES          1
MEZD          1
Name: Make, Length: 1352