## Reading and Cleaning Data for 2014

In [7]:
import pandas as pd
import matplotlib as mpl

In [8]:
# Load the cleaned 2014 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2014.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,7291421410,04/23/2014,1041A,16,13,PICK,WH,PAS
1,7297902834,04/24/2014,1014A,21,48,VAN,WHITE,COM
2,7685905840,04/01/2014,0408P,38,68,4DSD,BK,PAS
3,7083775576,01/24/2014,1206P,38,90,SUBN,GREY,PAS
4,7751459062,05/06/2014,0727A,24,41,SUBN,GREEN,PAS
...,...,...,...,...,...,...,...,...
84067,7829944295,02/22/2014,0929A,47,18,DELV,RED,COM
84068,7664955397,08/30/2013,1007A,21,25,4DSD,GY,PAS
84069,7111596614,05/14/2014,0220P,38,104,DELV,WH,COM
84070,7947530087,01/10/2014,0855A,14,1,DELV,ORANG,PAS


In [9]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,04/23/2014,13
1,04/24/2014,48
2,04/01/2014,68
3,01/24/2014,90
4,05/06/2014,41
...,...,...
84067,02/22/2014,18
84068,08/30/2013,25
84069,05/14/2014,104
84070,01/10/2014,1


In [10]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2014,1,1
1,01/01/2014,5,0
2,01/01/2014,6,0
3,01/01/2014,7,0
4,01/01/2014,9,0
...,...,...,...
27638,12/31/2013,115,9
27639,12/31/2013,120,3
27640,12/31/2013,121,0
27641,12/31/2013,122,2


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [11]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2014" would sort before "12/31/2013").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13629,2013-07-02,1,0
13630,2013-07-02,5,0
13631,2013-07-02,6,0
13632,2013-07-02,7,0
13633,2013-07-02,9,0
...,...,...,...
13624,2014-06-26,115,0
13625,2014-06-26,120,0
13626,2014-06-26,121,0
13627,2014-06-26,122,0


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [12]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2014.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2015

In [13]:
# Load the cleaned 2015 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2015.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,8000137800,09/17/2014,1158A,21,62,SUBN,WH,PAS
1,7057929596,02/27/2015,1009A,14,13,DELV,WH,COM
2,8018017244,05/15/2015,0910A,31,14,SUBN,WHITE,PAS
3,7115978487,02/26/2015,1120A,38,20,DELV,WHITE,PAS
4,1371424597,07/10/2014,1253A,78,23,VAN,GRAY,COM
...,...,...,...,...,...,...,...,...
98228,7988730812,07/30/2014,1056A,37,19,SUBN,GY,PAS
98229,7508681125,08/18/2014,0820A,21,42,SUBN,MR,PAS
98230,7990363331,03/29/2015,0516P,20,19,SUBN,TN,PAS
98231,7699841505,11/21/2014,0816A,38,61,4DSD,GY,PAS


In [14]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,09/17/2014,62
1,02/27/2015,13
2,05/15/2015,14
3,02/26/2015,20
4,07/10/2014,23
...,...,...
98228,07/30/2014,19
98229,08/18/2014,42
98230,03/29/2015,19
98231,11/21/2014,61


In [15]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2015,1,2
1,01/01/2015,5,1
2,01/01/2015,6,0
3,01/01/2015,7,0
4,01/01/2015,9,0
...,...,...,...
28100,12/31/2014,115,13
28101,12/31/2014,120,0
28102,12/31/2014,121,1
28103,12/31/2014,122,3


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [16]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2015" would sort before "12/31/2014").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13937,2014-07-01,1,15
13938,2014-07-01,5,8
13939,2014-07-01,6,6
13940,2014-07-01,7,3
13941,2014-07-01,9,10
...,...,...,...
13932,2015-06-30,115,8
13933,2015-06-30,120,1
13934,2015-06-30,121,0
13935,2015-06-30,122,1


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [17]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2015.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2016

In [18]:
# Load the cleaned 2016 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2016.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,7176985467,01/03/2016,0911A,74,120,4DSD,GY,PAS
1,7759974218,09/02/2015,0135P,20,103,4DSD,GREY,PAS
2,7490557288,09/16/2015,0313A,85,109,VAN,WH,COM
3,7605647529,09/11/2015,0726A,40,44,4DSD,GREY,PAS
4,7448797128,04/15/2016,1139A,20,104,4DSD,BK,PAS
...,...,...,...,...,...,...,...,...
85916,1409262327,05/06/2016,0154P,46,78,REFG,WHITE,COM
85917,7738792140,09/22/2015,1159A,71,83,SUBN,BK,PAS
85918,7314733624,05/13/2016,0155P,46,28,VAN,BROWN,COM
85919,8137018890,03/19/2016,0813A,21,52,SUBN,GY,PAS


In [19]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,01/03/2016,120
1,09/02/2015,103
2,09/16/2015,109
3,09/11/2015,44
4,04/15/2016,104
...,...,...
85916,05/06/2016,78
85917,09/22/2015,83
85918,05/13/2016,28
85919,03/19/2016,52


In [20]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2016,1,0
1,01/01/2016,5,0
2,01/01/2016,6,0
3,01/01/2016,7,0
4,01/01/2016,9,0
...,...,...,...
27946,12/31/2015,115,7
27947,12/31/2015,120,1
27948,12/31/2015,121,0
27949,12/31/2015,122,1


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [21]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2016" would sort before "12/31/2015").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13783,2015-07-01,1,11
13784,2015-07-01,5,7
13785,2015-07-01,6,10
13786,2015-07-01,7,4
13787,2015-07-01,9,1
...,...,...,...
13778,2016-06-27,115,1
13779,2016-06-27,120,0
13780,2016-06-27,121,0
13781,2016-06-27,122,1


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [22]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2016.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2017

In [23]:
# Load the cleaned 2017 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2017.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,8495810402,04/25/2017,0848A,21,10,VAN,WHITE,PAS
1,8471882772,02/02/2017,0943A,21,77,SUBN,GY,PAS
2,8537358666,04/11/2017,0846P,14,61,4DSD,RD,PAS
3,7854705707,02/13/2017,1234P,14,18,DELV,WHITE,PAS
4,7085197766,09/06/2016,0334P,37,72,SUBN,WH,PAS
...,...,...,...,...,...,...,...,...
85845,8532361020,06/21/2017,0814P,38,24,SUBN,BLACK,PAS
85846,8510816890,05/05/2017,0955A,10,24,DELV,GREEN,PAS
85847,1418982131,06/18/2017,0549P,19,110,SUBN,GRY,OMT
85848,8301034063,09/27/2016,0811P,50,50,SUBN,GY,PAS


In [24]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,04/25/2017,10
1,02/02/2017,77
2,04/11/2017,61
3,02/13/2017,18
4,09/06/2016,72
...,...,...
85845,06/21/2017,24
85846,05/05/2017,24
85847,06/18/2017,110
85848,09/27/2016,50


In [25]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2017,1,2
1,01/01/2017,5,0
2,01/01/2017,6,0
3,01/01/2017,7,1
4,01/01/2017,9,0
...,...,...,...
27946,12/31/2016,115,2
27947,12/31/2016,120,0
27948,12/31/2016,121,3
27949,12/31/2016,122,0


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [26]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2017" would sort before "12/31/2016").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13783,2016-07-01,1,7
13784,2016-07-01,5,1
13785,2016-07-01,6,2
13786,2016-07-01,7,4
13787,2016-07-01,9,3
...,...,...,...
13778,2017-06-30,115,0
13779,2017-06-30,120,0
13780,2017-06-30,121,0
13781,2017-06-30,122,0


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [27]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2017.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2018

In [28]:
# Load the cleaned 2018 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2018.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,8666360884,06/17/2018,0703A,19,50,SUBN,WHITE,PAS
1,8633254657,11/24/2017,1020A,14,24,DELV,WHITE,PAS
2,8496826399,09/06/2017,0355P,14,13,VAN,BROWN,COM
3,8522684560,08/08/2017,0708A,20,45,SUBN,WHITE,PAS
4,1439889041,06/07/2018,0806A,21,62,SDN,BLACK,OMT
...,...,...,...,...,...,...,...,...
97869,8644418610,06/05/2018,1219P,21,66,SUBN,BK,OMT
97870,8636513339,04/22/2018,0111P,46,20,4DSD,BK,OMT
97871,8624352113,04/07/2018,0118P,37,19,4DSD,WH,PAS
97872,8620456003,12/23/2017,0707A,31,14,SUBN,BLUE,PAS


In [29]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,06/17/2018,50
1,11/24/2017,24
2,09/06/2017,13
3,08/08/2017,45
4,06/07/2018,62
...,...,...
97869,06/05/2018,66
97870,04/22/2018,20
97871,04/07/2018,19
97872,12/23/2017,14


In [30]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2018,1,0
1,01/01/2018,5,0
2,01/01/2018,6,0
3,01/01/2018,7,0
4,01/01/2018,9,0
...,...,...,...
27792,12/31/2017,115,2
27793,12/31/2017,120,0
27794,12/31/2017,121,0
27795,12/31/2017,122,0


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [31]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2018" would sort before "12/31/2017").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13629,2017-07-01,1,8
13630,2017-07-01,5,4
13631,2017-07-01,6,2
13632,2017-07-01,7,1
13633,2017-07-01,9,4
...,...,...,...
13624,2018-06-26,115,11
13625,2018-06-26,120,4
13626,2018-06-26,121,0
13627,2018-06-26,122,2


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [32]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2018.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2019

In [33]:
# Load the cleaned 2019 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2019.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,8627887354,08/10/2018,0936A,21,9,SUBN,BK,PAS
1,1451256024,01/02/2019,0200A,78,71,VAN,WHITE,COM
2,8658582875,08/24/2018,0600P,74,26,SUBN,BK,PAS
3,8615348340,03/29/2019,0202P,69,1,VAN,BROWN,COM
4,8723571998,03/29/2019,1247P,42,13,VAN,WH,COM
...,...,...,...,...,...,...,...,...
94421,8679189376,12/10/2018,0956A,20,23,VAN,WH,COM
94422,8715264427,04/16/2019,1151A,21,78,SUBN,GY,PAS
94423,8667590502,10/29/2018,1136A,21,103,SUBN,BK,PAS
94424,8691835254,06/05/2019,0721A,31,14,SUBN,BL,PAS


In [34]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,08/10/2018,9
1,01/02/2019,71
2,08/24/2018,26
3,03/29/2019,1
4,03/29/2019,13
...,...,...
94421,12/10/2018,23
94422,04/16/2019,78
94423,10/29/2018,103
94424,06/05/2019,14


In [35]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2019,1,3
1,01/01/2019,5,0
2,01/01/2019,6,0
3,01/01/2019,7,2
4,01/01/2019,9,2
...,...,...,...
27715,12/31/2018,115,8
27716,12/31/2018,120,2
27717,12/31/2018,121,2
27718,12/31/2018,122,1


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [36]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2019" would sort before "12/31/2018").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13552,2018-07-01,1,3
13553,2018-07-01,5,2
13554,2018-07-01,6,0
13555,2018-07-01,7,3
13556,2018-07-01,9,0
...,...,...,...
13547,2019-06-25,115,3
13548,2019-06-25,120,1
13549,2019-06-25,121,2
13550,2019-06-25,122,2


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [37]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2019.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2020

In [38]:
# Load the cleaned 2020 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2020.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,8722390765,07/10/2019,0326P,71,122,SUBN,GY,PAS
1,8698448495,01/24/2020,0806A,62,88,2DSD,BL,PAS
2,8733882526,07/17/2019,0230P,19,115,SUBN,GY,PAS
3,8582858024,11/07/2019,1139A,71,40,SUBN,GY,PAS
4,8743776565,10/25/2019,0913A,19,20,SUBN,GY,PAS
...,...,...,...,...,...,...,...,...
77788,8705497474,12/26/2019,0936A,14,1,4DSD,BK,PAS
77789,8805607794,12/20/2019,0410P,70,84,SUBN,GY,PAS
77790,8758694869,01/22/2020,0234P,74,28,2DSD,BK,PAS
77791,8747393683,12/18/2019,0818A,71,10,SUBN,GR,PAS


In [39]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,07/10/2019,122
1,01/24/2020,88
2,07/17/2019,115
3,11/07/2019,40
4,10/25/2019,20
...,...,...
77788,12/26/2019,1
77789,12/20/2019,84
77790,01/22/2020,28
77791,12/18/2019,10


In [40]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2020,1,1
1,01/01/2020,5,1
2,01/01/2020,6,0
3,01/01/2020,7,1
4,01/01/2020,9,0
...,...,...,...
27792,12/31/2019,115,5
27793,12/31/2019,120,2
27794,12/31/2019,121,0
27795,12/31/2019,122,1


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [41]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2020" would sort before "12/31/2019").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13629,2019-07-01,1,7
13630,2019-07-01,5,2
13631,2019-07-01,6,7
13632,2019-07-01,7,2
13633,2019-07-01,9,4
...,...,...,...
13624,2020-06-25,115,2
13625,2020-06-25,120,2
13626,2020-06-25,121,0
13627,2020-06-25,122,0


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [42]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2020.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2021

In [43]:
# Load the cleaned 2021 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2021.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,8943664242,05/17/2021,0814A,84,28,REFG,BLACK,PAS
1,8837225842,01/29/2021,0244P,20,49,SUBN,BLACK,PAS
2,8893136636,06/25/2021,1001P,53,108,4DSD,BK,PAS
3,8852664610,11/28/2020,0925A,21,34,SUBN,BR,PAS
4,8738795905,07/16/2020,0815A,69,13,DELV,WHITE,PAS
...,...,...,...,...,...,...,...,...
97627,8952925592,06/18/2021,0558P,40,24,4DSD,GY,PAS
97628,8734860680,09/09/2020,0137P,17,10,4DSD,BK,OMT
97629,8813622545,10/05/2020,0250P,37,114,4DSD,GREY,PAS
97630,8811079895,09/30/2020,0847A,21,112,SUBN,GREY,PAS


In [44]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,05/17/2021,28
1,01/29/2021,49
2,06/25/2021,108
3,11/28/2020,34
4,07/16/2020,13
...,...,...
97627,06/18/2021,24
97628,09/09/2020,10
97629,10/05/2020,114
97630,09/30/2020,112


In [45]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2021,1,1
1,01/01/2021,5,0
2,01/01/2021,6,0
3,01/01/2021,7,2
4,01/01/2021,9,0
...,...,...,...
28023,12/31/2020,115,9
28024,12/31/2020,120,1
28025,12/31/2020,121,2
28026,12/31/2020,122,0


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [46]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2021" would sort before "12/31/2020").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13860,2020-07-01,1,1
13861,2020-07-01,5,2
13862,2020-07-01,6,0
13863,2020-07-01,7,2
13864,2020-07-01,9,2
...,...,...,...
13855,2021-06-29,115,0
13856,2021-06-29,120,0
13857,2021-06-29,121,0
13858,2021-06-29,122,0


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [47]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2021.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2022

In [48]:
# Load the cleaned 2022 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2022.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,8992777279,07/10/2021,0243P,31,90,SUBN,GY,PAS
1,8813819950,11/26/2021,0926A,21,77,SUBN,GY,PAS
2,8861926265,12/31/2021,0240P,40,19,PICK,RED,PAS
3,8984208700,07/16/2021,0106P,21,71,4DSD,WHITE,PAS
4,8891714641,11/29/2021,0422P,19,18,SUBN,GREY,PAS
...,...,...,...,...,...,...,...,...
92685,8927175438,11/13/2021,1102A,71,14,SUBN,RD,PAS
92686,8931728682,02/17/2022,0128P,31,14,4DSD,WH,PAS
92687,8898491116,02/13/2022,0235P,14,5,4DSD,GY,PAS
92688,8966394980,11/19/2021,0807A,21,44,SUBN,WH,PAS


In [49]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,07/10/2021,90
1,11/26/2021,77
2,12/31/2021,19
3,07/16/2021,71
4,11/29/2021,18
...,...,...
92685,11/13/2021,14
92686,02/17/2022,14
92687,02/13/2022,5
92688,11/19/2021,44


In [50]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2022,1,2
1,01/01/2022,5,0
2,01/01/2022,6,0
3,01/01/2022,7,0
4,01/01/2022,9,0
...,...,...,...
28023,12/31/2021,115,0
28024,12/31/2021,120,0
28025,12/31/2021,121,0
28026,12/31/2021,122,0


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [51]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2022" would sort before "12/31/2021").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13860,2021-07-01,1,12
13861,2021-07-01,5,8
13862,2021-07-01,6,13
13863,2021-07-01,7,3
13864,2021-07-01,9,6
...,...,...,...
13855,2022-06-29,115,0
13856,2022-06-29,120,0
13857,2022-06-29,121,0
13858,2022-06-29,122,0


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [52]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2022.csv"
df2.to_csv(ts_csv, index=False)

## Reading and Cleaning Data for 2023

In [53]:
# Load the cleaned 2023 parking-violation records and preview them
violations_csv = "../../data/main_data/cleaned/Parking_Violations_Issued_2023.csv"
df = pd.read_csv(violations_csv)
display(df)

Unnamed: 0,Summons Number,Issue Date,Violation Time,Violation Code,Violation Precinct,Vehicle Body Type,Vehicle Color,Plate Type
0,9054647449,05/25/2023,0626A,40,110,SUBN,BK,OMT
1,8892972947,01/30/2023,1255P,37,104,SUBN,SILVE,PAS
2,9030528436,02/13/2023,1115A,69,18,VAN,WHITE,PAS
3,9017588044,11/28/2022,1108A,21,6,4DSD,SILVE,PAS
4,2009467929,12/23/2022,1110A,21,108,SUBN,GY,PAS
...,...,...,...,...,...,...,...,...
94888,8941027640,09/02/2022,1244P,21,88,4DSD,BLUE,PAS
94889,8885210170,07/08/2022,1028A,38,63,4DSD,BL,PAS
94890,9031235258,01/30/2023,0326P,19,32,VAN,WHITE,COM
94891,9031306228,02/03/2023,0220P,16,100,SUBN,GY,PAS


In [54]:
# Drop the ticket attributes that are not needed for the daily counts,
# which leaves Issue Date and Violation Precinct
unused_columns = [
    'Summons Number', 'Violation Time', 'Violation Code',
    'Vehicle Body Type', 'Vehicle Color', 'Plate Type',
]
df1 = df.drop(columns=unused_columns)
display(df1)

Unnamed: 0,Issue Date,Violation Precinct
0,05/25/2023,110
1,01/30/2023,104
2,02/13/2023,18
3,11/28/2022,6
4,12/23/2022,108
...,...,...
94888,09/02/2022,88
94889,07/08/2022,63
94890,01/30/2023,32
94891,02/03/2023,100


In [55]:
# Count tickets per (Issue Date, Violation Precinct) pair
pair_counts = df1.groupby(['Issue Date', 'Violation Precinct']).size()

# Expand to every combination of observed date and observed precinct so that
# pairs without tickets appear with Counts = 0. Reindexing on the full product
# yields the same rows, in the same order, as regrouping and doing an
# unstack/stack round trip, without building the wide intermediate table.
# NOTE(review): only dates that occur at least once in df1 are part of the
# grid; a calendar day with no tickets in any precinct is still absent.
full_grid = pd.MultiIndex.from_product(pair_counts.index.levels,
                                       names=pair_counts.index.names)
df2 = pair_counts.reindex(full_grid, fill_value=0).reset_index(name='Counts')

display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
0,01/01/2023,1,0
1,01/01/2023,5,0
2,01/01/2023,6,0
3,01/01/2023,7,0
4,01/01/2023,9,0
...,...,...,...
28100,12/31/2022,115,1
28101,12/31/2022,120,0
28102,12/31/2022,121,0
28103,12/31/2022,122,0


Issue Date            object
Violation Precinct     int64
Counts                 int64
dtype: object

In [56]:
# Parse the MM/DD/YYYY strings into datetimes so that sorting is chronological
# (as strings, "01/01/2023" would sort before "12/31/2022").
# The explicit format avoids format inference and makes any value that is not
# MM/DD/YYYY raise instead of being guessed at.
df2['Issue Date'] = pd.to_datetime(df2['Issue Date'], format='%m/%d/%Y')
df2 = df2.sort_values(by=['Issue Date', 'Violation Precinct'])
display(df2)
df2.dtypes

Unnamed: 0,Issue Date,Violation Precinct,Counts
13937,2022-07-01,1,13
13938,2022-07-01,5,4
13939,2022-07-01,6,14
13940,2022-07-01,7,7
13941,2022-07-01,9,3
...,...,...,...
13932,2023-06-30,115,8
13933,2023-06-30,120,1
13934,2023-06-30,121,0
13935,2023-06-30,122,1


Issue Date            datetime64[ns]
Violation Precinct             int64
Counts                         int64
dtype: object

In [57]:
# Persist the sorted date/precinct counts; the row index is left out of the file
ts_csv = "../../data/time_series/cleaned/TS_2023.csv"
df2.to_csv(ts_csv, index=False)