In [1]:
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Location of the 2020 NYC parking-violations CSV, relative to this notebook
filename = '../data/nyc-parking-violations-2020.csv'

# Only these columns are loaded from the file
wanted_columns = [
    'Date First Observed',
    'Plate ID',
    'Registration State',
    'Issue Date',
    'Vehicle Make',
    'Street Name',
    'Vehicle Color',
]

df = pd.read_csv(filename, usecols=wanted_columns)
df.columns

Index(['Plate ID', 'Registration State', 'Issue Date', 'Vehicle Make',
       'Street Name', 'Date First Observed', 'Vehicle Color'],
      dtype='object')

# Beyond 1

What were the three most commonly ticketed car makes from January 2nd through January 10th?

In [3]:
# Rows are selected by *parsed* dates rather than by slicing the string labels.
# A slice over string labels compares text character by character, so a label from
# another year (e.g. '01/05/2019 12:00:00 AM') sorts between the two endpoints and
# would be counted as well.
# The index is still set to 'Issue Date' and sorted, because the next cell starts
# by resetting it.
df = df.set_index('Issue Date')
df = df.sort_index()

# Assumes the 'MM/DD/YYYY HH:MM:SS AM/PM' layout; any label that does not match
# becomes NaT and therefore never falls inside the range below.
issued = pd.to_datetime(df.index, format='%m/%d/%Y %I:%M:%S %p', errors='coerce')

# Half-open interval: January 2nd, 2020 up to (but not including) January 11th, 2020
in_range = (issued >= pd.Timestamp('2020-01-02')) & (issued < pd.Timestamp('2020-01-11'))
df.loc[in_range, 'Vehicle Make'].value_counts().head(3)

Vehicle Make
FORD     38958
TOYOT    37096
HONDA    35962
Name: count, dtype: int64

In [4]:

df = df.reset_index()   # undo the setting of the index from the previous cell

# let's do it in chained format:
#
# The rows are picked with a boolean mask built from parsed dates. Slicing the
# string labels would compare text character by character and also pick up labels
# from other years that happen to sort between the two endpoints.
# Assumes the 'MM/DD/YYYY HH:MM:SS AM/PM' layout; anything else becomes NaT and is
# left out of the range.

(
    df
    .loc[lambda frame: pd.to_datetime(frame['Issue Date'],
                                      format='%m/%d/%Y %I:%M:%S %p',
                                      errors='coerce')
                         # January 2nd, 2020 up to (not including) January 11th, 2020
                         .between(pd.Timestamp('2020-01-02'),
                                  pd.Timestamp('2020-01-11'),
                                  inclusive='left'),
         'Vehicle Make']
    .value_counts()
    .head(3)
)

Vehicle Make
FORD     38958
TOYOT    37096
HONDA    35962
Name: count, dtype: int64

# Beyond 2

How many tickets did the second-most-ticketed car get in 2020?  (And why am I not interested in the most-ticketed plate?) What state was that car from, and was it always ticketed in the same location?

In [5]:
# Most common plate is... BLANKPLATE!
# Second-most common is 2704819
#
# No reset_index() here: the previous cell already restored the default index, so
# 'Plate ID' is an ordinary column. Resetting again would only add a stray 'index'
# column and make this cell fail when it is run a second time.
df['Plate ID'].value_counts().head(2)

Plate ID
BLANKPLATE    8882
2704819       1535
Name: count, dtype: int64

In [6]:
# Every ticket for this plate shows Indiana as the registration state.
# The index is switched to 'Plate ID' so rows can be looked up by plate.
df = df.set_index(keys='Plate ID')
df['Registration State'].loc['2704819']

Plate ID
2704819    IN
2704819    IN
2704819    IN
2704819    IN
2704819    IN
           ..
2704819    IN
2704819    IN
2704819    IN
2704819    IN
2704819    IN
Name: Registration State, Length: 1535, dtype: object

In [7]:
# Always the same place? No -- but a large share of the tickets cluster in one area.
# Relies on 'Plate ID' still being the index.
streets_for_plate = df['Street Name'].loc['2704819']
streets_for_plate.value_counts()

Street Name
8th Ave              395
Penn Plz             230
7th Ave               92
9th Ave               63
Broadway              57
                    ... 
6TH AVE                1
W 54TH ST              1
E 39th St              1
N/S NW C/O W 30TH      1
E 49th St              1
Name: count, Length: 113, dtype: int64

# Beyond 3

Would it be useful to set the index to "Date First Observed"? Why or why not?

In [8]:
# Not a useful index: almost every row holds 0 in this column.

# Move the current index back into a regular column before counting
df = df.reset_index(drop=False)
df.loc[:, 'Date First Observed'].value_counts()

Date First Observed
0           12371344
20200311         887
20200205         795
20200212         793
20200310         770
              ...   
20220412           1
20191131           1
20200813           1
20160614           1
20201230           1
Name: count, Length: 465, dtype: int64