In [1]:
import pandas as pd
import numpy as np
import re

import matplotlib.pyplot as plt
import altair as alt
import seaborn as sns
import pickle

pd.set_option('display.width', 2000)
pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', 99)
pd.set_option('display.max_colwidth', None)

### NYC Parking Violations and Results

In [2]:
fp = '/Users/blake/Box/Project/Data/'

In [None]:
# Load parking violation data
# URL: https://data.cityofnewyork.us/City-Government/Parking-Violations-Issued-Fiscal-Year-2021/kvfd-bves
fp = '/Users/blake/Box/Project/Data/'
df = pd.read_csv(fp + 'Parking_Violations_Issued_-_Fiscal_Year_2021.csv', low_memory=False)
# data_dict = data_dict.drop(columns=['Unnamed: 3']).dropna()

display(df.head())
display(df.info())

In [None]:
# Pickle for faster save/load time
df.to_pickle(fp + 'df_Parking_Violations_Issued.pkl')

In [4]:
# Load Pickle
df = pd.read_pickle(fp + 'df_Parking_Violations_Issued.pkl')

In [None]:
# Change date cols to type datetime
dt_pattern = r'Date|Time'
date_cols = [col for col in df.columns if re.search(dt_pattern, col)]

for col_name in date_cols: 
    df[col_name] = pd.to_datetime(df[col_name], errors='coerce')

In [None]:
df.groupby('Registration State').count().sort_values(by='Summons Number', ascending=False)

In [None]:
df['Summons Number'].nunique()

In [None]:
# Load parking violation data
# URL: https://data.cityofnewyork.us/City-Government/Open-Parking-and-Camera-Violations/nc67-uf89
fp = '/Users/blake/Box/Project/Data/'
df_status = pd.read_csv(fp + 'Open_Parking_and_Camera_Violations.csv', low_memory=False,
                       dtype={'Summons Number': 'str'})

display(df_status.head())
display(df_status.info())

In [None]:
# Pickle for faster save/load time
df_status.to_pickle(fp + 'df_parking_and_camera_violations_backup.pkl')

In [5]:
# Load Pickled Data
# Parking violations issued in FY 2021
df = pd.read_pickle(fp + 'df_Parking_Violations_Issued.pkl')

# Open parking/camera violations
df_status = pd.read_pickle(fp + 'df_parking_and_camera_violations.pkl')

display(df_status.head())
display(df_status.info())

Unnamed: 0,Plate,State,License Type,Summons Number,Issue Date,Violation Time,Violation,Judgment Entry Date,Fine Amount,Penalty Amount,Interest Amount,Reduction Amount,Payment Amount,Amount Due,Precinct,County,Issuing Agency,Violation Status,Summons Image
0,2107978,TX,PAS,7036614365,11/23/2016,09:26A,NO STANDING-BUS STOP,,115.0,10.0,0.0,0.0,125.0,0.0,102.0,Q,TRAFFIC,,View Summons (http://nycserv.nyc.gov/NYCServWeb/ShowImage?searchID=VG5wQmVrNXFXWGhPUkUweVRsRTlQUT09&locationName=_____________________)
1,EBW7395,NY,PAS,8399013663,01/05/2017,01:36P,CROSSWALK,,115.0,0.0,0.0,0.0,115.0,0.0,62.0,K,TRAFFIC,,View Summons (http://nycserv.nyc.gov/NYCServWeb/ShowImage?searchID=VDBSTk5VOVVRWGhOZWxreVRYYzlQUT09&locationName=_____________________)
2,AM491V,NJ,PAS,8075056917,08/26/2016,11:53A,NO STANDING-DAY/TIME LIMITS,,115.0,0.0,0.0,0.0,115.0,0.0,33.0,NY,TRAFFIC,HEARING HELD-GUILTY,View Summons (http://nycserv.nyc.gov/NYCServWeb/ShowImage?searchID=VDBSQk0wNVVRVEZPYW10NFRuYzlQUT09&locationName=_____________________)
3,2380560,IN,PAS,7809134577,09/20/2016,08:00A,DOUBLE PARKING,,115.0,0.0,0.0,115.0,0.0,0.0,6.0,NY,TRAFFIC,HEARING HELD-NOT GUILTY,View Summons (http://nycserv.nyc.gov/NYCServWeb/ShowImage?searchID=VG5wbmQwOVVSWHBPUkZVelRuYzlQUT09&locationName=_____________________)
4,GWZ1993,NY,PAS,8075056838,08/26/2016,11:09A,INSP. STICKER-EXPIRED/MISSING,,65.0,30.0,0.0,0.0,95.0,0.0,30.0,NY,TRAFFIC,,View Summons (http://nycserv.nyc.gov/NYCServWeb/ShowImage?searchID=VDBSQk0wNVVRVEZPYW1kNlQwRTlQUT09&locationName=_____________________)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8810176 entries, 0 to 8810175
Data columns (total 19 columns):
 #   Column               Dtype  
---  ------               -----  
 0   Plate                object 
 1   State                object 
 2   License Type         object 
 3   Summons Number       object 
 4   Issue Date           object 
 5   Violation Time       object 
 6   Violation            object 
 7   Judgment Entry Date  object 
 8   Fine Amount          float64
 9   Penalty Amount       float64
 10  Interest Amount      float64
 11  Reduction Amount     float64
 12  Payment Amount       float64
 13  Amount Due           float64
 14  Precinct             float64
 15  County               object 
 16  Issuing Agency       object 
 17  Violation Status     object 
 18  Summons Image        object 
dtypes: float64(7), object(12)
memory usage: 1.2+ GB


None

In [6]:
df_status['Violation Status'].unique()

array([nan, 'HEARING HELD-GUILTY', 'HEARING HELD-NOT GUILTY',
       'HEARING HELD-GUILTY REDUCTION', 'HEARING PENDING',
       'ADMIN CLAIM GRANTED', 'APPEAL AFFIRMED', 'HEARING ADJOURNMENT',
       'ADMIN REDUCTION', 'APPEAL REVERSED', 'ADMIN CLAIM DENIED',
       'HEARING HELD-REINSTATEMENT', 'APPEAL ABANDONED',
       'APPEAL MODIFIED', 'HEARING WAIVED', 'APPEAL REMANDED'],
      dtype=object)

### Get relevant data for analyzing parking violations

In [8]:
df_slice = df[['Summons Number', 'Plate ID', 'Registration State', 'Vehicle Body Type', 'Vehicle Make', 
               'Vehicle Color', 'Vehicle Year', 'Violation Location', 'Violation Precinct']]
df_status_slice = df_status[['Summons Number', 'Violation', 'Violation Status', 'Fine Amount', 'Penalty Amount',
                     'Reduction Amount', 'Payment Amount', 'Amount Due']]
df_parking_merged = df_slice.merge(df_status_slice, on='Summons Number', how='outer')

In [9]:
# Pickle df_parking_merged
df_parking_merged.to_pickle(fp + 'df_parking_merged.pkl')

In [None]:
# Load df_park_merged
df_park_merged = pd.from_pickle(fp + 'df_parking_merged.pkl')

In [10]:
df_parking_merged.describe()

Unnamed: 0,Vehicle Year,Violation Location,Violation Precinct,Fine Amount,Penalty Amount,Reduction Amount,Payment Amount,Amount Due
count,5398815.0,3305367.0,5398815.0,8179068.0,8179068.0,8179068.0,8179068.0,8179068.0
mean,1600.068,55.38381,33.90815,70.68652,11.57318,10.66075,72.35903,0.007557096
std,813.5871,38.08878,40.20459,30.79833,19.57692,27.18391,43.25966,0.8998671
min,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2004.0,19.0,0.0,50.0,0.0,0.0,45.0,0.0
50%,2013.0,49.0,14.0,65.0,0.0,0.0,65.0,0.0
75%,2018.0,90.0,67.0,115.0,10.0,0.13,107.75,0.0
max,2069.0,904.0,904.0,515.0,60.0,718.47,710.64,209.0


In [None]:
# violation status based on vehicle year, other vars?
df_parking_merged['Vehicle Year'].describe() #.str.contains('gray', case=False).count()

In [None]:
# Look at vehicle year
df_vehicle_yr = df_parking_merged.loc[(df_parking_merged['Vehicle Year'] != 0) & 
                                      (df_parking_merged['Vehicle Year'] < 2023)].groupby('Vehicle Year').count()

In [None]:
df_vehicle_yr.index.values

In [None]:
df_vehicle_yr.reset_index().plot.scatter(x='Vehicle Year', y='Summons Number')

In [11]:
# Analyze based on Violation Status
df_violation_status = df_parking_merged.loc[(df_parking_merged['Violation Status'] != 'HEARING PENDING') & 
                                            (df_parking_merged['Violation Status'] != np.nan)]

In [12]:
df_violation_status.groupby('Violation Status').count()

Unnamed: 0_level_0,Summons Number,Plate ID,Registration State,Vehicle Body Type,Vehicle Make,Vehicle Color,Vehicle Year,Violation Location,Violation Precinct,Violation,Fine Amount,Penalty Amount,Reduction Amount,Payment Amount,Amount Due
Violation Status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ADMIN CLAIM DENIED,3012,0,0,0,0,0,0,0,0,3012,3012,3012,3012,3012,3012
ADMIN CLAIM GRANTED,69563,1,1,1,1,1,1,0,1,69560,69563,69563,69563,69563,69563
ADMIN REDUCTION,15281,1,1,1,1,0,1,0,1,15279,15281,15281,15281,15281,15281
APPEAL ABANDONED,965,0,0,0,0,0,0,0,0,965,965,965,965,965,965
APPEAL AFFIRMED,13031,0,0,0,0,0,0,0,0,13031,13031,13031,13031,13031,13031
APPEAL MODIFIED,92,0,0,0,0,0,0,0,0,92,92,92,92,92,92
APPEAL REMANDED,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1
APPEAL REVERSED,4189,2,2,2,2,2,2,1,2,4189,4189,4189,4189,4189,4189
HEARING ADJOURNMENT,18091,7,7,7,7,7,7,1,7,18091,18091,18091,18091,18091,18091
HEARING HELD-GUILTY,805182,70,70,69,69,64,70,30,70,805150,805182,805182,805182,805182,805182


In [13]:
denied_cols = ['ADMIN CLAIM DENIED', 'APPEAL AFFIRMED', 'HEARING HELD-GUILTY']
reduced_cols = ['ADMIN REDUCTION', 'HEARING HELD-GUILTY REDUCTION']
granted_cols = ['ADMIN CLAIM GRANTED', 'APPEAL REVERSED', 'HEARING HELD-NOT GUILTY']

In [15]:
df_violation_status['Coded Violation Status'] = np.where(
    df_violation_status['Violation Status'].isin(denied_cols), 'Denied', np.where(
    df_violation_status['Violation Status'].isin(reduced_cols), 'Reduced', np.where(
    df_violation_status['Violation Status'].isin(granted_cols), 'Granted', 'Other')))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [16]:
df_violation_status['Coded Violation Status'].value_counts()

Other      11666859
Granted      843622
Denied       821225
Reduced      753799
Name: Coded Violation Status, dtype: int64

In [17]:
df_violation_status_dropped = df_violation_status.dropna(subset=['Violation Status'])
df_violation_status_dropped['Coded Violation Status'].value_counts()

Granted    843622
Denied     821225
Reduced    753799
Other       21041
Name: Coded Violation Status, dtype: int64

In [18]:
# Pickle for faster save/load time
df_violation_status_dropped.to_pickle(fp + 'df_violation_status_dropped.pkl')

In [3]:
# Load pickled df
df_violation_status_dropped = pd.read_pickle(fp + 'df_violation_status_dropped.pkl')

In [4]:
df_violation_status_dropped.columns.values

array(['Summons Number', 'Plate ID', 'Registration State',
       'Vehicle Body Type', 'Vehicle Make', 'Vehicle Color',
       'Vehicle Year', 'Violation Location', 'Violation Precinct',
       'Violation', 'Violation Status', 'Fine Amount', 'Penalty Amount',
       'Reduction Amount', 'Payment Amount', 'Amount Due',
       'Coded Violation Status'], dtype=object)

In [19]:
cols = ['Coded Violation Status', 'Fine Amount', 'Penalty Amount',
       'Reduction Amount', 'Payment Amount', 'Amount Due']
df_violation_status_dropped[cols].groupby('Coded Violation Status').describe()

Unnamed: 0_level_0,Fine Amount,Fine Amount,Fine Amount,Fine Amount,Fine Amount,Fine Amount,Fine Amount,Fine Amount,Penalty Amount,Penalty Amount,Penalty Amount,Penalty Amount,Penalty Amount,Penalty Amount,Penalty Amount,Penalty Amount,Reduction Amount,Reduction Amount,Reduction Amount,Reduction Amount,Reduction Amount,Reduction Amount,Reduction Amount,Reduction Amount,Payment Amount,Payment Amount,Payment Amount,Payment Amount,Payment Amount,Payment Amount,Payment Amount,Payment Amount,Amount Due,Amount Due,Amount Due,Amount Due,Amount Due,Amount Due,Amount Due,Amount Due
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
Coded Violation Status,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2
Denied,821225.0,83.638585,31.100303,0.0,65.0,65.0,115.0,515.0,821225.0,12.519687,21.154155,0.0,0.0,0.0,10.0,60.0,821225.0,0.565047,5.595157,0.0,0.0,0.0,0.0,715.2,821225.0,96.418009,36.928566,0.0,65.0,115.0,115.0,689.05,821225.0,0.00144,0.464859,0.0,0.0,0.0,0.0,209.0
Granted,843622.0,72.115022,35.247368,0.0,35.0,65.0,115.0,515.0,843622.0,5.101035,12.140624,0.0,0.0,0.0,10.0,60.0,843622.0,77.274032,36.750008,0.0,45.0,65.0,115.0,691.68,843622.0,7.7e-05,0.070768,0.0,0.0,0.0,0.0,65.0,843622.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Other,21041.0,92.131315,81.884499,35.0,50.0,65.0,115.0,515.0,21041.0,20.557245,23.97454,0.0,0.0,10.0,30.0,60.0,21041.0,1.421078,9.691088,0.0,0.0,0.0,0.04,284.06,21041.0,114.656667,86.882199,0.0,65.0,95.0,125.28,653.66,21041.0,0.013568,1.212558,0.0,0.0,0.0,0.0,145.0
Reduced,753799.0,90.970759,29.147563,25.0,65.0,115.0,115.0,515.0,753799.0,2.309064,10.084948,0.0,0.0,0.0,0.0,60.0,753799.0,24.932755,17.574402,0.0,15.0,23.0,33.0,409.95,753799.0,68.729448,34.531708,0.0,30.0,82.0,100.0,525.0,753799.0,0.000268,0.148259,0.0,0.0,0.0,0.0,116.11


In [20]:
# What types of violations are more likely to be reduced?
# df_violation_status_dropped['Violation'].unique()
df_violation_status_dropped_slice = df_violation_status_dropped.loc[df_violation_status_dropped['Coded Violation Status'] == 'Denied']
df_violation_status_dropped.groupby(['Violation', 'Coded Violation Status']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Summons Number,Plate ID,Registration State,Vehicle Body Type,Vehicle Make,Vehicle Color,Vehicle Year,Violation Location,Violation Precinct,Violation Status,Fine Amount,Penalty Amount,Reduction Amount,Payment Amount,Amount Due
Violation,Coded Violation Status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
ALTERING INTERCITY BUS PERMIT,Denied,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1
ALTERING INTERCITY BUS PERMIT,Granted,23,0,0,0,0,0,0,0,0,23,23,23,23,23,23
ALTERING INTERCITY BUS PERMIT,Reduced,3,0,0,0,0,0,0,0,0,3,3,3,3,3,3
ANGLE PARKING,Denied,794,0,0,0,0,0,0,0,0,794,794,794,794,794,794
ANGLE PARKING,Granted,458,0,0,0,0,0,0,0,0,458,458,458,458,458,458
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WASH/REPAIR VEHCL-REPAIR ONLY,Granted,5,0,0,0,0,0,0,0,0,5,5,5,5,5,5
WRONG WAY,Denied,992,0,0,0,0,0,0,0,0,992,992,992,992,992,992
WRONG WAY,Granted,591,0,0,0,0,0,0,0,0,591,591,591,591,591,591
WRONG WAY,Other,18,0,0,0,0,0,0,0,0,18,18,18,18,18,18


In [6]:
df_violation_status_dropped_trimmed = df_violation_status_dropped[['Violation', 'Coded Violation Status', 'Summons Number']]
groupby_cols = ['Violation', 'Coded Violation Status']
df_violation_grouped = df_violation_status_dropped_trimmed.groupby(groupby_cols).count()

In [16]:
df_violation_grouped.index.get_level_values(0)

Index(['ALTERING INTERCITY BUS PERMIT', 'ALTERING INTERCITY BUS PERMIT', 'ALTERING INTERCITY BUS PERMIT', 'ANGLE PARKING', 'ANGLE PARKING', 'ANGLE PARKING', 'ANGLE PARKING', 'ANGLE PARKING-COMM VEHICLE', 'ANGLE PARKING-COMM VEHICLE', 'ANGLE PARKING-COMM VEHICLE',
       ...
       'VEHICLE FOR SALE(DEALERS ONLY)', 'VIN OBSCURED', 'VIN OBSCURED', 'VIN OBSCURED', 'WASH/REPAIR VEHCL-REPAIR ONLY', 'WASH/REPAIR VEHCL-REPAIR ONLY', 'WRONG WAY', 'WRONG WAY', 'WRONG WAY', 'WRONG WAY'], dtype='object', name='Violation', length=343)

In [None]:
df_violation_grouped['Violation']

In [28]:
def add_pct_col(df):
    df = df.reset_index()
    df['Pct'] = round(df['Summons Number'] / df.groupby('Violation')['Summons Number'].transform('sum') * 100, 2)
    return df

df_violation_grouped = add_pct_col(df_violation_grouped)
display(df_violation_grouped.sort_values(by=['Violation', 'Pct'], ascending=False).head(99))

Unnamed: 0,Violation,Coded Violation Status,Summons Number,Pct
339,WRONG WAY,Denied,992,39.55
342,WRONG WAY,Reduced,907,36.16
340,WRONG WAY,Granted,591,23.56
341,WRONG WAY,Other,18,0.72
338,WASH/REPAIR VEHCL-REPAIR ONLY,Granted,5,83.33
337,WASH/REPAIR VEHCL-REPAIR ONLY,Denied,1,16.67
335,VIN OBSCURED,Granted,18,62.07
336,VIN OBSCURED,Reduced,7,24.14
334,VIN OBSCURED,Denied,4,13.79
331,VEHICLE FOR SALE(DEALERS ONLY),Granted,80,86.96


In [37]:
# Most 
coded_violation_status_list = list(set(df_violation_grouped['Coded Violation Status'].unique()))
for status in coded_violation_status_list:
    print(status)
    display(df_violation_grouped.loc[df_violation_grouped['Coded Violation Status'] == status].sort_values(
        by='Pct', ascending=False).head())
    print('\n')

Denied


Unnamed: 0,Violation,Coded Violation Status,Summons Number,Pct
38,DIVIDED HIGHWAY,Denied,72,72.73
112,INSP. STICKER-EXPIRED/MISSING,Denied,60532,70.8
285,REG. STICKER-EXPIRED/MISSING,Denied,50988,70.7
18,BUS LANE VIOLATION,Denied,11290,63.83
7,ANGLE PARKING-COMM VEHICLE,Denied,49,62.82




Other


Unnamed: 0,Violation,Coded Violation Status,Summons Number,Pct
242,OT PARKING-MISSING/BROKEN METR,Other,1,6.67
269,PHTO SCHOOL ZN SPEED VIOLATION,Other,2132,6.52
86,FAILURE TO STOP AT RED LIGHT,Other,925,5.09
51,ELEVATED/DIVIDED HIGHWAY/TUNNL,Other,1,4.35
318,UNALTERED COMM VEHICLE,Other,3,3.85




Reduced


Unnamed: 0,Violation,Coded Violation Status,Summons Number,Pct
212,NO STANDING-SNOW EMERGENCY,Reduced,60,76.92
273,PKG IN EXC. OF LIM-COMM MTR ZN,Reduced,16,72.73
60,EXPIRED METER,Reduced,330,65.74
17,BIKE LANE,Reduced,25916,59.43
76,FAIL TO DISP. MUNI METER RECPT,Reduced,88269,58.03




Granted


Unnamed: 0,Violation,Coded Violation Status,Summons Number,Pct
289,REMOVE/REPLACE FLAT TIRE,Granted,4,100.0
123,MISCELLANEOUS,Granted,2,100.0
120,MARGINAL STREET/WATER FRONT,Granted,1,100.0
249,OVERNIGHT TRACTOR TRAILER PKG,Granted,182,91.46
82,FAILURE TO DISPLAY BUS PERMIT,Granted,366,90.37






In [35]:
# Get Descriptive Status by Coded Violation Status
coded_violation_status_list = list(set(df_violation_grouped['Coded Violation Status'].unique()))
for status in coded_violation_status_list:
    print(status)
    display(df_violation_grouped.loc[df_violation_grouped['Coded Violation Status'] == status].describe())
    print('\n\n')

Denied


Unnamed: 0,Summons Number,Pct
count,89.0,89.0
mean,9226.88764,32.396517
std,19597.331806,16.973632
min,1.0,3.7
25%,49.0,16.67
50%,575.0,30.01
75%,8629.0,41.47
max,118511.0,72.73





Other


Unnamed: 0,Summons Number,Pct
count,72.0,72.0
mean,292.236111,1.175833
std,621.777654,1.368896
min,1.0,0.03
25%,6.25,0.3775
50%,25.0,0.715
75%,223.0,1.335
max,3800.0,6.67





Reduced


Unnamed: 0,Summons Number,Pct
count,86.0,86.0
mean,8765.034884,24.171395
std,25507.060524,20.295472
min,1.0,0.28
25%,18.75,5.2875
50%,347.0,19.915
75%,4696.75,37.7675
max,182897.0,76.92





Granted


Unnamed: 0,Summons Number,Pct
count,96.0,96.0
mean,8787.375,47.43
std,26936.902496,26.10976
min,1.0,2.39
25%,49.75,24.3375
50%,797.5,45.68
75%,4643.0,66.2575
max,226576.0,100.0







In [None]:
df_violation_status_dropped.plot.scatter(x='Coded Violation Status', y='Summons Number')