In [1]:
import pandas as pd
import numpy as np

In [2]:
# Explicit dtypes for the fine schedule: the code is read as float and the
# definition and both fine columns as pandas strings.
type_dict = {
    "Violation Code": float,
    "Definition": "string",
    "Manhattan 96th St. & Below": "string",
    "All Other Areas": "string",
}
fines_data = pd.read_csv("../../data/Parking Violation Code.csv", dtype=type_dict)

# Discard the last four rows of the file.
# NOTE(review): presumably trailing footnote rows rather than real violation codes - confirm.
trailing_rows = fines_data.tail(4).index
fines_data = fines_data.drop(index=trailing_rows)

In [3]:
def _strip_dollar(value):
    """Return the text after a leading "$", or 0 when there is no "$"-prefixed amount.

    Missing values, empty strings and any text that does not start with "$"
    all map to the integer 0. `startswith` is used instead of indexing the first
    character so that an empty string cannot raise IndexError.
    """
    if pd.isna(value) or not value.startswith("$"):
        return 0
    return value[1:]


# Both fine columns are cleaned the same way, so one helper serves both.
for fine_column in ("Manhattan 96th St. & Below", "All Other Areas"):
    fines_data[fine_column] = fines_data[fine_column].map(_strip_dollar)

In [4]:
# Convert the violation code and both fine columns to integers, one value at a time.
for int_column in ("Violation Code", "Manhattan 96th St. & Below", "All Other Areas"):
    fines_data[int_column] = fines_data[int_column].map(int)

# Precincts billed at the "Manhattan 96th St. & Below" rate; every other
# precinct uses the "All Other Areas" rate.
# NOTE(review): the values look like the NYPD precinct numbers south of 96th St. - confirm.
Man_96_Below = [1, 5, 6, 7, 9, 10, 13, 14, 17, 18, 19, 20, 22]

# Violation codes removed before pricing (loop-invariant, so defined once).
# NOTE(review): presumably codes without a usable fine amount - confirm.
Missing_Fines = [0, 15, 23, 24, 34, 41, 43, 88, 90, 95, 99]

year_list = list(range(2014, 2024))
group_keys = ["Year", "Month", "Violation Precinct", "Violation Code"]
yearly_totals = []  # one aggregated frame per year; concatenated once after the loop

for year in year_list:
    print(f"Started {year}")
    data = pd.read_csv(f"../../data/main_data/cleaned/Parking_Violations_Issued_{year}.csv")

    # "Issue Date" is sliced as text: first two characters -> month, last four -> year.
    data["Month"] = data["Issue Date"].str[:2].astype("int64")
    data["Year"] = data["Issue Date"].str[-4:].astype("int64")
    data["Violation Precinct"] = data["Violation Precinct"].astype("int64")
    data["Violation Code"] = data["Violation Code"].astype("int64")
    data = data.drop(columns=["Issue Date", "Vehicle Body Type", "Vehicle Color", "Plate Type"])

    # Keep only codes that are below 99 and not in the exclusion list.
    priced_mask = (~data["Violation Code"].isin(Missing_Fines)) & (data["Violation Code"] < 99)
    clean_data = data[priced_mask]

    # Ticket count per (year, month, precinct, code). With as_index=False, size()
    # already returns a DataFrame whose count column is named "size"; rename it.
    # (reset_index(names="Size") would instead store the old row index as "Size".)
    codes = clean_data.groupby(group_keys, as_index=False).size().rename(columns={"size": "Size"})

    # Inner join: codes that have no row in fines_data are dropped here.
    merge = pd.merge(codes, fines_data, on="Violation Code")

    # The fine rate depends on where the ticket was issued, so the precinct
    # (not the violation code) is what must be tested against Man_96_Below.
    in_manhattan_below_96 = merge["Violation Precinct"].isin(Man_96_Below)
    merge["Fine"] = np.where(
        in_manhattan_below_96,
        merge["Manhattan 96th St. & Below"],
        merge["All Other Areas"],
    )
    merge["Total Fines"] = merge["Size"] * merge["Fine"]
    merge = merge.drop(columns=["Manhattan 96th St. & Below", "All Other Areas"])

    # Collapse the per-code totals into one sum per (year, month, precinct).
    total_sum = (
        merge.groupby(["Year", "Month", "Violation Precinct"])["Total Fines"]
        .sum()
        .reset_index(name="Sum")
    )
    yearly_totals.append(total_sum)

    print(f"Finished {year}")

# Single concat instead of growing the frame inside the loop.
final_data = pd.concat(yearly_totals, ignore_index=True)
final_data.to_csv("../../data/final_data_to_join/fines_data.csv", index=False)

Started 2014
Finished 2014
Started 2015
Finished 2015
Started 2016
Finished 2016
Started 2017
Finished 2017
Started 2018
Finished 2018
Started 2019
Finished 2019
Started 2020
Finished 2020
Started 2021
Finished 2021
Started 2022
Finished 2022
Started 2023
Finished 2023
