In [44]:
import datetime
from collections import namedtuple
from itertools import islice

In [45]:
# Project with data validation
def validate_int(data, default = 'NAN'):
    try:
        data = int(data)
    except ValueError as er:
        data = default
        print(f"Input is not of the format that can be converted to integer, returning default {default}")
    finally:
        return data

def validate_str(data, default=None):
    try:
        data = str(data).strip()
        if len(data) == 0:
            data = default
    except:
        data = default
    finally:
        return data

def validate_dt(data, format = '%m/%d/%Y',default=None):
    try:
        data = datetime.datetime.strptime(data,format)
    except ValueError:
        print(f"Input is not of the format that can be converted to datetime, returning default {default}")
        data = default
    finally:
        return data

In [46]:
data_types = 'INT','STRING','STRING','STRING','DATETIME','INT','STRING','STRING','STRING'
def type_caster(val,type_):
    if type_ == 'INT':
        return validate_int(val)
    elif type_ == 'DATETIME':
        return validate_dt(val,'%m/%d/%Y')
    return validate_str(val) # don't change if it's a string already

In [74]:
with open('nyc_parking_tickets_extract.csv') as f:
    file_iter = iter(f)
    # Strip \n from string next(file_iter) and split the result on ',' 
    headers = next(file_iter).strip('\n').split(',') 
    # Replace " " by "_" in each element in headers in order to get propertly
    # formatted names for namedtuple
    headers = [elt.replace(" ","_").lower() for elt in headers]
    # Make a namedtuple for headers
    Cars = namedtuple('Cars',headers)
    tickets = []
    for r in file_iter:
        r = next(file_iter).strip('\n').split(',') # split as with headers
        r = (type_caster(elt[0],elt[1]) for elt in zip(r,data_types)) #typecast each element using type_caster
        r = Cars(*r) # pass the row to namedtuple
        tickets.append(r) # append the result to the tickets list

 

for ticket in islice(tickets,10):
    print(ticket)


Cars(summons_number=4006462396, plate_id='22834JK', registration_state='NY', plate_type='COM', issue_date=datetime.datetime(2016, 9, 30, 0, 0), violation_code=5, vehicle_body_type='VAN', vehicle_make='CHEVR', violation_description='BUS LANE VIOLATION')
Cars(summons_number=4006265037, plate_id='FZX9232', registration_state='NY', plate_type='PAS', issue_date=datetime.datetime(2016, 8, 23, 0, 0), violation_code=5, vehicle_body_type='SUBN', vehicle_make='FORD', violation_description='BUS LANE VIOLATION')
Cars(summons_number=4007156700, plate_id='92163MG', registration_state='NY', plate_type='COM', issue_date=datetime.datetime(2017, 4, 13, 0, 0), violation_code=5, vehicle_body_type='VAN', vehicle_make='FRUEH', violation_description='BUS LANE VIOLATION')
Cars(summons_number=4006943052, plate_id='2AE3984', registration_state='MD', plate_type='PAS', issue_date=datetime.datetime(2017, 2, 1, 0, 0), violation_code=5, vehicle_body_type='SW', vehicle_make='LINCO', violation_description='BUS LANE VI

In [72]:
from collections import defaultdict
def violation_counts():
    """Returns the dictionary of sorted violation counts by make in greatest-to-lowest order"""
    num_violations = defaultdict(int)
    for row in tickets: # vehicle make is at index 7
        make = row.vehicle_make
        num_violations[make] += 1
    #Check:
    assert sum(num_violations.values())==500, "Violations don't add up"
    #Sort the dictionary based on the values highest to lowest
    counts = ((key, val) for key, val in num_violations.items())
    counts_sorted = dict(sorted(counts, key=lambda el:el[1], reverse=True))
    #Keep only keys that are not None
    counts_sorted_not_null = {key:val for key, val in counts_sorted.items() if key is not None}
    return counts_sorted_not_null

unsorted_counts = violation_counts()
print(unsorted_counts)


{'TOYOT': 54, 'FORD': 52, 'HONDA': 51, 'CHEVR': 41, 'NISSA': 34, 'FRUEH': 22, 'DODGE': 21, 'HYUND': 18, 'ME/BE': 18, 'GMC': 18, 'BMW': 15, 'LEXUS': 13, 'JEEP': 11, 'INTER': 11, 'SUBAR': 10, 'NS/OT': 9, 'LINCO': 7, 'CHRYS': 7, 'AUDI': 7, 'ACURA': 7, 'VOLVO': 6, 'INFIN': 6, 'HIN': 5, 'ISUZU': 4, 'CADIL': 4, 'VOLKS': 4, 'MITSU': 4, 'JAGUA': 3, 'MERCU': 3, 'KENWO': 3, 'KIA': 3, 'BUICK': 3, 'SATUR': 2, 'MAZDA': 2, 'SMART': 2, 'ROVER': 2, 'FIR': 1, 'WORKH': 1, 'OLDSM': 1, 'PETER': 1, 'YAMAH': 1, 'MINI': 1, 'SPRI': 1, 'PLYMO': 1, 'SCION': 1, 'PORSC': 1, 'UPS': 1, 'UD': 1, 'STAR': 1, 'SAAB': 1, 'AM/T': 1, 'HINO': 1, 'MI/F': 1}
