In [1]:
# CSV file opened by get_ticket_details(); this is a relative path, so it is
# resolved against the kernel's current working directory.
file_name = 'nyc_parking_tickets_extract-1.csv'

In [2]:
import csv
from collections import namedtuple, Counter
from datetime import datetime

## Goal 1

Create a lazy iterator that returns a named tuple of the data in each row. The data types should be appropriate: if a column holds a date, store it as a date in the named tuple; if a field is an integer, store it as an integer; and so on.


In [3]:
def cast(data_type, value):
    """Convert a raw field value to the Python type named by ``data_type``.

    ``'INT'`` is converted with ``int``; ``'DATE'`` is parsed using the
    ``%m/%d/%Y`` format and returned as a ``datetime.date``; every other
    type name falls back to ``str``.
    """
    if data_type == 'DATE':
        parsed = datetime.strptime(value, '%m/%d/%Y')
        return parsed.date()
    # Only 'INT' gets numeric treatment; anything else is kept as text.
    converter = int if data_type == 'INT' else str
    return converter(value)
    
def cast_row(data_types, data_row):
    """Cast each field of a row according to its positional type name.

    ``data_types`` and ``data_row`` are paired element by element (pairing
    stops at the shorter of the two) and each pair is passed to ``cast``.
    Returns the converted values as a list.
    """
    converted = []
    for type_name, raw_value in zip(data_types, data_row):
        converted.append(cast(type_name, raw_value))
    return converted

In [4]:

def get_ticket_details():
    """Lazily yield one ``Ticket`` named tuple per non-blank data row of the CSV.

    The header row supplies the field names (surrounding whitespace stripped,
    spaces replaced by underscores). Each following row is converted with
    ``cast_row`` using the positional column types listed below. The file is
    read from the module-level ``file_name`` and stays open only while the
    generator is being consumed.

    Yields:
        Ticket: named tuple whose fields follow the CSV header, with values
        cast to int / date / str by column position.
    """
    # Column types in CSV column order; must line up with the header.
    ticket_record_types = ['INT', 'STRING', 'STRING', 'STRING', 'DATE', 'INT', 'STRING', 'STRING', 'STRING']

    # newline='' is what the csv module asks for, so newlines embedded in
    # quoted fields are interpreted by the reader rather than by open().
    with open(file_name, mode='r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)

        header = next(csv_reader, None)
        if header is None:
            # Empty file: end the generator cleanly instead of letting a
            # StopIteration escape (which would surface as RuntimeError).
            return

        fieldnames = [fieldname.strip().replace(" ", "_") for fieldname in header]
        Ticket = namedtuple('Ticket', fieldnames)

        for row in csv_reader:
            if not row:
                # csv.reader yields [] for a blank line; it carries no ticket.
                continue
            yield Ticket(*cast_row(ticket_record_types, row))


In [5]:
# Preview: print tickets one by one and stop pulling rows from the generator
# once `limit` of them have been shown.
limit = 3
count = 0

for count, ticket in enumerate(get_ticket_details(), start=1):
    print(ticket)
    print(' -')
    if count >= limit:
        break

Ticket(Summons_Number=4006478550, Plate_ID='VAD7274', Registration_State='VA', Plate_Type='PAS', Issue_Date=datetime.date(2016, 10, 5), Violation_Code=5, Vehicle_Body_Type='4D', Vehicle_Make='BMW', Violation_Description='BUS LANE VIOLATION')
 -
Ticket(Summons_Number=4006462396, Plate_ID='22834JK', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.date(2016, 9, 30), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='CHEVR', Violation_Description='BUS LANE VIOLATION')
 -
Ticket(Summons_Number=4007117810, Plate_ID='21791MG', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.date(2017, 4, 10), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='DODGE', Violation_Description='BUS LANE VIOLATION')
 -


## Goal 2

Calculate the number of violations by car make


In [6]:
# Tally tickets per vehicle make. Counter consumes the generator expression
# directly, so rows are still processed one at a time and no loop variable is
# left rebound in the notebook namespace.
# NOTE: the variable keeps its original spelling because the next cell
# references it by this name.
voilations_counter = Counter(
    ticket.Vehicle_Make for ticket in get_ticket_details()
)

In [7]:
# Bare last expression: the notebook renders the Counter's repr
# (vehicle make -> number of tickets) as this cell's output.
voilations_counter

Counter({'BMW': 34,
         'CHEVR': 76,
         'DODGE': 45,
         'FORD': 104,
         'FRUEH': 44,
         'HONDA': 106,
         'LINCO': 12,
         'TOYOT': 112,
         'CADIL': 9,
         'CHRYS': 12,
         'FIR': 1,
         'GMC': 35,
         'HYUND': 35,
         'JAGUA': 3,
         'JEEP': 22,
         'LEXUS': 26,
         'ME/BE': 38,
         'MERCU': 4,
         'MITSU': 11,
         'NISSA': 70,
         'HIN': 6,
         'NS/OT': 18,
         'WORKH': 2,
         'ACURA': 12,
         'AUDI': 12,
         'INTER': 25,
         'ISUZU': 10,
         'KENWO': 5,
         'KIA': 8,
         'OLDSM': 1,
         'SUBAR': 18,
         'VOLVO': 12,
         'SATUR': 2,
         'SMART': 3,
         'INFIN': 13,
         'PETER': 1,
         '': 5,
         'CITRO': 1,
         'ROVER': 5,
         'BUICK': 5,
         'GEO': 1,
         'MAZDA': 5,
         'PORSC': 3,
         'VOLKS': 8,
         'YAMAH': 1,
         'BSA': 1,
         'MINI': 1,
         