In [1]:
# imports
import csv
from collections import Counter
from collections import namedtuple
from datetime import datetime

# Goal 1

### Create a lazy iterator that will return a named tuple of the data in each row. The data types should be appropriate - i.e. if the column is a date, you should be storing dates in the named tuple, if the field is an integer, then it should be stored as an integer, etc.

In [2]:
def cast_row(row):
    """Cast the raw string fields of one ticket row to proper data types.

    The list is modified in place and the same list object is returned.

    Args:
        row: list of strings for one record. Index 0 holds the Summons
            Number, index 4 the Issue Date as month/day/year text, and
            index 5 the Violation Code.

    Returns:
        The same list, with indexes 0 and 5 converted to int and index 4
        converted to a datetime.date.

    Raises:
        ValueError: if a numeric field is not an integer or the date text
            does not match month/day/year.
        IndexError: if the row has fewer than six fields.
    """
    row[0] = int(row[0])  # Summons Number
    row[5] = int(row[5])  # Violation Code
    # Parse with the separators left in place. Removing the slashes first
    # makes non-padded dates ambiguous: "1/12/2016" would become "1122016",
    # which '%m%d%Y' reads as November 2 instead of January 12.
    row[4] = datetime.strptime(row[4], '%m/%d/%Y').date()  # Issue Date
    return row

In [3]:
def read_ticket_lazy(file_name):
    """Generator yielding one row at a time from a CSV file as a named tuple.

    The first record of the file supplies the field names (spaces are
    replaced with underscores). Every following non-empty record is passed
    through cast_row and yielded as a Ticket named tuple.

    Args:
        file_name: path of the CSV file to read.

    Yields:
        Ticket named tuples, one per data row. Nothing is yielded for an
        empty file.
    """
    # newline='' lets the csv module deal with line endings itself.
    with open(file_name, newline='') as file:
        # csv.reader honours quoted fields, so a comma inside a quoted value
        # does not split the column the way a plain str.split(',') would.
        reader = csv.reader(file)
        header_row = next(reader, None)
        if header_row is None:  # empty file: nothing to yield
            return
        headers = [item.replace(" ", "_") for item in header_row]
        Ticket = namedtuple('Ticket', headers)  # Create named tuple type
        for data in reader:
            if not data:  # csv.reader returns [] for blank lines
                continue
            yield Ticket(*cast_row(data))  # cast to the data types, then wrap

## get generator object

In [4]:
# Get the generator object. read_ticket_lazy is a generator function, so no rows
# are produced until the generator is iterated.
ticket_gen = read_ticket_lazy('nyc_parking_tickets_extract-1.csv')
ticket_gen

<generator object read_ticket_lazy at 0x7fa99b41fe40>

# Get an iterable (list) from the generator object

In [5]:
# Get an iterable (list) from the generator object; list() consumes the generator
ticket_list = list(ticket_gen)
# Find the number of elements in the list
len(ticket_list)

1000

# Check a few elements (output is named tuples)

In [7]:
# Check a few elements (output is named tuples); index 999 assumes the list holds at least 1000 rows
ticket_list[1], ticket_list[999]

(Ticket(Summons_Number=4006462396, Plate_ID='22834JK', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.date(2016, 9, 30), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='CHEVR', Violation_Description='BUS LANE VIOLATION'),
 Ticket(Summons_Number=1420795430, Plate_ID='HBS5720', Registration_State='NY', Plate_Type='PAS', Issue_Date=datetime.date(2017, 4, 18), Violation_Code=98, Vehicle_Body_Type='SDN', Vehicle_Make='NISSA', Violation_Description=''))

# Check for the next object (raises StopIteration because the generator is exhausted)

In [9]:
# Check for the next object: the generator is exhausted, so next() raises StopIteration.
# The exception is caught and reported so that "Restart & Run All" does not halt at this cell.
try:
    next(ticket_gen)
except StopIteration:
    print('StopIteration: generator is exhausted')

StopIteration: 

In [10]:
# Get a new generator object that starts again from the beginning of the file
ticket_gen = read_ticket_lazy('nyc_parking_tickets_extract-1.csv')

In [11]:
# Pull one row from the generator (the first data row when run right after re-creating it)
next(ticket_gen)

Ticket(Summons_Number=4006478550, Plate_ID='VAD7274', Registration_State='VA', Plate_Type='PAS', Issue_Date=datetime.date(2016, 10, 5), Violation_Code=5, Vehicle_Body_Type='4D', Vehicle_Make='BMW', Violation_Description='BUS LANE VIOLATION')

In [12]:
# Pull the following row; each next() call advances the generator by one record
next(ticket_gen)

Ticket(Summons_Number=4006462396, Plate_ID='22834JK', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.date(2016, 9, 30), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='CHEVR', Violation_Description='BUS LANE VIOLATION')

# Goal 2
## Calculate the number of violations by car make.

In [13]:
def get_voilations_by_car_make(make_name, tickets=None):
    """Return the number of violations recorded for one car make.

    Args:
        make_name: value compared against each ticket's Vehicle_Make field.
        tickets: optional iterable of objects exposing a Vehicle_Make
            attribute. When omitted, the notebook-level ticket_list is used.

    Returns:
        The number of matching tickets as an int; 0 when none match.
    """
    if tickets is None:
        tickets = ticket_list  # default to the list built earlier in the notebook
    # Count the matches directly instead of tallying every make to read one entry.
    return sum(1 for ticket in tickets if ticket.Vehicle_Make == make_name)

In [14]:
# Number of tickets in ticket_list whose Vehicle_Make is 'BMW'
get_voilations_by_car_make('BMW')

34

In [15]:
# Number of tickets in ticket_list whose Vehicle_Make is 'CHEVR'
get_voilations_by_car_make('CHEVR')

76