<a href="https://colab.research.google.com/github/abalaji-blr/Session11_NYCParking/blob/main/NYCParking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [99]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [100]:
import sys
sys.path.append('/content/gdrive/MyDrive/Python/ParkingTicket')

Goal 1: 
Create a lazy iterator that will return a namedtuple of the data in each row. The data type should be appropriate - if the column is date, you should be storing dates in the named tuple. If the field is integer, then it should be stored as integer. etc.

Goal 2: Calculate the number of violations by car make.

Note: Try to use lazy evaluation as much as possible - it may not be always possible though. That's OK, as long as it's kept to a minimum.

In [101]:
import csv
from collections import namedtuple

In [102]:

file='/content/gdrive/MyDrive/Python/ParkingTicket/nyc_parking_tickets_extract-1.csv'

with open(file) as f:
  print(next(f))
  print(next(f))

Summons Number,Plate ID,Registration State,Plate Type,Issue Date,Violation Code,Vehicle Body Type,Vehicle Make,Violation Description

4006478550,VAD7274,VA,PAS,10/5/2016,5,4D,BMW,BUS LANE VIOLATION



In [103]:
def my_csv_reader(file):
  with open(file) as f:
    # process the header
    hdr = next(f)
    hdr.strip('\n').split(',')
    new_hdr =[ item.replace(' ', '_') for item in hdr.strip('\n').split(',')]
    yield new_hdr
    #print(new_hdr)

    # get the remaining lines
    for row in f:
      yield row


In [104]:
csv_gen  = my_csv_reader(file)
hdr = next(csv_gen)
print(hdr)

# sample rows
for _ in range(4):
  row = next(csv_gen)
  row_values = row.strip('\n').split(',')
  print(row_values)

['Summons_Number', 'Plate_ID', 'Registration_State', 'Plate_Type', 'Issue_Date', 'Violation_Code', 'Vehicle_Body_Type', 'Vehicle_Make', 'Violation_Description']
['4006478550', 'VAD7274', 'VA', 'PAS', '10/5/2016', '5', '4D', 'BMW', 'BUS LANE VIOLATION']
['4006462396', '22834JK', 'NY', 'COM', '9/30/2016', '5', 'VAN', 'CHEVR', 'BUS LANE VIOLATION']
['4007117810', '21791MG', 'NY', 'COM', '4/10/2017', '5', 'VAN', 'DODGE', 'BUS LANE VIOLATION']
['4006265037', 'FZX9232', 'NY', 'PAS', '8/23/2016', '5', 'SUBN', 'FORD', 'BUS LANE VIOLATION']


## Build parsers for the row values.

In [105]:
from datetime import datetime

In [106]:
def parse_int(value):
  try:
    return int(value)
  except ValueError:
    return None

def parse_string(value):
  try:
    return str(value)
  except ValueError:
    return None

def parse_date(value):
  try:
    date = datetime.strptime(value, '%m/%d/%Y').date()
    #print(date)
    return(date)
  except ValueError:
    return None


In [107]:
print(parse_int('4006478550'))
print(parse_string(123))
print(parse_date('10/5/2016'))

4006478550
123
2016-10-05


## Build Row Parser

In [108]:
row_value_parser = (parse_int,
                    parse_string,
                    parse_string,
                    parse_string,
                    parse_date,
                    parse_int,
                    parse_string,
                    parse_string,
                    parse_string
                    )

## Plug in Row Value Parser.

In [109]:
def my_csv_reader(file):
  with open(file) as f:
    # process the header
    hdr = next(f)
    hdr.strip('\n').split(',')
    new_hdr =[ item.replace(' ', '_') for item in hdr.strip('\n').split(',')]
    yield new_hdr
    #print(new_hdr)

    # get the remaining lines
    for row in f:
      row_values = row.strip('\n').split(',')
      parsed_row = [ func(value) for func, value in zip(row_value_parser, row_values)]
      #print(parsed_row)
      yield parsed_row

In [110]:
csv_gen  = my_csv_reader(file)
hdr = next(csv_gen)
print(hdr)

# sample rows
for _ in range(4):
  row = next(csv_gen)
  print(row)

['Summons_Number', 'Plate_ID', 'Registration_State', 'Plate_Type', 'Issue_Date', 'Violation_Code', 'Vehicle_Body_Type', 'Vehicle_Make', 'Violation_Description']
[4006478550, 'VAD7274', 'VA', 'PAS', datetime.date(2016, 10, 5), 5, '4D', 'BMW', 'BUS LANE VIOLATION']
[4006462396, '22834JK', 'NY', 'COM', datetime.date(2016, 9, 30), 5, 'VAN', 'CHEVR', 'BUS LANE VIOLATION']
[4007117810, '21791MG', 'NY', 'COM', datetime.date(2017, 4, 10), 5, 'VAN', 'DODGE', 'BUS LANE VIOLATION']
[4006265037, 'FZX9232', 'NY', 'PAS', datetime.date(2016, 8, 23), 5, 'SUBN', 'FORD', 'BUS LANE VIOLATION']


## Create Named tuple

In [111]:
Data = namedtuple('Data', hdr)

for _ in range(10):
  row = next(csv_gen)
  data = Data._make(row)
  print(data)

Data(Summons_Number=4006535600, Plate_ID='N203399C', Registration_State='NY', Plate_Type='OMT', Issue_Date=datetime.date(2016, 10, 19), Violation_Code=5, Vehicle_Body_Type='SUBN', Vehicle_Make='FORD', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=4007156700, Plate_ID='92163MG', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.date(2017, 4, 13), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='FRUEH', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=4006687989, Plate_ID='MIQ600', Registration_State='SC', Plate_Type='PAS', Issue_Date=datetime.date(2016, 11, 21), Violation_Code=5, Vehicle_Body_Type='VN', Vehicle_Make='HONDA', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=4006943052, Plate_ID='2AE3984', Registration_State='MD', Plate_Type='PAS', Issue_Date=datetime.date(2017, 2, 1), Violation_Code=5, Vehicle_Body_Type='SW', Vehicle_Make='LINCO', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=400730679

## Putting all things together

In [112]:

## to convert the input str data to relevant types
def parse_int(value):
  try:
    return int(value)
  except ValueError:
    return None

def parse_string(value):
  try:
    return str(value)
  except ValueError:
    return None

def parse_date(value):
  try:
    date = datetime.strptime(value, '%m/%d/%Y').date()
    #print(date)
    return(date)
  except ValueError:
    return None


row_value_parser = (parse_int,
                    parse_string,
                    parse_string,
                    parse_string,
                    parse_date,
                    parse_int,
                    parse_string,
                    parse_string,
                    parse_string
                    )

def my_csv_reader(file):
  '''
  CSV reader to read nyc parking ticket file.
  '''
  with open(file) as f:
    # process the header
    hdr = next(f)
    hdr.strip('\n').split(',')
    new_hdr =[ item.replace(' ', '_') for item in hdr.strip('\n').split(',')]
    yield new_hdr

    # get the remaining lines
    for row in f:
      row_values = row.strip('\n').split(',')
      parsed_row = [ func(value) for func, value in zip(row_value_parser, row_values)]
      yield parsed_row

# read the csv file!
csv_gen  = my_csv_reader(file)
hdr = next(csv_gen)

# create namedtuple data
Data = namedtuple('Data', hdr)

for _ in range(10):
  row = next(csv_gen)
  data = Data._make(row)
  print(data)

Data(Summons_Number=4006478550, Plate_ID='VAD7274', Registration_State='VA', Plate_Type='PAS', Issue_Date=datetime.date(2016, 10, 5), Violation_Code=5, Vehicle_Body_Type='4D', Vehicle_Make='BMW', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=4006462396, Plate_ID='22834JK', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.date(2016, 9, 30), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='CHEVR', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=4007117810, Plate_ID='21791MG', Registration_State='NY', Plate_Type='COM', Issue_Date=datetime.date(2017, 4, 10), Violation_Code=5, Vehicle_Body_Type='VAN', Vehicle_Make='DODGE', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=4006265037, Plate_ID='FZX9232', Registration_State='NY', Plate_Type='PAS', Issue_Date=datetime.date(2016, 8, 23), Violation_Code=5, Vehicle_Body_Type='SUBN', Vehicle_Make='FORD', Violation_Description='BUS LANE VIOLATION')
Data(Summons_Number=4006535600,

## Goal 2: Calculate the number of violations by car make.

In [113]:
def my_csv_reader2(file):
  '''
  CSV reader to read nyc parking ticket file.
  '''
  with open(file) as f:
    # process the header
    hdr = next(f)
    hdr.strip('\n').split(',')
    new_hdr =[ item.replace(' ', '_') for item in hdr.strip('\n').split(',')]

    # get the remaining lines
    for row in f:
      row_values = row.strip('\n').split(',')
      parsed_row = [ func(value) for func, value in zip(row_value_parser, row_values)]
      yield zip(new_hdr, parsed_row)

In [114]:
csv_gen = my_csv_reader2(file)

In [115]:
for _ in range(4):
  print(list(next(csv_gen)))

[('Summons_Number', 4006478550), ('Plate_ID', 'VAD7274'), ('Registration_State', 'VA'), ('Plate_Type', 'PAS'), ('Issue_Date', datetime.date(2016, 10, 5)), ('Violation_Code', 5), ('Vehicle_Body_Type', '4D'), ('Vehicle_Make', 'BMW'), ('Violation_Description', 'BUS LANE VIOLATION')]
[('Summons_Number', 4006462396), ('Plate_ID', '22834JK'), ('Registration_State', 'NY'), ('Plate_Type', 'COM'), ('Issue_Date', datetime.date(2016, 9, 30)), ('Violation_Code', 5), ('Vehicle_Body_Type', 'VAN'), ('Vehicle_Make', 'CHEVR'), ('Violation_Description', 'BUS LANE VIOLATION')]
[('Summons_Number', 4007117810), ('Plate_ID', '21791MG'), ('Registration_State', 'NY'), ('Plate_Type', 'COM'), ('Issue_Date', datetime.date(2017, 4, 10)), ('Violation_Code', 5), ('Vehicle_Body_Type', 'VAN'), ('Vehicle_Make', 'DODGE'), ('Violation_Description', 'BUS LANE VIOLATION')]
[('Summons_Number', 4006265037), ('Plate_ID', 'FZX9232'), ('Registration_State', 'NY'), ('Plate_Type', 'PAS'), ('Issue_Date', datetime.date(2016, 8, 23

In [116]:
for _ in range(4):
  print(list(next(csv_gen))[7])

('Vehicle_Make', 'FORD')
('Vehicle_Make', 'FRUEH')
('Vehicle_Make', 'HONDA')
('Vehicle_Make', 'LINCO')


In [120]:
violations_cnt = {}

csv_gen = my_csv_reader2(file)
for _ in csv_gen:
  row = list(next(csv_gen))
  car_make = row[7][1]
  #print(row[7])
  if car_make in violations_cnt:
    violations_cnt[car_make] += 1
  else:
    violations_cnt[car_make] = 1


In [118]:
violations_cnt

{'': 1,
 'ACURA': 7,
 'AM/T': 1,
 'AUDI': 7,
 'BMW': 15,
 'BUICK': 3,
 'CADIL': 4,
 'CHEVR': 40,
 'CHRYS': 7,
 'DODGE': 21,
 'FIR': 1,
 'FORD': 51,
 'FRUEH': 21,
 'GMC': 18,
 'HIN': 5,
 'HINO': 1,
 'HONDA': 51,
 'HYUND': 18,
 'INFIN': 6,
 'INTER': 11,
 'ISUZU': 4,
 'JAGUA': 3,
 'JEEP': 11,
 'KENWO': 3,
 'KIA': 3,
 'LEXUS': 13,
 'LINCO': 6,
 'MAZDA': 2,
 'ME/BE': 18,
 'MERCU': 3,
 'MI/F': 1,
 'MINI': 1,
 'MITSU': 4,
 'NISSA': 34,
 'NS/OT': 9,
 'OLDSM': 1,
 'PETER': 1,
 'PLYMO': 1,
 'PORSC': 1,
 'ROVER': 2,
 'SAAB': 1,
 'SATUR': 2,
 'SCION': 1,
 'SMART': 2,
 'SPRI': 1,
 'STAR': 1,
 'SUBAR': 10,
 'TOYOT': 54,
 'UD': 1,
 'UPS': 1,
 'VOLKS': 4,
 'VOLVO': 6,
 'WORKH': 1,
 'YAMAH': 1}

In [121]:
sum(violations_cnt.values())

500