#### Create tickets.csv iterator with data parsing

In [1]:
import datetime
import contextlib
from collections import namedtuple


class TicketsDataIterator:
    def __init__(self, file_name: str):
        self.file_name = file_name
        with open(self.file_name) as f:
            header_names = next(f, None)
            if not header_names:
                raise ValueError("File must not be empty.")

        header_names = header_names.replace("\n", "").replace(" ", "_")
        header_names = ",".join([h.casefold().strip() for h in header_names.split(",")])
        self.DataRow = namedtuple(
            "DataRow", 
            header_names,
        )

        self.data_row_gen = self._get_data_row()

    def __iter__(self):
        return self

    def __next__(self):
        row = next(self.data_row_gen, None)
        if not row:
            raise StopIteration

        return self.DataRow(*row)

    def _get_data_row(self):
        with open(self.file_name) as f:
            next(f)  # ignore headers
            for row in f:
                row = row.replace("\n", "").split(",")
                yield from self._parse_data_types(row)

    def _parse_data_types(self, row: list[str]) -> list[str | int | datetime.date]:
        data_row = []
        for el in row:
            with contextlib.suppress(ValueError):
                data_row.append(datetime.datetime.strptime(el, "%m/%d/%Y").date())
                continue
                
            if el.isdigit():
                data_row.append(int(el))
            else:
                data_row.append(str(el))
        yield data_row


In [2]:
# Open the tickets file; rows are read lazily as the iterator is advanced.
tdi = TicketsDataIterator(file_name="tickets.csv")

In [3]:
# Peek at the first three parsed rows (this consumes them from `tdi`).
tuple(next(tdi) for _ in range(3))

(DataRow(summons_number=4006478550, plate_id='VAD7274', registration_state='VA', plate_type='PAS', issue_date=datetime.date(2016, 10, 5), violation_code=5, vehicle_body_type='4D', vehicle_make='BMW', violation_description='BUS LANE VIOLATION'),
 DataRow(summons_number=4006462396, plate_id='22834JK', registration_state='NY', plate_type='COM', issue_date=datetime.date(2016, 9, 30), violation_code=5, vehicle_body_type='VAN', vehicle_make='CHEVR', violation_description='BUS LANE VIOLATION'),
 DataRow(summons_number=4007117810, plate_id='21791MG', registration_state='NY', plate_type='COM', issue_date=datetime.date(2017, 4, 10), violation_code=5, vehicle_body_type='VAN', vehicle_make='DODGE', violation_description='BUS LANE VIOLATION'))

In [4]:
# Count the tickets issued to each vehicle make, using a fresh iterator so
# the whole file is read from the first data row.
violations = {}
for data_row in TicketsDataIterator("tickets.csv"):
    try:
        violations[data_row.vehicle_make] += 1
    except KeyError:
        # First ticket seen for this make.
        violations[data_row.vehicle_make] = 1

In [5]:
# Show the per-make ticket counts collected by the loop above.
violations

{'BMW': 34,
 'CHEVR': 76,
 'DODGE': 45,
 'FORD': 104,
 'FRUEH': 44,
 'HONDA': 106,
 'LINCO': 12,
 'TOYOT': 112,
 'CADIL': 9,
 'CHRYS': 12,
 'FIR': 1,
 'GMC': 35,
 'HYUND': 35,
 'JAGUA': 3,
 'JEEP': 22,
 'LEXUS': 26,
 'ME/BE': 38,
 'MERCU': 4,
 'MITSU': 11,
 'NISSA': 70,
 'HIN': 6,
 'NS/OT': 18,
 'WORKH': 2,
 'ACURA': 12,
 'AUDI': 12,
 'INTER': 25,
 'ISUZU': 10,
 'KENWO': 5,
 'KIA': 8,
 'OLDSM': 1,
 'SUBAR': 18,
 'VOLVO': 12,
 'SATUR': 2,
 'SMART': 3,
 'INFIN': 13,
 'PETER': 1,
 '': 5,
 'CITRO': 1,
 'ROVER': 5,
 'BUICK': 5,
 'GEO': 1,
 'MAZDA': 5,
 'PORSC': 3,
 'VOLKS': 8,
 'YAMAH': 1,
 'BSA': 1,
 'MINI': 1,
 'PONTI': 1,
 'SPRI': 1,
 'PLYMO': 1,
 'SCION': 2,
 'UPS': 1,
 'FIAT': 1,
 'UD': 1,
 'UTILI': 1,
 'GMCQ': 1,
 'SAAB': 2,
 'HINO': 2,
 'STAR': 1,
 'AM/T': 1,
 'MI/F': 1}

In [6]:
# Rank the makes from most to fewest violations; sorting is stable, so makes
# with equal counts keep the order in which they were first counted.
sorted(violations.items(), key=lambda make_count: -make_count[1])

[('TOYOT', 112),
 ('HONDA', 106),
 ('FORD', 104),
 ('CHEVR', 76),
 ('NISSA', 70),
 ('DODGE', 45),
 ('FRUEH', 44),
 ('ME/BE', 38),
 ('GMC', 35),
 ('HYUND', 35),
 ('BMW', 34),
 ('LEXUS', 26),
 ('INTER', 25),
 ('JEEP', 22),
 ('NS/OT', 18),
 ('SUBAR', 18),
 ('INFIN', 13),
 ('LINCO', 12),
 ('CHRYS', 12),
 ('ACURA', 12),
 ('AUDI', 12),
 ('VOLVO', 12),
 ('MITSU', 11),
 ('ISUZU', 10),
 ('CADIL', 9),
 ('KIA', 8),
 ('VOLKS', 8),
 ('HIN', 6),
 ('KENWO', 5),
 ('', 5),
 ('ROVER', 5),
 ('BUICK', 5),
 ('MAZDA', 5),
 ('MERCU', 4),
 ('JAGUA', 3),
 ('SMART', 3),
 ('PORSC', 3),
 ('WORKH', 2),
 ('SATUR', 2),
 ('SCION', 2),
 ('SAAB', 2),
 ('HINO', 2),
 ('FIR', 1),
 ('OLDSM', 1),
 ('PETER', 1),
 ('CITRO', 1),
 ('GEO', 1),
 ('YAMAH', 1),
 ('BSA', 1),
 ('MINI', 1),
 ('PONTI', 1),
 ('SPRI', 1),
 ('PLYMO', 1),
 ('UPS', 1),
 ('FIAT', 1),
 ('UD', 1),
 ('UTILI', 1),
 ('GMCQ', 1),
 ('STAR', 1),
 ('AM/T', 1),
 ('MI/F', 1)]