In [1]:
import csv
from collections import namedtuple

FILE_SAMPLE_CHARS_COUNT = 500


def get_csv_dialect(fname: str):
    """
    Guess the CSV dialect of the file ``fname``.

    Only the first ``FILE_SAMPLE_CHARS_COUNT`` characters of the file are
    read; that sample is handed to ``csv.Sniffer().sniff`` and whatever it
    returns is passed straight back to the caller.
    """
    with open(fname) as sample_source:
        head = sample_source.read(FILE_SAMPLE_CHARS_COUNT)
        return csv.Sniffer().sniff(head)


def get_data_row_factory(fname: str):
    """
    Build the ``DataRow`` named tuple type for the CSV file ``fname``.

    Field names come from the file's first row: each header is case-folded
    and its spaces are replaced with underscores.

    The header row is parsed with ``csv.reader`` using the sniffed dialect
    instead of a plain ``str.split`` on the delimiter, so quoted headers,
    delimiters inside quotes and the dialect's ``skipinitialspace`` setting
    are handled the same way the data rows will be.

    Returns the named tuple class. ``namedtuple`` raises ``ValueError`` when
    a resulting name is not a valid, unique identifier.
    """
    # Sniff first: get_csv_dialect opens the file on its own, so there is no
    # need to keep a second handle open while it runs.
    dialect = get_csv_dialect(fname)
    # newline="" is the mode the csv module documents for files it reads.
    with open(fname, newline="") as f:
        headers = next(csv.reader(f, dialect))
    field_names = [str(h).casefold().replace(" ", "_") for h in headers]
    return namedtuple("DataRow", field_names)


class CsvFileReader:
    """
    CSV file reader.

    Acts as a context manager that owns the underlying file handle, and as
    an iterator that yields one ``DataRow`` named tuple per data row. The
    header row is consumed on entry; it is only used to name the fields.

    The dialect and the ``DataRow`` type are worked out once, in
    ``__init__``, by the module-level helpers ``get_csv_dialect`` and
    ``get_data_row_factory``.
    """
    _FILE_SAMPLE_CHARS_COUNT = FILE_SAMPLE_CHARS_COUNT

    def __init__(self, fname: str):
        self._fname = fname
        self._reader = None
        self._file = None  # stays None until the context is entered
        self._dialect = self._get_csv_dialect()
        self.DataRow = self._get_data_row_factory()

    def __enter__(self):
        # newline="" is the mode the csv module documents for reader input.
        self._file = open(self._fname, newline="")
        try:
            self._reader = csv.reader(self._file, self._dialect)
            next(self._reader)  # ignore the headers
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the handle
            # has to be released here or it would leak.
            self._file.close()
            raise
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._file.close()
        return False  # never suppress an exception from the with-body

    def __iter__(self):
        return self

    def __next__(self):
        # No open file (context never entered, or already exited) is treated
        # as an exhausted iterator.
        if self._file is None or self._file.closed:
            raise StopIteration

        return self.DataRow(*next(self._reader))

    def _get_data_row_factory(self):
        return get_data_row_factory(self._fname)

    def _get_csv_dialect(self):
        return get_csv_dialect(self._fname)


In [2]:
with CsvFileReader("personal_info.csv") as file:
    # Step the reader by hand for the first four rows ...
    for nth in range(4):
        print(next(file))
    # ... then let a for-loop drain whatever is left.
    for row in file:
        print(row)

DataRow(ssn='100-53-9824', first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic')
DataRow(ssn='101-71-4702', first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao')
DataRow(ssn='101-84-0356', first_name='Nomi', last_name='Lipprose', gender='Female', language='Yiddish')
DataRow(ssn='104-22-0928', first_name='Justinian', last_name='Kunzelmann', gender='Male', language='Dhivehi')
DataRow(ssn='104-84-7144', first_name='Claudianus', last_name='Brixey', gender='Male', language='Afrikaans')
DataRow(ssn='105-27-5541', first_name='Federico', last_name='Aggett', gender='Male', language='Chinese')
DataRow(ssn='105-85-7486', first_name='Angelina', last_name='McAvey', gender='Female', language='Punjabi')
DataRow(ssn='105-91-5022', first_name='Moselle', last_name='Apfel', gender='Female', language='Latvian')
DataRow(ssn='105-91-7777', first_name='Audi', last_name='Roach', gender='Female', language='Estonian')
DataRow(ssn='106-35-1938', first_name='Macke

In [3]:
with CsvFileReader("cars.csv") as file:
    # Step the reader by hand for the first four rows ...
    for nth in range(4):
        print(next(file))
    # ... then let a for-loop drain whatever is left.
    for row in file:
        print(row)

DataRow(car='Chevrolet Chevelle Malibu', mpg='18.0', cylinders='8', displacement='307.0', horsepower='130.0', weight='3504.', acceleration='12.0', model='70', origin='US')
DataRow(car='Buick Skylark 320', mpg='15.0', cylinders='8', displacement='350.0', horsepower='165.0', weight='3693.', acceleration='11.5', model='70', origin='US')
DataRow(car='Plymouth Satellite', mpg='18.0', cylinders='8', displacement='318.0', horsepower='150.0', weight='3436.', acceleration='11.0', model='70', origin='US')
DataRow(car='AMC Rebel SST', mpg='16.0', cylinders='8', displacement='304.0', horsepower='150.0', weight='3433.', acceleration='12.0', model='70', origin='US')
DataRow(car='Ford Torino', mpg='17.0', cylinders='8', displacement='302.0', horsepower='140.0', weight='3449.', acceleration='10.5', model='70', origin='US')
DataRow(car='Ford Galaxie 500', mpg='15.0', cylinders='8', displacement='429.0', horsepower='198.0', weight='4341.', acceleration='10.0', model='70', origin='US')
DataRow(car='Chevr

In [4]:
from contextlib import contextmanager

@contextmanager
def csv_file_reader(fname: str):
    """
    Open the CSV file ``fname`` and yield a ``csv.reader`` positioned just
    after the header row.

    Rows come back exactly as ``csv.reader`` produces them. The file is
    closed when the ``with`` block ends, whether or not it raised.
    """
    dialect = get_csv_dialect(fname)
    # ``with`` closes the handle on every exit path, and if ``open`` itself
    # fails there is no unbound ``f`` for cleanup code to trip over.
    # newline="" is the mode the csv module documents for reader input.
    with open(fname, newline="") as f:
        csv_reader = csv.reader(f, dialect)
        next(csv_reader)  # ignore headers
        yield csv_reader


In [5]:
fname = "personal_info.csv"
DataRow = get_data_row_factory(fname)
with csv_file_reader(fname) as file:
    # The first five rows are wrapped in DataRow by hand ...
    for nth in range(5):
        print(DataRow(*next(file)))
    # ... the remaining rows are printed exactly as the reader yields them.
    for row in file:
        print(row)


DataRow(ssn='100-53-9824', first_name='Sebastiano', last_name='Tester', gender='Male', language='Icelandic')
DataRow(ssn='101-71-4702', first_name='Cayla', last_name='MacDonagh', gender='Female', language='Lao')
DataRow(ssn='101-84-0356', first_name='Nomi', last_name='Lipprose', gender='Female', language='Yiddish')
DataRow(ssn='104-22-0928', first_name='Justinian', last_name='Kunzelmann', gender='Male', language='Dhivehi')
DataRow(ssn='104-84-7144', first_name='Claudianus', last_name='Brixey', gender='Male', language='Afrikaans')
['105-27-5541', 'Federico', 'Aggett', 'Male', 'Chinese']
['105-85-7486', 'Angelina', 'McAvey', 'Female', 'Punjabi']
['105-91-5022', 'Moselle', 'Apfel', 'Female', 'Latvian']
['105-91-7777', 'Audi', 'Roach', 'Female', 'Estonian']
['106-35-1938', 'Mackenzie', 'Nussey', 'Male', 'Swedish']
['106-36-3293', 'Martino', 'Tregoning', 'Male', 'Tok Pisin']
['110-84-3641', 'Amberly', 'Huws', 'Female', 'Papiamento']
['111-35-1034', 'Giacopo', 'Timperley', 'Male', 'Gagauz']


In [6]:
fname = "cars.csv"
DataRow = get_data_row_factory(fname)
with csv_file_reader(fname) as file:
    # The first five rows are wrapped in DataRow by hand ...
    for nth in range(5):
        print(DataRow(*next(file)))
    # ... the remaining rows are printed exactly as the reader yields them.
    for row in file:
        print(row)


DataRow(car='Chevrolet Chevelle Malibu', mpg='18.0', cylinders='8', displacement='307.0', horsepower='130.0', weight='3504.', acceleration='12.0', model='70', origin='US')
DataRow(car='Buick Skylark 320', mpg='15.0', cylinders='8', displacement='350.0', horsepower='165.0', weight='3693.', acceleration='11.5', model='70', origin='US')
DataRow(car='Plymouth Satellite', mpg='18.0', cylinders='8', displacement='318.0', horsepower='150.0', weight='3436.', acceleration='11.0', model='70', origin='US')
DataRow(car='AMC Rebel SST', mpg='16.0', cylinders='8', displacement='304.0', horsepower='150.0', weight='3433.', acceleration='12.0', model='70', origin='US')
DataRow(car='Ford Torino', mpg='17.0', cylinders='8', displacement='302.0', horsepower='140.0', weight='3449.', acceleration='10.5', model='70', origin='US')
['Ford Galaxie 500', '15.0', '8', '429.0', '198.0', '4341.', '10.0', '70', 'US']
['Chevrolet Impala', '14.0', '8', '454.0', '220.0', '4354.', '9.0', '70', 'US']
['Plymouth Fury iii'

In [7]:
from contextlib import contextmanager

@contextmanager
def csv_file_reader(fname: str):
    """
    Open the CSV file ``fname`` and yield an iterator of ``DataRow`` named
    tuples, one per data row.

    The header row is skipped here because ``get_data_row_factory`` has
    already turned it into the ``DataRow`` field names. Rows are produced
    lazily, so they must be consumed inside the ``with`` block, while the
    file is still open.
    """
    dialect = get_csv_dialect(fname)
    # Build the row type up front; the helper opens the file by itself.
    DataRow = get_data_row_factory(fname)

    def iter_(csv_reader):
        for row in csv_reader:
            yield DataRow(*row)

    # ``with`` closes the handle on every exit path, and if ``open`` itself
    # fails there is no unbound ``f`` for cleanup code to trip over.
    # newline="" is the mode the csv module documents for reader input.
    with open(fname, newline="") as f:
        csv_reader = csv.reader(f, dialect)
        next(csv_reader)  # ignore headers as they are handled by DataRow
        yield iter_(csv_reader)

In [8]:
fname = "cars.csv"
# Kept bound at module level: later cells may still refer to ``DataRow``.
DataRow = get_data_row_factory(fname)
with csv_file_reader(fname) as file:
    # csv_file_reader now yields DataRow instances itself, so the rows are
    # printed as they arrive instead of being wrapped a second time.
    for nth in range(5):
        print(next(file))
    for row in file:
        print(row)


DataRow(car='Chevrolet Chevelle Malibu', mpg='18.0', cylinders='8', displacement='307.0', horsepower='130.0', weight='3504.', acceleration='12.0', model='70', origin='US')
DataRow(car='Buick Skylark 320', mpg='15.0', cylinders='8', displacement='350.0', horsepower='165.0', weight='3693.', acceleration='11.5', model='70', origin='US')
DataRow(car='Plymouth Satellite', mpg='18.0', cylinders='8', displacement='318.0', horsepower='150.0', weight='3436.', acceleration='11.0', model='70', origin='US')
DataRow(car='AMC Rebel SST', mpg='16.0', cylinders='8', displacement='304.0', horsepower='150.0', weight='3433.', acceleration='12.0', model='70', origin='US')
DataRow(car='Ford Torino', mpg='17.0', cylinders='8', displacement='302.0', horsepower='140.0', weight='3449.', acceleration='10.5', model='70', origin='US')
DataRow(car='Ford Galaxie 500', mpg='15.0', cylinders='8', displacement='429.0', horsepower='198.0', weight='4341.', acceleration='10.0', model='70', origin='US')
DataRow(car='Chevr

In [9]:
from contextlib import contextmanager

@contextmanager
def csv_file_reader(fname: str):
    """
    Open the CSV file ``fname`` and yield a generator of ``DataRow`` named
    tuples, one per data row.

    The header row is skipped here because ``get_data_row_factory`` has
    already turned it into the ``DataRow`` field names. The generator is
    lazy, so it must be consumed inside the ``with`` block, while the file
    is still open.
    """
    dialect = get_csv_dialect(fname)
    # Build the row type up front; the helper opens the file by itself.
    DataRow = get_data_row_factory(fname)
    # ``with`` closes the handle on every exit path, and if ``open`` itself
    # fails there is no unbound ``f`` for cleanup code to trip over.
    # newline="" is the mode the csv module documents for reader input.
    with open(fname, newline="") as f:
        csv_reader = csv.reader(f, dialect)
        next(csv_reader)  # ignore headers as they are handled by DataRow
        yield (DataRow(*row) for row in csv_reader)

# Bind the file name here so this cell does not depend on a variable left
# over from an earlier cell.
fname = "cars.csv"
with csv_file_reader(fname) as file:
    # The reader already yields DataRow instances; print them as they come.
    for nth in range(5):
        print(next(file))


DataRow(car='Chevrolet Chevelle Malibu', mpg='18.0', cylinders='8', displacement='307.0', horsepower='130.0', weight='3504.', acceleration='12.0', model='70', origin='US')
DataRow(car='Buick Skylark 320', mpg='15.0', cylinders='8', displacement='350.0', horsepower='165.0', weight='3693.', acceleration='11.5', model='70', origin='US')
DataRow(car='Plymouth Satellite', mpg='18.0', cylinders='8', displacement='318.0', horsepower='150.0', weight='3436.', acceleration='11.0', model='70', origin='US')
DataRow(car='AMC Rebel SST', mpg='16.0', cylinders='8', displacement='304.0', horsepower='150.0', weight='3433.', acceleration='12.0', model='70', origin='US')
DataRow(car='Ford Torino', mpg='17.0', cylinders='8', displacement='302.0', horsepower='140.0', weight='3449.', acceleration='10.5', model='70', origin='US')
