In [1]:
import csv

In [2]:
def read_csv_in_chunks(path, chunk_size):
    """Lazily read a CSV file and yield its rows in fixed-size chunks.

    This function is a generator. Rows are parsed on demand and only the
    chunk currently being filled is accumulated here, so the file never has
    to be loaded in full. A plain ``return chunk`` would hand back just the
    first chunk and end the function; ``yield`` pauses at each chunk and
    resumes where it left off on the next iteration.

    Args:
        path: Path of the CSV file. Its first line is used as the header.
        chunk_size: Maximum number of rows per chunk; must be at least 1.

    Yields:
        Lists of row dictionaries as produced by ``csv.DictReader`` (keys
        come from the header line). Every list holds ``chunk_size`` rows
        except possibly the last one, which holds the remainder. Nothing is
        yielded when the file has no data rows.

    Raises:
        ValueError: If ``chunk_size`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts, not when
            the function is called.
    """
    # len(chunk) can never equal a size below 1, so such a value would
    # silently collect the entire file into one chunk. Reject it instead.
    if chunk_size < 1:
        raise ValueError(
            "chunk_size must be at least 1, got {!r}".format(chunk_size)
        )

    # newline='' lets the csv module do its own line-ending handling, so
    # line breaks embedded in quoted fields are returned unmodified.
    # The with statement closes the file once the generator finishes.
    with open(path, mode='r', newline='') as f:
        reader = csv.DictReader(f)

        chunk = []
        for row in reader:
            chunk.append(row)

            if len(chunk) >= chunk_size:
                yield chunk
                # Start a fresh list so the chunk just handed out is not
                # mutated while the consumer is still using it.
                chunk = []

        # Emit the final, partially filled chunk (if any rows remain).
        if chunk:
            yield chunk

In [None]:
# Stream "orders.csv" in chunks of up to 100 rows and pass each chunk to
# process(). Only one chunk is requested from the generator at a time.
# NOTE(review): process() is not defined in the visible cells; presumably it
# comes from another cell or module — confirm before running this cell.
for chunk in read_csv_in_chunks("orders.csv", 100):
    process(chunk)