# Looping Over Data Sets

In [None]:
import pandas as pd
# Print each file's name alongside the largest value in its 'ytd' column.
for filename in ('data/2011_circ.csv', 'data/2012_circ.csv'):
    data = pd.read_csv(filename)
    ytd_max = data['ytd'].max()
    print(filename, ytd_max)

## Use `glob` to find sets of files whose names match a pattern.

In [None]:
import glob
# glob.glob returns a list of the paths matching the pattern; '*' stands for
# any run of characters, so this picks up every .csv file directly in data/.
print(f"all csv files in data directory: {glob.glob('data/*.csv')}")

## Use `glob` and `for` to process batches of files.

In [None]:
# Print the largest 'ytd' value for every CSV file that glob finds in data/,
# in whatever order glob happens to return the paths.
# The loop variable is csv_path rather than csv so that it does not shadow
# the standard-library csv module imported elsewhere in this file.
for csv_path in glob.glob('data/*.csv'):
    data = pd.read_csv(csv_path)
    print(csv_path, data['ytd'].max())

In [None]:
# Print the largest 'ytd' value for every CSV file in data/. sorted() puts the
# paths in alphabetical order, so the output order no longer depends on the
# order in which glob returns them.
# The loop variable is csv_path rather than csv so that it does not shadow
# the standard-library csv module imported elsewhere in this file.
for csv_path in sorted(glob.glob('data/*.csv')):
    data = pd.read_csv(csv_path)
    print(csv_path, data['ytd'].max())

## Appending DataFrames to a list

### Convert Year in filenames to a column

In [None]:
# The loop variable is csv_path rather than csv so that it does not shadow
# the standard-library csv module imported elsewhere in this file.
for csv_path in sorted(glob.glob('data/*.csv')):
    # 'data/' occupies indices 0-4, so the slice [5:9] picks out indices 5-8:
    # the first four characters of the file name. This assumes every file
    # name starts with a four-digit year, as in data/2011_circ.csv.
    year = csv_path[5:9]
    print(f'filename: {csv_path} year: {year}')

In [None]:
dfs = []  # an empty list to hold all of our DataFrames

# enumerate(..., start=1) supplies the running file number, so no counter has
# to be initialised and incremented by hand. The path variable is csv_path
# rather than csv so that it does not shadow the standard-library csv module
# imported elsewhere in this file.
for counter, csv_path in enumerate(sorted(glob.glob('data/*.csv')), start=1):
    # Indices 5-8 follow the 5-character 'data/' prefix; this assumes every
    # file name starts with a four-digit year, as in data/2011_circ.csv.
    year = csv_path[5:9]
    data = pd.read_csv(csv_path)
    # Tag every row with its source year. The value is a string because it
    # is sliced out of the path.
    data['year'] = year
    print(f'{counter} Saving {len(data)} rows from {csv_path}')
    dfs.append(data)

print(f'Number of saved DataFrames: {len(dfs)}')

In [None]:
# Preview the first DataFrame stored in the dfs list.
dfs[0].head(2) # we can add a number to head() to ask for a specific number of rows

## Concatenating DataFrames

In [None]:
# Stack every DataFrame in dfs into a single DataFrame. ignore_index=True
# discards each frame's own row labels and renumbers the combined rows from 0.
df = pd.concat(dfs, ignore_index=True)
# A bare expression on the last line of a notebook cell is shown as its output.
f'Number of rows in df: {len(df)}'

## Read in csv files without `pandas` using `csv`

CSV files can also be read with Python's built-in `csv` module. The resulting data types are simpler than a `DataFrame`: each row is returned as either a `list` (using `csv.reader()`) or a `dict` (using `csv.DictReader()`).

In [None]:
import csv

# Print every data row of each file as a list of strings.
for filename in ['data/2011_circ.csv', 'data/2012_circ.csv']:
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded inside quoted
    # fields can be misread.
    with open(filename, "r", newline="") as f:
        reader = csv.reader(f)
        # Skip the first line (the header row). The None default keeps an
        # empty file from raising StopIteration.
        next(reader, None)
        for row in reader:
            print(row)

In [None]:
import csv

# Print every data row of each file as a dict keyed by column name.
for filename in ['data/2011_circ.csv', 'data/2012_circ.csv']:
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded inside quoted
    # fields can be misread.
    with open(filename, "r", newline="") as f:
        # DictReader takes its keys from the file's first row, so there is
        # no header line to skip by hand.
        reader = csv.DictReader(f)
        for row in reader:
            print(row)