# File I/O, CSVs, and a first look at pandas

<br><br>
#### Use `file.readlines()` and `file.write()`




* Write to a text file using `with open('demo.txt', 'w') as f: ...`


In [1]:
lines = ['I am a line', 'So am I', 'I am not a line, oh wait, yeah i am']

# Write every entry on its own line. mode='w' creates the file, or truncates
# it if it already exists. All elements go through the same single pass, so an
# empty list simply yields an empty file instead of failing on lines[0].
with open('data/demo.txt', mode='w') as f:
    f.writelines(f"{line}\n" for line in lines)

print('done writing to file' + '\n')

done writing to file



* Read from a text file using `with open('demo.txt', 'r') as f: ...`
	* Using `with open` means we don't need to `.close()` the file
		* can verify with `f.closed #--> True`
	* Understand the `open` modes:
		* `'r'` : read only
		* `'w'` : write only, overwrites the file
		* `'a'` : write only, appends to the end of the file (use `'a+'` to also read)

In [2]:
with open('data/demo.txt', mode='r') as f:
    # readlines() returns every line with its trailing '\n' still attached.
    # rstrip('\n') removes only that newline, so a final line that has no
    # trailing newline keeps its last character (slicing with [:-1] would
    # silently drop it).
    lines = [line.rstrip('\n') for line in f.readlines()]

print(lines)

['I am a line', 'So am I', 'I am not a line, oh wait, yeah i am']


#### Read from a CSV file using the `csv` module
* You don't strictly need the `csv` module for reading CSVs, but it has some nice simple features
* Read a CSV

In [3]:
import csv

# newline='' leaves line-ending handling to the csv module, as its
# documentation requires; otherwise newlines embedded inside quoted fields
# may not be interpreted correctly.
with open('data/Demographic_Statistics_By_Zip_Code.csv', mode='r', newline='') as f:
    # print(type(f))

    reader = csv.reader(f, delimiter=',')

    # print(type(reader))

    # The reader yields each row as a list of strings; collect them all
    # while the file is still open.
    results = list(reader)

# print(results[0][0], results[0][1])
# print(results[1][0], results[1][1])
# print(results[1])

#### Write to a CSV file using the `csv` module
* You don't strictly need the `csv` module for writing CSVs, but it has some nice simple features
* Write a CSV

In [4]:
# Take the first 101 rows (the printed output shows row 0 is the header) and
# keep only the first five columns of each: row[0:5] covers indices 0..4.
# Building a new list leaves `results` itself untouched.
results_to_write = [row[0:5] for row in results[0:101]]

print(results_to_write)

# newline='' stops the text layer from translating the '\r\n' that csv.writer
# emits; without it, platforms that translate '\n' (e.g. Windows) end up with
# '\r\r\n' and a blank line after every row.
with open('data/demo_out.csv', mode='w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(results_to_write)

[['JURISDICTION NAME', 'COUNT PARTICIPANTS', 'COUNT FEMALE', 'PERCENT FEMALE', 'COUNT MALE'], ['10001', '44', '22', '0.5', '22'], ['10002', '35', '19', '0.54', '16'], ['10003', '1', '1', '1', '0'], ['10004', '0', '0', '0', '0'], ['10005', '2', '2', '1', '0'], ['10006', '6', '2', '0.33', '4'], ['10007', '1', '0', '0', '1'], ['10009', '2', '0', '0', '2'], ['10010', '0', '0', '0', '0'], ['10011', '3', '2', '0.67', '1'], ['10012', '0', '0', '0', '0'], ['10013', '8', '1', '0.13', '7'], ['10014', '0', '0', '0', '0'], ['10016', '17', '12', '0.71', '5'], ['10017', '0', '0', '0', '0'], ['10018', '3', '2', '0.67', '1'], ['10019', '0', '0', '0', '0'], ['10020', '0', '0', '0', '0'], ['10021', '0', '0', '0', '0'], ['10022', '1', '1', '1', '0'], ['10023', '7', '5', '0.71', '2'], ['10024', '4', '4', '1', '0'], ['10025', '27', '17', '0.63', '10'], ['10026', '0', '0', '0', '0'], ['10027', '7', '4', '0.57', '3'], ['10028', '0', '0', '0', '0'], ['10029', '20', '13', '0.65', '7'], ['10030', '0', '0', '0',

#### Read from a CSV using `pandas` `read_csv`

In [5]:
import pandas as pd

# Load the whole CSV into a DataFrame in one call.
df = pd.read_csv('data/Demographic_Statistics_By_Zip_Code.csv')

# Inspection calls worth trying while exploring the frame:
# print(type(df))
# print(df.head())
# print(df.columns)
# print(df.dtypes)
# cols = list(df.columns)
# print(df[cols[0]][0:100])

# Look up the two scalars at index label 0 (the first row under read_csv's
# default index), then report them.
first_jurisdiction = df.at[0, 'JURISDICTION NAME']
first_participants = df.at[0, 'COUNT PARTICIPANTS']

print(f"Participants in { first_jurisdiction }: { first_participants }")

Participants in 10001: 44
