# Reading & Writing Files

Based on Chapter 3 of Data Engineering in Python by Crickard.

- Install `faker` library: `pip3 install faker`.
- Install `pandas` library: `pip3 install pandas`.


In [1]:
import csv
from faker import Faker
import pandas as pd

## Simple write for CSV content

In [2]:
filename = 'myCSV.csv'
output = open(filename, mode='w')
mywriter = csv.writer(output)
header = ['name', 'age']
mywriter.writerow(header)
data = ['Bob Smith', 40]
mywriter.writerow(data)
output.close()

## Simple write for CSV content with `fake` data generator

In [3]:
filename = 'data.csv'
output = open(filename,'w')
fake = Faker()

header = ['name','age','street','city','state','zip','lng','lat']
mywriter = csv.writer(output)
mywriter.writerow(header)

for r in range(1000):
    mywriter.writerow([fake.name(),fake.random_int(min=18,
    max=80, step=1), fake.street_address(), fake.city(),fake.
    state(),fake.zipcode(),fake.longitude(),fake.latitude()])
    
output.close()

## Simple read for CSV content.

In [4]:
filename = 'data.csv'
data = []
with open(filename, 'r') as f:
    myreader = csv.DictReader(f)
    headers = next(myreader)
    for row in myreader:
        data.append(row)
# print(data)        

## Simple read for CSV content using Pandas library

In [5]:
df = pd.read_csv(filename)
df.head(10)

Unnamed: 0,name,age,street,city,state,zip,lng,lat
0,Anthony Scott,21,95009 Latoya River,Port Chris,New Mexico,98047,9.066664,2.072821
1,Timothy Ward Jr.,49,0740 Brian Isle,Johnsonview,Arkansas,641,-50.114069,36.508569
2,Jill Spencer,57,82037 Thornton Meadow,West Shirley,Washington,85966,-137.113351,61.626136
3,Peter Ali,47,2652 Joseph Springs,Port Samantha,Montana,96703,165.438152,-21.898822
4,Mrs. Wendy Pennington,21,233 David Harbors Suite 971,Christopherborough,New Mexico,4032,-29.803583,34.210608
5,Kenneth Davies,45,3065 Brady Fork Suite 451,Powerstown,Hawaii,67847,63.140109,51.041402
6,Shannon Baird,80,095 Williams Dam,North Crystal,Rhode Island,59215,158.107597,-51.430523
7,Anthony Hogan,46,218 Garcia Vista,Grantside,Ohio,37656,-175.4337,-65.052341
8,Elizabeth Hawkins,31,074 Micheal Curve,Port Julietown,Ohio,88567,124.825678,54.441215
9,Natalie Thomas,58,72214 Klein Islands,South Robert,Alabama,51537,-27.360547,15.005816


## Simple Write for JSON

In [6]:
import json
import pandas.io.json as pd_JSON

In [7]:
filename = 'data.json'
output = open(filename, 'w')
fake = Faker()

alldata = {}
alldata ['records'] = []

for x in range(1000):
    data={"name":fake.name(),"age":fake.random_int(min=18, max=80, step=1),
          "street":fake.street_address(),
          "city":fake.city(),"state":fake.state(),
          "zip":fake.zipcode(),
          "lng":float(fake.longitude()),
          "lat":float(fake.latitude())}
    
    alldata['records'].append(data)

output.write(json.dumps(alldata, indent=4, sort_keys=True))

288011

# Simple Read for JSON content

In [8]:
filename = 'data.json'
with open(filename, 'r') as f:
    data = json.load(f)
    
data['records'][0]

{'age': 74,
 'city': 'Finleyhaven',
 'lat': 65.036803,
 'lng': 53.429872,
 'name': 'John Olsen',
 'state': 'New Hampshire',
 'street': '25522 Knight Cape',
 'zip': '65196'}

## Simple read for JSON content using Pandas DataFrame

In [9]:
filename = 'data.json'
df = pd.read_json(filename)
df

Unnamed: 0,records
0,"{'age': 74, 'city': 'Finleyhaven', 'lat': 65.0..."
1,"{'age': 30, 'city': 'Port Jamesshire', 'lat': ..."
2,"{'age': 24, 'city': 'Carrfort', 'lat': 86.9667..."
3,"{'age': 20, 'city': 'Hallmouth', 'lat': -74.27..."
4,"{'age': 54, 'city': 'South April', 'lat': -43...."
...,...
995,"{'age': 53, 'city': 'North Stephenview', 'lat'..."
996,"{'age': 64, 'city': 'Watsonfurt', 'lat': -81.4..."
997,"{'age': 68, 'city': 'East Jacob', 'lat': -11.5..."
998,"{'age': 70, 'city': 'Meganside', 'lat': 40.050..."


In [10]:
df=pd_JSON.json_normalize(data,record_path='records')

  df=pd_JSON.json_normalize(data,record_path='records')


In [11]:
df

Unnamed: 0,age,city,lat,lng,name,state,street,zip
0,74,Finleyhaven,65.036803,53.429872,John Olsen,New Hampshire,25522 Knight Cape,65196
1,30,Port Jamesshire,21.552645,7.045669,Maurice Rivers,South Dakota,17301 Todd Fort,64517
2,24,Carrfort,86.966742,-101.352826,Melissa Hunt MD,Colorado,436 Poole Motorway Suite 554,60904
3,20,Hallmouth,-74.273999,-4.928255,Jennifer Stevens,Oklahoma,5350 Mills Trail Suite 316,01837
4,54,South April,-43.362642,72.601647,Madeline King,Wisconsin,429 Larson Court,73301
...,...,...,...,...,...,...,...,...
995,53,North Stephenview,-24.109969,39.562163,Michael Lewis,Delaware,93919 Bush Spur Suite 674,12124
996,64,Watsonfurt,-81.428474,58.348779,Michael Chaney,Arizona,692 Ward Walks,89361
997,68,East Jacob,-11.582520,-173.897153,Stacey Reed,Hawaii,1698 Janet Inlet,80252
998,70,Meganside,40.050132,104.403391,Cheryl Carson,North Carolina,912 Fields Harbors,91480


In [12]:
df.head(2).to_json()

'{"age":{"0":74,"1":30},"city":{"0":"Finleyhaven","1":"Port Jamesshire"},"lat":{"0":65.036803,"1":21.552645},"lng":{"0":53.429872,"1":7.045669},"name":{"0":"John Olsen","1":"Maurice Rivers"},"state":{"0":"New Hampshire","1":"South Dakota"},"street":{"0":"25522 Knight Cape","1":"17301 Todd Fort"},"zip":{"0":"65196","1":"64517"}}'

In [13]:
df.head(2).to_json(orient='records')

'[{"age":74,"city":"Finleyhaven","lat":65.036803,"lng":53.429872,"name":"John Olsen","state":"New Hampshire","street":"25522 Knight Cape","zip":"65196"},{"age":30,"city":"Port Jamesshire","lat":21.552645,"lng":7.045669,"name":"Maurice Rivers","state":"South Dakota","street":"17301 Todd Fort","zip":"64517"}]'