# Reading & Writing Files

Based on Chapter 3 of Data Engineering in Python by Crickard.

- Install `faker` library: `pip3 install faker`.
- Install `pandas` library: `pip3 install pandas`.


In [12]:
import csv
from faker import Faker
import pandas as pd

## Simple write for CSV content

In [5]:
filename = 'myCSV.csv'
output = open(filename, mode='w')
mywriter = csv.writer(output)
header = ['name', 'age']
mywriter.writerow(header)
data = ['Bob Smith', 40]
mywriter.writerow(data)
output.close()

## Simple write for CSV content with `fake` data generator

In [8]:
filename = 'data.csv'
output = open(filename,'w')
fake = Faker()

header = ['name','age','street','city','state','zip','lng','lat']
mywriter = csv.writer(output)
mywriter.writerow(header)

for r in range(1000):
    mywriter.writerow([fake.name(),fake.random_int(min=18,
    max=80, step=1), fake.street_address(), fake.city(),fake.
    state(),fake.zipcode(),fake.longitude(),fake.latitude()])
    
output.close()

## Simple read for CSV content.

In [13]:
filename = 'data.csv'
data = []
with open(filename, 'r') as f:
    myreader = csv.DictReader(f)
    headers = next(myreader)
    for row in myreader:
        data.append(row)
# print(data)        

## Simple read for CSV content using Pandas library

In [14]:
df = pd.read_csv(filename)
df.head(10)

Unnamed: 0,name,age,street,city,state,zip,lng,lat
0,Joshua Baker,48,10599 Taylor Streets Suite 612,West Shelly,Alabama,68918,-135.491595,67.377201
1,Caleb May,51,696 Jenkins Lock Apt. 819,Williamsberg,Montana,73335,96.343044,24.252357
2,Chase Wilson DVM,22,100 Scott Dam,North Scott,Pennsylvania,19442,35.610473,-88.751728
3,Amanda Arnold,69,3528 Tucker Tunnel Suite 249,Lake Jeffreyborough,Oregon,17280,-114.207265,56.258842
4,Patrick Dixon,35,214 Cervantes Village Suite 939,Lake Lisafurt,Rhode Island,66620,107.692251,-34.529648
5,Anna Allison,36,56440 Jackson Isle,West Wesleyview,Wisconsin,29485,131.377746,84.229962
6,Marcus Werner,20,9224 Hughes Villages Suite 060,Port Valeriehaven,Tennessee,66529,57.867875,-84.805444
7,Michael Bond,71,00552 Lee Meadow,Walkerborough,Maryland,26080,91.168829,-10.772884
8,Wanda Perez,55,303 Norton Branch,Walkerbury,New Hampshire,2368,126.409154,85.912323
9,Gary Anderson,68,469 Miller Station Suite 782,Judithmouth,West Virginia,49564,57.094034,36.766121


## Simple Write for JSON

In [34]:
import json
import pandas.io.json as pd_JSON

In [31]:
filename = 'data.json'
output = open(filename, 'w')
fake = Faker()

alldata = {}
alldata ['records'] = []

for x in range(1000):
    data={"name":fake.name(),"age":fake.random_int(min=18, max=80, step=1),
          "street":fake.street_address(),
          "city":fake.city(),"state":fake.state(),
          "zip":fake.zipcode(),
          "lng":float(fake.longitude()),
          "lat":float(fake.latitude())}
    
    alldata['records'].append(data)

output.write(json.dumps(alldata, indent=4, sort_keys=True))

287967

# Simple Read for JSON content

In [32]:
filename = 'data.json'
with open(filename, 'r') as f:
    data = json.load(f)
    
data['records'][0]

{'age': 68,
 'city': 'New Danielfort',
 'lat': -40.516858,
 'lng': 115.2507,
 'name': 'Dennis Gallagher',
 'state': 'Rhode Island',
 'street': '61868 Heather Ford',
 'zip': '40800'}

## Simple read for JSON content using Pandas DataFrame

In [33]:
filename = 'data.json'
df = pd.read_json(filename)
df

Unnamed: 0,records
0,"{'age': 68, 'city': 'New Danielfort', 'lat': -..."
1,"{'age': 33, 'city': 'Port Karinamouth', 'lat':..."
2,"{'age': 72, 'city': 'Timothystad', 'lat': 34.8..."
3,"{'age': 32, 'city': 'Wilcoxside', 'lat': -22.8..."
4,"{'age': 62, 'city': 'Nicolefurt', 'lat': 82.29..."
...,...
995,"{'age': 27, 'city': 'Wallschester', 'lat': -70..."
996,"{'age': 61, 'city': 'West Patriciaview', 'lat'..."
997,"{'age': 40, 'city': 'Barbaraburgh', 'lat': 11...."
998,"{'age': 70, 'city': 'New Justinbury', 'lat': -..."


In [35]:
df=pd_JSON.json_normalize(data,record_path='records')

  df=pd_JSON.json_normalize(data,record_path='records')


In [36]:
df

Unnamed: 0,age,city,lat,lng,name,state,street,zip
0,68,New Danielfort,-40.516858,115.250700,Dennis Gallagher,Rhode Island,61868 Heather Ford,40800
1,33,Port Karinamouth,28.278718,82.314419,Michelle Harper,Mississippi,59938 Garcia Estate,70453
2,72,Timothystad,34.812452,179.820495,Chris Sawyer,Michigan,885 Gonzalez Branch Apt. 429,51686
3,32,Wilcoxside,-22.880790,-96.902495,Jeffrey Cohen,Kentucky,560 Leslie Fall,46006
4,62,Nicolefurt,82.292174,-54.331305,Joshua Roberts,Arizona,314 Flores Land Suite 111,58849
...,...,...,...,...,...,...,...,...
995,27,Wallschester,-70.208219,-82.540125,Christina Turner,Colorado,87208 Blanchard Curve Suite 042,89978
996,61,West Patriciaview,78.145451,72.610241,Brenda Castro,Minnesota,56006 Julie Mount,94148
997,40,Barbaraburgh,11.646768,179.900343,Evelyn Macias,West Virginia,008 Stephanie Course Apt. 687,21355
998,70,New Justinbury,-83.719995,-35.063490,Nancy Scott,South Carolina,592 Carl Rapid,90992


In [37]:
df.head(2).to_json()

'{"age":{"0":68,"1":33},"city":{"0":"New Danielfort","1":"Port Karinamouth"},"lat":{"0":-40.516858,"1":28.278718},"lng":{"0":115.2507,"1":82.314419},"name":{"0":"Dennis Gallagher","1":"Michelle Harper"},"state":{"0":"Rhode Island","1":"Mississippi"},"street":{"0":"61868 Heather Ford","1":"59938 Garcia Estate"},"zip":{"0":"40800","1":"70453"}}'

In [39]:
df.head(2).to_json(orient='records')

'[{"age":68,"city":"New Danielfort","lat":-40.516858,"lng":115.2507,"name":"Dennis Gallagher","state":"Rhode Island","street":"61868 Heather Ford","zip":"40800"},{"age":33,"city":"Port Karinamouth","lat":28.278718,"lng":82.314419,"name":"Michelle Harper","state":"Mississippi","street":"59938 Garcia Estate","zip":"70453"}]'