## Writing CSV using the Python CSV library

In [None]:
from faker import Faker
import csv

In [None]:
# Generate 2000 fake person records and write them to data.csv,
# preceded by a single header row.
fake = Faker()
header = ['name', 'age', 'street', 'city', 'state', 'zip', 'lng', 'lat']

# newline='' hands line-ending control to the csv module (otherwise extra
# blank rows can appear on platforms that translate '\n'); the with-block
# guarantees the file is flushed and closed even if a write raises.
with open('data.csv', 'w', newline='') as output:
    writer = csv.writer(output)
    writer.writerow(header)
    for _ in range(2000):
        # Field order must match `header` above.
        writer.writerow([fake.name(), fake.random_int(min=18, max=80, step=1),
                         fake.street_address(), fake.city(), fake.state(),
                         fake.zipcode(), fake.longitude(), fake.latitude()])

## Reading CSVs

In [None]:
# Print the 'name' column of every record in data.csv.
# newline='' is the csv module's recommended way to open files so that
# newlines embedded in quoted fields are handled correctly.
with open('data.csv', newline='') as f:
    reader = csv.DictReader(f)
    # DictReader consumes the header row itself and uses it as the dict keys.
    # Calling next(reader) here would silently discard the first data record,
    # so the column names are taken from `fieldnames` instead.
    headers = reader.fieldnames
    for row in reader:
        print(row['name'])

## Reading and writing CSV using pandas DataFrames

In [7]:
import pandas as pd

In [None]:
# Load the CSV into a DataFrame and preview its first ten rows.
df = pd.read_csv(filepath_or_buffer='data.csv')
df.head(n=10)

In [None]:
# Build a small table from (name, age) pairs and save it as fromdf.csv.
people = [('Paul', 23), ('Bob', 45), ('Susan', 18), ('Yolanda', 21)]
names, ages = zip(*people)
data = {'Name': list(names), 'Age': list(ages)}
df = pd.DataFrame(data)
# index=False keeps the DataFrame's row index out of the written file.
df.to_csv('fromdf.csv', index=False)

## Writing JSON with Python

In [1]:
from faker import Faker
import json

In [3]:
# Generate 1000 fake person records and dump them to data2.json under a
# single top-level 'records' key.
fake = Faker()

all_data = {
    'records': [
        {
            "name": fake.name(),
            "age": fake.random_int(min=18, max=80, step=1),
            "street": fake.street_address(),
            "city": fake.city(),
            "state": fake.state(),
            "zip": fake.zipcode(),
            # Converted to float before serialising with json.dump.
            "lng": float(fake.longitude()),
            "lat": float(fake.latitude()),
        }
        for _ in range(1000)
    ]
}

with open("data2.json", "w") as f:
    json.dump(all_data, f)

In [5]:
# Read data2.json back into a Python dict and show the first record.
with open("data2.json", "r") as src:
    data = json.loads(src.read())
data['records'][0]

{'name': 'Laura Edwards',
 'age': 74,
 'street': '186 Stephen Neck',
 'city': 'Michelleburgh',
 'state': 'Indiana',
 'zip': '66423',
 'lng': 102.261045,
 'lat': -67.597456}

In [6]:
# Index into the first record under 'records' and look up its 'name' field.
data['records'][0]['name']

'Laura Edwards'

## Pandas DataFrames

In [11]:
# Parse data2.json with the standard-library json module (imported above).
# No `pd_JSON` alias is imported anywhere in this notebook, so going through
# it fails with NameError on a fresh kernel. The with-block also closes the
# file handle, which a bare open() left dangling.
with open("data2.json", "r") as f:
    data = json.load(f)
# Normalize the JSON: flatten the list under 'records' into one table row per record.
df = pd.json_normalize(data, record_path='records')
# When writing to JSON, you can pass the `orient` parameter, which determines
# the layout of the returned JSON. It is left at its default here, which
# produces the column -> {index -> value} layout.
df.head(2).to_json()

'{"name":{"0":"Laura Edwards","1":"Clifford Malone"},"age":{"0":74,"1":79},"street":{"0":"186 Stephen Neck","1":"8624 James Island Apt. 049"},"city":{"0":"Michelleburgh","1":"Billyburgh"},"state":{"0":"Indiana","1":"Washington"},"zip":{"0":"66423","1":"83996"},"lng":{"0":102.261045,"1":-153.205167},"lat":{"0":-67.597456,"1":-30.0869795}}'