In [12]:
#pip install faker


Writing and reading CSVs

In [2]:
import csv

output = open('myCSV.csv', mode = 'w')
mywriter = csv.writer(output)
header = ['name', 'age']
mywriter.writerow(header)
data = ['bob smith', 50]
mywriter.writerow(data)
output.close()

# It will always overwrite the data into the csv files using the code above

In [5]:
from faker import Faker
import csv

output=open('data.csv','w')
fake=Faker()
header=['name','age','street','city','state','zip','lng','lat']
mywriter=csv.writer(output)
mywriter.writerow(header)
for r in range(1000):
    mywriter.writerow([fake.name(),
                       fake.random_int(min=18, max=80, step=1), 
                       fake.street_address(), 
                       fake.city(),
                       fake.state(),
                       fake.zipcode(),
                       fake.longitude(),
                       fake.latitude()])
output.close()

# Use Faker Libary to generate and write 1000 records into the data.csv

In [None]:
with open('data.csv') as f:
    myreader = csv.DictReader(f)
    headers = next(myreader)
    for row in myreader:
        print(row['name'])
# It will show all 1000 fake names

Reading and writing CSVs using pandas Dataframes

In [22]:
import pandas as pd

df = pd.read_csv('data.csv')
df.head(10)
# read csv using pandas

Unnamed: 0,name,age,street,city,state,zip,lng,lat
0,James Stafford,73,876 Rivera Squares,Port Jason,New Hampshire,60025,-139.39671,-29.286733
1,Donald Davis,19,8587 Reed Light,Jamesberg,Connecticut,67789,-90.572524,-36.912299
2,Melissa Waters,35,676 Amy Ridges Suite 169,Timothychester,Maryland,79609,-51.49832,-5.750014
3,Taylor Floyd,28,082 Thomas Fall Apt. 746,North Samuel,Minnesota,55436,-65.241163,39.210252
4,April Rosario,58,0158 Pearson Curve Suite 438,Berryhaven,Colorado,67018,-40.637525,-80.300582
5,James Washington,78,0993 Daniel Pass,Doughertychester,Indiana,60402,-118.113337,73.2744
6,Benjamin Myers,50,7206 Brown Forks Suite 315,Lopezshire,Washington,41204,174.242569,55.528093
7,James Gallagher,77,296 John Overpass Suite 281,Jenniferborough,Arizona,59600,104.187945,77.776713
8,Eric Bell,29,069 Christopher Islands,Shermanfurt,New Mexico,7688,92.31744,12.251204
9,Michael Campbell,34,3769 Melissa Locks Apt. 848,West Ericstad,Alaska,71657,-65.08543,-20.421753


In [3]:
# write csv using pandas
import pandas as pd

data={'Name':['Paul','Bob','Susan','Yolanda'],'Age':[23,45,18,21]}
df = pd.DataFrame(data)
df.head(10)

df.to_csv('fromdf.csv', index=False)


Writing JSON with Python

In [24]:
from faker import Faker
import json

output = open('data.json', 'w')
fake = Faker()

alldata = {}
alldata['records'] = []

for _ in range(1000):
    data={"name":fake.name(),
          "age":fake.random_int(min=18, max=80, step=1),
          "street":fake.street_address(),
          "city":fake.city(),
          "state":fake.state(),
          "zip":fake.zipcode(),
          "lng":float(fake.longitude()),
          "lat":float(fake.latitude())}
    alldata['records'].append(data)

json.dump(alldata, output)
# Use Faker Libary to generate and write 1000 records into the data.json

In [25]:
with open('data.json', 'r') as f:
    data = json.load(f)

    print(data['records'][0])

{'name': 'Samantha Rodriguez', 'age': 19, 'street': '045 Russell Views Suite 342', 'city': 'South Jeffreyside', 'state': 'Connecticut', 'zip': '36709', 'lng': -118.148404, 'lat': -28.3180065}


Reading and writing JSON with pandas DataFrames

In [26]:
df = pd.read_json('data.json')
df.head()
# Below results shows read_json can't load properly if the records are nested in a records dictionary

Unnamed: 0,records
0,"{'name': 'Samantha Rodriguez', 'age': 19, 'str..."
1,"{'name': 'Stephen Le', 'age': 27, 'street': '9..."
2,"{'name': 'Brian Cuevas', 'age': 31, 'street': ..."
3,"{'name': 'Brett Spence', 'age': 44, 'street': ..."
4,"{'name': 'Emily Parker MD', 'age': 40, 'street..."


In [30]:
import pandas.io.json as pd_JSON

f = open('data.json', 'r')
data = pd_JSON.loads(f.read())
df = pd.json_normalize(data, record_path='records')
df.head()

Unnamed: 0,name,age,street,city,state,zip,lng,lat
0,Samantha Rodriguez,19,045 Russell Views Suite 342,South Jeffreyside,Connecticut,36709,-118.148404,-28.318006
1,Stephen Le,27,948 Hardin Trail,Bakerville,New Hampshire,91556,-34.202032,84.992442
2,Brian Cuevas,31,76547 Robert Corners,North Rebeccaville,Oklahoma,83425,63.995119,53.705333
3,Brett Spence,44,1977 Kathleen Cape,East Scottton,Massachusetts,39443,-60.316145,-28.72784
4,Emily Parker MD,40,77588 Robert Streets,Karenchester,Kentucky,64641,157.56294,-25.077534


In [31]:
df.head(2).to_json()
# without orient parameter, which determins the format of JSON that is returned, the default is columns

'{"name":{"0":"Samantha Rodriguez","1":"Stephen Le"},"age":{"0":19,"1":27},"street":{"0":"045 Russell Views Suite 342","1":"948 Hardin Trail"},"city":{"0":"South Jeffreyside","1":"Bakerville"},"state":{"0":"Connecticut","1":"New Hampshire"},"zip":{"0":"36709","1":"91556"},"lng":{"0":-118.148404,"1":-34.202032},"lat":{"0":-28.3180065,"1":84.9924425}}'

In [32]:
df.head(2).to_json(orient='records')

'[{"name":"Samantha Rodriguez","age":19,"street":"045 Russell Views Suite 342","city":"South Jeffreyside","state":"Connecticut","zip":"36709","lng":-118.148404,"lat":-28.3180065},{"name":"Stephen Le","age":27,"street":"948 Hardin Trail","city":"Bakerville","state":"New Hampshire","zip":"91556","lng":-34.202032,"lat":84.9924425}]'