In [1]:
import pandas as pd
import numpy as np
import sqlalchemy as sqla
from io import StringIO

# Create the SQLAlchemy engine for the local SQLite file that every table
# written below goes into.
DATABASE_URL = 'sqlite:///yelp.sqlite'
conn = sqla.create_engine(DATABASE_URL)

In [2]:
# Split business.json into three tables -- business, attributes and hours --
# all keyed by business_id, and store each of them in the database.
def _expand_nested_column(frame, column):
    """Turn the non-null entries of ``frame[column]`` into their own table.

    The entries are assumed to be dicts (TODO confirm against the dataset);
    each key becomes a column and each row keeps the index label of ``frame``,
    which is named ``business_id`` in the result.
    """
    entries = frame[column].dropna().to_dict()
    expanded = pd.DataFrame.from_dict(entries, orient="index")
    expanded.index.name = "business_id"
    return expanded


business = pd.read_json(
    "yelp_dataset/yelp_academic_dataset_business.json", lines=True
)
business = business.set_index("business_id")

attributes = _expand_nested_column(business, "attributes")
hours = _expand_nested_column(business, "hours")

# The nested columns now live in their own tables, so remove them from the
# main business table before it is written.
business = business.drop(columns=["hours", "attributes"])

for table_name, table in (
    ('business', business),
    ('hours', hours),
    ('attributes', attributes),
):
    table.to_sql(table_name, conn, if_exists="replace")

#convert checkin.json into a tidy dataframe (one row per business, weekday and
#hour) and store it in the database
# NOTE(review): assumes every record carries a "time" dict keyed by
# "<weekday>-<hour>" labels such as "Fri-0" -- confirm against the dataset
# version in use.
checkins = pd.read_json(
    "yelp_dataset/yelp_academic_dataset_checkin.json",
    lines=True,
    orient="records",
    chunksize=10000,
)
# The first chunk that is written replaces any table left by an earlier run, so
# re-running this cell does not duplicate rows; later chunks are appended.
checkin_table_written = False
for checkin in checkins:
    # One row per business, one column per "<weekday>-<hour>" label.
    per_slot = pd.DataFrame.from_dict(
        checkin.set_index("business_id")["time"].dropna().to_dict(),
        orient="index",
    )
    per_slot.index.name = "business_id"

    # Wide -> long. Labels a business has no count for come out as NaN after
    # the expansion above and are dropped here.
    checkin = per_slot.reset_index().melt(
        id_vars="business_id", var_name="slot", value_name="checkins"
    )
    checkin = checkin.dropna(subset=["checkins"])
    if checkin.empty:
        # Nothing to store for this chunk (and nothing to split below).
        continue

    # Split the label directly instead of round-tripping it through CSV text:
    # the split keeps the row index, so the parts stay aligned with their rows.
    slot_parts = checkin["slot"].str.split("-", n=1, expand=True)
    checkin = checkin.assign(
        weekday=slot_parts[0].astype("category"),
        hour=slot_parts[1].astype(int),
    )
    checkin = checkin.set_index("business_id")[["weekday", "hour", "checkins"]]

    checkin.to_sql(
        'checkin',
        conn,
        if_exists="append" if checkin_table_written else "replace",
    )
    checkin_table_written = True

#store review.json into the database, reading it in chunks of 100000 records
reviews = pd.read_json(
    "yelp_dataset/yelp_academic_dataset_review.json",
    chunksize=100000,
    lines=True,
)
for chunk_number, review in enumerate(reviews):
    review.set_index("business_id", inplace=True)
    # The first chunk replaces any table left by an earlier run, so re-running
    # this cell does not duplicate every review; later chunks are appended.
    review.to_sql(
        "review",
        conn,
        if_exists="replace" if chunk_number == 0 else "append",
    )