In [2]:
from py2neo import Graph
import json

In [3]:
# Paths to the Yelp academic dataset dumps read by the import cells below.
# The shared directory is defined once so the whole set can be re-pointed by
# editing a single value; the resulting paths are unchanged.
DATA_DIR = "../data"
YELP_REVIEW_FILE = DATA_DIR + "/yelp_academic_dataset_review.json"
YELP_TIP_FILE = DATA_DIR + "/yelp_academic_dataset_tip.json"
YELP_USER_FILE = DATA_DIR + "/yelp_academic_dataset_user.json"
YELP_CHECKIN_FILE = DATA_DIR + "/yelp_academic_dataset_checkin.json"
YELP_BUSINESS_FILE = DATA_DIR + "/yelp_academic_dataset_business.json"

In [4]:
# Handle used by every later cell to run Cypher. No URI or credentials are
# passed, so py2neo's own connection defaults apply.
# NOTE(review): presumably a local Neo4j instance — confirm before running elsewhere.
graph = Graph()

In [20]:
# One uniqueness constraint per node label, each on the property that the
# import queries MERGE on.
UNIQUENESS_CONSTRAINTS = (
    "CREATE CONSTRAINT ON (r:Review) ASSERT r.review_id IS UNIQUE;",
    "CREATE CONSTRAINT ON (b:Business) ASSERT b.business_id IS UNIQUE;",
    "CREATE CONSTRAINT ON (u:User) ASSERT u.user_id IS UNIQUE;",
    "CREATE CONSTRAINT ON (c:Category) ASSERT c.name IS UNIQUE;",
)
for constraint_statement in UNIQUENESS_CONSTRAINTS:
    last_cursor = graph.run(constraint_statement)
# Leave the final cursor as the cell's value, as the original last call did.
last_cursor

<py2neo.database.Cursor at 0x109237e80>

In [14]:
# Cypher run once per batch of review records, passed as the `items` parameter.
# For each record it ensures the Business and User nodes exist, MERGEs the
# Review node on `review_id` (the property the uniqueness constraint is declared
# on), fills the review's properties only when the node is first created, and
# links (User)-[:WROTE]->(Review)-[:REVIEWS]->(Business).
# NOTE(review): `{items}` is the legacy Cypher parameter syntax; newer Neo4j
# servers expect `$items` — confirm against the server version in use.
CYPHER_REVIEW_IMPORT = '''
WITH {items} AS reviews
UNWIND reviews AS review
MERGE (b:Business {business_id: review.business_id})
MERGE (u:User {user_id: review.user_id})
MERGE (r:Review {review_id: review.review_id})
ON CREATE SET r.text   = review.text,
              r.type   = review.type,
              r.date   = review.date, // FIXME: date format?
              r.cool   = review.cool,
              r.funny  = review.funny,
              r.stars  = review.stars,
              r.useful = review.useful
MERGE (u)-[:WROTE]->(r)
MERGE (r)-[:REVIEWS]->(b)
'''

# Cypher run once per batch of business records, passed as the `items` parameter.
# For each record it MERGEs the Business node on `business_id`, overwrites its
# scalar properties, then MERGEs one Category node per entry in
# `business.categories` and links it with (Business)-[:IN_CATEGORY]->(Category).
# NOTE(review): the open/closed flag is read from `is_open`, falling back to the
# `open` key the query used before — confirm which key the files in ../data carry.
CYPHER_BUSINESS_IMPORT = '''
WITH {items} AS businesses
UNWIND businesses AS business
MERGE (b:Business {business_id: business.business_id})
SET b.address      = business.address,
    b.lat          = business.latitude,
    b.lon          = business.longitude,
    b.name         = business.name,
    b.city         = business.city,
    b.postal_code  = business.postal_code,
    b.state        = business.state,
    b.review_count = business.review_count,
    b.stars        = business.stars,
    // FIXME: inconsistent attributes data type
    //b.bike_parking = business.attributes.BikeParking,
    //b.accepts_bitcoin = business.attributes.BusinessAcceptsBitcoin,
    //b.accepts_credit_cards = business.attributes.BusinessAcceptsCreditCards,
    //b.garage_parking = business.attributes.BusinessParking.garage,
    //b.street_parking = business.attributes.BusinessParking.street,
    //b.validated_parking = business.attributes.BusinessParking.validated,
    //b.lot_parking = business.attributes.BusinessParking.lot,
    //b.valet_parking = business.attributes.BusinessParking.valet,
    b.is_open      = CASE WHEN coalesce(business.is_open, business.open) = 1 THEN True ELSE False END,
    b.neighborhood = business.neighborhood
WITH *
UNWIND business.categories AS cat
MERGE (c:Category {name: cat})
MERGE (b)-[:IN_CATEGORY]->(c)
'''

# Cypher run once per batch of user records, passed as the `items` parameter.
# For each record it MERGEs the User node on `user_id` and overwrites its
# profile and compliment properties with SET (so re-running refreshes them).
# It then UNWINDs `user.friends`, MERGEs a User node for every friend id (a
# bare node with only `user_id` if that friend has not been imported yet) and
# MERGEs a directed (u)-[:FRIENDS]->(f) relationship.
# NOTE(review): assumes `user.friends` is a list of user_id strings — confirm
# how the dataset encodes users without friends.
CYPHER_USER_IMPORT = '''
WITH {items} AS users
UNWIND users AS user
MERGE (u:User {user_id: user.user_id})
SET u.name               = user.name,
    u.type               = user.type,
    u.useful             = user.useful,
    u.yelping_since      = user.yelping_since, //FIXME: consistent date format
    u.funny              = user.funny,
    u.review_count       = user.review_count,
    u.average_stars      = user.average_stars,
    u.fans               = user.fans,
    u.compliment_cool    = user.compliment_cool,
    u.compliment_cute    = user.compliment_cute,
    u.compliment_funny   = user.compliment_funny,
    u.compliment_hot     = user.compliment_hot,
    u.compliment_list    = user.compliment_list,
    u.compliment_more    = user.compliment_more,
    u.compliment_note    = user.compliment_note,
    u.compliment_photos  = user.compliment_photos,
    u.compliment_plain   = user.compliment_plain,
    u.compliment_profile = user.compliment_profile,
    u.compliment_writer  = user.compliment_writer,
    u.cool               = user.cool
WITH *
UNWIND user.friends AS friend
MERGE (f:User {user_id: friend})
MERGE (u)-[:FRIENDS]->(f)
'''

# Cypher run once per batch of tip records, passed as the `items` parameter.
# For each record it MERGEs the User and Business nodes, then stores the tip
# itself as a (User)-[:TIP]->(Business) relationship carrying date, text,
# likes and type.
# NOTE(review): the relationship is built with CREATE, not MERGE, so running
# the same batch twice adds a second TIP relationship for every record.
CYPHER_TIP_IMPORT = '''
WITH {items} AS tips
UNWIND tips AS tip
MERGE (u:User {user_id: tip.user_id})
MERGE (b:Business {business_id: tip.business_id})
CREATE (u)-[t:TIP]->(b)
SET t.date  = tip.date, // FIXME: consistent date format
    t.text  = tip.text,
    t.likes = tip.likes,
    t.type  = tip.type
'''

In [20]:
# REVIEW
# Read the review file one line at a time (each line is parsed as a JSON
# document) and write the records to the graph in batches.
#TODO: iterate file,cypher maps
parts = [{'datafile': YELP_REVIEW_FILE, 'cypher': CYPHER_REVIEW_IMPORT}]
#for part in parts:
# The encoding is pinned so the result does not depend on the platform's locale.
with open(YELP_REVIEW_FILE, "r", encoding="utf-8") as review_file:
    reviews = []
    for line in review_file:
        reviews.append(json.loads(line))
        # Same batch boundary as before: flush once more than 10000 are buffered.
        if len(reviews) > 10000:
            # WRITE TO GRAPH
            graph.run(CYPHER_REVIEW_IMPORT, parameters={'items': reviews})
            reviews = []
    # Flush the trailing partial batch; without this the last records in the
    # file (whatever is left below the batch threshold) were never written.
    if reviews:
        graph.run(CYPHER_REVIEW_IMPORT, parameters={'items': reviews})
        reviews = []



KeyboardInterrupt: 

In [18]:
# BUSINESS
# Read the business file one line at a time (each line is parsed as a JSON
# document) and write the records to the graph in batches.
# The encoding is pinned so the result does not depend on the platform's locale.
with open(YELP_BUSINESS_FILE, "r", encoding="utf-8") as business_file:
    businesses = []
    for line in business_file:
        businesses.append(json.loads(line))
        # Same batch boundary as before: flush once more than 100 are buffered.
        if len(businesses) > 100:
            #WRITE TO GRAPH
            graph.run(CYPHER_BUSINESS_IMPORT, parameters={'items': businesses})
            businesses = []
    # Flush the trailing partial batch; without this the last records in the
    # file (whatever is left below the batch threshold) were never written.
    if businesses:
        graph.run(CYPHER_BUSINESS_IMPORT, parameters={'items': businesses})
        businesses = []

KeyboardInterrupt: 

In [10]:
# USER
# Sample import: buffers user records until the threshold is crossed (101
# records), writes that single batch to the graph and stops reading. The rest
# of the file is not imported by this cell.
with open(YELP_USER_FILE, "r") as user_file:
    users = []
    for records_read, raw_line in enumerate(user_file, start=1):
        users.append(json.loads(raw_line))
        if records_read <= 100:
            continue
        # Threshold crossed: send everything buffered so far as one batch.
        graph.run(CYPHER_USER_IMPORT, parameters={'items': users})
        users = []
        break

In [16]:
#TIP
# Sample import: buffers tip records until the threshold is crossed (101
# records), writes that single batch to the graph and stops reading. The rest
# of the file is not imported by this cell.
with open(YELP_TIP_FILE, "r") as tip_file:
    tips = []
    for records_read, raw_line in enumerate(tip_file, start=1):
        tips.append(json.loads(raw_line))
        if records_read <= 100:
            continue
        # Threshold crossed: send everything buffered so far as one batch.
        graph.run(CYPHER_TIP_IMPORT, parameters={'items': tips})
        tips = []
        break

In [17]:
#CHECKIN
# Nothing is written to the graph here: the cell only parses the first two
# check-in records into `checkins` so their structure can be inspected.
with open(YELP_CHECKIN_FILE, "r") as checkin_file:
    checkins = []
    for records_read, raw_line in enumerate(checkin_file, start=1):
        checkins.append(json.loads(raw_line))
        if records_read > 1:
            break

In [18]:
# Display the check-in records loaded by the previous cell to inspect their fields.
checkins

[{'business_id': '7KPBkxAOEtb3QeIL9PEErg',
  'time': ['Fri-0:2',
   'Sat-0:1',
   'Sun-0:1',
   'Wed-0:2',
   'Sat-1:2',
   'Thu-1:1',
   'Wed-1:1',
   'Sat-2:1',
   'Sun-2:2',
   'Thu-2:1',
   'Wed-2:1',
   'Fri-3:1',
   'Sun-3:3',
   'Thu-4:1',
   'Tue-4:1',
   'Sun-6:1',
   'Wed-6:1',
   'Fri-10:1',
   'Sat-10:1',
   'Mon-11:1',
   'Wed-11:2',
   'Mon-12:1',
   'Sat-12:1',
   'Tue-12:1',
   'Sat-13:2',
   'Thu-13:1',
   'Tue-13:2',
   'Wed-13:2',
   'Fri-14:2',
   'Sat-14:1',
   'Wed-14:1',
   'Fri-15:1',
   'Sat-15:1',
   'Thu-15:1',
   'Tue-15:1',
   'Fri-16:1',
   'Sat-16:2',
   'Sun-16:1',
   'Tue-16:1',
   'Sat-17:3',
   'Sun-17:1',
   'Fri-18:1',
   'Mon-18:1',
   'Sat-18:2',
   'Sun-18:1',
   'Tue-18:2',
   'Wed-18:1',
   'Fri-19:2',
   'Mon-19:1',
   'Sun-19:2',
   'Thu-19:1',
   'Wed-19:1',
   'Mon-20:1',
   'Sun-20:5',
   'Thu-20:1',
   'Tue-20:1',
   'Wed-20:2',
   'Fri-21:2',
   'Sun-21:1',
   'Thu-21:4',
   'Tue-21:1',
   'Wed-21:1',
   'Fri-22:1',
   'Thu-22:1',
   'Fr