In [2]:
from pprint import pprint
from py2neo import Graph

In [3]:
graph = Graph(host="localhost://7474", auth=("neo4j", "1234"))

In [13]:
query = """MATCH (p:Reviewer)-[w:Wrote]->(r:Review)-[a:About]->(re:Restaurant)
RETURN p.name AS reviewer, COUNT(w) AS num_of_reviews
ORDER BY num_of_reviews DESC LIMIT 10"""


In [14]:
#Find out reviewer order by number of wrote reviews
graph.run(query).to_data_frame()

Unnamed: 0,num_of_reviews,reviewer
0,655,Veronica Phua
1,647,Burpple Guides
2,565,Xing Wei Chua
3,383,Justin Teo
4,339,Julius Lim
5,312,Wei Zhi Chiang
6,309,Jason Wong
7,305,Wuu Yyiizzhhoouu
8,271,Ivan Teh
9,265,Melissa Chee


# Build up recommendation engine

Our recommendation engine will recommend restaurant to target user based on the preference of the usesr that will have a similar taste on restaurant. For each user, we will identify the most similar users to them and preferences of these simialr users are then used to generate recommendations for the target user.

This algorithm is known as Collaborative Filtering.

Collaborative filtering (CF) is a technique commonly used to build personalized recommendation systems. Some popular websites that use CF technology include Amazon, Netflix, and IMDB. In CF, predictions about a user's interests are made by compiling preferences from similar users.


The similarity metric we will use here is Jaccard Similarity Coefficient or Jaccard Index also known as Intersection over Union(IoU).

<img src="reco_data\IoU.jpg">

Jaccard Index between two sets A and B is the ratio of the number of elements in the intersection of A and B over the number of elements in the union of A and B.

Reference: https://deepai.org/machine-learning-glossary-and-terms/jaccard-index

## Get the most similar users to our target user, the Jaccard Index, number of Restaurants in common (Intersection), number of Restaurants in total (Union)

In [17]:
query = """
        // get target user and their neighbors pairs and count 
        // of distinct restaurant that they have reviewed in common
        MATCH (p1:Reviewer)-[:Wrote]->(r1:Review)-[:About]->(re:Restaurant)<-[:About]-(r2:Review)<-[:Wrote]-(p2:Reviewer)
        WHERE p1 <> p2 AND p1.name = {p_name}
        WITH p1, p2, COUNT(DISTINCT re) as intersection
        
        // get count of all the distinct restaurants that they have reviewed in total (Union)
        MATCH (p:Reviewer)-[:Wrote]->(r:Review)-[:About]->(re:Restaurant)
        WHERE p in [p1, p2]
        WITH p1, p2, intersection, COUNT(DISTINCT re) as union
        
        // compute Jaccard index
        WITH p1, p2, intersection, union, (intersection * 1.0 / union) as jaccard_index
        
        // get top k nearest neighbors based on Jaccard index
        ORDER BY jaccard_index DESC, p2.name
        WITH p1, COLLECT([p2.name, jaccard_index, intersection, union])[0..{k}] as neighbors
        
        WHERE SIZE(neighbors) = {k}   // return users with enough neighbors
        RETURN p1.name as reviewer, neighbors

        """

In [18]:
neighbors = {}
for i in graph.run(query, p_name = "Justin Teo", k = 10):
    neighbors[i[0]] = i[1]

print("# User Justin Teo's 10 nearest neighbors: customerID, jaccard_index, intersection, union")
pprint(neighbors)


{'Justin Teo': [['Julius Lim', 0.192, 48, 250],
                ['Cassie Ong', 0.18974358974358974, 37, 195],
                ['Zhihui Lim', 0.18614718614718614, 43, 231],
                ['Burpple Guides', 0.18201754385964913, 83, 456],
                ['Veronica Phua', 0.17254901960784313, 44, 255],
                ['Vanessa Kou', 0.1659919028340081, 41, 247],
                ['Wei Zhi Chiang', 0.16317991631799164, 39, 239],
                ['I makan Sg', 0.16170212765957448, 38, 235],
                ['Karl Ng', 0.16149068322981366, 26, 161],
                ['Xing Wei Chua', 0.15680473372781065, 53, 338]]}


## Find out the top 5 restaurants ranked by number of reviewed times from the nearest neighbors's reviewed restaurants 

In [22]:
# get the list of the nearest neighbors names
nearest_neighbors = [neighbors['Justin Teo'][i][0] for i in range(len(neighbors['Justin Teo']))]

query = """
        // get top n recommendations for user 'Justin Teo' from his nearest neighbors
        MATCH (p1:Reviewer),
              (neighbor:Reviewer)-[:Wrote]->(r:Review)-[:About]->(re:Restaurant)    
        WHERE p1.name = {p_name}
          AND neighbor.name in {nearest_neighbors}
          AND not (p1)-[:Wrote]->(r:Review)-[:About]->(re:Restaurant)         // filter for movies that our user hasn't reviewed
        
        WITH p1, re, COUNT(DISTINCT neighbor) as countnns // times reviewed by nearest neighbors
        ORDER BY p1.name, countnns DESC               
        RETURN p1.name as user, COLLECT([re.name, countnns])[0..{n}] as recommendations  
        """

recommendations = {}
for i in graph.run(query, p_name = "Justin Teo", nearest_neighbors = nearest_neighbors, n = 5):
    recommendations[i[0]] = i[1]
    
print("User Justin Teo's recommendations: Restaurant, number of reviewed by neighbors")
pprint(recommendations)


User Justin Teo's recommendations: Restaurant, number of reviewed by neighbors
{'Justin Teo': [['/keng-eng-kee-seafood', 9],
                ['/sunday-folks', 9],
                ['/firebakesg', 9],
                ['/birds-of-a-feather-sg', 8],
                ['/clinton-street-baking-company-singapore', 8]]}


## Unify the two parts above as a whole function

In [48]:
import sys
from pprint import pprint
from py2neo import Graph

# p_name = sys.argv[1:]
p_name = ['Justin Teo','Julius Lim','Jason Wong','Wei Zhi Chiang']

graph = Graph(host="localhost://7474", auth=("neo4j", "1234"))

def r_recommender(graph, cid, num_nearest_neighbors, num_recommendations):

    query = """
           MATCH (p1:Reviewer)-[:Wrote]->(r1:Review)-[:About]->(re:Restaurant)<-[:About]-(r2:Review)<-[:Wrote]-(p2:Reviewer)
           WHERE p1 <> p2 AND p1.name = {p_name}
           WITH p1, p2, COUNT(DISTINCT re) as intersection
           
           MATCH (p:Reviewer)-[:Wrote]->(r:Review)-[:About]->(re:Restaurant)
           WHERE p in [p1, p2]
           WITH p1, p2, intersection, COUNT(DISTINCT re) as union

           WITH p1, p2, intersection, union, 
              (intersection * 1.0 / union) as jaccard_index

           ORDER BY jaccard_index DESC, p2.name
           WITH p1, COLLECT(p2)[0..{k}] as neighbors
           WHERE SIZE(neighbors) = {k}                                              
           UNWIND neighbors as neighbor
           WITH p1, neighbor

           MATCH (neighbor)-[:Wrote]->(r:Review)-[:About]->(re:Restaurant)         
           WHERE not (p1)-[:Wrote]->(r:Review)-[:About]->(re:Restaurant)                        
           WITH p1, re, COUNT(DISTINCT neighbor) as countnns
           ORDER BY p1.name, countnns DESC                            
           RETURN p1.name as user, 
              COLLECT(re.name)[0..{n}] as recommendations  
           """

    recommendations = {}
    for p in p_name:
        for i in graph.run(query, p_name = p, k = num_nearest_neighbors, n = num_recommendations):
            recommendations[i[0]] = i[1]
    return recommendations

pprint(r_recommender(graph, p_name, 10, 5))

{'Jason Wong': ['/burnt-ends',
                '/park-bench-deli',
                '/the-coconut-club',
                '/birds-of-a-feather-sg',
                '/clinton-street-baking-company-singapore'],
 'Julius Lim': ['/alter-ego',
                '/park-bench-deli',
                '/birds-of-a-feather-sg',
                '/wildseed',
                '/keng-eng-kee-seafood'],
 'Justin Teo': ['/keng-eng-kee-seafood',
                '/sunday-folks',
                '/firebakesg',
                '/birds-of-a-feather-sg',
                '/clinton-street-baking-company-singapore'],
 'Wei Zhi Chiang': ['/matchaya-2',
                    '/park-bench-deli',
                    '/the-refinery-1',
                    '/birds-of-a-feather-sg',
                    '/clinton-street-baking-company-singapore']}
