In [61]:
# pip install neo4j
from getpass import getpass

import pandas as pd
from neo4j import GraphDatabase
# Display settings for the recommendation tables shown below.
# The fully qualified "display.*" keys are used on purpose: abbreviated keys are resolved
# by pattern matching and stop working as soon as pandas adds another option whose name
# matches the same pattern.
pd.set_option("display.expand_frame_repr", True)  # allow wide frames to wrap across lines
pd.set_option("display.max_rows", 999)            # show up to 999 rows before truncating
pd.set_option("display.max_colwidth", 100)        # show up to 100 characters per cell

In [7]:
# First provide your neo4j account information for connection
# An empty answer falls back to "neo4j", which is the default the prompt advertises.
username = input("Please input your neo4j username (neo4j is default): ").strip() or "neo4j"
# getpass does not echo what is typed, so the password is not stored in the saved notebook output
password = getpass("Please input your password: ")

Please input your neo4j username (neo4j is default): neo4j
Please input your password: ········


In [8]:
# Connect to the neo4j database server
# (Bolt endpoint on localhost, authenticated with the username/password entered above)
graphdb = GraphDatabase.driver(
    uri="bolt://localhost:7687",
    auth=(username, password),
)

In [9]:
# Open a session on the driver; the cells below send their Cypher statements through it
session = graphdb.session()

<h4>User Input</h4>

In [27]:
# Input your name
# (it is stored as the `name` property of your User node and is used to look that node up in the queries below)
myname = input("What is your name? ")

What is your name? Boyuan Li


In [28]:
# Create your user node in the database.
# - MERGE (instead of CREATE) keeps a re-run of this cell from adding a second User node
#   with the same name, which would make every later MATCH on that name hit both nodes.
# - The name is sent as the $name query parameter rather than formatted into the Cypher
#   text, so quotes or other special characters in it cannot break or alter the statement.
q1 = 'MERGE (p:User {name: $name})'
session.run(q1, {"name": myname})

<neo4j.BoltStatementResult at 0x134b476df48>

![title](img/user_node.png)

In [29]:
# Input the number of movies you want to rate. At least 10 is required in order to get enough information to get recommendations
# (the similarity queries further down only consider users who share more than 10 rated movies with you)
n = int(input("How many movies you want to rate?(At least 10) "))

# Input movies' titles and ratings as {title: rating}
movie_list = dict()
for _ in range(n):
    # Keep asking until the entry can be parsed, so one typo does not discard everything entered so far
    while True:
        data = input('Please enter title whose intial letter is capitalized & rating separated by ";"')
        # Split on the LAST ';' so that a title which itself contains ';' is still accepted
        title, separator, rating_text = data.rpartition(';')
        title = title.strip()
        try:
            rating = int(rating_text)
        except ValueError:
            rating = None
        if separator and title and rating is not None:
            break
        print('Could not read that entry; expected something like "Toy Story;9".')
    movie_list[title] = rating

# Statement to create a rating relationship between users and movies.
# Name, title and rating are query parameters, so titles containing quotes
# (e.g. "Singin' in the Rain") no longer break the statement.
q2 = """MATCH (p:User),(m:Movie)
WHERE p.name = $name AND m.title = $title
CREATE (p)-[r:RATED {rating: $rating}]->(m)
RETURN type(r)"""

# Establish a rating relationship
for title, rating in movie_list.items():
    created = list(session.run(q2, {"name": myname, "title": title, "rating": rating}))
    # An empty result means the MATCH found no such user/movie pair, so nothing was created
    if not created:
        print('No movie titled "{}" was found; this rating was not stored.'.format(title))

How many movies you want to rate?(At least 10) 12
Please enter title whose intial letter is capitalized & rating separated by ":"Toy Story:9
Please enter title whose intial letter is capitalized & rating separated by ":"Congo:8
Please enter title whose intial letter is capitalized & rating separated by ":"Jurassic Park:8
Please enter title whose intial letter is capitalized & rating separated by ":"Forrest Gump:10
Please enter title whose intial letter is capitalized & rating separated by ":"Batman:9
Please enter title whose intial letter is capitalized & rating separated by ":"Ghost:4
Please enter title whose intial letter is capitalized & rating separated by ":"Speed:6
Please enter title whose intial letter is capitalized & rating separated by ":"Inception:9
Please enter title whose intial letter is capitalized & rating separated by ":"Tombstone:8
Please enter title whose intial letter is capitalized & rating separated by ":"Home Alone:7
Please enter title whose intial letter is capi

![title](img/rating_relationship.png)

<h4>1. Content-Based Filtering - Only Consider Genres Liked by the User</h4>
    
    Which genres appear in the movies a particular user rated above their own average rating? Use those genres to score similar movies the user has not rated yet.

![title](img/urmg.png)

In [73]:
# This simple approach is to only take genres into consideration
# Based on the movies rated by you, which genres belong to movies you rated above your own average?
# Movies you have not rated are then scored by the genres they share with those movies.
# It only focuses on yourself.
# The user name is passed as the $name query parameter instead of being formatted into the query text.
q3 = """MATCH (u:User {name: $name})-[r:RATED]->(m:Movie)
WITH u, avg(r.rating) AS mean

MATCH (u)-[r:RATED]->(m:Movie)-[:IN_GENRE]->(g:Genre)
WHERE r.rating > mean

WITH u, g, COUNT(*) AS score

MATCH (g)<-[:IN_GENRE]-(rec:Movie)
WHERE NOT EXISTS((u)-[:RATED]->(rec))

RETURN rec.title AS recommendation, rec.year AS year, COLLECT(DISTINCT g.name) AS genres, SUM(score) AS sumscore
ORDER BY sumscore DESC LIMIT 25"""
recommendations1_temp = session.run(q3, {"name": myname})


# Transform the output into a dataframe
# (the per-column lists are kept because the comparison cell at the end reuses `recommendation`)
recommendation = []
year = []
genres = []
sscore = []

for node in recommendations1_temp:
    recommendation.append(node[0])
    year.append(node[1])
    genres.append(node[2])
    sscore.append(node[3])

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every row when it still existed.
recommendations1 = pd.DataFrame(
    {"Recommendation": recommendation, "Year": year, "Genres": genres, "Total Score": sscore},
    columns=["Recommendation", "Year", "Genres", "Total Score"],
)
# Display recommendations
recommendations1


Unnamed: 0,Recommendation,Year,Genres,Total Score
0,War of the Worlds,2005.0,"[Adventure, Action, Sci-Fi, Thriller]",26
1,Rubber,2010.0,"[Comedy, Adventure, Western, Action, Drama, Crime, Mystery, Thriller]",26
2,Aqua Teen Hunger Force Colon Movie Film for Theaters,2007.0,"[Fantasy, Comedy, Animation, Adventure, Action, Mystery, Sci-Fi]",24
3,Motorama,1991.0,"[Fantasy, Comedy, Adventure, Drama, Crime, Mystery, Sci-Fi, Thriller]",24
4,Mars Needs Moms,2011.0,"[Comedy, Children, Animation, Adventure, Action, Sci-Fi, IMAX]",23
5,"Stunt Man, The",1980.0,"[Romance, Comedy, Adventure, Action, Drama, Thriller]",23
6,Super Mario Bros.,1993.0,"[Fantasy, Comedy, Children, Adventure, Action, Sci-Fi]",22
7,Osmosis Jones,2001.0,"[Romance, Comedy, Animation, Action, Drama, Crime, Thriller]",22
8,Futurama: Bender's Game,2008.0,"[Fantasy, Comedy, Animation, Adventure, Action, Sci-Fi]",22
9,Meet the Robinsons,2007.0,"[Comedy, Children, Animation, Adventure, Action, Sci-Fi]",22


<h4>2. Collaborative Filtering - Cosine Similarity</h4>

Each relationship has a weight that we can take into account

The value of cosine similarity will tell us how similar two users are

Users with a higher value of cosine similarity will have more similar preferences

![title](img/cosine.png)

![title](img/urmru.png)

In [74]:
# This approach is using cosine similarity to find users who have similar preferences.
# The higher sim is, the more similar preference a user has
# It will show the user who has the most similar preference for watching movies
# Then we decide to choose 25 movies which have the highest rating from the user as recommendation
# User names are passed as the $name query parameter instead of being formatted into the query text.

q4 = """MATCH (u1:User {name: $name})-[x:RATED]->(m:Movie)<-[y:RATED]-(u2:User)
WITH COUNT(m) AS numMovies, SUM(x.rating * y.rating) AS xyDotProduct,
SQRT(REDUCE(xDot = 0.0, a IN COLLECT(x.rating) | xDot + a^2)) AS xLength,
SQRT(REDUCE(yDot = 0.0, b IN COLLECT(y.rating) | yDot + b^2)) AS yLength,
u1, u2 WHERE numMovies > 10
RETURN u1.name, u2.name, xyDotProduct / (xLength * yLength) AS sim
ORDER BY sim DESC
LIMIT 1;"""

result = session.run(q4, {"name": myname})

# Save the user name
# The query returns no row when nobody shares more than 10 rated movies with you;
# fail with an explanation instead of a bare IndexError.
most_similar = result.values()
if not most_similar:
    raise ValueError("No other user shares more than 10 rated movies with you; "
                     "rate more movies and run this cell again.")
user_name = most_similar[0][1]

# Display the 25 highest rating movies of the user
q5 = """MATCH (u:User {name: $name})-[x:RATED]->(m:Movie)
RETURN m.title, x.rating
ORDER BY x.rating DESC
LIMIT 25"""

recommendations2_temp = session.run(q5, {"name": user_name})


# Transform the output into a dataframe
# (the per-column lists are kept because the comparison cell at the end reuses `recommendation_2`)
recommendation_2 = []
rating = []

for node in recommendations2_temp:
    recommendation_2.append(node[0])
    rating.append(node[1])

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every row when it still existed.
recommendations2 = pd.DataFrame(
    {"Recommendation": recommendation_2, "Rating": rating},
    columns=["Recommendation", "Rating"],
)
# Display recommendations
recommendations2

Unnamed: 0,Recommendation,Rating
0,The Slipper and the Rose: The Story of Cinderella,5.0
1,Cinderella,5.0
2,Arsenic and Old Lace,5.0
3,Moulin Rouge,5.0
4,"Phantom of the Opera, The",5.0
5,Wallace & Gromit: A Close Shave,5.0
6,"Ideal Husband, An",5.0
7,"Little Mermaid, The",5.0
8,"Princess Bride, The",5.0
9,Shrek,5.0


<h4>3. Collaborative Filtering - kNN Recommendations</h4>

This requires Pearson similarity, which accounts for the fact that different users can have different average ratings.

We should consider that some users tend to rate movies higher than others

![title](img/pearson.png)

In [70]:
# This method is to get recommendations voted by the k most similar users
# Outline of the query: compute the Pearson similarity between you and every user who shares
# more than 10 rated movies with you, keep the 10 most similar users, then score the movies
# they rated (and you did not) by the sum of pearson * rating.

q_1 = 'MATCH (u1:User {name:"'
# The name is spliced into a double-quoted Cypher string literal, so backslashes and double
# quotes are escaped first; without this a name containing either would break the query or
# change its meaning. Names without those characters yield exactly the same query as before.
q_2 = myname.replace('\\', '\\\\').replace('"', '\\"')
q_3 = '''"})-[r:RATED]->(m:Movie)
WITH u1, avg(r.rating) AS u1_mean
MATCH (u1)-[r1:RATED]->(m:Movie)<-[r2:RATED]-(u2)
WITH u1, u1_mean, u2, COLLECT({r1: r1, r2: r2}) AS ratings WHERE size(ratings) > 10
MATCH (u2)-[r:RATED]->(m:Movie)
WITH u1, u1_mean, u2, avg(r.rating) AS u2_mean, ratings
UNWIND ratings AS r
WITH sum( (r.r1.rating-u1_mean) * (r.r2.rating-u2_mean) ) AS nom,
     sqrt( sum( (r.r1.rating - u1_mean)^2) * sum( (r.r2.rating - u2_mean) ^2)) AS denom,
     u1, u2 WHERE denom <> 0
WITH u1, u2, nom/denom AS pearson
ORDER BY pearson DESC LIMIT 10
MATCH (u2)-[r:RATED]->(m:Movie) WHERE NOT EXISTS( (u1)-[:RATED]->(m) )
RETURN m.title, SUM( pearson * r.rating) AS score
ORDER BY score DESC 
LIMIT 25'''
q6 = q_1+q_2+q_3


In [71]:
# Run the kNN / Pearson query assembled in the previous cell
recommendations3_temp = session.run(q6)

# Transform the output into a dataframe
# (the per-column lists are kept because the comparison cell at the end reuses `recommendation_3`)
recommendation_3 = []
score = []

for node in recommendations3_temp:
    recommendation_3.append(node[0])
    score.append(node[1])

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every row when it still existed.
recommendations3 = pd.DataFrame(
    {"Recommendation": recommendation_3, "Score": score},
    columns=["Recommendation", "Score"],
)
# Display recommendations
recommendations3

Unnamed: 0,Recommendation,Score
0,King Kong,3.284594
1,"Italian Job, The",3.225118
2,Singin' in the Rain,3.008975
3,"Matrix, The",2.985357
4,"Phantom of the Opera, The",2.925007
5,Back to the Future,2.896574
6,Star Wars: Episode V - The Empire Strikes Back,2.896574
7,Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark),2.847853
8,"Lord of the Rings: The Two Towers, The",2.831409
9,Star Wars: Episode IV - A New Hope,2.831409


<h4>Compare and Contrast the Three Recommendation Approaches</h4>

In [75]:
# Side-by-side view of the titles recommended by the three approaches.
# Each list is wrapped in its own Series so that lists of different lengths are aligned by
# position and the shorter ones are padded with NaN; indexing all three lists by
# len(recommendation) raised IndexError whenever one of the other lists was shorter.
# Building the frame in one step also avoids DataFrame.append, which was removed in pandas 2.0.
comparison = pd.DataFrame(
    {
        "Recommend1": pd.Series(recommendation, dtype=object),
        "Recommend2": pd.Series(recommendation_2, dtype=object),
        "Recommend3": pd.Series(recommendation_3, dtype=object),
    },
    columns=["Recommend1", "Recommend2", "Recommend3"],
)
comparison

Unnamed: 0,Recommend1,Recommend2,Recommend3
0,War of the Worlds,The Slipper and the Rose: The Story of Cinderella,King Kong
1,Rubber,Cinderella,"Italian Job, The"
2,Aqua Teen Hunger Force Colon Movie Film for Theaters,Arsenic and Old Lace,Singin' in the Rain
3,Motorama,Moulin Rouge,"Matrix, The"
4,Mars Needs Moms,"Phantom of the Opera, The","Phantom of the Opera, The"
5,"Stunt Man, The",Wallace & Gromit: A Close Shave,Back to the Future
6,Super Mario Bros.,"Ideal Husband, An",Star Wars: Episode V - The Empire Strikes Back
7,Osmosis Jones,"Little Mermaid, The",Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark)
8,Futurama: Bender's Game,"Princess Bride, The","Lord of the Rings: The Two Towers, The"
9,Meet the Robinsons,Shrek,Star Wars: Episode IV - A New Hope
