In [53]:
from utils import create_movie_graph_db, query_db
from neo4j import GraphDatabase
import numpy as np

### Generate a simple movie, actor, director graph database
Before running this notebook, do the following:
* Run `docker-compose up &` to deploy your local neo4j database.
* Run `python create_movie_graph_db.py` to ingest data into the db.

### TODO
Check the `create_movie_graph_db.py` script and discuss whether you understand what it does. Let's also try creating some more nodes and relationships below.

In [44]:
# Attach one viewer to GoldenEye: the query matches the movie by title, then
# MERGEs the Person node plus a VIEWED and a RATED (rating 4) relationship.
add_viewer_query = """
    MATCH (m: Movie {title: "GoldenEye"})
               
    MERGE (p: Person {name: "Aappo Pulkkinen"})
    MERGE (p)-[:VIEWED]->(m)
    MERGE (p)-[:RATED {rating: 4}]->(m)
    RETURN p
"""
res = query_db(add_viewer_query)
print(res)

[{'p': {'name': 'Aappo Pulkkinen'}}]


### Generate some more (dummy) viewer data

In [68]:
# Sample a single dummy rating: a uniform draw from [0, 1) scaled by 10
10 * np.random.random()

6.5251991254462265

In [74]:
# Get the titles of all movies
res = query_db("""
    MATCH (m: Movie)
    RETURN m.title as title
""")


def cypher_string_literal(value):
    """Return `value` as a single-quoted Cypher string literal.

    Backslashes and single quotes are escaped so that values such as a movie
    title containing an apostrophe cannot terminate the literal early and
    break (or alter) the query.
    """
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Create random viewers/raters: in each of the 3 rounds the movie list is
# shuffled in place and user_i rates whichever movie ends up at position i,
# so every dummy user gets up to three rated movies.
nr_movies = len(res)
raters = []
for _ in range(3):  # `_` instead of `iter`, which shadowed the builtin
    np.random.shuffle(res)
    raters.extend(
        {"name": f"user_{i}", "rated": res[i]["title"], "rating": np.random.random() * 10}
        for i in range(nr_movies)
    )

# Add raters to the database.
# The values are embedded as escaped literals and the query is built in a
# single pass, so text inside a title is never re-interpreted as a placeholder.
# NOTE(review): if query_db can forward driver-level query parameters, passing
# title/name/rating as parameters would be preferable to string building.
for rater in raters:
    title_literal = cypher_string_literal(rater["rated"])
    name_literal = cypher_string_literal(rater["name"])
    query_db(f"""
        MATCH (m: Movie {{title: {title_literal}}})

        MERGE (p: Person {{name: {name_literal}}})
        MERGE (p)-[:VIEWED]->(m)
        MERGE (p)-[:RATED {{rating: {rater["rating"]}}}]->(m)
        RETURN p
    """)

### Test Cypher queries

In [75]:
# Count every node in the graph, regardless of label
count_nodes_query = """
  MATCH (n) 
  RETURN count(n) AS numberOfNodes
  """
res = query_db(count_nodes_query)
print(res)

[{'numberOfNodes': 6182}]


In [76]:
# Count only the nodes that carry the Movie label
count_movies_query = """
  MATCH (m:Movie) 
  RETURN count(m) AS numberOfMovies
  """
res = query_db(count_movies_query)
print(res)

[{'numberOfMovies': 1134}]


In [77]:
# Return a single Movie node with all of its properties
# (the printed result is long because the node includes its embedding vector)
one_movie_query = """
  MATCH (m: Movie)
  RETURN m 
  LIMIT 1
  """
res = query_db(one_movie_query)
print(res)

[{'m': {'imdbRating': 6.6, 'embedding': [-0.20988285541534424, -0.2100420594215393, 0.41892680525779724, 0.08248212933540344, 0.13814087212085724, -0.2891976833343506, -0.13973107933998108, 0.6192314624786377, -0.7924983501434326, 0.061394356191158295, 0.5391546487808228, -0.19037604331970215, 0.4622304439544678, 0.15591566264629364, -0.232987180352211, 0.20786692202091217, 0.1844959855079651, 0.013133191503584385, 0.5213022232055664, -0.7000389099121094, 0.05497686192393303, 0.025182131677865982, 0.5445494055747986, 0.03348461538553238, 0.24711187183856964, 0.1899382770061493, 0.36684226989746094, 0.4195251166820526, 0.24327564239501953, -0.2636401355266571, -0.6813348531723022, 0.637729823589325, 0.13960522413253784, -0.05595546215772629, -0.03702997788786888, -0.38619402050971985, 0.119007408618927, 0.5217693448066711, 0.2272239774465561, 0.6469639539718628, -0.2893790006637573, 0.10400788486003876, -0.03127666935324669, -0.5564677119255066, 0.12245607376098633, -0.1823558509349823,

In [78]:
# Count the relationships of any type, matched with a directed pattern
# between two unconstrained nodes
count_relationships_query = """
  MATCH ()<-[r]-()
  RETURN count(r) AS numberOfRelationships
  """
res = query_db(count_relationships_query)
print(res)

[{'numberOfRelationships': 15768}]


In [79]:
# List each relationship type that occurs in the graph exactly once
relationship_types_query = """
  MATCH ()<-[r]-()
  RETURN DISTINCT TYPE(r) as relationshipType
  """
res = query_db(relationship_types_query)
print(res)

[{'relationshipType': 'DIRECTED'}, {'relationshipType': 'ACTED_IN'}, {'relationshipType': 'IN_GENRE'}, {'relationshipType': 'VIEWED'}, {'relationshipType': 'RATED'}]


In [None]:
# Count only the relationships whose type is ACTED_IN
count_acted_in_query = """
  MATCH ()<-[a:ACTED_IN]-()
  RETURN count(a) AS numberOfActedInRelationships
  """
res = query_db(count_acted_in_query)
print(res)

In [None]:
# Titles of the nodes that the Person named "Pierce Brosnan" has an
# ACTED_IN relationship to
brosnan_movies_query = """
  MATCH (m)<-[a:ACTED_IN]-(p:Person {name: "Pierce Brosnan"})
  RETURN m.title as brosnanMovie
  """
res = query_db(brosnan_movies_query)
print(res)

In [None]:
# Match the directors of the movies Pierce Brosnan acted in.
# MATCH yields one row per matched path, so DISTINCT is needed to list a
# director only once even if they directed several of those movies.
res = query_db("""
  MATCH (d:Person)-[:DIRECTED]->(m)<-[a:ACTED_IN]-(p:Person {name: "Pierce Brosnan"})
  RETURN DISTINCT d.name as brosnanMovieDirector
  """)
print(res)

In [None]:
# Fetch movies that have some actor in common with GoldenEye.
# A movie sharing several actors with GoldenEye matches once per shared actor,
# so DISTINCT keeps each title in the result only once.
res = query_db("""
    MATCH (m:Movie)<-[:ACTED_IN]-(p:Person)-[:ACTED_IN]->(goldenEye:Movie {title: 'GoldenEye'})
    RETURN DISTINCT m.title as goldenEyeActorMovie
    """)
print(res)

### TODO

* Create and run a query to fetch the actors in the movie GoldenEye.
* Create and run a query to fetch all the movies that have a rating > 8.5 (movie nodes store their rating in the `imdbRating` property).
* Advanced: Create and run a query to fetch all the actors who have acted in movies that have a rating > 8.5. Hint: You can "chain" MATCH statements.