In [19]:
from utils import create_movie_graph_db, query_db
from neo4j import GraphDatabase
import numpy as np

### Generate a simple movie, actor, director graph database
Before running this notebook, do the following:
* Run `docker-compose up &` to deploy your local neo4j database.
* Run `python create_movie_graph_db.py` to ingest data into the db.

### TODO
Check the `create_movie_graph_db.py` script and discuss whether you understand what is done there. Let's also test creating some more nodes and relationships below.

In [20]:
# Register a viewer for GoldenEye: create the person if missing, then
# link them to the movie with VIEWED and RATED (rating 4) relationships.
add_viewer_query = """
    MATCH (m: Movie {title: "GoldenEye"})
               
    MERGE (p: Person {name: "Aappo Pulkkinen"})
    MERGE (p)-[:VIEWED]->(m)
    MERGE (p)-[:RATED {rating: 4}]->(m)
    RETURN p
"""
res = query_db(add_viewer_query)
print(res)

[{'p': {'name': 'Aappo Pulkkinen'}}]


### Generate some more (dummy) viewer data

In [21]:
# Sanity check: draw one random rating in the range [0, 10)
10 * np.random.random()

0.7589742459342441

In [22]:
# Fetch the title of every movie currently in the database.
res = query_db("""
    MATCH (m: Movie)
    RETURN m.title as title
""")


def _cypher_string(text):
    """Render `text` as a single-quoted Cypher string literal.

    Backslashes and single quotes are escaped so that a value containing
    an apostrophe (common in movie titles) cannot terminate the literal
    early and break or alter the query it is embedded in.
    """
    escaped = text.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Create random viewers/raters: in each of the three rounds the movie list is
# reshuffled and user_i is assigned the i-th movie with a random rating in
# [0, 10), so every user ends up with three (possibly repeated) rated movies.
nr_movies = len(res)
raters = []
for _ in range(3):
    np.random.shuffle(res)
    raters.extend(
        {"name": f"user_{i}", "rated": res[i]["title"], "rating": np.random.random()*10}
        for i in range(nr_movies)
    )

# Add raters to the database.
# The query is built in a single f-string pass (Cypher braces are doubled) so
# that substituted values are never re-scanned for placeholders, and every
# string value goes through _cypher_string to be safely quoted.
# NOTE(review): if query_db can forward query parameters to the driver, prefer
# real Cypher parameters over embedded literals -- confirm against utils.query_db.
for rater in raters:
    title_literal = _cypher_string(rater["rated"])
    name_literal = _cypher_string(rater["name"])
    rating_literal = str(rater["rating"])
    query_db(f"""
        MATCH (m: Movie {{title: {title_literal}}})

        MERGE (p: Person {{name: {name_literal}}})
        MERGE (p)-[:VIEWED]->(m)
        MERGE (p)-[:RATED {{rating: {rating_literal}}}]->(m)
        RETURN p
    """)

### Test Cypher queries

In [23]:
# Count every node in the graph, whatever its label
count_nodes_query = """
  MATCH (n) 
  RETURN count(n) AS numberOfNodes
  """
res = query_db(count_nodes_query)
print(res)

[{'numberOfNodes': 6182}]


In [24]:
# Count only the nodes carrying the Movie label
count_movies_query = """
  MATCH (m:Movie) 
  RETURN count(m) AS numberOfMovies
  """
res = query_db(count_movies_query)
print(res)

[{'numberOfMovies': 1134}]


In [25]:
# Return a single Movie node with all of its properties
one_movie_query = """
  MATCH (m: Movie)
  RETURN m 
  LIMIT 1
  """
res = query_db(one_movie_query)
print(res)

[{'m': {'imdbRating': 6.6, 'id': '3', 'title': 'Grumpier Old Men', 'released': neo4j.time.Date(1995, 12, 22), 'info': 'This was Burgess Meredith\'s last film. He died of complications of Alzheimer\'s disease on September 9, 1997. He showed symptoms at the time of this movie\'s filming, and had to be coached during each scene in which he appeared. Meredith\'s acting talents are evident despite his failing mental faculties.,In the first movie Grandpa Gustafson says he\'s 94 years old. In the second movie he says he just turned 95. So less than a year has passed but somehow Melanie\'s daughter Allie aged 3 or 4 years.,Grandpa Gustafson: What the... what the hell is this?\nJohn Gustafson: That\'s lite beer.\nGrandpa Gustafson: Gee, I weigh ninety goddamn pounds, and you bring me this sloppin\' foam?\nJohn Gustafson: Ariel\'s got me on a diet because the doc said my cholestorol\'s a little too high.\nGrandpa Gustafson: Well let me tell you something now, Johnny. Last Thursday, I turned 95 y

In [26]:
# Count every relationship; the directed pattern matches each one exactly once
count_relationships_query = """
  MATCH ()<-[r]-()
  RETURN count(r) AS numberOfRelationships
  """
res = query_db(count_relationships_query)
print(res)

[{'numberOfRelationships': 15767}]


In [27]:
# List the distinct relationship types present in the graph
relationship_types_query = """
  MATCH ()<-[r]-()
  RETURN DISTINCT TYPE(r) as relationshipType
  """
res = query_db(relationship_types_query)
print(res)

[{'relationshipType': 'DIRECTED'}, {'relationshipType': 'ACTED_IN'}, {'relationshipType': 'IN_GENRE'}, {'relationshipType': 'VIEWED'}, {'relationshipType': 'RATED'}]


In [28]:
# Count only the relationships of type ACTED_IN
count_acted_in_query = """
  MATCH ()<-[a:ACTED_IN]-()
  RETURN count(a) AS numberOfActedInRelationships
  """
res = query_db(count_acted_in_query)
print(res)

[{'numberOfActedInRelationships': 4509}]


In [29]:
# Titles of the movies that Pierce Brosnan has an ACTED_IN relationship to
brosnan_movies_query = """
  MATCH (m)<-[a:ACTED_IN]-(p:Person {name: "Pierce Brosnan"})
  RETURN m.title as brosnanMovie
  """
res = query_db(brosnan_movies_query)
print(res)

[{'brosnanMovie': 'After the Sunset'}, {'brosnanMovie': 'GoldenEye'}]


In [30]:
# Fetch the names of the directors of the movies Pierce Brosnan has acted in:
# the pattern walks from a director, through DIRECTED, to a movie that
# Pierce Brosnan is linked to via ACTED_IN.
res = query_db("""
  MATCH (d:Person)-[:DIRECTED]->(m)<-[a:ACTED_IN]-(p:Person {name: "Pierce Brosnan"})
  RETURN d.name as brosnanMovieDirector
  """)
print(res)

[{'brosnanMovieDirector': 'Brett Ratner'}, {'brosnanMovieDirector': 'Martin Campbell'}]


In [31]:
# Fetch the movies that have some actor in common with GoldenEye.
# The pattern yields one row per shared actor, so DISTINCT is used to keep a
# title from repeating when several GoldenEye actors appear in the same movie.
res = query_db("""
    MATCH (m:Movie)<-[:ACTED_IN]-(p:Person)-[:ACTED_IN]->(goldenEye:Movie {title: 'GoldenEye'})
    RETURN DISTINCT m.title as goldenEyeActorMovie
    """)
print(res)

[{'goldenEyeActorMovie': 'Black Death'}, {'goldenEyeActorMovie': 'Black Beauty'}, {'goldenEyeActorMovie': 'Flightplan'}, {'goldenEyeActorMovie': 'City of Industry'}, {'goldenEyeActorMovie': 'After the Sunset'}]


### TODO

* Create and run a query to fetch the actors in the movie GoldenEye.
* Create and run a query to fetch all the movies that have a rating > 8.5.
* Advanced: Create and run a query to fetch all the actors that have acted in movies that have a rating > 8.5. Hint: You can "chain" MATCH statements.

In [32]:
# TODO answer 1: names of the people with an ACTED_IN relationship to GoldenEye
goldeneye_actors_query = """
    MATCH (p:Person)-[:ACTED_IN]->(goldenEye:Movie {title: 'GoldenEye'})
    RETURN p.name as goldenEyeActor
    """
res = query_db(goldeneye_actors_query)
print(res)

[{'goldenEyeActor': 'Izabella Scorupco'}, {'goldenEyeActor': 'Sean Bean'}, {'goldenEyeActor': 'Famke Janssen'}, {'goldenEyeActor': 'Pierce Brosnan'}]


In [33]:
# TODO answer 2: titles of the movies whose imdbRating is above 8.5
top_rated_movies_query = """
    MATCH (m:Movie)
    WHERE m.imdbRating > 8.5
    RETURN m.title as title
    """
res = query_db(top_rated_movies_query)
print(res)

[{'title': 'Forrest Gump'}, {'title': 'Goodfellas'}, {'title': 'American History X'}, {'title': 'City Lights'}, {'title': 'Berlin Alexanderplatz'}]


In [34]:
# TODO answer 3 (advanced): chain two MATCH clauses -- first select the movies
# with imdbRating above 8.5, then find the people who acted in them.
top_rated_actors_query = """
    MATCH (m:Movie)
    WHERE m.imdbRating > 8.5
               
    MATCH (p:Person)-[:ACTED_IN]->(m)
    RETURN p.name as actor, m.title as title
    """
res = query_db(top_rated_actors_query)
print(res)

[{'actor': 'Tom Hanks', 'title': 'Forrest Gump'}, {'actor': 'Robin Wright', 'title': 'Forrest Gump'}, {'actor': 'Michael Conner Humphreys', 'title': 'Forrest Gump'}, {'actor': 'Sally Field', 'title': 'Forrest Gump'}, {'actor': 'Ray Liotta', 'title': 'Goodfellas'}, {'actor': 'Lorraine Bracco', 'title': 'Goodfellas'}, {'actor': 'Joe Pesci', 'title': 'Goodfellas'}, {'actor': 'Robert De Niro', 'title': 'Goodfellas'}, {'actor': 'Edward Norton', 'title': 'American History X'}, {'actor': 'Edward Furlong', 'title': 'American History X'}, {'actor': "Beverly D'Angelo", 'title': 'American History X'}, {'actor': 'Jennifer Lien', 'title': 'American History X'}, {'actor': 'Harry Myers', 'title': 'City Lights'}, {'actor': 'Al Ernest Garcia', 'title': 'City Lights'}, {'actor': 'Florence Lee', 'title': 'City Lights'}, {'actor': 'Virginia Cherrill', 'title': 'City Lights'}, {'actor': 'Günter Lamprecht', 'title': 'Berlin Alexanderplatz'}, {'actor': 'Claus Holm', 'title': 'Berlin Alexanderplatz'}, {'actor