## 1. Setup

In [1]:
from gqlalchemy import Memgraph
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Connect to MemGraph
memgraph = Memgraph(host='127.0.0.1', port=7687)
memgraph.drop_database()

print("[OK] Connected to MemGraph")

[OK] Connected to MemGraph


## 2. Create Sample Movie Database

Let's create a more complex graph with movies, actors, and directors.

In [2]:
# Create movies
movies = [
    {"title": "The Matrix", "year": 1999, "genre": "Sci-Fi"},
    {"title": "The Matrix Reloaded", "year": 2003, "genre": "Sci-Fi"},
    {"title": "John Wick", "year": 2014, "genre": "Action"},
    {"title": "Speed", "year": 1994, "genre": "Action"},
    {"title": "The Devil's Advocate", "year": 1997, "genre": "Thriller"}
]

for movie in movies:
    query = f"""
    CREATE (m:Movie {{title: "{movie['title']}", year: {movie['year']}, genre: "{movie['genre']}"}});
    """
    print(f"query = {query}")
    memgraph.execute(query)

print(f"[OK] Created {len(movies)} movies")

query = 
    CREATE (m:Movie {title: "The Matrix", year: 1999, genre: "Sci-Fi"});
    
query = 
    CREATE (m:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Sci-Fi"});
    
query = 
    CREATE (m:Movie {title: "John Wick", year: 2014, genre: "Action"});
    
query = 
    CREATE (m:Movie {title: "Speed", year: 1994, genre: "Action"});
    
query = 
    CREATE (m:Movie {title: "The Devil's Advocate", year: 1997, genre: "Thriller"});
    
[OK] Created 5 movies


In [3]:
# Create actors and directors
people = [
    {"name": "Keanu Reeves", "role": "Actor", "birth_year": 1964},
    {"name": "Laurence Fishburne", "role": "Actor", "birth_year": 1961},
    {"name": "Carrie-Anne Moss", "role": "Actor", "birth_year": 1967},
    {"name": "Sandra Bullock", "role": "Actor", "birth_year": 1964},
    {"name": "Al Pacino", "role": "Actor", "birth_year": 1940},
    {"name": "Lana Wachowski", "role": "Director", "birth_year": 1965},
    {"name": "Lilly Wachowski", "role": "Director", "birth_year": 1967},
    {"name": "Chad Stahelski", "role": "Director", "birth_year": 1968}
]

for person in people:
    query = f"""
    CREATE (p:Person:{person['role']} {{name: "{person['name']}", birth_year: {person['birth_year']}}});
    """
    memgraph.execute(query)

print(f"[OK] Created {len(people)} people")

[OK] Created 8 people


In [4]:
# Create relationships
relationships = [
    # The Matrix
    ("Keanu Reeves", "ACTED_IN", "The Matrix", {"role": "Neo"}),
    ("Laurence Fishburne", "ACTED_IN", "The Matrix", {"role": "Morpheus"}),
    ("Carrie-Anne Moss", "ACTED_IN", "The Matrix", {"role": "Trinity"}),
    ("Lana Wachowski", "DIRECTED", "The Matrix", {}),
    ("Lilly Wachowski", "DIRECTED", "The Matrix", {}),
    # The Matrix Reloaded
    ("Keanu Reeves", "ACTED_IN", "The Matrix Reloaded", {"role": "Neo"}),
    ("Laurence Fishburne", "ACTED_IN", "The Matrix Reloaded", {"role": "Morpheus"}),
    ("Carrie-Anne Moss", "ACTED_IN", "The Matrix Reloaded", {"role": "Trinity"}),
    ("Lana Wachowski", "DIRECTED", "The Matrix Reloaded", {}),
    ("Lilly Wachowski", "DIRECTED", "The Matrix Reloaded", {}),
    # John Wick
    ("Keanu Reeves", "ACTED_IN", "John Wick", {"role": "John Wick"}),
    ("Chad Stahelski", "DIRECTED", "John Wick", {}),
    # Speed
    ("Keanu Reeves", "ACTED_IN", "Speed", {"role": "Jack Traven"}),
    ("Sandra Bullock", "ACTED_IN", "Speed", {"role": "Annie Porter"}),
    # The Devil's Advocate
    ("Keanu Reeves", "ACTED_IN", "The Devil's Advocate", {"role": "Kevin Lomax"}),
    ("Al Pacino", "ACTED_IN", "The Devil's Advocate", {"role": "John Milton"}),
]

for person, rel_type, movie, props in relationships:
    props_str = ", ".join([f"{k}: '{v}'" for k, v in props.items()])
    props_clause = f" {{{props_str}}}" if props_str else ""
    
    query = f"""
    MATCH (p:Person {{name: "{person}"}}), (m:Movie {{title: "{movie}"}})
    CREATE (p)-[:{rel_type}{props_clause}]->(m);
    """
    memgraph.execute(query)

print(f"[OK] Created {len(relationships)} relationships")

[OK] Created 16 relationships


## 3. Pattern Matching Queries

Find complex patterns in the graph.

In [18]:
# Find all actors who worked with Keanu Reeves
query = """
MATCH (keanu:Actor {name: 'Keanu Reeves'})-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(costar:Actor)
WHERE keanu.name <> costar.name
RETURN DISTINCT costar.name as costar, m.title as movie
ORDER BY costar;
"""
results = memgraph.execute_and_fetch(query)


df = pd.DataFrame([dict(row) for row in results])
print("Actors who worked with Keanu Reeves:")
print(df)

Actors who worked with Keanu Reeves:
               costar                 movie
0           Al Pacino  The Devil's Advocate
1    Carrie-Anne Moss            The Matrix
2    Carrie-Anne Moss   The Matrix Reloaded
3  Laurence Fishburne            The Matrix
4  Laurence Fishburne   The Matrix Reloaded
5      Sandra Bullock                 Speed


In [19]:
# Find actors who appeared in multiple movies together
query = """
MATCH (a1:Actor)-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(a2:Actor)
WHERE id(a1) < id(a2)
WITH a1, a2, collect(m.title) as movies
WHERE size(movies) > 1
RETURN a1.name as actor1, a2.name as actor2, movies, size(movies) as movie_count
ORDER BY movie_count DESC;
"""
results = memgraph.execute_and_fetch(query)

df = pd.DataFrame([dict(row) for row in results])
print("Actors who appeared in multiple movies together:")
print(df)

Actors who appeared in multiple movies together:
               actor1              actor2                             movies  \
0        Keanu Reeves    Carrie-Anne Moss  [The Matrix, The Matrix Reloaded]   
1  Laurence Fishburne    Carrie-Anne Moss  [The Matrix, The Matrix Reloaded]   
2        Keanu Reeves  Laurence Fishburne  [The Matrix, The Matrix Reloaded]   

   movie_count  
0            2  
1            2  
2            2  


## 4. Path Finding

Find shortest paths and all paths between nodes.

In [34]:
query = """
MATCH (sandra:Actor {name: 'Sandra Bullock'}), (al:Actor {name: 'Al Pacino'})
MATCH path = (sandra)-[*..4]-(al)
RETURN path
"""
results = list(memgraph.execute_and_fetch(query))
print(f"results={results}\n\nresults[0].keys() = {results[0].keys()}\n\nresults[0]['path'] = {results[0]['path']}")

results=[{'path': <Path nodes=[<Node id=96101 labels={'Actor', 'Person'} properties={'name': 'Sandra Bullock', 'birth_year': 1964}>, <Node id=96096 labels={'Movie'} properties={'title': 'Speed', 'year': 1994, 'genre': 'Action'}>, <Node id=96098 labels={'Actor', 'Person'} properties={'name': 'Keanu Reeves', 'birth_year': 1964}>, <Node id=96097 labels={'Movie'} properties={'title': "The Devil's Advocate", 'year': 1997, 'genre': 'Thriller'}>, <Node id=96102 labels={'Actor', 'Person'} properties={'name': 'Al Pacino', 'birth_year': 1940}>] relationships=[<Relationship id=225098 start_node_id=96101 end_node_id=96096 nodes=(96101, 96096) type=ACTED_IN properties={'role': 'Annie Porter'}>, <Relationship id=225097 start_node_id=96098 end_node_id=96096 nodes=(96098, 96096) type=ACTED_IN properties={'role': 'Jack Traven'}>, <Relationship id=225099 start_node_id=96098 end_node_id=96097 nodes=(96098, 96097) type=ACTED_IN properties={'role': 'Kevin Lomax'}>, <Relationship id=225100 start_node_id=961

In [36]:
query = """
MATCH (sandra:Actor {name: 'Sandra Bullock'}), (al:Actor {name: 'Al Pacino'})
MATCH path = (sandra)-[*..4]-(al)
RETURN nodes(path)
"""
results = list(memgraph.execute_and_fetch(query))
results

[{'nodes(path)': [<mgclient.Node(id=96101, labels={'Actor', 'Person'}, properties={'birth_year': 1964, 'name': 'Sandra Bullock'}) at 0x7f7b84e2c990>,
   <mgclient.Node(id=96096, labels={'Movie'}, properties={'genre': 'Action', 'title': 'Speed', 'year': 1994}) at 0x7f7b84e2c030>,
   <mgclient.Node(id=96098, labels={'Actor', 'Person'}, properties={'birth_year': 1964, 'name': 'Keanu Reeves'}) at 0x7f7b84e2d110>,
   <mgclient.Node(id=96097, labels={'Movie'}, properties={'genre': 'Thriller', 'title': "The Devil's Advocate", 'year': 1997}) at 0x7f7b84e2cba0>,
   <mgclient.Node(id=96102, labels={'Actor', 'Person'}, properties={'birth_year': 1940, 'name': 'Al Pacino'}) at 0x7f7b84e2c8d0>]}]

In [50]:
query = """
MATCH (sandra:Actor {name: 'Sandra Bullock'}), (al:Actor {name: 'Al Pacino'})
MATCH path = (sandra)-[*..4]-(al)
RETURN nodes(path) as path_nodes
"""
results = list(memgraph.execute_and_fetch(query))
print(f"results[0]['path_nodes'] = {results[0]['path_nodes']}\n\n\n")
for node in results[0]['path_nodes']:
    # print(f"node = {node}\n\ndir(node) = {dir(node)}\n\n")
    if 'Actor' in node.labels:
        print(f"node.name = {node.properties['name']}\n\n\n")
    if 'Movie' in node.labels:
        print(f"node.title = {node.properties['title']}\n\n\n")

results[0]['path_nodes'] = [<mgclient.Node(id=96101, labels={'Actor', 'Person'}, properties={'birth_year': 1964, 'name': 'Sandra Bullock'}) at 0x7f7b84ecfcf0>, <mgclient.Node(id=96096, labels={'Movie'}, properties={'genre': 'Action', 'title': 'Speed', 'year': 1994}) at 0x7f7b84e155f0>, <mgclient.Node(id=96098, labels={'Actor', 'Person'}, properties={'birth_year': 1964, 'name': 'Keanu Reeves'}) at 0x7f7b84e15740>, <mgclient.Node(id=96097, labels={'Movie'}, properties={'genre': 'Thriller', 'title': "The Devil's Advocate", 'year': 1997}) at 0x7f7b84e15c80>, <mgclient.Node(id=96102, labels={'Actor', 'Person'}, properties={'birth_year': 1940, 'name': 'Al Pacino'}) at 0x7f7b84e15da0>]



node.name = Sandra Bullock



node.title = Speed



node.name = Keanu Reeves



node.title = The Devil's Advocate



node.name = Al Pacino





In [56]:
query = """
MATCH (sandra:Actor {name: 'Sandra Bullock'}), (al:Actor {name: 'Al Pacino'})
MATCH path = (sandra)-[*..9]-(al)
RETURN nodes(path) as path_nodes
"""
results = list(memgraph.execute_and_fetch(query))
for result in results:
    print(f"result = {result}\n=================================\n")
    print(f"len(result) = {len(result['path_nodes'])}")
    for node in result['path_nodes']:
        if 'Actor' in node.labels:
            print(f"({node.properties['name']}) --")
        if 'Movie' in node.labels:
            print(f"[{node.properties['title']}] --")
    print("\n\n\n\n")

result = {'path_nodes': [<mgclient.Node(id=96101, labels={'Actor', 'Person'}, properties={'birth_year': 1964, 'name': 'Sandra Bullock'}) at 0x7f7b84ecd5c0>, <mgclient.Node(id=96096, labels={'Movie'}, properties={'genre': 'Action', 'title': 'Speed', 'year': 1994}) at 0x7f7b84ece070>, <mgclient.Node(id=96098, labels={'Actor', 'Person'}, properties={'birth_year': 1964, 'name': 'Keanu Reeves'}) at 0x7f7b84e16cd0>, <mgclient.Node(id=96094, labels={'Movie'}, properties={'genre': 'Sci-Fi', 'title': 'The Matrix Reloaded', 'year': 2003}) at 0x7f7b84e160a0>, <mgclient.Node(id=96099, labels={'Actor', 'Person'}, properties={'birth_year': 1961, 'name': 'Laurence Fishburne'}) at 0x7f7b84e154a0>, <mgclient.Node(id=96093, labels={'Movie'}, properties={'genre': 'Sci-Fi', 'title': 'The Matrix', 'year': 1999}) at 0x7f7b84e155f0>, <mgclient.Node(id=96098, labels={'Actor', 'Person'}, properties={'birth_year': 1964, 'name': 'Keanu Reeves'}) at 0x7f7b84e15740>, <mgclient.Node(id=96097, labels={'Movie'}, prop

In [57]:
# Find shortest path between two actors
query = """
MATCH (sandra:Actor {name: 'Sandra Bullock'}), (al:Actor {name: 'Al Pacino'})
MATCH path = shortestPath(
    (sandra:Actor {name: 'Sandra Bullock'})-[*..5]-(al:Actor {name: 'Al Pacino'})
)
RETURN [node in nodes(path) | 
    CASE 
        WHEN 'Movie' IN labels(node) THEN node.title 
        ELSE node.name 
    END
] as path_nodes,
length(path) as path_length;
"""
results = list(memgraph.execute_and_fetch(query))

if results:
    result = results[0]
    print(f"Shortest path from Sandra Bullock to Al Pacino:")
    print(f"Path: {' -> '.join(result['path_nodes'])}")
    print(f"Length: {result['path_length']} hops")
else:
    print("No path found")

DatabaseError: line 3:14 extraneous input 'shortestPath' expecting '('

In [None]:
# Find all paths of length 2 (actor -> movie -> actor)
query = """
MATCH path = (a1:Actor)-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(a2:Actor)
WHERE a1.name = 'Keanu Reeves' AND a1.name <> a2.name
RETURN a1.name as from_actor, m.title as via_movie, a2.name as to_actor
ORDER BY via_movie, to_actor;
"""
results = memgraph.execute_and_fetch(query)

df = pd.DataFrame([dict(row) for row in results])
print("All co-star connections for Keanu Reeves:")
print(df)

## 5. Aggregation Queries

Perform aggregations and statistical analysis.

In [None]:
# Count movies per genre
query = """
MATCH (m:Movie)
RETURN m.genre as genre, count(*) as movie_count
ORDER BY movie_count DESC;
"""
results = memgraph.execute_and_fetch(query)

df = pd.DataFrame([dict(row) for row in results])
print("Movies per genre:")
print(df)

In [None]:
# Find most prolific actors (by number of movies)
query = """
MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)
RETURN a.name as actor, count(m) as movie_count, collect(m.title) as movies
ORDER BY movie_count DESC
LIMIT 5;
"""
results = memgraph.execute_and_fetch(query)

df = pd.DataFrame([dict(row) for row in results])
print("Most prolific actors:")
print(df)

In [None]:
# Calculate average year of movies per actor
query = """
MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)
RETURN a.name as actor, 
       count(m) as movie_count,
       avg(m.year) as avg_movie_year,
       min(m.year) as first_movie_year,
       max(m.year) as latest_movie_year
ORDER BY movie_count DESC;
"""
results = memgraph.execute_and_fetch(query)

df = pd.DataFrame([dict(row) for row in results])
print("Actor career statistics:")
print(df)

## 6. Conditional Queries

Use CASE statements and conditional logic.

In [None]:
# Categorize movies by era
query = """
MATCH (m:Movie)
RETURN m.title as title, 
       m.year as year,
       CASE 
           WHEN m.year < 2000 THEN '90s'
           WHEN m.year >= 2000 AND m.year < 2010 THEN '2000s'
           ELSE '2010s+'
       END as era
ORDER BY m.year;
"""
results = memgraph.execute_and_fetch(query)

df = pd.DataFrame([dict(row) for row in results])
print("Movies by era:")
print(df)

## 7. Subqueries and Advanced Filtering

Use WITH clauses for complex filtering.

In [None]:
# Find actors who have worked in multiple genres
query = """
MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)
WITH a, collect(DISTINCT m.genre) as genres
WHERE size(genres) > 1
RETURN a.name as actor, genres, size(genres) as genre_count
ORDER BY genre_count DESC;
"""
results = memgraph.execute_and_fetch(query)

df = pd.DataFrame([dict(row) for row in results])
print("Actors who worked in multiple genres:")
print(df)

## 8. Visualize the Movie Graph

In [None]:
# Get all nodes and edges
query = """
MATCH (p:Person)-[r]->(m:Movie)
RETURN p.name as person, type(r) as relationship, m.title as movie;
"""
results = memgraph.execute_and_fetch(query)

# Create a NetworkX graph
G = nx.DiGraph()

for row in results:
    data = dict(row)
    G.add_edge(data['person'], data['movie'], type=data['relationship'])

# Draw the graph
plt.figure(figsize=(16, 12))
pos = nx.spring_layout(G, k=3, iterations=50)

# Separate node types
movie_nodes = [n for n in G.nodes() if any(m['title'] == n for m in movies)]
person_nodes = [n for n in G.nodes() if n not in movie_nodes]

# Draw nodes
nx.draw_networkx_nodes(G, pos, nodelist=person_nodes, 
                       node_color='lightblue', node_size=2000, alpha=0.9, label='People')
nx.draw_networkx_nodes(G, pos, nodelist=movie_nodes, 
                       node_color='lightcoral', node_size=2000, alpha=0.9, label='Movies')

# Draw edges
nx.draw_networkx_edges(G, pos, edge_color='gray', 
                       arrows=True, arrowsize=15, alpha=0.6)

# Draw labels
nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')

plt.title("Movie Database Graph", fontsize=16)
plt.legend()
plt.axis('off')
plt.tight_layout()
plt.show()

print(f"[OK] Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

## Summary

In this notebook, you learned:
1. Pattern matching for complex queries
2. Path finding algorithms
3. Aggregation and statistical queries
4. Conditional logic in queries
5. Advanced filtering with WITH clauses
6. Visualizing complex graphs

Next: Explore `03-algorithms.ipynb` for graph algorithms!