# Neo4j with Python: Graph Database Examples

This notebook demonstrates the power of the Neo4j graph database with Python. We'll explore various use cases, from basic operations to advanced graph analytics.

In [None]:
import json
import os
from datetime import datetime

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from neo4j import GraphDatabase

## 1. Connecting to Neo4j

First, let's establish a connection to Neo4j. Make sure Neo4j is running on your system.

In [None]:
# Connection parameters - taken from the environment when set, so real
# credentials do not have to be written into the notebook. The fallbacks are
# the values this notebook previously hard-coded; override them for your setup
# via NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")

# Start from None so the failure path can tell whether a driver object was
# already created when the error occurred.
driver = None

# Test connection
try:
    driver = GraphDatabase.driver(uri, auth=(user, password))
    # Run a trivial query to test the connection end to end.
    with driver.session() as session:
        result = session.run("RETURN 'Hello from Neo4j!' as message")
        message = result.single()["message"]
        print(f"✅ {message}")
        print("Connection successful!")
except Exception as e:
    print(f"❌ Connection failed: {e}")
    print("\nMake sure Neo4j is running and credentials are correct.")
    print("You can start Neo4j with: neo4j start")
    # Release a driver that was created but could not be used instead of
    # leaking it, and leave `driver` as None so the `if driver:` guards in the
    # following cells skip their work.
    failed_driver, driver = driver, None
    if failed_driver is not None:
        failed_driver.close()

## 2. Basic Graph Operations

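Let's start with some fundamental graph operations. Note that the setup cell below begins by deleting **all** nodes and relationships in the connected database, so point the notebook at a scratch instance.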

In [None]:
def create_sample_graph():
    """Rebuild the demo social network from scratch.

    Wipes every node and relationship in the connected database, creates four
    ``Person`` nodes (Alice, Bob, Charlie, Diana) and connects them with four
    directed ``KNOWS`` relationships carrying a ``since`` year. Prints a
    confirmation once all statements have been sent.
    """
    # Sent strictly in this order: wipe, create the people, then one statement
    # per KNOWS relationship.
    statements = (
        "MATCH (n) DETACH DELETE n",
        """
            CREATE (alice:Person {name: 'Alice', age: 30, city: 'New York'})
            CREATE (bob:Person {name: 'Bob', age: 25, city: 'San Francisco'})
            CREATE (charlie:Person {name: 'Charlie', age: 35, city: 'Boston'})
            CREATE (diana:Person {name: 'Diana', age: 28, city: 'Seattle'})
        """,
        """
            MATCH (alice:Person {name: 'Alice'}), (bob:Person {name: 'Bob'})
            CREATE (alice)-[:KNOWS {since: 2020}]->(bob)
        """,
        """
            MATCH (bob:Person {name: 'Bob'}), (charlie:Person {name: 'Charlie'})
            CREATE (bob)-[:KNOWS {since: 2021}]->(charlie)
        """,
        """
            MATCH (alice:Person {name: 'Alice'}), (diana:Person {name: 'Diana'})
            CREATE (alice)-[:KNOWS {since: 2019}]->(diana)
        """,
        """
            MATCH (charlie:Person {name: 'Charlie'}), (diana:Person {name: 'Diana'})
            CREATE (charlie)-[:KNOWS {since: 2022}]->(diana)
        """,
    )

    with driver.session() as session:
        for cypher in statements:
            session.run(cypher)

        print("Sample graph created successfully!")

# Populate the graph only when the connection cell produced a usable driver
# (`driver` is set to None there when the connection attempt fails).
if driver:
    create_sample_graph()

In [None]:
def query_people():
    """Fetch every ``Person`` node as a plain dictionary.

    Returns:
        list[dict]: one mapping with the keys ``name``, ``age`` and ``city``
        per person, in the order produced by the query's ``ORDER BY p.name``.
    """
    with driver.session() as session:
        rows = session.run("""
            MATCH (p:Person)
            RETURN p.name as name, p.age as age, p.city as city
            ORDER BY p.name
        """)

        # Convert while still inside the ``with`` block so the rows are read
        # before the session is released.
        return list(map(dict, rows))

# Skip the demo when no connection is available (`driver` is None after a
# failed connection attempt).
if driver:
    people = query_people()
    print("People in the graph:")
    # Each entry is a dict with the name/age/city keys returned by query_people().
    for person in people:
        print(f"  {person['name']} ({person['age']} years old, {person['city']})")

In [None]:
def query_relationships():
    """Fetch every directed ``KNOWS`` relationship between two people.

    Returns:
        list[dict]: one mapping with the keys ``person1`` (source name),
        ``person2`` (target name) and ``since`` per relationship, in the order
        produced by the query's ``ORDER BY r.since``.
    """
    with driver.session() as session:
        rows = session.run("""
            MATCH (p1:Person)-[r:KNOWS]->(p2:Person)
            RETURN p1.name as person1, p2.name as person2, r.since as since
            ORDER BY r.since
        """)

        # Read the rows before the session is released.
        return list(map(dict, rows))

# Skip the demo when no connection is available (`driver` is None after a
# failed connection attempt).
if driver:
    relationships = query_relationships()
    print("Relationships in the graph:")
    # Each entry is a dict with the person1/person2/since keys returned by
    # query_relationships().
    for rel in relationships:
        print(f"  {rel['person1']} knows {rel['person2']} since {rel['since']}")

## 3. Advanced Graph Queries

Now let's explore some more complex graph operations.

In [None]:
def find_shortest_path(person1, person2):
    """Look up the shortest chain of KNOWS relationships between two people.

    The relationship direction is ignored by the query pattern.

    Args:
        person1: ``name`` property of the starting ``Person``.
        person2: ``name`` property of the target ``Person``.

    Returns:
        The list of person names along the path, or ``None`` when the query
        yields no record.
    """
    with driver.session() as session:
        lookup = session.run("""
            MATCH path = shortestPath(
                (p1:Person {name: $person1})-[:KNOWS*]-(p2:Person {name: $person2})
            )
            RETURN [node IN nodes(path) WHERE node:Person | node.name] as path
        """, person1=person1, person2=person2)

        row = lookup.single()
        if not row:
            return None
        return row["path"]

# Skip the demo when no connection is available (`driver` is None after a
# failed connection attempt).
if driver:
    path = find_shortest_path("Alice", "Charlie")
    # find_shortest_path() returns None when the query yields no record.
    if path:
        print(f"Shortest path from Alice to Charlie: {' -> '.join(path)}")
    else:
        print("No path found between Alice and Charlie")

In [None]:
def find_mutual_friends(person1, person2):
    """List the people that both given people point to with a KNOWS relationship.

    Only outgoing ``KNOWS`` relationships from each of the two people are
    considered, as written in the query pattern.

    Args:
        person1: ``name`` property of the first ``Person``.
        person2: ``name`` property of the second ``Person``.

    Returns:
        list[dict]: one mapping with the keys ``mutual_friend`` and ``city``
        per shared acquaintance, in the order produced by
        ``ORDER BY mutual.name``.
    """
    mutual_friends = []
    with driver.session() as session:
        matches = session.run("""
            MATCH (p1:Person {name: $person1})-[:KNOWS]->(mutual:Person)<-[:KNOWS]-(p2:Person {name: $person2})
            RETURN mutual.name as mutual_friend, mutual.city as city
            ORDER BY mutual.name
            """, person1=person1, person2=person2)

        # Drain the result while the session is still open.
        for row in matches:
            mutual_friends.append(dict(row))
    return mutual_friends

# Skip the demo when no connection is available (`driver` is None after a
# failed connection attempt).
if driver:
    mutual_friends = find_mutual_friends("Alice", "Charlie")
    # An empty list means the query matched no shared acquaintance.
    if mutual_friends:
        print("Mutual friends between Alice and Charlie:")
        for friend in mutual_friends:
            print(f"  {friend['mutual_friend']} ({friend['city']})")
    else:
        print("No mutual friends found between Alice and Charlie")

In [None]:
def analyze_network():
    """Compute summary metrics for the Person/KNOWS network.

    "Connections" are counted as outgoing ``KNOWS`` relationships only, which
    is how the two aggregate queries below are written.

    Returns:
        dict with the keys:
            ``total_people`` (int): number of ``Person`` nodes.
            ``total_relationships`` (int): number of ``KNOWS`` relationships.
            ``avg_connections_per_person`` (float): mean outgoing connections,
                rounded to two decimals; ``0.0`` when there are no people.
            ``most_connected_person`` (str | None): name of the person with the
                most outgoing connections; ``None`` when there are no people.
            ``most_connected_count`` (int): that person's connection count;
                ``0`` when there are no people.
    """
    with driver.session() as session:
        # Total people
        total_people = session.run(
            "MATCH (p:Person) RETURN count(p) as count"
        ).single()["count"]

        # Total relationships
        total_relationships = session.run(
            "MATCH ()-[r:KNOWS]->() RETURN count(r) as count"
        ).single()["count"]

        # Average connections per person. OPTIONAL MATCH keeps people without
        # any outgoing KNOWS relationship in the average (as zero).
        avg_connections = session.run("""
            MATCH (p:Person)
            OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
            WITH p, count(friend) as connections
            RETURN avg(connections) as avg_connections
        """).single()["avg_connections"]

        # Most connected person. The secondary sort on the name makes the
        # winner deterministic when several people are tied.
        most_connected = session.run("""
            MATCH (p:Person)
            OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person)
            WITH p, count(friend) as connections
            ORDER BY connections DESC, p.name
            LIMIT 1
            RETURN p.name as name, connections
        """).single()

    # With no Person nodes the average comes back as None and the
    # "most connected" query yields no record at all; report neutral values
    # instead of failing in round() or on the subscript.
    if avg_connections is None:
        avg_connections = 0.0
    has_winner = most_connected is not None

    return {
        "total_people": total_people,
        "total_relationships": total_relationships,
        "avg_connections_per_person": round(avg_connections, 2),
        "most_connected_person": most_connected["name"] if has_winner else None,
        "most_connected_count": most_connected["connections"] if has_winner else 0,
    }

# Skip the demo when no connection is available (`driver` is None after a
# failed connection attempt).
if driver:
    metrics = analyze_network()
    print("Network Analysis:")
    for key, value in metrics.items():
        # Turn the snake_case key into a label, e.g. "total_people" -> "Total People".
        print(f"  {key.replace('_', ' ').title()}: {value}")

## 4. Complex Scenario: Movie Recommendation System

Let's create a more complex example with movies, actors, and user ratings.

In [None]:
def create_movie_database():
    """Reset the database and load the movie-recommendation sample data.

    Deletes every node and relationship in the connected database, creates
    ``Movie``, ``Actor`` and ``User`` nodes, and then links the *existing*
    nodes with ``ACTED_IN`` and ``RATED`` relationships (the rating value is
    stored on the ``RATED`` relationship). Prints a confirmation when done.
    """
    movies_data = [
        {"title": "The Matrix", "year": 1999, "genre": ["Sci-Fi", "Action"]},
        {"title": "Inception", "year": 2010, "genre": ["Sci-Fi", "Thriller"]},
        {"title": "The Dark Knight", "year": 2008, "genre": ["Action", "Crime"]},
        {"title": "Interstellar", "year": 2014, "genre": ["Sci-Fi", "Adventure"]},
        {"title": "Pulp Fiction", "year": 1994, "genre": ["Crime", "Drama"]}
    ]

    actors_data = [
        {"name": "Keanu Reeves", "age": 59},
        {"name": "Leonardo DiCaprio", "age": 49},
        {"name": "Christian Bale", "age": 50},
        {"name": "Matthew McConaughey", "age": 54},
        {"name": "John Travolta", "age": 70}
    ]

    users_data = [
        {"name": "Alice", "age": 25},
        {"name": "Bob", "age": 30},
        {"name": "Charlie", "age": 35}
    ]

    # (actor name, movie title)
    acted_in = [
        ("Keanu Reeves", "The Matrix"),
        ("Leonardo DiCaprio", "Inception"),
        ("Christian Bale", "The Dark Knight"),
        ("Matthew McConaughey", "Interstellar"),
        ("John Travolta", "Pulp Fiction")
    ]

    # (user name, movie title, rating)
    ratings = [
        ("Alice", "The Matrix", 5),
        ("Alice", "Inception", 4),
        ("Bob", "The Matrix", 4),
        ("Bob", "The Dark Knight", 5),
        ("Bob", "Interstellar", 3),
        ("Charlie", "Inception", 5),
        ("Charlie", "Pulp Fiction", 4)
    ]

    with driver.session() as session:
        # Clear existing data
        session.run("MATCH (n) DETACH DELETE n")

        # Create movies (each dict is passed as the query's parameter map)
        for movie in movies_data:
            session.run("""
                CREATE (m:Movie {title: $title, year: $year, genre: $genre})
            """, movie)

        # Create actors
        for actor in actors_data:
            session.run("""
                CREATE (a:Actor {name: $name, age: $age})
            """, actor)

        # Create users
        for user in users_data:
            session.run("""
                CREATE (u:User {name: $name, age: $age})
            """, user)

        # Create ACTED_IN relationships. MATCH the nodes created above first:
        # a CREATE over the whole pattern would add duplicate Actor and Movie
        # nodes instead of connecting the existing ones.
        for actor_name, movie_title in acted_in:
            session.run("""
                MATCH (a:Actor {name: $actor}), (m:Movie {title: $movie})
                CREATE (a)-[:ACTED_IN]->(m)
            """, actor=actor_name, movie=movie_title)

        # Create RATED relationships, again between the existing nodes so that
        # users who rated the same movie actually share that Movie node.
        for user_name, movie_title, rating in ratings:
            session.run("""
                MATCH (u:User {name: $user}), (m:Movie {title: $movie})
                CREATE (u)-[:RATED {rating: $rating}]->(m)
            """, user=user_name, movie=movie_title, rating=rating)

        print("Movie database created successfully!")

# Load the movie data only when a connection is available. Note that
# create_movie_database() starts by deleting every node, so the Person/KNOWS
# sample graph from the earlier cells is gone after this runs.
if driver:
    create_movie_database()

In [None]:
def recommend_movies_for_user(user_name, min_similar_users=1):
    """Recommend unseen movies based on what similar users rated.

    A "similar user" is any other ``User`` who rated at least one movie that
    ``user_name`` also rated. Every movie such users rated, and that
    ``user_name`` has not rated, is a candidate; its predicted rating is the
    average of the ``rating`` stored on those users' ``RATED`` relationships.

    Args:
        user_name: ``name`` property of the ``User`` to recommend for.
        min_similar_users: minimum number of distinct similar users that must
            have rated a movie for it to be recommended (default ``1``).

    Returns:
        list[dict]: one mapping per recommended movie with the keys ``title``,
        ``year``, ``genre``, ``predicted_rating`` and
        ``users_who_liked_similar``, best predicted rating first.
    """
    with driver.session() as session:
        result = session.run("""
            MATCH (user:User {name: $user_name})-[:RATED]->(:Movie)<-[:RATED]-(other_user:User)
            WHERE other_user <> user
            // Collapse to one row per similar user; without this a user who
            // shares several movies would be counted (and averaged) repeatedly.
            WITH DISTINCT user, other_user
            MATCH (other_user)-[r:RATED]->(recommended_movie:Movie)
            WHERE NOT (user)-[:RATED]->(recommended_movie)

            // The rating is a property of the RATED relationship, not of the user node.
            WITH recommended_movie,
                 avg(r.rating) as avg_rating,
                 count(DISTINCT other_user) as similar_users
            WHERE similar_users >= $min_similar_users

            RETURN recommended_movie.title as title,
                   recommended_movie.year as year,
                   recommended_movie.genre as genre,
                   avg_rating as predicted_rating,
                   similar_users as users_who_liked_similar
            ORDER BY avg_rating DESC, similar_users DESC
        """, user_name=user_name, min_similar_users=min_similar_users)

        # Read the rows while the session is still open.
        recommendations = [dict(record) for record in result]
        return recommendations

# Skip the demo when no connection is available (`driver` is None after a
# failed connection attempt).
if driver:
    print("Movie recommendations for Alice:")
    recommendations = recommend_movies_for_user("Alice")
    if not recommendations:
        print("  No recommendations found")
    for rec in recommendations:
        # The predicted rating is an aggregate and can come back as None
        # (Cypher's avg() yields null when it has no values to average);
        # formatting None with ':.1f' would raise a TypeError.
        predicted = rec['predicted_rating']
        predicted_text = f"{predicted:.1f}" if predicted is not None else "n/a"
        print(f"  {rec['title']} ({rec['year']}) - Predicted rating: {predicted_text}")
        print(f"    Genre: {', '.join(rec['genre'] or [])}")
        print(f"    Based on {rec['users_who_liked_similar']} similar users")

In [None]:
def analyze_actors():
    """List each actor's movies together with the movie's average user rating.

    Returns:
        list[dict]: one mapping per (actor, movie) pair with the keys
        ``actor_name``, ``movie_title``, ``year`` and ``avg_rating``, ordered
        by actor name and then release year. ``avg_rating`` is ``None`` for a
        movie that no user has rated.
    """
    with driver.session() as session:
        # Actors with their movies and average ratings. The rating is stored
        # on the RATED relationship, so the relationship (not the User node)
        # has to be bound and averaged. OPTIONAL MATCH keeps unrated movies.
        result = session.run("""
            MATCH (actor:Actor)-[:ACTED_IN]->(movie:Movie)
            OPTIONAL MATCH (:User)-[rated:RATED]->(movie)
            WITH actor, movie, avg(rated.rating) as avg_rating
            RETURN actor.name as actor_name,
                   movie.title as movie_title,
                   movie.year as year,
                   avg_rating as avg_rating
            ORDER BY actor.name, movie.year
        """)

        # Read the rows while the session is still open.
        actor_data = [dict(record) for record in result]
        return actor_data

# Skip the demo when no connection is available (`driver` is None after a
# failed connection attempt).
if driver:
    print("Actor Analysis:")
    actor_data = analyze_actors()

    # Group by actor, keeping the order in which the rows were returned.
    actors = {}
    for record in actor_data:
        actor_name = record['actor_name']
        actors.setdefault(actor_name, []).append(record)

    for actor_name, movies in actors.items():
        print(f"\n{actor_name}:")
        for movie in movies:
            # Compare against None rather than relying on truthiness, so a
            # genuine average of 0 is still printed as a rating.
            rating = movie['avg_rating'] if movie['avg_rating'] is not None else "No ratings"
            print(f"  {movie['movie_title']} ({movie['year']}) - Avg rating: {rating}")

## 5. Graph Visualization

Let's visualize our graph data using NetworkX and matplotlib.

In [None]:
def visualize_social_network():
    """Draw the Person/KNOWS social network with NetworkX and matplotlib.

    Side effects: calls ``create_sample_graph()``, which deletes everything in
    the connected database and reloads the four-person sample network; shows a
    matplotlib figure; prints node count, edge count and average degree.
    """
    # First, let's recreate the simple social network
    create_sample_graph()

    # Get graph data from Neo4j. Both result sets are copied into plain lists
    # while the session is still open: a result belongs to its session and can
    # no longer be iterated once the `with` block has closed it.
    with driver.session() as session:
        # Get nodes
        node_rows = [
            (record["name"], record["city"])
            for record in session.run("""
                MATCH (p:Person)
                RETURN p.name as name, p.city as city
            """)
        ]

        # Get edges
        edge_rows = [
            (record["source"], record["target"], record["since"])
            for record in session.run("""
                MATCH (p1:Person)-[r:KNOWS]->(p2:Person)
                RETURN p1.name as source, p2.name as target, r.since as since
            """)
        ]

    # Create NetworkX graph
    G = nx.DiGraph()

    # Add nodes
    for name, city in node_rows:
        G.add_node(name, city=city)

    # Add edges
    for source, target, since in edge_rows:
        G.add_edge(source, target, since=since)

    # Create visualization. The fixed seed makes the spring layout, and
    # therefore the figure, reproducible between runs.
    plt.figure(figsize=(10, 8))
    pos = nx.spring_layout(G, k=1, iterations=50, seed=42)

    # Draw nodes
    nx.draw_networkx_nodes(G, pos, node_color='lightblue', node_size=1000)

    # Draw edges
    nx.draw_networkx_edges(G, pos, edge_color='gray', arrows=True, arrowsize=20)

    # Draw labels
    nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')

    plt.title("Social Network Graph", fontsize=16)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

    # Print some graph metrics. Guard the average so an empty graph reports
    # 0.00 instead of dividing by zero.
    node_count = G.number_of_nodes()
    total_degree = sum(dict(G.degree()).values())
    average_degree = total_degree / node_count if node_count else 0.0
    print("Graph metrics:")
    print(f"  Number of nodes: {node_count}")
    print(f"  Number of edges: {G.number_of_edges()}")
    print(f"  Average degree: {average_degree:.2f}")

# Skip the visualization when no connection is available (`driver` is None
# after a failed connection attempt).
if driver:
    try:
        visualize_social_network()
    # NOTE(review): matplotlib and networkx are imported in the first code
    # cell, so a missing package would presumably fail there first - confirm
    # whether this branch can still be reached.
    except ImportError:
        print("Visualization requires matplotlib and networkx. Install with:")
        print("pip install matplotlib networkx")
    # Any other failure is reported as a message instead of a traceback.
    except Exception as e:
        print(f"Visualization error: {e}")

## 6. Cleanup

Don't forget to close the database connection when you're done.

In [None]:
if driver:
    driver.close()
    # Drop the reference so that re-running this cell, or any earlier
    # `if driver:` cell, is skipped instead of operating on a closed driver.
    driver = None
    print("Database connection closed.")