# Lab 3: Neo4j Exercises - Graph Database

## 🎯 Objectives
- Understand graph data modeling
- Learn Cypher query language
- Practice relationship analysis
- Implement recommendation algorithms
- Optimize graph queries

## 📋 Prerequisites
- Complete Lab 1 (Setup & Connections)
- Neo4j container is running
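- Sample data is loaded (note: Exercise 1 deletes **all** existing graph data before creating its own sample nodes, so do not run it against a database whose contents you need to keep)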


In [1]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from neo4j import GraphDatabase

# Neo4j connection (assuming Lab 1 is completed).
# Each setting can be overridden with an environment variable of the same name
# so that real credentials never need to be written into the notebook; the
# fallback values are the lab's local defaults.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password123")

# The driver is intentionally left open: every later cell reuses it.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# Test connection with a trivial round-trip query.
with driver.session() as session:
    result = session.run("RETURN 1 as test")
    test_value = result.single()["test"]

print("✅ Neo4j connected successfully!")
print(f"Test query result: {test_value}")


✅ Neo4j connected successfully!
Test query result: 1


## Exercise 1: Graph Schema Creation


In [2]:
# Clear existing data: every node is deleted together with its relationships,
# so the cells below start from an empty graph.
delete_everything = "MATCH (n) DETACH DELETE n"
with driver.session() as session:
    session.run(delete_everything)
    print("✅ Existing graph data cleared")

# Create sample users
# Each dict supplies the $id, $name and $email Cypher parameters used when the
# :User nodes are created; the "id" values are what later cells match on.
users = [
    {"id": "user_001", "name": "Nguyen Van A", "email": "a@example.com"},
    {"id": "user_002", "name": "Tran Thi B", "email": "b@example.com"},
    {"id": "user_003", "name": "Le Van C", "email": "c@example.com"},
    {"id": "user_004", "name": "Pham Thi D", "email": "d@example.com"},
    {"id": "user_005", "name": "Hoang Van E", "email": "e@example.com"}
]

# Create sample products
# Each dict supplies the $id, $name and $category Cypher parameters for one
# :Product node. Every sample product is in the "electronics" category.
products = [
    {"id": "prod_001", "name": "iPhone 15 Pro", "category": "electronics"},
    {"id": "prod_002", "name": "Samsung Galaxy S24", "category": "electronics"},
    {"id": "prod_003", "name": "MacBook Pro M3", "category": "electronics"},
    {"id": "prod_004", "name": "Dell XPS 13", "category": "electronics"},
    {"id": "prod_005", "name": "AirPods Pro", "category": "electronics"}
]

# Insert the sample nodes: one CREATE statement per dict, with the dict's
# entries passed as the statement's Cypher parameters.
with driver.session() as session:
    # :User nodes
    for user_props in users:
        session.run("""
            CREATE (u:User {
                id: $id,
                name: $name,
                email: $email,
                createdAt: datetime()
            })
        """, **user_props)
    print(f"✅ Created {len(users)} users")

    # :Product nodes
    for product_props in products:
        session.run("""
            CREATE (p:Product {
                id: $id,
                name: $name,
                category: $category,
                createdAt: datetime()
            })
        """, **product_props)
    print(f"✅ Created {len(products)} products")

print("✅ Graph schema created successfully!")


✅ Existing graph data cleared
✅ Created 5 users
✅ Created 5 products
✅ Graph schema created successfully!


## Exercise 2: Create Relationships


In [3]:
# Create user-product relationships (purchases, views, likes)
# Each entry is a 4-tuple: (source user id, target id, relationship type,
# relationship properties). The target is a product id for PURCHASED / VIEWED /
# LIKES and another user id for FRIENDS_WITH. Dates are plain ISO-formatted
# strings, not temporal values.
relationships = [
    # User purchases
    ("user_001", "prod_001", "PURCHASED", {"date": "2024-01-15", "rating": 5}),
    ("user_001", "prod_003", "PURCHASED", {"date": "2024-02-10", "rating": 4}),
    ("user_002", "prod_001", "PURCHASED", {"date": "2024-01-20", "rating": 5}),
    ("user_002", "prod_002", "PURCHASED", {"date": "2024-02-05", "rating": 4}),
    ("user_003", "prod_002", "PURCHASED", {"date": "2024-01-25", "rating": 3}),
    ("user_003", "prod_004", "PURCHASED", {"date": "2024-02-15", "rating": 4}),
    ("user_004", "prod_003", "PURCHASED", {"date": "2024-02-01", "rating": 5}),
    ("user_005", "prod_005", "PURCHASED", {"date": "2024-02-20", "rating": 4}),
    
    # User views
    ("user_001", "prod_002", "VIEWED", {"date": "2024-01-10"}),
    ("user_001", "prod_005", "VIEWED", {"date": "2024-01-12"}),
    ("user_002", "prod_003", "VIEWED", {"date": "2024-01-18"}),
    ("user_002", "prod_005", "VIEWED", {"date": "2024-02-01"}),
    ("user_003", "prod_001", "VIEWED", {"date": "2024-01-20"}),
    ("user_003", "prod_003", "VIEWED", {"date": "2024-02-10"}),
    ("user_004", "prod_001", "VIEWED", {"date": "2024-01-25"}),
    ("user_004", "prod_002", "VIEWED", {"date": "2024-01-30"}),
    ("user_005", "prod_001", "VIEWED", {"date": "2024-02-15"}),
    ("user_005", "prod_003", "VIEWED", {"date": "2024-02-18"}),
    
    # User likes
    ("user_001", "prod_002", "LIKES", {"date": "2024-01-10"}),
    ("user_001", "prod_005", "LIKES", {"date": "2024-01-12"}),
    ("user_002", "prod_003", "LIKES", {"date": "2024-01-18"}),
    ("user_003", "prod_001", "LIKES", {"date": "2024-01-20"}),
    ("user_004", "prod_001", "LIKES", {"date": "2024-01-25"}),
    ("user_005", "prod_001", "LIKES", {"date": "2024-02-15"}),
    
    # User friendships
    # Stored as directed edges from the first user to the second.
    ("user_001", "user_002", "FRIENDS_WITH", {"since": "2024-01-01"}),
    ("user_001", "user_003", "FRIENDS_WITH", {"since": "2024-01-05"}),
    ("user_002", "user_004", "FRIENDS_WITH", {"since": "2024-01-10"}),
    ("user_003", "user_005", "FRIENDS_WITH", {"since": "2024-01-15"}),
    ("user_004", "user_005", "FRIENDS_WITH", {"since": "2024-01-20"}),
]

# Create relationships
# Map every supported relationship type to the label of its target node. One
# query template then serves both user->product and user->user edges, and an
# unsupported type is rejected instead of being silently skipped while still
# being counted in the summary message below.
target_labels = {
    "PURCHASED": "Product",
    "VIEWED": "Product",
    "LIKES": "Product",
    "FRIENDS_WITH": "User",
}

# Validate up front so nothing is written if the data contains a bad type.
unsupported_types = sorted(
    {rel_type for _, _, rel_type, _ in relationships} - set(target_labels)
)
if unsupported_types:
    raise ValueError(f"Unsupported relationship type(s): {unsupported_types}")

with driver.session() as session:
    for user_id, target_id, rel_type, properties in relationships:
        target_label = target_labels[rel_type]
        # The relationship type and target label are interpolated into the
        # query text rather than passed as parameters; this is safe because
        # both come from the whitelist above. All ids and property values
        # still travel as query parameters.
        session.run(f"""
            MATCH (source:User {{id: $user_id}})
            MATCH (target:{target_label} {{id: $target_id}})
            MERGE (source)-[r:{rel_type}]->(target)
            SET r += $properties
        """, user_id=user_id, target_id=target_id, properties=properties)

print(f"✅ Created {len(relationships)} relationships")

# Verify relationships: tally the stored edges by type, most frequent first.
with driver.session() as session:
    relationship_counts = list(session.run("""
        MATCH (n)-[r]->(m)
        RETURN type(r) as relationship_type, count(r) as count
        ORDER BY count DESC
    """))

print("\n📊 Relationship counts:")
for record in relationship_counts:
    print(f"{record['relationship_type']}: {record['count']}")

print("✅ Relationships created successfully!")


✅ Created 29 relationships

📊 Relationship counts:
VIEWED: 10
PURCHASED: 8
LIKES: 6
FRIENDS_WITH: 5
✅ Relationships created successfully!


## Exercise 3: Recommendation System


In [4]:
# Collaborative Filtering Recommendation
def get_recommendations(user_id: str, limit: int = 5) -> list:
    """Recommend products bought by users who share a purchase with ``user_id``.

    Follows the pattern user -> purchased product <- other user -> other
    product and keeps only products the given user has not purchased. A
    product's score is the number of such paths that reach it.

    Args:
        user_id: Value of the ``id`` property of the :User node to recommend for.
        limit: Maximum number of products to return.

    Returns:
        A list of dicts with the keys ``productId``, ``productName`` and
        ``score``, ordered by score (highest first). The list is empty when
        no path matches.
    """
    with driver.session() as session:
        # `productId` is a secondary sort key: without it, products with equal
        # scores come back in arbitrary order, so LIMIT could keep a different
        # subset on every run.
        result = session.run("""
            MATCH (u:User {id: $user_id})-[:PURCHASED]->(p1:Product)<-[:PURCHASED]-(u2:User)-[:PURCHASED]->(p2:Product)
            WHERE NOT (u)-[:PURCHASED]->(p2)
            RETURN p2.id as productId, p2.name as productName, count(*) as score
            ORDER BY score DESC, productId
            LIMIT $limit
        """, user_id=user_id, limit=limit)

        # Materialise the records while the session is still open.
        return [record.data() for record in result]

# Demo: ask for at most three collaborative-filtering suggestions for
# user_001 and list each with its score.
recommendations = get_recommendations("user_001", limit=3)

print("🎯 Recommendations for user_001:")
for rec in recommendations:
    print(f"- {rec['productName']} (Score: {rec['score']})")

# Content-Based Recommendation
def get_content_recommendations(user_id: str, limit: int = 5) -> list:
    """Recommend products similar to the ones ``user_id`` likes.

    Starts from every product the user LIKES, follows outgoing SIMILAR_TO
    relationships, and keeps only products the user has not purchased. A
    product's score is the number of liked products that lead to it.

    NOTE(review): none of the cells visible in this notebook create
    SIMILAR_TO relationships, so against the lab's sample graph this is
    expected to return an empty list until such edges are added.

    Args:
        user_id: Value of the ``id`` property of the :User node to recommend for.
        limit: Maximum number of products to return.

    Returns:
        A list of dicts with the keys ``productId``, ``productName`` and
        ``score``, ordered by score (highest first).
    """
    with driver.session() as session:
        # `productId` is a secondary sort key: without it, products with equal
        # scores come back in arbitrary order, so LIMIT could keep a different
        # subset on every run.
        result = session.run("""
            MATCH (u:User {id: $user_id})-[:LIKES]->(p1:Product)
            MATCH (p1)-[:SIMILAR_TO]->(p2:Product)
            WHERE NOT (u)-[:PURCHASED]->(p2)
            RETURN p2.id as productId, p2.name as productName, count(*) as score
            ORDER BY score DESC, productId
            LIMIT $limit
        """, user_id=user_id, limit=limit)

        # Materialise the records while the session is still open.
        return [record.data() for record in result]

print("\n✅ Recommendation system implemented!")


🎯 Recommendations for user_001:
- Samsung Galaxy S24 (Score: 1)

✅ Recommendation system implemented!


## Exercise 4: Graph Analytics


In [5]:
# Rank products by how many PURCHASED relationships point at them and report
# the mean rating stored on those relationships.
with driver.session() as session:
    popular_products = list(session.run("""
        MATCH (p:Product)<-[r:PURCHASED]-(u:User)
        RETURN p.name as productName, count(r) as purchaseCount, avg(r.rating) as avgRating
        ORDER BY purchaseCount DESC
    """))

print("📊 Most Popular Products:")
for record in popular_products:
    print(f"- {record['productName']}: {record['purchaseCount']} purchases, {record['avgRating']:.1f} avg rating")

# List the other users who purchased at least one product that user_001 also
# purchased, with the number of products they have in common.
with driver.session() as session:
    similar_users = list(session.run("""
        MATCH (u1:User {id: 'user_001'})-[:PURCHASED]->(p:Product)<-[:PURCHASED]-(u2:User)
        WHERE u1 <> u2
        RETURN u2.name as userName, count(p) as commonProducts
        ORDER BY commonProducts DESC
    """))

print("\n👥 Users with similar preferences to user_001:")
for record in similar_users:
    print(f"- {record['userName']}: {record['commonProducts']} common products")

# Count each user's friends. FRIENDS_WITH is matched in either direction, and
# OPTIONAL MATCH keeps users that have no friends (they are reported as 0).
with driver.session() as session:
    friend_counts = list(session.run("""
        MATCH (u:User)
        OPTIONAL MATCH (u)-[:FRIENDS_WITH]-(friend:User)
        RETURN u.name as userName, count(friend) as friendCount
        ORDER BY friendCount DESC
    """))

print("\n🌐 Social Network Analysis:")
for record in friend_counts:
    print(f"- {record['userName']}: {record['friendCount']} friends")

print("\n✅ Graph analytics completed!")


📊 Most Popular Products:
- iPhone 15 Pro: 2 purchases, 5.0 avg rating
- Samsung Galaxy S24: 2 purchases, 3.5 avg rating
- MacBook Pro M3: 2 purchases, 4.5 avg rating
- Dell XPS 13: 1 purchases, 4.0 avg rating
- AirPods Pro: 1 purchases, 4.0 avg rating

👥 Users with similar preferences to user_001:
- Pham Thi D: 1 common products
- Tran Thi B: 1 common products

🌐 Social Network Analysis:
- Nguyen Van A: 2 friends
- Tran Thi B: 2 friends
- Le Van C: 2 friends
- Pham Thi D: 2 friends
- Hoang Van E: 2 friends

✅ Graph analytics completed!


In [None]:
# Scratch cell intentionally left empty. (A bare undefined name in this cell
# would raise NameError and stop "Run All" before the exercises below.)

## 🎓 Student Exercises - Advanced Neo4j Practice

### Exercise 5: Complex Graph Queries Challenge
**Objective**: Master advanced Cypher queries and graph traversal

**Tasks**:
1. **Find the shortest path** between two users through their mutual connections
2. **Calculate user influence score** based on their network connections
3. **Find products with highest co-purchase patterns** (products bought together)
4. **Implement a friend recommendation algorithm** using graph algorithms

**Requirements**:
- Use advanced Cypher patterns and functions
- Implement graph algorithms (PageRank, Betweenness Centrality)
- Include proper error handling
- Optimize queries for performance
- Document your approach

---

### Exercise 6: Social Network Analysis
**Objective**: Analyze social relationships and user behavior patterns

**Scenario**: You need to build a **Social Commerce Platform** with the following features:
- User friendship networks
- Product sharing and recommendations
- Influence-based marketing
- Community detection
- Viral product analysis

**Tasks**:
1. **Design the social graph schema**:
   - User relationships (friends, followers, influencers)
   - Product sharing and viral patterns
   - Community structures
   - Interaction timestamps

2. **Implement social features**:
   - Friend recommendation system
   - Product sharing network
   - Influence scoring algorithm
   - Community detection
   - Viral product identification

3. **Create analytics queries**:
   - Most influential users
   - Product sharing patterns
   - Community analysis
   - Engagement metrics

**Requirements**:
- Use proper graph modeling techniques
- Implement social network algorithms
- Include temporal analysis
- Handle large-scale data
- Create visualization outputs

---

### Exercise 7: Recommendation System Implementation
**Objective**: Build sophisticated recommendation algorithms using graph data

**Tasks**:
1. **Implement collaborative filtering**:
   - User-based recommendations
   - Item-based recommendations
   - Matrix factorization using graph structure

2. **Create content-based filtering**:
   - Product similarity graphs
   - User preference modeling
   - Feature-based recommendations

3. **Build hybrid recommendation system**:
   - Combine multiple approaches
   - Weight different algorithms
   - Handle cold start problems

4. **Evaluate recommendation quality**:
   - Precision and recall metrics
   - A/B testing framework
   - User satisfaction scoring

**Requirements**:
- Use graph algorithms for recommendations
- Implement multiple recommendation strategies
- Include evaluation metrics
- Handle edge cases (new users, new products)
- Optimize for real-time performance

---

### Exercise 8: Graph Database Performance Optimization
**Objective**: Learn advanced optimization techniques for graph databases

**Tasks**:
1. **Create appropriate indexes**:
   - Node property indexes
   - Relationship property indexes
   - Composite indexes
   - Full-text indexes

2. **Optimize query performance**:
   - Use query profiling
   - Implement query hints
   - Optimize graph traversal
   - Use appropriate algorithms

3. **Implement caching strategies**:
   - Query result caching
   - Graph structure caching
   - Session-based caching
   - Distributed caching

4. **Monitor and tune performance**:
   - Query execution plans
   - Performance metrics
   - Resource utilization
   - Scalability testing

**Requirements**:
- Use Neo4j performance tuning techniques
- Implement proper indexing strategies
- Monitor query performance
- Handle large-scale graphs
- Document optimization results

---

### Exercise 9: Real-time Graph Analytics
**Objective**: Build real-time analytics system using graph data

**Scenario**: Build a **Real-time E-commerce Analytics Platform** with:
- Live user behavior tracking
- Real-time recommendation updates
- Dynamic pricing based on demand
- Fraud detection using graph patterns
- Live inventory management

**Tasks**:
1. **Design real-time data pipeline**:
   - Event streaming integration
   - Real-time graph updates
   - Data consistency handling
   - Error recovery mechanisms

2. **Implement real-time features**:
   - Live recommendation updates
   - Dynamic user segmentation
   - Real-time fraud detection
   - Live inventory tracking

3. **Create monitoring dashboard**:
   - Real-time metrics
   - Alert systems
   - Performance monitoring
   - User behavior analytics

**Requirements**:
- Use streaming data processing
- Implement real-time graph updates
- Handle data consistency
- Create monitoring systems
- Include alert mechanisms

---

### Exercise 10: Advanced Graph Algorithms
**Objective**: Implement complex graph algorithms for business intelligence

**Tasks**:
1. **Implement graph algorithms**:
   - PageRank for user influence
   - Betweenness Centrality for key connectors
   - Community Detection for user segmentation
   - Shortest Path for recommendation paths

2. **Create business intelligence queries**:
   - Customer lifetime value prediction
   - Churn prediction using graph patterns
   - Market basket analysis
   - Supply chain optimization

3. **Build predictive models**:
   - User behavior prediction
   - Product success prediction
   - Market trend analysis
   - Risk assessment models

**Requirements**:
- Use the Neo4j Graph Data Science (GDS) library (the successor to the deprecated Neo4j Graph Algorithms library)
- Implement machine learning integration
- Create predictive models
- Include model evaluation
- Handle model deployment

---

## 📝 Submission Guidelines

### For Each Exercise:
1. **Complete all tasks** as specified
2. **Include proper documentation** and comments
3. **Test your code** with sample data
4. **Handle errors** appropriately
5. **Optimize performance** where possible

### Code Quality Requirements:
- Follow Python best practices
- Use meaningful variable names
- Include type hints where appropriate
- Write clean, readable code
- Add comprehensive comments

### Testing Requirements:
- Test with various graph scenarios
- Test edge cases and error conditions
- Verify expected outputs
- Include performance benchmarks

### Documentation Requirements:
- Explain your graph modeling approach
- Document algorithm choices
- Include usage examples
- Provide performance analysis

---

## 🏆 Bonus Challenges

### Challenge 1: Multi-modal Graph Database
Implement a graph database that handles multiple data types (text, images, time-series) in a single graph.

### Challenge 2: Distributed Graph Processing
Create a distributed graph processing system using Neo4j clustering and external processing frameworks.

### Challenge 3: Graph-based Machine Learning
Implement graph neural networks for advanced analytics and predictions.

### Challenge 4: Graph Database Migration
Create tools to migrate data from relational databases to Neo4j while preserving relationships.

---

**Good luck with your Neo4j journey! 🚀**


In [None]:
# Exercise 5: Complex Graph Queries Challenge - Starter Code
# Complete the functions below according to the requirements

def find_shortest_path_between_users(user1_id, user2_id):
    """Placeholder: shortest path between two users through their connections.

    TODO: Implement this function using Cypher queries

    Args:
        user1_id: Identifier of the user the path starts from.
        user2_id: Identifier of the user the path ends at.

    Returns:
        None until the exercise is implemented.
    """
    return None

def calculate_user_influence_score(user_id):
    """Placeholder: influence score of a user based on network connections.

    TODO: Implement this function using graph algorithms

    Args:
        user_id: Identifier of the user to score.

    Returns:
        None until the exercise is implemented.
    """
    return None

def find_co_purchase_patterns():
    """Placeholder: products with the highest co-purchase patterns.

    TODO: Implement this function using Cypher aggregation

    Returns:
        None until the exercise is implemented.
    """
    return None

def recommend_friends(user_id, limit=5):
    """Placeholder: friend recommendation algorithm.

    TODO: Implement this function using graph algorithms

    Args:
        user_id: Identifier of the user to recommend friends for.
        limit: Maximum number of recommendations wanted (default 5).

    Returns:
        None until the exercise is implemented.
    """
    return None

# Test your functions here
for banner_line in (
    "Exercise 5: Complex Graph Queries Challenge",
    "Complete the functions above and test them here!",
):
    print(banner_line)


In [None]:
# Exercise 6: Social Network Analysis - Starter Code
# Design and implement a social commerce platform

class SocialCommerceService:
    """Starter skeleton for the Exercise 6 social commerce platform.

    Only the constructor does anything; every other method is an
    unimplemented placeholder that currently returns ``None``.
    """

    def __init__(self, driver):
        # Keep the driver handle for the methods that are still to be written.
        self.driver = driver

    def create_friendship(self, user1_id, user2_id):
        """Create a friendship relationship between two users.

        TODO: Implement this function
        """
        return None

    def recommend_friends(self, user_id, limit=5):
        """Recommend friends based on mutual connections.

        TODO: Implement this function
        """
        return None

    def share_product(self, user_id, product_id, message=""):
        """Share a product with the user's network.

        TODO: Implement this function
        """
        return None

    def calculate_influence_score(self, user_id):
        """Calculate a user's influence score based on network activity.

        TODO: Implement this function
        """
        return None

    def detect_communities(self):
        """Detect communities in the social network.

        TODO: Implement this function using community detection algorithms
        """
        return None

    def find_viral_products(self, days=30):
        """Find products that are going viral in the network.

        TODO: Implement this function
        """
        return None

# Sample social network data structure (for reference)
# Nothing in this cell loads this dict into Neo4j; it only illustrates a
# possible shape for the data.
# NOTE(review): the ids user_001..user_003 are also used by the Exercise 1
# sample users, but with different names there -- confirm whether the two
# data sets are meant to line up.
sample_social_data = {
    # One entry per user, with a follower count and an influence score.
    "users": [
        {"id": "user_001", "name": "Alice", "followers": 1000, "influence_score": 0.8},
        {"id": "user_002", "name": "Bob", "followers": 500, "influence_score": 0.6},
        {"id": "user_003", "name": "Charlie", "followers": 200, "influence_score": 0.4}
    ],
    # Directed edges between user ids ("from" -> "to").
    "relationships": [
        {"from": "user_001", "to": "user_002", "type": "FOLLOWS"},
        {"from": "user_002", "to": "user_003", "type": "FOLLOWS"},
        {"from": "user_001", "to": "user_003", "type": "FOLLOWS"}
    ],
    # Product share events; timestamps are plain date strings.
    "shares": [
        {"user_id": "user_001", "product_id": "prod_001", "timestamp": "2024-01-15"},
        {"user_id": "user_002", "product_id": "prod_001", "timestamp": "2024-01-16"}
    ]
}

# Initialize service
# Reuses the module-level `driver` opened in the first code cell.
social_service = SocialCommerceService(driver)
print("Exercise 6: Social Network Analysis")
print("Complete the SocialCommerceService methods above!")


In [None]:
# Exercise 7: Recommendation System Implementation - Starter Code
# Build sophisticated recommendation algorithms using graph data

class GraphRecommendationService:
    """Starter skeleton for the Exercise 7 recommendation algorithms.

    Only the constructor does anything; every other method is an
    unimplemented placeholder that currently returns ``None``.
    """

    def __init__(self, driver):
        # Keep the driver handle for the methods that are still to be written.
        self.driver = driver

    def collaborative_filtering_user_based(self, user_id, limit=10):
        """User-based collaborative filtering recommendations.

        TODO: Implement this function
        """
        return None

    def collaborative_filtering_item_based(self, product_id, limit=10):
        """Item-based collaborative filtering recommendations.

        TODO: Implement this function
        """
        return None

    def content_based_filtering(self, user_id, limit=10):
        """Content-based filtering using product similarity.

        TODO: Implement this function
        """
        return None

    def hybrid_recommendation(self, user_id, limit=10):
        """Hybrid recommendation combining multiple approaches.

        TODO: Implement this function
        """
        return None

    def evaluate_recommendations(self, user_id, recommended_products):
        """Evaluate recommendation quality using precision and recall.

        TODO: Implement this function
        """
        return None

    def handle_cold_start(self, user_id, user_preferences=None):
        """Handle the cold start problem for new users.

        TODO: Implement this function
        """
        return None

# Sample recommendation data structure (for reference)
# Nothing in this cell loads this dict into Neo4j; it only illustrates a
# possible shape for the data.
sample_recommendation_data = {
    # user id -> list of preference tags
    "user_preferences": {
        "user_001": ["electronics", "gadgets", "premium"],
        "user_002": ["electronics", "budget", "android"],
        "user_003": ["computers", "professional", "business"]
    },
    # product id -> feature dict
    # NOTE(review): prod_003 is "computers" here but "electronics" in the
    # Exercise 1 products list -- confirm which category is intended.
    "product_features": {
        "prod_001": {"category": "electronics", "price_range": "premium", "brand": "Apple"},
        "prod_002": {"category": "electronics", "price_range": "mid", "brand": "Samsung"},
        "prod_003": {"category": "computers", "price_range": "premium", "brand": "Apple"}
    },
    # user id -> list of rated interactions; timestamps are plain date strings
    "user_interactions": {
        "user_001": [{"product_id": "prod_001", "rating": 5, "timestamp": "2024-01-15"}],
        "user_002": [{"product_id": "prod_002", "rating": 4, "timestamp": "2024-01-16"}]
    }
}

# Initialize service
# Reuses the module-level `driver` opened in the first code cell.
recommendation_service = GraphRecommendationService(driver)
print("Exercise 7: Recommendation System Implementation")
print("Complete the GraphRecommendationService methods above!")
