# Lab 2: MongoDB Exercises - Document Store

## 🎯 Objectives
- Understand MongoDB document modeling
- Practice CRUD operations
- Learn aggregation pipelines
- Implement indexing strategies
- Optimize query performance

## 📋 Prerequisites
- Complete Lab 1 (Setup & Connections)
- MongoDB container is running
- Sample data is loaded


## 1. Import Libraries and Setup


In [1]:
import json
import re
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import pandas as pd
import pymongo
import seaborn as sns
from bson import ObjectId

# Import from Lab 1
from pymongo import MongoClient

# Open the client and select the lab database.
# NOTE(review): the URI (including credentials) is hard-coded; presumably it
# matches the container configured in Lab 1 — adjust it if your setup differs.
MONGODB_URI = "mongodb://admin:password123@localhost:27017"
client = MongoClient(MONGODB_URI)
db = client.get_database("ecommerce")

# Report the connection target, then the collections currently in the database
print("✅ MongoDB connection established!")
database_name = db.name
print(f"Database: {database_name}")
collection_names = db.list_collection_names()
print(f"Collections: {collection_names}")


✅ MongoDB connection established!
Database: ecommerce
Collections: []


## 2. Product Service Implementation


In [2]:
class ProductService:
    """Data-access helper around the ``products`` collection of ``db``.

    Products are addressed by their ``productId`` field rather than by
    MongoDB's ``_id``. Both write methods stamp ``updatedAt`` with the
    current UTC time.
    """

    def __init__(self, db):
        """Keep a handle on ``db`` and on its ``products`` collection."""
        self.db = db
        self.products = db.products

    def create_product(self, product_data):
        """Insert ``product_data`` as a new product document.

        ``createdAt`` and ``updatedAt`` are set from a single clock reading,
        so a freshly created document always carries identical values for
        both fields.

        Args:
            product_data: Dict describing the product. It is modified in
                place: the two timestamp keys are added to it.

        Returns:
            Whatever ``insert_one`` returns for the insert.
        """
        now = datetime.now(timezone.utc)
        product_data["createdAt"] = now
        product_data["updatedAt"] = now
        return self.products.insert_one(product_data)

    def get_product_by_id(self, product_id):
        """Return the first document whose ``productId`` equals ``product_id``.

        Returns whatever ``find_one`` yields for that filter.
        """
        return self.products.find_one({"productId": product_id})

    def get_products_by_category(self, category):
        """Return a list of all documents whose ``category`` equals ``category``."""
        return list(self.products.find({"category": category}))

    def update_inventory(self, product_id, quantity_change):
        """Adjust ``inventory.available`` by ``quantity_change`` in one update.

        The increment and the ``updatedAt`` refresh are sent as a single
        ``update_one`` call.

        Args:
            product_id: ``productId`` of the product to adjust.
            quantity_change: Signed amount to add (negative to reduce stock).

        Returns:
            Whatever ``update_one`` returns for the update.

        NOTE(review): there is no lower-bound check here, so a large negative
        ``quantity_change`` can leave ``inventory.available`` below zero.
        """
        return self.products.update_one(
            {"productId": product_id},
            {
                "$inc": {"inventory.available": quantity_change},
                "$set": {"updatedAt": datetime.now(timezone.utc)},
            },
        )

    def search_products(self, search_term):
        """Return products whose name contains ``search_term`` or that carry it as a tag.

        The name match is case-insensitive. ``search_term`` is treated as
        literal text: it is escaped before being used as a regular expression,
        so characters such as ``+``, ``(`` or ``.`` match themselves instead
        of being interpreted as regex syntax. The tag match compares
        ``search_term`` against ``metadata.tags`` unchanged.

        Args:
            search_term: Text to look for (a string).

        Returns:
            A list of matching documents.
        """
        literal_pattern = re.escape(search_term)
        return list(self.products.find({
            "$or": [
                {"name": {"$regex": literal_pattern, "$options": "i"}},
                {"metadata.tags": {"$in": [search_term]}},
            ]
        }))

    def get_products_by_price_range(self, min_price, max_price):
        """Return a list of products with ``min_price <= price <= max_price``.

        Both bounds are inclusive (``$gte`` / ``$lte``).
        """
        return list(self.products.find({
            "price": {"$gte": min_price, "$lte": max_price}
        }))

# Initialize the shared service instance used by the CRUD exercise cells below
product_service = ProductService(db)
print("✅ ProductService initialized")


✅ ProductService initialized


## 3. Exercise 1: CRUD Operations


In [3]:
# Sample products from Lab 1
# Two product documents used as input for the CRUD cells below.
sample_products = [
    {
        "productId": "prod_001",  # key used by ProductService lookups and updates
        "name": "iPhone 15 Pro",
        "category": "electronics",
        "price": 25000000,  # presumably in VND — TODO confirm currency
        # Free-form, product-specific attributes
        "attributes": {
            "brand": "Apple",
            "model": "iPhone 15 Pro",
            "storage": "256GB",
            "color": "Natural Titanium"
        },
        # Stock information; "available" is the counter changed by update_inventory
        "inventory": {
            "available": 50,
            "reserved": 5,
            "warehouse": "Hanoi"
        },
        "metadata": {
            # search_products matches these tags by exact value
            "tags": ["smartphone", "premium", "5G"],
            "reviews": {
                "averageRating": 4.8,
                "totalReviews": 1250
            }
        }
    },
    {
        "productId": "prod_002",  # this product is removed again in the DELETE cell
        "name": "Samsung Galaxy S24",
        "category": "electronics",
        "price": 22000000,
        "attributes": {
            "brand": "Samsung",
            "model": "Galaxy S24",
            "storage": "128GB",
            "color": "Onyx Black"
        },
        "inventory": {
            "available": 30,
            "reserved": 2,
            "warehouse": "Ho Chi Minh"
        },
        "metadata": {
            "tags": ["smartphone", "android", "5G"],
            "reviews": {
                "averageRating": 4.6,
                "totalReviews": 890
            }
        }
    }
]

# CREATE: push every sample product through the service and echo the
# inserted_id reported for each insert
print("📝 Creating products...")
for product in sample_products:
    result = product_service.create_product(product)
    new_id = result.inserted_id
    print(f"Created product: {new_id}")

# An empty filter counts every document in the collection
total_products = product_service.products.count_documents({})
print(f"\nTotal products in collection: {total_products}")


📝 Creating products...
Created product: 68d00d9230396139e208eb37
Created product: 68d00d9230396139e208eb38

Total products in collection: 2


In [4]:
# READ: run each query helper of the service once and summarise the result
print("🔍 Reading products...")

# Lookup by productId; fall back to a placeholder when nothing comes back
product = product_service.get_product_by_id("prod_001")
product_label = product["name"] if product else "Not found"
print(f"Product by ID: {product_label}")

# Everything in the "electronics" category
electronics = product_service.get_products_by_category("electronics")
electronics_count = len(electronics)
print(f"Electronics products: {electronics_count}")

# Name / tag search
search_results = product_service.search_products("iPhone")
search_count = len(search_results)
print(f"Search results for 'iPhone': {search_count}")

# Inclusive price window of 20M to 30M
price_range = product_service.get_products_by_price_range(20000000, 30000000)
price_range_count = len(price_range)
print(f"Products in price range 20M-30M: {price_range_count}")


🔍 Reading products...
Product by ID: iPhone 15 Pro
Electronics products: 2
Search results for 'iPhone': 1
Products in price range 20M-30M: 2


In [5]:
# UPDATE: change stock through the service, then change the price directly
# on the collection
print("✏️ Updating products...")

# Take 5 units out of the available stock of prod_001
result = product_service.update_inventory("prod_001", -5)
print(f"Inventory update result: {result.modified_count} document(s) modified")

# Set a new price and refresh updatedAt in the same update
price_filter = {"productId": "prod_001"}
price_change = {"$set": {"price": 24000000, "updatedAt": datetime.utcnow()}}
update_result = product_service.products.update_one(price_filter, price_change)
print(f"Price update result: {update_result.modified_count} document(s) modified")

# Read the document back to confirm both changes
updated_product = product_service.get_product_by_id("prod_001")
new_price = updated_product["price"]
new_available = updated_product["inventory"]["available"]
print(f"Updated product - Price: {new_price}, Available: {new_available}")


✏️ Updating products...
Inventory update result: 1 document(s) modified
Price update result: 1 document(s) modified
Updated product - Price: 24000000, Available: 45


In [6]:
# DELETE: remove one product and confirm how many documents are left
print("🗑️ Deleting products...")

# Remove prod_002 only
delete_filter = {"productId": "prod_002"}
delete_result = product_service.products.delete_one(delete_filter)
deleted_count = delete_result.deleted_count
print(f"Delete result: {deleted_count} document(s) deleted")

# Count what remains in the collection
remaining_products = product_service.products.count_documents({})
print(f"Remaining products: {remaining_products}")

print("✅ CRUD operations completed!")


🗑️ Deleting products...
Delete result: 1 document(s) deleted
Remaining products: 1
✅ CRUD operations completed!


In [None]:
# TODO: add more exercises

## 🎓 Student Exercises - Practice Tasks

### Exercise 5: Advanced Querying Challenge
**Objective**: Practice complex MongoDB queries and aggregation pipelines

**Tasks**:
1. **Create a function** that finds products with inventory below a threshold
2. **Write an aggregation pipeline** to calculate average price by category
3. **Implement a function** to find products with highest ratings
4. **Create a query** to find products created in the last 30 days

**Requirements**:
- Use MongoDB aggregation framework
- Include proper error handling
- Add performance optimization with indexes
- Document your code with comments

---

### Exercise 6: Data Modeling Challenge
**Objective**: Design flexible schemas for different use cases

**Scenario**: You need to model a **User Profile** system with the following requirements:
- Users can have multiple addresses (home, work, billing)
- Users can have different subscription tiers (free, premium, enterprise)
- Users can have various preferences (notifications, privacy settings)
- Users can have purchase history and wishlist

**Tasks**:
1. **Design the user schema** using MongoDB's flexible document model
2. **Create sample user documents** with different subscription tiers
3. **Implement functions** to:
   - Add/remove addresses
   - Update subscription tier
   - Add items to wishlist
   - Get purchase history

**Requirements**:
- Use embedded documents for addresses and preferences
- Use arrays for purchase history and wishlist
- Include validation for required fields
- Handle edge cases (empty arrays, null values)

---

### Exercise 7: Performance Optimization
**Objective**: Learn indexing strategies and query optimization

**Tasks**:
1. **Create indexes** for common query patterns:
   - Category-based queries
   - Price range searches
   - Text search on product names
   - Date-based queries

2. **Benchmark query performance**:
   - Run queries before and after adding indexes
   - Measure execution time
   - Compare results

3. **Analyze query execution plans**:
   - Use `explain()` method
   - Identify slow queries
   - Optimize inefficient queries

**Requirements**:
- Create compound indexes where appropriate
- Use text indexes for search functionality
- Document performance improvements
- Include before/after metrics

---

### Exercise 8: Data Validation and Error Handling
**Objective**: Implement robust data validation and error handling

**Tasks**:
1. **Create validation rules** for product data:
   - Required fields validation
   - Data type validation
   - Range validation (price > 0, inventory >= 0)
   - Format validation (productId format)

2. **Implement error handling**:
   - Handle duplicate productId errors
   - Handle invalid data type errors
   - Handle connection errors
   - Handle timeout errors

3. **Create logging system**:
   - Log all CRUD operations
   - Log errors with details
   - Log performance metrics

**Requirements**:
- Use `try`/`except` blocks appropriately
- Create custom exception classes
- Implement retry logic for transient errors
- Include comprehensive logging

---

### Exercise 9: Real-world Integration
**Objective**: Build a complete e-commerce product management system

**Scenario**: Build a product management system for an online store with the following features:
- Product catalog management
- Inventory tracking
- Price management with discounts
- Product reviews and ratings
- Search and filtering
- Analytics and reporting

**Tasks**:
1. **Design the complete schema**:
   - Products collection
   - Reviews collection
   - Discounts collection
   - Analytics collection

2. **Implement core features**:
   - Product CRUD operations
   - Inventory management
   - Review system
   - Discount application
   - Search functionality
   - Analytics queries

3. **Create a simple CLI interface**:
   - Menu-driven interface
   - Input validation
   - Error handling
   - Results display

**Requirements**:
- Use proper MongoDB design patterns
- Implement data relationships
- Include comprehensive error handling
- Create unit tests for core functions
- Document the API

---

### Exercise 10: Advanced Aggregation Pipeline
**Objective**: Master MongoDB aggregation framework

**Tasks**:
1. **Create complex aggregation pipelines**:
   - Sales analytics by category and month
   - Top-selling products with revenue
   - Customer purchase patterns
   - Inventory turnover analysis

2. **Implement data transformation**:
   - Convert data formats
   - Calculate derived fields
   - Group and summarize data
   - Create reports

3. **Optimize aggregation performance**:
   - Use appropriate indexes
   - Optimize pipeline stages
   - Handle large datasets
   - Monitor performance

**Requirements**:
- Use the major aggregation stages and operators (e.g. `$match`, `$group`, `$project`, `$sort`, `$unwind`, `$lookup`)
- Include proper error handling
- Optimize for performance
- Create reusable pipeline functions
- Document pipeline logic

---

## 📝 Submission Guidelines

### For Each Exercise:
1. **Complete all tasks** as specified
2. **Include proper documentation** and comments
3. **Test your code** with sample data
4. **Handle errors** appropriately
5. **Optimize performance** where possible

### Code Quality Requirements:
- Follow Python best practices
- Use meaningful variable names
- Include type hints where appropriate
- Write clean, readable code
- Add comprehensive comments

### Testing Requirements:
- Test with various input scenarios
- Test edge cases and error conditions
- Verify expected outputs
- Include performance benchmarks

### Documentation Requirements:
- Explain your approach
- Document any assumptions
- Include usage examples
- Provide performance analysis

---

## 🏆 Bonus Challenges

### Challenge 1: Multi-language Support
Implement product descriptions in multiple languages using MongoDB's flexible schema.

### Challenge 2: Real-time Updates
Use MongoDB Change Streams to implement real-time inventory updates.

### Challenge 3: Data Migration
Create scripts to migrate data from a relational database to MongoDB.

### Challenge 4: Backup and Recovery
Implement automated backup and recovery procedures for MongoDB.

---

**Good luck with your MongoDB journey! 🚀**


In [None]:
# Exercise 5: Advanced Querying Challenge - Starter Code
# Complete the functions below according to the requirements

def find_low_inventory_products(threshold=10):
    """
    Find products with inventory below threshold.

    Starter stub: the body is intentionally left for the student, so calling
    it currently does nothing and returns None.

    Args:
        threshold: Inventory cutoff; defaults to 10. Presumably compared
            against ``inventory.available`` as used in the sample products —
            confirm when implementing.

    TODO: Implement this function
    """
    pass

def calculate_average_price_by_category():
    """
    Calculate average price by category using aggregation.

    Starter stub: takes no arguments and currently returns None. The
    exercise text asks for a MongoDB aggregation pipeline here.

    TODO: Implement this function
    """
    pass

def find_highest_rated_products(limit=5):
    """
    Find products with highest ratings.

    Starter stub: currently does nothing and returns None.

    Args:
        limit: Defaults to 5; presumably the maximum number of products to
            return — confirm when implementing. The rating is presumably
            ``metadata.reviews.averageRating`` as in the sample products.

    TODO: Implement this function
    """
    pass

def find_recent_products(days=30):
    """
    Find products created in the last N days.

    Starter stub: currently does nothing and returns None.

    Args:
        days: Size of the look-back window in days; defaults to 30.
            Presumably measured against the ``createdAt`` field that
            ProductService.create_product sets — confirm when implementing.

    TODO: Implement this function
    """
    pass

# Test your functions here: call each function above once it is implemented.
# The two prints below are placeholder output only.
print("Exercise 5: Advanced Querying Challenge")
print("Complete the functions above and test them here!")


In [None]:
# Exercise 6: Data Modeling Challenge - User Profile System
# Design and implement a flexible user profile schema

class UserProfileService:
    """Starter skeleton for the Exercise 6 user-profile service.

    Only the constructor is implemented; every other method is a stub that
    currently does nothing and returns None.
    """

    def __init__(self, db):
        """Keep a handle on ``db`` and on its ``users`` collection."""
        self.db = db
        self.users = db.users
    
    def create_user_profile(self, user_data):
        """
        Create a new user profile with flexible schema.

        Args:
            user_data: Presumably a dict shaped like the ``sample_user``
                reference document — confirm when implementing.

        TODO: Implement this function
        """
        pass
    
    def add_address(self, user_id, address_data):
        """
        Add a new address to user profile.

        Args:
            user_id: Presumably matched against the ``userId`` field.
            address_data: Presumably one entry for the ``addresses`` array
                (see ``sample_user``) — confirm when implementing.

        TODO: Implement this function
        """
        pass
    
    def remove_address(self, user_id, address_type):
        """
        Remove an address from user profile.

        Args:
            user_id: Presumably matched against the ``userId`` field.
            address_type: Presumably the ``type`` of the address to remove
                (the exercise text lists home, work, billing).

        TODO: Implement this function
        """
        pass
    
    def update_subscription_tier(self, user_id, new_tier):
        """
        Update user's subscription tier.

        Args:
            user_id: Presumably matched against the ``userId`` field.
            new_tier: The tier to switch to; the exercise text lists free,
                premium and enterprise.

        TODO: Implement this function
        """
        pass
    
    def add_to_wishlist(self, user_id, product_id):
        """
        Add product to user's wishlist.

        Args:
            user_id: Presumably matched against the ``userId`` field.
            product_id: Presumably a ``productId`` string such as those in
                the ``wishlist`` array of ``sample_user``.

        TODO: Implement this function
        """
        pass
    
    def get_purchase_history(self, user_id):
        """
        Get user's purchase history.

        Args:
            user_id: Presumably matched against the ``userId`` field.

        TODO: Implement this function
        """
        pass

# Sample user data structure (for reference)
# Not inserted anywhere in this cell; it only illustrates one possible shape.
sample_user = {
    "userId": "user_001",
    "email": "john.doe@example.com",
    # Embedded document with personal details
    "profile": {
        "firstName": "John",
        "lastName": "Doe",
        "dateOfBirth": "1990-01-15",  # stored as a string here, not a datetime
        "phone": "+1234567890"
    },
    # Array of embedded address documents, distinguished by "type"
    "addresses": [
        {
            "type": "home",
            "street": "123 Main St",
            "city": "New York",
            "state": "NY",
            "zipCode": "10001",
            "country": "USA"
        }
    ],
    # Current subscription; dates are strings in this sample
    "subscription": {
        "tier": "premium",
        "startDate": "2024-01-01",
        "endDate": "2024-12-31"
    },
    # Embedded preference documents
    "preferences": {
        "notifications": {
            "email": True,
            "sms": False,
            "push": True
        },
        "privacy": {
            "profilePublic": False,
            "showEmail": False
        }
    },
    # Array of product ids
    "wishlist": ["prod_001", "prod_002"],
    # Array of embedded purchase records
    "purchaseHistory": [
        {
            "orderId": "order_001",
            "productId": "prod_003",
            "purchaseDate": "2024-01-15",
            "amount": 50000  # currency/unit not stated — TODO confirm
        }
    ]
}

# Initialize the service against the lab database; its methods are still
# stubs, so the prints below are placeholder output only
user_service = UserProfileService(db)
print("Exercise 6: Data Modeling Challenge")
print("Complete the UserProfileService methods above!")


In [None]:
# Exercise 7: Performance Optimization - Indexing and Benchmarking
# Create indexes and benchmark query performance

import time
from pymongo import ASCENDING, DESCENDING, TEXT

def create_indexes():
    """
    Create indexes for common query patterns.

    Starter stub: currently does nothing and returns None. The exercise text
    lists category, price-range, text-search and date-based queries as the
    patterns to cover.

    TODO: Implement this function
    """
    pass

def benchmark_query_performance():
    """
    Benchmark queries before and after adding indexes.

    Starter stub: currently does nothing and returns None. The exercise text
    asks for execution times measured with and without indexes.

    TODO: Implement this function
    """
    pass

def analyze_query_execution_plan():
    """
    Analyze query execution plans using explain().

    Starter stub: currently does nothing and returns None.

    TODO: Implement this function
    """
    pass

# Example of how to create different types of indexes
def create_sample_indexes():
    """Print a reminder pointing at the index recipes kept in this function.

    No index is created: every ``create_index`` call below is commented out
    and serves only as a reference for implementing ``create_indexes()``.
    """
    # Reference recipes (not executed):
    #
    # Single field index
    #   db.products.create_index("category")
    #
    # Compound index
    #   db.products.create_index([("category", ASCENDING), ("price", ASCENDING)])
    #
    # Text index for search
    #   db.products.create_index([("name", TEXT), ("description", TEXT)])
    #
    # Sparse index (only for documents that have the field)
    #   db.products.create_index("metadata.tags", sparse=True)

    reminders = (
        "Sample index creation examples provided above",
        "Implement the actual index creation in create_indexes() function",
    )
    for reminder in reminders:
        print(reminder)

# Test your indexing functions here: call them once they are implemented.
# The two prints below are placeholder output only.
print("Exercise 7: Performance Optimization")
print("Complete the indexing and benchmarking functions above!")
