# Recommendation Service Evaluation (Standalone)
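This notebook evaluates the Content-Based Recommendation logic using **REAL DATA** exported from the system. If the exported CSV files are not found, it falls back to randomly generated dummy data so the cells can still be run end to end.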

## Workflow
1.  **Export Data Locally**: Run `python export_data_for_colab.py` on your local machine (where the DB is running). This will generate `products.csv` and `interactions.csv`.
2.  **Upload to Colab**: Upload these two CSV files to the Files section of this notebook.
3.  **Run Evaluation**: Execute the cells below to evaluate the recommendation quality based on your actual product catalog and user interactions.

## Logic Evaluated
The core logic being tested is: **"Recommended products should share at least one characteristic (Category or Collection) with products the user has interacted with."**


In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from collections import Counter

# Configuration
# Fixed seed so the random draws made by this notebook are repeatable.
RANDOM_SEED = 42
# Seed both generators: the stdlib `random` module is called directly when
# building dummy data, while DataFrame.sample() (called without random_state)
# draws from NumPy's global random state.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [None]:
# --- 1. DATA LOADING ---

def generate_dummy_data(n_products=50, n_users=10):
    """Generate sample product and interaction data for testing (fallback).

    Each synthetic user gets one preferred category; roughly 80% of that
    user's interactions are drawn from it and the rest from the whole
    catalog, so a content-based recommender has a signal to pick up.

    Args:
        n_products: Number of synthetic products to create (default 50).
        n_users: Number of synthetic users to create (default 10).

    Returns:
        A ``(products_df, interactions_df)`` tuple. ``products_df`` has the
        columns id, handle, title, category, collection, price.
        ``interactions_df`` has the columns user_id, product_handle,
        interaction_type, timestamp. Both frames keep these columns even
        when they contain no rows.
    """
    print("Generating dummy data for testing...")
    categories = ['Backpack', 'Messenger', 'Tote', 'Accessories']
    collections = ['Summer 2024', 'Back to School', 'Classic', 'Collaboration']

    # 1. Products
    products = []
    for i in range(1, n_products + 1):
        # Draw order (category, collection, price) is kept stable so a seeded
        # run yields the same catalog as before.
        cat = random.choice(categories)
        col = random.choice(collections)
        products.append({
            'id': f'prod_{i}',
            'handle': f'product-{i}',
            'title': f'JanSport Product {i}',
            'category': cat,
            'collection': col,
            'price': random.randint(500000, 2000000),
        })

    # Explicit columns keep the schema intact even for an empty catalog.
    products_df = pd.DataFrame(
        products,
        columns=['id', 'handle', 'title', 'category', 'collection', 'price'],
    )

    # 2. Interactions
    # Single reference time so timestamps differ only by whole days.
    now = datetime.now()
    interactions = []
    for user_idx in range(1, n_users + 1):
        user = f'user_{user_idx}'
        preferred_cat = random.choice(categories)
        # The preferred-category slice does not change per interaction, so
        # compute it once per user instead of once per draw.
        preferred_pool = products_df[products_df['category'] == preferred_cat]
        n_interactions = random.randint(5, 15)
        for _ in range(n_interactions):
            # 80% of interactions come from the preferred category.
            candidates = preferred_pool if random.random() < 0.8 else products_df
            if candidates.empty:
                continue
            # sample() draws from NumPy's global random state.
            prod = candidates.sample(1).iloc[0]
            interactions.append({
                'user_id': user,
                'product_handle': prod['handle'],
                # 'view' is listed three times so it is 3x as likely as 'add_to_cart'.
                'interaction_type': random.choice(['view', 'view', 'view', 'add_to_cart']),
                'timestamp': now - timedelta(days=random.randint(0, 30)),
            })

    interactions_df = pd.DataFrame(
        interactions,
        columns=['user_id', 'product_handle', 'interaction_type', 'timestamp'],
    )
    return products_df, interactions_df

# Load data: prefer the uploaded CSV exports, otherwise use synthetic data
try:
    products_df = pd.read_csv('products.csv')
    interactions_df = pd.read_csv('interactions.csv')
except FileNotFoundError:
    # Either file missing -> both frames are replaced by generated data
    print("⚠️ CSV files not found. Please upload 'products.csv' and 'interactions.csv' generated by 'export_data_for_colab.py'.")
    print("Falling back to dummy data for demonstration...")
    products_df, interactions_df = generate_dummy_data()
else:
    # Reached only when both reads succeeded
    print("✅ Loaded REAL data from 'products.csv' and 'interactions.csv'.")

print(f"Products: {len(products_df)}")
print(f"Interactions: {len(interactions_df)}")

# The recommender reads these product columns; add a placeholder when an
# export does not contain them so later cells do not fail on a missing column
for required_column in ('category', 'collection'):
    if required_column not in products_df.columns:
        products_df[required_column] = 'Unknown'

display(products_df.head())
display(interactions_df.head())


In [None]:
# --- 2. RECOMMENDATION ENGINE (MOCK) ---

class MockRecommendationEngine:
    """Content-based recommender mock driven by two DataFrames.

    The methods read the ``handle``, ``category`` and ``collection`` columns
    of ``products_df`` and the ``user_id`` and ``product_handle`` columns of
    ``interactions_df``.
    """

    def __init__(self, products_df, interactions_df):
        """Store the product catalog and the interaction log (not copied)."""
        self.products_df = products_df
        self.interactions_df = interactions_df

    def get_user_history(self, user_id):
        """Return the user's interactions joined with product info.

        Interactions whose ``product_handle`` has no matching product
        ``handle`` are dropped (inner join), so the result can be empty even
        for a user who has interactions.
        """
        user_ints = self.interactions_df[self.interactions_df['user_id'] == user_id]
        return pd.merge(
            user_ints,
            self.products_df,
            left_on='product_handle',
            right_on='handle',
            how='inner',
        )

    def recommend(self, user_id, limit=5):
        """Recommend up to ``limit`` unseen products for ``user_id``.

        Content-based logic:
        1. Count how often each category and collection occurs in the
           user's history.
        2. Drop products the user has already interacted with.
        3. Score each remaining product as (category count) + (collection
           count); missing (NaN) category/collection values never match.
        4. Return the highest-scoring products. Ties keep catalog order
           (stable sort), so results are reproducible.

        Args:
            user_id: Value matched against ``interactions_df['user_id']``.
            limit: Maximum number of recommendations; non-positive values
                yield an empty list.

        Returns:
            A list of product records (dicts). Records for users with
            history carry an extra ``score`` key; cold-start records are a
            random sample of the catalog and have no ``score``.
        """
        if limit <= 0:
            return []

        history = self.get_user_history(user_id)

        if history.empty:
            # Cold start: return random products (stand-in for "popular").
            sample_size = min(limit, len(self.products_df))
            return self.products_df.sample(sample_size).to_dict('records')

        # 1. User profile: frequency of each category/collection in history.
        cat_counts = history['category'].value_counts()
        col_counts = history['collection'].value_counts()

        # 2. Filter seen products; copy so the catalog is never mutated.
        seen_handles = history['handle'].unique()
        unseen_mask = ~self.products_df['handle'].isin(seen_handles)
        candidates = self.products_df[unseen_mask].copy()

        # 3. Vectorized scoring: values absent from the profile map to NaN,
        #    which is turned into a zero contribution.
        candidates['score'] = (
            candidates['category'].map(cat_counts).fillna(0)
            + candidates['collection'].map(col_counts).fillna(0)
        ).astype(int)

        # 4. Stable sort so equally scored products keep their catalog order.
        recs = candidates.sort_values(
            'score', ascending=False, kind='mergesort'
        ).head(limit)
        return recs.to_dict('records')

# Initialize Engine
# Notebook-level instance built from the frames loaded above; the evaluation
# helper defined in the next cell reads this global.
engine = MockRecommendationEngine(products_df, interactions_df)

In [None]:
# --- 3. EVALUATION TOOLS ---

def evaluate_recommendations(user_id, k=5, rec_engine=None):
    """Evaluate recommendations for a single user.

    Metric: relevance -- a recommendation is relevant when it shares its
    category OR its collection with at least one product in the user's
    history. The score is ``relevant_count / k``, so returning fewer than
    ``k`` recommendations lowers the score.

    Args:
        user_id: User to evaluate.
        k: Number of recommendations to request.
        rec_engine: Object exposing ``get_user_history(user_id)`` and
            ``recommend(user_id, limit=...)``. Defaults to the notebook-level
            ``engine``.

    Returns:
        ``None`` when the user has no usable history (cold start), otherwise
        a dict with ``user_id``, ``score``, ``history_cats`` and
        ``rec_details`` (one entry per recommendation).
    """
    # Fall back to the notebook-level engine so existing calls keep working.
    active_engine = engine if rec_engine is None else rec_engine

    history = active_engine.get_user_history(user_id)
    if history.empty:
        return None  # Skip cold start users for this eval

    known_cats = set(history['category'].unique())
    known_cols = set(history['collection'].unique())

    recs = active_engine.recommend(user_id, limit=k)

    relevant_count = 0
    details = []

    for rec in recs:
        cat_match = rec['category'] in known_cats
        col_match = rec['collection'] in known_cols
        is_relevant = cat_match or col_match
        if is_relevant:
            relevant_count += 1

        # Only claim a match that actually exists; a non-relevant
        # recommendation gets an explicit "no match" reason.
        if cat_match:
            reason = f"Cat: {rec['category']} in {known_cats}"
        elif col_match:
            reason = f"Col: {rec['collection']} in {known_cols}"
        else:
            reason = 'No shared category or collection'

        details.append({
            'handle': rec['handle'],
            'category': rec['category'],
            'collection': rec['collection'],
            'is_relevant': is_relevant,
            'reason': reason,
        })

    score = relevant_count / k if k > 0 else 0

    return {
        'user_id': user_id,
        'score': score,
        'history_cats': list(known_cats),
        'rec_details': details,
    }

# Test on one user
# Use the first user in the interaction log, if there is one.
user_ids = interactions_df['user_id'].unique()
test_user = user_ids[0] if len(user_ids) > 0 else None
print(f"Testing User: {test_user}")

# evaluate_recommendations() returns None when none of the user's
# interactions match a product in the catalog, so guard before indexing.
eval_res = evaluate_recommendations(test_user) if test_user is not None else None
if eval_res is None:
    print("No evaluable history for this user (no interactions matched the product catalog).")
    rec_details_df = pd.DataFrame()
else:
    print(f"Relevance Score: {eval_res['score']}")
    rec_details_df = pd.DataFrame(eval_res['rec_details'])

# Last expression of the cell so the notebook renders the table.
rec_details_df

In [None]:
# --- 4. RUN FULL EVALUATION ---

results = []
users = interactions_df['user_id'].unique()

print(f"Evaluating {len(users)} users...")

# evaluate_recommendations() returns None for users without usable history;
# those users are left out of the aggregate.
for user in users:
    res = evaluate_recommendations(user)
    if res is not None:
        results.append(res)

# Aggregate Results
scores = [r['score'] for r in results]
avg_score = sum(scores) / len(scores) if scores else 0

print("\n" + "="*30)
print("OVERALL EVALUATION REPORT")
print("="*30)
print("Metric: Content Relevance (Share Category/Collection)")
print(f"Users Evaluated: {len(results)}")
print(f"Average Relevance Score: {avg_score:.2%}")
print("="*30)

# Save detailed report: one row per (user, recommendation) pair
report_data = [
    {
        'user_id': r['user_id'],
        'rec_handle': d['handle'],
        'rec_category': d['category'],
        'rec_collection': d['collection'],
        'is_relevant': d['is_relevant'],
        'history_cats': str(r['history_cats']),
    }
    for r in results
    for d in r['rec_details']
]

# Explicit columns so the CSV still gets a header row when no user could be
# evaluated; a header-less empty file cannot be read back with pd.read_csv.
report_columns = [
    'user_id',
    'rec_handle',
    'rec_category',
    'rec_collection',
    'is_relevant',
    'history_cats',
]
report_df = pd.DataFrame(report_data, columns=report_columns)
report_df.to_csv('recommendation_evaluation_report.csv', index=False)
print("Detailed report saved to 'recommendation_evaluation_report.csv'")