In [None]:
# Imports
import json
import os
import pickle
import sys
from datetime import datetime

import numpy as np
import pandas as pd

# Make the parent directory importable so the project-local modules below resolve.
sys.path.append('..')

from load_data import load_dataset
from recommendations.recommendation_utils import ResourceMatcher, calculate_response_similarity

In [None]:
# Load Data
# Fetch the 'tech_survey' dataset through the project loader and report its size.
tech_survey_df = load_dataset('tech_survey')
print(f"Loaded {len(tech_survey_df)} survey responses")

# Features used for matching; edit this list to reflect the actual survey columns.
matching_features = [
    'treatment',
    'Gender',
    'Age',
]

# Build the matcher from the loaded survey data, then fit it on the chosen features.
matcher = ResourceMatcher(tech_survey_df)
matcher.fit(matching_features)

print(f"ResourceMatcher fitted with features: {matching_features}")

In [None]:
# Test resource matching with sample user input
# One hand-written profile standing in for a new user's answers.
sample_user = {'treatment': 'No', 'Gender': 'Female', 'Age': 28}

# Query the fitted matcher, requesting top_k=5 recommendations.
recommendations = matcher.match_resources(sample_user, top_k=5)

print("Sample user profile:", sample_user)
print("\nRecommended resources:")
# Number the recommendations from 1 for display.
for rank, resource in enumerate(recommendations, start=1):
    print(f"{rank}. {resource}")

In [None]:
# Batch processing for multiple users
def process_user_batch(user_profiles, matcher, top_k=None):
    """Collect resource recommendations for every profile in a batch.

    Args:
        user_profiles: Iterable of user profiles. Each profile is passed
            unchanged to ``matcher.match_resources``.
        matcher: Object exposing ``match_resources(profile)``. When ``top_k``
            is supplied it must also accept a ``top_k`` keyword argument.
        top_k: Optional number of recommendations to request per user. When
            ``None`` (the default) the keyword is not passed at all, so the
            matcher's own default applies.

    Returns:
        A list with one dict per profile, in input order, holding the keys
        ``'user_id'`` (0-based position in the batch), ``'profile'`` and
        ``'recommendations'``. An empty input yields an empty list.
    """
    # Only forward top_k when the caller asked for it, so existing two-argument
    # calls reach the matcher exactly as before.
    match_kwargs = {} if top_k is None else {'top_k': top_k}

    return [
        {
            'user_id': user_id,
            'profile': profile,
            'recommendations': matcher.match_resources(profile, **match_kwargs),
        }
        for user_id, profile in enumerate(user_profiles)
    ]

# Test with multiple sample users
# Three hand-written profiles, built from (treatment, gender, age) tuples.
sample_users = [
    {'treatment': treatment, 'Gender': gender, 'Age': age}
    for treatment, gender, age in (
        ('Yes', 'Male', 35),
        ('No', 'Female', 22),
        ('Maybe', 'Non-binary', 30),
    )
]

# Run every profile through the matcher in one call.
batch_results = process_user_batch(sample_users, matcher)

# Print each user's profile followed by only the first three recommendations.
for entry in batch_results:
    print(f"\nUser {entry['user_id']}: {entry['profile']}")
    print("Recommendations:", entry['recommendations'][:3])  # Show top 3

In [None]:
# Evaluate similarity patterns
# Score the sample profile against the survey data on the matching features.
# NOTE(review): presumably one score per survey response — confirm against
# calculate_response_similarity.
similarity_scores = calculate_response_similarity(
    sample_user,
    tech_survey_df,
    matching_features
)

print("Similarity score distribution:")
print(f"Mean: {similarity_scores.mean():.3f}")
print(f"Std: {similarity_scores.std():.3f}")
print(f"Max: {similarity_scores.max():.3f}")

# Find most similar responses
# argsort() yields positions, so the scores must be read back positionally.
# If the helper returns a pandas Series (not visible from this notebook),
# `similarity_scores[idx]` would be a label lookup and could hit the wrong row
# or raise when the index is not 0..n-1. A plain ndarray view is positional
# in both cases.
score_values = np.asarray(similarity_scores)
top_similar_indices = score_values.argsort()[-5:][::-1]  # five highest, best first
print("\nMost similar survey responses:")
for idx in top_similar_indices:
    print(f"Similarity: {score_values[idx]:.3f}")

In [None]:
# Save recommendation system
# Create output directories (no-op when they already exist)
model_dir = '../models/saved_models'
results_dir = '../outputs/results'
os.makedirs(model_dir, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)

# Save the trained matcher
# Filenames carry only the date, so a re-run on the same day overwrites
# that day's files.
timestamp = datetime.now().strftime('%Y%m%d')
model_filename = f"{model_dir}/resource_matcher_{timestamp}.pkl"

# NOTE: unpickling can execute arbitrary code; only load this file back from
# a location you trust.
with open(model_filename, 'wb') as f:
    pickle.dump(matcher, f)

print(f"ResourceMatcher saved to: {model_filename}")

# Save sample recommendations for evaluation
results_filename = f"{results_dir}/resource_recommendations_{timestamp}.json"

# Explicit UTF-8 keeps the file encoding independent of the platform default.
# default=str turns any value json cannot encode natively into its str() form.
with open(results_filename, 'w', encoding='utf-8') as f:
    json.dump(batch_results, f, indent=2, default=str)

print(f"Sample recommendations saved to: {results_filename}")