# Phase 3: Recommendation Engine

## Study Resource Recommender

**Objective:** Build the recommendation engine that:
1. Takes student quiz results
2. Predicts the mastery level for each skill using the ML model
3. Identifies weak skills (needs_help)
4. Recommends relevant YouTube videos

---

## Flow

```
Student Quiz Data → ML Model → Predict Mastery → Find Weak Skills → Match Videos → Recommendations
```

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import joblib
import pickle
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Libraries imported!")

## 2. Load Saved Models and Data from Phase 1 & 2

In [None]:
# Restore the classifier that Phase 2 saved as the best performer
best_model = joblib.load('best_model.pkl')
print("‚úÖ Best model loaded!")

# Restore the fitted feature scaler
scaler = joblib.load('scaler.pkl')
print("‚úÖ Scaler loaded!")

# Restore the fitted label encoder (maps encoded classes back to mastery labels)
label_encoder = joblib.load('label_encoder.pkl')
print("‚úÖ Label encoder loaded!")

# Restore the feature configuration dictionary saved alongside the model
with open('feature_config.pkl', 'rb') as config_file:
    feature_config = pickle.load(config_file)
print("‚úÖ Feature config loaded!")

# Quick sanity report of what was restored
print(f"\nBest Model: {feature_config['best_model']}")
print(f"Features: {len(feature_config['feature_columns'])}")
print(f"Classes: {feature_config['label_classes']}")

In [None]:
# Load datasets from Phase 1
# Student performance table; later cells select rows from it by 'user_id'.
student_skill_df = pd.read_csv('student_skill_performance.csv')
print(f"‚úÖ Student skill performance loaded: {len(student_skill_df)} records")

# Skill-to-video lookup table; later cells filter it on 'skill_name'.
skill_video_df = pd.read_csv('skill_video_mapping.csv')
print(f"‚úÖ Skill-video mapping loaded: {len(skill_video_df)} mappings")

# Show sample
print("\nSample skill-video mappings:")
# Bare expression: rendered as this notebook cell's output.
skill_video_df.head()

In [None]:
# Distinct skills that have at least one mapped video
skills_with_videos = skill_video_df['skill_name'].unique()
print(f"Skills with video recommendations: {len(skills_with_videos)}")

# Preview at most the first ten of them
print("\nSample skills:")
for skill in skills_with_videos[:10]:
    print(f"  - {skill}")

## 3. Build Recommendation Engine Class

In [None]:
class StudyResourceRecommender:
    """
    A recommendation engine that suggests YouTube videos based on student performance.

    Workflow:
    1. Analyze student's quiz performance per skill
    2. Predict mastery level using ML model
    3. Identify weak skills (needs_help)
    4. Recommend relevant YouTube videos
    """

    # Column layout of the 'video_recommendations' table. Declared once so that
    # an empty result still exposes the same columns as a populated one.
    _RECOMMENDATION_COLUMNS = [
        'skill_name', 'student_accuracy', 'mastery_level', 'priority',
        'video_title', 'video_id', 'views', 'likes',
    ]

    def __init__(self, model, scaler, label_encoder, feature_config, skill_video_mapping):
        """
        Initialize the recommender.

        Parameters:
        - model: Trained ML model exposing predict() and predict_proba()
        - scaler: Fitted scaler exposing transform() (only used when scaling is enabled)
        - label_encoder: Fitted encoder exposing classes_ and inverse_transform()
        - feature_config: Dict with 'feature_columns' and optional 'needs_scaling'
          (scaling is assumed to be required when the key is absent)
        - skill_video_mapping: DataFrame mapping skills ('skill_name') to videos
        """
        self.model = model
        self.scaler = scaler
        self.label_encoder = label_encoder
        self.feature_columns = feature_config['feature_columns']
        self.needs_scaling = feature_config.get('needs_scaling', True)
        self.skill_video_mapping = skill_video_mapping

    def predict_mastery(self, student_features):
        """
        Predict mastery level for given features.

        Parameters:
        - student_features: DataFrame containing every column in feature_columns

        Returns:
        - predictions: Decoded mastery labels, one per input row
        - probabilities: Array of probabilities per class (columns follow
          label_encoder.classes_ as used by analyze_student)

        Raises:
        - KeyError: if a configured feature column is missing
        """
        # Select by name so the column order matches what the model was trained on
        features = student_features[self.feature_columns].copy()

        # Scale if needed
        if self.needs_scaling:
            features_processed = self.scaler.transform(features)
        else:
            features_processed = features.values

        # Predict encoded classes and their probabilities
        predictions_encoded = self.model.predict(features_processed)
        probabilities = self.model.predict_proba(features_processed)

        # Decode predictions back to label strings
        predictions = self.label_encoder.inverse_transform(predictions_encoded)

        return predictions, probabilities

    def get_videos_for_skill(self, skill_name, top_n=5):
        """
        Get top videos for a specific skill.

        When both 'keyword_score' and 'views' columns exist, videos are ranked by
        0.6 * keyword_score + 0.4 * (views / max views for this skill); otherwise
        the mapping's row order is kept.

        Parameters:
        - skill_name: Name of the skill
        - top_n: Number of videos to return

        Returns:
        - DataFrame with video recommendations (empty DataFrame if the skill
          has no mapped videos)
        """
        videos = self.skill_video_mapping[
            self.skill_video_mapping['skill_name'] == skill_name
        ].copy()

        if videos.empty:
            return pd.DataFrame()

        # Sort by relevance (keyword_score) and popularity (views)
        if 'keyword_score' in videos.columns and 'views' in videos.columns:
            max_views = videos['views'].max()
            if max_views > 0:
                # Rows with a missing view count get no popularity bonus instead
                # of a NaN score that would wipe out their keyword relevance.
                popularity = (videos['views'] / max_views).fillna(0.0)
            else:
                # All view counts are 0 (or missing): 0/0 would turn every score
                # into NaN, so rank on keyword relevance alone.
                popularity = 0.0

            videos['ranking_score'] = videos['keyword_score'] * 0.6 + popularity * 0.4
            videos = videos.sort_values('ranking_score', ascending=False)

        return videos.head(top_n)

    def analyze_student(self, student_data):
        """
        Analyze a student's performance across all skills.

        Parameters:
        - student_data: DataFrame with student's performance per skill; must
          contain 'skill_name', 'accuracy' and all model feature columns

        Returns:
        - analysis: DataFrame with skill_name, predicted_mastery, accuracy, one
          prob_<label> column per class, and confidence (max class probability)
        """
        # Get predictions
        predictions, probabilities = self.predict_mastery(student_data)

        # Create analysis DataFrame
        analysis = student_data[['skill_name']].copy()
        analysis['predicted_mastery'] = predictions
        analysis['accuracy'] = student_data['accuracy'].values

        # Add probabilities, one column per class label
        for i, label in enumerate(self.label_encoder.classes_):
            analysis[f'prob_{label}'] = probabilities[:, i]

        # Add confidence (max probability)
        analysis['confidence'] = probabilities.max(axis=1)

        return analysis

    def _collect_videos(self, skills, top_n, mastery_level, priority):
        """Build one recommendation record per (skill, video) pair for the given skills."""
        records = []
        for _, row in skills.iterrows():
            skill = row['skill_name']
            videos = self.get_videos_for_skill(skill, top_n=top_n)

            # An empty frame simply yields no rows here
            for _, video in videos.iterrows():
                records.append({
                    'skill_name': skill,
                    'student_accuracy': row['accuracy'],
                    'mastery_level': mastery_level,
                    'priority': priority,
                    'video_title': video.get('video_title', 'N/A'),
                    'video_id': video.get('video_id', 'N/A'),
                    'views': video.get('views', 0),
                    'likes': video.get('likes', 0)
                })
        return records

    def get_recommendations(self, student_data, top_skills=5, videos_per_skill=3,
                            videos_per_learning_skill=2):
        """
        Get video recommendations for a student based on their weak skills.

        Parameters:
        - student_data: DataFrame with student's performance per skill
        - top_skills: Number of weak (and of learning) skills to focus on,
          lowest accuracy first
        - videos_per_skill: Number of videos per 'needs_help' skill
        - videos_per_learning_skill: Number of videos per 'learning' skill
          (fewer by default because these are lower priority)

        Returns:
        - recommendations: Dict with keys 'summary', 'skill_analysis',
          'weak_skills', 'learning_skills', 'mastered_skills' and
          'video_recommendations'. The summary counts cover ALL skills in each
          mastery class, while 'weak_skills' / 'learning_skills' hold only the
          top_skills rows that videos were looked up for.
        """
        # Analyze student
        analysis = self.analyze_student(student_data)
        mastery = analysis['predicted_mastery']

        # Full per-class views, kept so the summary is not capped by top_skills
        all_weak = analysis[mastery == 'needs_help']
        all_learning = analysis[mastery == 'learning']
        mastered_skills = analysis[mastery == 'mastered']

        # Focus lists: lowest accuracy first, limited to top_skills
        weak_skills = all_weak.sort_values('accuracy').head(top_skills)
        learning_skills = all_learning.sort_values('accuracy').head(top_skills)

        # HIGH priority records first, then MEDIUM
        video_recommendations = self._collect_videos(
            weak_skills, videos_per_skill, 'needs_help', 'HIGH'
        )
        video_recommendations += self._collect_videos(
            learning_skills, videos_per_learning_skill, 'learning', 'MEDIUM'
        )

        # Explicit columns keep the schema stable when nothing was recommended
        recommendations_df = pd.DataFrame(
            video_recommendations, columns=self._RECOMMENDATION_COLUMNS
        )

        # Summary
        summary = {
            'total_skills_analyzed': len(analysis),
            'skills_mastered': len(mastered_skills),
            'skills_learning': len(all_learning),
            'skills_need_help': len(all_weak),
            'videos_recommended': len(recommendations_df)
        }

        return {
            'summary': summary,
            'skill_analysis': analysis,
            'weak_skills': weak_skills,
            'learning_skills': learning_skills,
            'mastered_skills': mastered_skills,
            'video_recommendations': recommendations_df
        }

    def print_recommendations(self, recommendations):
        """
        Pretty print the recommendations.

        Parameters:
        - recommendations: Dict as returned by get_recommendations()
        """
        summary = recommendations['summary']

        print("\n" + "="*70)
        print("üìä STUDENT ANALYSIS SUMMARY")
        print("="*70)
        print(f"\n   Total Skills Analyzed: {summary['total_skills_analyzed']}")
        print(f"   ‚úÖ Mastered: {summary['skills_mastered']}")
        print(f"   üìö Learning: {summary['skills_learning']}")
        print(f"   ‚ùå Needs Help: {summary['skills_need_help']}")
        print(f"   üé¨ Videos Recommended: {summary['videos_recommended']}")

        print("\n" + "="*70)
        print("‚ùå SKILLS THAT NEED HELP (Priority: HIGH)")
        print("="*70)

        weak = recommendations['weak_skills']
        if len(weak) > 0:
            for _, row in weak.iterrows():
                print(f"\n   üìå {row['skill_name']}")
                print(f"      Accuracy: {row['accuracy']:.1%}")
                print(f"      Confidence: {row['confidence']:.1%}")
        else:
            print("\n   üéâ No skills need immediate help!")

        print("\n" + "="*70)
        print("üé¨ VIDEO RECOMMENDATIONS")
        print("="*70)

        videos = recommendations['video_recommendations']
        if len(videos) > 0:
            # Rows arrive grouped by skill; print a header whenever the skill changes
            current_skill = None
            for _, row in videos.iterrows():
                if row['skill_name'] != current_skill:
                    current_skill = row['skill_name']
                    priority_icon = "üî¥" if row['priority'] == 'HIGH' else "üü°"
                    print(f"\n   {priority_icon} {current_skill.upper()} (Accuracy: {row['student_accuracy']:.1%})")

                print(f"      ‚Ä¢ {row['video_title']}")
                if row['views'] > 0:
                    print(f"        Views: {row['views']:,}")
        else:
            print("\n   No video recommendations available.")

        print("\n" + "="*70)


print("‚úÖ StudyResourceRecommender class created!")

## 4. Initialize the Recommender

In [None]:
# Create recommender instance
# Wires together the artefacts loaded earlier in this file (model, scaler,
# label encoder, feature config) with the skill-to-video mapping table.
recommender = StudyResourceRecommender(
    model=best_model,
    scaler=scaler,
    label_encoder=label_encoder,
    feature_config=feature_config,
    skill_video_mapping=skill_video_df
)

print("‚úÖ Recommender initialized!")

## 5. Test with Sample Students

In [None]:
# Get a sample student's data
# Uses whichever user_id appears in the first row of the performance table.
sample_student_id = student_skill_df['user_id'].iloc[0]
print(f"Testing with Student ID: {sample_student_id}")

# Get all skills for this student
# .copy() so later feature engineering works on an independent frame.
sample_student_data = student_skill_df[
    student_skill_df['user_id'] == sample_student_id
].copy()

print(f"Skills attempted: {len(sample_student_data)}")
# Bare expression: rendered as this notebook cell's output.
sample_student_data.head()

In [None]:
# Ensure we have all required features
# Add engineered features if missing
def add_engineered_features(df):
    """
    Add engineered features from Phase 2 if they don't exist.

    Each feature is computed only when its column is absent, so values that are
    already present are left untouched. The input DataFrame is never modified;
    a new DataFrame is returned.

    Parameters:
    - df: DataFrame with per-skill performance columns. Depending on which
      engineered columns are missing it must provide: total_correct,
      total_hints_used, accuracy, avg_hint_ratio, avg_attempts,
      avg_response_time, pct_hint_first, total_attempts

    Returns:
    - DataFrame with efficiency_score, struggle_score, speed_score,
      hint_dependency and attempts_per_correct present, infinities removed and
      NaNs in numeric columns replaced by that column's median
    """
    # Work on a copy: the column assignments below would otherwise add columns
    # to the caller's DataFrame as a hidden side effect.
    df = df.copy()

    if 'efficiency_score' not in df.columns:
        # +1 keeps the ratio finite when no hints were used
        df['efficiency_score'] = df['total_correct'] / (df['total_hints_used'] + 1)

    if 'struggle_score' not in df.columns:
        # avg_attempts is scaled by the maximum within the rows passed in
        df['struggle_score'] = (
            (1 - df['accuracy']) * 0.4 +
            df['avg_hint_ratio'] * 0.3 +
            (df['avg_attempts'] / df['avg_attempts'].max()) * 0.3
        )

    if 'speed_score' not in df.columns:
        # 1 for the fastest possible response, 0 for the slowest row passed in
        df['speed_score'] = 1 - (df['avg_response_time'] / df['avg_response_time'].max()).clip(0, 1)

    if 'hint_dependency' not in df.columns:
        df['hint_dependency'] = (df['avg_hint_ratio'] + df['pct_hint_first']) / 2

    if 'attempts_per_correct' not in df.columns:
        # +1 keeps the ratio finite when nothing was answered correctly
        df['attempts_per_correct'] = df['total_attempts'] / (df['total_correct'] + 1)

    # Handle NaN/Inf: divisions by a zero maximum end up here and are median-filled
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.fillna(df.median(numeric_only=True))

    return df

# Fill in whichever engineered feature columns the sample student's rows still lack.
sample_student_data = add_engineered_features(sample_student_data)
print("‚úÖ Features prepared!")

In [None]:
# Get recommendations for sample student
# Focus on at most 5 skills per mastery class, with 3 videos per 'needs_help' skill.
recommendations = recommender.get_recommendations(
    sample_student_data,
    top_skills=5,
    videos_per_skill=3
)

# Print recommendations
recommender.print_recommendations(recommendations)

## 6. Test with Multiple Students

In [None]:
# Draw 5 distinct student ids; the fixed seed makes the draw repeatable
random_students = student_skill_df['user_id'].drop_duplicates().sample(5, random_state=42)

print("Testing recommendations for 5 random students:")
print("="*70)

for student_id in random_students:
    # Isolate this student's rows and complete their feature columns
    belongs_to_student = student_skill_df['user_id'] == student_id
    student_data = add_engineered_features(student_skill_df[belongs_to_student].copy())

    # Smaller focus than the single-student demo: 3 skills, 2 videos each
    recs = recommender.get_recommendations(
        student_data,
        top_skills=3,
        videos_per_skill=2,
    )

    # Compact per-student report
    print(f"\nüë§ Student ID: {student_id}")
    print(f"   Skills: {recs['summary']['total_skills_analyzed']}")
    print(f"   Mastered: {recs['summary']['skills_mastered']}")
    print(f"   Need Help: {recs['summary']['skills_need_help']}")
    print(f"   Videos Recommended: {recs['summary']['videos_recommended']}")

## 7. Create Function for New Quiz Input

In [None]:
def process_quiz_results(quiz_results):
    """
    Process raw quiz results into features for the recommender.

    Parameters:
    - quiz_results: List of dicts with quiz attempt data
      Each dict must have: skill_name, correct (0/1), hints_used,
      hints_available, time_taken_sec

    Returns:
    - DataFrame with one row per skill, ready for the recommendation engine

    Raises:
    - KeyError: if any of the required fields is missing (including when
      quiz_results is empty)
    """
    df = pd.DataFrame(quiz_results)

    # Fail early with a message that names the missing fields instead of an
    # error from deep inside the aggregation.
    required = ('skill_name', 'correct', 'hints_used', 'hints_available', 'time_taken_sec')
    missing = [column for column in required if column not in df.columns]
    if missing:
        raise KeyError(f"quiz_results is missing required field(s): {missing}")

    # Aggregate by skill. Named aggregation ties every output column to its
    # source explicitly, so the names cannot drift out of step with the order
    # of the aggregations.
    aggregated = df.groupby('skill_name').agg(
        total_correct=('correct', 'sum'),
        total_attempts=('correct', 'count'),
        accuracy=('correct', 'mean'),
        total_hints_used=('hints_used', 'sum'),
        avg_hint_count=('hints_used', 'mean'),
        avg_response_time=('time_taken_sec', 'mean'),
        avg_hints_available=('hints_available', 'mean'),
    ).reset_index()

    # Share of available hints that were used; a 0 denominator is treated as 1
    # and the result is capped to [0, 1]
    aggregated['avg_hint_ratio'] = (
        aggregated['avg_hint_count'] / aggregated['avg_hints_available'].replace(0, 1)
    ).clip(0, 1)

    # NOTE(review): despite its name this is the attempt count relative to the
    # most-attempted skill in this batch - confirm it matches the training feature.
    aggregated['avg_attempts'] = aggregated['total_attempts'] / aggregated['total_attempts'].max()
    # The raw quiz data carries no "hint requested first" signal, so a fixed
    # default is used
    aggregated['pct_hint_first'] = 0.3  # Default value
    aggregated['difficulty_score'] = (1 - aggregated['accuracy']) * 0.7 + aggregated['avg_hint_ratio'] * 0.3

    # Add engineered features
    aggregated = add_engineered_features(aggregated)

    return aggregated


# Example: Simulate new quiz results
# Three skills with four attempts each; every record carries all fields that
# process_quiz_results() aggregates (including 'hints_available').
new_quiz_results = [
    # Skill: Fractions (struggling) - 1 of 4 correct, heavy hint use
    {'skill_name': 'fractions', 'correct': 0, 'hints_used': 3, 'hints_available': 3, 'time_taken_sec': 120},
    {'skill_name': 'fractions', 'correct': 0, 'hints_used': 2, 'hints_available': 3, 'time_taken_sec': 90},
    {'skill_name': 'fractions', 'correct': 1, 'hints_used': 2, 'hints_available': 3, 'time_taken_sec': 60},
    {'skill_name': 'fractions', 'correct': 0, 'hints_used': 3, 'hints_available': 3, 'time_taken_sec': 100},
    
    # Skill: Addition (doing well) - 4 of 4 correct, almost no hints
    {'skill_name': 'addition', 'correct': 1, 'hints_used': 0, 'hints_available': 3, 'time_taken_sec': 20},
    {'skill_name': 'addition', 'correct': 1, 'hints_used': 0, 'hints_available': 3, 'time_taken_sec': 15},
    {'skill_name': 'addition', 'correct': 1, 'hints_used': 0, 'hints_available': 3, 'time_taken_sec': 18},
    {'skill_name': 'addition', 'correct': 1, 'hints_used': 1, 'hints_available': 3, 'time_taken_sec': 25},
    
    # Skill: Algebra (learning) - 3 of 4 correct, moderate hint use
    {'skill_name': 'algebra', 'correct': 1, 'hints_used': 1, 'hints_available': 3, 'time_taken_sec': 45},
    {'skill_name': 'algebra', 'correct': 0, 'hints_used': 2, 'hints_available': 3, 'time_taken_sec': 60},
    {'skill_name': 'algebra', 'correct': 1, 'hints_used': 1, 'hints_available': 3, 'time_taken_sec': 40},
    {'skill_name': 'algebra', 'correct': 1, 'hints_used': 0, 'hints_available': 3, 'time_taken_sec': 35},
]

print("Sample quiz results processed:")
processed_quiz = process_quiz_results(new_quiz_results)
# Bare expression: rendered as this notebook cell's output.
processed_quiz

## 8. Create Simple API Functions for GUI

In [None]:
def get_student_recommendations_by_id(student_id, student_skill_df, recommender):
    """
    Look up one student and return their recommendations (GUI entry point).

    Parameters:
    - student_id: Student's user ID
    - student_skill_df: Full student performance DataFrame (needs 'user_id')
    - recommender: StudyResourceRecommender instance

    Returns:
    - the dict produced by recommender.get_recommendations(), or
      {'error': ...} when the ID does not occur in the table
    """
    user_ids = student_skill_df['user_id']

    # Guard clause: unknown student
    if student_id not in user_ids.values:
        return {'error': f'Student ID {student_id} not found'}

    # Independent copy of this student's rows, completed with engineered features
    rows = student_skill_df[user_ids == student_id].copy()
    prepared = add_engineered_features(rows)

    # Fixed GUI defaults: 5 focus skills, 3 videos each
    return recommender.get_recommendations(prepared, top_skills=5, videos_per_skill=3)


def get_all_student_ids(student_skill_df):
    """
    Return every distinct 'user_id' as a plain list, in order of first
    appearance (used to populate the GUI dropdown).
    """
    distinct_ids = student_skill_df['user_id'].unique()
    return distinct_ids.tolist()


def get_skill_summary(recommendations):
    """
    Group skill names by predicted mastery level for display.

    An error dict (one containing the key 'error') is passed through unchanged.
    Otherwise the result maps 'mastered', 'learning' and 'needs_help' to the
    list of skill names predicted at that level.
    """
    if 'error' in recommendations:
        return recommendations

    analysis = recommendations['skill_analysis']
    predicted = analysis['predicted_mastery']

    grouped = {}
    for level in ('mastered', 'learning', 'needs_help'):
        grouped[level] = analysis.loc[predicted == level, 'skill_name'].tolist()
    return grouped


print("‚úÖ API functions created!")

In [None]:
# Test API function
# Uses the user_id from the first row, so the lookup cannot hit the
# "not found" branch and result['summary'] is available below.
test_student_id = student_skill_df['user_id'].iloc[0]
result = get_student_recommendations_by_id(test_student_id, student_skill_df, recommender)

print(f"API Test for Student {test_student_id}:")
print(f"Summary: {result['summary']}")

## 9. Save Recommender for Deployment

In [None]:
# Bundle every artefact the deployed app has to restore
deployment_package = dict(
    model=best_model,
    scaler=scaler,
    label_encoder=label_encoder,
    feature_config=feature_config,
    skill_video_mapping=skill_video_df,
)

# Persist the bundle as a single pickle file
with open('recommender_package.pkl', 'wb') as package_file:
    pickle.dump(deployment_package, package_file)

print("‚úÖ Recommender package saved: recommender_package.pkl")

# The demo app also reads the student performance table
student_skill_df.to_csv('student_data_for_app.csv', index=False)
print("‚úÖ Student data saved: student_data_for_app.csv")

## 10. Create recommender.py Module for GUI

In [None]:
# Generate the recommender.py file for the Streamlit app
# The literal below is the complete source text written to recommender.py:
# a StudyResourceRecommender class, load_recommender() (unpickles
# 'recommender_package.pkl' and builds the class from it) and
# add_engineered_features().
# NOTE(review): this text is a hand-maintained copy of code defined earlier in
# this file; nothing keeps the two in sync, so changes must be made in both places.
# Quotes inside the literal are backslash-escaped (\'); the triple-quoted string
# turns them back into plain single quotes in the written file.
recommender_module = '''
import pandas as pd
import numpy as np
import joblib
import pickle

class StudyResourceRecommender:
    """
    A recommendation engine that suggests YouTube videos based on student performance.
    """
    
    def __init__(self, model, scaler, label_encoder, feature_config, skill_video_mapping):
        self.model = model
        self.scaler = scaler
        self.label_encoder = label_encoder
        self.feature_columns = feature_config[\'feature_columns\']
        self.needs_scaling = feature_config.get(\'needs_scaling\', True)
        self.skill_video_mapping = skill_video_mapping
        
    def predict_mastery(self, student_features):
        features = student_features[self.feature_columns].copy()
        
        if self.needs_scaling:
            features_processed = self.scaler.transform(features)
        else:
            features_processed = features.values
        
        predictions_encoded = self.model.predict(features_processed)
        probabilities = self.model.predict_proba(features_processed)
        predictions = self.label_encoder.inverse_transform(predictions_encoded)
        
        return predictions, probabilities
    
    def get_videos_for_skill(self, skill_name, top_n=5):
        videos = self.skill_video_mapping[
            self.skill_video_mapping[\'skill_name\'] == skill_name
        ].copy()
        
        if len(videos) == 0:
            return pd.DataFrame()
        
        if \'keyword_score\' in videos.columns and \'views\' in videos.columns:
            videos[\'ranking_score\'] = (
                videos[\'keyword_score\'] * 0.6 + 
                (videos[\'views\'] / videos[\'views\'].max()) * 0.4
            )
            videos = videos.sort_values(\'ranking_score\', ascending=False)
        
        return videos.head(top_n)
    
    def analyze_student(self, student_data):
        predictions, probabilities = self.predict_mastery(student_data)
        
        analysis = student_data[[\'skill_name\']].copy()
        analysis[\'predicted_mastery\'] = predictions
        analysis[\'accuracy\'] = student_data[\'accuracy\'].values
        
        for i, label in enumerate(self.label_encoder.classes_):
            analysis[f\'prob_{label}\'] = probabilities[:, i]
        
        analysis[\'confidence\'] = probabilities.max(axis=1)
        
        return analysis
    
    def get_recommendations(self, student_data, top_skills=5, videos_per_skill=3):
        analysis = self.analyze_student(student_data)
        
        weak_skills = analysis[
            analysis[\'predicted_mastery\'] == \'needs_help\'
        ].sort_values(\'accuracy\').head(top_skills)
        
        learning_skills = analysis[
            analysis[\'predicted_mastery\'] == \'learning\'
        ].sort_values(\'accuracy\').head(top_skills)
        
        mastered_skills = analysis[
            analysis[\'predicted_mastery\'] == \'mastered\'
        ]
        
        video_recommendations = []
        
        for _, row in weak_skills.iterrows():
            skill = row[\'skill_name\']
            videos = self.get_videos_for_skill(skill, top_n=videos_per_skill)
            
            if len(videos) > 0:
                for _, video in videos.iterrows():
                    video_recommendations.append({
                        \'skill_name\': skill,
                        \'student_accuracy\': row[\'accuracy\'],
                        \'mastery_level\': \'needs_help\',
                        \'priority\': \'HIGH\',
                        \'video_title\': video.get(\'video_title\', \'N/A\'),
                        \'video_id\': video.get(\'video_id\', \'N/A\'),
                        \'views\': video.get(\'views\', 0),
                        \'likes\': video.get(\'likes\', 0)
                    })
        
        for _, row in learning_skills.iterrows():
            skill = row[\'skill_name\']
            videos = self.get_videos_for_skill(skill, top_n=2)
            
            if len(videos) > 0:
                for _, video in videos.iterrows():
                    video_recommendations.append({
                        \'skill_name\': skill,
                        \'student_accuracy\': row[\'accuracy\'],
                        \'mastery_level\': \'learning\',
                        \'priority\': \'MEDIUM\',
                        \'video_title\': video.get(\'video_title\', \'N/A\'),
                        \'video_id\': video.get(\'video_id\', \'N/A\'),
                        \'views\': video.get(\'views\', 0),
                        \'likes\': video.get(\'likes\', 0)
                    })
        
        recommendations_df = pd.DataFrame(video_recommendations)
        
        summary = {
            \'total_skills_analyzed\': len(analysis),
            \'skills_mastered\': len(mastered_skills),
            \'skills_learning\': len(learning_skills),
            \'skills_need_help\': len(weak_skills),
            \'videos_recommended\': len(recommendations_df)
        }
        
        return {
            \'summary\': summary,
            \'skill_analysis\': analysis,
            \'weak_skills\': weak_skills,
            \'learning_skills\': learning_skills,
            \'mastered_skills\': mastered_skills,
            \'video_recommendations\': recommendations_df
        }


def load_recommender(package_path=\'recommender_package.pkl\'):
    """Load the recommender from saved package."""
    with open(package_path, \'rb\') as f:
        package = pickle.load(f)
    
    return StudyResourceRecommender(
        model=package[\'model\'],
        scaler=package[\'scaler\'],
        label_encoder=package[\'label_encoder\'],
        feature_config=package[\'feature_config\'],
        skill_video_mapping=package[\'skill_video_mapping\']
    )


def add_engineered_features(df):
    """Add engineered features if they don\'t exist."""
    
    if \'efficiency_score\' not in df.columns:
        df[\'efficiency_score\'] = df[\'total_correct\'] / (df[\'total_hints_used\'] + 1)
    
    if \'struggle_score\' not in df.columns:
        df[\'struggle_score\'] = (
            (1 - df[\'accuracy\']) * 0.4 + 
            df[\'avg_hint_ratio\'] * 0.3 + 
            (df[\'avg_attempts\'] / df[\'avg_attempts\'].max()) * 0.3
        )
    
    if \'speed_score\' not in df.columns:
        df[\'speed_score\'] = 1 - (df[\'avg_response_time\'] / df[\'avg_response_time\'].max()).clip(0, 1)
    
    if \'hint_dependency\' not in df.columns:
        df[\'hint_dependency\'] = (df[\'avg_hint_ratio\'] + df[\'pct_hint_first\']) / 2
    
    if \'attempts_per_correct\' not in df.columns:
        df[\'attempts_per_correct\'] = df[\'total_attempts\'] / (df[\'total_correct\'] + 1)
    
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.fillna(df.median(numeric_only=True))
    
    return df
'''

# Write the generated source to recommender.py in the current working
# directory; mode 'w' replaces any existing file of that name.
with open('recommender.py', 'w') as f:
    f.write(recommender_module)

print("‚úÖ recommender.py module created!")

## 11. Summary

In [None]:
# Closing report for this phase, emitted line by line in a fixed order
summary_lines = [
    "\n" + "="*70,
    "PHASE 3 COMPLETE - RECOMMENDATION ENGINE",
    "="*70,
    # What was built
    "\n‚úÖ COMPLETED:",
    "   1. Built StudyResourceRecommender class",
    "   2. Integrated ML model with video recommendations",
    "   3. Created prediction and analysis functions",
    "   4. Tested with sample students",
    "   5. Created API functions for GUI",
    "   6. Saved deployment package",
    # Artefacts written to disk
    "\nüìÅ FILES CREATED:",
    "   - recommender_package.pkl (all models + data)",
    "   - student_data_for_app.csv (student data for demo)",
    "   - recommender.py (module for Streamlit app)",
    # End-to-end flow
    "\nüîÑ RECOMMENDATION FLOW:",
    "   Student ID ‚Üí Get Performance Data ‚Üí ML Prediction ‚Üí",
    "   Identify Weak Skills ‚Üí Match Videos ‚Üí Return Recommendations",
    # Pointer to the next phase
    "\n" + "="*70,
    "NEXT: Phase 4 - Build GUI with Streamlit & Deploy",
    "="*70,
]

for summary_line in summary_lines:
    print(summary_line)

---

## Next Steps (Phase 4)

1. Create Streamlit app (app.py)
2. Design user interface:
   - Student ID input / dropdown
   - Skill analysis dashboard
   - Video recommendations display
3. Deploy to Streamlit Cloud or HuggingFace Spaces