In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Music Recommender System Exploration\n",
    "\n",
    "This notebook explores and sets up the music and food recommendation systems for MoodSense."
   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 8. Interactive Recommendation Testing\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Interactive testing function\\n\",\n    \"def test_custom_emotion_scenario(emotion_scores_dict):\\n\",\n    \"    \\\"\\\"\\\"Test recommendations for custom emotion scenario\\\"\\\"\\\"\\n\",\n    \"    print(f\\\"\\\\nTesting Custom Scenario: {emotion_scores_dict}\\\")\\n\",\n    \"    print(\\\"=\\\" * 60)\\n\",\n    \"    \\n\",\n    \"    # Normalize emotion scores\\n\",\n    \"    total_score = sum(emotion_scores_dict.values())\\n\",\n    \"    if total_score > 0:\\n\",\n    \"        normalized_scores = {k: v/total_score for k, v in emotion_scores_dict.items()}\\n\",\n    \"    else:\\n\",\n    \"        normalized_scores = emotion_scores_dict\\n\",\n    \"    \\n\",\n    \"    # Get recommendations\\n\",\n    \"    music_recs = music_recommender.get_recommendations(normalized_scores, num_recommendations=5)\\n\",\n    \"    food_recs = food_recommender.get_food_recommendations(normalized_scores, num_recommendations=5)\\n\",\n    \"    \\n\",\n    \"    # Display results\\n\",\n    \"    print(f\\\"🎵 Music Recommendations:\\\")\\n\",\n    \"    for i, rec in enumerate(music_recs):\\n\",\n    \"        similarity = rec.get('similarity', 0)\\n\",\n    \"        print(f\\\"   {i+1}. 
{rec['track_name']} by {rec['artists']} (Match: {similarity:.2%})\\\")\\n\",\n    \"        if 'audio_features' in rec:\\n\",\n    \"            features = rec['audio_features']\\n\",\n    \"            print(f\\\"      Features: Valence={features.get('valence', 0):.2f}, Energy={features.get('energy', 0):.2f}\\\")\\n\",\n    \"    \\n\",\n    \"    print(f\\\"\\\\n🍽️ Food Recommendations:\\\")\\n\",\n    \"    for i, rec in enumerate(food_recs):\\n\",\n    \"        score = rec.get('score', 0)\\n\",\n    \"        print(f\\\"   {i+1}. {rec['food_item']} (Score: {score:.2%})\\\")\\n\",\n    \"        print(f\\\"      {rec['description']}\\\")\\n\",\n    \"        if rec['benefits']:\\n\",\n    \"            print(f\\\"      Benefits: {', '.join(rec['benefits'][:3])}\\\")\\n\",\n    \"        print()\\n\",\n    \"\\n\",\n    \"# Test various scenarios\\n\",\n    \"test_scenarios = [\\n\",\n    \"    {'happy': 0.6, 'surprise': 0.4},  # Mixed positive emotions\\n\",\n    \"    {'sad': 0.7, 'fear': 0.3},         # Mixed negative emotions\\n\",\n    \"    {'angry': 0.5, 'disgust': 0.3, 'sad': 0.2},  # Complex negative mix\\n\",\n    \"    {'happy': 0.4, 'neutral': 0.6},   # Mild happiness\\n\",\n    \"    {'surprise': 1.0}                  # Pure surprise\\n\",\n    \"]\\n\",\n    \"\\n\",\n    \"for scenario in test_scenarios:\\n\",\n    \"    test_custom_emotion_scenario(scenario)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 9. 
Recommendation Quality Metrics\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Calculate recommendation quality metrics\\n\",\n    \"print(\\\"Recommendation Quality Assessment:\\\")\\n\",\n    \"print(\\\"=\\\" * 50)\\n\",\n    \"\\n\",\n    \"# Diversity metric - how diverse are the recommendations?\\n\",\n    \"def calculate_music_diversity(recommendations):\\n\",\n    \"    \\\"\\\"\\\"Calculate diversity of music recommendations based on audio features\\\"\\\"\\\"\\n\",\n    \"    if not recommendations or len(recommendations) < 2:\\n\",\n    \"        return 0\\n\",\n    \"    \\n\",\n    \"    features_list = []\\n\",\n    \"    for rec in recommendations:\\n\",\n    \"        if 'audio_features' in rec:\\n\",\n    \"            features = rec['audio_features']\\n\",\n    \"            feature_vector = [features.get(f, 0) for f in ['valence', 'energy', 'danceability']]\\n\",\n    \"            features_list.append(feature_vector)\\n\",\n    \"    \\n\",\n    \"    if len(features_list) < 2:\\n\",\n    \"        return 0\\n\",\n    \"    \\n\",\n    \"    # Calculate pairwise distances\\n\",\n    \"    total_distance = 0\\n\",\n    \"    count = 0\\n\",\n    \"    \\n\",\n    \"    for i in range(len(features_list)):\\n\",\n    \"        for j in range(i+1, len(features_list)):\\n\",\n    \"            distance = np.linalg.norm(np.array(features_list[i]) - np.array(features_list[j]))\\n\",\n    \"            total_distance += distance\\n\",\n    \"            count += 1\\n\",\n    \"    \\n\",\n    \"    return total_distance / count if count > 0 else 0\\n\",\n    \"\\n\",\n    \"def calculate_food_diversity(recommendations):\\n\",\n    \"    \\\"\\\"\\\"Calculate diversity of food recommendations\\\"\\\"\\\"\\n\",\n    \"    if not recommendations or len(recommendations) < 2:\\n\",\n    \"        return 0\\n\",\n    \"    \\n\",\n    \"    # Simple diversity 
based on food categories (you could enhance this)\\n\",\n    \"    food_types = set()\\n\",\n    \"    for rec in recommendations:\\n\",\n    \"        food_item = rec['food_item'].lower()\\n\",\n    \"        # Simple categorization\\n\",\n    \"        if any(word in food_item for word in ['chocolate', 'ice cream', 'cookie']):\\n\",\n    \"            food_types.add('sweet')\\n\",\n    \"        elif any(word in food_item for word in ['salad', 'vegetable', 'fruit']):\\n\",\n    \"            food_types.add('healthy')\\n\",\n    \"        elif any(word in food_item for word in ['coffee', 'tea']):\\n\",\n    \"            food_types.add('beverage')\\n\",\n    \"        else:\\n\",\n    \"            food_types.add('other')\\n\",\n    \"    \\n\",\n    \"    return len(food_types) / len(recommendations)\\n\",\n    \"\\n\",\n    \"# Test diversity for different emotions\\n\",\n    \"diversity_results = []\\n\",\n    \"\\n\",\n    \"for emotion in ['happy', 'sad', 'angry', 'neutral']:\\n\",\n    \"    emotion_dict = {emotion: 1.0}\\n\",\n    \"    \\n\",\n    \"    music_recs = music_recommender.get_recommendations(emotion_dict, num_recommendations=5)\\n\",\n    \"    food_recs = food_recommender.get_food_recommendations(emotion_dict, num_recommendations=5)\\n\",\n    \"    \\n\",\n    \"    music_diversity = calculate_music_diversity(music_recs)\\n\",\n    \"    food_diversity = calculate_food_diversity(food_recs)\\n\",\n    \"    \\n\",\n    \"    diversity_results.append({\\n\",\n    \"        'emotion': emotion,\\n\",\n    \"        'music_diversity': music_diversity,\\n\",\n    \"        'food_diversity': food_diversity,\\n\",\n    \"        'music_count': len(music_recs),\\n\",\n    \"        'food_count': len(food_recs)\\n\",\n    \"    })\\n\",\n    \"\\n\",\n    \"# Display diversity results\\n\",\n    \"diversity_df = pd.DataFrame(diversity_results)\\n\",\n    \"print(\\\"Recommendation Diversity by Emotion:\\\")\\n\",\n    
\"print(diversity_df.round(3))\\n\",\n    \"\\n\",\n    \"# Visualize diversity\\n\",\n    \"fig, axes = plt.subplots(1, 2, figsize=(12, 5))\\n\",\n    \"\\n\",\n    \"# Music diversity\\n\",\n    \"axes[0].bar(diversity_df['emotion'], diversity_df['music_diversity'])\\n\",\n    \"axes[0].set_title('Music Recommendation Diversity')\\n\",\n    \"axes[0].set_ylabel('Diversity Score')\\n\",\n    \"axes[0].set_xlabel('Emotion')\\n\",\n    \"\\n\",\n    \"# Food diversity\\n\",\n    \"axes[1].bar(diversity_df['emotion'], diversity_df['food_diversity'])\\n\",\n    \"axes[1].set_title('Food Recommendation Diversity')\\n\",\n    \"axes[1].set_ylabel('Diversity Score')\\n\",\n    \"axes[1].set_xlabel('Emotion')\\n\",\n    \"\\n\",\n    \"plt.tight_layout()\\n\",\n    \"plt.show()\\n\",\n    \"\\n\",\n    \"# Overall system statistics\\n\",\n    \"print(f\\\"\\\\n📊 Overall System Statistics:\\\")\\n\",\n    \"print(f\\\"   Music Database Size: {len(music_recommender.spotify_data) if music_recommender.spotify_data is not None else 0} tracks\\\")\\n\",\n    \"print(f\\\"   Food Database Size: {len(food_recommender.food_data) if food_recommender.food_data is not None else 0} items\\\")\\n\",\n    \"print(f\\\"   Average Music Diversity: {diversity_df['music_diversity'].mean():.3f}\\\")\\n\",\n    \"print(f\\\"   Average Food Diversity: {diversity_df['food_diversity'].mean():.3f}\\\")\\n\",\n    \"print(f\\\"   Spotify API Available: {'Yes' if music_recommender.spotify_client else 'No'}\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## 10. 
Save Recommendation System Configuration\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# Save recommendation system configuration and results\\n\",\n    \"import json\\n\",\n    \"\\n\",\n    \"# Create comprehensive system summary\\n\",\n    \"system_summary = {\\n\",\n    \"    \\\"recommendation_system\\\": {\\n\",\n    \"        \\\"music_recommender\\\": {\\n\",\n    \"            \\\"type\\\": \\\"Content-based filtering with cosine similarity\\\",\\n\",\n    \"            \\\"features_used\\\": music_recommender.audio_features,\\n\",\n    \"            \\\"data_source\\\": \\\"Spotify dataset\\\",\\n\",\n    \"            \\\"total_tracks\\\": len(music_recommender.spotify_data) if music_recommender.spotify_data is not None else 0,\\n\",\n    \"            \\\"api_available\\\": music_recommender.spotify_client is not None\\n\",\n    \"        },\\n\",\n    \"        \\\"food_recommender\\\": {\\n\",\n    \"            \\\"type\\\": \\\"Emotion-score weighted matching\\\",\\n\",\n    \"            \\\"total_items\\\": len(food_recommender.food_data) if food_recommender.food_data is not None else 0,\\n\",\n    \"            \\\"emotions_supported\\\": EmotionUtils.EMOTION_LABELS\\n\",\n    \"        }\\n\",\n    \"    },\\n\",\n    \"    \\\"emotion_mapping\\\": {\\n\",\n    \"        \\\"music_features\\\": music_recommender.emotion_to_features,\\n\",\n    \"        \\\"supported_emotions\\\": EmotionUtils.EMOTION_LABELS\\n\",\n    \"    },\\n\",\n    \"    \\\"performance_metrics\\\": {},\\n\",\n    \"    \\\"test_results\\\": {}\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"# Add performance metrics if available\\n\",\n    \"if 'diversity_df' in locals():\\n\",\n    \"    system_summary[\\\"performance_metrics\\\"] = {\\n\",\n    \"        \\\"average_music_diversity\\\": float(diversity_df['music_diversity'].mean()),\\n\",\n    \"        
\\\"average_food_diversity\\\": float(diversity_df['food_diversity'].mean()),\\n\",\n    \"        \\\"diversity_by_emotion\\\": diversity_df.to_dict('records')\\n\",\n    \"    }\\n\",\n    \"\\n\",\n    \"# Add coverage results if available\\n\",\n    \"if 'coverage_results' in locals():\\n\",\n    \"    system_summary[\\\"performance_metrics\\\"][\\\"coverage\\\"] = {\\n\",\n    \"        \\\"music_coverage\\\": float(music_coverage) if 'music_coverage' in locals() else 0,\\n\",\n    \"        \\\"food_coverage\\\": float(food_coverage) if 'food_coverage' in locals() else 0,\\n\",\n    \"        \\\"recommendable_music_items\\\": len(coverage_results['music']),\\n\",\n    \"        \\\"recommendable_food_items\\\": len(coverage_results['food'])\\n\",\n    \"    }\\n\",\n    \"\\n\",\n    \"# Save system summary\\n\",\n    \"os.makedirs('../models/recommendation_system', exist_ok=True)\\n\",\n    \"with open('../models/recommendation_system/system_summary.json', 'w') as f:\\n\",\n    \"    json.dump(system_summary, f, indent=4)\\n\",\n    \"\\n\",\n    \"# Save emotion-to-features mapping\\n\",\n    \"emotions_features_df = pd.DataFrame(music_recommender.emotion_to_features).T\\n\",\n    \"emotions_features_df.to_csv('../models/recommendation_system/emotion_features_mapping.csv')\\n\",\n    \"\\n\",\n    \"# Save food-emotion data\\n\",\n    \"if food_recommender.food_data is not None:\\n\",\n    \"    food_recommender.save_food_data('../models/recommendation_system/food_emotion_mapping.csv')\\n\",\n    \"\\n\",\n    \"print(\\\"📋 Recommendation System Summary:\\\")\\n\",\n    \"print(json.dumps(system_summary, indent=2))\\n\",\n    \"\\n\",\n    \"print(\\\"\\\\n✅ Music and Food Recommendation System setup completed!\\\")\\n\",\n    \"print(\\\"\\\\n📁 System files saved in: ../models/recommendation_system/\\\")\\n\",\n    \"print(\\\"   - system_summary.json (complete system summary)\\\")\\n\",\n    \"print(\\\"   - emotion_features_mapping.csv (emotion-to-audio 
features mapping)\\\")\\n\",\n    \"print(\\\"   - food_emotion_mapping.csv (food-emotion relationships)\\\")\\n\",\n    \"\\n\",\n    \"print(\\\"\\\\n🚀 All systems ready!\\\")\\n\",\n    \"print(\\\"   You can now run the Streamlit app: streamlit run ../src/app.py\\\")\\n\",\n    \"print(\\\"\\\\n💡 Tips for better recommendations:\\\")\\n\",\n    \"print(\\\"   - Use a larger Spotify dataset for more diverse music recommendations\\\")\\n\",\n    \"print(\\\"   - Set up Spotify API credentials for real-time music discovery\\\")\\n\",\n    \"print(\\\"   - Customize the food-emotion mappings based on nutritional research\\\")\\n\",\n    \"print(\\\"   - Train the emotion recognition models on larger datasets for better accuracy\\\")\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.8.5\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 4\n}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "sys.path.append('../src')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from plotly.subplots import make_subplots\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "from recommender import MusicRecommender, FoodRecommender\n",
    "from utils import EmotionUtils, DataUtils\n",
    "\n",
    "# Set style\n",
    "plt.style.use('default')\n",
    "sns.set_palette('husl')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Initialize Recommendation Systems"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize music recommender\n",
    "spotify_data_path = '../data/spotify_dataset/data.csv'\n",
    "music_recommender = MusicRecommender(spotify_data_path)\n",
    "\n",
    "# Initialize food recommender\n",
    "food_data_path = '../data/food_mood_mapping.csv'\n",
    "food_recommender = FoodRecommender(food_data_path)\n",
    "\n",
    "print(\"✅ Recommendation systems initialized!\")\n",
    "print(f\"Music data available: {'Yes' if music_recommender.spotify_data is not None else 'No'}\")\n",
    "print(f\"Food data available: {'Yes' if food_recommender.food_data is not None else 'No'}\")\n",
    "print(f\"Spotify API available: {'Yes' if music_recommender.spotify_client else 'No'}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Explore Music Data and Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explore Spotify data\n",
    "if music_recommender.spotify_data is not None and len(music_recommender.spotify_data) > 0:\n",
    "    spotify_df = music_recommender.spotify_data\n",
    "    print(f\"Spotify Dataset: {len(spotify_df)} tracks\")\n",
    "    print(f\"Columns: {list(spotify_df.columns)}\")\n",
    "    \n",
    "    # Display sample data\n",
    "    print(\"\\nSample tracks:\")\n",
    "    display_cols = ['track_name', 'artists'] + [col for col in ['valence', 'energy', 'danceability'] if col in spotify_df.columns]\n",
    "    print(spotify_df[display_cols].head())\n",
    "    \n",
    "    # Audio features statistics\n",
    "    audio_features = music_recommender.audio_features\n",
    "    available_features = [f for f in audio_features if f in spotify_df.columns]\n",
    "    \n",
    "    if available_features:\n",
    "        print(f\"\\nAudio Features Statistics:\")\n",
    "        print(spotify_df[available_features].describe().round(3))\n",
    "    else:\n",
    "        print(\"\\nNo audio features found in the dataset\")\nelse:\n",
    "    print(\"\\nNo Spotify data available. Creating sample data for demonstration...\")\n",
    "    \n",
    "    # Create expanded sample data\n",
    "    sample_data = {\n",
    "        'track_name': [\n",
    "            'Happy Dance Track', 'Melancholic Ballad', 'Energetic Rock', 'Calm Ambient', \n",
    "            'Surprising Jazz', 'Dark Metal', 'Neutral Pop', 'Upbeat Funk',\n",
    "            'Sad Piano', 'Angry Punk', 'Fearful Soundtrack', 'Joyful Folk'\n",
    "        ],\n",
    "        'artists': [\n",
    "            'DJ Happy', 'The Sad Singers', 'Rock Band', 'Ambient Artist',\n",
    "            'Jazz Ensemble', 'Metal Group', 'Pop Star', 'Funk Master',\n",
    "            'Piano Player', 'Punk Rockers', 'Movie Composer', 'Folk Singer'\n",
    "        ],\n",
    "        'acousticness': [0.2, 0.8, 0.1, 0.9, 0.4, 0.1, 0.3, 0.2, 0.7, 0.1, 0.6, 0.8],\n",
    "        'danceability': [0.9, 0.3, 0.7, 0.2, 0.6, 0.5, 0.8, 0.9, 0.2, 0.6, 0.3, 0.7],\n",
    "        'energy': [0.9, 0.2, 0.95, 0.1, 0.7, 0.9, 0.6, 0.8, 0.3, 0.9, 0.4, 0.8],\n",
    "        'instrumentalness': [0.1, 0.6, 0.3, 0.8, 0.5, 0.7, 0.1, 0.2, 0.9, 0.3, 0.8, 0.4],\n",
    "        'liveness': [0.3, 0.1, 0.4, 0.1, 0.8, 0.2, 0.2, 0.3, 0.1, 0.4, 0.1, 0.6],\n",
    "        'loudness': [-5, -15, -3, -20, -8, -2, -6, -4, -12, -3, -10, -7],\n",
    "        'speechiness': [0.1, 0.05, 0.3, 0.02, 0.1, 0.4, 0.1, 0.08, 0.03, 0.5, 0.05, 0.1],\n",
    "        'tempo': [128, 70, 140, 60, 120, 160, 110, 125, 65, 150, 80, 115],\n",
    "        'valence': [0.9, 0.1, 0.8, 0.4, 0.7, 0.2, 0.5, 0.9, 0.1, 0.3, 0.2, 0.8]\n",
    "    }\n",
    "    \n",
    "    spotify_df = pd.DataFrame(sample_data)\n",
    "    music_recommender.spotify_data = spotify_df\n",
    "    \n",
    "    # Fit scaler with sample data\n",
    "    audio_features = list(sample_data.keys())[2:]  # Skip track_name and artists\n",
    "    music_recommender.audio_features = audio_features\n",
    "    music_recommender.scaler.fit(spotify_df[audio_features])\n",
    "    \n",
    "    print(f\"Created sample Spotify dataset with {len(spotify_df)} tracks\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize audio features distribution\n",
    "if 'spotify_df' in locals() and len(spotify_df) > 0:\n",
    "    audio_features = [f for f in music_recommender.audio_features if f in spotify_df.columns]\n",
    "    \n",
    "    if len(audio_features) >= 6:\n",
    "        # Create subplots for audio features\n",
    "        fig, axes = plt.subplots(2, 3, figsize=(15, 10))\n",
    "        axes = axes.flatten()\n",
    "        \n",
    "        for i, feature in enumerate(audio_features[:6]):\n",
    "            axes[i].hist(spotify_df[feature], bins=10, alpha=0.7, color='skyblue')\n",
    "            axes[i].set_title(f'{feature.capitalize()} Distribution')\n",
    "            axes[i].set_xlabel(feature.capitalize())\n",
    "            axes[i].set_ylabel('Frequency')\n",
    "        \n",
    "        plt.tight_layout()\n",
    "        plt.suptitle('Spotify Audio Features Distribution', y=1.02)\n",
    "        plt.show()\n",
    "    \n",
    "    # Correlation heatmap\n",
    "    if len(audio_features) > 1:\n",
    "        plt.figure(figsize=(10, 8))\n",
    "        correlation_matrix = spotify_df[audio_features].corr()\n",
    "        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,\n",
    "                   square=True, fmt='.2f')\n",
    "        plt.title('Audio Features Correlation Matrix')\n",
    "        plt.tight_layout()\n",
    "        plt.show()\nelse:\n",
    "    print(\"No audio features data available for visualization\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Emotion-to-Music Feature Mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explore emotion-to-audio features mapping\n",
    "emotion_features = music_recommender.emotion_to_features\n",
    "\n",
    "print(\"Emotion to Audio Features Mapping:\")\n",
    "print(\"=\" * 50)\n",
    "\n",
    "# Create DataFrame for better visualization\n",
    "emotions_df = pd.DataFrame(emotion_features).T\n",
    "print(emotions_df.round(2))\n",
    "\n",
    "# Visualize emotion-feature relationships\n",
    "fig, axes = plt.subplots(2, 2, figsize=(15, 12))\n",
    "\n",
    "# Valence by emotion\n",
    "axes[0, 0].bar(emotions_df.index, emotions_df['valence'], \n",
    "              color=[EmotionUtils.EMOTION_COLORS.get(e, '#888888') for e in emotions_df.index])\n",
    "axes[0, 0].set_title('Valence by Emotion')\n",
    "axes[0, 0].set_ylabel('Valence')\n",
    "axes[0, 0].tick_params(axis='x', rotation=45)\n",
    "\n",
    "# Energy by emotion\n",
    "axes[0, 1].bar(emotions_df.index, emotions_df['energy'],\n",
    "              color=[EmotionUtils.EMOTION_COLORS.get(e, '#888888') for e in emotions_df.index])\n",
    "axes[0, 1].set_title('Energy by Emotion')\n",
    "axes[0, 1].set_ylabel('Energy')\n",
    "axes[0, 1].tick_params(axis='x', rotation=45)\n",
    "\n",
    "# Danceability by emotion\n",
    "axes[1, 0].bar(emotions_df.index, emotions_df['danceability'],\n",
    "              color=[EmotionUtils.EMOTION_COLORS.get(e, '#888888') for e in emotions_df.index])\n",
    "axes[1, 0].set_title('Danceability by Emotion')\n",
    "axes[1, 0].set_ylabel('Danceability')\n",
    "axes[1, 0].tick_params(axis='x', rotation=45)\n",
    "\n",
    "# Tempo by emotion\n",
    "axes[1, 1].bar(emotions_df.index, emotions_df['tempo'],\n",
    "              color=[EmotionUtils.EMOTION_COLORS.get(e, '#888888') for e in emotions_df.index])\n",
    "axes[1, 1].set_title('Tempo by Emotion')\n",
    "axes[1, 1].set_ylabel('Tempo (BPM)')\n",
    "axes[1, 1].tick_params(axis='x', rotation=45)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Heatmap of all emotion-feature relationships\n",
    "plt.figure(figsize=(12, 8))\n",
    "sns.heatmap(emotions_df.T, annot=True, cmap='RdYlBu_r', center=0.5,\n",
    "           cbar_kws={'label': 'Feature Value'})\n",
    "plt.title('Emotion to Audio Features Mapping Heatmap')\n",
    "plt.xlabel('Emotions')\n",
    "plt.ylabel('Audio Features')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Test Music Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test music recommendations for different emotions\n",
    "print(\"Testing Music Recommendations:\")\n",
    "print(\"=\" * 40)\n",
    "\n",
    "# Test different emotion scenarios\n",
    "test_emotions = [\n",
    "    {'happy': 0.8, 'surprise': 0.2},\n",
    "    {'sad': 0.9, 'neutral': 0.1},\n",
    "    {'angry': 0.7, 'disgust': 0.3},\n",
    "    {'fear': 0.6, 'sad': 0.4},\n",
    "    {'neutral': 1.0}\n",
    "]\n",
    "\n",
    "for i, emotion_scores in enumerate(test_emotions):\n",
    "    print(f\"\\nTest Case {i+1}: {emotion_scores}\")\n",
    "    \n",
    "    # Get recommendations\n",
    "    recommendations = music_recommender.get_recommendations(\n",
    "        emotion_scores=emotion_scores,\n",
    "        num_recommendations=3,\n",
    "        use_api=False  # Use dataset-based recommendations\n",
    "    )\n",
    "    \n",
    "    if recommendations:\n",
    "        print(\"🎵 Recommended tracks:\")\n",
    "        for j, rec in enumerate(recommendations):\n",
    "            similarity = rec.get('similarity', 0)\n",
    "            print(f\"   {j+1}. {rec['track_name']} - {rec['artists']} (Match: {similarity:.2%})\")\n",
    "    else:\n",
    "        print(\"❌ No recommendations found\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Explore Food Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explore food recommendation data\n",
    "if food_recommender.food_data is not None and len(food_recommender.food_data) > 0:\n",
    "    food_df = food_recommender.food_data\n",
    "    print(f\"Food Dataset: {len(food_df)} items\")\n",
    "    print(f\"Columns: {list(food_df.columns)}\")\n",
    "    \n",
    "    # Display sample data\n",
    "    print(\"\\nSample food items:\")\n",
    "    print(food_df.head(10))\n",
    "    \n",
    "    # Analyze food-emotion relationships\n",
    "    emotion_columns = [col for col in food_df.columns if col in EmotionUtils.EMOTION_LABELS]\n",
    "    \n",
    "    if emotion_columns:\n",
    "        # Top foods for each emotion\n",
    "        print(\"\\nTop 3 foods for each emotion:\")\n",
    "        for emotion in emotion_columns:\n",
    "            top_foods = food_df.nlargest(3, emotion)[['food_item', emotion]]\n",
    "            print(f\"\\n{emotion.capitalize()}:\")\n",
    "            for _, row in top_foods.iterrows():\n",
    "                print(f\"  - {row['food_item']} (score: {row[emotion]:.2f})\")\nelse:\n",
    "    print(\"No food data available\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize food-emotion relationships\n",
    "if 'food_df' in locals() and len(food_df) > 0:\n",
    "    emotion_columns = [col for col in food_df.columns if col in EmotionUtils.EMOTION_LABELS]\n",
    "    \n",
    "    if emotion_columns and len(emotion_columns) > 1:\n",
    "        # Food-emotion heatmap\n",
    "        plt.figure(figsize=(12, 10))\n",
    "        food_emotion_matrix = food_df.set_index('food_item')[emotion_columns]\n",
    "        sns.heatmap(food_emotion_matrix.T, \n",
    "                   cmap='YlOrRd', \n",
    "                   cbar_kws={'label': 'Emotion Match Score'},\n",
    "                   xticklabels=True, yticklabels=True)\n",
    "        plt.title('Food-Emotion Relationship Heatmap')\n",
    "        plt.xlabel('Food Items')\n",
    "        plt.ylabel('Emotions')\n",
    "        plt.xticks(rotation=45, ha='right')\n",
    "        plt.tight_layout()\n",
    "        plt.show()\n",
    "        \n",
    "        # Average emotion scores for all foods\n",
    "        plt.figure(figsize=(10, 6))\n",
    "        avg_scores = food_emotion_matrix.mean()\n",
    "        colors = [EmotionUtils.EMOTION_COLORS.get(emotion, '#888888') for emotion in avg_scores.index]\n",
    "        \n",
    "        bars = plt.bar(avg_scores.index, avg_scores.values, color=colors)\n",
    "        plt.title('Average Food-Emotion Association Scores')\n",
    "        plt.xlabel('Emotions')\n",
    "        plt.ylabel('Average Score')\n",
    "        plt.xticks(rotation=45)\n",
    "        \n",
    "        # Add value labels on bars\n",
    "        for bar, value in zip(bars, avg_scores.values):\n",
    "            plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,\n",
    "                    f'{value:.2f}', ha='center', va='bottom')\n",
    "        \n",
    "        plt.tight_layout()\n",
    "        plt.show()\nelse:\n",
    "    print(\"No food emotion data available for visualization\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Test Food Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test food recommendations for different emotions\n",
    "print(\"Testing Food Recommendations:\")\n",
    "print(\"=\" * 40)\n",
    "\n",
    "for i, emotion_scores in enumerate(test_emotions):\n",
    "    print(f\"\\nTest Case {i+1}: {emotion_scores}\")\n",
    "    \n",
    "    # Get food recommendations\n",
    "    food_recommendations = food_recommender.get_food_recommendations(\n",
    "        emotion_scores=emotion_scores,\n",
    "        num_recommendations=3\n",
    "    )\n",
    "    \n",
    "    if food_recommendations:\n",
    "        print(\"🍽️ Recommended foods:\")\n",
    "        for j, rec in enumerate(food_recommendations):\n",
    "            score = rec.get('score', 0)\n",
    "            print(f\"   {j+1}. {rec['food_item']} (Score: {score:.2%})\")\n",
    "            print(f\"      {rec['description']}\")\n",
    "            if rec['benefits']:\n",
    "                print(f\"      Benefits: {', '.join(rec['benefits'][:2])}\")\n",
    "    else:\n",
    "        print(\"❌ No food recommendations found\")\n",
    "    \n",
    "    # Get meal suggestions\n",
    "    meal_suggestions = food_recommender.get_meal_suggestions(emotion_scores)\n",
    "    if meal_suggestions:\n",
    "        dominant_emotion = max(emotion_scores.items(), key=lambda x: x[1])[0]\n",
    "        print(f\"\\n🍴 Meal suggestions for {dominant_emotion} mood:\")\n",
    "        for meal_type, suggestions in meal_suggestions.items():\n",
    "            print(f\"   {meal_type.capitalize()}: {', '.join(suggestions[:2])}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Recommendation System Performance Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze recommendation system performance\n",
    "print(\"Recommendation System Analysis:\")\n",
    "print(\"=\" * 50)\n",
    "\n",
    "# Test recommendation consistency\n",
    "consistency_test = {'happy': 0.8, 'neutral': 0.2}\n",
    "\n",
    "print(f\"\\nConsistency Test with emotion: {consistency_test}\")\n",
    "print(\"Running recommendations multiple times...\")\n",
    "\n",
    "# Run recommendations multiple times to check consistency\n",
    "music_results = []\n",
    "food_results = []\n",
    "\n",
    "for run in range(3):\n",
    "    music_recs = music_recommender.get_recommendations(\n",
    "        emotion_scores=consistency_test, num_recommendations=5\n",
    "    )\n",
    "    food_recs = food_recommender.get_food_recommendations(\n",
    "        emotion_scores=consistency_test, num_recommendations=5\n",
    "    )\n",
    "    \n",
    "    music_results.append([rec['track_name'] for rec in music_recs])\n",
    "    food_results.append([rec['food_item'] for rec in food_recs])\n",
    "\n",
    "print(\"\\n🎵 Music Recommendations (3 runs):\")\n",
    "for i, result in enumerate(music_results):\n",
    "    print(f\"   Run {i+1}: {result[:3]}\")\n",
    "\n",
    "print(\"\\n🍽️ Food Recommendations (3 runs):\")\n",
    "for i, result in enumerate(food_results):\n",
    "    print(f\"   Run {i+1}: {result[:3]}\")\n",
    "\n",
    "# Calculate similarity between runs (simple overlap measure)\n",
    "if len(music_results) >= 2:\n",
    "    music_overlap = len(set(music_results[0]) & set(music_results[1])) / len(set(music_results[0]))\n",
    "    print(f\"\\n📊 Music recommendation consistency: {music_overlap:.2%}\")\n",

# Music Recommender System Exploration

This notebook explores and sets up the music and food recommendation systems for MoodSense.

## 8. Interactive Recommendation Testing

In [None]:
# Interactive testing function
def test_custom_emotion_scenario(emotion_scores_dict):
    """Print the music and food recommendations for one emotion mix.

    Args:
        emotion_scores_dict: Mapping of emotion name to numeric score. The
            scores are rescaled to sum to 1 when their total is positive;
            otherwise the mapping is forwarded to the recommenders unchanged.

    Returns:
        None. All results are written to stdout.
    """
    print(f"\nTesting Custom Scenario: {emotion_scores_dict}")
    print("=" * 60)

    # Rescale to a distribution unless the total is zero or negative
    score_sum = sum(emotion_scores_dict.values())
    normalized_scores = emotion_scores_dict
    if score_sum > 0:
        normalized_scores = {
            name: value / score_sum for name, value in emotion_scores_dict.items()
        }

    # Query both recommenders with the same (normalized) scores
    music_recs = music_recommender.get_recommendations(normalized_scores, num_recommendations=5)
    food_recs = food_recommender.get_food_recommendations(normalized_scores, num_recommendations=5)

    print(f"🎵 Music Recommendations:")
    for rank, track in enumerate(music_recs, start=1):
        match = track.get('similarity', 0)
        print(f"   {rank}. {track['track_name']} by {track['artists']} (Match: {match:.2%})")
        if 'audio_features' not in track:
            continue
        audio = track['audio_features']
        print(f"      Features: Valence={audio.get('valence', 0):.2f}, Energy={audio.get('energy', 0):.2f}")

    print(f"\n🍽️ Food Recommendations:")
    for rank, dish in enumerate(food_recs, start=1):
        dish_score = dish.get('score', 0)
        print(f"   {rank}. {dish['food_item']} (Score: {dish_score:.2%})")
        print(f"      {dish['description']}")
        benefits = dish['benefits']
        if benefits:
            # Only the first three benefits are shown
            print(f"      Benefits: {', '.join(benefits[:3])}")
        print()

# Test various scenarios
test_scenarios = [
    dict(happy=0.6, surprise=0.4),            # Mixed positive emotions
    dict(sad=0.7, fear=0.3),                  # Mixed negative emotions
    dict(angry=0.5, disgust=0.3, sad=0.2),    # Complex negative mix
    dict(happy=0.4, neutral=0.6),             # Mild happiness
    dict(surprise=1.0),                       # Pure surprise
]

# Print the recommendations for every scenario in turn
for scenario in test_scenarios:
    test_custom_emotion_scenario(scenario)


## 9. Recommendation Quality Metrics

In [None]:
# Calculate recommendation quality metrics
# Section banner: title line followed by a 50-character rule
print("Recommendation Quality Assessment:", "=" * 50, sep="\n")

# Diversity metric - how diverse are the recommendations?
def calculate_music_diversity(recommendations):
    """Return the mean pairwise Euclidean distance between recommended tracks.

    Every recommendation that carries an 'audio_features' mapping contributes
    the vector (valence, energy, danceability); a feature missing from that
    mapping counts as 0. Recommendations without 'audio_features' are ignored.

    Args:
        recommendations: Sequence of recommendation dicts.

    Returns:
        The average distance over all unordered pairs of feature vectors, or
        0 when fewer than two recommendations (or fewer than two with audio
        features) are available.
    """
    if not recommendations or len(recommendations) < 2:
        return 0

    feature_names = ('valence', 'energy', 'danceability')
    vectors = [
        np.array([rec['audio_features'].get(name, 0) for name in feature_names])
        for rec in recommendations
        if 'audio_features' in rec
    ]

    if len(vectors) < 2:
        return 0

    # Accumulate the distance of every unordered pair exactly once
    distance_sum = 0
    pair_count = 0
    for first, vec_a in enumerate(vectors):
        for vec_b in vectors[first + 1:]:
            distance_sum += np.linalg.norm(vec_a - vec_b)
            pair_count += 1

    return distance_sum / pair_count if pair_count > 0 else 0

def calculate_food_diversity(recommendations):
    """Return the share of distinct coarse food categories among recommendations.

    Each recommendation's 'food_item' name is assigned to the first matching
    category out of 'sweet', 'healthy' and 'beverage' (checked in that order),
    or to 'other' when no keyword matches. Keywords are matched as whole words
    (an optional trailing "s" is accepted), so that e.g. "Steak" is no longer
    mistaken for a beverage because it contains the letters "tea".

    Args:
        recommendations: Sequence of recommendation dicts, each with a
            'food_item' string.

    Returns:
        Number of distinct categories divided by the number of
        recommendations, or 0 when fewer than two recommendations are given.
    """
    import re  # local import: only this helper needs regular expressions

    if not recommendations or len(recommendations) < 2:
        return 0

    # Order matters: the first category whose keywords match wins
    category_keywords = (
        ('sweet', ('chocolate', 'ice cream', 'cookie')),
        ('healthy', ('salad', 'vegetable', 'fruit')),
        ('beverage', ('coffee', 'tea')),
    )
    category_patterns = [
        (
            category,
            re.compile(r'\b(?:' + '|'.join(re.escape(word) for word in words) + r')s?\b'),
        )
        for category, words in category_keywords
    ]

    # Simple diversity based on food categories (you could enhance this)
    food_types = set()
    for rec in recommendations:
        food_item = rec['food_item'].lower()
        for category, pattern in category_patterns:
            if pattern.search(food_item):
                food_types.add(category)
                break
        else:
            food_types.add('other')

    return len(food_types) / len(recommendations)

# Test diversity for different emotions
diversity_results = []

# One pure-emotion query per label; record diversity and result counts
for emotion in ('happy', 'sad', 'angry', 'neutral'):
    emotion_dict = {emotion: 1.0}

    music_recs = music_recommender.get_recommendations(emotion_dict, num_recommendations=5)
    food_recs = food_recommender.get_food_recommendations(emotion_dict, num_recommendations=5)

    music_diversity = calculate_music_diversity(music_recs)
    food_diversity = calculate_food_diversity(food_recs)

    diversity_results.append(dict(
        emotion=emotion,
        music_diversity=music_diversity,
        food_diversity=food_diversity,
        music_count=len(music_recs),
        food_count=len(food_recs),
    ))

# Tabulate the per-emotion results
diversity_df = pd.DataFrame(diversity_results)
print("Recommendation Diversity by Emotion:")
print(diversity_df.round(3))

# Side-by-side bar charts: music on the left, food on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
diversity_panels = (
    ('music_diversity', 'Music Recommendation Diversity'),
    ('food_diversity', 'Food Recommendation Diversity'),
)
for ax, (column, title) in zip(axes, diversity_panels):
    ax.bar(diversity_df['emotion'], diversity_df[column])
    ax.set_title(title)
    ax.set_ylabel('Diversity Score')
    ax.set_xlabel('Emotion')

plt.tight_layout()
plt.show()

# Overall system statistics
print(f"\n📊 Overall System Statistics:")
music_db_size = len(music_recommender.spotify_data) if music_recommender.spotify_data is not None else 0
print(f"   Music Database Size: {music_db_size} tracks")
food_db_size = len(food_recommender.food_data) if food_recommender.food_data is not None else 0
print(f"   Food Database Size: {food_db_size} items")
print(f"   Average Music Diversity: {diversity_df['music_diversity'].mean():.3f}")
print(f"   Average Food Diversity: {diversity_df['food_diversity'].mean():.3f}")
spotify_api_flag = 'Yes' if music_recommender.spotify_client else 'No'
print(f"   Spotify API Available: {spotify_api_flag}")


## 10. Save Recommendation System Configuration

In [None]:
# Save recommendation system configuration and results
import json

# Dataset sizes are reported as 0 when the corresponding data is missing
music_track_total = (
    len(music_recommender.spotify_data) if music_recommender.spotify_data is not None else 0
)
food_item_total = (
    len(food_recommender.food_data) if food_recommender.food_data is not None else 0
)

# Create comprehensive system summary
system_summary = {
    "recommendation_system": {
        "music_recommender": {
            "type": "Content-based filtering with cosine similarity",
            "features_used": music_recommender.audio_features,
            "data_source": "Spotify dataset",
            "total_tracks": music_track_total,
            "api_available": music_recommender.spotify_client is not None
        },
        "food_recommender": {
            "type": "Emotion-score weighted matching",
            "total_items": food_item_total,
            "emotions_supported": EmotionUtils.EMOTION_LABELS
        }
    },
    "emotion_mapping": {
        "music_features": music_recommender.emotion_to_features,
        "supported_emotions": EmotionUtils.EMOTION_LABELS
    },
    "performance_metrics": {},
    "test_results": {}
}

# Diversity metrics exist only if the quality-metrics cell has been run
if 'diversity_df' in locals():
    diversity_metrics = {
        "average_music_diversity": float(diversity_df['music_diversity'].mean()),
        "average_food_diversity": float(diversity_df['food_diversity'].mean()),
        "diversity_by_emotion": diversity_df.to_dict('records')
    }
    system_summary["performance_metrics"] = diversity_metrics

# Coverage results are optional as well; missing ratios fall back to 0
if 'coverage_results' in locals():
    coverage_metrics = {
        "music_coverage": float(music_coverage) if 'music_coverage' in locals() else 0,
        "food_coverage": float(food_coverage) if 'food_coverage' in locals() else 0,
        "recommendable_music_items": len(coverage_results['music']),
        "recommendable_food_items": len(coverage_results['food'])
    }
    system_summary["performance_metrics"]["coverage"] = coverage_metrics

# Write the summary as JSON, creating the target directory if needed
os.makedirs('../models/recommendation_system', exist_ok=True)
with open('../models/recommendation_system/system_summary.json', 'w') as summary_file:
    json.dump(system_summary, summary_file, indent=4)

# Emotion-to-features mapping, one row per emotion
emotions_features_df = pd.DataFrame(music_recommender.emotion_to_features).T
emotions_features_df.to_csv('../models/recommendation_system/emotion_features_mapping.csv')

# Food-emotion data is exported only when the recommender has any
if food_recommender.food_data is not None:
    food_recommender.save_food_data('../models/recommendation_system/food_emotion_mapping.csv')

print("📋 Recommendation System Summary:")
print(json.dumps(system_summary, indent=2))

# Closing status report and usage hints, printed line by line
closing_messages = (
    "\n✅ Music and Food Recommendation System setup completed!",
    "\n📁 System files saved in: ../models/recommendation_system/",
    "   - system_summary.json (complete system summary)",
    "   - emotion_features_mapping.csv (emotion-to-audio features mapping)",
    "   - food_emotion_mapping.csv (food-emotion relationships)",
    "\n🚀 All systems ready!",
    "   You can now run the Streamlit app: streamlit run ../src/app.py",
    "\n💡 Tips for better recommendations:",
    "   - Use a larger Spotify dataset for more diverse music recommendations",
    "   - Set up Spotify API credentials for real-time music discovery",
    "   - Customize the food-emotion mappings based on nutritional research",
    "   - Train the emotion recognition models on larger datasets for better accuracy",
)
for message in closing_messages:
    print(message)


In [None]:
import sys
import os
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

from recommender import MusicRecommender, FoodRecommender
from utils import EmotionUtils, DataUtils

# Set style
plt.style.use('default')
sns.set_palette('husl')


## 1. Initialize Recommendation Systems

In [None]:
# Initialize music recommender
# Dataset locations, relative to the notebook directory
spotify_data_path = '../data/spotify_dataset/data.csv'
food_data_path = '../data/food_mood_mapping.csv'

# Build both recommenders from their datasets
music_recommender = MusicRecommender(spotify_data_path)
food_recommender = FoodRecommender(food_data_path)

print("✅ Recommendation systems initialized!")

# Report which data sources and services ended up available
music_data_flag = 'Yes' if music_recommender.spotify_data is not None else 'No'
print(f"Music data available: {music_data_flag}")
food_data_flag = 'Yes' if food_recommender.food_data is not None else 'No'
print(f"Food data available: {food_data_flag}")
spotify_api_flag = 'Yes' if music_recommender.spotify_client else 'No'
print(f"Spotify API available: {spotify_api_flag}")


## 2. Explore Music Data and Features

In [None]:
# Explore Spotify data
# Inspect the loaded Spotify data, or install a small synthetic dataset on the
# recommender when nothing was loaded so the remaining cells can still run.
if music_recommender.spotify_data is not None and len(music_recommender.spotify_data) > 0:
    spotify_df = music_recommender.spotify_data
    print(f"Spotify Dataset: {len(spotify_df)} tracks")
    print(f"Columns: {list(spotify_df.columns)}")
    
    # Display sample data. Every candidate column is checked for presence
    # (not only the audio features), so a dataset lacking 'track_name' or
    # 'artists' no longer raises KeyError here.
    print("\nSample tracks:")
    display_cols = [
        col for col in ['track_name', 'artists', 'valence', 'energy', 'danceability']
        if col in spotify_df.columns
    ]
    print(spotify_df[display_cols].head())
    
    # Audio features statistics, restricted to features present as columns
    audio_features = music_recommender.audio_features
    available_features = [f for f in audio_features if f in spotify_df.columns]
    
    if available_features:
        print(f"\nAudio Features Statistics:")
        print(spotify_df[available_features].describe().round(3))
    else:
        print("\nNo audio features found in the dataset")
else:
    print("\nNo Spotify data available. Creating sample data for demonstration...")
    
    # Create expanded sample data: 12 hand-written tracks, one value per
    # track in every column
    sample_data = {
        'track_name': [
            'Happy Dance Track', 'Melancholic Ballad', 'Energetic Rock', 'Calm Ambient', 
            'Surprising Jazz', 'Dark Metal', 'Neutral Pop', 'Upbeat Funk',
            'Sad Piano', 'Angry Punk', 'Fearful Soundtrack', 'Joyful Folk'
        ],
        'artists': [
            'DJ Happy', 'The Sad Singers', 'Rock Band', 'Ambient Artist',
            'Jazz Ensemble', 'Metal Group', 'Pop Star', 'Funk Master',
            'Piano Player', 'Punk Rockers', 'Movie Composer', 'Folk Singer'
        ],
        'acousticness': [0.2, 0.8, 0.1, 0.9, 0.4, 0.1, 0.3, 0.2, 0.7, 0.1, 0.6, 0.8],
        'danceability': [0.9, 0.3, 0.7, 0.2, 0.6, 0.5, 0.8, 0.9, 0.2, 0.6, 0.3, 0.7],
        'energy': [0.9, 0.2, 0.95, 0.1, 0.7, 0.9, 0.6, 0.8, 0.3, 0.9, 0.4, 0.8],
        'instrumentalness': [0.1, 0.6, 0.3, 0.8, 0.5, 0.7, 0.1, 0.2, 0.9, 0.3, 0.8, 0.4],
        'liveness': [0.3, 0.1, 0.4, 0.1, 0.8, 0.2, 0.2, 0.3, 0.1, 0.4, 0.1, 0.6],
        'loudness': [-5, -15, -3, -20, -8, -2, -6, -4, -12, -3, -10, -7],
        'speechiness': [0.1, 0.05, 0.3, 0.02, 0.1, 0.4, 0.1, 0.08, 0.03, 0.5, 0.05, 0.1],
        'tempo': [128, 70, 140, 60, 120, 160, 110, 125, 65, 150, 80, 115],
        'valence': [0.9, 0.1, 0.8, 0.4, 0.7, 0.2, 0.5, 0.9, 0.1, 0.3, 0.2, 0.8]
    }
    
    # Install the sample frame on the recommender so later cells use it
    spotify_df = pd.DataFrame(sample_data)
    music_recommender.spotify_data = spotify_df
    
    # Fit scaler with sample data
    audio_features = list(sample_data.keys())[2:]  # Skip track_name and artists
    music_recommender.audio_features = audio_features
    music_recommender.scaler.fit(spotify_df[audio_features])
    
    print(f"Created sample Spotify dataset with {len(spotify_df)} tracks")


In [None]:
# Visualize audio features distribution
if 'spotify_df' in locals() and len(spotify_df) > 0:
    # Keep only the recommender's features that exist as dataset columns
    audio_features = [
        name for name in music_recommender.audio_features if name in spotify_df.columns
    ]
    feature_count = len(audio_features)

    # Histogram grid is drawn only when at least six features are available
    if feature_count >= 6:
        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        axes = axes.flatten()

        for panel, feature in zip(axes, audio_features[:6]):
            label = feature.capitalize()
            panel.hist(spotify_df[feature], bins=10, alpha=0.7, color='skyblue')
            panel.set_title(f'{label} Distribution')
            panel.set_xlabel(label)
            panel.set_ylabel('Frequency')

        plt.tight_layout()
        plt.suptitle('Spotify Audio Features Distribution', y=1.02)
        plt.show()

    # Correlation heatmap needs at least two features
    if feature_count > 1:
        correlation_matrix = spotify_df[audio_features].corr()
        plt.figure(figsize=(10, 8))
        sns.heatmap(
            correlation_matrix,
            annot=True,
            cmap='coolwarm',
            center=0,
            square=True,
            fmt='.2f',
        )
        plt.title('Audio Features Correlation Matrix')
        plt.tight_layout()
        plt.show()
else:
    print("No audio features data available for visualization")


## 3. Emotion-to-Music Feature Mapping

In [None]:
# Explore emotion-to-audio features mapping
emotion_features = music_recommender.emotion_to_features

print("Emotion to Audio Features Mapping:")
print("=" * 50)

# One row per emotion, one column per audio feature
emotions_df = pd.DataFrame(emotion_features).T
print(emotions_df.round(2))

# 2x2 grid of bar charts, one audio feature per panel
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Bar colour per emotion; emotions without a configured colour are grey
bar_colors = [EmotionUtils.EMOTION_COLORS.get(e, '#888888') for e in emotions_df.index]

# (column, panel title, y-axis label), laid out row by row
feature_panels = (
    ('valence', 'Valence by Emotion', 'Valence'),
    ('energy', 'Energy by Emotion', 'Energy'),
    ('danceability', 'Danceability by Emotion', 'Danceability'),
    ('tempo', 'Tempo by Emotion', 'Tempo (BPM)'),
)
for ax, (column, title, ylabel) in zip(axes.flat, feature_panels):
    ax.bar(emotions_df.index, emotions_df[column], color=bar_colors)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Heatmap of the full mapping: features as rows, emotions as columns
plt.figure(figsize=(12, 8))
sns.heatmap(
    emotions_df.T,
    annot=True,
    cmap='RdYlBu_r',
    center=0.5,
    cbar_kws={'label': 'Feature Value'},
)
plt.title('Emotion to Audio Features Mapping Heatmap')
plt.xlabel('Emotions')
plt.ylabel('Audio Features')
plt.tight_layout()
plt.show()


## 4. Test Music Recommendations

In [None]:
# Test music recommendations for different emotions
print("Testing Music Recommendations:")
print("=" * 40)

# Emotion mixes shared by the music test below and the later food test
test_emotions = [
    dict(happy=0.8, surprise=0.2),
    dict(sad=0.9, neutral=0.1),
    dict(angry=0.7, disgust=0.3),
    dict(fear=0.6, sad=0.4),
    dict(neutral=1.0),
]

for case_number, emotion_scores in enumerate(test_emotions, start=1):
    print(f"\nTest Case {case_number}: {emotion_scores}")

    # use_api=False: use dataset-based recommendations
    recommendations = music_recommender.get_recommendations(
        emotion_scores=emotion_scores,
        num_recommendations=3,
        use_api=False,
    )

    if not recommendations:
        print("❌ No recommendations found")
        continue

    print("🎵 Recommended tracks:")
    for position, rec in enumerate(recommendations, start=1):
        similarity = rec.get('similarity', 0)
        print(f"   {position}. {rec['track_name']} - {rec['artists']} (Match: {similarity:.2%})")


## 5. Explore Food Recommendations

In [None]:
# Explore food recommendation data
if food_recommender.food_data is None or len(food_recommender.food_data) == 0:
    print("No food data available")
else:
    food_df = food_recommender.food_data
    print(f"Food Dataset: {len(food_df)} items")
    print(f"Columns: {list(food_df.columns)}")

    # Display sample data
    print("\nSample food items:")
    print(food_df.head(10))

    # Columns named after a known emotion label hold the per-emotion scores
    emotion_columns = [
        column for column in food_df.columns if column in EmotionUtils.EMOTION_LABELS
    ]

    if emotion_columns:
        print("\nTop 3 foods for each emotion:")
        for emotion in emotion_columns:
            # Three highest-scoring foods for this emotion
            top_foods = food_df.nlargest(3, emotion)[['food_item', emotion]]
            print(f"\n{emotion.capitalize()}:")
            for food_name, emotion_score in zip(top_foods['food_item'], top_foods[emotion]):
                print(f"  - {food_name} (score: {emotion_score:.2f})")


In [None]:
# Visualize food-emotion relationships
if 'food_df' in locals() and len(food_df) > 0:
    emotion_columns = [
        column for column in food_df.columns if column in EmotionUtils.EMOTION_LABELS
    ]

    # Both charts need at least two emotion columns
    if len(emotion_columns) > 1:
        # Foods as rows, emotions as columns
        food_emotion_matrix = food_df.set_index('food_item')[emotion_columns]

        # Food-emotion heatmap (transposed: emotions on the y-axis)
        plt.figure(figsize=(12, 10))
        sns.heatmap(
            food_emotion_matrix.T,
            cmap='YlOrRd',
            cbar_kws={'label': 'Emotion Match Score'},
            xticklabels=True,
            yticklabels=True,
        )
        plt.title('Food-Emotion Relationship Heatmap')
        plt.xlabel('Food Items')
        plt.ylabel('Emotions')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

        # Average emotion scores for all foods
        avg_scores = food_emotion_matrix.mean()
        colors = [
            EmotionUtils.EMOTION_COLORS.get(emotion, '#888888')
            for emotion in avg_scores.index
        ]

        plt.figure(figsize=(10, 6))
        bars = plt.bar(avg_scores.index, avg_scores.values, color=colors)
        plt.title('Average Food-Emotion Association Scores')
        plt.xlabel('Emotions')
        plt.ylabel('Average Score')
        plt.xticks(rotation=45)

        # Write each average just above the centre of its bar
        for rect, average in zip(bars, avg_scores.values):
            label_x = rect.get_x() + rect.get_width()/2
            label_y = rect.get_height() + 0.01
            plt.text(label_x, label_y, f'{average:.2f}', ha='center', va='bottom')

        plt.tight_layout()
        plt.show()
else:
    print("No food emotion data available for visualization")


## 6. Test Food Recommendations

In [None]:
# Test food recommendations for different emotions
print("Testing Food Recommendations:")
print("=" * 40)

# Reuses the emotion mixes defined for the music recommendation test
for case_number, emotion_scores in enumerate(test_emotions, start=1):
    print(f"\nTest Case {case_number}: {emotion_scores}")

    food_recommendations = food_recommender.get_food_recommendations(
        emotion_scores=emotion_scores,
        num_recommendations=3,
    )

    if not food_recommendations:
        print("❌ No food recommendations found")
    else:
        print("🍽️ Recommended foods:")
        for position, rec in enumerate(food_recommendations, start=1):
            score = rec.get('score', 0)
            print(f"   {position}. {rec['food_item']} (Score: {score:.2%})")
            print(f"      {rec['description']}")
            benefits = rec['benefits']
            if benefits:
                # Only the first two benefits are shown
                print(f"      Benefits: {', '.join(benefits[:2])}")

    # Meal suggestions are listed under the highest-scoring emotion
    meal_suggestions = food_recommender.get_meal_suggestions(emotion_scores)
    if not meal_suggestions:
        continue

    dominant_emotion = max(emotion_scores, key=emotion_scores.get)
    print(f"\n🍴 Meal suggestions for {dominant_emotion} mood:")
    for meal_type, suggestions in meal_suggestions.items():
        print(f"   {meal_type.capitalize()}: {', '.join(suggestions[:2])}")


## 7. Recommendation System Performance Analysis

In [None]:
# Analyze recommendation system performance
print("Recommendation System Analysis:")
print("=" * 50)

# Test recommendation consistency: the same emotion mix is submitted several
# times and the returned item names are compared between runs.
consistency_test = {'happy': 0.8, 'neutral': 0.2}

print(f"\nConsistency Test with emotion: {consistency_test}")
print("Running recommendations multiple times...")

# Run recommendations multiple times to check consistency
music_results = []
food_results = []

for run in range(3):
    music_recs = music_recommender.get_recommendations(
        emotion_scores=consistency_test, num_recommendations=5
    )
    food_recs = food_recommender.get_food_recommendations(
        emotion_scores=consistency_test, num_recommendations=5
    )
    
    # Keep only the names; they are what gets compared and printed
    music_results.append([rec['track_name'] for rec in music_recs])
    food_results.append([rec['food_item'] for rec in food_recs])

print("\n🎵 Music Recommendations (3 runs):")
for i, result in enumerate(music_results):
    print(f"   Run {i+1}: {result[:3]}")

print("\n🍽️ Food Recommendations (3 runs):")
for i, result in enumerate(food_results):
    print(f"   Run {i+1}: {result[:3]}")

# Calculate similarity between runs (simple overlap measure): the share of
# run 1's distinct tracks that also appear in run 2.
if len(music_results) >= 2:
    first_run_tracks = set(music_results[0])
    if first_run_tracks:
        music_overlap = len(first_run_tracks & set(music_results[1])) / len(first_run_tracks)
        print(f"\n📊 Music recommendation consistency: {music_overlap:.2%}")
    else:
        # An empty first run would otherwise divide by zero; report it instead
        music_overlap = None
        print("\n📊 Music recommendation consistency: n/a (first run returned no tracks)")


## 8. Interactive Recommendation Testing

In [None]:
# Interactive testing function
def test_custom_emotion_scenario(emotion_scores_dict):
    """Print the music and food recommendations for one emotion mix.

    Args:
        emotion_scores_dict: Mapping of emotion name to numeric score. The
            scores are rescaled to sum to 1 when their total is positive;
            otherwise the mapping is forwarded to the recommenders unchanged.

    Returns:
        None. All results are written to stdout.
    """
    print(f"\nTesting Custom Scenario: {emotion_scores_dict}")
    print("=" * 60)

    # Rescale to a distribution unless the total is zero or negative
    score_sum = sum(emotion_scores_dict.values())
    normalized_scores = emotion_scores_dict
    if score_sum > 0:
        normalized_scores = {
            name: value / score_sum for name, value in emotion_scores_dict.items()
        }

    # Query both recommenders with the same (normalized) scores
    music_recs = music_recommender.get_recommendations(normalized_scores, num_recommendations=5)
    food_recs = food_recommender.get_food_recommendations(normalized_scores, num_recommendations=5)

    print(f"🎵 Music Recommendations:")
    for rank, track in enumerate(music_recs, start=1):
        match = track.get('similarity', 0)
        print(f"   {rank}. {track['track_name']} by {track['artists']} (Match: {match:.2%})")
        if 'audio_features' not in track:
            continue
        audio = track['audio_features']
        print(f"      Features: Valence={audio.get('valence', 0):.2f}, Energy={audio.get('energy', 0):.2f}")

    print(f"\n🍽️ Food Recommendations:")
    for rank, dish in enumerate(food_recs, start=1):
        dish_score = dish.get('score', 0)
        print(f"   {rank}. {dish['food_item']} (Score: {dish_score:.2%})")
        print(f"      {dish['description']}")
        benefits = dish['benefits']
        if benefits:
            # Only the first three benefits are shown
            print(f"      Benefits: {', '.join(benefits[:3])}")
        print()

# Test various scenarios
test_scenarios = [
    dict(happy=0.6, surprise=0.4),            # Mixed positive emotions
    dict(sad=0.7, fear=0.3),                  # Mixed negative emotions
    dict(angry=0.5, disgust=0.3, sad=0.2),    # Complex negative mix
    dict(happy=0.4, neutral=0.6),             # Mild happiness
    dict(surprise=1.0),                       # Pure surprise
]

# Print the recommendations for every scenario in turn
for scenario in test_scenarios:
    test_custom_emotion_scenario(scenario)


## 9. Recommendation Quality Metrics

In [None]:
# Calculate recommendation quality metrics
# Section banner: title line followed by a 50-character rule
print("Recommendation Quality Assessment:", "=" * 50, sep="\n")

# Diversity metric - how diverse are the recommendations?
def calculate_music_diversity(recommendations):
    """Return the mean pairwise Euclidean distance between recommended tracks.

    Every recommendation that carries an 'audio_features' mapping contributes
    the vector (valence, energy, danceability); a feature missing from that
    mapping counts as 0. Recommendations without 'audio_features' are ignored.

    Args:
        recommendations: Sequence of recommendation dicts.

    Returns:
        The average distance over all unordered pairs of feature vectors, or
        0 when fewer than two recommendations (or fewer than two with audio
        features) are available.
    """
    if not recommendations or len(recommendations) < 2:
        return 0

    feature_names = ('valence', 'energy', 'danceability')
    vectors = [
        np.array([rec['audio_features'].get(name, 0) for name in feature_names])
        for rec in recommendations
        if 'audio_features' in rec
    ]

    if len(vectors) < 2:
        return 0

    # Accumulate the distance of every unordered pair exactly once
    distance_sum = 0
    pair_count = 0
    for first, vec_a in enumerate(vectors):
        for vec_b in vectors[first + 1:]:
            distance_sum += np.linalg.norm(vec_a - vec_b)
            pair_count += 1

    return distance_sum / pair_count if pair_count > 0 else 0

def calculate_food_diversity(recommendations):
    """Return the share of distinct coarse food categories among recommendations.

    Each recommendation's 'food_item' name is assigned to the first matching
    category out of 'sweet', 'healthy' and 'beverage' (checked in that order),
    or to 'other' when no keyword matches. Keywords are matched as whole words
    (an optional trailing "s" is accepted), so that e.g. "Steak" is no longer
    mistaken for a beverage because it contains the letters "tea".

    Args:
        recommendations: Sequence of recommendation dicts, each with a
            'food_item' string.

    Returns:
        Number of distinct categories divided by the number of
        recommendations, or 0 when fewer than two recommendations are given.
    """
    import re  # local import: only this helper needs regular expressions

    if not recommendations or len(recommendations) < 2:
        return 0

    # Order matters: the first category whose keywords match wins
    category_keywords = (
        ('sweet', ('chocolate', 'ice cream', 'cookie')),
        ('healthy', ('salad', 'vegetable', 'fruit')),
        ('beverage', ('coffee', 'tea')),
    )
    category_patterns = [
        (
            category,
            re.compile(r'\b(?:' + '|'.join(re.escape(word) for word in words) + r')s?\b'),
        )
        for category, words in category_keywords
    ]

    # Simple diversity based on food categories (you could enhance this)
    food_types = set()
    for rec in recommendations:
        food_item = rec['food_item'].lower()
        for category, pattern in category_patterns:
            if pattern.search(food_item):
                food_types.add(category)
                break
        else:
            food_types.add('other')

    return len(food_types) / len(recommendations)

# Test diversity for different emotions
diversity_results = []

# One pure-emotion query per label; record diversity and result counts
for emotion in ('happy', 'sad', 'angry', 'neutral'):
    emotion_dict = {emotion: 1.0}

    music_recs = music_recommender.get_recommendations(emotion_dict, num_recommendations=5)
    food_recs = food_recommender.get_food_recommendations(emotion_dict, num_recommendations=5)

    music_diversity = calculate_music_diversity(music_recs)
    food_diversity = calculate_food_diversity(food_recs)

    diversity_results.append(dict(
        emotion=emotion,
        music_diversity=music_diversity,
        food_diversity=food_diversity,
        music_count=len(music_recs),
        food_count=len(food_recs),
    ))

# Tabulate the per-emotion results
diversity_df = pd.DataFrame(diversity_results)
print("Recommendation Diversity by Emotion:")
print(diversity_df.round(3))

# Side-by-side bar charts: music on the left, food on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
diversity_panels = (
    ('music_diversity', 'Music Recommendation Diversity'),
    ('food_diversity', 'Food Recommendation Diversity'),
)
for ax, (column, title) in zip(axes, diversity_panels):
    ax.bar(diversity_df['emotion'], diversity_df[column])
    ax.set_title(title)
    ax.set_ylabel('Diversity Score')
    ax.set_xlabel('Emotion')

plt.tight_layout()
plt.show()

# Overall system statistics
print(f"\n📊 Overall System Statistics:")
music_db_size = len(music_recommender.spotify_data) if music_recommender.spotify_data is not None else 0
print(f"   Music Database Size: {music_db_size} tracks")
food_db_size = len(food_recommender.food_data) if food_recommender.food_data is not None else 0
print(f"   Food Database Size: {food_db_size} items")
print(f"   Average Music Diversity: {diversity_df['music_diversity'].mean():.3f}")
print(f"   Average Food Diversity: {diversity_df['food_diversity'].mean():.3f}")
spotify_api_flag = 'Yes' if music_recommender.spotify_client else 'No'
print(f"   Spotify API Available: {spotify_api_flag}")


## 10. Save Recommendation Data and Results

In [None]:
# Save final recommendations and datasets
# Path is not brought in by the notebook's import cell, so import it here;
# without this the cell fails with NameError on the first line.
from pathlib import Path

# All exports go into one directory next to the notebook
output_dir = Path('recommendation_outputs')
output_dir.mkdir(parents=True, exist_ok=True)

# Save Spotify dataset (skipped when the recommender holds no music data)
if music_recommender.spotify_data is not None:
    spotify_file = output_dir / 'spotify_dataset.csv'
    music_recommender.spotify_data.to_csv(spotify_file, index=False)
    print(f"Spotify dataset saved to {spotify_file}")

# Save food dataset (skipped when the recommender holds no food data)
if food_recommender.food_data is not None:
    food_file = output_dir / 'food_dataset.csv'
    food_recommender.food_data.to_csv(food_file, index=False)
    print(f"Food dataset saved to {food_file}")

# Save diversity metrics computed by the quality-metrics cell
diversity_file = output_dir / 'diversity_metrics.csv'
diversity_df.to_csv(diversity_file, index=False)
print(f"Diversity metrics saved to {diversity_file}")
