In [1]:
# Import library untuk manipulasi data
import pandas as pd
import numpy as np

# Import library untuk preprocessing
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Import library untuk menghitung cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

# Import library untuk visualisasi (opsional)
import matplotlib.pyplot as plt
import seaborn as sns

# Display settings: show every column (no truncation) and up to 100 rows
# whenever a DataFrame is rendered in the notebook
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

## 1. Import Library yang Diperlukan

# Sistem Rekomendasi Lagu Berbasis Mood dan Energi

Notebook ini mengimplementasikan sistem rekomendasi lagu menggunakan metode **Content-Based Filtering** dengan algoritma **Cosine Similarity**.

## Tujuan:
- Merekomendasikan lagu berdasarkan input **Mood** dan **Level Energi** pengguna
- Menggunakan 5 fitur: Genre, Mood, Energy, Danceability, dan Tempo
- Menghitung kemiripan antar lagu menggunakan Cosine Similarity

## 2. Load Dataset

In [2]:
# Read the song/sentiment dataset from the CSV file into a DataFrame
df = pd.read_csv('music_sentiment_dataset.csv')

# Report the table size in one call, then let the notebook render the preview
print(
    "Dataset berhasil dimuat!",
    f"Total baris: {df.shape[0]}",
    f"Total kolom: {df.shape[1]}",
    "\nPreview 5 baris pertama:",
    sep="\n",
)
df.head()

Dataset berhasil dimuat!
Total baris: 1000
Total kolom: 11

Preview 5 baris pertama:


Unnamed: 0,User_ID,User_Text,Sentiment_Label,Recommended_Song_ID,Song_Name,Artist,Genre,Tempo (BPM),Mood,Energy,Danceability
0,U1,Way ball purpose public experience recently re...,Sad,S1,Someone Like You,Adele,Pop,67,Melancholic,Low,Low
1,U2,Save officer two myself a.,Happy,S2,Happy,Pharrell Williams,Pop,160,Joyful,High,High
2,U3,Decade ahead everyone environment themselves a...,Relaxed,S3,Clair de Lune,Debussy,Classical,60,Soothing,Low,Low
3,U4,Best change letter citizen try ask quality pro...,Happy,S4,Happy,Pharrell Williams,Pop,160,Joyful,High,High
4,U5,Worker player chance kind actually.,Happy,S5,Happy,Pharrell Williams,Pop,160,Joyful,High,High


## 3. Eksplorasi Data Awal

In [3]:
# Structure of the dataset (columns, dtypes, non-null counts).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
print("Informasi Dataset:")
df.info()
print("\n" + "="*50 + "\n")

# Descriptive statistics of the numeric columns
print("Statistik Deskriptif:")
print(df.describe())
print("\n" + "="*50 + "\n")

# Number of missing values per column
print("Missing Values:")
print(df.isnull().sum())
print("\n" + "="*50 + "\n")

# Count the distinct songs: a song is identified by its (Song_Name, Artist) pair
unique_songs = df.drop_duplicates(subset=['Song_Name', 'Artist'])
print(f"Total lagu unik: {len(unique_songs)}")
print(f"Total entri dalam dataset: {len(df)}")

Informasi Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   User_ID              1000 non-null   object
 1   User_Text            1000 non-null   object
 2   Sentiment_Label      1000 non-null   object
 3   Recommended_Song_ID  1000 non-null   object
 4   Song_Name            1000 non-null   object
 5   Artist               1000 non-null   object
 6   Genre                1000 non-null   object
 7   Tempo (BPM)          1000 non-null   int64 
 8   Mood                 1000 non-null   object
 9   Energy               1000 non-null   object
 10  Danceability         1000 non-null   object
dtypes: int64(1), object(10)
memory usage: 86.1+ KB
None


Statistik Deskriptif:
       Tempo (BPM)
count   1000.00000
mean      97.76000
std       36.74701
min       50.00000
25%       67.00000
50%      109.00000
75%      130.00000
max  

In [4]:
# Frequency table for each categorical column of interest, every table
# followed by the same separator line
separator = "\n" + "="*50 + "\n"
for heading, column in (
    ("Distribusi Sentiment Label:", 'Sentiment_Label'),
    ("Distribusi Energy Level:", 'Energy'),
    ("Distribusi Genre:", 'Genre'),
):
    print(heading)
    print(df[column].value_counts())
    print(separator)

# Per sentiment: how many distinct songs exist and how their energy levels split
print("Analisis Lagu Unik per Sentiment:")
for sentiment in df['Sentiment_Label'].unique():
    belongs_to_sentiment = unique_songs['Sentiment_Label'] == sentiment
    sentiment_songs = unique_songs[belongs_to_sentiment]
    energy_counts = sentiment_songs['Energy'].value_counts().to_dict()
    print(f"  {sentiment}: {len(sentiment_songs)} lagu unik")
    print(f"    Energy distribution: {energy_counts}")

Distribusi Sentiment Label:
Sentiment_Label
Motivated    262
Happy        256
Sad          243
Relaxed      239
Name: count, dtype: int64


Distribusi Energy Level:
Energy
High    518
Low     482
Name: count, dtype: int64


Distribusi Genre:
Genre
Pop          263
Rock         253
Classical    134
Hip-Hop      132
Funk         113
Ambient      105
Name: count, dtype: int64


Analisis Lagu Unik per Sentiment:
  Sad: 2 lagu unik
    Energy distribution: {'Low': 2}
  Happy: 2 lagu unik
    Energy distribution: {'High': 2}
  Relaxed: 2 lagu unik
    Energy distribution: {'Low': 2}
  Motivated: 2 lagu unik
    Energy distribution: {'High': 2}


## 4. Preprocessing Data

In [5]:
# Preprocess on a copy so the raw DataFrame stays untouched
songs_df = df.copy()

# 1. Turn the categorical features into integer codes with LabelEncoder
print("Melakukan encoding fitur kategorikal...")

# One encoder per feature so each mapping can be inspected on its own
le_genre, le_mood, le_energy, le_dance = (LabelEncoder() for _ in range(4))
encoders = {
    'Genre': le_genre,
    'Mood': le_mood,
    'Energy': le_energy,
    'Danceability': le_dance,
}

# Fit each encoder on its column and store the codes in "<column>_Encoded"
for column, encoder in encoders.items():
    songs_df[f'{column}_Encoded'] = encoder.fit_transform(songs_df[column])

print("Encoding selesai!")
print("\nMapping Encoding:")
# Show which integer code every original class label received
for column, encoder in encoders.items():
    codes = encoder.transform(encoder.classes_)
    print(f"  {column}: {dict(zip(encoder.classes_, codes))}")

Melakukan encoding fitur kategorikal...
Encoding selesai!

Mapping Encoding:
  Genre: {'Ambient': 0, 'Classical': 1, 'Funk': 2, 'Hip-Hop': 3, 'Pop': 4, 'Rock': 5}
  Mood: {'Calm': 0, 'Emotional': 1, 'Energetic': 2, 'Joyful': 3, 'Melancholic': 4, 'Powerful': 5, 'Soothing': 6}
  Energy: {'High': 0, 'Low': 1}
  Danceability: {'High': 0, 'Low': 1, 'Medium': 2}


In [6]:
# 2. Standardise the numeric Tempo feature with StandardScaler
print("\nMelakukan normalisasi fitur Tempo...")

scaler = StandardScaler()
tempo_scaled = scaler.fit_transform(songs_df[['Tempo (BPM)']])
songs_df['Tempo_Normalized'] = tempo_scaled

print("Normalisasi selesai!")
# Summary statistics of the tempo before and after scaling
for stage, column in (("sebelum", 'Tempo (BPM)'), ("setelah", 'Tempo_Normalized')):
    print(f"\nStatistik Tempo {stage} normalisasi:")
    print(songs_df[column].describe())


Melakukan normalisasi fitur Tempo...
Normalisasi selesai!

Statistik Tempo sebelum normalisasi:
count    1000.00000
mean       97.76000
std        36.74701
min        50.00000
25%        67.00000
50%       109.00000
75%       130.00000
max       160.00000
Name: Tempo (BPM), dtype: float64

Statistik Tempo setelah normalisasi:
count    1.000000e+03
mean    -1.225686e-16
std      1.000500e+00
min     -1.300348e+00
25%     -8.374938e-01
50%      3.060283e-01
75%      8.777893e-01
max      1.694591e+00
Name: Tempo_Normalized, dtype: float64


In [7]:
# Show the identifying columns next to the engineered feature columns
print("\nPreview data setelah preprocessing:")
display_cols = ['Song_Name', 'Artist'] + [
    'Genre_Encoded', 'Mood_Encoded', 'Energy_Encoded',
    'Danceability_Encoded', 'Tempo_Normalized',
]
songs_df.loc[:, display_cols].head(10)


Preview data setelah preprocessing:


Unnamed: 0,Song_Name,Artist,Genre_Encoded,Mood_Encoded,Energy_Encoded,Danceability_Encoded,Tempo_Normalized
0,Someone Like You,Adele,4,4,1,1,-0.837494
1,Happy,Pharrell Williams,4,3,0,0,1.694591
2,Clair de Lune,Debussy,1,6,1,1,-1.028081
3,Happy,Pharrell Williams,4,3,0,0,1.694591
4,Happy,Pharrell Williams,4,3,0,0,1.694591
5,Eye of the Tiger,Survivor,5,2,0,2,0.306028
6,Someone Like You,Adele,4,4,1,1,-0.837494
7,Someone Like You,Adele,4,4,1,1,-0.837494
8,Eye of the Tiger,Survivor,5,2,0,2,0.306028
9,Fix You,Coldplay,5,1,1,2,-0.61968


## 5. Membuat Feature Matrix dan Menghitung Cosine Similarity

In [8]:
# Build the feature matrix from the five engineered features
print("Membuat feature matrix...")

# Columns that take part in the similarity computation
feature_columns = [
    'Genre_Encoded',
    'Mood_Encoded',
    'Energy_Encoded',
    'Danceability_Encoded',
    'Tempo_Normalized',
]

# Pull the selected columns out as a NumPy array (one row per dataset entry)
feature_matrix = songs_df[feature_columns].to_numpy()

print("Feature matrix berhasil dibuat!")
print(f"Dimensi feature matrix: {feature_matrix.shape}")
for label, size in zip(("lagu", "fitur"), feature_matrix.shape):
    print(f"   - Jumlah {label}: {size}")
print("\nContoh 5 baris pertama feature matrix:")
print(feature_matrix[:5])

Membuat feature matrix...
Feature matrix berhasil dibuat!
Dimensi feature matrix: (1000, 5)
   - Jumlah lagu: 1000
   - Jumlah fitur: 5

Contoh 5 baris pertama feature matrix:
[[ 4.          4.          1.          1.         -0.83749376]
 [ 4.          3.          0.          0.          1.69459075]
 [ 1.          6.          1.          1.         -1.02808077]
 [ 4.          3.          0.          0.          1.69459075]
 [ 4.          3.          0.          0.          1.69459075]]


In [9]:
# Compute the pairwise cosine similarity between all rows of the feature matrix
print("\nMenghitung Cosine Similarity Matrix...")
print("   Formula: cosine_similarity(A,B) = (A · B) / (||A|| × ||B||)")

# Square matrix: entry [i, j] is the similarity between rows i and j
similarity_matrix = cosine_similarity(feature_matrix)

print("\nCosine Similarity Matrix berhasil dihitung!")
print(f"Dimensi similarity matrix: {similarity_matrix.shape}")
print(f"   - Setiap lagu dibandingkan dengan {similarity_matrix.shape[1]} lagu")
print("\nStatistik nilai similarity:")
# Summary statistics over every entry of the matrix
for label, statistic in (
    ("Min", np.min),
    ("Max", np.max),
    ("Mean", np.mean),
    ("Median", np.median),
):
    print(f"   - {label}: {statistic(similarity_matrix):.4f}")


Menghitung Cosine Similarity Matrix...
   Formula: cosine_similarity(A,B) = (A · B) / (||A|| × ||B||)

Cosine Similarity Matrix berhasil dihitung!
Dimensi similarity matrix: (1000, 1000)
   - Setiap lagu dibandingkan dengan 1000 lagu

Statistik nilai similarity:
   - Min: -0.2173
   - Max: 1.0000
   - Mean: 0.6854
   - Median: 0.8339


## 6. Implementasi Fungsi Rekomendasi

In [10]:
def get_recommendations(user_mood, user_energy, top_n=10):
    """
    Recommend songs that match the user's mood and, when possible, energy level.

    Reads the module-level ``songs_df`` and ``similarity_matrix``; row ``i`` of
    ``similarity_matrix`` is taken to describe the ``i``-th row of ``songs_df``.
    All row handling is positional, so the result does not depend on the
    DataFrame's index labels.

    Parameters:
        user_mood (str): Mood of the user (Happy, Sad, Relaxed, Motivated),
            matched case-insensitively against ``Sentiment_Label``. If no row
            matches, the value is mapped to specific ``Mood`` values instead.
        user_energy (str): Energy level, matched case-insensitively against
            ``Energy``. A falsy value skips the filter. If no candidate has the
            requested energy, the filter is dropped and songs of any energy
            for that mood are returned (fallback).
        top_n (int): Maximum number of recommendations (default: 10).
            Values <= 0 yield an empty list.

    Returns:
        list: Dictionaries with the keys Song_Name, Artist, Genre, Mood,
        Energy, Tempo and Danceability; each (Song_Name, Artist) pair appears
        at most once. Empty when nothing matches the mood.

    Note:
        The order (and, when more than ``top_n`` songs qualify, the selection)
        is randomised through ``np.random``; seed it for reproducible output.
    """
    if top_n <= 0:
        return []

    mood_key = user_mood.lower()

    # STEP 1: select rows by Sentiment_Label
    candidate_mask = (songs_df['Sentiment_Label'].str.lower() == mood_key).to_numpy(dtype=bool)

    # Nothing found: fall back to the more specific Mood column
    if not candidate_mask.any():
        sentiment_to_mood = {
            'happy': ['joyful', 'energetic'],
            'sad': ['melancholic', 'emotional'],
            'relaxed': ['soothing', 'calm'],
            'motivated': ['energetic', 'powerful']
        }
        target_moods = sentiment_to_mood.get(mood_key, [mood_key])
        candidate_mask = songs_df['Mood'].str.lower().isin(target_moods).to_numpy(dtype=bool)

    if not candidate_mask.any():
        return []

    # STEP 2: narrow down by Energy, but only if that leaves at least one song
    if user_energy:
        energy_matches = (songs_df['Energy'].str.lower() == user_energy.lower()).to_numpy(dtype=bool)
        energy_mask = candidate_mask & energy_matches
        if energy_mask.any():
            candidate_mask = energy_mask

    # STEP 3: keep the first occurrence of every (Song_Name, Artist) pair and
    # remember its row POSITION, which is what iloc and the matrix expect
    candidate_positions = np.flatnonzero(candidate_mask)
    candidates = songs_df.iloc[candidate_positions]
    is_repeat = candidates.duplicated(subset=['Song_Name', 'Artist'], keep='first').to_numpy()
    song_positions = candidate_positions[~is_repeat].tolist()

    # STEP 4: shuffle for variety
    np.random.shuffle(song_positions)

    # STEP 5: when there are more candidates than requested, rank them
    if len(song_positions) > top_n:
        scored = []
        for pos in song_positions:
            # Average similarity of this song to every row in the dataset
            avg_score = np.mean(similarity_matrix[pos])

            # Random factor (0.85 - 1.15) so repeated calls vary
            random_factor = np.random.uniform(0.85, 1.15)
            scored.append((pos, avg_score * random_factor))

        # Highest score first
        scored.sort(key=lambda item: item[1], reverse=True)
        top_positions = [pos for pos, _ in scored[:top_n]]
    else:
        top_positions = song_positions

    # STEP 6: build the result records (already unique after STEP 3)
    recommendations = []
    for pos in top_positions:
        song = songs_df.iloc[pos]
        recommendations.append({
            'Song_Name': song['Song_Name'],
            'Artist': song['Artist'],
            'Genre': song['Genre'],
            'Mood': song['Mood'],
            'Energy': song['Energy'],
            'Tempo': song['Tempo (BPM)'],
            'Danceability': song['Danceability']
        })

    return recommendations

# Confirm in the cell output that the function definition above was executed
print("Fungsi rekomendasi berhasil dibuat!")

Fungsi rekomendasi berhasil dibuat!


## 7. Testing Sistem Rekomendasi

Sekarang kita akan menguji sistem rekomendasi dengan berbagai kombinasi mood dan energi.

In [11]:
# Test 1: query the recommender for a sad, low-energy listener
print("="*70, "TEST 1: Mood = Sad, Energy = Low", "="*70, sep="\n")

recommendations = get_recommendations(user_mood='Sad', user_energy='Low', top_n=10)

if not recommendations:
    print("Tidak ada rekomendasi yang ditemukan.")
else:
    print(f"\nDitemukan {len(recommendations)} rekomendasi lagu:\n")
    # One three-line entry per song, followed by a blank line
    for rank, song in enumerate(recommendations, start=1):
        print(
            f"{rank}. {song['Song_Name']} - {song['Artist']}",
            f"   Genre: {song['Genre']} | Mood: {song['Mood']} | Energy: {song['Energy']}",
            f"   Tempo: {song['Tempo']} BPM | Danceability: {song['Danceability']}\n",
            sep="\n",
        )

TEST 1: Mood = Sad, Energy = Low

Ditemukan 2 rekomendasi lagu:

1. Someone Like You - Adele
   Genre: Pop | Mood: Melancholic | Energy: Low
   Tempo: 67 BPM | Danceability: Low

2. Fix You - Coldplay
   Genre: Rock | Mood: Emotional | Energy: Low
   Tempo: 75 BPM | Danceability: Medium



In [12]:
# Test 2: query the recommender for a happy, high-energy listener
print("="*70, "TEST 2: Mood = Happy, Energy = High", "="*70, sep="\n")

recommendations = get_recommendations(user_mood='Happy', user_energy='High', top_n=10)

if not recommendations:
    print("Tidak ada rekomendasi yang ditemukan.")
else:
    print(f"\nDitemukan {len(recommendations)} rekomendasi lagu:\n")
    # One three-line entry per song, followed by a blank line
    for rank, song in enumerate(recommendations, start=1):
        print(
            f"{rank}. {song['Song_Name']} - {song['Artist']}",
            f"   Genre: {song['Genre']} | Mood: {song['Mood']} | Energy: {song['Energy']}",
            f"   Tempo: {song['Tempo']} BPM | Danceability: {song['Danceability']}\n",
            sep="\n",
        )

TEST 2: Mood = Happy, Energy = High

Ditemukan 2 rekomendasi lagu:

1. Uptown Funk - Bruno Mars
   Genre: Funk | Mood: Energetic | Energy: High
   Tempo: 115 BPM | Danceability: High

2. Happy - Pharrell Williams
   Genre: Pop | Mood: Joyful | Energy: High
   Tempo: 160 BPM | Danceability: High



In [13]:
# Test 3: query the recommender for a relaxed, low-energy listener
print("="*70, "TEST 3: Mood = Relaxed, Energy = Low", "="*70, sep="\n")

recommendations = get_recommendations(user_mood='Relaxed', user_energy='Low', top_n=10)

if not recommendations:
    print("Tidak ada rekomendasi yang ditemukan.")
else:
    print(f"\nDitemukan {len(recommendations)} rekomendasi lagu:\n")
    # One three-line entry per song, followed by a blank line
    for rank, song in enumerate(recommendations, start=1):
        print(
            f"{rank}. {song['Song_Name']} - {song['Artist']}",
            f"   Genre: {song['Genre']} | Mood: {song['Mood']} | Energy: {song['Energy']}",
            f"   Tempo: {song['Tempo']} BPM | Danceability: {song['Danceability']}\n",
            sep="\n",
        )

TEST 3: Mood = Relaxed, Energy = Low

Ditemukan 2 rekomendasi lagu:

1. Weightless - Marconi Union
   Genre: Ambient | Mood: Calm | Energy: Low
   Tempo: 50 BPM | Danceability: Low

2. Clair de Lune - Debussy
   Genre: Classical | Mood: Soothing | Energy: Low
   Tempo: 60 BPM | Danceability: Low



In [14]:
# Test 4: query the recommender for a motivated, high-energy listener
print("="*70, "TEST 4: Mood = Motivated, Energy = High", "="*70, sep="\n")

recommendations = get_recommendations(user_mood='Motivated', user_energy='High', top_n=10)

if not recommendations:
    print("Tidak ada rekomendasi yang ditemukan.")
else:
    print(f"\nDitemukan {len(recommendations)} rekomendasi lagu:\n")
    # One three-line entry per song, followed by a blank line
    for rank, song in enumerate(recommendations, start=1):
        print(
            f"{rank}. {song['Song_Name']} - {song['Artist']}",
            f"   Genre: {song['Genre']} | Mood: {song['Mood']} | Energy: {song['Energy']}",
            f"   Tempo: {song['Tempo']} BPM | Danceability: {song['Danceability']}\n",
            sep="\n",
        )

TEST 4: Mood = Motivated, Energy = High

Ditemukan 2 rekomendasi lagu:

1. Eye of the Tiger - Survivor
   Genre: Rock | Mood: Energetic | Energy: High
   Tempo: 109 BPM | Danceability: Medium

2. Stronger - Kanye West
   Genre: Hip-Hop | Mood: Powerful | Energy: High
   Tempo: 130 BPM | Danceability: High



In [15]:
# Test 5: sad mood with high energy, used to observe the fallback behaviour
print("="*70, "TEST 5: Mood = Sad, Energy = High", "="*70, sep="\n")
print(
    "Catatan: Kombinasi ini mungkin tidak tersedia di dataset.",
    "   Sistem akan menggunakan fallback ke lagu Sad dengan energi apapun.\n",
    sep="\n",
)

recommendations = get_recommendations(user_mood='Sad', user_energy='High', top_n=10)

if not recommendations:
    print("Tidak ada rekomendasi yang ditemukan.")
else:
    print(f"\nDitemukan {len(recommendations)} rekomendasi lagu:\n")
    # One three-line entry per song, followed by a blank line
    for rank, song in enumerate(recommendations, start=1):
        print(
            f"{rank}. {song['Song_Name']} - {song['Artist']}",
            f"   Genre: {song['Genre']} | Mood: {song['Mood']} | Energy: {song['Energy']}",
            f"   Tempo: {song['Tempo']} BPM | Danceability: {song['Danceability']}\n",
            sep="\n",
        )

TEST 5: Mood = Sad, Energy = High
Catatan: Kombinasi ini mungkin tidak tersedia di dataset.
   Sistem akan menggunakan fallback ke lagu Sad dengan energi apapun.


Ditemukan 2 rekomendasi lagu:

1. Fix You - Coldplay
   Genre: Rock | Mood: Emotional | Energy: Low
   Tempo: 75 BPM | Danceability: Medium

2. Someone Like You - Adele
   Genre: Pop | Mood: Melancholic | Energy: Low
   Tempo: 67 BPM | Danceability: Low



## 8. Analisis Hasil

Mari kita analisis karakteristik sistem rekomendasi yang telah dibuat.

In [16]:
# Fungsi helper untuk menganalisis rekomendasi
def analyze_recommendations():
    """
    Tabulate how many recommendations every mood/energy combination yields.

    Calls ``get_recommendations`` (top_n=10) for each of the 4 moods x 3 energy
    levels, prints the resulting table plus summary statistics, and returns
    the table.

    Returns:
        pandas.DataFrame: One row per combination with the columns
        'Mood', 'Energy', 'Jumlah Rekomendasi' and 'Status'
        ('Tersedia' when at least one song came back, otherwise 'Tidak Ada').
    """
    banner = "="*70
    print("ANALISIS KETERSEDIAAN KOMBINASI MOOD-ENERGI\n")
    print(banner)

    moods = ['Happy', 'Sad', 'Relaxed', 'Motivated']
    energies = ['High', 'Medium', 'Low']

    def summarize(mood, energy):
        # One table row: how many songs the recommender returns for this query
        count = len(get_recommendations(mood, energy, top_n=10))
        return {
            'Mood': mood,
            'Energy': energy,
            'Jumlah Rekomendasi': count,
            'Status': 'Tersedia' if count > 0 else 'Tidak Ada'
        }

    results = [summarize(mood, energy) for mood in moods for energy in energies]

    # Tabular view of the collected rows
    df_results = pd.DataFrame(results)

    print("\nTabel Ketersediaan Kombinasi:\n")
    print(df_results.to_string(index=False))

    # Summary statistics
    print("\n" + banner)
    print("\nSTATISTIK:")
    total_combinations = len(results)
    available = sum(1 for row in results if row['Jumlah Rekomendasi'] > 0)
    unavailable = total_combinations - available
    availability_pct = (available/total_combinations)*100
    print(f"   Total kombinasi: {total_combinations}")
    print(f"   Kombinasi tersedia: {available}")
    print(f"   Kombinasi tidak tersedia: {unavailable}")
    print(f"   Persentase ketersediaan: {availability_pct:.1f}%")

    return df_results

# Run the availability analysis and keep the resulting table
df_analysis = analyze_recommendations()

ANALISIS KETERSEDIAAN KOMBINASI MOOD-ENERGI


Tabel Ketersediaan Kombinasi:

     Mood Energy  Jumlah Rekomendasi   Status
    Happy   High                   2 Tersedia
    Happy Medium                   2 Tersedia
    Happy    Low                   2 Tersedia
      Sad   High                   2 Tersedia
      Sad Medium                   2 Tersedia
      Sad    Low                   2 Tersedia
  Relaxed   High                   2 Tersedia
  Relaxed Medium                   2 Tersedia
  Relaxed    Low                   2 Tersedia
Motivated   High                   2 Tersedia
Motivated Medium                   2 Tersedia
Motivated    Low                   2 Tersedia


STATISTIK:
   Total kombinasi: 12
   Kombinasi tersedia: 12
   Kombinasi tidak tersedia: 0
   Persentase ketersediaan: 100.0%


## 9. Evaluasi Sistem Rekomendasi

Bagian ini mengevaluasi performa sistem rekomendasi menggunakan berbagai metrik.

### 9.1 Coverage (Cakupan)

Coverage mengukur seberapa banyak item dalam katalog yang bisa direkomendasikan oleh sistem.

In [17]:
def evaluate_coverage():
    """
    Compute catalogue coverage: the share of unique songs that get recommended.

    Queries ``get_recommendations`` (top_n=10) for every combination of the
    4 moods and 3 energy levels, collects the distinct songs returned, and
    compares that to the number of distinct (Song_Name, Artist) pairs in the
    module-level ``songs_df``. Prints a report along the way.

    Returns:
        tuple: ``(coverage, recommended_count, total_unique_songs)`` where
        ``coverage`` is a percentage (0.0 when the catalogue is empty).
    """
    print("="*70)
    print("EVALUASI COVERAGE")
    print("="*70)

    # Number of unique songs in the dataset
    total_unique_songs = songs_df.drop_duplicates(subset=['Song_Name', 'Artist']).shape[0]

    # Songs recommended at least once, stored as (Song_Name, Artist) tuples.
    # Tuples keep the two fields separate, so names or artists that contain
    # an underscore cannot be mis-split when the report is printed.
    recommended_songs = set()

    # Try every mood/energy combination
    moods = ['Happy', 'Sad', 'Relaxed', 'Motivated']
    energies = ['High', 'Medium', 'Low']

    for mood in moods:
        for energy in energies:
            for rec in get_recommendations(mood, energy, top_n=10):
                recommended_songs.add((rec['Song_Name'], rec['Artist']))

    # Coverage as a percentage; an empty catalogue counts as 0% rather than
    # raising ZeroDivisionError
    if total_unique_songs:
        coverage = (len(recommended_songs) / total_unique_songs) * 100
    else:
        coverage = 0.0

    print(f"\nTotal lagu unik dalam dataset: {total_unique_songs}")
    print(f"Total lagu yang bisa direkomendasikan: {len(recommended_songs)}")
    print(f"Coverage: {coverage:.2f}%")

    # List every song that can be recommended
    print(f"\nLagu yang dapat direkomendasikan:")
    for i, (song_name, artist) in enumerate(sorted(recommended_songs), 1):
        print(f"  {i}. {song_name} - {artist}")

    return coverage, len(recommended_songs), total_unique_songs

# Run the coverage evaluation and keep its three return values
coverage_score, rec_count, total_count = evaluate_coverage()

EVALUASI COVERAGE

Total lagu unik dalam dataset: 8
Total lagu yang bisa direkomendasikan: 8
Coverage: 100.00%

Lagu yang dapat direkomendasikan:
  1. Clair de Lune - Debussy
  2. Eye of the Tiger - Survivor
  3. Fix You - Coldplay
  4. Happy - Pharrell Williams
  5. Someone Like You - Adele
  6. Stronger - Kanye West
  7. Uptown Funk - Bruno Mars
  8. Weightless - Marconi Union


### 9.2 Diversity (Keberagaman)

Diversity mengukur seberapa beragam rekomendasi yang diberikan (variasi genre, mood, tempo, dll).

In [18]:
def evaluate_diversity(user_mood, user_energy):
    """
    Measure how varied the recommendations for a single query are.

    For genre, mood, energy and danceability the diversity is the number of
    distinct values divided by the number of recommended songs. Tempo
    variation is reported separately as a standard deviation.

    Parameters:
        user_mood (str): Mood passed on to ``get_recommendations``.
        user_energy (str): Energy level passed on to ``get_recommendations``.

    Returns:
        dict | None: The per-attribute diversity ratios, ``tempo_std``,
        ``avg_diversity`` (mean of the four ratios) and the sets
        ``unique_genres`` / ``unique_moods``; ``None`` when the query
        produced no recommendations.
    """
    recommendations = get_recommendations(user_mood, user_energy, top_n=10)
    if not recommendations:
        return None

    def values_of(key):
        # Collect one attribute across all recommended songs
        return [rec[key] for rec in recommendations]

    def unique_ratio(values):
        # Share of distinct values; 0 for an empty list
        if not values:
            return 0
        return len(set(values)) / len(values)

    genres = values_of('Genre')
    moods = values_of('Mood')
    tempos = values_of('Tempo')

    genre_diversity = unique_ratio(genres)
    mood_diversity = unique_ratio(moods)
    energy_diversity = unique_ratio(values_of('Energy'))
    dance_diversity = unique_ratio(values_of('Danceability'))

    # Tempo spread needs at least two songs to be meaningful
    if len(tempos) > 1:
        tempo_std = np.std(tempos)
    else:
        tempo_std = 0

    # Mean of the four categorical diversity ratios
    avg_diversity = (genre_diversity + mood_diversity + energy_diversity + dance_diversity) / 4

    return {
        'genre_diversity': genre_diversity,
        'mood_diversity': mood_diversity,
        'energy_diversity': energy_diversity,
        'dance_diversity': dance_diversity,
        'tempo_std': tempo_std,
        'avg_diversity': avg_diversity,
        'unique_genres': set(genres),
        'unique_moods': set(moods)
    }

# Evaluate diversity for a handful of representative queries
print("="*70, "EVALUASI DIVERSITY", "="*70, sep="\n")

# (mood, energy) pairs to evaluate
test_cases = [
    ('Sad', 'Low'),
    ('Happy', 'High'),
    ('Relaxed', 'Low'),
    ('Motivated', 'High')
]

diversity_results = []

for mood, energy in test_cases:
    print(f"\n{'='*70}")
    print(f"Query: Mood={mood}, Energy={energy}")
    print(f"{'='*70}")

    div_result = evaluate_diversity(mood, energy)

    # Nothing to report when the query returned no recommendations
    if not div_result:
        print("Tidak ada rekomendasi ditemukan.")
        continue

    print("\nMetrik Diversity:")
    for label, key in (
        ("Genre Diversity", 'genre_diversity'),
        ("Mood Diversity", 'mood_diversity'),
        ("Energy Diversity", 'energy_diversity'),
        ("Danceability Diversity", 'dance_diversity'),
    ):
        print(f"  {label}: {div_result[key]:.2%}")
    print(f"  Tempo Std Dev: {div_result['tempo_std']:.2f} BPM")
    print(f"  AVERAGE DIVERSITY: {div_result['avg_diversity']:.2%}")

    print("\nVariasi dalam rekomendasi:")
    print(f"  Genre unik: {', '.join(div_result['unique_genres'])}")
    print(f"  Mood unik: {', '.join(div_result['unique_moods'])}")

    diversity_results.append({
        'Mood': mood,
        'Energy': energy,
        'Avg_Diversity': div_result['avg_diversity']
    })

# Overall summary across the evaluated queries
if diversity_results:
    per_query_scores = [row['Avg_Diversity'] for row in diversity_results]
    avg_diversity_all = np.mean(per_query_scores)
    print(f"\n{'='*70}")
    print(f"RATA-RATA DIVERSITY KESELURUHAN: {avg_diversity_all:.2%}")
    print(f"{'='*70}")

EVALUASI DIVERSITY

Query: Mood=Sad, Energy=Low

Metrik Diversity:
  Genre Diversity: 100.00%
  Mood Diversity: 100.00%
  Energy Diversity: 50.00%
  Danceability Diversity: 100.00%
  Tempo Std Dev: 4.00 BPM
  AVERAGE DIVERSITY: 87.50%

Variasi dalam rekomendasi:
  Genre unik: Pop, Rock
  Mood unik: Melancholic, Emotional

Query: Mood=Happy, Energy=High

Metrik Diversity:
  Genre Diversity: 100.00%
  Mood Diversity: 100.00%
  Energy Diversity: 50.00%
  Danceability Diversity: 50.00%
  Tempo Std Dev: 22.50 BPM
  AVERAGE DIVERSITY: 75.00%

Variasi dalam rekomendasi:
  Genre unik: Pop, Funk
  Mood unik: Joyful, Energetic

Query: Mood=Relaxed, Energy=Low

Metrik Diversity:
  Genre Diversity: 100.00%
  Mood Diversity: 100.00%
  Energy Diversity: 50.00%
  Danceability Diversity: 50.00%
  Tempo Std Dev: 5.00 BPM
  AVERAGE DIVERSITY: 75.00%

Variasi dalam rekomendasi:
  Genre unik: Ambient, Classical
  Mood unik: Soothing, Calm

Query: Mood=Motivated, Energy=High

Metrik Diversity:
  Genre Dive

### 9.3 Intra-List Similarity

Mengukur kemiripan antar item dalam satu list rekomendasi. Nilai rendah menunjukkan rekomendasi yang beragam.

In [19]:
def evaluate_intra_list_similarity(user_mood, user_energy):
    """
    Compute the average pairwise similarity inside one recommendation list.

    A high value means the recommended songs are very alike (less diverse);
    a low value means they differ from each other (more diverse).

    Each recommended song is located in the module-level ``songs_df`` by its
    row POSITION (first row with the same Song_Name and Artist), because the
    rows and columns of ``similarity_matrix`` are positional. This keeps the
    lookup correct even when ``songs_df`` does not carry a default 0..n-1 index.

    Parameters:
        user_mood (str): Mood passed on to ``get_recommendations``.
        user_energy (str): Energy level passed on to ``get_recommendations``.

    Returns:
        dict | None: ``avg_intra_similarity``, ``min_similarity``,
        ``max_similarity`` and ``total_pairs``; ``None`` when fewer than two
        recommended songs are available to compare.
    """
    recommendations = get_recommendations(user_mood, user_energy, top_n=10)

    if len(recommendations) < 2:
        return None

    # Row position of every recommended song inside songs_df
    rec_positions = []
    for rec in recommendations:
        mask = (songs_df['Song_Name'] == rec['Song_Name']) & (songs_df['Artist'] == rec['Artist'])
        matches = np.flatnonzero(mask.to_numpy(dtype=bool))
        if matches.size:
            rec_positions.append(int(matches[0]))

    if len(rec_positions) < 2:
        return None

    # Similarities of all unordered pairs: the strict upper triangle of the
    # sub-matrix restricted to the recommended songs
    pairwise = np.asarray(similarity_matrix)[np.ix_(rec_positions, rec_positions)]
    similarities = pairwise[np.triu_indices(len(rec_positions), k=1)]

    return {
        'avg_intra_similarity': np.mean(similarities),
        'min_similarity': np.min(similarities),
        'max_similarity': np.max(similarities),
        'total_pairs': int(similarities.size)
    }

# Report the intra-list similarity for every query in test_cases
print("="*70, "EVALUASI INTRA-LIST SIMILARITY", "="*70, sep="\n")
print(
    "Mengukur kemiripan antar item dalam satu list rekomendasi",
    "Nilai rendah = rekomendasi beragam (baik)",
    "Nilai tinggi = rekomendasi terlalu mirip (kurang baik)",
    sep="\n",
)

intra_sim_results = []

for mood, energy in test_cases:
    print(f"\n{'='*70}")
    print(f"Query: Mood={mood}, Energy={energy}")
    print(f"{'='*70}")

    ils_result = evaluate_intra_list_similarity(mood, energy)

    # The metric needs at least two recommended songs
    if not ils_result:
        print("Tidak dapat menghitung (rekomendasi < 2 item).")
        continue

    score = ils_result['avg_intra_similarity']
    print("\nIntra-List Similarity:")
    print(f"  Rata-rata similarity: {score:.4f}")
    print(f"  Min similarity: {ils_result['min_similarity']:.4f}")
    print(f"  Max similarity: {ils_result['max_similarity']:.4f}")
    print(f"  Total pasangan: {ils_result['total_pairs']}")

    # Interpretation bands: above 0.9, above 0.7, everything else
    if score > 0.9:
        verdict = "Sangat mirip (diversity rendah)"
    elif score > 0.7:
        verdict = "Cukup mirip (diversity sedang)"
    else:
        verdict = "Beragam (diversity tinggi)"
    print(f"  Interpretasi: {verdict}")

    intra_sim_results.append({
        'Mood': mood,
        'Energy': energy,
        'Avg_Similarity': score
    })

# Overall summary across the evaluated queries
if intra_sim_results:
    per_query_scores = [row['Avg_Similarity'] for row in intra_sim_results]
    avg_ils = np.mean(per_query_scores)
    print(f"\n{'='*70}")
    print(f"RATA-RATA INTRA-LIST SIMILARITY: {avg_ils:.4f}")
    print(f"{'='*70}")

EVALUASI INTRA-LIST SIMILARITY
Mengukur kemiripan antar item dalam satu list rekomendasi
Nilai rendah = rekomendasi beragam (baik)
Nilai tinggi = rekomendasi terlalu mirip (kurang baik)

Query: Mood=Sad, Energy=Low

Intra-List Similarity:
  Rata-rata similarity: 0.8339
  Min similarity: 0.8339
  Max similarity: 0.8339
  Total pasangan: 1
  Interpretasi: Cukup mirip (diversity sedang)

Query: Mood=Happy, Energy=High

Intra-List Similarity:
  Rata-rata similarity: 0.9775
  Min similarity: 0.9775
  Max similarity: 0.9775
  Total pasangan: 1
  Interpretasi: Sangat mirip (diversity rendah)

Query: Mood=Relaxed, Energy=Low

Intra-List Similarity:
  Rata-rata similarity: 0.2744
  Min similarity: 0.2744
  Max similarity: 0.2744
  Total pasangan: 1
  Interpretasi: Beragam (diversity tinggi)

Query: Mood=Motivated, Energy=High

Intra-List Similarity:
  Rata-rata similarity: 0.7449
  Min similarity: 0.7449
  Max similarity: 0.7449
  Total pasangan: 1
  Interpretasi: Cukup mirip (diversity sedang)

### 9.4 Personalization (Personalisasi)

Mengukur seberapa berbeda rekomendasi untuk query yang berbeda. Sistem yang baik memberikan hasil berbeda untuk preferensi berbeda.

In [20]:
def evaluate_personalization(queries=None, top_n=10):
    """
    Measure how different the recommendation lists are across queries.

    For every pair of (mood, energy) queries the Jaccard overlap of the two
    recommended song sets is computed. The personalization score is
    ``1 - mean(overlap)``: a high score means different preferences receive
    different songs, a low score means every query gets a similar list.

    Parameters
    ----------
    queries : iterable of (mood, energy) tuples, optional
        Queries to compare. Defaults to four built-in combinations
        (Sad/Low, Happy/High, Relaxed/Low, Motivated/High). Repeated
        queries are ignored.
    top_n : int, optional
        Number of recommendations requested per query (default 10).

    Returns
    -------
    tuple
        ``(personalization_score, avg_overlap)``, both between 0 and 1.

    Raises
    ------
    ValueError
        If fewer than two distinct queries are given, because no pair can be
        compared and the mean overlap would be undefined.
    """
    if queries is None:
        queries = [
            ('Sad', 'Low'),
            ('Happy', 'High'),
            ('Relaxed', 'Low'),
            ('Motivated', 'High')
        ]

    # Keep each query once, in the order given, so no query is paired with itself.
    queries = list(dict.fromkeys((mood, energy) for mood, energy in queries))
    if len(queries) < 2:
        raise ValueError(
            "evaluate_personalization needs at least two distinct (mood, energy) queries"
        )

    print("="*70)
    print("EVALUASI PERSONALIZATION")
    print("="*70)

    # Songs are identified by a (name, artist) tuple; joining the two with a
    # separator could make distinct songs collide when a name contains it.
    all_recommendations = {}
    for mood, energy in queries:
        recs = get_recommendations(mood, energy, top_n=top_n)
        all_recommendations[(mood, energy)] = {
            (r['Song_Name'], r['Artist']) for r in recs
        }

    # Jaccard overlap for every unordered pair of queries
    overlaps = []
    for i, key1 in enumerate(queries):
        set1 = all_recommendations[key1]
        for key2 in queries[i + 1:]:
            set2 = all_recommendations[key2]
            shared = set1 & set2
            combined = set1 | set2
            # Two empty lists share nothing; treat that as zero overlap
            # instead of dividing by zero.
            overlap = len(shared) / len(combined) if combined else 0
            overlaps.append(overlap)

            print(f"\n{key1[0]} {key1[1]} vs {key2[0]} {key2[1]}:")
            print(f"  Overlap (Jaccard): {overlap:.2%}")
            print(f"  Lagu sama: {len(shared)}")
            print(f"  Lagu berbeda: {len(set1 ^ set2)}")

    # Personalization score = 1 - mean overlap
    avg_overlap = np.mean(overlaps)
    personalization_score = 1 - avg_overlap

    print(f"\n{'='*70}")
    print(f"HASIL PERSONALIZATION:")
    print(f"  Rata-rata overlap: {avg_overlap:.2%}")
    print(f"  Personalization Score: {personalization_score:.2%}")
    print(f"{'='*70}")

    # Human-readable verdict
    print(f"\nInterpretasi:")
    if personalization_score > 0.7:
        print(f"  SANGAT BAIK - Sistem memberikan rekomendasi yang berbeda untuk setiap user")
    elif personalization_score > 0.4:
        print(f"  BAIK - Sistem cukup mempersonalisasi rekomendasi")
    else:
        print(f"  KURANG - Sistem memberikan rekomendasi yang terlalu mirip")

    return personalization_score, avg_overlap

# Run the personalization evaluation; the score and the average overlap are kept
# because the summary and dashboard cells below read both values.
pers_score, avg_overlap = evaluate_personalization()

EVALUASI PERSONALIZATION

Sad Low vs Happy High:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

Sad Low vs Relaxed Low:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

Sad Low vs Motivated High:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

Happy High vs Relaxed Low:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

Happy High vs Motivated High:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

Relaxed Low vs Motivated High:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

HASIL PERSONALIZATION:
  Rata-rata overlap: 0.00%
  Personalization Score: 100.00%

Interpretasi:
  SANGAT BAIK - Sistem memberikan rekomendasi yang berbeda untuk setiap user

EVALUASI PERSONALIZATION

Sad Low vs Happy High:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

Sad Low vs Relaxed Low:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu berbeda: 4

Sad Low vs Motivated High:
  Overlap (Jaccard): 0.00%
  Lagu sama: 0
  Lagu

### 9.5 Summary Evaluasi

Ringkasan dari semua metrik evaluasi yang telah dihitung.

In [21]:
print("="*70)
print("SUMMARY EVALUASI SISTEM REKOMENDASI")
print("="*70)

# Resolve every metric and its verdict once, so the table, the conclusion and
# the improvement hints can never disagree with each other.
# avg_diversity_all / avg_ils are only read when their result lists are
# non-empty (avg_ils is not assigned at all when no query produced a result).
has_diversity = bool(diversity_results)
has_ils = bool(intra_sim_results)
diversity_value = avg_diversity_all if has_diversity else None
ils_value = avg_ils if has_ils else None

full_coverage = coverage_score >= 100
diversity_good = has_diversity and diversity_value > 0.3
ils_too_similar = has_ils and ils_value > 0.9
personalization_good = pers_score > 0.4

diversity_text = f"{diversity_value:.2%}" if has_diversity else "N/A"
ils_text = f"{ils_value:.4f}" if has_ils else "N/A"

# Summary table: one row per metric
summary_data = {
    'Metrik': [
        'Coverage',
        'Average Diversity',
        'Intra-List Similarity',
        'Personalization Score'
    ],
    'Nilai': [
        f"{coverage_score:.2f}%",
        diversity_text,
        ils_text,
        f"{pers_score:.2%}"
    ],
    'Interpretasi': [
        f"{rec_count}/{total_count} lagu dapat direkomendasikan",
        "Rekomendasi cukup beragam" if diversity_good else "Diversity rendah",
        "Similarity tinggi (kurang diverse)" if ils_too_similar else "Cukup diverse",
        "Personalisasi baik" if personalization_good else "Personalisasi kurang"
    ]
}

df_summary = pd.DataFrame(summary_data)
print("\n")
print(df_summary.to_string(index=False))

print("\n" + "="*70)
print("KESIMPULAN EVALUASI:")
print("="*70)

print(f"""
1. COVERAGE ({coverage_score:.2f}%):
   - Sistem dapat merekomendasikan {rec_count} dari {total_count} lagu unik
   - {'Baik - Semua lagu dapat direkomendasikan' if full_coverage else 'Perlu penambahan data untuk coverage lebih baik'}

2. DIVERSITY ({diversity_text} rata-rata):
   - Rekomendasi memiliki variasi yang {'baik' if diversity_good else 'kurang'}
   - {'Sistem memberikan berbagai pilihan genre dan mood' if diversity_good else 'Sistem cenderung memberikan item yang mirip'}

3. INTRA-LIST SIMILARITY ({ils_text}):
   - {'Item dalam list sangat mirip (perlu peningkatan diversity)' if ils_too_similar else 'Item dalam list cukup beragam'}

4. PERSONALIZATION ({pers_score:.2%}):
   - Sistem {'memberikan rekomendasi yang berbeda untuk preferensi berbeda' if personalization_good else 'cenderung memberikan rekomendasi yang mirip'}
   - Overlap rata-rata antar query: {avg_overlap:.2%}

REKOMENDASI PERBAIKAN:
- {'Tambah variasi lagu dalam dataset untuk meningkatkan diversity' if not diversity_good or not full_coverage else 'Sistem sudah cukup baik'}
- {'Pertimbangkan algoritma re-ranking untuk mengurangi similarity dalam list' if ils_too_similar else 'Intra-list similarity sudah optimal'}
- {'Tingkatkan filtering atau weighting untuk personalisasi lebih baik' if not personalization_good else 'Personalisasi sudah baik'}
""")

SUMMARY EVALUASI SISTEM REKOMENDASI


               Metrik   Nilai                    Interpretasi
             Coverage 100.00% 8/8 lagu dapat direkomendasikan
    Average Diversity  81.25%       Rekomendasi cukup beragam
Intra-List Similarity  0.7077                   Cukup diverse
Personalization Score 100.00%              Personalisasi baik

KESIMPULAN EVALUASI:

1. COVERAGE (100.00%):
   - Sistem dapat merekomendasikan 8 dari 8 lagu unik
   - Baik - Semua lagu dapat direkomendasikan

2. DIVERSITY (81.25% rata-rata):
   - Rekomendasi memiliki variasi yang baik
   - Sistem memberikan berbagai pilihan genre dan mood

3. INTRA-LIST SIMILARITY (0.7077):
   - Item dalam list cukup beragam

4. PERSONALIZATION (100.00%):
   - Sistem memberikan rekomendasi yang berbeda untuk preferensi berbeda
   - Overlap rata-rata antar query: 0.00%

REKOMENDASI PERBAIKAN:
- Sistem sudah cukup baik
- Intra-list similarity sudah optimal
- Personalisasi sudah baik



### 9.6 Visualisasi Hasil Evaluasi

Visualisasi metrik evaluasi untuk pemahaman yang lebih baik.

In [None]:
# Evaluation dashboard: six panels summarising coverage, diversity,
# intra-list similarity, personalization and mood/energy availability.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

fig = plt.figure(figsize=(16, 12))

# 1. Coverage: recommendable vs. never-recommended songs
ax1 = fig.add_subplot(2, 3, 1)
categories = ['Dapat\nDirekomendasikan', 'Tidak Dapat\nDirekomendasikan']
values = [rec_count, total_count - rec_count]
colors = ['#2ecc71', '#e74c3c']
bars = ax1.bar(categories, values, color=colors, alpha=0.7, edgecolor='black')
ax1.set_ylabel('Jumlah Lagu', fontsize=11, fontweight='bold')
ax1.set_title(f'Coverage: {coverage_score:.1f}%', fontsize=12, fontweight='bold')
ax1.set_ylim(0, max(values) * 1.2)
ax1.bar_label(bars, fmt='%d', fontsize=11, fontweight='bold')

# 2. Diversity per query (skipped when no query produced a diversity result)
if diversity_results:
    ax2 = fig.add_subplot(2, 3, 2)
    query_labels = [f"{r['Mood']}\n{r['Energy']}" for r in diversity_results]
    diversity_values = [r['Avg_Diversity'] * 100 for r in diversity_results]
    bars = ax2.bar(query_labels, diversity_values, color='#3498db', alpha=0.7, edgecolor='black')
    ax2.set_ylabel('Diversity Score (%)', fontsize=11, fontweight='bold')
    ax2.set_title('Diversity per Query', fontsize=12, fontweight='bold')
    # Headroom above 100 so the label of a full-height bar stays inside the axes
    ax2.set_ylim(0, 110)
    ax2.axhline(y=avg_diversity_all*100, color='red', linestyle='--', linewidth=2, label=f'Rata-rata: {avg_diversity_all*100:.1f}%')
    ax2.legend()
    ax2.bar_label(bars, fmt='%.1f%%', fontsize=9, fontweight='bold')

# 3. Intra-list similarity per query (skipped when no result is available)
if intra_sim_results:
    ax3 = fig.add_subplot(2, 3, 3)
    query_labels_ils = [f"{r['Mood']}\n{r['Energy']}" for r in intra_sim_results]
    ils_values = [r['Avg_Similarity'] for r in intra_sim_results]
    bars = ax3.bar(query_labels_ils, ils_values, color='#e67e22', alpha=0.7, edgecolor='black')
    ax3.set_ylabel('Similarity Score', fontsize=11, fontweight='bold')
    ax3.set_title('Intra-List Similarity per Query', fontsize=12, fontweight='bold')
    # Headroom above 1.0 so labels of near-1 bars are not drawn over the title
    ax3.set_ylim(0, 1.1)
    ax3.axhline(y=avg_ils, color='red', linestyle='--', linewidth=2, label=f'Rata-rata: {avg_ils:.3f}')
    ax3.legend()
    ax3.bar_label(bars, fmt='%.3f', fontsize=9, fontweight='bold')

# 4. Personalization: share of unique vs. overlapping recommendations
ax4 = fig.add_subplot(2, 3, 4)
pers_data = [pers_score * 100, avg_overlap * 100]
labels = [f'Unique\n({pers_score*100:.1f}%)', f'Overlap\n({avg_overlap*100:.1f}%)']
colors_pie = ['#2ecc71', '#e74c3c']
ax4.pie(pers_data, labels=labels, colors=colors_pie, autopct='%1.1f%%',
        startangle=90, textprops={'fontsize': 10, 'fontweight': 'bold'})
ax4.set_title('Personalization Score', fontsize=12, fontweight='bold')

# 5. All percentage metrics side by side
ax5 = fig.add_subplot(2, 3, 5)
metrics_names = ['Coverage', 'Diversity', 'Personalization']
metrics_values = [
    coverage_score,
    avg_diversity_all * 100 if diversity_results else 0,
    pers_score * 100
]
colors_metrics = ['#2ecc71', '#3498db', '#9b59b6']
bars = ax5.barh(metrics_names, metrics_values, color=colors_metrics, alpha=0.7, edgecolor='black')
ax5.set_xlabel('Score (%)', fontsize=11, fontweight='bold')
ax5.set_title('Summary Metrik Evaluasi', fontsize=12, fontweight='bold')
# Extra room on the right: the label of a 100% bar would otherwise be drawn
# outside the axes and run into the neighbouring heatmap
ax5.set_xlim(0, 115)
ax5.bar_label(bars, fmt='%.1f%%', padding=3, fontsize=10, fontweight='bold')

# 6. Heatmap: number of recommendations returned per mood/energy combination
ax6 = fig.add_subplot(2, 3, 6)
moods = ['Happy', 'Sad', 'Relaxed', 'Motivated']
energies = ['High', 'Medium', 'Low']
availability_matrix = [
    [len(get_recommendations(mood, energy, top_n=10)) for energy in energies]
    for mood in moods
]

im = ax6.imshow(availability_matrix, cmap='YlGnBu', aspect='auto')
ax6.set_xticks(range(len(energies)))
ax6.set_yticks(range(len(moods)))
ax6.set_xticklabels(energies, fontsize=10)
ax6.set_yticklabels(moods, fontsize=10)
ax6.set_xlabel('Energy Level', fontsize=11, fontweight='bold')
ax6.set_ylabel('Mood', fontsize=11, fontweight='bold')
ax6.set_title('Jumlah Rekomendasi per Kombinasi', fontsize=12, fontweight='bold')

# Write the count into every cell (imshow puts rows on y and columns on x)
for i, row_counts in enumerate(availability_matrix):
    for j, count in enumerate(row_counts):
        ax6.text(j, i, count,
                 ha="center", va="center", color="black", fontsize=11, fontweight='bold')

cbar = fig.colorbar(im, ax=ax6)
cbar.set_label('Jumlah Lagu', fontsize=10, fontweight='bold')

# The figure title must exist before tight_layout runs so that room is
# reserved for it; adding it afterwards lets it overlap the top-row titles.
fig.suptitle('Dashboard Evaluasi Sistem Rekomendasi Lagu',
             fontsize=16, fontweight='bold')
fig.tight_layout()
plt.show()

print("\n✅ Visualisasi berhasil ditampilkan!")

### 9.7 Visualisasi Distribusi Fitur dalam Dataset

In [None]:
# Distribution of the dataset features, one panel per feature.
fig, axes = plt.subplots(2, 3, figsize=(16, 10))

# Natural order for the ordinal level columns (Energy, Danceability)
LEVEL_ORDER = ['Low', 'Medium', 'High']


def order_levels(counts):
    """Return `counts` with Low/Medium/High first (in that order).

    A plain sort_index() is alphabetical (High, Low, Medium), which hides the
    ordinal meaning. Any index value outside LEVEL_ORDER is kept and appended
    in sorted order, so no category is ever dropped.
    """
    known = [level for level in LEVEL_ORDER if level in counts.index]
    others = [level for level in counts.sort_index().index if level not in LEVEL_ORDER]
    return counts.reindex(known + others)


# 1. Sentiment label share
ax1 = axes[0, 0]
sentiment_counts = df['Sentiment_Label'].value_counts()
colors_sent = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']
ax1.pie(sentiment_counts, labels=sentiment_counts.index,
        autopct='%1.1f%%', colors=colors_sent,
        startangle=90, textprops={'fontsize': 9, 'fontweight': 'bold'})
ax1.set_title('Distribusi Sentiment Label', fontsize=12, fontweight='bold')

# 2. Energy level counts
ax2 = axes[0, 1]
energy_counts = order_levels(df['Energy'].value_counts())
bars = ax2.bar(energy_counts.index, energy_counts.values, color='#9b59b6', alpha=0.7, edgecolor='black')
ax2.set_xlabel('Energy Level', fontsize=11, fontweight='bold')
ax2.set_ylabel('Jumlah Entries', fontsize=11, fontweight='bold')
ax2.set_title('Distribusi Energy Level', fontsize=12, fontweight='bold')
ax2.bar_label(bars, fmt='%d', fontsize=9, fontweight='bold')

# 3. Genre counts
ax3 = axes[0, 2]
genre_counts = df['Genre'].value_counts()
bars = ax3.barh(genre_counts.index, genre_counts.values, color='#1abc9c', alpha=0.7, edgecolor='black')
ax3.set_xlabel('Jumlah Entries', fontsize=11, fontweight='bold')
ax3.set_title('Distribusi Genre', fontsize=12, fontweight='bold')
ax3.bar_label(bars, fmt='%d', padding=3, fontsize=9, fontweight='bold')

# 4. Mood counts
ax4 = axes[1, 0]
mood_counts = df['Mood'].value_counts()
bars = ax4.barh(mood_counts.index, mood_counts.values, color='#e67e22', alpha=0.7, edgecolor='black')
ax4.set_xlabel('Jumlah Entries', fontsize=11, fontweight='bold')
ax4.set_title('Distribusi Mood', fontsize=12, fontweight='bold')
ax4.bar_label(bars, fmt='%d', padding=3, fontsize=9, fontweight='bold')

# 5. Tempo histogram with the mean marked
ax5 = axes[1, 1]
tempo = df['Tempo (BPM)']
tempo_mean = tempo.mean()
ax5.hist(tempo, bins=15, color='#e74c3c', alpha=0.7, edgecolor='black')
ax5.set_xlabel('Tempo (BPM)', fontsize=11, fontweight='bold')
ax5.set_ylabel('Frekuensi', fontsize=11, fontweight='bold')
ax5.set_title('Distribusi Tempo', fontsize=12, fontweight='bold')
ax5.axvline(tempo_mean, color='blue', linestyle='--', linewidth=2,
            label=f"Mean: {tempo_mean:.1f}")
ax5.legend()

# 6. Danceability level counts
ax6 = axes[1, 2]
dance_counts = order_levels(df['Danceability'].value_counts())
bars = ax6.bar(dance_counts.index, dance_counts.values, color='#f39c12', alpha=0.7, edgecolor='black')
ax6.set_xlabel('Danceability Level', fontsize=11, fontweight='bold')
ax6.set_ylabel('Jumlah Entries', fontsize=11, fontweight='bold')
ax6.set_title('Distribusi Danceability', fontsize=12, fontweight='bold')
ax6.bar_label(bars, fmt='%d', fontsize=9, fontweight='bold')

# Add the figure title before tight_layout so space is reserved for it;
# adding it afterwards makes it overlap the top-row panel titles.
fig.suptitle('Distribusi Fitur dalam Dataset', fontsize=16, fontweight='bold')
fig.tight_layout()
plt.show()

print("\n✅ Visualisasi distribusi fitur berhasil ditampilkan!")

### 9.8 Visualisasi Similarity Matrix

In [None]:
# Heatmap of the cosine similarity between a small sample of distinct songs.
# Keep the first row of each (Song_Name, Artist) pair, at most 8 songs.
unique_songs_sample = songs_df.drop_duplicates(subset=['Song_Name', 'Artist']).head(8)

# NOTE(review): the index labels of songs_df are used as row/column positions
# of similarity_matrix — this presumably relies on songs_df having a default
# 0..n-1 index aligned with the matrix; confirm where the matrix is built.
sample_indices = unique_songs_sample.index.tolist()
sample_similarity = similarity_matrix[np.ix_(sample_indices, sample_indices)]

# Tick labels: song names, shortened to 15 characters plus "..." when longer
song_labels = [
    name if len(name) <= 15 else f"{name[:15]}..."
    for name in unique_songs_sample['Song_Name']
]

fig, ax = plt.subplots(figsize=(12, 10))
im = ax.imshow(sample_similarity, cmap='YlOrRd', aspect='auto', vmin=0, vmax=1)

# One tick per song on both axes
tick_positions = range(len(song_labels))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(song_labels, rotation=45, ha='right', fontsize=9)
ax.set_yticklabels(song_labels, fontsize=9)

# Print each similarity value in its cell; white text on the darker cells
for (i, j), value in np.ndenumerate(sample_similarity):
    text_color = "white" if value > 0.5 else "black"
    ax.text(j, i, f'{value:.2f}',
            ha="center", va="center",
            color=text_color,
            fontsize=8, fontweight='bold')

cbar = fig.colorbar(im, ax=ax)
cbar.set_label('Cosine Similarity', fontsize=11, fontweight='bold')

ax.set_title('Cosine Similarity Matrix antar Lagu Unik', fontsize=14, fontweight='bold', pad=20)
ax.set_xlabel('Lagu', fontsize=11, fontweight='bold')
ax.set_ylabel('Lagu', fontsize=11, fontweight='bold')
fig.tight_layout()
plt.show()

# Reading guide for the heatmap, printed below the figure
for line in (
    "\n✅ Visualisasi similarity matrix berhasil ditampilkan!",
    "\nInterpretasi:",
    "  - Nilai mendekati 1.0 (merah tua) = Lagu sangat mirip",
    "  - Nilai mendekati 0.0 (kuning muda) = Lagu sangat berbeda",
    "  - Diagonal selalu 1.0 karena lagu dibandingkan dengan dirinya sendiri",
):
    print(line)

## 10. Kesimpulan

### Hasil yang Dicapai:
1. Sistem berhasil mengimplementasikan **Content-Based Filtering** dengan **Cosine Similarity**
2. Menggunakan **5 fitur** untuk perhitungan kemiripan: Genre, Mood, Energy, Danceability, Tempo
3. Sistem dapat memberikan rekomendasi berdasarkan input **Mood** dan **Level Energi**
4. Menerapkan **penghapusan duplikasi** untuk memastikan setiap lagu unik hanya muncul sekali
5. Menggunakan **random factor** untuk variasi hasil rekomendasi

### Hasil Evaluasi:
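1. **Coverage**: Mengukur persentase lagu yang dapat direkomendasikan — hasil: 100.00% (8 dari 8 lagu unik)
2. **Diversity**: Mengukur keberagaman rekomendasi (genre, mood, tempo) — hasil: rata-rata 81.25%
3. **Intra-List Similarity**: Mengukur kemiripan antar item dalam satu list — hasil: rata-rata 0.7077 (semakin rendah semakin beragam)
4. **Personalization**: Mengukur perbedaan rekomendasi untuk query berbeda — hasil: 100.00% (rata-rata overlap 0.00%)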

### Keterbatasan:
1. Dataset hanya memiliki **8 lagu unik** sehingga variasi rekomendasi terbatas
2. Beberapa kombinasi mood-energi tidak tersedia (misal: Sad + High Energy)
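3. Jumlah rekomendasi bisa kurang dari target jika kandidat terbatas; pada evaluasi personalization setiap query hanya menghasilkan 2 lagu dari target 10, sehingga skor personalization 100% perlu ditafsirkan dengan hati-hati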
4. Belum ada ground truth untuk evaluasi accuracy/precision

### Saran Pengembangan:
1. Perluas dataset dengan lebih banyak lagu unik
2. Tambahkan fitur analisis emosi dari lirik lagu
3. Implementasi strategi fallback yang lebih adaptif
4. Tambahkan evaluasi berbasis user feedback (implicit/explicit)
5. Implementasi A/B testing untuk membandingkan algoritma
6. Integrasi dengan API musik untuk data real-time