In [78]:
!pip install annoy




In [16]:
import pandas as pd
import numpy as np
from annoy import AnnoyIndex


In [17]:
# 載入CSV
df = pd.read_csv("music_dataset.csv")

# 音樂特徵
feature_cols = [
    'acousticness', 'danceability', 'duration_ms', 'energy',
    'instrumentalness', 'liveness', 'loudness',
    'speechiness', 'tempo', 'valence'
]

# 特徵數值化
df[feature_cols] = df[feature_cols].fillna(0).astype(float)

print("資料筆數：", len(df))


資料筆數： 40560


In [18]:
f = len(feature_cols)
annoy_index = AnnoyIndex(f, 'euclidean')  #歐氏距離

print("建立 Annoy Index...")
for i in range(len(df)):
    vector = df.loc[i, feature_cols].values
    annoy_index.add_item(i, vector)

annoy_index.build(10)  
print("Annoy Index 建立完成")


建立 Annoy Index...
Annoy Index 建立完成


In [82]:
import pandas as pd

# 讀取音樂資料集 CSV 檔
df = pd.read_csv("music_dataset.csv")

# 避免空值
df['track_name'] = df['track_name'].fillna('').astype(str)
df['artist_name'] = df['artist_name'].fillna('').astype(str)
df['music_genre'] = df['music_genre'].fillna('Unknown')
df['popularity'] = pd.to_numeric(df['popularity'], errors='coerce').fillna(0)


In [86]:
# 推薦函式：用 Annoy + genre 篩選 + popularity 加權
def recommend_similar_tracks_by_genre(track_name, top_n=5, genre_filter=True, weight_popularity=0.3):
    track_name = track_name.lower()
    match = df[df['track_name'].str.lower() == track_name]

    if match.empty:
        print(f"❌ Can't find the song named \"{track_name}\". Please try another one.\n")
        return False

    idx = match.index[0]
    target_vector = df.loc[idx, feature_cols].values
    target_genre = df.loc[idx, 'music_genre']
    target_popularity = df.loc[idx, 'popularity']

    # 找出最近鄰（包含自己）
    candidate_indices, distances = annoy_index.get_nns_by_vector(target_vector, 100, include_distances=True)

    candidates = pd.DataFrame({
        'index': candidate_indices,
        'distance': distances
    })

    # 加入欄位資訊
    candidates['track_name'] = candidates['index'].apply(lambda i: df.loc[i, 'track_name'])
    candidates['artist_name'] = candidates['index'].apply(lambda i: df.loc[i, 'artist_name'])
    candidates['music_genre'] = candidates['index'].apply(lambda i: df.loc[i, 'music_genre'])
    candidates['popularity'] = candidates['index'].apply(lambda i: df.loc[i, 'popularity'])

    # 排除自己
    candidates = candidates[candidates['index'] != idx]

    # 篩選相同 genre
    if genre_filter:
        candidates = candidates[candidates['music_genre'] == target_genre]

    # 加入加權分數（distance 越小越相似）
    max_pop = df['popularity'].max()
    candidates['popularity_norm'] = candidates['popularity'] / max_pop
    candidates['score'] = (1 - weight_popularity) * (1 - candidates['distance']) + weight_popularity * candidates['popularity_norm']

    candidates = candidates.sort_values(by='score', ascending=False).head(top_n)

    print(f"\n🎵 Songs recommended based on \"{df.loc[idx, 'track_name']}\" (Genre: {target_genre}):")
    for i, row in candidates.iterrows():
        print(f"  🎧 {row['track_name']} — {row['artist_name']} | Popularity: {row['popularity']} | Score: {row['score']:.4f}")
    print()
    return True

# 使用者互動輸入歌曲名稱
print("🔍 Enter a song name to get recommendations.")
print("👉 Press Enter without typing anything to quit.\n")

while True:
    user_input = input("Enter the name of a song you like: ").strip()
    if user_input == "":
        print("👋 Bye! Enjoy your music!")
        break
    recommend_similar_tracks_by_genre(user_input, top_n=5)


🔍 Enter a song name to get recommendations.
👉 Press Enter without typing anything to quit.



Enter the name of a song you like:  Clash



🎵 Songs recommended based on "Clash" (Genre: Electronic):
  🎧 BZZRK - Kotori Remix — SVDDEN DEATH | Popularity: 28.0 | Score: -2.5914
  🎧 Murda — Snavs | Popularity: 54.0 | Score: -4.7440
  🎧 Calling You Home (feat. Runn) - Crystal Skies Remix — Seven Lions | Popularity: 32.0 | Score: -4.9853
  🎧 Never Ever - Edit — Röyksopp | Popularity: 48.0 | Score: -23.5034
  🎧 Cyclone — Dub Pistols | Popularity: 36.0 | Score: -33.5506



Enter the name of a song you like:  


👋 Bye! Enjoy your music!
