In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

# -----------------------------------------------------------------
# Step 1: read the song table from disk
# -----------------------------------------------------------------
file_path = "spotify_songs.csv"
df = pd.read_csv(file_path)

# Columns of `df` that feed the similarity computation.
feature_cols = [
    'danceability',
    'energy',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
]

features = df[feature_cols]

# -----------------------------------------------------------------
# Step 2: standardize the feature columns
# -----------------------------------------------------------------
# Fit the scaler on the selected columns, then transform those same
# columns; `features_scaled` has one row per row of `df`.
scaler = StandardScaler().fit(features)
features_scaled = scaler.transform(features)


# =======================
# 3. Combined visualization (Radar + Bar)
# =======================

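# NOTE: the combined radar + bar visualization is omitted in CBF1 because its output is not needed here (translated from the original note; the wording was ambiguous — confirm).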


# =======================
# 4. Recommendation function (with scores)
# =======================
def recommend_songs(song_name, n=10, plot=True):
    """Return the tracks whose scaled features are most similar to a song.

    The first row of the module-level ``df`` whose ``track_name`` equals
    ``song_name`` is used as the target. Cosine similarity is computed
    between that row of ``features_scaled`` and every row, and the rows are
    returned from most to least similar.

    Rows are addressed by position throughout, so the result does not depend
    on ``df`` carrying a default integer index. This assumes row ``i`` of
    ``features_scaled`` corresponds to row ``i`` of ``df``.

    Args:
        song_name: Exact ``track_name`` value to look up.
        n: Number of rows to keep, passed to ``DataFrame.head``.
        plot: Accepted for backward compatibility; currently unused.

    Returns:
        A DataFrame with the columns ``track_name``, ``track_artist``,
        ``track_album_name`` and ``track_album_release_date`` (similarity
        scores are not attached), or a ``str`` message when ``song_name``
        does not occur in ``df['track_name']``.
    """
    name_matches = (df['track_name'] == song_name).to_numpy()
    if not name_matches.any():
        return f"Song '{song_name}' not found in dataset."

    # Position (not index label) of the first matching row; argmax on a
    # boolean array returns the first True.
    target_pos = int(name_matches.argmax())
    target_artist = df['track_artist'].iloc[target_pos]

    # Feature vector of the target song as a single-row 2-D array.
    song_vec = features_scaled[target_pos].reshape(1, -1)

    # Similarity of the target to every row, ranked in descending order.
    sim_scores = cosine_similarity(song_vec, features_scaled)[0]
    ranked = np.argsort(sim_scores)[::-1]

    # Drop every row that is the same song (same title and same artist),
    # using one vectorized mask instead of per-row lookups.
    is_target = name_matches & (df['track_artist'] == target_artist).to_numpy()
    ranked = ranked[~is_target[ranked]]

    columns = ['track_name', 'track_artist', 'track_album_name', 'track_album_release_date']
    recommendations = df[columns].iloc[ranked]

    # Keep only the best-ranked row per (title, artist) so repeated entries
    # of one track (e.g. remixes or live versions listed under the same
    # title) appear once.
    recommendations = recommendations.drop_duplicates(subset=['track_name', 'track_artist'])
    return recommendations.head(n)




# =======================
# 5. Example usage
# =======================
# Print up to 10 recommendations for the track named "Señorita"; if that
# title is not in the dataset, the not-found message string is printed instead.
print(recommend_songs("Señorita", n=10, plot=True))


                                        track_name       track_artist  \
1828   Wildside - From "Adventures in Babysitting"  Sabrina Carpenter   
30394                                Call Me Maybe   Carly Rae Jepsen   
14616                                     Sideshow      Evan Petruzzi   
1815                                        Domino           Jessie J   
24518                             U Know What's Up       Donell Jones   
12759                               What Can I Say         Boz Scaggs   
30080                                Blurred Lines       Robin Thicke   
4574                                      Runaways          Sam Feldt   
3318                                      Tus Ojos           Belanova   
12820                               CITY OF ANGELS           24kGoldn   

                                        track_album_name  \
1828   Your Favorite Songs from 100 Disney Channel Or...   
30394                                               Kiss   
14616           

: 