In [17]:
# Spotify Recommendation Model Training (Jupyter Notebook Format)

# 1. Imports and Data Loading
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

In [18]:
df = pd.read_csv("top_10000_1950-now.csv")

In [19]:
print(df.head())

                              Track URI                           Track Name  \
0  spotify:track:0vNPJrUrBnMFdCs8b2MTNG                                Fader   
1  spotify:track:0NpvdCO506uO58D4AbKzki                               Sherry   
2  spotify:track:1MtUq6Wp1eQ8PC6BbPCj8P  I Took A Pill In Ibiza - Seeb Remix   
3  spotify:track:59lq75uFIqzUZcgZ4CbqFG                   Let Go for Tonight   
4  spotify:track:7KdcZQ3GJeGdserhK61kfv          The Way I Want To Touch You   

                                       Artist URI(s)  \
0              spotify:artist:4W48hZAnAHVOC2c8WH8pcq   
1              spotify:artist:6mcrZQmgzFGRWf7C0SObou   
2  spotify:artist:2KsP6tYLJlTBvSUxnwlVWa, spotify...   
3              spotify:artist:7qRll6DYV06u2VuRPAVqug   
4              spotify:artist:7BEfMxbaqx6dOpbtlEqScm   

                     Artist Name(s)                             Album URI  \
0                   The Temper Trap  spotify:album:0V59MMtgoruvEqMv18KAOH   
1  Frankie Valli & The Four 

In [21]:
# 2. Feature Selection and Cleaning
audio_features = [
    "Danceability", "Energy", "Loudness", "Speechiness", "Acousticness",
    "Instrumentalness", "Liveness", "Valence", "Tempo"
]
metadata_columns = ["Track Name", "Artist Name(s)", "Album Image URL", "Track URI"]

# Drop rows with missing values
df_clean = df.dropna(subset=audio_features + metadata_columns).reset_index(drop=True)

In [22]:
# 3. Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_clean[audio_features])

In [23]:
# 4. KMeans Clustering
kmeans = KMeans(n_clusters=10, random_state=42, n_init=10)
df_clean["Cluster"] = kmeans.fit_predict(X_scaled)

In [24]:
# 5. Modified Recommendation Function (based on track name)
def get_recommendations_by_name(track_name, top_n=5):
    matches = df_clean[df_clean["Track Name"].str.lower() == track_name.lower()]
    
    if matches.empty:
        return f"Track '{track_name}' not found in dataset."
    
    # Pick the first matching song (you can adjust this logic)
    song_row = matches.iloc[0]
    song_index = song_row.name  # Get the index of the song in df_clean
    
    target_cluster = song_row["Cluster"]
    cluster_songs = df_clean[df_clean["Cluster"] == target_cluster]
    
    # Scale features for cluster
    features_cluster = scaler.transform(cluster_songs[audio_features])
    target_vector = scaler.transform([song_row[audio_features]])  # Rescale just this row
    
    # Cosine similarity
    similarities = cosine_similarity(target_vector, features_cluster).flatten()
    similar_indices = similarities.argsort()[::-1][1:top_n+1]  # Skip the input song

    # Return recommendations
    return cluster_songs.iloc[similar_indices][metadata_columns]

In [25]:
# 6. Example Usage (by track name)
input_track_name = "Blinding Lights"  # Replace with a known song in your dataset
input_song = df_clean[df_clean["Track Name"].str.lower() == input_track_name.lower()].iloc[0][["Track Name", "Artist Name(s)"]]
recommendations = get_recommendations_by_name(input_track_name)

input_song, recommendations



(Track Name        Blinding Lights
 Artist Name(s)         The Weeknd
 Name: 4870, dtype: object,
                    Track Name           Artist Name(s)  \
 73                 Vegas Girl            Conor Maynard   
 1696                Come Home            Daniel Powter   
 5094          Blinding Lights               The Weeknd   
 3404  Don't Want to Leave You       Scouting For Girls   
 5963          Let Me Love You  DJ Snake, Justin Bieber   
 
                                         Album Image URL  \
 73    https://i.scdn.co/image/ab67616d0000b2731dbee6...   
 1696  https://i.scdn.co/image/ab67616d0000b27303fbeb...   
 5094  https://i.scdn.co/image/ab67616d0000b273c464fa...   
 3404  https://i.scdn.co/image/ab67616d0000b2730ebb38...   
 5963  https://i.scdn.co/image/ab67616d0000b2735045de...   
 
                                  Track URI  
 73    spotify:track:7K5u9x1qd2WyEkTN7ntm1A  
 1696  spotify:track:2xaSVQ7d7xvRkHqXIFyKRK  
 5094  spotify:track:0sf12qNH5qcw8qpgymFOqD  
