In [3]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv('spotify_songs.csv')
# Explore the data
print(df.head())
print(df.info())
print(df.isnull().sum())


                 track_id                                         track_name  \
0  6f807x0ima9a1j3VPbc7VN  I Don't Care (with Justin Bieber) - Loud Luxur...   
1  0r7CVbZTWZgbTCYdfa2P31                    Memories - Dillon Francis Remix   
2  1z1Hg7Vb0AhHDiEmnDE79l                    All the Time - Don Diablo Remix   
3  75FpbthrwQmzHlBJLuGdC7                  Call You Mine - Keanu Silva Remix   
4  1e8PAfcKUYoKkxPhrHqw4x            Someone You Loved - Future Humans Remix   

       track_artist  track_popularity          track_album_id  \
0        Ed Sheeran                66  2oCs0DGTsRO98Gh5ZSl2Cx   
1          Maroon 5                67  63rPSO264uRjW1X5E6cWv6   
2      Zara Larsson                70  1HoSmj2eLcsrR0vE9gThr4   
3  The Chainsmokers                60  1nqYsOef1yKKuGOVchbsk6   
4     Lewis Capaldi                69  7m7vv9wlQ4i0LFuJiE2zsQ   

                                    track_album_name track_album_release_date  \
0  I Don't Care (with Justin Bieber) [Loud Luxu

In [2]:
from sklearn.preprocessing import MinMaxScaler

# Drop unnecessary columns
df = df.drop(columns=['track_album_id', 'playlist_id'])

# Fill missing values
df.fillna(0, inplace=True)

# Normalize numerical features
scaler = MinMaxScaler()
numerical_cols = ['danceability', 'energy', 'loudness', 'tempo', 'duration_ms']
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])


In [8]:
from sklearn.neighbors import NearestNeighbors
# Define features
features = df[['danceability', 'energy', 'valence', 'tempo']]

# Fit Nearest Neighbors model
knn = NearestNeighbors(n_neighbors=6, metric='cosine', algorithm='auto')


knn.fit(features)

# Function to recommend songs
def recommend_songs_ann(track_name, n=5):
    # Find the index of the track
    idx = df[df['track_name'] == track_name].index[0]
    
    # Find nearest neighbors
    distances, indices = knn.kneighbors([features.iloc[idx]])
    
    # Get the top 'n' songs (excluding the input track itself)
    top_songs = [df.iloc[i]['track_name'] for i in indices[0][1:n+1]]
    return top_songs

# Test the optimized recommendation function
print(recommend_songs_ann('Memories - Dillon Francis Remix'))


['Memories - Dillon Francis Remix', 'Hola Señorita - Robin Schulz Remix', 'Hola Señorita - Robin Schulz Remix', 'Hola Señorita - Robin Schulz Remix', 'Tear Me Down (feat. Philip Rustad)']


