# **Image Preprocessing and KNN model Training**

In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
import joblib

# Load the dataset
df = pd.read_csv('/content/spotify.csv')

# Preprocess the data
# Drop unnecessary columns and handle missing values
df = df.dropna()

# Select relevant numerical features for the model
numerical_features = [
    'danceability', 'energy', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence',
    'tempo', 'duration_ms'
]

# Encode categorical features
categorical_features = ['track_artist', 'playlist_genre']
df_encoded = pd.get_dummies(df, columns=categorical_features)

# Define feature importance weights for numerical features
feature_importance_weights = {
    'danceability': 1.2,
    'energy': 1.0,
    'loudness': 0.8,
    'mode': 1.0,
    'speechiness': 0.9,
    'acousticness': 0.7,
    'instrumentalness': 0.6,
    'liveness': 0.5,
    'valence': 1.1,
    'tempo': 0.9,
    'duration_ms': 0.8
}

# Define a uniform weight for all categorical features
categorical_weight = 2.0

# Sample 70% of the data for training
df_sampled = df_encoded.sample(frac=0.7, random_state=42)

# Standardize the numerical features
scaler = StandardScaler()
df_sampled[numerical_features] = scaler.fit_transform(df_sampled[numerical_features])

# Apply weights to numerical features
for feature, weight in feature_importance_weights.items():
    if feature in df_sampled.columns:
        df_sampled[feature] *= weight

# Apply uniform weight to all one-hot encoded categorical features
for feature in df_sampled.columns:
    if feature.startswith(tuple(categorical_features)):
        df_sampled[feature] *= categorical_weight

# Extract feature columns for training
feature_columns = numerical_features + list(df_encoded.columns[df_encoded.columns.str.startswith(tuple(categorical_features))])
X = df_sampled[feature_columns]

# Train the KNN model on the sampled data
knn = NearestNeighbors(n_neighbors=10, algorithm='auto')
knn.fit(X)

# Save the model, scaler, and columns
joblib.dump(knn, 'knn_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(feature_columns, 'columns.pkl')

['columns.pkl']

In [4]:
import pandas as pd
import joblib
from IPython.display import display, clear_output
import ipywidgets as widgets
import os

# Load the model, scaler, and columns
knn = joblib.load('knn_model.pkl')
scaler = joblib.load('scaler.pkl')
feature_columns = joblib.load('columns.pkl')

# Load the original dataset
df = pd.read_csv('/content/spotify.csv')

# Extract unique artists, genres, and songs
unique_artists = df['track_artist'].unique()
unique_genres = df['playlist_genre'].unique()
unique_songs = df['track_name'].unique()

# Function to create a user profile
def create_user_profile(preferences):
    user_profile = pd.DataFrame(preferences, index=[0])
    user_profile_encoded = pd.get_dummies(user_profile)
    user_profile_encoded = user_profile_encoded.reindex(columns=feature_columns, fill_value=0)
    return user_profile_encoded

# Function to get recommendations
def get_recommendations(preferences):
    user_profile = create_user_profile(preferences)

    # Standardize only the numerical features in the user profile
    numerical_features = [
        'danceability', 'energy', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence',
        'tempo', 'duration_ms'
    ]
    user_profile[numerical_features] = scaler.transform(user_profile[numerical_features])

    # Get song recommendations
    distances, indices = knn.kneighbors(user_profile, n_neighbors=10)
    recommendations = df.iloc[indices[0]]
    return recommendations[['track_name', 'track_artist']]

# Function to save recommendations for custom inputs
def save_recommendations_custom(preferences, recommendations):
    preferences_list = [preferences]
    preferences_df = pd.DataFrame({'input_features': preferences_list})
    combined_df = pd.concat([preferences_df, recommendations], axis=1)
    if not os.path.isfile('recommendations.csv'):
        combined_df.to_csv('recommendations.csv', index=False)
    else:
        combined_df.to_csv('recommendations.csv', mode='a', header=False, index=False)

# Function to get recommendations based on a song name
def get_recommendations_from_song(song_name):
    song = df[df['track_name'] == song_name].iloc[0]
    song_features = song[[
        'danceability', 'energy', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence',
        'tempo', 'duration_ms'
    ]].to_dict()
    song_features['track_artist'] = song['track_artist']
    song_features['playlist_genre'] = song['playlist_genre']
    return get_recommendations(song_features)

# Widgets for user inputs
artist = widgets.Dropdown(options=unique_artists, description='Artist:')
genre = widgets.Dropdown(options=unique_genres, description='Genre:')
danceability = widgets.FloatSlider(description='Danceability:', min=0.0, max=0.99, step=0.01, value=0.5)
energy = widgets.FloatSlider(description='Energy:', min=0.1, max=1.0, step=0.01, value=0.5)
loudness = widgets.FloatSlider(description='Loudness:', min=-46.0, max=1.2, step=0.1, value=-5.0)
mode = widgets.IntSlider(description='Mode:', min=0, max=1, step=1, value=1)
speechiness = widgets.FloatSlider(description='Speechiness:', min=0.0, max=0.91, step=0.01, value=0.1)
acousticness = widgets.FloatSlider(description='Acousticness:', min=0.0, max=0.99, step=0.01, value=0.1)
instrumentalness = widgets.FloatSlider(description='Instrumentalness:', min=0.0, max=0.99, step=0.01, value=0.0)
liveness = widgets.FloatSlider(description='Liveness:', min=0.0, max=0.99, step=0.01, value=0.1)
valence = widgets.FloatSlider(description='Valence:', min=0.0, max=0.99, step=0.01, value=0.5)
tempo = widgets.IntSlider(description='Tempo:', min=0, max=239, step=1, value=120)
duration_ms = widgets.IntSlider(description='Duration (ms):', min=40000, max=500000, step=1000, value=200000)
song_name_input = widgets.Dropdown(options=unique_songs, description='Song Name:')

# Buttons to get recommendations
button = widgets.Button(description="Get Recommendations by Preferences")
button.style.font_weight = 'bold'
button.layout = widgets.Layout(width='300px', height='50px')

# Display input widgets
print("Customise your parameters and get the song recommendations tailored to your choice:")
display(artist, genre, danceability, energy, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, duration_ms)
display(button)

# Function to handle button click for preferences
def on_button_clicked(b):
    clear_output(wait=True)  # Clear previous output
    # Re-display widgets and buttons
    print("Customise your parameters and get the song recommendations tailored to your choice:")
    display(artist, genre, danceability, energy, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, duration_ms)
    display(button)

    preferences = {
        'danceability': danceability.value,
        'energy': energy.value,
        'loudness': loudness.value,
        'mode': mode.value,
        'speechiness': speechiness.value,
        'acousticness': acousticness.value,
        'instrumentalness': instrumentalness.value,
        'liveness': liveness.value,
        'valence': valence.value,
        'tempo': tempo.value,
        'duration_ms': duration_ms.value,
        'track_artist': artist.value,
        'playlist_genre': genre.value
    }
    recommendations = get_recommendations(preferences)
    display(recommendations)
    save_recommendations_custom(preferences, recommendations)

# Link the button click events to the functions
button.on_click(on_button_clicked)


Customise your parameters and get the song recommendations tailored to your choice:


Dropdown(description='Artist:', options=('Ed Sheeran', 'Maroon 5', 'Zara Larsson', 'The Chainsmokers', 'Lewis …

Dropdown(description='Genre:', options=('pop', 'rap', 'rock', 'latin', 'r&b', 'edm'), value='pop')

FloatSlider(value=0.33, description='Danceability:', max=0.99, step=0.01)

FloatSlider(value=0.74, description='Energy:', max=1.0, min=0.1, step=0.01)

FloatSlider(value=-5.0, description='Loudness:', max=1.2, min=-46.0)

IntSlider(value=1, description='Mode:', max=1)

FloatSlider(value=0.1, description='Speechiness:', max=0.91, step=0.01)

FloatSlider(value=0.1, description='Acousticness:', max=0.99, step=0.01)

FloatSlider(value=0.0, description='Instrumentalness:', max=0.99, step=0.01)

FloatSlider(value=0.1, description='Liveness:', max=0.99, step=0.01)

FloatSlider(value=0.5, description='Valence:', max=0.99, step=0.01)

IntSlider(value=120, description='Tempo:', max=239)

IntSlider(value=200000, description='Duration (ms):', max=500000, min=40000, step=1000)

Button(description='Get Recommendations by Preferences', layout=Layout(height='50px', width='300px'), style=Bu…

Unnamed: 0,track_name,track_artist
16844,Mine By Tomorrow,Hartman
20167,King Of Drums - Original Mix,Genairo Nvilla
22637,Yo soy el punto cubano,David Blanco
18922,Siente El Boom,"Tito ""El Bambino"""
17300,This Girl (Kungs Vs. Cookin' On 3 Burners) - K...,Kungs
21569,Before You Go,Lewis Capaldi
13616,The Killing Moon,Echo & the Bunnymen
21143,$. A. N. T. E. R. Í. A.,Doble Porcion
19622,Energía,Alexis y Fido
16513,Like Water,Paisley Pink
