In [1]:
#Load and Clean the Dataset
import pandas as pd
import numpy as np

# Load the dataset from the specified path
file_path = r"C:\Users\Jonathan Gonzalez\Final Project\P4-main\P4-main\resources\spotify_songs.csv"
songs_df = pd.read_csv(file_path)

# Remove duplicates based on track_id
songs_df = songs_df.drop_duplicates(subset='track_id')

# Keep track_name and track_artist for final recommendation
songs_df_pp = songs_df[['track_id', 'track_name', 'track_artist', 'track_album_release_date', 'danceability', 
                        'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 
                        'instrumentalness', 'liveness', 'track_popularity']]

# Converting and extracting date information
songs_df_pp['track_album_release_date'] = pd.to_datetime(songs_df_pp['track_album_release_date'], format='mixed')
songs_df_pp['release_month'] = songs_df_pp['track_album_release_date'].dt.month

# Encoding categorical variables using one-hot encoding
songs_df_pp_encoded = pd.get_dummies(songs_df_pp)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  songs_df_pp['track_album_release_date'] = pd.to_datetime(songs_df_pp['track_album_release_date'], format='mixed')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  songs_df_pp['release_month'] = songs_df_pp['track_album_release_date'].dt.month


In [2]:
#Preprocess the Data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Define the features and target based on the available columns
features = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness']
target = 'track_popularity'  # Assuming 'track_popularity' is the target variable indicating your preference

# Create feature matrix X and target vector y
X = songs_df_pp_encoded[features]
y = songs_df_pp_encoded[target]

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizing the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [3]:
# Build and Train the Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Define the neural network model
model = Sequential([
    Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')  # Linear activation for regression
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])

# Train the model
history = model.fit(X_train_scaled, y_train, validation_data=(X_test_scaled, y_test), epochs=50, batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m709/709[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 1176.0173 - mae: 28.1979 - val_loss: 556.3094 - val_mae: 19.6553
Epoch 2/50
[1m709/709[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 541.8455 - mae: 19.4759 - val_loss: 548.4000 - val_mae: 19.5014
Epoch 3/50
[1m709/709[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 540.3450 - mae: 19.4184 - val_loss: 544.6928 - val_mae: 19.4364
Epoch 4/50
[1m709/709[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 533.2655 - mae: 19.3370 - val_loss: 542.0204 - val_mae: 19.4737
Epoch 5/50
[1m709/709[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 532.8700 - mae: 19.2981 - val_loss: 543.2704 - val_mae: 19.4517
Epoch 6/50
[1m709/709[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 537.7310 - mae: 19.3908 - val_loss: 540.1265 - val_mae: 19.4657
Epoch 7/50
[1m709/709[0m [32m━━━━━━━━━━━━━━━━━━━

In [4]:
#Define Functions for User Input and Recommendations
# Function to get user preferences on a scale of "low", "medium", and "high"
def get_user_preferences():
    scale_mapping = {"low": 0.3, "medium": 0.6, "high": 0.9}
    
    preferences = {}
    preferences['danceability'] = scale_mapping[input("Enter your preference for Danceability (low, medium, high): ").lower()]
    preferences['energy'] = scale_mapping[input("Enter your preference for Energy (low, medium, high): ").lower()]
    preferences['key'] = scale_mapping[input("Enter your preference for Key (low, medium, high): ").lower()]
    preferences['loudness'] = scale_mapping[input("Enter your preference for Loudness (low, medium, high): ").lower()]
    preferences['mode'] = scale_mapping[input("Enter your preference for Mode (low, medium, high): ").lower()]
    preferences['speechiness'] = scale_mapping[input("Enter your preference for Speechiness (low, medium, high): ").lower()]
    preferences['acousticness'] = scale_mapping[input("Enter your preference for Acousticness (low, medium, high): ").lower()]
    preferences['instrumentalness'] = scale_mapping[input("Enter your preference for Instrumentalness (low, medium, high): ").lower()]
    preferences['liveness'] = scale_mapping[input("Enter your preference for Liveness (low, medium, high): ").lower()]
    return preferences

def recommend_songs(model, scaler, preferences, original_df, features):
    # Convert preferences to DataFrame
    user_df = pd.DataFrame([preferences])
    
    # Scale the user preferences
    user_scaled = scaler.transform(user_df)
    
    # Predict the popularity for the user's preferences
    predicted_popularity = model.predict(user_scaled)
    
    # Add the predicted popularity to the user_df
    user_df['predicted_popularity'] = predicted_popularity
    
    # Combine with the original dataset to find similar songs
    similar_songs = original_df.copy()
    for feature in preferences.keys():
        similar_songs[feature] = similar_songs[feature] - preferences[feature]
    similar_songs['distance'] = np.sqrt(np.sum(np.square(similar_songs[features]), axis=1))
    
    # Drop duplicates based on track_name and track_artist
    similar_songs = similar_songs.drop_duplicates(subset=['track_name', 'track_artist'])
    
    # Get the top 10 similar songs
    top_10_songs = similar_songs.sort_values(by='distance').head(10)
    return top_10_songs[['track_name', 'track_artist', 'distance']]


In [6]:
#Testing and Getting Recommendations
# Get user preferences
user_preferences = get_user_preferences()

# Recommend songs based on user preferences
top_10_songs = recommend_songs(model, scaler, user_preferences, songs_df_pp, features)

# Print the recommended songs
print("Top 10 Recommended Songs Based on Your Preferences:")
print(top_10_songs)

Enter your preference for Danceability (low, medium, high):  low
Enter your preference for Energy (low, medium, high):  high
Enter your preference for Key (low, medium, high):  medium
Enter your preference for Loudness (low, medium, high):  low
Enter your preference for Mode (low, medium, high):  medium
Enter your preference for Speechiness (low, medium, high):  high
Enter your preference for Acousticness (low, medium, high):  high
Enter your preference for Instrumentalness (low, medium, high):  low
Enter your preference for Liveness (low, medium, high):  medium


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
Top 10 Recommended Songs Based on Your Preferences:
                               track_name   track_artist  distance
10438                               Crema           Owin  1.529791
24889                This Is Now We Do It  Montel Jordan  1.583977
11031                    Makina de Armado           Duki  1.626554
27535            Sound Of The Underground      Zeds Dead  1.626708
27716      Escape From Love - Curbi Remix     Eva Simons  1.661602
32333  Walk The Line - Laurent Wolf Remix   Laurent Wolf  1.760806
28031                WTF!? - Original Mix         Zomboy  1.921622
27481                             Latency  Martin Garrix  1.933935
31850                      Enjoy the Ride       Krewella  1.953980
30339                     Vidrado Em Você       Dj Guuga  1.991521
