In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Numeric audio/metadata columns that are standardised before modelling
numeric_features = [
    'popularity', 'danceability', 'energy', 'key', 'loudness',
    'mode', 'speechiness', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo', 'duration_ms',
]

# Read the training set, then discard every row that contains a missing value
data = pd.read_csv('/Users/zac/Codes/Music_Project/GIT_HUB/Musis_Recommendation_Engine/exploration/Data_collection/final_training.csv')
data.dropna(axis=0, how='any', inplace=True)

# Standardise the numeric columns in place; the fitted scaler is kept in
# `scaler` so the same transformation can be reused later.
scaler = StandardScaler()
data[numeric_features] = scaler.fit_transform(data[numeric_features])

# Filter data based on year and language if necessary
def filter_data(df, year, language):
    """Return the candidate songs for the requested year and language.

    Filtering is currently switched off: the intended row mask,
    ``(df['year'] == year) & (df['language'] == language)``, is not applied.
    ``year`` and ``language`` are therefore accepted but ignored, and the
    very same frame object that was passed in is handed back.
    """
    candidates = df  # no rows are removed while the filter is disabled
    return candidates

# Encode the mood as a numerical value for similarity calculation.
# The labels are replaced by their pandas category codes; for this dataset
# that gives ANGRY=0, HAPPY=1, SAD=2, SCARED=3.
data['mood'] = data['mood'].astype('category').cat.codes

# Remove the "Unnamed: 0" column (presumably a leftover index column from when
# the CSV was written - confirm). errors="ignore" keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is gone.
data.drop(columns="Unnamed: 0", errors="ignore", inplace=True)

In [10]:
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Columns the neighbour search operates on: every scaled numeric feature
# followed by the encoded mood (mood is always the last column).
features = [*numeric_features, 'mood']

# Build the nearest-neighbour index over the full dataset
knn = NearestNeighbors(algorithm='auto', n_neighbors=10)
knn.fit(data[features])


In [21]:
def recommend_songs(mood, year, language, num_recommendations=5):
    """Recommend the songs closest to an "average" song with the given mood.

    The query point is built from the column means of the numeric features of
    the candidate songs, with the requested mood code as the last feature. Its
    nearest neighbours among the candidates are returned.

    Parameters
    ----------
    mood : int or str
        Numeric mood code as stored in ``data['mood']``. A numeric string such
        as ``'1'`` is accepted and converted with ``float``.
    year, language
        Forwarded to ``filter_data`` to select the candidate songs.
    num_recommendations : int, default 5
        Maximum number of songs to return. Fewer are returned when there are
        fewer candidate songs than requested.

    Returns
    -------
    pandas.DataFrame or str
        The ``track_name`` and ``artist_name`` columns of the recommended
        songs, or a message string when no candidate songs are found.
    """
    # Filter the dataset based on year and language
    filtered_data = filter_data(data, year, language)

    if filtered_data.empty:
        return "No songs found for the given year and language."

    # Build the query as a one-row DataFrame carrying the training column
    # names, so scikit-learn does not warn about missing feature names.
    query = filtered_data[numeric_features].mean(axis=0).to_frame().T
    query['mood'] = float(mood)
    query = query[features]  # enforce the column order used for fitting

    # kneighbors returns row positions relative to the data the model was
    # fitted on. The global model is only valid when the candidates are the
    # full dataset; otherwise fit a model on the candidates themselves so the
    # positions line up with filtered_data.iloc below.
    if filtered_data.index.equals(data.index):
        model = knn
    else:
        model = NearestNeighbors(algorithm='auto').fit(filtered_data[features])

    # Asking for more neighbours than there are rows raises in scikit-learn
    n_wanted = min(num_recommendations, len(filtered_data))
    _, indices = model.kneighbors(query, n_neighbors=n_wanted)

    recommendations = filtered_data.iloc[indices[0]]
    return recommendations[['track_name', 'artist_name']]

# Example usage: ask for five songs matching mood code 1 (see the legend below)
example_recommendations = recommend_songs('1', 2020, 'English', num_recommendations=5)
print(example_recommendations)


# ANGRY          0
# HAPPY          1
# SAD            2
# SCARED         3


           track_name    artist_name
280          Blackout   Madison Beer
40           O Sathii     Atif aslam
31         Tu Chahiye     Atif Aslam
21   Dil Diyan Gallan     Atif Aslam
264          Medicine  Lewis Capaldi




In [15]:
# Re-read the training file (the mood labels in `data` were replaced by codes),
# drop incomplete rows and attach the category code next to each mood label.
test = (
    pd.read_csv('/Users/zac/Codes/Music_Project/GIT_HUB/Musis_Recommendation_Engine/exploration/Data_collection/final_training.csv')
    .dropna()
    .assign(mood_code=lambda frame: frame['mood'].astype('category').cat.codes)
)

# One row per distinct (label, code) pair, ordered by code
mood_encoding = (
    test[['mood', 'mood_code']]
    .drop_duplicates()
    .sort_values(by='mood_code')
)
print(mood_encoding)

       mood  mood_code
420   ANGRY          0
0     HAPPY          1
78      SAD          2
293  SCARED          3
