In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the raw track table from disk.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where this exact location exists.
data = pd.read_csv('/Users/zac/Codes/Music_Project/GIT_HUB/Musis_Recommendation_Engine/data/testing/testing.csv')

# Discard every row that contains at least one missing value (mutates `data`).
data.dropna(axis=0, how='any', inplace=True)

# Columns that are standardised before being used as model features.
numeric_features = [
    'popularity',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_ms',
]

# Fit the scaler on those columns and overwrite them with the scaled values.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[numeric_features])
data[numeric_features] = scaled_values

# Hook for restricting the track table by year and language
def filter_data(df, year, language):
    """Return the rows of ``df`` to recommend from.

    Filtering is currently switched off: ``year`` and ``language`` are accepted
    but ignored, and the very same ``df`` object is handed back.

    The intended (disabled) rule keeps rows where ``df['year'] == year`` and
    ``df['language'] == language``.
    NOTE(review): confirm the table has a ``language`` column before enabling it.
    """
    unfiltered = df
    return unfiltered

# Encode the mood as a numerical value for similarity calculation.
# The column is overwritten with integer codes, so the label order is kept in
# `mood_categories`: mood_categories[code] is the original mood label. Without
# it a mood name could not be translated back to (or from) its code later on.
mood_as_category = data['mood'].astype('category')
mood_categories = list(mood_as_category.cat.categories)
data['mood'] = mood_as_category.cat.codes


In [7]:
# Display the preprocessed frame: the numeric feature columns are standardised
# and `mood` holds integer category codes at this point.
data

Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,mood
0,BLACKPINK,Hope Not,3eZD5DZGibwxMAOaCMBg3k,1.268712,2019,k-pop,-0.842386,-1.587457,-0.564197,0.000810,-0.981307,-0.817291,0.867202,-0.113891,-0.724856,-1.688190,-0.585567,-0.625986,4,3
1,BTS,Dream Glow (BTS World Original Soundtrack) - P...,4c1WgUnHXq2LEncGHKgj1I,1.566902,2019,acoustic,0.699226,0.179937,-1.692591,0.573532,-0.981307,-0.469397,-0.488345,-0.113891,-0.616855,-0.006045,0.628198,-0.765965,4,2
2,EVERGLOW,Adios,0sq2QUCf3ykmfYxjCDWcir,1.865091,2019,rock,-0.179235,-0.009625,1.128394,-0.067759,-0.981307,-0.316039,-0.866748,1.274462,0.470707,-0.665892,0.103904,-0.699777,4,3
3,Hwa Sa,TWIT,6qyz1KOlGJsKYJ4ZsRmRSD,1.566902,2019,rock,0.216934,0.586940,-0.282099,0.805384,-0.981307,1.619392,-0.388736,-0.113841,0.606652,0.193768,0.404165,-0.635548,4,1
4,TOMORROW X TOGETHER,Our Summer,22PEycypPsZpzlMxTgukiQ,0.672333,2019,rock,0.733676,-0.762301,0.282099,-0.261626,1.019049,-0.747712,-0.858394,-0.113891,-0.133494,-1.279271,-0.665793,0.128378,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,BIBI,Step?,3fU8FWNIgHEOepLTidZWyq,-1.713184,2019,dubstep,-0.084499,-1.063371,-1.128394,-1.387339,1.019049,-0.460877,1.296346,-0.105358,-0.718059,-1.534845,1.317707,-0.601024,4,3
102,Stray Kids,You Can STAY,4YQel4FiTWYHUn6TXMCuPa,-1.116804,2019,dubstep,0.656164,0.274719,0.564197,-1.007497,1.019049,-0.436737,-0.808825,-0.113891,-0.541330,-0.865705,-1.160792,0.056430,4,3
103,Yoon Mirae,Flower,2m5Ex34NOwIWOwHthEqLco,0.672333,2019,dubstep,-2.866291,-1.297537,-1.410493,-1.060280,1.019049,-0.698013,2.087705,-0.113833,-0.571540,-1.595254,0.530867,1.768504,4,3
104,ATEEZ,Light,2KnNV0Ahhb5qkK83LKymZE,-1.713184,2019,dubstep,0.311670,0.046128,-1.128394,-0.067266,-0.981307,0.033276,0.182145,-0.113891,-0.627429,0.862908,-1.806988,0.463160,4,2


In [8]:
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Model inputs: every scaled numeric column, with the mood code as the last one
features = [*numeric_features, 'mood']

# Build the neighbour index (10 neighbours by default) and fit it on all tracks
knn = NearestNeighbors(algorithm='auto', n_neighbors=10)
knn.fit(data[features])


In [10]:
def recommend_songs(mood, year, language, num_recommendations=5, mood_labels=None):
    """Recommend the tracks closest to an "average track" with the given mood.

    The query point uses the column means of ``numeric_features`` over the
    filtered rows, with the requested mood code as its last coordinate. The
    nearest rows of the filtered data are returned.

    Parameters
    ----------
    mood : int or str
        Either an integer mood code as stored in ``data['mood']``, or a mood
        label. A label can only be translated when ``mood_labels`` is given.
    year, language
        Passed through to ``filter_data``.
    num_recommendations : int, default 5
        Maximum number of tracks to return; capped at the number of rows left
        after filtering.
    mood_labels : sequence of str, optional
        Mood labels ordered by their integer code (position == code).

    Returns
    -------
    pandas.DataFrame or str
        ``track_name`` / ``artist_name`` of the nearest tracks, nearest first,
        or a message string when filtering leaves no rows.

    Raises
    ------
    ValueError
        If ``mood_labels`` is given and ``mood`` is not one of the labels.
    """
    import warnings

    # Filter the dataset based on year and language
    filtered_data = filter_data(data, year, language)

    if filtered_data.empty:
        return "No songs found for the given year and language."

    # Resolve the mood to the integer code used in the data.
    # Encoding a single label as its own categorical always yields code 0
    # (one value -> one category), so the label has to be looked up against
    # the full label list instead.
    if isinstance(mood, (int, np.integer)):
        mood_code = int(mood)
    elif mood_labels is not None:
        labels = list(mood_labels)
        if mood not in labels:
            raise ValueError(f"Unknown mood {mood!r}; expected one of {labels}")
        mood_code = labels.index(mood)
    else:
        # No way to translate the label here; keep the historical result (0)
        # but make the limitation visible instead of silently ignoring `mood`.
        warnings.warn(
            "mood label cannot be mapped to a code without mood_labels; "
            "falling back to mood code 0",
            stacklevel=2,
        )
        mood_code = 0

    # Query point: average numeric profile of the candidates + requested mood.
    # Assumes `features` is `numeric_features` followed by the mood column.
    feature_means = filtered_data[numeric_features].mean(axis=0).to_numpy(dtype=float)
    query = np.append(feature_means, float(mood_code)).reshape(1, -1)

    # kneighbors() returns positions within the rows the model was fitted on,
    # so fit on exactly the rows we select from. Cap the neighbour count at the
    # number of candidates, since asking for more raises inside scikit-learn.
    n_neighbors = min(num_recommendations, len(filtered_data))
    neighbor_index = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto')
    neighbor_index.fit(filtered_data[features].to_numpy())
    _, indices = neighbor_index.kneighbors(query)

    recommendations = filtered_data.iloc[indices[0]]
    return recommendations[['track_name', 'artist_name']]

# Demo call: request five recommendations for a 'happy' mood
example_recommendations = recommend_songs(
    mood='happy',
    year=2020,
    language='English',
    num_recommendations=5,
)
print(example_recommendations)


                             track_name          artist_name
46  Highway to Heaven - English Version              NCT 127
47                              Deja Vu         Dreamcatcher
70                        %%(Eung Eung)                Apink
4                            Our Summer  TOMORROW X TOGETHER
89                           Zimzalabim           Red Velvet


