In [1]:
import os
import datetime

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from tqdm import tqdm

from scipy.optimize import curve_fit
from yellowbrick.target import FeatureCorrelation
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans 
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score,mean_absolute_error

from sklearn.model_selection import train_test_split

In [2]:
# Load dataset
df = pd.read_csv('data/data.csv')

# Remove brackets from artists
df["artists"]=df["artists"].str.replace("[", "")
df["artists"]=df["artists"].str.replace("]", "")
df["artists"]=df["artists"].str.replace("'", "")

def normalize_column(col):
    """
    col - column in dataframe that needs to be normalized
    """
    max_d = df[col].max()
    min_d = df[col].min()
    df[col] = (df[col] - min_d)/(max_d - min_d)
    
# Normalize num columns min value = 0 and max value = 1
num_types = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
num = df.select_dtypes(include=num_types)
        
for col in num.columns:
    normalize_column(col)
    
# Kmeans clustering
km = KMeans(n_clusters=25)
pred = km.fit_predict(num)
df['pred'] = pred
normalize_column('pred')

# Song recommender
class Song_Recommender():
    """
    Neighbourhood Based Collborative filterng recommendation System using similarity Metrics
    Manhattan Distance calculated for songs & recommend Songs that are similar based given song
    """
    def __init__(self, data):
        self.data_ = data
    
    # Returns recommendations; choose num of songs to recommended
    def get_recommendations(self, song_name, n_top):
        distances = []
        
        # Choosing song_name & drop it from data
        song = self.data_[(self.data_.name.str.lower() == song_name.lower())].head(1).values[0]
        rem_data = self.data_[self.data_.name.str.lower() != song_name.lower()]
        for r_song in tqdm(rem_data.values):
            dist = 0
            for col in np.arange(len(rem_data.columns)):
                
                # Indeces of non-num columns(id,Release date,name,artists)
                if not col in [3,8,14,16]:
                    
                    # Calculate manhattan distance for each num feature
                    dist = dist + np.absolute(float(song[col]) - float(r_song[col]))
            distances.append(dist)
        rem_data['distance'] = distances
        
        # Sorting data ascending by 'distance' feature
        rem_data = rem_data.sort_values('distance')
        columns = ['artists', 'name']
        return rem_data[columns][:n_top]


# Instantiate recommender class
recommender = Song_Recommender(df)

# 5 recommendations 'Hotline Bling' song by Drake Genre= Pop/R&B
recommender.get_recommendations('Hotline Bling', 5)

100%|██████████| 170652/170652 [00:05<00:00, 29292.83it/s]


Unnamed: 0,artists,name
57153,Ariana Grande,bad idea
18125,P!nk,Try
124176,Charli XCX,Boys
108850,BTS,Stay Gold
16407,"Usher, Lil Jon, Ludacris",Yeah! (feat. Lil Jon & Ludacris)


In [3]:
# 5 recommendations 'Earned It' song by The Weeknd Genre= Chamber Pop/R&B
recommender.get_recommendations("Earned It", 5)

100%|██████████| 170652/170652 [00:06<00:00, 28070.49it/s]


Unnamed: 0,artists,name
140410,"Saba, theMIND",BUSY / SIRENS
91905,tobi lou,Just Keep Goin'
37872,Dbangz,Thick Niggas and Anime Tiddies
124194,J. Cole,High For Hours
56847,Takeoff,Casper


In [4]:
# 5 recommendations 'Black Beatles' song by Rae Sremmurd Genre= Hip Hop/Trap
recommender.get_recommendations("Black Beatles", 5)

100%|██████████| 170652/170652 [00:05<00:00, 29185.49it/s]


Unnamed: 0,artists,name
57005,Ariana Grande,in my head
18878,Kevin Gates,Really Really
57263,"iann dior, Lil Baby",Prospect (ft. Lil Baby)
75076,"iann dior, Lil Baby",Prospect (ft. Lil Baby)
37793,"Mustard, Quavo, YG",Want Her
