## Music Recommendation System (Machine Learning)

This project is aimed upon building a music recommendation system that gives the user recommendations on music based on his music taste by analysing his previously heard music and playlist. This project is done in two ways, using 'User - to - User Recommendation' and 'Item - to - Item Recommendation'. Birch, MiniBatchKMeans and KMeans algorithms are being used along with 'Surprise' module to compute the similarity between recommendations and user's already existing playlist for evaluation

### Obtaining Data

In [1]:
import pandas as pd
import numpy as np

In [2]:
final = pd.read_csv('datasets/final/final.csv')
metadata = pd.read_csv('datasets/final/metadata.csv')

### Model Selection - K Means Algorithm

In [3]:
from sklearn.cluster import KMeans
from sklearn.utils import shuffle

In [6]:
final.columns

Index(['Unnamed: 0', 'track_id', 'acousticness', 'danceability', 'energy',
       'instrumentalness', 'liveness', 'speechiness', 'tempo', 'valence',
       ...
       'Holiday', 'Salsa', 'NuJazz', 'HipHop Beats', 'Modern Jazz', 'Turkish',
       'Tango', 'Fado', 'Christmas', 'Instrumental'],
      dtype='object', length=930)

In [6]:
final.head(5).T

Unnamed: 0,0,1,2,3,4
Unnamed: 0,0.000000,1.000000,2.000000,3.000000,4.000000
track_id,2.000000,3.000000,5.000000,10.000000,134.000000
acousticness,0.416675,0.374408,0.043567,0.951670,0.452217
danceability,0.675894,0.528643,0.745566,0.658179,0.513238
energy,0.634476,0.817461,0.701470,0.924525,0.560410
...,...,...,...,...,...
Turkish,0.000000,0.000000,0.000000,0.000000,0.000000
Tango,0.000000,0.000000,0.000000,0.000000,0.000000
Fado,0.000000,0.000000,0.000000,0.000000,0.000000
Christmas,0.000000,0.000000,0.000000,0.000000,0.000000


In [8]:
final = shuffle(final)

In [9]:
X = final.loc[[i for i in range(0, 6000)]]
Y = final.loc[[i for i in range(6000, final.shape[0])]]

In [10]:
X = shuffle(X)
Y = shuffle(Y)

In [14]:
metadata

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,AWOL - A Way Of Life,AWOL,HipHop,Food
3,AWOL - A Way Of Life,AWOL,HipHop,Electric Ave
5,AWOL - A Way Of Life,AWOL,HipHop,This World
10,Constant Hitmaker,Kurt Vile,Pop,Freeway
134,AWOL - A Way Of Life,AWOL,HipHop,Street Music
...,...,...,...,...
124857,Do You Know The Word,BASIC,AvantGarde|International|,I'm Wearing My Dancing Pants
124862,Do You Know The Word,BASIC,AvantGarde|International|,Me Funky You Jane
124863,Do You Know The Word,BASIC,AvantGarde|International|,Ain't Mad at Ya
124864,Do You Know The Word,BASIC,AvantGarde|International|,This Is Not the End


In [13]:
metadata = metadata.set_index('track_id')

In [16]:
# X.drop(['label'], axis= 1, inplace= True)

In [17]:
kmeans = KMeans(n_clusters=6)

In [18]:
Y.head()

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Holiday,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental
6894,6894,29769,0.242325,0.432492,0.322405,0.829397,0.390208,0.230364,90.432,0.268099,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8070,8070,34708,0.966225,0.46172,0.531951,0.002665,0.113224,0.587401,89.151,0.57916,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12725,12725,112560,0.98897,0.461875,0.041411,0.901645,0.107383,0.056882,53.264,0.189274,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10935,10935,51765,0.078156,0.252227,0.808472,0.822721,0.336813,0.063776,168.559,0.85663,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10962,10962,52229,0.001668,0.201674,0.570412,0.915632,0.105019,0.039046,82.939,0.182293,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
def fit(df, algo, flag=0):
    if flag:
        algo.fit(df)
    else:
         algo.partial_fit(df)          
    df['label'] = algo.labels_
    return (df, algo)

In [20]:
def predict(t, Y):
    y_pred = t[1].predict(Y)
    mode = pd.Series(y_pred).mode()
    return t[0][t[0]['label'] == mode.loc[0]]

In [21]:
def recommend(recommendations, meta, Y):
    dat = []
    for i in Y['track_id']:
        dat.append(i)
    genre_mode = meta.loc[dat]['genre'].mode()
    artist_mode = meta.loc[dat]['artist_name'].mode()
    return meta[meta['genre'] == genre_mode.iloc[0]], meta[meta['artist_name'] == artist_mode.iloc[0]], meta.loc[recommendations['track_id']]

In [22]:
t = fit(X, kmeans, 1)

In [23]:
recommendations = predict(t, Y)

In [24]:
output = recommend(recommendations, metadata, Y)

In [25]:
genre_recommend, artist_name_recommend, mixed_recommend = output[0], output[1], output[2]

In [26]:
genre_recommend.shape

(3892, 4)

In [27]:
artist_name_recommend.shape

(52, 4)

In [28]:
mixed_recommend.shape

(1150, 4)

In [29]:
# Genre wise recommendations
genre_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
153,Arc and Sender,Arc and Sender,Rock,Hundred-Year Flood
154,Arc and Sender,Arc and Sender,Rock,Squares And Circles
155,unreleased demo,Arc and Sender,Rock,Maps of the Stars Homes
169,Boss of Goth,Argumentix,Rock,Boss of Goth
170,Nightmarcher,Argumentix,Rock,Industry Standard Massacre


In [30]:
# Artist wise recommendations
artist_name_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34660,Zehu,51%,AvantGarde|International|Blues|Jazz|,Hadri Ha'Kat
34661,Zehu,51%,AvantGarde|International|Blues|Jazz|,Blender Tzivoni
34662,Zehu,51%,AvantGarde|International|Blues|Jazz|,Naniah
34663,Zehu,51%,AvantGarde|International|Blues|Jazz|,Yoter Miday
34664,Zehu,51%,AvantGarde|International|Blues|Jazz|,"Yamim, Lielot"


In [31]:
# Mixed Recommendations
mixed_recommend.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
19433,Atlas Sound Live at ATP-NY 2009 on WFMU,Atlas Sound,AvantGarde|International|,Don Fogelberg / Dave Matthews rap
14342,netBloc Vol. 22: Life on Ceres,Kiddo,Electronic,For The First Time
23278,The Yaouhl! Mashups,Gillicuddy,AvantGarde|International|,"I know, I know"
13217,netBloc Vol. 09: Lo-Fi Adventures on Planet Rh...,The Unfinished Sympathy,Rock,Teenage Kicks
17021,Split with Jake Vida,Tusk Lord,AvantGarde|International|,Tidal Bore


In [32]:
recommendations.head()

Unnamed: 0.1,Unnamed: 0,track_id,acousticness,danceability,energy,instrumentalness,liveness,speechiness,tempo,valence,...,Salsa,NuJazz,HipHop Beats,Modern Jazz,Turkish,Tango,Fado,Christmas,Instrumental,label
4844,4844,19433,0.796041,0.503507,0.031072,0.0,0.124561,0.902207,58.426,0.218378,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3589,3589,14342,0.212583,0.710613,0.397395,0.941276,0.155592,0.034283,106.008,0.823302,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
5752,5752,23278,0.990841,0.463714,0.329223,0.946983,0.196328,0.037285,62.536,0.151162,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
3329,3329,13217,0.091283,0.616178,0.695149,0.132674,0.089085,0.034032,135.922,0.561843,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4321,4321,17021,0.688388,0.19631,0.465892,0.941985,0.108535,0.139271,155.059,0.030413,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2


In [26]:
artist_name_recommend['artist_name'].value_counts()

51%    52
Name: artist_name, dtype: int64

In [27]:
genre_recommend['genre'].value_counts()

Rock    3892
Name: genre, dtype: int64

In [28]:
genre_recommend['artist_name'].value_counts()

Glove Compartment               65
Blah Blah Blah                  62
Mors Ontologica                 50
Les Baudouins Morts             38
Kraus                           35
                                ..
Alone in 1982                    1
Ostrich Tuning                   1
The Dalai Lama Rama Fa Fa Fa     1
The Rusty Bells                  1
Lost Boy                         1
Name: artist_name, Length: 725, dtype: int64

#### Testing

In [29]:
testing = Y.iloc[6:12]['track_id']

In [30]:
testing

7589     32889
6816     29020
7214     31383
9187     41079
7638     33006
10386    48203
Name: track_id, dtype: int64

In [31]:
ids = testing.loc[testing.index]

In [32]:
songs = metadata.loc[testing.loc[list(testing.index)]]

In [33]:
songs

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
32889,Malachi,Hargreaves / Noyes / Duplant,Jazz,se lever avant le jour
29020,Italian Dub Community Showcase Vol. 2,Italian Dub Community,AvantGarde|International|,"Fetta Ketta - ""White Lion Dub"""
31383,Stuck In My Soul,Friends or Whatever,AvantGarde|International|,Walk Through Walls (Studio Haen Remix)
41079,netlabelism.com - Compilation 10/10,Izmar,Electronic,Unity
33006,Astor Bell Anniversary 1,Socket Science,Electronic,Sidewinding (Giuseppe Remix)
48203,Full Blush EP,Full Blush,Electronic,Swuggers


In [33]:
re = predict(t, Y.iloc[6:12])

In [34]:
output = recommend(re, metadata, Y.iloc[6:12])

In [35]:
ge_re, ge_ar, ge_mix = output[0], output[1], output[2]

In [36]:
ge_re.head()

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
384,Summer Set,Blanketship,Electronic,Baja Jones
386,Summer Set,Blanketship,Electronic,Clapartroach
387,Summer Set,Blanketship,Electronic,I wish I wish
396,On the Back of a Dying Beast: Volume 1,Borful Tang,Electronic,Juggernaut Soliloquy
397,On the Back of a Dying Beast: Volume 1,Borful Tang,Electronic,The Tides Of Land


In [37]:
ge_ar.head(10)

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
37899,Slam Funk,Broke For Free,AvantGarde|International|Blues|,Our Ego [Feat. Different Visitor]
37900,Slam Funk,Broke For Free,AvantGarde|International|Blues|,Simple Hop
37901,Slam Funk,Broke For Free,AvantGarde|International|Blues|,Covered In Oil
37902,Slam Funk,Broke For Free,AvantGarde|International|Blues|,Drop of Water In the Ocean
37903,Slam Funk,Broke For Free,AvantGarde|International|Blues|,Living In Reverse
37904,Slam Funk,Broke For Free,AvantGarde|International|Blues|,At The Count
37905,Slam Funk,Broke For Free,AvantGarde|International|Blues|,High School Snaps
37906,Slam Funk,Broke For Free,AvantGarde|International|Blues|,Hella
37907,Slam Funk,Broke For Free,AvantGarde|International|Blues|,Caught In The Beat
37908,Slam Funk,Broke For Free,AvantGarde|International|Blues|,The Great


In [38]:
ge_mix.head(10)

Unnamed: 0_level_0,album_title,artist_name,genre,track_title
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
19433,Atlas Sound Live at ATP-NY 2009 on WFMU,Atlas Sound,AvantGarde|International|,Don Fogelberg / Dave Matthews rap
14342,netBloc Vol. 22: Life on Ceres,Kiddo,Electronic,For The First Time
23278,The Yaouhl! Mashups,Gillicuddy,AvantGarde|International|,"I know, I know"
13217,netBloc Vol. 09: Lo-Fi Adventures on Planet Rh...,The Unfinished Sympathy,Rock,Teenage Kicks
17021,Split with Jake Vida,Tusk Lord,AvantGarde|International|,Tidal Bore
9172,Live at WFMU/Aquarius SXSW show 3/20/2009,Major Stars,Rock,(title unknown)
23303,Accident Consultancy Live / Undead,THF Drenching,AvantGarde|International|,Fuck Keith M*ssi*h (Live)
14344,netBloc Vol. 22: Life on Ceres,Ambienteer,Electronic,Ecclesia
15863,Come Fly With Me,The Kid Daytona,HipHop,Contact! feat. Kardinal Offishall {prod. 6th S...
15639,Mydrone,Agents del Futuro,Electronic,Not Fucking Printz


In [39]:
ge_re.shape

(2170, 4)

In [40]:
ge_ar.shape

(23, 4)

In [41]:
ge_mix.shape

(1150, 4)

### Model Selection - MiniBatchKMeans

In [None]:
from sklearn.cluster import MiniBatchKMeans

In [None]:
mini = MiniBatchKMeans(n_clusters = 6)

In [None]:
X.drop('label', axis=1, inplace=True)

In [None]:
# Let's divide the intital dataset into pieces to demonstrate online learning
part_1, part_2, part_3 = X.iloc[0: 2000], X.iloc[2000:4000], X.iloc[4000:6000]

In [None]:
for i in [part_1, part_2, part_3]:
    t = fit(i, mini)
    mini = t[1]
    i = t[0]

In [None]:
X = pd.concat([part_1, part_2, part_3])

In [None]:
X.columns

In [None]:
X.head(3)

In [None]:
X['label'].value_counts()

In [None]:
recommendations = predict((X, mini), Y)

In [None]:
output = recommend(recommendations, metadata, Y)

In [None]:
genre_recommend_mini, artist_name_recommend_mini, mixed_mini = output[0], output[1], output[2]

In [None]:
genre_recommend_mini.shape

In [None]:
artist_name_recommend_mini.shape

In [None]:
# Genre wise recommendations
genre_recommend_mini.head()

In [None]:
# Artist wise recommendations
artist_name_recommend_mini.head()

In [None]:
# Mixed Recommendations
mixed_mini.head()

### Model Selection - Birch

In [None]:
from sklearn.cluster import Birch

In [None]:
birch = Birch(n_clusters = 6)

In [None]:
X.drop('label', axis=1, inplace=True)

In [None]:
# Let's divide the intital dataset into pieces to demonstrate online learning
part_1, part_2, part_3 = X.iloc[0: 2000], X.iloc[2000:4000], X.iloc[4000:6000]

In [None]:
for i in [part_1, part_2, part_3]:
    t = fit(i, birch)
    mini = t[1]
    i = t[0]

In [None]:
X = pd.concat([part_1, part_2, part_3])

In [None]:
X.columns

In [None]:
X.head(3)

In [None]:
X['label'].value_counts()

In [None]:
recommendations = predict((X, birch), Y)

In [None]:
output = recommend(recommendations, metadata, Y)

In [None]:
genre_recommend_birch, artist_name_recommend_birch, mixed_birch = output[0], output[1], output[2]

In [None]:
genre_recommend_birch.shape

In [None]:
artist_name_recommend_birch.shape

In [None]:
# Genre wise recommendations
genre_recommend_birch.head()

In [None]:
# Artist wise recommendations
artist_name_recommend_birch.head()

In [None]:
# Mixed Recommendations
mixed_birch.head()