## Importing libraries

In [None]:
import pandas as pd, warnings
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import sigmoid_kernel

# Suppress every Python warning for the rest of the session
# (note: this also hides deprecation notices raised by the imported libraries).
warnings.filterwarnings("ignore")

## Read data from GitHub

In [None]:
# Load the zipped CSV straight from GitHub; `?raw=true` requests the file itself
# rather than the repository's HTML page, and pandas unpacks the zip archive.
full_data = pd.read_csv(
    "https://github.com/Umadevi65B/Spotify_data/blob/main/data_spotofy.zip?raw=true",
    compression='zip',
)

In [None]:
# (number of rows, number of columns) of the full dataset
full_data.shape

(170653, 19)

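The data consists of about 170k rows, but we will use only the first 25,000 records because of limited computing power: the pairwise similarity matrices built below have one row and one column per song, so their size grows quadratically with the number of records.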

In [None]:
# Keep only the first 25,000 rows (positional slice) to bound memory use downstream
data = full_data.iloc[:25000]

In [None]:
# Display the first 5 rows of the working subset
data.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,1921,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,1921,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,1921,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,1921,0.038,101.665


In [None]:
# Summarize the columns: names, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25000 entries, 0 to 24999
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   valence           25000 non-null  float64
 1   year              25000 non-null  int64  
 2   acousticness      25000 non-null  float64
 3   artists           25000 non-null  object 
 4   danceability      25000 non-null  float64
 5   duration_ms       25000 non-null  int64  
 6   energy            25000 non-null  float64
 7   explicit          25000 non-null  int64  
 8   id                25000 non-null  object 
 9   instrumentalness  25000 non-null  float64
 10  key               25000 non-null  int64  
 11  liveness          25000 non-null  float64
 12  loudness          25000 non-null  float64
 13  mode              25000 non-null  int64  
 14  name              25000 non-null  object 
 15  popularity        25000 non-null  int64  
 16  release_date      25000 non-null  object

In [None]:
# Audio features used to compare songs when generating recommendations
feature_cols = [
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
    'valence',
]

# Rescale every feature column with MinMaxScaler so that no single feature
# (e.g. duration_ms) dominates the similarity computation.
# Note: despite its name, `normalized_df` is the NumPy array returned by
# fit_transform, with one row per song and one column per feature.
scaler = MinMaxScaler()
normalized_df = scaler.fit_transform(data[feature_cols])

# Show the scaled feature vectors of the first two songs
print(normalized_df[:2])

[[0.98594378 0.28238866 0.23632721 0.211      0.878      0.90909091
  0.667001   0.65547488 1.         0.03784902 0.36508359 0.05993946]
 [0.73493976 0.82894737 0.04995788 0.341      0.         0.63636364
  0.16048144 0.78121817 1.         0.42916236 0.27480709 0.97174571]]


In [None]:
# Create a pandas Series that maps each song title (index) to its row index (value).
# Series.drop_duplicates() compares the *values* -- the row indices, which are all
# distinct -- so it never removes repeated titles. Filter on the index labels
# instead, keeping the first row for each title, so that `indices[title]` always
# yields a single row index rather than a Series of several.
indices = pd.Series(data.index, index=data['name'])
indices = indices[~indices.index.duplicated(keep='first')]
indices

name
Piano Concerto No. 3 in D Minor, Op. 30: III. Finale. Alla breve                       0
Clancy Lowered the Boom                                                                1
Gati Bali                                                                              2
Danny Boy                                                                              3
When Irish Eyes Are Smiling                                                            4
                                                                                   ...  
Till We Meet Again (with Paul Weston & His Orchestra & The Norman Luboff Choir)    24995
After the Ball                                                                     24996
Piano Sonata No. 3 in F-Sharp Minor, Op. 23: IV. Presto con fuoco - Meno mosso     24997
Where Will I Shelter My Sheep                                                      24998
Cherokee                                                                           24999
Length: 25000, d

In [None]:
# Create the pairwise cosine similarity matrix for the normalized data:
# entry [i, j] is the similarity between the feature vectors of song i and song j.
cosine = cosine_similarity(normalized_df)
print(cosine)

[[1.         0.73031113 0.91637774 ... 0.94830561 0.84669081 0.83813791]
 [0.73031113 1.         0.71936626 ... 0.73862435 0.93880693 0.89919355]
 [0.91637774 0.71936626 1.         ... 0.98433058 0.77485027 0.89537703]
 ...
 [0.94830561 0.73862435 0.98433058 ... 1.         0.80455433 0.87612856]
 [0.84669081 0.93880693 0.77485027 ... 0.80455433 1.         0.90093072]
 [0.83813791 0.89919355 0.89537703 ... 0.87612856 0.90093072 1.        ]]


In [None]:
# Convert the 'cosine' array to a DataFrame to view the results (first 10 rows)
cosine_df = pd.DataFrame(cosine)
cosine_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24990,24991,24992,24993,24994,24995,24996,24997,24998,24999
0,1.0,0.730311,0.916378,0.871308,0.827951,0.793473,0.74517,0.775508,0.677141,0.821941,...,0.794325,0.790011,0.944817,0.892781,0.834385,0.783731,0.880825,0.948306,0.846691,0.838138
1,0.730311,1.0,0.719366,0.849495,0.875961,0.877961,0.880305,0.606551,0.822811,0.967884,...,0.916412,0.831003,0.724486,0.879994,0.930755,0.819929,0.83073,0.738624,0.938807,0.899194
2,0.916378,0.719366,1.0,0.862304,0.87179,0.87191,0.841197,0.815969,0.669618,0.769704,...,0.769144,0.869868,0.97865,0.902752,0.814457,0.872898,0.815107,0.984331,0.77485,0.895377
3,0.871308,0.849495,0.862304,1.0,0.98539,0.920903,0.934916,0.745293,0.744742,0.917614,...,0.911343,0.949488,0.850456,0.954966,0.937946,0.962997,0.95308,0.866973,0.925611,0.922612
4,0.827951,0.875961,0.87179,0.98539,1.0,0.950625,0.976921,0.752763,0.767231,0.921389,...,0.900876,0.981668,0.847586,0.950777,0.949595,0.98734,0.927572,0.859208,0.917526,0.940562
5,0.793473,0.877961,0.87191,0.920903,0.950625,1.0,0.93689,0.702532,0.691616,0.88436,...,0.882185,0.937952,0.81897,0.924112,0.906776,0.925541,0.854425,0.84058,0.897048,0.949699
6,0.74517,0.880305,0.841197,0.934916,0.976921,0.93689,1.0,0.731416,0.768839,0.891527,...,0.843494,0.96123,0.803536,0.90383,0.905719,0.978404,0.836763,0.816241,0.860342,0.928366
7,0.775508,0.606551,0.815969,0.745293,0.752763,0.702532,0.731416,1.0,0.832409,0.701445,...,0.620398,0.743441,0.77405,0.788071,0.70521,0.72428,0.708185,0.786757,0.646091,0.766658
8,0.677141,0.822811,0.669618,0.744742,0.767231,0.691616,0.768839,0.832409,1.0,0.839282,...,0.790677,0.731955,0.673135,0.810245,0.830864,0.710797,0.753557,0.686179,0.788627,0.799326
9,0.821941,0.967884,0.769704,0.917614,0.921389,0.88436,0.891527,0.701445,0.839282,1.0,...,0.929534,0.885044,0.779103,0.932371,0.969693,0.863151,0.922728,0.786123,0.964031,0.917707


In [None]:
def generate_recommendation(song_title, model_type = cosine, top_n = 10):
    """
    Recommend the songs most similar to a given song.

    Inputs:
        song_title: title of the seed song; must be a label of ``indices``.
            If several rows share the title, the first one is used.
        model_type: square pairwise-similarity matrix whose row/column
            positions line up with the rows of ``data``. Defaults to the
            cosine similarity matrix (bound when the function is defined).
        top_n: number of recommendations to return (default 10).
    Output: Pandas series of recommended song names, most similar first.
    Raises: KeyError if ``song_title`` is not a label of ``indices``.
    """
    if song_title not in indices.index:
        raise KeyError(f"Song title not found: {song_title!r}")

    # A title shared by several rows makes the lookup return a Series instead
    # of a scalar; fall back to the first matching row in that case.
    song_index = indices[song_title]
    if isinstance(song_index, pd.Series):
        song_index = song_index.iloc[0]
    song_index = int(song_index)

    # Pair every *other* song's position with its similarity to the seed song.
    # The seed is excluded by position rather than by dropping the top-ranked
    # entry: its own score is not guaranteed to be the row maximum (it is not
    # for the sigmoid kernel, and exact duplicates tie with it under cosine).
    scores = [
        (position, score)
        for position, score in enumerate(model_type[song_index])
        if position != song_index
    ]

    # Sort the songs in descending order of similarity and keep the best top_n
    ranked = sorted(scores, key=lambda pair: pair[1], reverse=True)
    top_songs_index = [position for position, _ in ranked[:top_n]]

    # Return recommended songs
    return data['name'].iloc[top_songs_index]

In [None]:
# Recommend songs similar to 'Gati Bali' using the cosine similarity matrix
print("Recommended Songs:")
print(generate_recommendation('Gati Bali',cosine).values)

Recommended Songs:
['Gati Main-main' 'Gati Mrak Ati'
 'Estampes, L. 100: III. Jardins sous la pluie - Net et vif (Remastered)'
 'Das Christelflein, Op. 20: Overture - Excerpts'
 'Étude in E Major, Op. 10, No. 3 "Tristesse"'
 'Iberia Book II: I. Rondeña'
 'Symphony No.6 in B Minor Op. 74: II. Allegro con grazia'
 'By Love Possessed (1961) Full Circle'
 'Nocturne No. 6 in D-Flat Major, Op. 63'
 'Piano Quintet in E-Flat Major, Op.44: I. Allegro brillante']


In [None]:
# Create the pairwise sigmoid kernel matrix for the normalized data
# NOTE(review): unlike cosine similarity, a song's own kernel value does not
# appear to be the largest entry in its row (compare the diagonal of the printed
# matrix), so confirm that the recommender excludes the seed song explicitly.
sig_kernel = sigmoid_kernel(normalized_df)

# Recommend songs similar to 'Gati Bali' using the sigmoid kernel matrix
print("Recommended Songs:")
print(generate_recommendation('Gati Bali',sig_kernel).values)

Recommended Songs:
["It's Time To Jump And Shout"
 'Para mi Gaucha - Instrumental (Remasterizado)' "Let's Dance"
 "Let's Dance" 'Thahr Zara O Jane Wale - Instrumental'
 'Yo Tuve un Amorcito - Remasterizado' "Let's Dance"
 'Melodía de Amor - Remasterizado' "Roll 'Em"
 'Na Jane Yeh Chanda - Instrumental']


In [None]:
# Convert the 'sig_kernel' array to a DataFrame to view the results (first 10 rows)
sigmoid_df = pd.DataFrame(sig_kernel)
sigmoid_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24990,24991,24992,24993,24994,24995,24996,24997,24998,24999
0,0.884672,0.857699,0.868065,0.859669,0.853821,0.84816,0.846691,0.836012,0.832234,0.868936,...,0.864337,0.848951,0.868411,0.872237,0.863551,0.844763,0.869923,0.871358,0.863836,0.863424
1,0.857699,0.88153,0.848461,0.856376,0.856634,0.854101,0.857393,0.820897,0.84347,0.880965,...,0.87439,0.851247,0.847124,0.869573,0.870805,0.846603,0.863805,0.850793,0.870611,0.867531
2,0.868065,0.848461,0.866107,0.850637,0.849479,0.846931,0.847337,0.832269,0.825148,0.855212,...,0.853419,0.847856,0.862356,0.864156,0.853274,0.844481,0.855237,0.86537,0.848911,0.859886
3,0.859669,0.856376,0.850637,0.857319,0.854206,0.846775,0.850772,0.823843,0.827919,0.864269,...,0.861891,0.850016,0.847806,0.864101,0.859553,0.847319,0.862782,0.851489,0.857718,0.857841
4,0.853821,0.856634,0.849479,0.854206,0.853291,0.847089,0.851974,0.822914,0.828064,0.862465,...,0.858921,0.850451,0.845688,0.861642,0.858424,0.847162,0.858612,0.848911,0.855035,0.857244
5,0.84816,0.854101,0.846931,0.846775,0.847089,0.848074,0.846441,0.817479,0.82043,0.856563,...,0.854625,0.844733,0.84093,0.856708,0.852308,0.840345,0.849905,0.844862,0.850741,0.855245
6,0.846691,0.857393,0.847337,0.850772,0.851974,0.846441,0.85405,0.821663,0.828481,0.860326,...,0.854393,0.849325,0.84243,0.858173,0.855274,0.846891,0.851392,0.845704,0.850735,0.856655
7,0.836012,0.820897,0.832269,0.823843,0.822914,0.817479,0.821663,0.827729,0.821734,0.830389,...,0.822205,0.821152,0.82765,0.834877,0.82595,0.817165,0.827647,0.830487,0.82065,0.830382
8,0.832234,0.84347,0.825148,0.827919,0.828064,0.82043,0.828481,0.821734,0.8366,0.846596,...,0.840977,0.82427,0.823998,0.841378,0.840344,0.819941,0.835764,0.826871,0.836298,0.837457
9,0.868936,0.880965,0.855212,0.864269,0.862465,0.856563,0.860326,0.830389,0.846596,0.886004,...,0.877745,0.857793,0.854172,0.876463,0.876326,0.852074,0.874357,0.857266,0.874924,0.871276
