# Recommender 1

In [2]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
# Read the track table from disk and preview its first rows.
tracks_path = './Data/1970-2020_tracks_cleaned'
df = pd.read_csv(tracks_path)
df.head()

Unnamed: 0,track_name,artist_name,year,key,tempo,mode,time_signature,duration_ms,popularity,loudness,acousticness,danceability,energy,instrumentalness,liveness,speechiness,valence,uri
0,Cassidy,Bob Weir,1970,6.0,149.346,1.0,4.0,221107.0,31,-7.784,0.621,0.569,0.679,0.000125,0.135,0.0297,0.54,spotify:track:1wrFUny4CRCDV5wTMv7RYK
1,Stories??,The Chakachas,1970,11.0,94.208,0.0,4.0,167234.0,35,-17.29,0.671,0.757,0.45,8.5e-05,0.0802,0.416,0.962,spotify:track:0dXapcDIbXkH91ObjpXQqh
2,Polk Salad Annie,Tony Joe White,1970,9.0,132.035,1.0,4.0,223533.0,38,-11.565,0.303,0.783,0.543,0.0159,0.0892,0.0412,0.833,spotify:track:12wB4syL3O9w54bpe1tmZX
3,It Never Rains in Southern California,Albert Hammond,1970,9.0,116.903,1.0,4.0,229000.0,31,-6.134,0.0563,0.584,0.785,0.0,0.0853,0.0267,0.937,spotify:track:1K24PVzIzAKBzRhmVKRleL
4,Gimme Some More,The J.B.'s,1970,2.0,97.76,1.0,4.0,187560.0,38,-9.968,0.203,0.822,0.426,0.0,0.0715,0.0766,0.82,spotify:track:1NXyoATCYylbjSDFe8FHbJ


**Combining each song name with its artist name and making the result lower case**  
This will make searching for specific songs easier later, since everything can be entered in lowercase.

In [4]:
# Build one lowercase "<track>, <artist>" label per row and store it in track_name.
combined_label = df['track_name'] + ', ' + df['artist_name']
df['track_name'] = combined_label.str.lower()

**Setting Index of dataframe as the tracks themselves**  
We will want to compare each song with every other song in later steps.

In [5]:
# Use the combined track label as the row index so songs can be looked up by name.
df = df.set_index('track_name')

**Due to computer memory limitations, I will only use songs from the years 2000 to 2020**  
This brings our data set to about 40,000 songs.

In [6]:
# Keep only the rows whose year is 2000 or later.
from_2000 = df['year'].ge(2000)
df = df.loc[from_2000]

In [7]:
# Size of the frame after the year >= 2000 filter.
df.shape

(40948, 17)

In [8]:
# When a track label occurs more than once, keep only its last occurrence.
is_repeat = df.index.duplicated(keep='last')
df = df.loc[~is_repeat]

In [9]:
# Size of the frame after dropping repeated track labels.
df.shape

(38775, 17)

**Standard Scaling since many of our variables are on quite different scales**

In [10]:
# Drop the two text columns; the remaining columns are the features to scale.
numerical_df = df.drop(columns=['artist_name', 'uri'])

# Fit the scaler on the features, then apply it to the same data.
ss = StandardScaler()
ss.fit(numerical_df)
numerical_df_scaled = ss.transform(numerical_df)

In [11]:
# Put the scaled array back into a frame with numerical_df's row and column labels.
non_svd = pd.DataFrame(
    data=numerical_df_scaled,
    index=numerical_df.index,
    columns=numerical_df.columns,
)
non_svd.shape

(38775, 15)

### Cosine Similarity

For our product-based recommender (using features we know about our "product", the songs in this case), we will use cosine similarity for our recommender model.

- Citing [Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity):

    Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them. The cosine of 0° is 1, and it is less than 1 for any angle in the interval (0, π] radians. It is thus a judgment of orientation and not magnitude: two vectors with the same orientation have a cosine similarity of 1, two vectors oriented at 90° relative to each other have a similarity of 0, and two vectors diametrically opposed have a similarity of -1, independent of their magnitude.
    
- In our case, each track is its own vector of unique features. We will get a big matrix comparing each song to every other song in our dataset.

In [12]:
# Pairwise cosine similarity of every row of non_svd against every other row
# (with a single argument the rows are compared with themselves).
cs1 = cosine_similarity(non_svd)

In [13]:
# Label both axes of the similarity matrix with the track labels.
track_labels = non_svd.index
recommender1 = pd.DataFrame(data=cs1, index=track_labels, columns=track_labels)

In [14]:
# Preview the first rows of the track-by-track similarity matrix.
recommender1.head()

track_name,"bitch please ii, eminem","papa was a rollin' stone - single version, the temptations","shape of my heart, backstreet boys","dig, mudvayne","hold on loosely, 38 special","she loves you - mono / remastered, the beatles","if i ever feel better, phoenix","didn't cha know, erykah badu","take a look around, limp bizkit","mis tres viejas, los tucanes de tijuana",...,"piano concerto no.3 in c minor, op.37: 2. largo - live, ludwig van beethoven","sextet in e-flat major, op. 81b: ii. adagio, ludwig van beethoven","violin sonata no. 2 in a major, op. 12, no. 2: 2. andante più tosto allegretto, ludwig van beethoven","string quartet no.14 in c sharp minor, op.131: 1. adagio ma non troppo e molto espressivo, ludwig van beethoven","piano sonata no.17 in d minor, op.31 no.2 -""tempest"": 3. allegretto, ludwig van beethoven","piano sonata no. 23 in f minor, op. 57 ""appassionata"": 2. andante con moto, ludwig van beethoven","piano concerto no.5 in e flat major op.73 -""emperor"": 3. rondo (allegro), ludwig van beethoven","bagatelle in a minor, woo 59 -""für elise"", ludwig van beethoven","6 bagatelles, op. 126: 1. andante con moto, ludwig van beethoven","symphony no.2 in d, op.36: 2. larghetto, ludwig van beethoven"
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"bitch please ii, eminem",1.0,0.391338,0.498434,0.066567,0.459532,0.341913,0.604419,0.501313,0.246337,0.604176,...,-0.550682,-0.652182,-0.684979,-0.687223,-0.640968,-0.48109,-0.41918,-0.66248,-0.538907,-0.5341
"papa was a rollin' stone - single version, the temptations",0.391338,1.0,0.335735,0.147327,0.4742,0.228197,0.818433,0.671276,0.323903,0.533293,...,-0.030278,-0.125796,0.023493,-0.016709,-0.077645,-0.04907,-0.024566,-0.006965,-0.053085,-0.015204
"shape of my heart, backstreet boys",0.498434,0.335735,1.0,0.489441,0.704472,0.549591,0.405666,0.150466,0.81411,0.62799,...,-0.599737,-0.515017,-0.613685,-0.661423,-0.576426,-0.605888,-0.537301,-0.634903,-0.518892,-0.51868
"dig, mudvayne",0.066567,0.147327,0.489441,1.0,0.325227,0.366108,0.377042,0.195958,0.464464,0.20007,...,-0.545933,-0.508592,-0.402764,-0.417617,-0.415094,-0.632859,-0.606415,-0.403343,-0.514044,-0.537816
"hold on loosely, 38 special",0.459532,0.4742,0.704472,0.325227,1.0,0.45756,0.56361,0.202407,0.742752,0.619089,...,-0.366043,-0.414705,-0.453897,-0.426642,-0.45417,-0.457939,-0.361778,-0.444952,-0.453769,-0.350089


### Recommender

Function to search a song to get 10 recommendations.

In [15]:
def recommend1(search, n=10, regex=False):
    """Print the closest songs for every track whose label contains ``search``.

    Matching tracks are processed from most to least popular (``popularity``
    column of the global ``df``). For each one, the similarity scores are read
    from the matching column of the global ``recommender1`` matrix and the
    ``n`` highest-scoring *other* tracks are printed.

    Parameters
    ----------
    search : str
        Text to look for in the track labels (the index of ``df``). Matching
        is case-insensitive.
    n : int, default 10
        Number of similar tracks to print per matching track.
    regex : bool, default False
        When False, ``search`` is matched literally, so labels containing
        characters such as ``(``, ``)``, ``?`` or ``.`` can be searched exactly
        as written. Pass True to treat ``search`` as a regular expression.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Literal matching by default: a track label is not a regular expression,
    # and many labels contain regex metacharacters.
    matches = df.index.str.contains(search, case=False, regex=regex, na=False)
    ranked = df.loc[matches].sort_values(by='popularity', ascending=False)

    if ranked.empty:
        print(f'No tracks found matching {search!r}')
        return

    for track in ranked.index:
        # Remove the track's own entry by label instead of skipping the first
        # sorted position: with tied scores the track itself is not guaranteed
        # to sort first, so a positional skip could hide a real neighbour.
        closest = (
            recommender1[track]
            .drop(labels=track)
            .sort_values(ascending=False)
            .head(n)
        )

        print(track)
        print('')
        print(f'{n} closest songs')
        print('')
        print(closest)
        print('')
        print('*******************************************************************************************')
        print('')

In [16]:
# Example search using a full lowercase "<track>, <artist>" label.
recommend1('ocean eyes, billie eilish')

ocean eyes, billie eilish

10 closest songs

track_name
solo, frank ocean                               0.930773
don't forget about me, cloves                   0.927655
technicolour beat, oh wonder                    0.920424
angels, khalid                                  0.915919
ocean eyes - astronomyy remix, billie eilish    0.914408
teenage mind, tate mcrae                        0.904694
remember me, jeremih                            0.902335
vertigo, khalid                                 0.900547
conversations in the dark, john legend          0.900239
disfruto, carla morrison                        0.900048
Name: ocean eyes, billie eilish, dtype: float64

*******************************************************************************************



Function to get more details of specific songs you are interested in

In [17]:
def get_features(song):
    """Return the row(s) of the global ``df`` stored under a track label.

    Parameters
    ----------
    song : str or list-like of str
        Track label(s) in the "<track>, <artist>" form used as the index of
        ``df``. A single string is lowercased before the lookup because the
        index labels are stored in lowercase, so the query may be typed in any
        letter case. Non-string selectors are passed to ``df.loc`` unchanged.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Whatever ``df.loc`` returns for the selector: a Series for a single
        unique label, a DataFrame for a list of labels.

    Raises
    ------
    KeyError
        If the label is not present in ``df``.
    """
    if isinstance(song, str):
        song = song.lower()
    return df.loc[song]

In [19]:
# Example: show the stored feature row for one track label.
get_features('ocean eyes, billie eilish')

artist_name                                Billie Eilish
year                                                2016
key                                                    4
tempo                                            144.828
mode                                                   0
time_signature                                         4
duration_ms                                       200560
popularity                                            69
loudness                                          -7.725
acousticness                                       0.819
danceability                                       0.475
energy                                              0.37
instrumentalness                                  0.0577
liveness                                          0.0841
speechiness                                       0.0361
valence                                            0.157
uri                 spotify:track:2uIX8YMNjGMD7441kqyyNU
Name: ocean eyes, billie eilish

____