##### Hidden Functions: 

In [61]:
import pandas as pd
import spotipy
import sys
from spotipy.oauth2 import SpotifyClientCredentials
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [62]:
from dotenv import load_dotenv
import os

# Read credentials from the local .env file so they never appear in the notebook
load_dotenv()

# Build the Spotify client from the credentials found in the environment
client_id = os.getenv("SPOTIFY_CLIENT_ID")
client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")

spotify = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

In [63]:
def get_playlist_track_ids(spotify, playlist_id, offset=0):
    """Return the track IDs found on one page of a playlist.

    Args:
        spotify: Client object exposing ``playlist_tracks(playlist_id, offset=...)``.
        playlist_id: Identifier of the playlist to read.
        offset: Index of the first playlist entry to fetch (default 0).

    Returns:
        list: Track IDs in playlist order. Entries that carry no track object
        or no track ID are skipped.
    """
    response = spotify.playlist_tracks(playlist_id, offset=offset)
    # Read the entries straight from the response; no DataFrame round-trip is needed.
    items = (response or {}).get("items") or []

    track_ids = []
    for item in items:
        track = item.get("track") if item else None
        # Skip entries whose track or id is null (presumably removed or local
        # tracks -- TODO confirm): they would raise here or break later lookups.
        if track and track.get("id"):
            track_ids.append(track["id"])
    return track_ids

In [64]:

def get_audio_features(spotify, track_ids):
    """Fetch audio features for the given tracks and return them as a DataFrame.

    Args:
        spotify: Client object exposing ``audio_features(track_ids)``.
        track_ids: A single track ID string or a sequence of track IDs.

    Returns:
        pandas.DataFrame: One row per track for which features were returned.
        Empty when ``track_ids`` is empty (no request is made in that case).
    """
    if isinstance(track_ids, str):
        track_ids = [track_ids]
    track_ids = list(track_ids)

    # The audio-features endpoint accepts at most 100 IDs per request
    # (per the spotipy documentation), so longer lists are sent in batches.
    batch_size = 100
    track_audio_features = []
    for start in range(0, len(track_ids), batch_size):
        batch = spotify.audio_features(track_ids[start:start + batch_size])
        # Drop None placeholders; they would break DataFrame construction.
        track_audio_features.extend(
            features for features in (batch or []) if features is not None
        )
    return pd.DataFrame(track_audio_features)

In [65]:
# Freddie's playlist (the "whale" dataset): fetch its track IDs, then their audio features
whale_track_ids = get_playlist_track_ids(spotify, "5v0XNgUfSqaDSZ6SkJkN7E")
whale_audio_features = get_audio_features(spotify, whale_track_ids)

In [66]:
# One audio-features DataFrame is collected per page of the test playlist
test_playlist_audio_features_list = []

# A single request returns at most 100 tracks, so four pages (offsets 0, 100, 200, 300)
# are fetched to cover 400 tracks
for offset in range(0, 400, 100):
    test_tracks_ids = get_playlist_track_ids(spotify, "2Vjg4BQi0ILYyEN5lr9Lma", offset=offset)
    test_audio_features = get_audio_features(spotify, test_tracks_ids)
    test_playlist_audio_features_list.append(test_audio_features)

# Stack the pages into a single DataFrame with a fresh 0..n-1 index
test_playlist_audio_features = pd.concat(
    test_playlist_audio_features_list,
    ignore_index=True,
)



<img src="https://media.giphy.com/media/sVnKj2wDhUTsFKFWhx/giphy.gif" alt ="Office Gif" style="height:250px"/>

<br>
<br>

#  🎉🎉🎉 Creating a spotify recommendation system 🎉🎉🎉









<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>


<br>
<br>
<br>
<br>
<br>
<br>
<br>

###  Step 1) Gathering a dataset 

- Needs to have enough values to overcome the cold-start problem.
- Needs to include song meta data so we can use content based filtering.
- Needs to be small enough so the model does not take over 1 minute to run.
- Needs to include enough variation to prove that the recommendation system is actually working!

![Foraging](https://media.giphy.com/media/3oriO5oRIGNYdFFuXm/giphy.gif)











 

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>


##### [Freddie's Playlist](https://open.spotify.com/playlist/5v0XNgUfSqaDSZ6SkJkN7E?si=a3cc7f0fe7bc424b) 

<iframe style="border-radius:12px" src="https://open.spotify.com/embed/playlist/5v0XNgUfSqaDSZ6SkJkN7E?utm_source=generator&theme=0" width="100%" height="352" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>

Features: 
- 73 songs
- Freddie's depression playlist!

<hr>

<br>
<br>
<br>
<br>
<br>

##### [Test Playlist](https://open.spotify.com/playlist/2Vjg4BQi0ILYyEN5lr9Lma?si=eaf02cfa5b364f24)

**400 songs (mixture of random and non-random)**

- This was generated by adding 100 Spotify-recommended songs.
- Random songs were then added to introduce variability, taken from: Theodo Playlist, A Super John Mcginn playlist, a drum and bass playlist, Classical Bangers, and Eminem albums.

<img src="./images/random-playlist.png" alt="random-songs" style="margin:30px; height:200px; width:500px;"/>



<iframe style="border-radius:12px" src="https://open.spotify.com/embed/playlist/2Vjg4BQi0ILYyEN5lr9Lma?utm_source=generator&theme=0" width="100%" height="352" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>

<hr>

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br> 
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br> 

<hr>

###  Step 2) Extract Track Meta Data 
- We need to be able to use the meta data in code.


<div style="width:480px"><iframe allow="fullscreen" frameBorder="0" height="270" src="https://giphy.com/embed/lu01tsQqf1mJuHrFVq/video" width="480"></iframe></div>
<br>
<br>
<br>
<br>

**Step 2.1) Connect to the [Spotify API](https://developer.spotify.com/documentation/web-api)**

In [None]:
# Authenticate against the Spotify API with the app's client ID and secret
spotify = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

<br>
<br>
<br>
<br>
<br>


##### **Step 2.2) Get playlist tracks**

In [None]:
playlist_id = "5v0XNgUfSqaDSZ6SkJkN7E"

# Fetch the playlist's entries and load them into a DataFrame (one row per entry)
df = pd.DataFrame(
    spotify.playlist_tracks(playlist_id)["items"]
)

# Preview the first five rows
df.head()

Unnamed: 0,added_at,added_by,is_local,primary_color,track,video_thumbnail
0,2022-01-02T19:41:08Z,{'external_urls': {'spotify': 'https://open.sp...,False,,"{'album': {'album_type': 'album', 'artists': [...",{'url': None}
1,2022-12-09T11:20:50Z,{'external_urls': {'spotify': 'https://open.sp...,False,,"{'album': {'album_type': 'single', 'artists': ...",{'url': None}
2,2022-12-09T11:25:33Z,{'external_urls': {'spotify': 'https://open.sp...,False,,"{'album': {'album_type': 'album', 'artists': [...",{'url': None}
3,2022-12-09T11:25:32Z,{'external_urls': {'spotify': 'https://open.sp...,False,,"{'album': {'album_type': 'album', 'artists': [...",{'url': None}
4,2022-12-09T11:28:27Z,{'external_urls': {'spotify': 'https://open.sp...,False,,"{'album': {'album_type': 'album', 'artists': [...",{'url': None}


<br>
<br>
<br>
<br>


#### **Step 2.3) Get track audio features**

In [None]:
track_id = "3CAX47TnPqTujLIQTw8nwI"

# Fetch the audio features for this single track and load them into a DataFrame
df = pd.DataFrame(
    spotify.audio_features(track_id)
)
df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.401,0.364,5,-10.836,1,0.033,0.45,0.0503,0.162,0.224,129.57,audio_features,3CAX47TnPqTujLIQTw8nwI,spotify:track:3CAX47TnPqTujLIQTw8nwI,https://api.spotify.com/v1/tracks/3CAX47TnPqTu...,https://api.spotify.com/v1/audio-analysis/3CAX...,328507,4


<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/3CAX47TnPqTujLIQTw8nwI?utm_source=generator" width="100%" height="152" frameBorder="0" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>

<br> 
<br>
<br>
<br>


##### Once you combine your functions (See hidden code folder for how I do that) you get two dataframes:


##### **Whale:**

In [67]:
# Display the audio-features table for the whale playlist (one row per track)
whale_audio_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.401,0.3640,5,-10.836,1,0.0330,0.4500,0.050300,0.1620,0.2240,129.570,audio_features,3CAX47TnPqTujLIQTw8nwI,spotify:track:3CAX47TnPqTujLIQTw8nwI,https://api.spotify.com/v1/tracks/3CAX47TnPqTu...,https://api.spotify.com/v1/audio-analysis/3CAX...,328507,4
1,0.493,0.3720,7,-11.889,1,0.0331,0.9640,0.030800,0.1170,0.2500,116.860,audio_features,15WbjBy1bzH1UsUldBwwlW,spotify:track:15WbjBy1bzH1UsUldBwwlW,https://api.spotify.com/v1/tracks/15WbjBy1bzH1...,https://api.spotify.com/v1/audio-analysis/15Wb...,285160,4
2,0.386,0.2050,4,-16.699,1,0.0366,0.6530,0.044800,0.1060,0.6230,172.373,audio_features,6UIxGIqWlO5wsddY44AV1R,spotify:track:6UIxGIqWlO5wsddY44AV1R,https://api.spotify.com/v1/tracks/6UIxGIqWlO5w...,https://api.spotify.com/v1/audio-analysis/6UIx...,152613,4
3,0.556,0.2690,5,-12.234,1,0.0502,0.9460,0.004920,0.0834,0.1730,99.902,audio_features,2D369Iv0P8FplOVp5ZMel7,spotify:track:2D369Iv0P8FplOVp5ZMel7,https://api.spotify.com/v1/tracks/2D369Iv0P8Fp...,https://api.spotify.com/v1/audio-analysis/2D36...,273693,4
4,0.489,0.2500,9,-12.766,1,0.0307,0.9210,0.736000,0.1100,0.0562,128.286,audio_features,5so4VqG1hJDiqyRqavbH6o,spotify:track:5so4VqG1hJDiqyRqavbH6o,https://api.spotify.com/v1/tracks/5so4VqG1hJDi...,https://api.spotify.com/v1/audio-analysis/5so4...,316800,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,0.577,0.4340,7,-8.437,1,0.0303,0.8870,0.215000,0.0822,0.3210,79.497,audio_features,6eusWfAal7mOOLr0XFLkiY,spotify:track:6eusWfAal7mOOLr0XFLkiY,https://api.spotify.com/v1/tracks/6eusWfAal7mO...,https://api.spotify.com/v1/audio-analysis/6eus...,178627,4
69,0.509,0.3100,9,-10.623,1,0.0313,0.9590,0.229000,0.0963,0.1510,137.474,audio_features,7CoMBpPTwQi2wPT0U0Nr9b,spotify:track:7CoMBpPTwQi2wPT0U0Nr9b,https://api.spotify.com/v1/tracks/7CoMBpPTwQi2...,https://api.spotify.com/v1/audio-analysis/7CoM...,208653,3
70,0.412,0.2270,0,-12.696,1,0.0359,0.7870,0.000005,0.2820,0.2260,80.111,audio_features,35r28RDot7nPE7y9K9H7l0,spotify:track:35r28RDot7nPE7y9K9H7l0,https://api.spotify.com/v1/tracks/35r28RDot7nP...,https://api.spotify.com/v1/audio-analysis/35r2...,257267,4
71,0.370,0.4670,5,-9.013,1,0.0273,0.0205,0.572000,0.1090,0.1820,94.485,audio_features,0yc6Gst2xkRu0eMLeRMGCX,spotify:track:0yc6Gst2xkRu0eMLeRMGCX,https://api.spotify.com/v1/tracks/0yc6Gst2xkRu...,https://api.spotify.com/v1/audio-analysis/0yc6...,290617,4


<br>
<br>
<br>
<br>

##### **Test Playlist:**

In [68]:
# Display the combined audio-features table for the test playlist (one row per track)
test_playlist_audio_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.401,0.364,5,-10.836,1,0.0330,0.450,0.050300,0.1620,0.2240,129.570,audio_features,3CAX47TnPqTujLIQTw8nwI,spotify:track:3CAX47TnPqTujLIQTw8nwI,https://api.spotify.com/v1/tracks/3CAX47TnPqTu...,https://api.spotify.com/v1/audio-analysis/3CAX...,328507,4
1,0.493,0.372,7,-11.889,1,0.0331,0.964,0.030800,0.1170,0.2500,116.860,audio_features,15WbjBy1bzH1UsUldBwwlW,spotify:track:15WbjBy1bzH1UsUldBwwlW,https://api.spotify.com/v1/tracks/15WbjBy1bzH1...,https://api.spotify.com/v1/audio-analysis/15Wb...,285160,4
2,0.386,0.205,4,-16.699,1,0.0366,0.653,0.044800,0.1060,0.6230,172.373,audio_features,6UIxGIqWlO5wsddY44AV1R,spotify:track:6UIxGIqWlO5wsddY44AV1R,https://api.spotify.com/v1/tracks/6UIxGIqWlO5w...,https://api.spotify.com/v1/audio-analysis/6UIx...,152613,4
3,0.556,0.269,5,-12.234,1,0.0502,0.946,0.004920,0.0834,0.1730,99.902,audio_features,2D369Iv0P8FplOVp5ZMel7,spotify:track:2D369Iv0P8FplOVp5ZMel7,https://api.spotify.com/v1/tracks/2D369Iv0P8Fp...,https://api.spotify.com/v1/audio-analysis/2D36...,273693,4
4,0.489,0.250,9,-12.766,1,0.0307,0.921,0.736000,0.1100,0.0562,128.286,audio_features,5so4VqG1hJDiqyRqavbH6o,spotify:track:5so4VqG1hJDiqyRqavbH6o,https://api.spotify.com/v1/tracks/5so4VqG1hJDi...,https://api.spotify.com/v1/audio-analysis/5so4...,316800,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.475,0.178,7,-9.976,1,0.0347,0.745,0.000189,0.0899,0.2300,136.687,audio_features,1ptwL6lZNLc654XAaHqcHb,spotify:track:1ptwL6lZNLc654XAaHqcHb,https://api.spotify.com/v1/tracks/1ptwL6lZNLc6...,https://api.spotify.com/v1/audio-analysis/1ptw...,280107,4
396,0.588,0.412,2,-10.669,1,0.0613,0.152,0.000000,0.1340,0.3020,133.815,audio_features,2A1FGotEDhN5UjBiLsUwnk,spotify:track:2A1FGotEDhN5UjBiLsUwnk,https://api.spotify.com/v1/tracks/2A1FGotEDhN5...,https://api.spotify.com/v1/audio-analysis/2A1F...,155660,4
397,0.637,0.651,2,-7.089,1,0.0521,0.320,0.000000,0.0938,0.4850,115.926,audio_features,7ByxizhA4GgEf7Sxomxhze,spotify:track:7ByxizhA4GgEf7Sxomxhze,https://api.spotify.com/v1/tracks/7ByxizhA4GgE...,https://api.spotify.com/v1/audio-analysis/7Byx...,251511,4
398,0.446,0.247,11,-13.036,0,0.0287,0.905,0.233000,0.1110,0.2450,138.328,audio_features,4x1dWc1GgAfC04GcTlllax,spotify:track:4x1dWc1GgAfC04GcTlllax,https://api.spotify.com/v1/tracks/4x1dWc1GgAfC...,https://api.spotify.com/v1/audio-analysis/4x1d...,106250,1



<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br> 
<hr>

### Step 3) Start creating our Recommendation System
- Once we have got all of our data we are able to start creating our recommendation system.

<br>
<br>
<br>


<iframe src="https://giphy.com/embed/SjR2HvaFokmJ1a30wR" width="480" height="270" frameBorder="0" class="giphy-embed" allowFullScreen></iframe>


<br>
<br>
<br>

<br>
<br>
<br>

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

**How can we decide a metric?**
- We want a metric that is going to make songs that belong in my playlist stand out in comparison to the test dataset.
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>


In [69]:
# Summary statistics (count, mean, std, min, quartiles, max) for the whale playlist's numeric features
whale_audio_features.describe()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
count,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0
mean,0.502274,0.398518,5.027397,-10.311479,0.753425,0.045911,0.61989,0.079008,0.13764,0.271527,120.643082,242604.808219,3.90411
std,0.131086,0.188886,3.883629,3.5891,0.434,0.035534,0.30438,0.164879,0.082138,0.135744,31.683731,49974.674325,0.531361
min,0.236,0.0565,0.0,-22.895,0.0,0.0273,0.00239,0.0,0.0559,0.0384,70.06,147952.0,1.0
25%,0.395,0.256,1.0,-12.111,1.0,0.0307,0.392,6e-06,0.0992,0.178,94.023,208653.0,4.0
50%,0.508,0.38,5.0,-9.883,1.0,0.0355,0.707,0.000307,0.114,0.237,124.026,242014.0,4.0
75%,0.592,0.528,9.0,-7.376,1.0,0.0429,0.885,0.0651,0.127,0.339,143.979,271387.0,4.0
max,0.771,0.835,11.0,-5.303,1.0,0.252,0.983,0.739,0.531,0.646,180.917,401426.0,5.0


In [None]:
# Same summary statistics for the test playlist, for comparison with the whale playlist
test_playlist_audio_features.describe()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
count,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0
mean,0.538515,0.49889,5.2175,-9.950297,0.6425,0.088902,0.476746,0.155587,0.170617,0.346262,117.794573,232668.3375,3.855
std,0.177702,0.269094,3.528643,5.891676,0.479864,0.112306,0.367008,0.293239,0.13796,0.234744,32.612584,70513.380462,0.514428
min,0.0625,0.00618,0.0,-31.855,0.0,0.0253,0.000122,0.0,0.0328,0.0299,44.109,22747.0,1.0
25%,0.41675,0.27275,2.0,-12.46375,0.0,0.033975,0.103,3e-06,0.0955,0.15975,89.9725,192378.5,4.0
50%,0.5525,0.4805,5.0,-8.8565,1.0,0.04235,0.4695,0.001385,0.1145,0.289,115.348,231094.5,4.0
75%,0.67025,0.71475,8.0,-5.92375,1.0,0.0802,0.854,0.129,0.175,0.502,139.85975,269986.5,4.0
max,0.908,0.995,11.0,0.496,1.0,0.947,0.996,0.967,0.828,0.963,214.527,676545.0,5.0


<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

#### Using Speechiness

We want to recommend the songs whose speechiness is closest to 0.045911 (the mean of the whale dataset).

In [70]:
# Derive the target from the data rather than hard-coding the rounded value
# shown by describe(), so it stays correct if the whale playlist changes
mean_speechiness = whale_audio_features["speechiness"].mean()

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
We can then loop through our test dataset and find the 5 closest values to the mean_speechiness

In [76]:
# Prevent it recommending songs I have listened to (this is personal preference!)
# Whale tracks are excluded by id rather than by position, so the filter does not
# depend on where they sit in the test playlist or on how many of them there are.
# .copy() makes this an independent frame, so columns can be added to it later
# without writing onto a slice of test_playlist_audio_features.
test_playlist = test_playlist_audio_features[
    ~test_playlist_audio_features["id"].isin(whale_audio_features["id"])
].copy()

# argsort() yields row positions ordered by distance from the target speechiness;
# the first five positions are the five closest candidate tracks.
recommended_songs = test_playlist.iloc[
    (test_playlist["speechiness"] - mean_speechiness).abs().argsort().iloc[:5]
]


<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>


<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/0T7aTl1t15HKHfwep4nANV?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/69Fm9O838y94hhkxWsJkoY?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/354Cv7huFJnlEdVAHvhH3Q?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/76BfKpZqnuNCZtbN8N3s4C?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/1ZauqNrrbC4pYMwMtA2S27?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<br>




<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br> 
<br> 
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>


<hr>

### Step 4) Using All Metrics


We have shown that speechiness alone is not enough to define our music taste, so we need to create a more advanced recommendation system.

<iframe src="https://giphy.com/embed/lrc1TZHRYxj7lGM3Vg" width="480" height="270" frameBorder="0" class="giphy-embed" allowFullScreen></iframe>

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
We saw that we have lots of metadata on songs, so let's use all of it!

However, we now need to calculate distance over lots of dimensions, which requires a slightly more advanced equation.

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

# In comes Greek Hero Euclid
<br>

<img src="./images/euclid.png"
     alt="Euclid"
     style="height:500px;"/>


<br>
<br>
<br>
<br>
<br>
<br>

### Euclidean Distance

<img src="./images/euclidean.png"
     alt="Euclidean Distance"
     style="height:150px;"/>


This looks a lot more complicated than it is. (It's just Pythagoras' theorem repeated across multiple dimensions.)


<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

In [78]:

import math  # kept so any later cell that still calls math.sqrt keeps working

# List out the features we care about
columns_to_check = ["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "valence", "tempo"]

# Mean of each selected feature across the whale playlist
means = whale_audio_features[columns_to_check].mean()

# Euclidean distance of every candidate track from the whale means, computed
# column-wise instead of with a Python-level loop over rows.
# skipna=False keeps a NaN feature as a NaN distance, matching a plain sum().
# assign() returns a new frame, so no column is written onto a slice.
test_playlist = test_playlist.assign(
    distance_from_mean=(
        test_playlist[columns_to_check]
        .sub(means, axis="columns")
        .pow(2)
        .sum(axis=1, skipna=False)
        .pow(0.5)
    )
)


<br>
<br>
<br>
<br>
<br>

<br>
<br>
<br>

<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/0V57cVCVlaU9vXIUBlQdg2?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>

<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/3xSN9JH0u8QCa9AquF5XvV?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/52pGXXhSsBPx3jOSxlU35N?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>

<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/1KQsrNv2vEW7AXKTrFz0Ah?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/0gEcmyKlIUoi3sHTFVO1bE?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>




<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

<hr>

### Step 5) Normalising Each Metric 

Some of you might have noticed the problem.



<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

In [None]:
# Show the summary statistics again to compare the value ranges of the different features
whale_audio_features.describe()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
count,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0,73.0
mean,0.502274,0.398518,5.027397,-10.311479,0.753425,0.045911,0.61989,0.079008,0.13764,0.271527,120.643082,242604.808219,3.90411
std,0.131086,0.188886,3.883629,3.5891,0.434,0.035534,0.30438,0.164879,0.082138,0.135744,31.683731,49974.674325,0.531361
min,0.236,0.0565,0.0,-22.895,0.0,0.0273,0.00239,0.0,0.0559,0.0384,70.06,147952.0,1.0
25%,0.395,0.256,1.0,-12.111,1.0,0.0307,0.392,6e-06,0.0992,0.178,94.023,208653.0,4.0
50%,0.508,0.38,5.0,-9.883,1.0,0.0355,0.707,0.000307,0.114,0.237,124.026,242014.0,4.0
75%,0.592,0.528,9.0,-7.376,1.0,0.0429,0.885,0.0651,0.127,0.339,143.979,271387.0,4.0
max,0.771,0.835,11.0,-5.303,1.0,0.252,0.983,0.739,0.531,0.646,180.917,401426.0,5.0



<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

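### Updated code

Tempo (roughly 70–180) and loudness (roughly -23 to -5) are on much larger scales than the features that live between 0 and 1, so they dominate the raw Euclidean distance. Dividing each difference by that feature's standard deviation puts every feature on a comparable scale.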

In [80]:
# Whale-playlist mean and candidate-playlist standard deviation for each selected feature
means = whale_audio_features[columns_to_check].mean()
stds = test_playlist[columns_to_check].std()

# Standardised Euclidean distance: each difference is divided by the feature's
# standard deviation before squaring, so no single feature dominates the total.
# Computed column-wise instead of row by row; skipna=False keeps a NaN feature
# as a NaN distance, and assign() avoids writing a column onto a slice.
test_playlist = test_playlist.assign(
    distance_from_mean=(
        test_playlist[columns_to_check]
        .sub(means, axis="columns")
        .div(stds, axis="columns")
        .pow(2)
        .sum(axis=1, skipna=False)
        .pow(0.5)
    )
)

In [81]:
# The five candidate tracks with the smallest distance from the whale means
test_playlist.sort_values(by="distance_from_mean").head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,distance_from_mean
166,0.459,0.472,8,-11.412,0,0.0479,0.583,0.000115,0.111,0.246,139.847,audio_features,4XT9wAQ1malGGztBCmVX3m,spotify:track:4XT9wAQ1malGGztBCmVX3m,https://api.spotify.com/v1/tracks/4XT9wAQ1malG...,https://api.spotify.com/v1/audio-analysis/4XT9...,204000,4,0.761995
157,0.607,0.405,9,-8.607,1,0.0265,0.539,7e-06,0.231,0.233,129.543,audio_features,3ou9rSNUQnE7XYmJkUUIOc,spotify:track:3ou9rSNUQnE7XYmJkUUIOc,https://api.spotify.com/v1/tracks/3ou9rSNUQnE7...,https://api.spotify.com/v1/audio-analysis/3ou9...,223040,4,0.791283
81,0.596,0.544,0,-11.759,1,0.0362,0.604,0.017,0.262,0.262,115.022,audio_features,7x46YkKAwp3yZpaa72n5Mu,spotify:track:7x46YkKAwp3yZpaa72n5Mu,https://api.spotify.com/v1/tracks/7x46YkKAwp3y...,https://api.spotify.com/v1/audio-analysis/7x46...,279000,4,0.810795
139,0.528,0.416,9,-8.523,1,0.0305,0.691,0.000276,0.0953,0.184,99.868,audio_features,6hH2y9wgEjgsSeJuv7kfaU,spotify:track:6hH2y9wgEjgsSeJuv7kfaU,https://api.spotify.com/v1/tracks/6hH2y9wgEjgs...,https://api.spotify.com/v1/audio-analysis/6hH2...,152280,5,0.862744
144,0.556,0.261,4,-11.668,1,0.0289,0.797,0.016,0.0893,0.219,110.841,audio_features,0bndF6tTweNXPjwpPL7Slt,spotify:track:0bndF6tTweNXPjwpPL7Slt,https://api.spotify.com/v1/tracks/0bndF6tTweNX...,https://api.spotify.com/v1/audio-analysis/0bnd...,172500,4,0.89041


<br>
<br>
<br>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/4XT9wAQ1malGGztBCmVX3m?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/3ou9rSNUQnE7XYmJkUUIOc?utm_source=generator" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/7x46YkKAwp3yZpaa72n5Mu?utm_source=generator&theme=0" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/6hH2y9wgEjgsSeJuv7kfaU?utm_source=generator&theme=0" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<iframe style="border-radius:12px" src="https://open.spotify.com/embed/track/0bndF6tTweNXPjwpPL7Slt?utm_source=generator&theme=0" width="100%" height="152" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>



### Voilà, you have a recommendation system as good as Spotify!


<iframe src="https://giphy.com/embed/1BFEEIo4h1BuTH8eqP" width="480" height="403" frameBorder="0" class="giphy-embed" allowFullScreen></iframe>