# Dependencies

In [11]:
# ! pip install python-dotenv
# ! pip install spotipy --upgrade
# ! touch .gitignore

import pandas as pd
from sklearn import preprocessing
from sklearn.metrics.pairwise import cosine_similarity

import spotipy
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials

from dotenv import load_dotenv
import os

### Spotify Setup

In [12]:
# load environment variables 
load_dotenv()

# get Spotify API login credentials 
cid = os.getenv("cid")
secret = os.getenv("secret")

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= cid,
                                                           client_secret= secret))

# Inputs

In [24]:
# take user input from Spotify 
input_url = input('Input Spotify song URL: ')
# find audio features of user's track
pp_input_df = sp.audio_features(input_url)
p_input_df = pd.DataFrame.from_dict(pp_input_df)
input_df = p_input_df[['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'valence', 'tempo', 'time_signature']]

# TEST CODE 
# input_df

Input Spotify song URL: https://open.spotify.com/track/7wFrXvmVFnYCUJY5OtOJVG?si=abf7b8f9f7754419


# Data Frame Preparation   

##### Spotify song features dataset link: 
https://www.kaggle.com/datasets/rodolfofigueroa/spotify-12m-songs

In [25]:
# read in Spotify data set and return cleaned DataFrame 
df2 = pd.read_csv('tracks_features.csv')
df3 = df2[['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'valence', 'tempo', 'time_signature']]
df4 = pd.concat([input_df, df3]).reset_index(drop=True)


# TEST CODE 
# df2 = df1.truncate(after = 200)
# df4.head()

## Feature Scaling
### MinMaxScaler scales audio features to the range of 0 and 1. 
#### - Scaling all audio features to the range of 0 and 1 ensure that each metric is weighted on the same scale when finding recommendations. 

In [26]:
# select columns to apply feature scaling to 
columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'valence', 'tempo', 'time_signature']

# scale selected columns
scaler = preprocessing.MinMaxScaler()
df4[columns] = scaler.fit_transform(df4[columns])


# TEST CODE 
# df4.head()

## Calculating Cosine Similarity
### Cosine_similarity finds how similar the user input song is to each song in the DataFrame
#### - Cosine similarity calculates how similar the input track is to each track in the DataFrame by finding the difference between the input audio features vector and each DataFrame audio features vector using the formula: (A.B) / (||A||.||B||).

In [27]:
# create and populate column in DataFrame to hold cosine similarity calculations
df4['distances'] = cosine_similarity(df4.iloc[0:,1:11], df4.iloc[0:1,1:11])

# select number of songs to display in output 
n = 10
n_plus1 = n + 1
n_largest = df4.nlargest((n_plus1), 'distances')


# TEST CODE
# n_largest

# Outputs 

In [30]:
# create DataFrame to hold top n track audio features
final_df = []

for i in n_largest['id']:
    final_df.append(
    
        sp.audio_features(n_largest['id'])
    )


# create DataFrame to hold top n track IDs
final_df1 = []
    
for i in range(n_plus1):
    final_df1.append(
        final_df[0][i]['id']
    )


# TEST CODE
# final_df

In [31]:
# create empty DataFrame to hold output  
result = pd.DataFrame(columns = ['ID', 'Title', 'Artist', 'Year', 'URI'])

# populate output DataFrame
for i in range(n_plus1):
    r_id = final_df1[i] 
    track_detail = sp.track(r_id)
    
    r_name = track_detail.get('name')
    r_artist = track_detail.get('artists')[0]['name']
    r_uri = track_detail.get('uri')
    r_year = track_detail.get('album')['release_date']
    
    r_list = [r_id, r_name, r_artist, r_year, r_uri]
    result.loc[len(result)] = r_list

# display output DataFrame
display(result)

Unnamed: 0,ID,Title,Artist,Year,URI
0,7wFrXvmVFnYCUJY5OtOJVG,Апрель,Тося Чайкина,2019-04-16,spotify:track:7wFrXvmVFnYCUJY5OtOJVG
1,7wFrXvmVFnYCUJY5OtOJVG,Апрель,Тося Чайкина,2019-04-16,spotify:track:7wFrXvmVFnYCUJY5OtOJVG
2,3R3BAoW0LS7Z63MRIKA07k,曾經愛過,Jeff Chang,2007-12-10,spotify:track:3R3BAoW0LS7Z63MRIKA07k
3,4V06GkEniMzkakGgLi66CJ,As It Was Written,Dylan James,2016-10-14,spotify:track:4V06GkEniMzkakGgLi66CJ
4,2JmFRXaJrkAUD2cs6U20KG,Jet Fuel,Mac Miller,2018-08-03,spotify:track:2JmFRXaJrkAUD2cs6U20KG
5,2Y67TJjDzjiYDU40lv2OFU,Dukh Par Har,Gurunam Singh,2008-01-01,spotify:track:2Y67TJjDzjiYDU40lv2OFU
6,4CG6vvFt5txQ2e79duyGR9,Last Breath,Mike Murk,2020-09-30,spotify:track:4CG6vvFt5txQ2e79duyGR9
7,0zsUMpq1NWhGMjPU9cMDML,Принимай меня,Olga Buzova,2018-10-05,spotify:track:0zsUMpq1NWhGMjPU9cMDML
8,7EqWNeER1OBHnTMb2O7TB5,Solace,Repose,2010-03-20,spotify:track:7EqWNeER1OBHnTMb2O7TB5
9,4AB78gWOSj8zkNmgAsopJX,A Dance of Light,Terra Lumina,2012-12-18,spotify:track:4AB78gWOSj8zkNmgAsopJX
