# Importing Libraries

In [1]:
import pandas as pd
import time
import os
import warnings

# Show up to 100 rows when a DataFrame is displayed.
# The fully-qualified key is required: the bare pattern 'max_rows' matches more
# than one option on newer pandas releases and raises OptionError there.
pd.set_option('display.max_rows', 100)

# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation warnings from pandas/scikit-learn. Consider narrowing it
# to specific categories once the notebook is stable.
warnings.filterwarnings('ignore')
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import pickle
from tqdm import tqdm 

# Importing Main Spotify Dataset

In [2]:
# Load the Spotify track dataset; the path is relative to the notebook's
# working directory, so the kernel must be started next to the `Spotify/` folder.
df = pd.read_csv('Spotify/data.csv')

In [3]:
# Set index as track_ids
# Rebind the name instead of mutating with inplace=True: the result is the same
# frame indexed by 'id', but the cell no longer alters an object that other
# references may still point at, and the call can be chained.
df = df.set_index('id')

In [4]:
# Keep only the columns the trained model works with (all rows, same order as
# listed here); every other column of the dataset is dropped.
df = df.loc[:, [
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_ms',
]]

# Scaling

In [5]:
# Scale the data before applying the model
#
# NOTE(review): the scaler is fit on *this* dataset. If the model was trained on
# features standardised with a different scaler, that fitted scaler should be
# loaded and `.transform()` used here instead -- confirm against the training code.
scaler = StandardScaler()

# Build a fresh float DataFrame rather than assigning through `df.iloc[:, :]`.
# The in-place form writes float values into whatever dtypes the columns already
# have (presumably integers for `key` / `mode`), which relies on implicit
# upcasting that recent pandas versions deprecate, and it writes into a frame
# derived from a column selection. Index and column labels are preserved.
df = pd.DataFrame(
    scaler.fit_transform(df),
    index=df.index,
    columns=df.columns,
)

In [6]:
# Preview the first rows of the scaled feature frame as a quick sanity check.
df.head()

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
6KbQ3uYMLKb5jDxLF7wYDD,0.968662,-1.097999,1.365333,-0.186652,0.641344,-0.28984,1.332319,1.296562,-0.314998,0.940924,0.0495,-0.599713
6KuQTIu1KoTTkLXKrwlLPV,-0.907636,-1.776785,0.796383,-3.014729,0.641344,-0.319186,1.329664,2.389253,-0.737519,-1.735454,-1.073199,0.418119
6L63VW0PibdM1HDSBoqnoM,1.202486,-1.004503,-0.057043,-1.509457,-1.559227,5.568626,0.294154,-0.523513,-0.495997,1.325822,-0.317996,-1.04768
6M94FkXd15sOAOQYRnWPN8,1.384983,-1.341091,-1.194943,-0.593587,-1.559227,-0.009722,1.332319,2.343994,-0.541247,0.716082,-0.291114,-0.417454
6N6tiFZ9vLTSOIxkj8qKrd,-1.871449,-1.064341,1.649808,-0.963288,0.641344,-0.34453,1.319044,2.411883,-0.614778,-1.763655,-1.783425,3.7613


# Applying the Model

In [7]:
# Import the Model
#
# SECURITY: unpickling executes arbitrary code embedded in the file. Only load
# 'final_model' if it was produced by a trusted source (e.g. your own training run).
#
# `pickle` is already imported in the first cell; the context manager guarantees
# the file handle is closed even if unpickling fails.
with open('final_model', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [8]:
# Predict on the new dataset
# `preds` is indexed below as preds[:, 1], i.e. one row per track and one column
# per class -- presumably scikit-learn's predict_proba layout; confirm against
# the type of `loaded_model`.
preds = loaded_model.predict_proba(df)

In [9]:
# Build the list of track ids (the frame's index labels) to look up later.
# A track is kept when column 1 of `preds` is strictly greater than 0.8.
# NOTE(review): column 1 is presumably the probability of the "positive" class;
# verify the column order against `loaded_model.classes_`.
is_confident = preds[:, 1] > 0.8
list_of_songs = list(df.index[is_confident])

# Fetching Track Info from the Spotify API

In [10]:
import requests
import json
from requests.auth import HTTPBasicAuth

In [11]:
# Spotify API credentials are read from the environment so that secrets never
# live in the notebook or its version history. Export SPOTIFY_CLIENT_ID and
# SPOTIFY_CLIENT_SECRET before starting the kernel; a missing variable raises
# KeyError naming the variable that is not set.
#
# NOTE(review): the credentials that were previously hardcoded in this cell
# should be treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']

In [12]:
auth_url = 'https://accounts.spotify.com/api/token'

# Request an access token: POST the client id/secret as form-encoded fields
# with grant_type=client_credentials. The timeout keeps a stalled connection
# from hanging the notebook indefinitely.
auth_response = requests.post(
    auth_url,
    data={
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
    },
    timeout=10,
)

# Surface bad credentials / server errors as an HTTPError here, rather than as
# an opaque KeyError on 'access_token' below.
auth_response.raise_for_status()

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [13]:
# Authorization header sent with every API request: "Bearer <access token>".
headers = dict(Authorization='Bearer {token}'.format(token=access_token))

In [14]:
# Base URL of all Spotify API endpoints; endpoint paths (e.g. 'tracks/<id>')
# are appended directly, so the trailing slash is required.
BASE_URL = 'https://api.spotify.com/v1/'

In [15]:
# Empty result table: one row per selected track id, with the two columns that
# the lookup loop fills in.
data = pd.DataFrame(
    columns=['Song Name', 'Artist Name'],
    index=list_of_songs,
)

In [17]:
# Look up each selected track and record its title and first-listed artist.
# `row` is the positional row in `data`, which was built from the same list and
# therefore shares its order with `list_of_songs`.
for row, track_id in enumerate(tqdm(list_of_songs)):
    response = requests.get(BASE_URL + 'tracks/' + track_id, headers=headers, timeout=10)
    # Stop on HTTP errors (expired token, rate limiting, unknown id) instead of
    # failing later with a KeyError on the error payload.
    response.raise_for_status()
    song_info = response.json()

    data.iloc[row, 0] = song_info['name']                # 'Song Name'
    data.iloc[row, 1] = song_info['artists'][0]['name']  # 'Artist Name' (first artist only)

100%|██████████| 63/63 [00:24<00:00,  2.53it/s]


In [18]:
# Display the looked-up song and artist names for the selected tracks.
data

Unnamed: 0,Song Name,Artist Name
1TKwqSq2YTqJ4Aso17FfYR,What 'cha Say,The Meters
099Ov2UBeZcckb2Py8o7qa,Pain,Betty Wright
0DQlv289AsGiUJJZL4mviO,Little Melonae,Miles Davis
3LPEU6PFvVal1Iya8I5eU5,Creola,Jimmy Buffett
59FdiKJe5LXnDhKKd6vrI5,I'll Remember April,Clifford Brown
78rosLrFWoORay8qHATSVk,I'll Remember April,Kenny Dorham
1MmxrJ35NzNxHjPdyKDhut,Kashmir - 1990 Remaster,Led Zeppelin
61g5yH0ig62XhtQ161kEED,In My Time of Dying - Initial / Rough Mix,Led Zeppelin
0slsrhWlw0WrrRe1KCVboI,And the Healing Has Begun,Van Morrison
66YI1uZX5bqDb760dF5XYh,Dark Eyes,Dizzy Gillespie
