# Imports and dataframe initialization

In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
import pickle
import requests

In [3]:
df = pd.read_csv('tracks_features.csv')

# Data exploration and cleaning function

## Exploration

Check which columns need to be dropped, if any columns need to be cleaned up, and check for null values

In [23]:
df.head()

Unnamed: 0,name,album,album_id,artists,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
0,Testify,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],False,0.47,0.978,7,-5.399,1,0.0727,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999
1,Guerrilla Radio,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],True,0.599,0.957,11,-5.764,1,0.188,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999
2,Calm Like a Bomb,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],False,0.315,0.97,7,-5.424,1,0.483,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999
3,Mic Check,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],True,0.44,0.967,11,-5.83,0,0.237,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999
4,Sleep Now In the Fire,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],False,0.426,0.929,2,-6.729,1,0.0701,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999


In [10]:
df.nunique()

id                  1204025
name                 850944
album                106162
album_id             118382
artists              165365
artist_ids           166423
track_number             50
disc_number              13
explicit                  2
danceability           1362
energy                 3441
key                      12
loudness              39805
mode                      2
speechiness            1653
acousticness           5398
instrumentalness       5402
liveness               1799
valence                1884
tempo                140472
duration_ms          210013
time_signature            5
year                    101
release_date          10566
dtype: int64

In [19]:
df.time_signature.value_counts()

4.0    988647
3.0    162645
5.0     30493
1.0     19431
0.0      2809
Name: time_signature, dtype: int64

In [3]:
df.artists[1]

"['Rage Against The Machine']"

In [13]:
df.isnull().sum()

name                0
album               0
artists             0
explicit            0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
duration_ms         0
time_signature      0
year                0
dtype: int64

## Feature engineering and cleaning to determine function below (READ WARNING)

This is legacy code that was used to construct the cleaning function.  DO NOT RUN, RUN CLEANING FUNCTION INSTEAD.

In [33]:
columns_to_drop = ['artist_ids', 'track_number', 'disc_number', 'album_id', 'id', 'release_date']
df.drop(columns_to_drop, axis=1, inplace=True)

# df.replace('(^\s+|\s+$)', '', regex=True, inplace=True)
df.head()

Unnamed: 0,name,album,artists,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
0,Testify,The Battle Of Los Angeles,['Rage Against The Machine'],False,0.47,0.978,7,-5.399,1,0.0727,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999
1,Guerrilla Radio,The Battle Of Los Angeles,['Rage Against The Machine'],True,0.599,0.957,11,-5.764,1,0.188,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999
2,Calm Like a Bomb,The Battle Of Los Angeles,['Rage Against The Machine'],False,0.315,0.97,7,-5.424,1,0.483,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999
3,Mic Check,The Battle Of Los Angeles,['Rage Against The Machine'],True,0.44,0.967,11,-5.83,0,0.237,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999
4,Sleep Now In the Fire,The Battle Of Los Angeles,['Rage Against The Machine'],False,0.426,0.929,2,-6.729,1,0.0701,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999


In [36]:
# Legacy artist clean-up, kept for reference only (see the warning above this cell).
# Step 1: strip the list brackets from the stringified artist list, e.g. "['A', 'B']".
df['artists'] = df['artists'].str.replace(r'[][]+', '', regex=True)
# Step 2: strip quotes and commas. regex=True must be explicit: the pattern is a
# character class, and pandas >= 2.0 treats patterns as literal text by default,
# in which case nothing would be removed.
df['artists'] = df['artists'].str.replace(r"[\"\',]", '', regex=True)
df.head()

  df['artists'] = df['artists'].str.replace(r"[\"\',]", '')


Unnamed: 0,name,album,artists,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
0,Testify,The Battle Of Los Angeles,Rage Against The Machine,False,0.47,0.978,7,-5.399,1,0.0727,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999
1,Guerrilla Radio,The Battle Of Los Angeles,Rage Against The Machine,True,0.599,0.957,11,-5.764,1,0.188,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999
2,Calm Like a Bomb,The Battle Of Los Angeles,Rage Against The Machine,False,0.315,0.97,7,-5.424,1,0.483,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999
3,Mic Check,The Battle Of Los Angeles,Rage Against The Machine,True,0.44,0.967,11,-5.83,0,0.237,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999
4,Sleep Now In the Fire,The Battle Of Los Angeles,Rage Against The Machine,False,0.426,0.929,2,-6.729,1,0.0701,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999


In [37]:
df.to_csv('cleaned_data')

## Cleaning Function

This is where we convert the data cleaning to a function, then apply the function

In [4]:
def clean_spotify_data(spotify_dataframe):
    '''Return a cleaned copy of the raw Spotify tracks dataframe.

    Drops the identifier/bookkeeping columns the model does not use and
    flattens the stringified artist list (e.g. "['A', 'B']") into plain text
    ("A B") by removing the extraneous brackets, quotes and commas.

    Parameters
    ----------
    spotify_dataframe : pandas.DataFrame
        Tracks data containing an 'artists' column of strings. The droppable
        columns may or may not be present.

    Returns
    -------
    pandas.DataFrame
        A new dataframe. The input is left untouched, so the function can be
        re-run on the same frame (or on already-cleaned data) without error.
    '''
    columns_to_drop = ['artist_ids', 'track_number', 'disc_number', 'album_id', 'id', 'release_date']

    # drop() without inplace returns a new frame, so the caller's dataframe is
    # not mutated; errors='ignore' keeps a repeated run from raising KeyError
    # once the columns are already gone.
    cleaned = spotify_dataframe.drop(columns=columns_to_drop, errors='ignore')

    # Single pass removing brackets, quotes and commas. regex=True is required:
    # pandas >= 2.0 treats the pattern as a literal string by default, which
    # would silently leave the quotes and commas in place.
    artist_names = cleaned['artists'].str.replace(r"[\[\]\"',]+", '', regex=True)

    return cleaned.assign(artists=artist_names)

In [79]:
df = clean_spotify_data(df)

In [5]:
df.head()

Unnamed: 0,name,album,artists,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
0,Testify,The Battle Of Los Angeles,Rage Against The Machine,False,0.47,0.978,7,-5.399,1,0.0727,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999
1,Guerrilla Radio,The Battle Of Los Angeles,Rage Against The Machine,True,0.599,0.957,11,-5.764,1,0.188,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999
2,Calm Like a Bomb,The Battle Of Los Angeles,Rage Against The Machine,False,0.315,0.97,7,-5.424,1,0.483,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999
3,Mic Check,The Battle Of Los Angeles,Rage Against The Machine,True,0.44,0.967,11,-5.83,0,0.237,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999
4,Sleep Now In the Fire,The Battle Of Los Angeles,Rage Against The Machine,False,0.426,0.929,2,-6.729,1,0.0701,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999


# Work on KNN Model

This is where the KNN model is created, trained, tested, and finally saved

## Creating the model
This was extremely messy due to troubleshooting.  Model was later built in "build smaller model"

In [51]:
# Estimators shared by the experiments below.
knn = KNeighborsClassifier(n_jobs=-1)
nn = NearestNeighbors(n_jobs=-1)
vect = TfidfVectorizer(stop_words="english", lowercase=False)
scl = StandardScaler()

# Text -> TF-IDF -> KNN classifier.
pipe = Pipeline([('vect', vect),
                 ('clf', knn)])
# NOTE(review): TfidfVectorizer emits a sparse matrix and StandardScaler defaults
# to with_mean=True, which scikit-learn rejects for sparse input -- confirm whether
# the 'scl' step is meant to stay in this pipeline.
pipe2 = Pipeline([('vect', vect),
                  ('scl', scl),
                  ('clf', nn)])

# Grid for GridSearchCV; keys address pipeline steps as '<step>__<param>'.
parameters = {
 #   'vect__min_df': ([0, .2]),
    'clf__n_neighbors': ([6])
}

# Features / target split: the song name is the label.
target = 'name'
y = df[target]
X = df.drop(columns=target)

gs = GridSearchCV(pipe, parameters, cv=2, n_jobs=-1, verbose=1)

# Split X (not df): splitting the full frame would leave the target column
# inside the training features. random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

## experiment 1
Using this section to collapse the old data before trying experiment 2.

In [9]:

pipe2.fit(df)

Pipeline(steps=[('vect',
                 TfidfVectorizer(lowercase=False, stop_words='english')),
                ('clf', NearestNeighbors())])

In [None]:
def recommend(songdata):
    '''Unfinished stub intended to return 5 song recommendations from the NN model.

    Intended use: load the data returned by the API into a dataframe, then
    pass that dataframe to this function.

    As written, the function only hands ``songdata`` to ``clean_spotify_data``;
    it never queries a model and implicitly returns ``None``.
    '''
    
    s = songdata
    # The return value of clean_spotify_data is discarded here.
    # TODO: capture the cleaned frame, query the model and return the results.
    clean_spotify_data(s)
    

In [14]:
tst = df.iloc[1]

In [72]:
# pipe2.kneighbors(tst, 5, return_distance=False)

In [73]:

# vect.transform(tst['name'])


In [74]:
# nn.kneighbors(tst, 5, return_distance=False)

In [None]:
# Series.apply returns a new Series, so the conversions only take effect when
# they are assigned back; assign() builds a new frame rather than mutating X.
X = X.assign(
    album=X.album.apply(str),
    artists=X.artists.apply(str),
    explicit=X.explicit.apply(int),
)
# Display the converted flag column (0/1 instead of False/True).
X.explicit

0          0
1          1
2          0
3          1
4          0
          ..
1204020    0
1204021    0
1204022    0
1204023    0
1204024    0
Name: explicit, Length: 1204025, dtype: int64

In [None]:
X.head()

In [75]:
# knn.fit(X,y)

In [None]:
X = X.T
X.shape

(17, 1204025)

In [None]:
y.values.reshape((1204025,1))
y.shape

(1204025,)

In [76]:
# pipe.fit(X,y)

In [77]:
# gs.fit(X, y)
# print(gs.best_score_)
# print(gs.best_params_)

## experiment 2

In [101]:
pipe3 = Pipeline([ ('scl', scl),
                  ('clf', knn)])
strcols = ['album','artists']
X2 = X.drop(columns=strcols)

In [263]:
Xtst = X2.iloc[77].to_numpy()
Xtst = Xtst.reshape(1,-1)
Xtst.shape

(1, 15)

In [96]:

X2 = X2.to_numpy().reshape(1,-1)
scl.fit_transform(X2)

array([[0., 0., 0., ..., 0., 0., 0.]])

In [103]:
knn.fit(X2,y)

KNeighborsClassifier(n_jobs=-1)

In [264]:
recnums = knn.kneighbors(Xtst, 5, return_distance=False)



In [106]:
allstrcols = ['name','album','artists']
rectable = df[allstrcols]
rectable.head()

Unnamed: 0,name,album,artists
0,Testify,The Battle Of Los Angeles,Rage Against The Machine
1,Guerrilla Radio,The Battle Of Los Angeles,Rage Against The Machine
2,Calm Like a Bomb,The Battle Of Los Angeles,Rage Against The Machine
3,Mic Check,The Battle Of Los Angeles,Rage Against The Machine
4,Sleep Now In the Fire,The Battle Of Los Angeles,Rage Against The Machine


In [113]:
# for x in recnums:
#     print(rectable.iloc[x])

                                                      name  \
77                                             Rosaryville   
597350   Shiftwork (with George Strait) - Duet With Geo...   
34748                                 Diamond On Your Hand   
1165689                    Even Better Than the Real Thing   
1030881                                       Italian Rain   

                                     album                      artists  
77                             Rosaryville                Kate Campbell  
597350      Just Who I Am: Poets & Pirates  Kenny Chesney George Strait  
34748                  Big Dream Boulevard                Antje Duvekot  
1165689  You Spin Me Round (Like A Record)                Dead Or Alive  
1030881                       Blue Guitars               Stephen Bishop  


In [265]:
recnumlist=recnums.tolist()[0]


In [193]:
# recnumlist

[77, 597350, 34748, 1165689, 1030881]

In [186]:
# recnumlist = [77, 597350, 34748, 1165689, 1030881]

In [266]:
rectable.iloc[recnumlist]

Unnamed: 0,name,album,artists
77,Rosaryville,Rosaryville,Kate Campbell
597350,Shiftwork (with George Strait) - Duet With Geo...,Just Who I Am: Poets & Pirates,Kenny Chesney George Strait
34748,Diamond On Your Hand,Big Dream Boulevard,Antje Duvekot
1165689,Even Better Than the Real Thing,You Spin Me Round (Like A Record),Dead Or Alive
1030881,Italian Rain,Blue Guitars,Stephen Bishop


In [115]:
# knn.predict(Xtst)



array(['Diamond On Your Hand'], dtype=object)

In [124]:
# Persist the fitted classifier; the context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open('knn.pkl', 'wb') as model_file:
    pickle.dump(knn, model_file)

## build smaller model for tiny github data limit
It's truly very tiny.

In [6]:
df2 = df.sample(frac=.1,random_state=45)

In [119]:
#testing to see how model size is affected
x2t = X2.drop(columns=['key','explicit','duration_ms'])

In [12]:
knn2 = KNeighborsClassifier(n_jobs=-1)

In [121]:
knn2.fit(x2t,y)

KNeighborsClassifier(n_jobs=-1)

In [123]:
# Persist the reduced-feature classifier; the context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
with open('knn2.pkl', 'wb') as model_file:
    pickle.dump(knn2, model_file)

In [7]:
df2 = df2.drop(columns=['key','explicit','duration_ms'])

In [203]:
df2['time_signature'].value_counts(normalize=True)

4.0    0.820764
3.0    0.135412
5.0    0.025253
1.0    0.016329
0.0    0.002242
Name: time_signature, dtype: float64

In [8]:
target = 'name'
ys = df2[target]
Xs = df2.drop(columns=target)
Xs = Xs.drop(columns=['album','artists', 'year', 'time_signature'])

In [9]:
allstrcols = ['name','album','artists','year']
rectable2 = df2[allstrcols]
rectable2.head()

Unnamed: 0,name,album,artists,year
828113,Christmas Lights,Christmas Lights,Blush,2012
1045913,Chronosaurus,SKZ2020,Stray Kids,2020
367206,Squid Ink Part 2,A Real Diamond in the Rough,Buckethead,2009
21199,States Away,Lemonade,Breaking Laces,2005
780599,Keep a Light,The Troubadour,Drew De Four,2009


In [10]:
Xs.head()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
828113,0.516,0.778,-5.998,1,0.037,0.709,0.0,0.175,0.506,95.94
1045913,0.657,0.879,-2.733,1,0.0513,0.284,0.0,0.28,0.472,99.973
367206,0.25,0.124,-18.66,0,0.0526,0.812,0.959,0.105,0.0342,123.479
21199,0.805,0.436,-10.191,1,0.03,0.0418,1e-05,0.0742,0.703,113.025
780599,0.303,0.468,-9.727,1,0.0329,0.848,0.0344,0.171,0.193,118.059


In [13]:
knn2.fit(Xs,ys)

KNeighborsClassifier(n_jobs=-1)

In [298]:
Xs.shape

(120402, 10)

In [14]:
Xs.head()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
828113,0.516,0.778,-5.998,1,0.037,0.709,0.0,0.175,0.506,95.94
1045913,0.657,0.879,-2.733,1,0.0513,0.284,0.0,0.28,0.472,99.973
367206,0.25,0.124,-18.66,0,0.0526,0.812,0.959,0.105,0.0342,123.479
21199,0.805,0.436,-10.191,1,0.03,0.0418,1e-05,0.0742,0.703,113.025
780599,0.303,0.468,-9.727,1,0.0329,0.848,0.0344,0.171,0.193,118.059


In [17]:
# Xtst = X2.iloc[77].to_numpy()
# Xtst = Xtst.reshape(1,-1)
# Xtst.shape

Xtst2 = Xs.iloc[95].to_numpy().reshape(1,-1)
# Xtst = Xtst.reshape(1,-1)
Xtst2.shape

(1, 10)

In [338]:
Xtst2

array([[ 3.88000e-01,  8.79000e-01, -6.47300e+00,  0.00000e+00,
         3.72000e-02,  2.07000e-03,  9.28000e-05,  2.95000e-01,
         5.80000e-01,  1.56434e+02]])

In [18]:
recnums2 = knn2.kneighbors(Xtst2, 5, return_distance=False)
recnumlist2=recnums2.tolist()[0]
rectable2.iloc[recnumlist2]



Unnamed: 0,name,album,artists,year
600794,Sommar Adjö,Jan Johansson in Hamburg with Georg Riedel,Jan Johansson Georg Riedel,2011
121690,Winter,Winter,Glow,2008
484347,One Big Boss,Suitcase 4: Captain Kangaroo Won the War,Guided By Voices,2016
1001176,Palms,The Lights Behind Us,Sonaura,2005
284482,Here and Now,Jean-François Groulx,Jean-François Groulx,2009


In [218]:
recnumlist

[77, 597350, 34748, 1165689, 1030881]

In [19]:
# Persist the model fitted on the 10% sample as 'model.pkl'; the context manager
# guarantees the file is flushed and closed even if pickling fails part-way.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(knn2, model_file)

In [20]:
Song_data = rectable2
Song_data.to_csv('Song_data.csv')

In [22]:
Song_data = pd.read_csv('Song_data.csv', index_col=0)
Song_data.head(20)

Unnamed: 0,name,album,artists,year
828113,Christmas Lights,Christmas Lights,Blush,2012
1045913,Chronosaurus,SKZ2020,Stray Kids,2020
367206,Squid Ink Part 2,A Real Diamond in the Rough,Buckethead,2009
21199,States Away,Lemonade,Breaking Laces,2005
780599,Keep a Light,The Troubadour,Drew De Four,2009
125978,Gracias a la Vida,Sola,Amanda Martinez,2006
34585,Glass Houses,Our Last Escape,Aaron Booth,2004
131795,Thinking About Bix,Thinking About Bix,Dick Hyman,2012
922723,Daydream,"Deep Elm Sampler No. 12 ""Sometimes I See You i...",Carly Comando,2014
549485,"Piano Concerto No. 27 in B-Flat Major, Op. 17,...",Mozart: Piano Concertos Nos. 9 and 27,Wolfgang Amadeus Mozart Jenő Jandó Concentus H...,1990


In [310]:
Song_data.iloc[recnumlist2]

Unnamed: 0,name,album,artists,year
720812,The Ledge,All for Nothing / Nothing for All,The Replacements,1997
186986,Writers Block,Windmills and Wishes,Ange Hardy,2010
95956,My Enemy,Behind Every Door,Neverset,2006
504036,Rebels,Blurred,Genesis Elijah,2020
793851,Quicksand Under Carpet - UK Single Version,Winter's Kill,New Radiant Storm King,2014


In [327]:
Xs.head()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
828113,0.516,0.778,-5.998,1,0.037,0.709,0.0,0.175,0.506,95.94
1045913,0.657,0.879,-2.733,1,0.0513,0.284,0.0,0.28,0.472,99.973
367206,0.25,0.124,-18.66,0,0.0526,0.812,0.959,0.105,0.0342,123.479
21199,0.805,0.436,-10.191,1,0.03,0.0418,1e-05,0.0742,0.703,113.025
780599,0.303,0.468,-9.727,1,0.0329,0.848,0.0344,0.171,0.193,118.059


## Full Model and functions (Deprecated)
former use: Used to have a clean result rather than spending time cleaning experiments

# Testing API
Testing the API before using it in a function.

In [311]:
# Never commit API credentials: read them from the environment instead.
# SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET are the variable names that
# spotipy's SpotifyClientCredentials() looks up when called without arguments.
# NOTE(review): the key pair that used to be hardcoded in this cell has been
# exposed in the notebook and should be revoked and regenerated.
import os

CLIENT_ID = os.getenv('SPOTIPY_CLIENT_ID')
CLIENT_SECRET = os.getenv('SPOTIPY_CLIENT_SECRET')

In [332]:
df2.rename(mapper={'name':'song_name'},axis='columns', inplace=True)
df2.head()

Unnamed: 0,song_name,album,artists,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,year
828113,Christmas Lights,Christmas Lights,Blush,0.516,0.778,-5.998,1,0.037,0.709,0.0,0.175,0.506,95.94,4.0,2012
1045913,Chronosaurus,SKZ2020,Stray Kids,0.657,0.879,-2.733,1,0.0513,0.284,0.0,0.28,0.472,99.973,4.0,2020
367206,Squid Ink Part 2,A Real Diamond in the Rough,Buckethead,0.25,0.124,-18.66,0,0.0526,0.812,0.959,0.105,0.0342,123.479,4.0,2009
21199,States Away,Lemonade,Breaking Laces,0.805,0.436,-10.191,1,0.03,0.0418,1e-05,0.0742,0.703,113.025,4.0,2005
780599,Keep a Light,The Troubadour,Drew De Four,0.303,0.468,-9.727,1,0.0329,0.848,0.0344,0.171,0.193,118.059,4.0,2009


# Py functions
This area will be used to create and test the functions that will make up the py files that the app will use.  It will have reimports for easy transfer to the .py file that will be built.

In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import pickle
import requests

In [339]:
dummy_data = np.array([[ 3.88000e-01,  8.79000e-01, -6.47300e+00,  0.00000e+00,
         3.72000e-02,  2.07000e-03,  9.28000e-05,  2.95000e-01,
         5.80000e-01,  1.56434e+02]])

In [342]:
# Load the pickled KNN model; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code -- only load model files that
# were produced by this project.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Song metadata saved earlier; the first CSV column is restored as the index.
Song_data = pd.read_csv('Song_data.csv', index_col=0)

def get_recommendation(user_song_data, n_recommendations=5):
    '''Return the songs closest to the given audio features.

    Parameters
    ----------
    user_song_data : array-like with one row of audio features
        Passed straight to ``model.kneighbors``; only the neighbours of the
        first row are used.
    n_recommendations : int, default 5
        Number of neighbours to look up.

    Returns
    -------
    pandas.DataFrame
        The rows of ``Song_data`` at the neighbour positions.
    '''
    neighbor_positions = model.kneighbors(
        user_song_data, n_recommendations, return_distance=False
    )
    # kneighbors yields one row of positions per query row; take the first.
    # The positions are used with .iloc, so Song_data must keep the same row
    # order as the data the model was fitted on.
    return Song_data.iloc[neighbor_positions[0]]


In [343]:
get_recommendation(dummy_data)



Unnamed: 0,name,album,artists,year
720812,The Ledge,All for Nothing / Nothing for All,The Replacements,1997
186986,Writers Block,Windmills and Wishes,Ange Hardy,2010
95956,My Enemy,Behind Every Door,Neverset,2006
504036,Rebels,Blurred,Genesis Elijah,2020
793851,Quicksand Under Carpet - UK Single Version,Winter's Kill,New Radiant Storm King,2014


In [None]:
'''Query Spotify API based on Artist name and Track, convert using 'audio analysis' to match comparasion dataset'''

from os import getenv
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np

# Initalize spotipy
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

def convert(artist, track):
    '''Look up a track on Spotify and return its audio features as a DataFrame.

    Searches the US market for the first track matching ``artist`` and
    ``track``, fetches its audio features, and drops the fields listed below.
    The remaining columns are presumably the ten features the model was
    fitted on -- verify against the actual API response.

    Parameters
    ----------
    artist : str
        Artist name used in the search query.
    track : str
        Track title used in the search query.

    Returns
    -------
    pandas.DataFrame
        Audio features built from the API response (one row per returned record).

    Raises
    ------
    IndexError
        If the search returns no matching track.
    '''
    track_search = spotify.search(q='artist:' + artist + ' track:' + track, type='track', limit=1, market='US')
    items = track_search['tracks']['items']
    if not items:
        # Same exception type an empty result raised before, now with context.
        raise IndexError(f'No Spotify track found for artist={artist!r}, track={track!r}')

    user_audio = spotify.audio_features(items[0]['id'])
    features = pd.DataFrame.from_records(user_audio)
    # A DataFrame is returned on purpose (as before); the former
    # to_numpy().reshape(1, -1) statement discarded its result and had no effect.
    return features.drop(
        columns=['key', 'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms', 'time_signature']
    )

In [23]:
Xs.head()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
828113,0.516,0.778,-5.998,1,0.037,0.709,0.0,0.175,0.506,95.94
1045913,0.657,0.879,-2.733,1,0.0513,0.284,0.0,0.28,0.472,99.973
367206,0.25,0.124,-18.66,0,0.0526,0.812,0.959,0.105,0.0342,123.479
21199,0.805,0.436,-10.191,1,0.03,0.0418,1e-05,0.0742,0.703,113.025
780599,0.303,0.468,-9.727,1,0.0329,0.848,0.0344,0.171,0.193,118.059


In [63]:
tfr = pd.DataFrame(data={'danceability':[3]})

In [37]:
tfr.iloc[0]

danceability    3
Name: 0, dtype: int64

In [64]:
Xtst = pd.DataFrame(data={'danceability':[.07], 'valence':[.5]})

In [49]:
Xtst.head()

Unnamed: 0,danceability,valence
0,0.07,0.5


In [42]:
tfr

Unnamed: 0,danceability
0,3
1,3


In [65]:
pd.set_option('display.width', 5000)
# .iloc[0, 0] is purely positional. The former tfr.iloc[0][0] looked up the
# integer 0 on a label-indexed Series, a positional fallback pandas has deprecated.
if tfr.iloc[0, 0] > 1:
    # Out-of-range feature value: build the error frame shown to the user.
    dft = pd.DataFrame(data={'Error finding song': ['Please use a different song']})
    print(dft.head())
else:
    print('Second pass')

            Error finding song
0  Please use a different song


In [56]:
dft.head()

Unnamed: 0,Error
0,The api could not find your song. Please use ...


In [66]:
# .iloc[0, 0] is purely positional. The former Xtst.iloc[0][0] looked up the
# integer 0 on a label-indexed Series, a positional fallback pandas has deprecated.
if Xtst.iloc[0, 0] > 1:
    print('Success')
else:
    print('Full completion')

Full completion


In [68]:
df2.to_csv('Full_data.csv')

In [70]:
df2.head()

Unnamed: 0,name,album,artists,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,year
828113,Christmas Lights,Christmas Lights,Blush,0.516,0.778,-5.998,1,0.037,0.709,0.0,0.175,0.506,95.94,4.0,2012
1045913,Chronosaurus,SKZ2020,Stray Kids,0.657,0.879,-2.733,1,0.0513,0.284,0.0,0.28,0.472,99.973,4.0,2020
367206,Squid Ink Part 2,A Real Diamond in the Rough,Buckethead,0.25,0.124,-18.66,0,0.0526,0.812,0.959,0.105,0.0342,123.479,4.0,2009
21199,States Away,Lemonade,Breaking Laces,0.805,0.436,-10.191,1,0.03,0.0418,1e-05,0.0742,0.703,113.025,4.0,2005
780599,Keep a Light,The Troubadour,Drew De Four,0.303,0.468,-9.727,1,0.0329,0.848,0.0344,0.171,0.193,118.059,4.0,2009
