**This notebook explores the song and artist portion of the original Peloton dataset and tests pulling information on the tracks via the Spotify API. This function is used to create the dataset utilized for modeling.**

In [1]:
import pandas as pd
import numpy as np

# Source: https://betterprogramming.pub/how-to-extract-any-artists-data-using-spotify-s-api-python-and-spotipy-4c079401bc37
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time 

import pickle

In [117]:
# import peloton data
# Thank you to okaykristinakay on Reddit for the data!
# Source: https://www.reddit.com/r/pelotoncycle/comments/m18xnr/peloton_class_list_march_update/
df = pd.read_excel('../../../data/original_datasets/AGF_Peloton Classes March.xlsx')
df.head()

Unnamed: 0,classId,className,classDescription,classDifficulty,classDuration,classType,classLength,classLocation,classOriginalAirdate,classRating,classRatingCount,instructorName,instructorBio,classEquipment,classSongs,classArtists,classUrl
0,7f66378211c9476b9b5619bf989f91d0,20 min Peace Meditation,A guided meditation that focuses on cultivatin...,4.3333,20,Meditation,23,psny-studio-2,2021-09-03 13:25:00,0.9847,131,Aditi Shah,"To Aditi, yoga goes beyond movement and can br...",Yoga Block,Meditation 22,RIOPY,https://members.onepeloton.com/classes/bootcam...
1,54ac61803b364b2fa8378acd9f593cdb,15 min Bodyweight Strength,"No equipment, no problem. Join us for a high-e...",5.7755,15,Strength,19,psny-studio-3,2021-09-03 13:19:00,0.9933,297,Olivia Amato,"Born and raised in New York, Olivia grew up pl...",Workout Mat,"California Gurls,Let's Get Loud,Let It Rock (f...","Katy Perry,Snoop Dogg,Jennifer Lopez,Kevin Rud...",https://members.onepeloton.com/classes/bootcam...
2,c75fd4831573483c9d45739aae11d083,20 min Focus Flow: Lower Body,This yoga flow class focuses on poses that eng...,4.3664,20,Yoga,23,psny-studio-2,2021-09-03 12:51:00,1.0,159,Aditi Shah,"To Aditi, yoga goes beyond movement and can br...","Yoga Blanket,Yoga Block,Yoga Mat","Interlude No 1,Oceansize,She Just Likes To Fig...","James Vincent McMorrow,Oh Wonder,Four Tet,Grim...",https://members.onepeloton.com/classes/bootcam...
3,470086936f7a4723ab5a53cb80b571ff,45 min Pop Bootcamp,Split your workout 50/50 between cardio on the...,7.8312,45,Tread Bootcamp,50,psny-studio-4,2021-09-03 11:56:00,0.9737,152,Olivia Amato,"Born and raised in New York, Olivia grew up pl...","Workout Mat,Medium Weights",34+35 (Remix) (feat. Doja Cat & Megan Thee Sta...,"Ariana Grande,Doja Cat,Megan Thee Stallion,Jus...",https://members.onepeloton.com/classes/bootcam...
4,9680a817bf2149d2b91990c87166a400,20 min Pop Ride,We dare you not to dance as you ride to all th...,7.4,20,Cycling,24,uk,2021-09-03 07:52:00,1.0,82,Sam Yo,Sam is a pro at many things but shines when it...,,"Señorita,Marry You,Irreplaceable,What Do You M...","Justin Timberlake,Bruno Mars,Beyoncé,Justin Bi...",https://members.onepeloton.com/classes/bootcam...


In [118]:
# overview
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16892 entries, 0 to 16891
Data columns (total 17 columns):
classId                 16892 non-null object
className               16892 non-null object
classDescription        16892 non-null object
classDifficulty         16892 non-null float64
classDuration           16892 non-null int64
classType               16892 non-null object
classLength             16892 non-null int64
classLocation           16892 non-null object
classOriginalAirdate    16892 non-null object
classRating             16892 non-null float64
classRatingCount        16892 non-null int64
instructorName          16892 non-null object
instructorBio           16883 non-null object
classEquipment          16892 non-null object
classSongs              16892 non-null object
classArtists            16892 non-null object
classUrl                16892 non-null object
dtypes: float64(2), int64(3), object(12)
memory usage: 2.2+ MB


In [119]:
# Pull song list from a class
songtest = df['classSongs'][1].split(",")
songtest

['California Gurls',
 "Let's Get Loud",
 'Let It Rock (feat. Lil Wayne)',
 'Every Chance We Get We Run (feat. Tegan & Sara)',
 '34+35 (Remix) (feat. Doja Cat & Megan Thee Stallion)',
 'Leave The World Behind']

In [120]:
# Pull artist list from a class
artisttest = df['classArtists'][1].split(",")
artisttest

['Katy Perry',
 'Snoop Dogg',
 'Jennifer Lopez',
 'Kevin Rudolf',
 'Lil Wayne',
 'David Guetta - Alesso',
 'Tegan Rain Quin',
 'Ariana Grande',
 'Doja Cat',
 'Megan Thee Stallion',
 'SNBRN',
 'Kaleena Zanders']

In [121]:
#Spotify API set up
# Source: https://betterprogramming.pub/how-to-extract-any-artists-data-using-spotify-s-api-python-and-spotipy-4c079401bc37
client_id = '4aac5b215c8a4fc591b506b71af7ebf5'
client_secret = '31d999b609424f569382b047ba828b00'

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [122]:
# Set up search term 
searchtest = songtest[0] + " " + artisttest[0]
searchtest

'California Gurls Katy Perry'

In [123]:
# Pull out trackid
result = sp.search(searchtest)
trackidtest = result['tracks']['items'][0]['id']

In [124]:
trackidtest

'6tS3XVuOyu10897O3ae7bi'

In [125]:
track_df = pd.DataFrame(columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 
                        'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 
                        'speechiness', 'tempo', 'time_signature'])

In [126]:
track_df

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature


In [127]:
# Create function to pull out track features
# Source: https://betterprogramming.pub/how-to-extract-any-artists-data-using-spotify-s-api-python-and-spotipy-4c079401bc37
# Edited to put track information as additional row in track_df
def getTrackFeatures(id):
    meta = sp.track(id)
    features = sp.audio_features(id)
    
    # meta
    id_searched = id
    name = meta['name']
    album = meta['album']['name']
    artist = meta['album']['artists'][0]['name']
    release_date = meta['album']['release_date']
    length = meta['duration_ms']
    popularity = meta['popularity']
    
    # features
    acousticness = features[0]['acousticness']
    danceability = features[0]['danceability']
    energy = features[0]['energy']
    instrumentalness = features[0]['instrumentalness']
    liveness = features[0]['liveness']
    loudness = features[0]['loudness']
    speechiness = features[0]['speechiness']
    tempo = features[0]['tempo']
    time_signature = features[0]['time_signature']
    
    track = [id_searched, name, album, artist, release_date, length, popularity, danceability, acousticness, 
             danceability, energy, instrumentalness, liveness, loudness, speechiness, tempo, time_signature]
    
    track_df.loc[(len(track_df) +1)] = track
    return track_df

In [128]:
trackresultstest = getTrackFeatures(trackidtest)
trackresultstest

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
1,California Gurls,Katy Perry - Teenage Dream: The Complete Confe...,Katy Perry,2012-03-12,234653,74,0.791,0.00446,0.791,0.754,0,0.163,-3.729,0.0569,125.014,4


In [104]:
# Use function to test track results function
trackresultstest = getTrackFeatures(trackidtest)
trackresultstest

['California Gurls',
 'Katy Perry - Teenage Dream: The Complete Confection',
 'Katy Perry',
 '2012-03-12',
 234653,
 74,
 0.791,
 0.00446,
 0.791,
 0.754,
 0,
 0.163,
 -3.729,
 0.0569,
 125.014,
 4]

In [110]:
# Test appending row to track_df 
track_df.loc[0] = trackresultstest

In [111]:
# Check results
track_df

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,California Gurls,Katy Perry - Teenage Dream: The Complete Confe...,Katy Perry,2012-03-12,234653,74,0.791,0.00446,0.791,0.754,0,0.163,-3.729,0.0569,125.014,4
