## 1. Import dependencies and define functions

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time 
from config import client_id, client_secret
import re

In [2]:
client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
def getURI(uri_list):
#     p = re.compile(r'(?:spotify:playlist:)(\w{22})')
    p = re.compile(r'(?:spotify:(?:album|playlist|artist|track):)(\w{22})')
    URIs = [p.match(item).group(1) for item in uri_list]
    return URIs

In [28]:
def getPlaylistTracks(playlist_URI_list):
    # Tracks variable will hold the JSON dictionaries,
    # one dictionary for every playlist 
    
    clean_URIs = getURI(playlist_URI_list)
    json_dicts = [sp.playlist_items(uri) for uri in clean_URIs]
    tracks = []
    playlist_count = 0
    song_count = 0
    for i in range(len(json_dicts)):
        json = json_dicts[i]['items']
        playlist_count += 1
        for j in range(len(json)):
            try:
                track = json[j]['track']['id']
                tracks.append(track)
                song_count +=1
            except (TypeError, ValueError):
                pass
    print('Number of playlists scanned: ',playlist_count, 'Total songs: ', song_count)
    return tracks

In [8]:
def getTrackFeatures(id):
  meta = sp.track(id)
  features = sp.audio_features(id)

  # meta
  name = meta['name']
  album = meta['album']['name']
  artist = meta['album']['artists'][0]['name']
  release_date = meta['album']['release_date']
  length = meta['duration_ms']
  popularity = meta['popularity']

  # features
  acousticness = features[0]['acousticness']
  danceability = features[0]['danceability']
  energy = features[0]['energy']
  instrumentalness = features[0]['instrumentalness']
  liveness = features[0]['liveness']
  loudness = features[0]['loudness']
  speechiness = features[0]['speechiness']
  tempo = features[0]['tempo']
  time_signature = features[0]['time_signature']

  track = [name, album, artist, release_date, length, popularity, danceability, acousticness, danceability, energy, instrumentalness, liveness, loudness, speechiness, tempo, time_signature]
  return track

## 2. Import our playlist mood data and grab track info for each mood

In [11]:
mood_playlists = pd.read_csv('Mood_Playlists.csv')
mood_playlists.head()

Unnamed: 0,song_URI,num_tracks,type,mood,playlist_name,user_id,counts
0,spotify:playlist:37i9dQZF1DX76Wlfdnj7AP,200,Spotify,Workout,Beast Mode,Spotify,872.0
1,spotify:playlist:37i9dQZF1DX70RN3TfWWJh,100,Spotify,Workout,Workout,Spotify,
2,spotify:playlist:37i9dQZF1DX35oM5SPECmN,76,Spotify,Workout,Run Wild,Spotify,
3,spotify:playlist:37i9dQZF1DX9BXb6GsGCLl,80,Spotify,Workout,Powerwalk!,Spotify,
4,spotify:playlist:37i9dQZF1DWXx3Txis2L4x,40,Spotify,Workout,Rock 'n' Run 150-180 BPM,Spotify,


In [13]:
# Filter the mood_playlists df for only the sad songs
sad_playlist = mood_playlists[mood_playlists.mood =='sad']

# Get the song_URI column
sad_URIs = sad_playlist['song_URI']

# Convert the series to a list 
sad_URIs = list(sad_URIs)

['spotify:playlist:45nUFHsTdA01QF8jsWt0IQ',
 'spotify:playlist:37i9dQZF1DWSqBruwoIXkA',
 'spotify:playlist:37i9dQZF1DWW2hj3ZtMbuO',
 'spotify:playlist:37i9dQZF1DX7qK8ma5wgG1',
 'spotify:playlist:37i9dQZF1DX8Vz2ROLXhTT']

In [30]:
sad_tracks = getPlaylistTracks(sad_URIs)

Number of playlists scanned:  42 Total songs:  3495


In [24]:
# Filter the mood_playlists df for only the sad songs
happy_playlist = mood_playlists[mood_playlists.mood =='Happy']

# Get the song_URI column
happy_URIs = happy_playlist['song_URI']

# Convert the series to a list 
happy_URIs = list(happy_URIs

13

In [29]:
happy_tracks = getPlaylistTracks(happy_URIs)

Number of playlists scanned:  13 Total songs:  1165


## 3. Create the dataframes and export

In [31]:
def createDataFrame(track_ids,export_name):
    tracks = []

    for i in range(len(track_ids)):
      time.sleep(.5)
      track = getTrackFeatures(track_ids[i])
      tracks.append(track)

    # create dataset
    df = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])
    df.to_csv(export_name, sep = ',')
    return df

In [32]:
# Please note this will take more than 15 min-30 min if list > 1,000 songs
happy = createDataFrame(happy_tracks,'happy.csv')

In [33]:
# Please note this will take more than 15 min if list > 1,000 songs
sad = createDataFrame(sad_tracks,'sad.csv')

In [52]:
# Mood 1 = 'sad'
sad['mood'] = 1
sad.to_csv('sad.csv')
sad.head(5)

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mood
0,everything i wanted,everything i wanted,Billie Eilish,2019-11-13,245425,82,0.704,0.902,0.704,0.225,0.657,0.106,-14.454,0.0994,120.006,4,1
1,ghostin,"thank u, next",Ariana Grande,2019-02-08,271466,69,0.287,0.418,0.287,0.364,1.8e-05,0.185,-8.295,0.0306,103.777,4,1
2,Too Good At Goodbyes,The Thrill Of It All (Special Edition),Sam Smith,2017-11-03,201000,74,0.681,0.64,0.681,0.372,0.0,0.169,-8.237,0.0432,91.873,4,1
3,i love you,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",Billie Eilish,2019-03-29,291796,80,0.421,0.952,0.421,0.131,0.00453,0.109,-18.435,0.0382,137.446,4,1
4,I Fall Apart,Stoney (Deluxe),Post Malone,2016-12-09,223346,80,0.556,0.0689,0.556,0.538,0.0,0.196,-5.408,0.0382,143.95,4,1


In [53]:
# Mood 2 = 'Happy'
happy['mood'] = 2
happy.to_csv('happy.csv')
happy.head()

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mood
0,What You Know,Tourist History,Two Door Cinema Club,2010-02-17,191706,70,0.561,0.000715,0.561,0.741,1.4e-05,0.0822,-4.239,0.0419,139.001,4,2
1,Pumped Up Kicks,Torches,Foster The People,2011-05-23,239600,84,0.733,0.145,0.733,0.71,0.115,0.0956,-5.849,0.0292,127.975,4,2
2,Tongue Tied,Never Trust a Happy Song,Grouplove,2011-09-02,218013,80,0.56,0.00847,0.56,0.936,0.0,0.161,-5.835,0.0439,112.96,4,2
3,1901,Wolfgang Amadeus Phoenix,Phoenix,2009-05-25,193106,67,0.591,0.0605,0.591,0.831,4.7e-05,0.19,-5.647,0.0415,144.084,4,2
4,Hard Times,After Laughter,Paramore,2017-05-12,182693,72,0.695,0.00647,0.695,0.818,5e-06,0.0219,-5.379,0.0334,119.965,4,2


In [54]:
# Mood 0 = 'Workout'
workout = pd.read_csv('workout.csv', index_col = 0)
workout['mood'] = 0
workout.to_csv('workout.csv')
workout.head(5)

Unnamed: 0,name,album,artist,release_date,length,popularity,danceability,acousticness,danceability.1,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mood
0,Goosebumps - Remix,Goosebumps (Remix),Travis Scott,2021-01-15,162802,85,0.841,0.418,0.841,0.593,0.0,0.124,-7.846,0.0379,124.917,4,0
1,Paradise (feat. Dermot Kennedy),Paradise,MEDUZA,2020-10-30,167903,83,0.632,0.0689,0.632,0.595,0.0,0.209,-7.644,0.0401,124.114,4,0
2,INDUSTRY BABY (feat. Jack Harlow),INDUSTRY BABY (feat. Jack Harlow),Lil Nas X,2021-07-23,212000,97,0.736,0.0203,0.736,0.704,0.0,0.0501,-7.409,0.0615,149.995,4,0
3,Motley Crew,Motley Crew,Post Malone,2021-07-09,184213,84,0.797,0.0904,0.797,0.631,4e-06,0.0998,-3.818,0.0786,129.915,4,0
4,Levitating (feat. DaBaby),Levitating (feat. DaBaby),Dua Lipa,2020-10-01,203064,86,0.702,0.00883,0.702,0.825,0.0,0.0674,-3.787,0.0601,102.977,4,0


In [55]:
# Miscellaneous code

In [3]:
# def getTrackIDs(user, playlist_id):
#     ids = []
#     playlist = sp.user_playlist(user, playlist_id)
#     for item in playlist['tracks']['items']:
#         track = item['track']
#         ids.append(track['id'])
#     return ids