## 1. Import dependencies and define functions

In [17]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time 
from config import client_id, client_secret
import re

In [18]:
client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
def getURI(uri_list):
#     p = re.compile(r'(?:spotify:playlist:)(\w{22})')
    p = re.compile(r'(?:spotify:(?:album|playlist|artist|track):)(\w{22})')
    URIs = [p.match(item).group(1) for item in uri_list]
    return URIs

def getPlaylistTracks(playlist_URI_list):
    # Tracks variable will hold the JSON dictionaries,
    # one dictionary for every playlist 
    
    clean_URIs = getURI(playlist_URI_list)
    json_dicts = [sp.playlist_items(uri) for uri in clean_URIs]
    tracks = []
    playlist_count = 0
    song_count = 0
    for i in range(len(json_dicts)):
        json = json_dicts[i]['items']
        playlist_count += 1
        for j in range(len(json)):
            try:
                track = json[j]['track']['id']
                tracks.append(track)
                song_count +=1
            except (TypeError, ValueError):
                pass
    print('Number of playlists scanned: ',playlist_count, 'Total songs: ', song_count)
    return tracks

In [4]:
def getTrackFeatures(id):
    meta = sp.track(id)
    features = sp.audio_features(id)

    # meta
    name = meta['name']
    album = meta['album']['name']
    artist = meta['album']['artists'][0]['name']
    release_date = meta['album']['release_date']
    length = meta['duration_ms']
    popularity = meta['popularity']

    # features
    acousticness = features[0]['acousticness']
    danceability = features[0]['danceability']
    energy = features[0]['energy']
    instrumentalness = features[0]['instrumentalness']
    liveness = features[0]['liveness']
    loudness = features[0]['loudness']
    key = features[0]['key']
    mode = features[0]['mode']
    valence = features[0]['valence']
    speechiness = features[0]['speechiness']
    tempo = features[0]['tempo']
    time_signature = features[0]['time_signature']

    track = [name, album, artist, release_date, length, popularity, key, mode, valence, danceability, acousticness, energy, instrumentalness, liveness, loudness, speechiness, tempo, time_signature]
    return track

## 2. Import our playlist mood data and grab track info for each mood

In [5]:
mood_playlists = pd.read_csv('../analysis_data/Mood_Playlists.csv')
mood_playlists.head()

Unnamed: 0,song_URI,num_tracks,type,mood,playlist_name,user_id,counts
0,spotify:playlist:37i9dQZF1DX76Wlfdnj7AP,200,Spotify,Workout,Beast Mode,Spotify,872.0
1,spotify:playlist:37i9dQZF1DX70RN3TfWWJh,100,Spotify,Workout,Workout,Spotify,
2,spotify:playlist:37i9dQZF1DX35oM5SPECmN,76,Spotify,Workout,Run Wild,Spotify,
3,spotify:playlist:37i9dQZF1DX9BXb6GsGCLl,80,Spotify,Workout,Powerwalk!,Spotify,
4,spotify:playlist:37i9dQZF1DWXx3Txis2L4x,40,Spotify,Workout,Rock 'n' Run 150-180 BPM,Spotify,


In [6]:
def get_data(mood, mood_playlists):
    mood_playlist = mood_playlists[mood_playlists.mood == mood]
    
    # Get the song_URI column
    mood_URIs = mood_playlist['song_URI']

    # Convert the series to a list 
    mood_URIs = list(mood_URIs)

    mood_tracks = getPlaylistTracks(mood_URIs)
    return mood_tracks

In [7]:
sad_tracks = get_data('sad',mood_playlists)
happy_tracks = get_data('Happy',mood_playlists)
workout_tracks = get_data('Workout',mood_playlists)

Number of playlists scanned:  42 Total songs:  3496
Number of playlists scanned:  21 Total songs:  1805
Number of playlists scanned:  9 Total songs:  670


In [None]:
sad_tracks[:5]

## 3. Create the dataframes and export

In [10]:
def createDataFrame(track_ids,export_name):
    tracks = []

    for i in range(len(track_ids)):
        time.sleep(.5)
        track = getTrackFeatures(track_ids[i])
        tracks.append(track)

    # create dataset
    df = pd.DataFrame(tracks, columns = ["name", "album", "artist", "release_date", "length", "popularity", "key", "mode", "valence", "danceability", "acousticness", "energy", "instrumentalness", "liveness", "loudness", "speechiness", "tempo", "time_signature"])
    df.to_csv(export_name, sep = ',')
    return df

In [11]:
# Please note this will take more than 15 min-30 min if list > 1,000 songs
happy = createDataFrame(happy_tracks,'happy.csv')

In [16]:
# Mood 2 = 'Happy'
happy['mood'] = 2
happy.to_csv('happy.csv')
happy

Unnamed: 0,name,album,artist,release_date,length,popularity,key,mode,valence,danceability,acousticness,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mood
0,What You Know,Tourist History,Two Door Cinema Club,2010-02-17,191706,71,6,0,0.776,0.561,0.000715,0.741,0.000014,0.0822,-4.239,0.0419,139.001,4,2
1,Pumped Up Kicks,Torches,Foster The People,2011-05-23,239600,84,5,0,0.965,0.733,0.145000,0.710,0.115000,0.0956,-5.849,0.0292,127.975,4,2
2,Tongue Tied,Never Trust a Happy Song,Grouplove,2011-09-02,218013,80,3,1,0.371,0.560,0.008470,0.936,0.000000,0.1610,-5.835,0.0439,112.960,4,2
3,1901,Wolfgang Amadeus Phoenix,Phoenix,2009-05-25,193106,67,0,1,0.705,0.591,0.060500,0.831,0.000047,0.1900,-5.647,0.0415,144.084,4,2
4,Hard Times,After Laughter,Paramore,2017-05-12,182693,72,5,0,0.916,0.695,0.006470,0.818,0.000005,0.0219,-5.379,0.0334,119.965,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1800,Does Your Mother Know,Voulez-Vous,ABBA,1979,195666,0,7,1,0.973,0.722,0.153000,0.859,0.000044,0.0586,-5.706,0.0360,135.628,4,2
1801,"Sugar, Sugar","The Very Best Of ""The Archies""",The Archies,2007-11-07,167186,66,2,1,0.967,0.736,0.466000,0.871,0.000000,0.1080,-2.786,0.0278,122.395,4,2
1802,Respect,I Never Loved a Man the Way I Love You,Aretha Franklin,1967-03-10,147600,73,0,1,0.965,0.805,0.164000,0.558,0.000022,0.0546,-5.226,0.0410,114.950,4,2
1803,Wouldn't It Be Nice,Greatest Hits,The Beach Boys,2012-01-01,153866,58,5,1,0.672,0.385,0.724000,0.657,0.000000,0.1050,-6.975,0.0342,124.895,4,2


In [19]:
# Please note this will take more than 15 min if list > 1,000 songs
sad = createDataFrame(sad_tracks,'sad.csv')

In [20]:
# Mood 1 = 'sad'
sad['mood'] = 1
sad.to_csv('sad.csv')
sad.head(5)

Unnamed: 0,name,album,artist,release_date,length,popularity,key,mode,valence,danceability,acousticness,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mood
0,everything i wanted,everything i wanted,Billie Eilish,2019-11-13,245425,82,6,0,0.243,0.704,0.902,0.225,0.657,0.106,-14.454,0.0994,120.006,4,1
1,ghostin,"thank u, next",Ariana Grande,2019-02-08,271466,69,9,1,0.11,0.287,0.418,0.364,1.8e-05,0.185,-8.295,0.0306,103.777,4,1
2,Too Good At Goodbyes,The Thrill Of It All (Special Edition),Sam Smith,2017-11-03,201000,74,5,1,0.476,0.681,0.64,0.372,0.0,0.169,-8.237,0.0432,91.873,4,1
3,i love you,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",Billie Eilish,2019-03-29,291796,80,0,1,0.12,0.421,0.952,0.131,0.00453,0.109,-18.435,0.0382,137.446,4,1
4,I Fall Apart,Stoney (Deluxe),Post Malone,2016-12-09,223346,80,8,0,0.291,0.556,0.0689,0.538,0.0,0.196,-5.408,0.0382,143.95,4,1


In [None]:
# Mood 0 = 'Workout'
workout = pd.read_csv('workout.csv', index_col = 0)
workout['mood'] = 0
workout.to_csv('workout.csv')
workout.head(5)

In [None]:
# Import dependencies

from sqlalchemy import create_engine
import psycopg2
from config import db_password

In [None]:
# Connect sad playlst to SQL

db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/spotify_data"
engine = create_engine(db_string)
    
sad.to_sql(name='sad_playlist', con=engine, if_exists='replace')

In [None]:
# Connect happy/workout playlist to SQL
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/spotify_data"
engine = create_engine(db_string)
    
happy.to_sql(name='happy_playlist', con=engine, if_exists='replace')
workout.to_sql(name='workout_playlist', con=engine, if_exists='replace')