In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time 
from config import client_id, client_secret
import re

In [2]:
client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
def getURI(uri_list):
#     p = re.compile(r'(?:spotify:playlist:)(\w{22})')
    p = re.compile(r'(?:spotify:(?:album|playlist|artist|track):)(\w{22})')
    URIs = [p.match(item).group(1) for item in uri_list]
    return URIs

In [4]:
def getPlaylistTracks(playlist_URI_list):
    # Tracks variable will hold the JSON dictionaries,
    # one dictionary for every playlist 
    
    clean_URIs = getURI(playlist_URI_list)
    json_dicts = [sp.playlist_items(uri) for uri in clean_URIs]
    tracks = []
    playlist_count = 0
    song_count = 0
    for i in range(len(json_dicts)):
        json = json_dicts[i]['items']
        playlist_count += 1
        for j in range(len(json)):
            try:
                track = json[j]['track']['id']
                tracks.append(track)
                song_count +=1
            except (TypeError, ValueError):
                pass
    print('Number of playlists scanned: ',playlist_count, 'Total songs: ', song_count)
    return tracks

In [5]:
def getTrackFeatures(id):
  meta = sp.track(id)
  features = sp.audio_features(id)

  # meta
  name = meta['name']
  album = meta['album']['name']
  artist = meta['album']['artists'][0]['name']
  release_date = meta['album']['release_date']
  length = meta['duration_ms']
  popularity = meta['popularity']

  # features
  acousticness = features[0]['acousticness']
  danceability = features[0]['danceability']
  energy = features[0]['energy']
  instrumentalness = features[0]['instrumentalness']
  liveness = features[0]['liveness']
  loudness = features[0]['loudness']
  speechiness = features[0]['speechiness']
  tempo = features[0]['tempo']
  time_signature = features[0]['time_signature']

  track = [name, album, artist, release_date, length, popularity, danceability, acousticness, danceability, energy, instrumentalness, liveness, loudness, speechiness, tempo, time_signature]
  return track

### Import our playlist mood data and grab track info for each mood

In [6]:
mood_playlists = pd.read_csv('Mood_Playlists.csv')
mood_playlists.head()

Unnamed: 0,song_URI,num_tracks,type,mood,playlist_name,user_id,counts
0,spotify:playlist:37i9dQZF1DX76Wlfdnj7AP,200,Spotify,Workout,Beast Mode,Spotify,872.0
1,spotify:playlist:37i9dQZF1DX70RN3TfWWJh,100,Spotify,Workout,Workout,Spotify,
2,spotify:playlist:37i9dQZF1DX35oM5SPECmN,76,Spotify,Workout,Run Wild,Spotify,
3,spotify:playlist:37i9dQZF1DX9BXb6GsGCLl,80,Spotify,Workout,Powerwalk!,Spotify,
4,spotify:playlist:37i9dQZF1DWXx3Txis2L4x,40,Spotify,Workout,Rock 'n' Run 150-180 BPM,Spotify,


In [9]:
# Filter the mood_playlists df for only the sad songs
sad_playlist = mood_playlists[mood_playlists.mood =='sad']

# Get the song_URI column
sad_URIs = sad_playlist['song_URI']

# Convert the series to a list 
sad_URIs = list(sad_URIs)

In [10]:
sad_tracks = getPlaylistTracks(sad_URIs)

Number of playlists scanned:  42 Total songs:  3500


In [11]:
# Filter the mood_playlists df for only the sad songs
happy_playlist = mood_playlists[mood_playlists.mood =='Happy']

# Get the song_URI column
happy_URIs = happy_playlist['song_URI']

# Convert the series to a list 
happy_URIs = list(happy_URIs)

In [12]:
happy_tracks = getPlaylistTracks(happy_URIs)

Number of playlists scanned:  13 Total songs:  1165


In [10]:
# Filter the mood_playlists df for only the sad songs
workout_playlist = mood_playlists[mood_playlists.mood =='Workout']

# Get the song_URI column
workout_URIs = workout_playlist['song_URI']

# Convert the series to a list 
workout_URIs = list(workout_URIs)

In [11]:
workout_tracks = getPlaylistTracks(workout_URIs)

Number of playlists scanned:  9 Total songs:  671


In [8]:
def createDataFrame(track_ids,export_name):
    tracks = []

    for i in range(len(track_ids)):
      time.sleep(.5)
      track = getTrackFeatures(track_ids[i])
      tracks.append(track)

    # create dataset
    df = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature'])
    df.to_csv(export_name, sep = ',')
    return df

In [14]:
# Please note this will take more than 15 min-30 min if list > 1,000 songs
happy = createDataFrame(happy_tracks,'happy.csv')

In [20]:
# Please note this will take more than 15 min if list > 1,000 songs
sad = createDataFrame(sad_tracks,'sad.csv')

ReadTimeout: HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=5)

In [12]:
# Please note this will take more than 15 min if list > 1,000 songs
workout = createDataFrame(workout_tracks,'workout.csv')

### Loading Playlists into DB

NOTE: Our sad playlist was timing out whenever I tried to run it on my computer (I think it took too much processing, so I could only load our happy + workout playlist)

In [13]:
from sqlalchemy import create_engine
import psycopg2

from db_config import db_password

In [14]:
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/spotify_data"
engine = create_engine(db_string)
    
happy.to_sql(name='happy_playlist', con=engine, if_exists='replace')
workout.to_sql(name='workout_playlist', con=engine, if_exists='replace')