In [None]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import cred
import time
import random
import pandas as pd
#import numpy as np

# OAuth scopes requested for this session (passed to SpotifyOAuth below).
scope = "playlist-read-private, playlist-modify-public, user-read-private, user-top-read, user-library-read"
auth_manager = SpotifyOAuth(
    client_id=cred.client_id,
    client_secret=cred.client_secret,
    redirect_uri='http://127.0.0.1:8080',
    scope=scope,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

# Fetch the profile once and read both fields from the same response instead
# of issuing two identical current_user() requests.
user_profile = sp.current_user()
user_id = user_profile['id']
user_country = user_profile['country']


In [None]:
def get_user_playlist_ids():
  '''
  Pages through all of the current user's playlists.

  Returns a tuple (ids, playlists): the 'id' of every playlist and the full
  playlist dictionaries, both in the order the pages were received.
  '''
  all_playlists = []
  fetched = 0
  while True:
    page = sp.current_user_playlists(offset=fetched)['items']
    if len(page) == 0:
      # An empty page means everything has been read.
      break
    all_playlists.extend(page)
    fetched += len(page)
    time.sleep(0.01)  # brief pause between page requests

  playlist_ids = [entry['id'] for entry in all_playlists]
  return playlist_ids, all_playlists

def get_saved_tracks():
  '''
  Collects the ids of the current user's saved ("liked") tracks, page by page.

  Entries whose 'track' field is None are skipped. Prints the elapsed time and
  returns the ids de-duplicated (order is not preserved).
  '''
  liked_ids = []
  print("I'm starting to look at the user's saved tracks!!")
  fetched = 0
  started = time.time()
  while True:
    page = sp.current_user_saved_tracks(offset=fetched)['items']
    if len(page) == 0:
      break
    liked_ids.extend(
      entry['track']['id'] for entry in page if entry['track'] is not None
    )
    fetched += len(page)
    time.sleep(0.01)  # brief pause between page requests
  finished = time.time()
  print(f'Hmmm... getting the liked tracks took {finished-started} seconds!\n')
  return list(set(liked_ids))

def get_playlist_names(playlists):
  '''
  Returns the title of every playlist, in the order given.

  playlists: iterable of playlist dictionaries, each with a 'name' key (for
  example the second value returned by get_user_playlist_ids). Bare playlist
  id strings are not accepted, because each item is indexed by 'name'.
  '''
  return [entry['name'] for entry in playlists]

def get_song_ids_from_playlists(user, playlist_urls):
  '''
  Gets the unique song ids of the tracks in the given playlists.

  user: Spotify user id forwarded to the playlist-tracks request.
  playlist_urls: sequence of playlist identifiers; each one is passed as
    playlist_id to sp.user_playlist_tracks.

  Returns a de-duplicated list of track ids (order is not preserved). Entries
  with no track object, or with a track whose id is None, are skipped so the
  result contains only usable ids. Prints progress and the total elapsed time.
  '''
  ids = set()
  t1 = time.time()
  total = len(playlist_urls)
  for number, playlist_id in enumerate(playlist_urls, start=1):
    offset = 0
    print(f"I'm starting playlist number {number} out of {total}")
    while True:
      page = sp.user_playlist_tracks(user=user, playlist_id=playlist_id, offset=offset, fields='items.track.id')
      items = page['items']
      if len(items) == 0:
        break
      for item in items:
        track = item['track']
        # Skip entries without a track object, and tracks without an id
        # (Spotify documents local files as having a null id); a None id
        # cannot be used later as a recommendation seed.
        if track is None or track['id'] is None:
          continue
        ids.add(track['id'])
      # Advance by the raw page size so skipped entries still count.
      offset = offset + len(items)
      time.sleep(0.01)  # brief pause between page requests
  t2 = time.time()
  print(f'Getting song ids from all those playlists took {round(t2-t1, 2)} seconds!\n')
  return list(ids)

def get_recc_ids(list_seed_tracks, country, limit=30):
  '''
  Gets the ids of recommended songs for each song in the seed tracks list.

  list_seed_tracks: list of track ids; one recommendation request is made per
    seed, using that single track as the only seed.
  country: market code forwarded to the recommendations request.
  limit: number of recommendations requested per seed (defaults to 30, the
    value that used to be hard-coded).

  Returns a de-duplicated list of recommended track ids (order is not
  preserved). Prints progress and the total elapsed time.
  '''
  print('Starting to collect recommendation ids.')
  if len(list_seed_tracks) > 150:
    print(f'Wow! I have {len(list_seed_tracks)*limit} recommendations to make. This may take a while.\n')
  recc_ids = set()
  t1 = time.time()
  for seed in list_seed_tracks:
    recs = sp.recommendations(seed_tracks=[seed], limit=limit, country=country)
    recc_ids.update(track['id'] for track in recs['tracks'])
  t2 = time.time()
  print(f'Making and saving all of those recommendations took {round(t2-t1, 2)} seconds.\n')
  return list(recc_ids)

def create_playlist(tracks):
  '''
  Creates a playlist called 'your recommended songs' for the current user and
  adds the given tracks to it.

  tracks: iterable of track ids/URIs to add. An empty iterable creates the
    playlist and adds nothing.

  Returns a confirmation message string.
  '''
  tracks = list(tracks)
  # Use the id from the creation response rather than re-listing every
  # playlist and assuming the new one is first in that list.
  new_playlist = sp.user_playlist_create(user_id, 'your recommended songs', description='yay new songs!')
  playlist_id = new_playlist['id']

  # Spotify's add-items endpoint is documented to accept at most 100 items
  # per request, so larger lists are sent in batches.
  batch_size = 100
  for start in range(0, len(tracks), batch_size):
    sp.user_playlist_add_tracks(user_id, playlist_id, tracks[start:start + batch_size])
  return 'Your playlist has been created!'

def create_recc_df(recc_ids):
  '''
  Builds a DataFrame with one row per track, combining data from sp.track,
  sp.audio_features and sp.audio_analysis.

  recc_ids: iterable of track identifiers accepted by those three calls.

  Returns a pandas DataFrame with the columns listed in `columns` below. Note
  that 'uri' holds the album URI and 'release_date' holds only the first four
  characters of the album release date.
  '''
  columns = [
    'uri', 'release_date', 'nartists', 'duration_m', 'track_popularity', 'danceability', 'energy',
    'key', 'key_conf', 'loudness', 'mode', 'mode_conf', 'speechiness', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo', 'tempo_conf', 'time_sig', 'time_sig_conf', 'nsamples', 'nbars',
    'nbeats', 'nsections', 'nsegments', 'ntatums'
  ]
  data = []

  for recc in recc_ids:
    # Get raw data for track
    track = sp.track(recc)
    features = sp.audio_features(recc)[0]  # the call returns a list; take its single entry
    analysis = sp.audio_analysis(recc)
    summary = analysis['track']

    # Extract relevant data. Each value is keyed by its column name so a value
    # cannot silently land under the wrong header (previously the
    # 'instrumentalness' column was filled with acousticness).
    observation = {
      'uri': track['album']['uri'],
      'release_date': track['album']['release_date'][0:4],
      'nartists': len(track['artists']),
      'duration_m': round(track['duration_ms']/60000, 4),
      'track_popularity': track['popularity'],
      'danceability': features['danceability'],
      'energy': features['energy'],
      'key': features['key'],
      'key_conf': summary['key_confidence'],
      'loudness': features['loudness'],
      'mode': features['mode'],
      'mode_conf': summary['mode_confidence'],
      'speechiness': features['speechiness'],
      'acousticness': features['acousticness'],
      'instrumentalness': features['instrumentalness'],
      'liveness': features['liveness'],
      'valence': features['valence'],
      'tempo': features['tempo'],
      'tempo_conf': summary['tempo_confidence'],
      'time_sig': features['time_signature'],
      'time_sig_conf': summary['time_signature_confidence'],
      'nsamples': summary['num_samples'],
      'nbars': len(analysis['bars']),
      'nbeats': len(analysis['beats']),
      'nsections': len(analysis['sections']),
      'nsegments': len(analysis['segments']), # for each segment, there is a list of pitches and timbre!
      'ntatums': len(analysis['tatums']),
    }

    # Add observation to total dataset
    data.append(observation)

  # Passing `columns` fixes the column order and keeps the headers present
  # even when recc_ids is empty.
  return pd.DataFrame(data, columns=columns)

In [None]:
# Get all playlist ids and all complete playlist information
playlist_ids, raw_playlists = get_user_playlist_ids()

# Names are read from the full playlist dictionaries; the id strings in
# playlist_ids cannot be indexed by 'name'.
playlist_names = get_playlist_names(raw_playlists)

# Extract playlist 1 for testing
testing_playlist = [playlist_ids[0]]

# Get the song ids for all tracks in the test playlist
song_ids = get_song_ids_from_playlists(user_id, testing_playlist)

# Get recommendations seeded by each of those songs
recc_ids = get_recc_ids(song_ids, user_country)

In [None]:
random.shuffle(recc_ids)

# Keep up to 30 shuffled recommendations that are not already in the seed
# playlist. The filter is applied regardless of how many recommendations there
# are (previously a list of 30 or fewer skipped it), and membership is checked
# against a set instead of scanning the song_ids list each time.
known_song_ids = set(song_ids)
group1 = [recc for recc in recc_ids if recc not in known_song_ids][:30]

create_playlist(group1)

In [18]:
# Pandas dataframe
# Each record is a song
# Each song has 50* attributes (timbres, year)
# Record categorical and binary data
# Normalize data (8.1.1)
# artist_followers = track['artists'] -- TODO: figure out how to access more artist info,
#   such as follower count, artist popularity and genres

# key: discrete
# time_sig: discrete
# year: discrete
# nartists: discrete
# *genre: categorical
# explicit: binary (True/False)
# mode: binary (0/1)
# uri: string

# Two sample track links used to try out create_recc_df.
yeet = [
  'https://open.spotify.com/track/0v1XpBHnsbkCn7iJ9Ucr1l?si=88b01857391c474c',
  'https://open.spotify.com/track/2WfaOiMkCvy7F5fcp2zZ8L?si=e7e62d2459b046c4',
]
x = create_recc_df(yeet)
print(x)

                                    uri release_date  nartists  duration_m  \
0  spotify:album:0Q9SljCrM0CL0bR23MuP69         2000         1      3.7416   
1  spotify:album:1ER3B6zev5JEAaqhnyyfbf         1985         1      3.7547   

   track_popularity  danceability  energy  key  key_conf  loudness  ...  \
0                81         0.551   0.913    0     0.693    -4.063  ...   
1                86         0.573   0.902    6     0.443    -7.638  ...   

     tempo  tempo_conf  time_sig  time_sig_conf  nsamples  nbars  nbeats  \
0  119.992       0.775         4          0.696   4950078    111     445   
1   84.412       0.019         4          1.000   4967424     78     314   

   nsections  nsegments  ntatums  
0          8        658      890  
1          9        922      628  

[2 rows x 27 columns]
