[Spotify API Search](https://developer.spotify.com/documentation/web-api/reference/search/search/)

In [13]:
from __future__ import print_function    # (at top of module)
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import time
import sys
import pandas as pd
import numpy as np
import re

In [14]:
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [15]:
# Read the Spotify credentials from environment variables; os.environ.get returns
# None for any variable that is not set.
import os
cid = os.environ.get('SPOTIPY_CLIENT_ID')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
# NOTE(review): `username` is not referenced by any other cell visible in this notebook.
username = os.environ.get('SPOTIFY_USER_ID')
# Build the credentials manager from the client id / secret pair read above.
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)     
# `sp` is the notebook-level API client used by the helper functions and cells below.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [16]:
def users_to_playlists(users, client=None):
    '''
    Collect the playlists listed for each spotify user.

    Arguments:
    users is an iterable of spotify user IDs
    client is an optional object exposing user_playlists(user) and next(page);
        when omitted the notebook-level `sp` client is used

    Returns:
    A list of dictionaries, one per playlist, with the keys:
    user_id, playlist_id, added_by_id (the id of the playlist's owner)

    Failures are reported with print() and skipped, so one bad user or one
    malformed playlist entry does not abort the whole crawl.
    '''
    api = sp if client is None else client
    playlists = []
    for user in users:
        try:
            page = api.user_playlists(user)
            # Follow the paging links so that users with more playlists than fit
            # in a single response are not silently truncated.
            while page:
                for item in page['items']:
                    try:
                        playlists.append({'user_id': user,
                                          'playlist_id': item['id'],
                                          'added_by_id': item['owner']['id']})
                    except (KeyError, TypeError) as err:
                        # Entry is missing 'id' / 'owner' (or is not a dict at all).
                        print('except playlist', user, repr(err))
                page = api.next(page) if page.get('next') else None
        except Exception as err:
            # Exception rather than a bare except: a kernel interrupt (Ctrl-C)
            # still stops the loop, and the cause of the failure is shown.
            print('except user', user, repr(err))
        print('user_complete')
    return playlists


In [17]:
def _playlist_item_to_row(item, userID, playlistID):
    '''Flatten one playlist item into a row dictionary; return None if it carries no track.'''
    if not item:
        return None
    track = item.get('track')
    if not track:
        # No track payload on this item: there is nothing to flatten, so skip it
        # instead of failing on track['artists'].
        return None
    artists = track.get('artists') or []
    album = track.get('album') or {}
    # 'added_by' may be missing or null; fall back to an empty mapping so the id is None.
    added_by = item.get('added_by') or {}
    return {'user_id': userID,
            'playlist_id': playlistID,
            'added_by_id': added_by.get('id'),
            'album_name': album.get('name'),
            'album_id': album.get('id'),
            'artist_names': [artist['name'] for artist in artists],
            'artist_ids': [artist['id'] for artist in artists],
            'track_name': track.get('name'),
            'track_id': track.get('id'),
            'date_added': item.get('added_at')
           }


def parse_playlist_data(userID, playlistID, limit = 100, fields = 'items,total,next', client = None):
    '''
    Used to get information about the tracks in spotify playlist

    Arguments:
    userID is a spotify user ID
    playlistID is a spotify playlist ID
    limit is the number of tracks to retrieve in each call. "Next" is used to get all songs. 100 is the max to
        retrieve at once
    fields is the fields to retrieve for each playlist call
    client is an optional object exposing user_playlist_tracks(...) and next(page);
        when omitted the notebook-level `sp` client is used

    Returns:
    A list of dictionaries, one per track in the playlist.
    the keys of each dictionary are: user_id, playlist_id, added_by_id, album_name,
                album_id, artist_names, artist_ids, track_name, track_id, date_added
    Items that carry no track data are skipped.
    '''
    api = sp if client is None else client
    page = api.user_playlist_tracks(userID,
                                    playlist_id=playlistID,
                                    fields = fields,
                                    limit = limit)
    rows = []
    # One loop handles the first page and every following page alike.
    while page:
        for item in page['items']:
            row = _playlist_item_to_row(item, userID, playlistID)
            if row is not None:
                rows.append(row)
        # .get() so a `fields` value that omits 'next' ends the loop instead of raising.
        page = api.next(page) if page.get('next') else None
    return rows

In [18]:
df = pd.read_json('user_ids_fri.json')

In [20]:
# Column 0 of the loaded frame is used as the list of user ids; each entry is later
# passed to sp.user_playlists(). Presumably the JSON file is a flat list of ids — verify.
users = list(df[0])

In [48]:
# Ad-hoc playlist crawl: appends one [user, playlist_id, owner_id] list per playlist.
# Only the 'items' of the first response per user are read, and no errors are caught.
# NOTE(review): `playlists` is reassigned by the later `users_to_playlists(users)` cell,
# and the saved output under this cell is not produced by the code shown (there is no
# print here) — confirm whether this cell is still needed.
playlists = []
for user in users:
    for item in sp.user_playlists(user)['items']:
        playlists.append([user, item['id'], item['owner']['id']])

4s55ys9lIBIEQpogCcnjlT irvine_rain
3qiWr94o0EGEKDPZafGoDI 6ogp04wbngt0us3t5wj6xv7nq
4NPp2655MkbpKoRZyBJ8yk irvine_rain
2l7B5kDK1873tgSbDMWWTT irvine_rain
3a7ZrMx8IWQVRUdB0qfGPK irvine_rain
1t9wrOztYtbX9aYNNDiHr2 irvine_rain
5O0Emhrv0xwIZt0K8YUhCd irvine_rain
1289l390Sfs8Gb97fDEded irvine_rain
1pKzRkVe5uTGdrBVTA4zVR haschurig
3bgNrPZ14fsOzI3DOW0ZQl irvine_rain
0NVGoAmCDFhUc6oxXNpiML irvine_rain
6Dt6lkKyyB8DJ9RWAm8O8g irvine_rain
1ifSEW1pqzXXZaTIfM9MHM irvine_rain


In [53]:
playlists = users_to_playlists(users)

user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_complete
user_c

In [56]:
df_playlists = pd.DataFrame(playlists)

In [61]:
df_playlists.to_json('playlists_fri.json')

In [67]:
df_playlists.head()

Unnamed: 0,added_by_id,playlist_id,user_id
0,brendan.ta,3pTPXB3vT93AOTSVozP54o,brendan.ta
1,brendan.ta,095Q1U6Hyk9J8zgcBLXZSV,brendan.ta
2,brendan.ta,0W42jQAELrPbNTF2u2vqgv,brendan.ta
3,brendan.ta,09IWLqL8IAXijc2Oa4oXM6,brendan.ta
4,brendan.ta,1BZXb715L2PTSNxjT1Ed4W,brendan.ta


In [68]:
# Pair up the user_id and playlist_id columns, one (user_id, playlist_id) tuple
# per row of df_playlists.
user_curr = list(df_playlists['user_id'])
playlist_curr = list(df_playlists['playlist_id'])
feed = list(zip(user_curr, playlist_curr))

In [79]:
# NOTE(review): `fri_track_data` is not defined in any cell visible here — confirm
# which cell builds it, otherwise this fails on a fresh kernel.
len(fri_track_data)

6294

In [87]:
# NOTE(review): `all_data_test` is not defined in any visible cell; presumably it holds one
# list of track rows per playlist — confirm. The bare .shape below is not the last
# expression of the cell, so its value is not displayed.
all_data_test.shape
# Flatten the nested lists into one list in a single pass. sum(all_data_test, []) builds a
# new, longer list at every step, which is quadratic in the total number of rows.
new_list = [row for playlist_rows in all_data_test for row in playlist_rows]

In [89]:
df_all_test = pd.DataFrame(new_list)

In [90]:
df_all_test.head()

Unnamed: 0,added_by_id,album_id,album_name,artist_ids,artist_names,date_added,playlist_id,track_id,track_name,user_id
0,brendan.ta,4uIDigk79DeZEYV6Z5Yf4s,What Went Down,[6FQqZYVfTNQ1pCqfkwVFEa],[Foals],2016-03-26T00:48:16Z,3pTPXB3vT93AOTSVozP54o,53L6A3I9vf7rgEZnMzx54E,Mountain At My Gates,brendan.ta
1,brendan.ta,4sFhah3DYcJlYeT47q3rhM,In The Silence (Deluxe Version),[7xUZ4069zcyBM4Bn10NQ1c],[Ásgeir],2016-03-26T00:49:53Z,3pTPXB3vT93AOTSVozP54o,6VNo09sojPBi5mdckQkLbX,King and Cross,brendan.ta
2,brendan.ta,7jfkffCqKYEJmIgGOIJ9me,ZABA,[4yvcSjfu4PC0CYQyLy4wSq],[Glass Animals],2016-03-26T00:49:59Z,3pTPXB3vT93AOTSVozP54o,3djK2yDqJArNAzHyhzSf8G,Gooey,brendan.ta
3,brendan.ta,1VwxJUHMuoppsVjf3VWcZb,Swim,[4aEnNH9PuU1HF3TsZTru54],[Caribou],2016-03-26T00:52:14Z,3pTPXB3vT93AOTSVozP54o,5uBOzETXaVhfUvo0a0X1TZ,Odessa,brendan.ta
4,brendan.ta,1vJRG3hs7MpA7wVwrXoNWJ,Our Love,[4aEnNH9PuU1HF3TsZTru54],[Caribou],2016-03-26T00:52:22Z,3pTPXB3vT93AOTSVozP54o,23FA91vzMAs3k0gMV3ZGaV,Can't Do Without You,brendan.ta


In [91]:
# df_all_test.to_json('fri_working_data.json')

In [3]:
# Reload the working data from disk instead of re-crawling.
# NOTE(review): the only to_json call for this file in the visible cells is commented
# out, so 'fri_working_data.json' must already exist for this cell to run.
df_all_test = pd.read_json('fri_working_data.json')

In [7]:
df_all_test.shape

(751157, 10)

In [93]:
df_all_test.track_id.nunique()

395441

In [5]:
track_counts = df_all_test.track_id.value_counts()

In [104]:
track_counts

0bYg9bo50gSsH3LtXe2SQn    140
2xLMifQCjDGFmkHkpNLD9h    116
2JvzF1RMd7lE3KmFlsyZD8     97
2374M0fQpWi3dLnB54qaLX     95
40riOy7x9W7GXjyGp4pjAv     94
7sO5G9EABYOXQKNPNiE9NR     89
7w87IxuO7BDcJ3YUqCyMTT     87
2dpaYNEQHiRxtZbfNsse99     87
2IRZnDFmlqMuOrYOLnZZyc     86
7ycWLEP1GsNjVvcjawXz3z     86
2Fxmhks0bxGSBdJ92vM42m     85
7m9OqQk4RVRkw9JJdeAw96     85
7dt6x5M1jzdTEt8oCbisTK     85
4qKcDkK6siZ7Jp1Jb4m0aL     83
6gBFPUFcJLzWGx4lenP6h2     78
08mG3Y1vljYA6bvDt4Wqkj     74
1LzNfuep1bnAUR9skqdHCK     74
0I3q5fE6wg7LIfHGngUTnV     71
1jJci4qxiYcOHhQR247rEU     71
2aoo2jlRnM3A0NyLQqMN2f     71
0rTV5WefWd1J3OwIheTzxM     71
6MWtB6iiXyIwun0YzU6DFP     71
5p7ujcrUXASCNwRaWNHR1C     70
6L89mwZXSOwYl76YXfX13s     69
7COXchtUOMd6uIT6HvmRaI     68
6u7jPi22kF8CTQ3rb9DHE7     68
0d28khcov6AiegSCpG5TuT     68
0b9oOr2ZgvyQu88wzixux9     67
3FtYbEfBqAlGO46NUDQSAt     66
6Qs4SXO9dwPj5GKvVOv8Ki     66
                         ... 
2oCIHudP9wgI3OshvOtNfG      1
75K1Vpg7BhP3xC256aqtKv      1
53aUYPTwJe

In [19]:
# Show the ids of the frequent tracks.
# NOTE(review): in file order this cell comes before the cell that assigns
# `frequent_tracks`, so a top-to-bottom run on a fresh kernel fails here.
frequent_tracks.index

Index(['0bYg9bo50gSsH3LtXe2SQn', '2xLMifQCjDGFmkHkpNLD9h',
       '2JvzF1RMd7lE3KmFlsyZD8', '2374M0fQpWi3dLnB54qaLX',
       '40riOy7x9W7GXjyGp4pjAv', '7sO5G9EABYOXQKNPNiE9NR',
       '7w87IxuO7BDcJ3YUqCyMTT', '2dpaYNEQHiRxtZbfNsse99',
       '7ycWLEP1GsNjVvcjawXz3z', '2IRZnDFmlqMuOrYOLnZZyc',
       '7m9OqQk4RVRkw9JJdeAw96', '2Fxmhks0bxGSBdJ92vM42m',
       '7dt6x5M1jzdTEt8oCbisTK', '4qKcDkK6siZ7Jp1Jb4m0aL',
       '6gBFPUFcJLzWGx4lenP6h2', '1LzNfuep1bnAUR9skqdHCK',
       '08mG3Y1vljYA6bvDt4Wqkj', '1jJci4qxiYcOHhQR247rEU',
       '0rTV5WefWd1J3OwIheTzxM', '2aoo2jlRnM3A0NyLQqMN2f',
       '6MWtB6iiXyIwun0YzU6DFP', '0I3q5fE6wg7LIfHGngUTnV'],
      dtype='object')

In [10]:
# Keep only the track ids whose count in track_counts is strictly greater than 70.
# NOTE(review): 70 is an unexplained threshold — consider a named constant.
frequent_tracks = track_counts[track_counts > 70]

In [25]:
# One-column frame holding the ids of the frequent tracks, with the column
# named 'track_id' from the start.
df_frequent = pd.DataFrame({'track_id': frequent_tracks.index})

In [34]:
df_frequent = df_frequent.set_index(df_frequent.track_id)

In [38]:
df_frequent.rename(columns={'track_id': 'test_track_id'}, inplace=True)

In [73]:
# Attach track_name and artist_names to the frequent tracks by joining against
# df_all_test re-indexed by track_id.
# NOTE(review): df_all_test has many rows per track_id, so each frequent track is
# expected to appear once per playlist occurrence in the result — confirm this is intended.
all_tops = df_frequent.join(df_all_test[['track_name', 'track_id', 'artist_names']].set_index('track_id'), on = 'track_id')

In [84]:
# Build one row per (track, artist): each artist_names list is expanded into columns via
# pd.Series, stack() turns those columns into rows, and the inner index level is dropped.
# track_name is then joined back in from df_all_test by track_id.
# NOTE(review): that join matches every row of df_all_test with the same track_id, so the
# result contains repeated rows; the following cell calls drop_duplicates() on it.
df_all_working = pd.DataFrame(all_tops.apply(lambda x: pd.Series(x['artist_names']),axis=1).stack().reset_index(level=1, drop=True)).join(df_all_test[['track_name', 'track_id']].set_index('track_id'), on = 'track_id')


In [85]:
# Display the de-duplicated rows. The result is not assigned, so df_all_working
# itself is left unchanged by this cell.
df_all_working.drop_duplicates()

Unnamed: 0_level_0,0,track_name
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0bYg9bo50gSsH3LtXe2SQn,Mariah Carey,All I Want for Christmas Is You
2xLMifQCjDGFmkHkpNLD9h,Travis Scott,SICKO MODE
2JvzF1RMd7lE3KmFlsyZD8,J. Cole,MIDDLE CHILD
2374M0fQpWi3dLnB54qaLX,Toto,Africa
40riOy7x9W7GXjyGp4pjAv,Eagles,Hotel California - 2013 Remaster
7sO5G9EABYOXQKNPNiE9NR,Offset,Ric Flair Drip (& Metro Boomin)
7sO5G9EABYOXQKNPNiE9NR,Metro Boomin,Ric Flair Drip (& Metro Boomin)
7w87IxuO7BDcJ3YUqCyMTT,Foster The People,Pumped Up Kicks
2dpaYNEQHiRxtZbfNsse99,Marshmello,Happier
2dpaYNEQHiRxtZbfNsse99,Bastille,Happier
