In [1]:
# ********** Import Statements ************************************************************ #
import sys
import spotipy
import spotipy.util as util

import pandas as pd
import json
import math

import matplotlib.pyplot as plt
import seaborn as sns


from sklearn import datasets, linear_model

pd.set_option('display.max_rows', 500)

In [2]:
# ********** Define Intro Values ************************************************************ #
# Loading in the client id and secret.
#client_info = json.load(open('/Users/cassielebauer/Documents/Projects/secret credentials/sameify_creds.json'))

# Spotify user IDs for testing: these are public information.
user_ids = {'Cassie':'1242091675',
            'Caity':'12166088827',
            'Ben':'yaojxcyh5tczzlln0ygm9sfla',
            'Alex':'12130992491',
            'Jason':'jason.albert'}

In [27]:
class Sameify:
    # Scopes for initialization; these are for the Spotify API
    redirect_uri = 'http://localhost:1410/'
    client_info = json.load(open('/Users/cassielebauer/Documents/Projects/secret credentials/sameify_creds.json'))
    scopes = ['user-read-recently-played',
              'user-top-read',
              'playlist-read-collaborative',
              'user-library-read',
              'playlist-read-private',
              'user-read-private',
              'playlist-modify-public']

    def __init__(self, user_id, read_only=True):
        # Declare valuesl
        self.id = user_id
        self.client_id = self.client_info['client_id']
        self.client_secret = self.client_info['client_secret']
        
        # Connect to Spotify
        self.sp = self.spotify_connect()
        
        # Pull relevant main values
        self.name = self.sp.user(self.id)['display_name']
        self.playlists = self.get_all_playlists()
        self.personal_playlists = self.get_personal_playlists(self.playlists)
        #self.personal_tracks = self.get_all_playlist_tracks(self.personal_playlists[0:2])

    # Connects to Spotify
    def spotify_connect(self):
        token = util.prompt_for_user_token(self.id,
                                            self.scopes,
                                            self.client_id,
                                            self.client_secret,
                                            self.redirect_uri)

        sp = spotipy.Spotify(auth=token)
        return sp
    
    #def overstep_limit(self, item_id):

    def get_playlist_id(self, playlist_name):
        return self.playlists.loc[self.playlists['playlist_name'] == playlist_name,'playlist_id'].values[0]
    
    def get_playlist_name(self, playlist_id):
        return self.playlists.loc[self.playlists['playlist_id'] == playlist_id,'playlist_name'].values[0]
    
    def get_all_playlist_tracks(self, list_of_playlists):
        tracks_df = pd.DataFrame()

        for ix in range(len(list_of_playlists)):
            plist_tracks = self.get_playlist_tracks(list_of_playlists.loc[ix,'playlist_id'])
            tracks_df = tracks_df.append(plist_tracks)
        return tracks_df
            
    def get_playlist_tracks(self, playlist_id):
        ret_df = pd.DataFrame()
        
        playlist = self.sp.user_playlist_tracks(self.id, playlist_id)
        playlist_name = self.get_playlist_name(playlist_id)
        tracks = playlist['items']
        
        while playlist['next']:
            playlist = self.sp.next(playlist)
            tracks.extend(playlist['items'])
            
        for track in tracks:
            x = track['track']
            data = {'playlist_id'  : playlist_id,
                    'playlist_name': playlist_name,
                    'track_name'   : x['name'],
                    'track_id'     : x['id'], 
                    'track_artist' : x['artists'][0]['name'], #[artist['name'] for artist in x['artists']]
                    'added_by'     : track['added_by']['id'],
                    'added_at'     : track['added_at']
                   }
            data_df = pd.DataFrame([data])
            song_deets = pd.DataFrame(self.sp.audio_features(data['track_id']))
            
            full_data = pd.concat([data_df, song_deets],axis=1)
            
            ret_df = ret_df.append(full_data)
        ret_df.reset_index(drop=True, inplace=True)
        return ret_df
            
        
    def get_all_playlists(self):
        playlists = pd.DataFrame()
        
        total = self.sp.user_playlists(self.id)['total'] #number of playlists
        limit = 50 # pull at a time
        
        for x in range(0, total, limit): # for each set of playlists
            new_playlist = self.sp.user_playlists(self.id, limit=limit, offset=x)
            for x in new_playlist['items']:
                items = {'playlist_name': x['name'],
                         'playlist_id'  : x['id'],
                         'owner_name'   : x['owner']['display_name'],
                         'owner_id'     : x['owner']['id'],
                         'num_tracks'   : x['tracks']['total'],
                         'collaborative': x['collaborative'],
                         }
                playlists = playlists.append(pd.DataFrame([items]))
        
        playlists.reset_index(drop=True, inplace=True)
        return playlists
     
    def remove_long_playlists(self, playlists):        
        req_text = '''The following playlists have over 100 tracks. Do you want to exclude any of these from the program? 
If so, please list the index numbers in a comma separated list. (e.g. "1,2,5,10").
For all playlists, please type "all".'''
        print(playlists[playlists['num_tracks']>=100][['playlist_name','num_tracks']])
        
        indices = input(req_text)
        if indices == 'all':
            print(playlists[['playlist_name','num_tracks']])
            req_text_2 = 'Please list the index numbers of the playlists to remove in a comma separated list. (e.g. "1,2,5,10")'
            indices = input(req_text_2)
        
        indices_to_exclude = [int(x) for x in indices.split(',')] if indices != '' else []

        indices_to_include = [x for x in playlists.index if x not in indices_to_exclude]
        playlists_to_use = playlists.iloc[indices_to_include,:].reset_index(drop=True)
        return playlists_to_use
    
    def get_personal_playlists(self, playlists):
        collab_filter = (playlists['collaborative'] == True) & (playlists['owner_name'] != self.name)
        personal_filter = playlists['owner_name'] == self.name
        personal_playlists = playlists[collab_filter | personal_filter].reset_index(drop=True)
        playlists_to_use = self.remove_long_playlists(personal_playlists)
        return playlists_to_use
 

In [4]:
# 1, 2, 3, 203, 204, 205, 206

In [None]:
cas

In [7]:
cassie_acct = Sameify(user_ids['Cassie'],read_only=False)

                  playlist_name  num_tracks
1       Discover Weekly Archive         120
2         Release Radar Archive         118
3    All Songs (as of 04/05/21)        1994
10           The Nechromanticon         123
40               Teenage Cassie         222
129          Discovered & Loved         149
130              Songs Saved v1         113
205    Release Radar Archive v1        4929
206  Discover Weekly Archive v2        4322


KeyboardInterrupt: Interrupted by user

In [28]:
ben_acct = Sameify(user_ids['Ben'],read_only=True)

    playlist_name  num_tracks
2        Workout!         222
8   Golden Oldies         385
11        Writing         113
15        Running         613
The following playlists have over 100 tracks. Do you want to exclude any of these from the program? 
If so, please list the index numbers in a comma separated list. (e.g. "1,2,5,10").
For all playlists, please type "all".2,8,11,15


In [29]:
ben_acct.get_all_playlist_tracks(ben_acct.personal_playlists[0:2])

AttributeError: 'int' object has no attribute 'loc'

In [21]:
test_df = ben_acct.personal_playlists[0:2]

In [12]:
ben_acct.personal_playlists[0:2]

Unnamed: 0,playlist_name,playlist_id,owner_name,owner_id,num_tracks,collaborative
0,Ben's Music Charcuterie,4AgXIKASWMr5rsjloJNPrP,BDKnight,yaojxcyh5tczzlln0ygm9sfla,87,False
1,Marcy's Music Recs,5fwPrcSx7AeHaddJgU4Um6,BDKnight,yaojxcyh5tczzlln0ygm9sfla,30,False


In [None]:
ben_acct.playlists

In [None]:
cassie_acct.personal_playlists['num_tracks'].sum()

In [None]:
ben_acct.personal_playlists['num_tracks'].sum()

In [None]:
collab_filter = (ben_acct.playlists['collaborative'] == True) & (ben_acct.playlists['owner_name'] != self.name)
personal_filter = ben_acct.playlists['owner_name'] == self.name

In [None]:
ben_acct.name

In [None]:
import timeit

In [None]:
start = timeit.default_timer()
ben_list = ben_acct.playlist_tracks('4AgXIKASWMr5rsjloJNPrP')
end = timeit.default_timer()
diff = end - start
print(f'{diff} sec for {len(ben_list)} entries; ~{diff//len(ben_list)} per 1')

In [None]:
ben_acct.playlists

In [None]:
ben_acct.id, ben_acct.name