In [1]:
# this version reads all the posts without filtering by upvotes
# asks openai to grab the score from the post and put it in the csv
# attempts to dedupe with pandas_dedupe
# ranks by summing scores

import os
import glob
import pickle
from datetime import datetime
import time
import dotenv
import pandas as pd
import re

import pandas_dedupe

import requests
import requests.auth

import praw

import openai

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# load secrets from .env into environment variables
dotenv.load_dotenv()

praw.__version__

'7.7.0'

See README.md
 - objective is to use OpenAI for named entity extraction to extract all the songs from [this reddit thread](https://www.reddit.com/r/AskReddit/comments/12viv4v/what_is_the_prettiest_song_you_ever_heard_in_your/) and make a Spotify playlist
 - use Reddit PRAW API to download all the comments (get [Reddit API key](https://www.reddit.com/prefs/apps))
 - use OpenAI API with a prompt like "extract all the songs from this text to CSV" (get [OpenAI API key](https://platform.openai.com/account/api-keys))
 - use Spotify API to make a playlist (get [Spotify API key](https://developer.spotify.com/documentation/web-api/tutorials/getting-started))
 - works, needed a lot of scrubbing, but about 1 day of work, wouldn't have been possible to do a 700-song playlist manually without a team of Mechanical Turks or something
 - If I wanted to go nuts, I would process comments individually and save a file for each comment's extracted songs; that would make it easier to track down what OpenAI gets wrong and give a resumable, retryable, repeatable process
 - Spotify playlist is [here](https://open.spotify.com/playlist/08YFkbtTV6GBfNtjJ4PHDu?si=f4761d983ac84091) 
 
 needs a .env file per dot-env-template
 

In [2]:
# ---- run configuration -------------------------------------------------
# id of the reddit thread (submission) to process
submission = "12viv4v"

# minimum karma to process a reply
# NOTE(review): not read by any cell visible in this notebook (the filter
# further down keeps every comment with score >= 0); kept for compatibility.
minkarma = 5

# legacy single-message prompt, kept for reference only:
# prefix = """Define an example CSV file output as follows: 
# "artist","song_title"
# "The Beatles","Yesterday"
# "Eagles","Hotel California"

# Extract all song titles and artists from the following input, and return a CSV file output of the artists and song titles you extract from the input. If there were no songs extracted from the input, return "no songs found". the input is:
# """

# an output file to accumulate all the responses (the "bronze" data set)
savefile = 'bronze.txt'

# ---- chat prompts ------------------------------------------------------
# system message: sets the role of the model
system_prompt="You will act as a research assistant finding all the artists and track titles in a series of messages."
# assistant message: one worked example of the post framing and of the
# 4-column CSV answer expected back
assistant_prompt="""Define a post as follows:
post_id: 843
post_score: 6996
I love Yesterday by the Beatles. Also Hotel California from The Eagles.
post_end: 843

Define a comma-separated values output as follows:
"post_id","post_score","artist","track"
843,6996,"The Beatles","Yesterday"
843,6996,"Eagles","Hotel California"
"""
# user message prefix: the framed posts are appended directly after it
user_prefix="""You will extract all artists and tracks from the following input, and return a list of records containing the post_id, post_score, artist and track extracted from the input in a comma-separated values format. The header row should contain `"post_id","post_score","artist","track"`. The input is:"""


In [3]:
# Pattern matching ONE comma-separated field, used with findall() to count the
# fields on a line of model output (the extraction cell accepts a line only
# when exactly 4 fields are found).
# A field is either an unquoted run without commas/quotes, or a double- or
# single-quoted string in which backslash escapes are allowed; it must be
# followed by a comma or the end of the string. Surrounding whitespace is
# skipped and is not part of the captured group.
csv_validate_re = re.compile(r'''
    \s*                # Any whitespace.
    (                  # Start capturing here.
      [^,"']+?         # Either a series of non-comma non-quote characters.
      |                # OR
      "(?:             # A double-quote followed by a string of characters...
          [^"\\]|\\.   # That are either non-quotes or escaped...
       )*              # ...repeated any number of times.
      "                # Followed by a closing double-quote.
      |                # OR
      '(?:[^'\\]|\\.)*'# Same as above, for single quotes.
    )                  # Done capturing.
    \s*                # Allow arbitrary space before the comma.
    (?:,|$)            # Followed by a comma or the end of a string.
    ''', re.VERBOSE)


## Get all comments from a reddit posting

In [4]:
def getPraw():
    """Build a PRAW Reddit client from credentials found in the environment.

    CLIENT_ID and CLIENT_SECRET are read with os.getenv, so a missing
    variable is passed to praw.Reddit as None. The client identifies itself
    with the user agent "prettiest_song/0.001".
    """
    credentials = {
        'client_id': os.getenv('CLIENT_ID'),
        'client_secret': os.getenv('CLIENT_SECRET'),
    }
    return praw.Reddit(user_agent="prettiest_song/0.001", **credentials)


def getAll(r, submissionId, verbose=True):
    """Return all comments of one submission as a flat list.

    r            -- client object exposing .submission(id) (a praw.Reddit here)
    submissionId -- id of the thread to fetch
    verbose      -- accepted for compatibility; not used by this function

    replace_more(limit=None) is called first so that the comment forest is
    fully expanded before it is flattened with .list().
    """
    thread = r.submission(submissionId)
    forest = thread.comments
    forest.replace_more(limit=None)
    return forest.list()


In [None]:
# print(datetime.now())
# r = getPraw()
# res = getAll(r, submission)
# print(datetime.now())

# print("retrieved ", len(res), 'comments')


In [5]:
!ls -lt reddit_full.pkl

-rw-r--r--  1 drucev  staff  26049163 May 11 13:13 reddit_full.pkl


In [None]:
# We have a list of comment objects; keep those whose score is not negative.
# `res` only exists when the (normally commented-out) download cell has been
# run in this kernel. On a fresh kernel this cell used to die with
# "NameError: name 'res' is not defined", so it is now skipped in that case and
# res3 is expected to come from the pickle loaded further down.
if 'res' in globals():
    res3 = [comment for comment in res if comment.score >= 0]
    print('filtered to ', len(res3), 'comments')
    if res3:
        # peek at the first surviving comment
        print((res3[0].body, res3[0].score))
else:
    print("`res` is not defined (download cell not run) - skipping the filter")

In [6]:
# Cache of the filtered comment list, so the thread does not have to be
# downloaded again. Un-comment the dump to refresh the cache after a download.
# NOTE: pickle.load executes whatever the file contains - only load a
# reddit_full.pkl that you produced yourself.

# with open('reddit_full.pkl', 'wb') as f:
#     pickle.dump(res3, f)

with open('reddit_full.pkl', mode='rb') as pickled_comments:
    res3 = pickle.load(pickled_comments)


NameError: name 'res' is not defined

## Extract artists and song titles using OpenAI

In [None]:
# Preview how the first post gets framed inside the prompt
# (post_id / post_score / body / post_end). Only one post is shown.
c = 0
for i, post in enumerate(res3[:1]):
    c = i + 1
    print ("post_id: %d" % c)
    print ("post_score: %d" % post.score)
    print (post.body)
    print ("post_end: %d\n" % c)

In [None]:
# Collect the positions of suspiciously short (< 3 chars) and very long
# (> 4096 chars) posts, printing each one as it is found.
shorties, big_ones = [], []
for i, post in enumerate(res3):
    body_len = len(post.body)
    if body_len < 3:
        print (i, post.body)
        shorties.append(i)
    if body_len > 4096:
        print(i, body_len)
        big_ones.append(i)
        

In [None]:
# mean number of characters per post body
total_chars = sum(len(post.body) for post in res3)
total_chars / len(res3)

In [None]:
# Show the first 500 characters of the first over-long post, if there is one
# (indexing big_ones[0] unconditionally raised IndexError when none exist).
if big_ones:
    print (res3[big_ones[0]].body[:500])
else:
    print("no posts longer than 4096 characters")

In [7]:
# For each comment we take its (truncated) body, pack as many comments as fit
# into one prompt ("chunk"), and ask the chat model to answer with one CSV row
# per artist/track found. Per chunk this writes:
#   out/NNNN.csv   lines that passed the 4-field validation
#   out/NNNN.err   lines that failed it (only when there are any)
#   logs/NNNN.log  reply ids, raw prompt, raw response, failed lines
print(datetime.now())

openai.api_key = os.getenv('OPENAI_API_KEY')

# work queue; a shallow copy, so consuming it leaves the res3 list itself intact
slist = res3.copy()
total_posts = len(slist)
print("processing %d posts" % total_posts)

# to speed things we'll cumulate posts til we get to 100 posts or 5000 chars, whichever comes first
max_post_size=300  # redditor needs to put any songs in 1st couple hundred chars, c'mon
maxchars = 5000  # max tokens is 4096 but we'll limit each prompt to 5000 chars
nposts = 100 # max posts to combine into a chunk
RETRIES = 3
RETRY_DELAY_SECS = 5  # pause between attempts after an API error

# report very long posts; every body is cut to max_post_size when it is put
# into a prompt (on a local copy - the comment objects shared with res3 are
# no longer modified)
for i in range(len(slist)):
    if len(slist[i].body) > maxchars + len(user_prefix):
        print ("truncated ", i)

outdir = 'out'
logdir = 'logs'
# make sure out and logs exist and are empty
for workdir in (outdir, logdir):
    os.makedirs(workdir, exist_ok=True)
    for f in glob.glob('%s/*' % workdir):
        os.remove(f)
count = 0   # chunk number, used for the output file names
c = 0       # running post_id handed to the model
failed_reply_ids = []   # reddit ids of posts whose chunk could not be processed

prompt_template = """
post_id: %d
post_score: %d
%s
post_end: %d

"""

while(slist):  # still comments to process
    prompt = ""
    reply_ids = []

    # add up to nposts posts to the prompt, stopping as soon as the next one
    # would push the prompt to maxchars. The first post of a chunk is always
    # taken, so the queue shrinks on every pass even with unusual limits.
    while slist and len(reply_ids) < nposts:
        body = slist[0].body[:max_post_size]
        if reply_ids and len(prompt) + len(body) >= maxchars:
            break
        c += 1
        reply = slist.pop(0)
        reply_ids.append(reply.id)
        prompt += prompt_template % (c, reply.score, body, c)

    messages=[{"role":"system", "content": system_prompt},
              {"role":"assistant", "content": assistant_prompt},
              {"role":"user", "content": user_prefix + prompt}
             ]

    # retry loop, have received untrapped 502 error
    response = None
    for attempt in range(RETRIES):
        try:
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo-0301',
                messages=messages,
                temperature=0,
            )
            break   # SUCCESS
        except Exception as error:
            print("An exception occurred:", error)
            if attempt + 1 < RETRIES:
                print("Retrying chunk...")
                time.sleep(RETRY_DELAY_SECS)

    if response is None:   # FAIL - retries exhausted
        # these posts have already left the queue: remember them so the loss
        # is visible and they can be re-submitted later
        failed_reply_ids.extend(reply_ids)
        print('Bailing to next chunk')
        continue

    # do basic validation and cleanup: keep lines with exactly 4 CSV fields
    raw_output = response['choices'][0]['message']['content']
    csv_valid, csv_err = [], []
    for line in raw_output.split("\n"):
        if len(csv_validate_re.findall(line)) == 4:
            csv_valid.append(line)
        else:
            csv_err.append(line)

    # utf-8 is forced because posts contain non-ASCII text and the files are
    # read back with pandas, whose default encoding is utf-8
    if csv_valid:
        csv_output = "\n".join(csv_valid)

        with open("%s/%04d.csv" % (outdir, count), 'w', encoding='utf-8') as outfile:
            outfile.write(csv_output)

    if csv_err:
        with open("%s/%04d.err" % (outdir, count), 'w', encoding='utf-8') as outfile:
            outfile.write("\n".join(csv_err))

    with open("%s/%04d.log" % (logdir, count), 'w', encoding='utf-8') as logfile:
        logfile.write(str(reply_ids))
        logfile.write('\n\n===== raw prompt =====\n\n')
        logfile.write(prompt)
        logfile.write('\n\n===== raw response =====\n\n')
        logfile.write(raw_output)
        if csv_err:
            logfile.write('\n\n===== failed validation =====\n\n')
            logfile.write("\n".join(csv_err))

    count += 1
    # progress: number of posts taken off the queue so far
    print(total_posts-len(slist), end=' ')

print()
if failed_reply_ids:
    print("%d posts were NOT processed (API retries exhausted): %s"
          % (len(failed_reply_ids), failed_reply_ids))
print(datetime.now())



2023-05-12 08:23:20.638981
processing 24681 posts
53 99 147 194 An exception occurred: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retrying chunk...
239 292 337 379 420 465 508 550 595 636 685 725 765 806 856 900 947 990 1040 1087 1127 1166 1215 1252 1301 1342 1390 1431 1473 1511 1549 1595 1643 1683 1721 1768 1817 1869 1909 1951 1994 2039 2084 2125 2166 2205 2242 2280 2325 2374 2420 2468 2514 2566 2615 2668 2714 2758 2797 2838 An exception occurred: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 8cee0b6fd7ace98d6f21f54631f70a94 in your message.)
Retrying chunk...
2885 2940 2985 3032 3088 3145 3191 3239 3279 3320 3367 3412 3464 3513 3553 3602 3652 3692 3735 3786 3836 3889 3932 An exception occurred: That model is currently overloaded with other requests. You can retry your reque

In [None]:
# load full csv
# delete na
# dedupe / groupby sum
# mark the ones that already match ones in list
# mark the ones that don't match the lookup track
# go through manually

In [None]:
# Debugging aid: show the raw text of the most recent completion.
last_choice = response['choices'][0]
print(last_choice['message']['content'])

In [None]:
# SECURITY: never hard-code API keys in a notebook. The secret key that was
# assigned literally in this cell has been stored in clear text and must be
# treated as compromised: revoke it in the OpenAI dashboard and create a new one.
# The key belongs in the .env file (see dot-env-template); re-read that file
# here so a changed key takes effect without restarting the kernel.
dotenv.load_dotenv(override=True)
if not os.getenv('OPENAI_API_KEY'):
    print("OPENAI_API_KEY is not set - add it to .env (see dot-env-template)")

In [209]:
# may still have to tweak the files to get them to load
# should inspect .err files and clean up if possible

# Read every per-chunk CSV, keep the ones that parse into exactly 4 columns and
# stack them into output_df (None when nothing could be loaded). The row index
# of each file is kept as-is, so index labels repeat across files.
filelist = glob.glob('%s/*.csv' % outdir)

loaded_frames = []

for f in sorted(filelist):
    print(f)
    try:
        tempdf = pd.read_csv(f, header=None)
    except Exception as exc:
        # unparsable file: report it and move on
        print(str(exc))
        continue
    colcount = len(tempdf.columns)
    if colcount != 4:
        print('%s has %d columns, skipped' % (f, colcount))
        continue
    tempdf.columns=["post_id","post_score","artist","track"]

    # truncate header row
    # NOTE(review): this test looks for 'artist'/'track' in the FIRST TWO
    # columns, which is the old 2-column layout; with the 4-column layout the
    # header starts with 'post_id', so header rows normally stay in the data
    # and still have to be removed after loading.
    # (.iloc[0, 0] is used instead of .iloc[0][0]: integer keys on a
    # label-indexed row are a deprecated positional fallback in pandas.)
    if tempdf.iloc[0, 0]=='artist' and tempdf.iloc[0, 1]=='track':
        tempdf = tempdf[1:]
    loaded_frames.append(tempdf)

# one concat at the end instead of re-copying the growing frame per file
output_df = pd.concat(loaded_frames, axis=0) if loaded_frames else None
        
        
        

out/0000.csv
out/0001.csv
out/0002.csv
out/0003.csv
out/0004.csv
out/0005.csv
out/0006.csv
out/0007.csv
out/0008.csv
out/0009.csv
out/0010.csv
out/0011.csv
out/0012.csv
out/0013.csv
out/0014.csv
out/0015.csv
out/0016.csv
out/0017.csv
out/0018.csv
out/0019.csv
out/0020.csv
out/0021.csv
out/0022.csv
out/0023.csv
out/0024.csv
out/0025.csv
out/0026.csv
out/0027.csv
out/0028.csv
out/0029.csv
out/0030.csv
out/0031.csv
out/0032.csv
out/0033.csv
out/0034.csv
out/0035.csv
out/0036.csv
out/0037.csv
out/0038.csv
out/0039.csv
out/0040.csv
out/0041.csv
out/0042.csv
out/0043.csv
out/0044.csv
out/0045.csv
out/0046.csv
out/0047.csv
out/0048.csv
out/0049.csv
out/0050.csv
out/0051.csv
out/0052.csv
out/0053.csv
out/0054.csv
out/0055.csv
out/0056.csv
out/0057.csv
out/0058.csv
out/0059.csv
out/0060.csv
out/0061.csv
out/0062.csv
out/0063.csv
out/0064.csv
out/0065.csv
out/0066.csv
out/0067.csv
out/0068.csv
out/0069.csv
out/0070.csv
out/0071.csv
out/0072.csv
out/0073.csv
out/0074.csv
out/0075.csv
out/0076.csv

In [210]:
# Drop the header rows that were loaded as data (every chunk file may start
# with the header line requested from the model).
# They are recognised by content rather than with drop(0): dropping index
# label 0 removed the first row of EVERY file even when it was a real record,
# and raised KeyError when this cell was run a second time.
is_header_row = output_df['post_id'].astype(str).str.strip().str.strip('"') == 'post_id'
output_df = output_df.loc[~is_header_row]
output_df

Unnamed: 0,post_id,post_score,artist,track
1,1,6996,Erik Satie,Gymnopédies
2,2,2824,Lord Huron,The night we met
3,3,3383,Don McLean,"Vincent (Starry, Starry Night)"
4,5,3592,Neil Young,Harvest Moon
5,6,6144,Simon & Garfunkel,Scarborough Fair
...,...,...,...,...
6,24672,2,,
7,24677,2,,
8,24680,2,,
9,24681,2,,


In [211]:
# Persist the raw ("bronze") extraction, then show how many rows it holds.
output_df.to_csv(savefile, index=False)
output_df.shape[0]

26449

In [313]:
# Reload the bronze file, discard rows with any missing value and order the
# remainder by artist, then track, with a fresh 0..n-1 index.
df = (
    pd.read_csv(savefile)
    .dropna()
    .sort_values(["artist", "track"])
    .reset_index(drop=True)
)

# first cut of the "silver" file
df.to_csv('silver.csv', index=False)

print(len(df))
# tweak further to get to gold.csv

df


21869


Unnamed: 0,post_id,post_score,artist,track
0,21790,2,,
1,21791,2,,
2,21794,0,,
3,21795,1,,
4,21796,1,,
...,...,...,...,...
21864,14109,1,Отава Ё,Once upon a Time on a High Hill
21865,1028,2,мураками,Не спеши
21866,16549,1,увулв,Ты и твоя тень
21867,9505,1,دلکش,فردا روشن است


In [314]:
def fix_leading_trailing(s):
    """Trim non-alphanumeric characters from both ends of s and lower-case it.

    Characters inside the string are left alone; a string without any
    alphanumeric character comes back as ''.
    """
    start, stop = 0, len(s)

    # advance past leading junk (quotes, spaces, punctuation, ...)
    while start < stop and not s[start].isalnum():
        start += 1

    # back up over trailing junk
    while stop > start and not s[stop - 1].isalnum():
        stop -= 1

    return s[start:stop].lower()
        

In [315]:
# normalise artist names: trim punctuation/quotes at both ends, lower-case
df['artist'] = df['artist'].map(fix_leading_trailing)

In [316]:
# the 20 artists with the most extracted rows
rows_per_artist = df.groupby('artist').count().reset_index()
rows_per_artist.sort_values('track', ascending=False).head(20)



Unnamed: 0,artist,post_id,post_score,track
6216,unknown,993,993,993
5628,the beatles,304,304,304
4698,radiohead,218,218,218
1966,fleetwood mac,162,162,162
0,,159,159,159
4565,pink floyd,147,147,147
1823,enya,138,138,138
3298,led zeppelin,133,133,133
2712,jeff buckley,130,130,130
797,bon iver,121,121,121


In [317]:
# remove rows whose artist is only a placeholder ('unknown', 'none' or blank)
placeholder_artist = df['artist'].str.strip().isin(['unknown', 'none', ''])
df = df.drop(index=df.index[placeholder_artist])


In [318]:
# One row per distinct artist with the number of rows it has ('count'),
# most frequent first; the final reset_index adds the rank as an 'index' column.
rows_by_artist = df[['artist', 'post_score']].groupby(['artist']).count()
dedupe_df = (
    rows_by_artist
    .sort_values('post_score', ascending=False)
    .rename(columns={'post_score': 'count'})
    .reset_index()   # artist: group key -> column
    .reset_index()   # 0..n-1 position -> 'index' column
)

dedupe_df

Unnamed: 0,index,artist,count
0,0,the beatles,304
1,1,radiohead,218
2,2,fleetwood mac,162
3,3,pink floyd,147
4,4,enya,138
...,...,...,...
6546,6546,jai wolfe,1
6547,6547,jai paul,1
6548,6548,jagjit singh,1
6549,6549,jades goudreault,1


In [319]:
# Cluster near-duplicate artist spellings with pandas_dedupe, matching on the
# 'artist' column only. Per the run log below, previously learned settings are
# read from `dedupe_dataframe_learned_settings`; the returned frame carries the
# extra columns 'cluster id' and 'confidence' (see the display that follows).
dedupe_df2 = pandas_dedupe.dedupe_dataframe(dedupe_df, ['artist'])


Importing data ...
Reading from dedupe_dataframe_learned_settings
Clustering...


  dedupe_df2 = pandas_dedupe.dedupe_dataframe(dedupe_df, ['artist'])


# duplicate sets 4201


In [320]:
# inspect the clustering result (one row per artist spelling)
dedupe_df2

Unnamed: 0,index,artist,count,cluster id,confidence
0,0,the beatles,304,0,0.499640
1,1,radiohead,218,1,0.499983
2,2,fleetwood mac,162,2,0.499792
3,3,pink floyd,147,3,0.499292
4,4,enya,138,1051,1.000000
...,...,...,...,...,...
6546,6546,jai wolfe,1,697,0.500014
6547,6547,jai paul,1,4197,1.000000
6548,6548,jagjit singh,1,4198,1.000000
6549,6549,jades goudreault,1,4199,1.000000


In [321]:
# Tag every row of df with the cluster id of its artist.
# The cluster ids are copied onto dedupe_df (aligned on the row index) and
# looked up through the artist names stored in dedupe_df.
dedupe_df['cluster id'] = dedupe_df2['cluster id']
name2i = dict(zip(dedupe_df['artist'].tolist(), dedupe_df['cluster id'].tolist()))
df['artist_index'] = [name2i[name] for name in df['artist']]
df

Unnamed: 0,post_id,post_score,artist,track,artist_index
144,18307,1,311,Amber,288
145,2982,1,a rocket to the moon,Gavin D,3291
146,375,"""8""",abba,"""One of us""",1121
147,375,"""8""",abba,"""The winner takes it all""",1121
148,4619,1,ac milan,Che confusione,3240
...,...,...,...,...,...
21864,14109,1,отава ё,Once upon a Time on a High Hill,2364
21865,1028,2,мураками,Не спеши,2366
21866,16549,1,увулв,Ты и твоя тень,2363
21867,9505,1,دلکش,فردا روشن است,4200


In [322]:
# Replace each artist by the representative name of its cluster: the first
# row of the cluster in dedupe_df2.
# NOTE(review): presumably that is the most frequent spelling, since the
# displayed dedupe_df2 is ordered by descending count - confirm.
tempdf = (
    dedupe_df2[['index', 'artist', 'cluster id']]
    .groupby('cluster id')
    .first()
    .reset_index()
)

i2name = dict(zip(tempdf['cluster id'].tolist(), tempdf['artist'].tolist()))
df['artist'] = df['artist_index'].apply(lambda cluster_id: i2name[cluster_id])

df

Unnamed: 0,post_id,post_score,artist,track,artist_index
144,18307,1,311,Amber,288
145,2982,1,a rocket to the moon,Gavin D,3291
146,375,"""8""",abba,"""One of us""",1121
147,375,"""8""",abba,"""The winner takes it all""",1121
148,4619,1,ac milan,Che confusione,3240
...,...,...,...,...,...
21864,14109,1,otava io,Once upon a Time on a High Hill,2364
21865,1028,2,murakami,Не спеши,2366
21866,16549,1,uvulv,Ты и твоя тень,2363
21867,9505,1,dlkhsh,فردا روشن است,4200


In [323]:
# Manual corrections applied after the automatic clustering:
# artist name as it appears in df  ->  name to use instead.
# Names that are not listed are left unchanged.
mymap = {
'temptations' : 'the temptations', 
'ladysmith black mambazo and paul simon' : 'paul simon', 
'isbell' : 'jason isbell', 
'mini riperton' : 'minnie riperton', 
# NOTE(review): this key carries literal double quotes; artist names were run
# through fix_leading_trailing earlier, so it may never match - confirm.
'"moby, sinead oconnor"' : 'moby', 
'ludivcio enaudi' : 'einaudi', 
'bob dillon' : 'bob dylan', 
'dylan' : 'bob dylan', 
'f. liszt' : 'liszt', 
'franz liszt' : 'liszt', 
'edvard greig' : 'edvard grieg', 
'apc' : 'a perfect circle', 
'tomaso albinoni' : 'albinoni', 
# the value used to start with a space (' alison krauss'), which created a
# separate artist instead of merging into 'alison krauss'
'allison krauss' : 'alison krauss', 
# NOTE(review): maps a correctly spelled name to 'belinda carlile' - confirm
# which artist is meant.
'belinda carlisle' : 'belinda carlile', 
'bernie taupin' : 'elton john', 
'camille saint-saens' : 'saint saens', 
'cass elliot' : 'mamas and the papas', 
'city colour' : 'city and colour', 
'claude debussy' : 'debussy', 
'coctaeu twins' : 'cocteau twins', 
# NOTE(review): the value contains literal double quotes - confirm intended.
'csn' : '"crosby, stills nash"', 
'edward elgar' : 'elgar', 
'elo' : 'electric light orchestra', 
'frederic chopin' : 'chopin', 
'garfunkel' : 'art garfunkel', 
'gustav holst' : 'holst', 
'gustav mahler' : 'mahler', 
'hanz zimmer' : 'hans zimmer', 
'iron wine' : 'iron and wine', 
'iz kamakawiwo`ole' : 'israel kamakawiwoole', 
'j.s. bach' : 'johann sebastian bach', 
'bach' : 'johann sebastian bach', 
'louie armstrong' : 'louis armstrong', 
'ludovico einaudi' : 'einaudi', 
'ludwig van beethoven' : 'beethoven', 
'maurice ravel' : 'ravel', 
'felix mendelssohn-bartholdy' : 'mendelssohn', 
'mick hucknall simply red' : 'simply red', 
'wolfgang amadeus mozart' : 'mozart', 
'wa mozart' : 'mozart', 
'luciano pavarotti' : 'pavarotti', 
'giacomo puccini' : 'puccini', 
'pyotr ilyich tchaikovsky' : 'tchaikovsky', 
'rhcp' : 'red hot chili peppers', 
'satie' : 'erik satie', 
'sergei rachmaninoff' : 'rachmaninoff', 
'tenacious d' : 'jack black', 
}

df['artist'] = df['artist'].apply(lambda s: mymap.get(s, s))


In [346]:
# drop a leading "the " so that e.g. 'the beatles' and 'beatles' are one artist
df['artist'] = df['artist'].apply(lambda s: s[4:] if s.startswith('the ') else s)

# ...except for The Band, whose name would otherwise be reduced to 'band'
df.loc[df['artist']=='band', 'artist']='the band'

# 'God Only Knows' is a Beach Boys song that got attributed to the Beatles.
# The title is compared case-insensitively and without surrounding
# quotes/spaces: tracks are only normalised in a later cell, so an exact
# lower-case comparison never matched when the notebook is run top to bottom.
is_god_only_knows = df['track'].str.lower().str.strip('"\' ') == 'god only knows'
df.loc[(df['artist']=='beatles') & is_god_only_knows, 'artist'] ='beach boys'


In [329]:
# number of distinct artist names left after the clean-up
df['artist'].unique().size

4118

In [332]:
# spot check: every row whose artist contains the substring 'carp'
df[df['artist'].str.contains('carp', regex=False, na=False)]

Unnamed: 0,post_id,post_score,artist,track,artist_index
313,113,"""31""",carpenters,"""We've only just begun""",124
703,9809,1,karen carpenter,Something on your mind,331
704,9146,1,karen carpenter,something on your mind (remastered),331
705,15614,1,karen carpenter,The Moon Song,331
1191,8158,1,carpenters,(They Long To Be) Close To You,58
...,...,...,...,...,...
18260,6122,1,carpenters,Yesterday once more,58
18261,8501,1,carpenters,Drive,58
18262,5716,1,carpenters,Wildwood Flower,58
18263,12616,1,carpenters,Black Metallic,58


In [333]:
# normalise track titles the same way as artist names
df['track'] = df['track'].map(fix_leading_trailing)


In [337]:
# the 20 track titles with the most extracted rows (across all artists)
rows_per_track = df.groupby('track').count().reset_index()
rows_per_track.sort_values('artist', ascending=False).head(20)


Unnamed: 0,track,post_id,post_score,artist,artist_index
3551,hallelujah,142,142,142,142
8083,somewhere over the rainbow,61,61,61,61
10186,what a wonderful world,59,59,59,59
8134,songbird,59,59,59,59
7536,saturn,53,53,53,53
2800,fade into you,48,48,48,48
3383,golden hour,48,48,48,48
6497,on the nature of daylight,47,47,47,47
4903,la vie en rose,47,47,47,47
9925,vincent,46,46,46,46


In [335]:
# remove rows whose track title is only a placeholder ('unknown' or 'none')
placeholder_track = df['track'].str.strip().isin(['unknown', 'none'])
df = df.drop(index=df.index[placeholder_track])


In [338]:
def _normalize_score(raw):
    """Turn one raw post_score cell into a positive int.

    Keeps only the digits of the value, uses at most the last five of them,
    and falls back to 1 when nothing is left or the result is "0".
    Values that are already numeric are accepted as well; the previous
    string-only code raised TypeError on them, e.g. when pandas parsed the
    column as integers or when the cell was run a second time.
    """
    text = raw if isinstance(raw, str) else str(int(raw))
    digits = ''.join(ch for ch in text if ch.isdigit())[-5:]
    if digits in ("", "0"):
        return 1
    return int(digits)


df=df.reset_index(drop=True)
df['post_score'] = df['post_score'].apply(_normalize_score).astype(int)


In [298]:
# tempdf = df[['artist', 'post_score']] \
#     .groupby('artist') \
#     .sum() \
#     .reset_index() 

# tempdf.loc[tempdf['post_score']> 2].to_csv('x.csv', index=False)

In [349]:
# Rank songs: add up the scores of all rows naming the same (artist, track)
# pair, highest total first.
score_totals = df[['artist', 'track', 'post_score']].groupby(["artist", "track"]).sum()
df = score_totals.reset_index().sort_values('post_score', ascending=False)

df.head(20)



Unnamed: 0,artist,track,post_score
3913,erik satie,gymnopédies,7008
10599,simon garfunkel,scarborough fair,6162
8348,neil young,harvest moon,5334
5377,israel kamakawiwoole,over the rainbow,5103
2671,cranberries,dreams,4404
7717,mazzy star,fade into you,3972
3699,elton john,your song,3914
1083,beatles,in my life,3789
992,beach boys,god only knows,3497
3330,don mclean,"vincent (starry, starry night",3383


In [350]:
# number of distinct (artist, track) pairs after aggregation
len(df)

12561

In [354]:
# keep only songs whose total score is above 3 and overwrite silver.csv with them
df[df['post_score'].gt(3)].to_csv('silver.csv', index=False)


In [355]:
# reload the filtered ranking from disk (columns: artist, track, post_score)
df=pd.read_csv('silver.csv')
df

Unnamed: 0,artist,track,post_score
0,erik satie,gymnopédies,7008
1,simon garfunkel,scarborough fair,6162
2,neil young,harvest moon,5334
3,israel kamakawiwoole,over the rainbow,5103
4,cranberries,dreams,4404
...,...,...,...
1635,basil poledouris,theology/civilization,4
1636,gregory alan isakov,amsterdam,4
1637,aphex twin,flim,4
1638,kodaline,all i want,4


## Load into a Spotify playlist


In [356]:
# Spotify client using the client-credentials flow; id and secret are read
# from the environment (None is passed through when a variable is missing).
spotify_id = os.getenv('SPOTIFY_CLIENT_ID')
spotify_secret = os.getenv('SPOTIFY_CLIENT_SECRET')
client_credentials_manager = SpotifyClientCredentials(client_id=spotify_id,
                                                      client_secret=spotify_secret)

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


In [358]:
# check artists
# Look every distinct artist up on Spotify and print the ones whose best match
# is spelled differently, or that are not found at all.
# The de-duplicated, NaN-free frame is now actually kept (the result of the
# drop_duplicates/dropna chain used to be thrown away, so rows with missing
# values still reached the search). Rows stay in score order; names are read
# as text so numeric-looking names do not turn into numbers.
df = pd.read_csv("silver.csv", dtype={'artist': str, 'track': str}) \
    .drop_duplicates() \
    .dropna()

dedupe = set()    # artists already looked up
fail_list = []    # (artist, title) pairs for artists Spotify does not know

for index, artist, title, score in df.itertuples():
    if artist in dedupe:
        continue
    dedupe.add(artist)
    query_str = 'artist:%s' % (artist)
    artist_results = sp.search(q=query_str, type='artist', limit=3, offset=0, market='US')
    artist_names = [item['name'] for item in artist_results['artists']['items']]
    if not artist_names:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)
    elif artist.lower() != artist_names[0].lower():
        # found, but under a different spelling: show the candidates
        print(artist, artist_names)

# then clean up manually as appropriate

simon garfunkel ['Simon & Garfunkel', 'Simon & Garfunkel Experience', 'Simon & Garfunkels']
israel kamakawiwoole ["Israel Kamakawiwo'ole"]
cranberries ['The Cranberries', 'Hy Bush & The Wild Cranberries', 'Karaoke - The Cranberries']
beatles ['The Beatles', 'The New Beatles', 'The Beatles Complete On Ukulele']
beach boys ['The Beach Boys', 'Kalua Beach Boys', 'Al Jardine of The Beach Boys']
edith piaf ['Édith Piaf', 'Edith Piaf Trio', 'Louiguy / Edith Piaf']
righteous brothers ['The Righteous Brothers', 'The Self-Righteous Brothers', 'The New Righteous Brothers']
sigur ros ['Sigur Rós', 'Birta Rós Sigurjónsdóttir', 'Steindór Anderson & Sigur Rós']
mamas and the papas ['The Mamas & The Papas', 'The Mamas & The Papas Experience', 'Karaoke - The Mamas & The Papas']
barber ['Sam Barber', 'Jill Barber', 'Samuel Barber']
cure ['The Cure', 'Jah Cure', 'Tinnitus Cure']
einaudi ['Ludovico Einaudi', 'Leo Einaudi', 'Gilbert Einaudi']
debussy ['Claude Debussy', 'Johann Debussy', 'Quatuor Debussy']

KeyboardInterrupt: 

In [359]:
# check tracks
# Search Spotify for every (artist, track) pair of silver.csv and collect the
# best hit of each into the parallel lists below (consumed when gold_df is
# built). Pairs without a hit go to fail_list.

# Names are read as text and rows with a missing artist or title are skipped:
# they used to be searched as "nan" and could never match.
df = pd.read_csv("silver.csv", dtype={'artist': str, 'track': str}) \
    .dropna(subset=['artist', 'track'])

dedupe = set()   # Spotify track ids already collected
mylist = []      # not used in this cell; kept in case another cell reads it
fail_list = []
artist_list, track_list, uri_list, album_list = [], [], [], []
orig_artist, orig_track = [], []

for index, artist, title, score in df.itertuples():
    query_str = 'artist:%s track:%s' % (artist, title)
    track_results = sp.search(q=query_str, type='track', limit=1, offset=0, market='US')
    results = track_results['tracks']['items']

    if not results:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)
        continue

    r = results[0]
    # failsafe to never put same track twice
    if r['id'] in dedupe:
        continue
    dedupe.add(r['id'])
    if title.lower() != r['name'].lower():
        # hit found under a different title: print input|hit for manual review
        print ("%s|%s : %s|%s" % (artist, title, r['artists'][0]['name'], r['name']))
    uri_list.append(r['uri'])
    artist_list.append(r['artists'][0]['name'])
    track_list.append(r['name'])
    album_list.append(r['album']['name'])
    orig_artist.append(artist)
    orig_track.append(title)
        

erik satie|gymnopédies : Erik Satie|3 Gymnopédies: No. 1 Lent et douloureux
simon garfunkel|scarborough fair : Simon & Garfunkel|Scarborough Fair / Canticle
not found: neil young - harvest moon
beatles|in my life : The Beatles|In My Life - Remastered 2009
beach boys|god only knows : The Beach Boys|God Only Knows - Mono
don mclean|vincent (starry, starry night : Don McLean|Vincent (Starry, Starry Night)
sigur ros|hoppipolla : Sigur Rós|Hoppípolla
beatles|blackbird : The Beatles|Blackbird - Remastered 2009
john denver|annie’s song : John Denver|Annie's Song
otis redding|dock of the bay : Otis Redding|(Sittin' On) the Dock of the Bay
barber|adagio for strings : Samuel Barber|Barber: Adagio for Strings
israel kamakawiwoole|somewhere over the rainbow/wonderful world : Israel Kamakawiwo'ole|Somewhere Over The Rainbow_What A Wonderful World
peter gabriel|in your eyes : Peter Gabriel|In Your Eyes - 2012 Remaster
debussy|clair de lune : Claude Debussy|Suite bergamasque, L. 75: III. Clair de lun

cat stevens|the wind : Yusuf / Cat Stevens|The Wind - Remastered 2021
cure|a night like this : The Cure|A Night like This - 2006 Remaster
not found: bee gees - inmortality
not found: aphex twin - avrl 14
babyface|every time i close my eyes : Babyface|Every Time I Close My Eyes (with Kenny G)
not found: vv - heartfull of ghosts
not found: etta james - i’d rather go blind
not found: iz - somewhere over the rainbow/wonderful world
not found: trent - a warm place
disturbed|sound of silence : Disturbed|The Sound of Silence
queen|love of my life : Queen|Love Of My Life - Remastered 2011
beethoven|moonlight sonata : Ludwig van Beethoven|Sonata No. 14 "Moonlight" in C-Sharp Minor", Op. 27 No. 2: I. Adagio sostenuto
les miserables|i dreamed a dream : Lesley Garrett|Schönberg, Claude-Michel: I Dreamed a Dream (from "Les Misérables")
jimi hendrix|guitar : Jimi Hendrix|Purple Haze (Arr. for 2 Guitars)
not found: cranberries - nan
not found: beatles - pet sounds
beatles|something : The Beatles|Some

johann sebastian bach|canon in d : Johann Sebastian Bach|Musical Offering, BWV 1079: Canon a 2 violini in unisono
johann sebastian bach|cello suite no. 1 in g major : Johann Sebastian Bach|Cello Suite No. 1 in G Major, BWV 1007: I. Prélude
van|sweet thing : Van Morrison|Sweet Thing - 1999 Remaster
rolling stones|moonlight mile : The Rolling Stones|Moonlight Mile - 2009 Mix
yes|and you and i : Yes|And You and I - 2003 Remaster
simon garfunkel|el condor pasa : Simon & Garfunkel|El Condor Pasa (If I Could)
bjork|yoga : Björkliden|Yoga Nidra
beatles|sgt. pepper : The Beatles|Sgt. Pepper's Lonely Hearts Club Band - Remastered 2009
not found: fleetwood mac - blue spotted tail
not found: ray lamontagne - til the sky turns black
dave matthews band|41 : Dave Matthews Band|#41
buddy holly|dearest : Buddy Holly|(Ummmm, Oh Yeah) Dearest
not found: verve - pale blue eyes
art garfunkel|bridge over troubled water : Art Garfunkel jr.|Geh mit mir durch den Regenbogen (Bridge Over Troubled Water)
not fo

not found: amy ray - nan
enya|song : Camille Enyal|The meditation song
beatles|a day in the life : The Beatles|A Day In The Life - Remastered 2009
liszt|liebestraum : Franz Liszt|Liebestraum No. 3 in A-Flat Major, S. 541 / 3
not found: black sabbath - the place where he inserted the blade
dvorak|song to the moon : Antonín Dvořák|Dvorák / Transc. Lenaerts: Rusalka, Op. 114, Act 1: Song to the Moon
not found: terrence jay - one blood
not found: ella fitzgerald - stars over alabama
morten lauridsen|o magnum mysterium : Morten Lauridsen|O Magnum Mysterium: O magnum mysterium
not found: riceboy sleeps - the album
evanescence|even in death : Evanescence|Even In Death - 2016 Version
not found: adeem - white trash revelry
not found: faye wong - dreams
yiruma|a river flows in you : Yiruma|River Flows in You - Arr. for Solo Harp
tchaikovsky|pas de deux : Pyotr Ilyich Tchaikovsky|Tchaikovsky: The Nutcracker, Op. 71, Act II: No. 14a, Pas de deux. Andante maestoso
tchaikovsky|waltz of the flowers :

not found: commin souls - arizona
judy garland|somewhere over the rainbow : Judy Garland|Somewhere Over the Rainbow - From "The Wizard of Oz"
queen|you take my breath away : Queen|You Take My Breath Away - Remastered 2011
chopin|ballade no. 3 : Frédéric Chopin|Ballade No. 3 in A Flat Major, Op. 47
not found: bright eyes - no lies just love
not found: james taylor - into the red
bts|the truth untold : BTS|The Truth Untold (feat. Steve Aoki)
not found: rem - find the river
tori amos|bells for her : Tori Amos|Bells for Her - 2015 Remaster
keane|russian farmers song : Keane|Russian Farmer's Song
eva cassidy|somewhere over the rainbow : Karaoke - Eva Cassidy|Karaoke - Somewhere Over The Rainbow
tash sultana|coma : Tash Sultana|Coma - MTV Unplugged, Live In Melbourne
hans zimmer|interstellar : Hans Zimmer|Day One (Interstellar Theme)
rachmaninoff|piano concerto no. 2 : Sergei Rachmaninoff|Piano Concerto No. 2 in C Minor, Op. 18: 2. Adagio sostenuto
not found: fleetwood mac - icicle tusk
not 

smashing pumpkins|disarm : The Smashing Pumpkins|Disarm - 2011 Remaster
not found: charlie puth - see you again
kanye west|cold : Kanye West|Coldest Winter
not found: brian eno - song for sienna
johann sebastian bach|air on g string : Johann Sebastian Bach|Orchestral Suite No. 3 in D Major, BWV 1068: II. Air "On a G String" (Arr. for Piano)
not found: john coltrane - 4'33
not found: dario marianelli - sunchyme
smiths|i know it's over : The Smiths|I Know It's Over - 2011 Remaster
not found: stevie wonder - life by the drop
not found: stevie wonder - landslide
not found: susanne sundfor - here with me
not found: temptations - sweet disposition
dimash|s.o.s : Dimash Qudaibergen|S.O.S d'un terrien en détresse
moody blues|tuesday afternoon : The Moody Blues|Tuesday Afternoon (Forever Afternoon)
elgar|nimrod : Edward Elgar|Variations on an Original Theme, Op. 36 "Enigma": Variation IX. Nimrod (Adagio)
david bowie|heroes : David Bowie|Heroes - 2017 Remaster
chopin|raindrop prelude : Frédéric 

In [362]:
# Collect the lookup results into one frame: the input_* columns hold the
# names that were searched for, the remaining columns hold the matching
# values from the parallel result lists built by the lookup cell.
gold_columns = {
    'input_artist': orig_artist,
    'artist': artist_list,
    'input_track': orig_track,
    'track': track_list,
    'album': album_list,
    'uri': uri_list,
}
gold_df = pd.DataFrame(gold_columns)

# lift the row limit so the whole frame is rendered, not a truncated preview
with pd.option_context("display.max_rows", 9999):
    display(gold_df)


Unnamed: 0,input_artist,artist,input_track,track,album,uri
0,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
1,simon garfunkel,Simon & Garfunkel,scarborough fair,Scarborough Fair / Canticle,"Parsley, Sage, Rosemary And Thyme",spotify:track:3g2fYZW5v2od8KIF7VktT0
2,israel kamakawiwoole,Israel Kamakawiwo'ole,over the rainbow,Over the Rainbow,Alone In Iz World,spotify:track:3oQomOPRNQ5NVFUmLJHbAV
3,cranberries,The Cranberries,dreams,Dreams,"Everybody Else Is Doing It, So Why Can't We?",spotify:track:4JGKZS7h4Qa16gOU3oNETV
4,mazzy star,Mazzy Star,fade into you,Fade Into You,So Tonight That I Might See,spotify:track:1LzNfuep1bnAUR9skqdHCK
5,elton john,Elton John,your song,Your Song,Elton John,spotify:track:38zsOOcu31XbbYj9BIPUF1
6,beatles,The Beatles,in my life,In My Life - Remastered 2009,Rubber Soul (Remastered),spotify:track:3KfbEIOC7YIv90FIfNSZpo
7,beach boys,The Beach Boys,god only knows,God Only Knows - Mono,Pet Sounds (Original Mono & Stereo Mix),spotify:track:6iGU74CwXuT4XVepjc9Emf
8,don mclean,Don McLean,"vincent (starry, starry night","Vincent (Starry, Starry Night)",Rearview Mirror: An American Musical Journey,spotify:track:2YDyH60Vro33KkDtNZCXIk
9,edith piaf,Édith Piaf,la vie en rose,La Vie en rose,Edith Piaf - The Best Of,spotify:track:3lAun9V0YdTlCSIEXPvfsY


In [None]:
# Rows where the returned artist differs from the requested one,
# ignoring case.
artist_differs = gold_df['input_artist'].str.lower() != gold_df['artist'].str.lower()
with pd.option_context("display.max_rows", 999):
    display(gold_df.loc[artist_differs])

In [None]:
# Work on a copy: reduce both title columns to a lower-cased, stripped,
# 10-character prefix, then show the rows where the two prefixes disagree.
gold_df2 = gold_df.copy().reset_index(drop=True)
for title_col in ('input_track', 'track'):
    lowered = gold_df2[title_col].str.lower()
    gold_df2[title_col] = lowered.apply(lambda s: s.strip()[:10])

title_differs = gold_df2['input_track'] != gold_df2['track']
with pd.option_context("display.max_rows", 999):
    display(gold_df2.loc[title_differs])

In [None]:
# Row numbers of gold_df whose Spotify match looks like a cover or is
# otherwise not the expected response from spotify search
# (which is a bit wonky, doesn't like quotes and such).
# list from an earlier run:
#    25,134,155,160,200,209,422,445,446,557,737,744,755,759,760,761,762,781,785,790,814,815,842
bad_lookups = [21, 51, 61, 63, 83, 145, 212, 317, 322, 439, 449, 575, 759, 784]

# print each flagged row in full so it can be checked by eye
for row_pos in bad_lookups:
    print(gold_df.iloc[row_pos])
    
# add manually, plus 'not found'


In [None]:
# Remove the rows flagged in bad_lookups (matched by index label), then save
# the surviving artist/track pairs to gold.csv and show what is left.
# errors='ignore' keeps this cell re-runnable: gold_df is reassigned here, so
# on a second run the labels are already gone and a strict drop would raise
# KeyError.
gold_df = gold_df.drop(index=bad_lookups, errors='ignore')

gold_df[['artist', 'track']].to_csv('gold.csv', index=False)

with pd.option_context("display.max_rows", 999):
    display(gold_df)

In [363]:
# get playlist id
# first create a playlist in UI to load songs
#
# Pages through the user's playlists and records the id of the one named
# 'Reddit Prettiest Songs'. playlist_id is reset first so that a missing
# playlist is reported here, instead of surfacing later as a NameError or
# as a stale id left in the kernel by a previous run.
playlist_id = None
playlists = sp.user_playlists(os.getenv('SPOTIFY_USERNAME'))
while playlists:
    for i, playlist in enumerate(playlists['items']):
        if playlist['name'] != 'Reddit Prettiest Songs':
            continue
        playlist_id = playlist['id']
        print(playlist_id)
        # 1-based position of the playlist across all pages, then uri and name
        print("%4d %s %s" % (i + 1 + playlists['offset'], playlist['uri'],  playlist['name']))
    # follow the paging link until there are no more pages
    playlists = sp.next(playlists) if playlists['next'] else None

if playlist_id is None:
    raise LookupError("playlist 'Reddit Prettiest Songs' not found; create it in the Spotify UI first")

08YFkbtTV6GBfNtjJ4PHDu
   1 spotify:playlist:08YFkbtTV6GBfNtjJ4PHDu Reddit Prettiest Songs


In [364]:
# Writing to a playlist requires Spotify's OAuth workflow.
# Running this cell should request a spotify login and then redirect to an url;
# paste the whole redirected url (including the id) into the form to authenticate.

scope = "playlist-modify-public"

# credentials are read from the environment
oauth_manager = spotipy.SpotifyOAuth(
    scope=scope,
    client_id=os.getenv('SPOTIFY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
    redirect_uri="https://druce.ai",
)
sp = spotipy.Spotify(auth_manager=oauth_manager)


In [None]:
# addlist = gold_df['uri'].to_list()
# print (len(addlist))

# while(addlist):
#     sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'), 
#                                 playlist_id=playlist_id, 
#                                 tracks=addlist[-100:])
#     addlist = addlist[:-100]
#     print("added items, remaining ", len(addlist))


In [None]:
# manually add the ones that weren't found for some reason


In [365]:
# can run again and add any new tracks, either because OpenAI is a bit random, or new replies in thread
#
# Reads the playlist's current contents page by page and fills:
#   playlist_dict_by_str : "artist | first 15 chars of title" -> uri
#   playlist_dict_by_uri : uri -> the same string key
#   artist_list, track_list, album_list, uri_list, popularity_list : parallel per-track lists
# NOTE: artist_list / track_list / album_list / uri_list are also the inputs of
# the cell that builds gold_df; this cell overwrites them with playlist data.
results = sp.user_playlist(os.getenv('SPOTIFY_USERNAME'), playlist_id,
                           fields='tracks,next,name')
tracks = results['tracks']

playlist_dict_by_uri = {}
playlist_dict_by_str = {}

artist_list = []
track_list = []
uri_list = []
popularity_list = []
album_list = []

while True:
    for track_item in tracks['items']:
        track_dict = track_item['track']
        if track_dict is None:
            # a playlist item can carry a null track (e.g. an entry Spotify can
            # no longer resolve); skip it rather than fail on the subscripts below
            continue
        artist_name = track_dict['artists'][0]['name']
        track_str = artist_name + ' | ' + track_dict['name'][:15]
        uri = track_dict['uri']
        if track_str in playlist_dict_by_str:
            # same artist and title prefix seen before: report the likely duplicate
            print(track_str)
        playlist_dict_by_str[track_str] = uri
        playlist_dict_by_uri[uri] = track_str

        uri_list.append(uri)
        artist_list.append(artist_name)
        track_list.append(track_dict['name'])
        album_list.append(track_dict['album']['name'])
        popularity_list.append(track_dict['popularity'])

    # check if there are more pages
    if tracks['next']:
        tracks = sp.next(tracks)
    else:
        break

# the two counts differ when distinct uris share the same string key
print(len(playlist_dict_by_str))
print(len(playlist_dict_by_uri))


Enter the URL you were redirected to: https://druce.ai/?code=AQBNfveAJz1rIevrEoWYqNsvbYh5cG-NvoYC3SZwC2R96EyeEyQazYgwCfedEt4mftuycIBQx_UyLFs29YWqAzpWszcAP8Aqz_5D_SWdiHV-MqFws2AviV-Q2Z5IyDHyGXot7PE7S9D87O4xBht5GrGq9Wn0gesXZtMNWv6qYC1IyvLHTypLOvAYsq0
Nine Inch Nails | A Warm Place
Joni Mitchell | Both Sides Now
1025
1027


dict_keys(['spotify:track:5NK8jad728pj6YeqM5VJD4', 'spotify:track:7aQjPecQdIuNd1sz3KCDhD', 'spotify:track:50q2aUjWoTn6CJIfSPRJQA', 'spotify:track:4ny0oELEssJsXNQDxpWWLI', 'spotify:track:1Gv6f7m5ZNDcInxGA8PsLS', 'spotify:track:3wM6RTAnF7IQpMFd7b9ZcL', 'spotify:track:5hviCr3lgg6LY6noG6DPKs', 'spotify:track:0mmojIGIHZqyqYBTNIo2Tz', 'spotify:track:4JGKZS7h4Qa16gOU3oNETV', 'spotify:track:0gEyKnHvgkrkBM6fbeHdwK', 'spotify:track:5kcsc92tGAjS0uvhDY3Mok', 'spotify:track:7ABE6G4uQxbNtYgVrpBBPA', 'spotify:track:76GlO5H5RT6g7y0gev86Nk', 'spotify:track:0X5C4WjQNubRysTkHOubz3', 'spotify:track:1tuwC1Ob5vnYZhvg3zyzJg', 'spotify:track:76rfUs6iZBa3OCf8tCBn27', 'spotify:track:4m3OS54KWywYhP7WD7z1cg', 'spotify:track:1osAmhnIT0rtuqLZ1IliBQ', 'spotify:track:2DFRFqWNahKtFD112H2iEZ', 'spotify:track:3YdKJzcoMZMacISlpY4QoP', 'spotify:track:5fVZC9GiM4e8vu99W0Xf6J', 'spotify:track:0Q0IVlqMV64kNLlwjPj0Hl', 'spotify:track:5zmaypMaWb21FUGBxbw8hT', 'spotify:track:6Qyc6fS4DsZjB2mRW9DsQs', 'spotify:track:41iPmvB2ogl3dz

In [371]:
# gold_df rows whose uri is not among the uris read from the playlist
missing_from_playlist = ~gold_df['uri'].isin(playlist_dict_by_uri.keys())
with pd.option_context("display.max_rows", 999):
    display(gold_df.loc[missing_from_playlist])

Unnamed: 0,input_artist,artist,input_track,track,album,uri
0,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
8,don mclean,Don McLean,"vincent (starry, starry night","Vincent (Starry, Starry Night)",Rearview Mirror: An American Musical Journey,spotify:track:2YDyH60Vro33KkDtNZCXIk
17,joni mitchell,Joni Mitchell,both sides now,Both Sides Now,Feathers.,spotify:track:5EsPLgSs1UQIDJG0U00RuJ
39,simon garfunkel,Simon & Garfunkel,"for emily, whenever i may find her","For Emily, Whenever I May Find Her","Parsley, Sage, Rosemary And Thyme",spotify:track:76Fcbx3T7fUgA7phUkmsn7
42,mozart,Wolfgang Amadeus Mozart,lacrimosa,Lacrimosa,Requiem - Music To Die For,spotify:track:1UvaZaHkh3D9AkmBrrnbFg
51,louis armstrong,Louis Armstrong,it's a wonderful world,It's Wonderful - Single Version,What A Wonderful World,spotify:track:7tmOKoxLQFLvQWwxzYaodT
55,henry mancini,Henry Mancini,moon river,Moon River(Vocal Audrey Hepburn),Breakfast At Tiffany's (50th Anniversary Edition),spotify:track:5iGleL7HpEThuuYQ3us2jh
86,beatles,The Beatles,golden slumbers,Golden Slumbers - Remastered 2009,Abbey Road (Remastered),spotify:track:01SfTM5nfCou5gQL70r6gs
110,tori amos,Tori Amos,little earthquakes,Little Earthquakes,Little Earthquakes,spotify:track:0z0vbrc3J6BVnKU5UisiNj
120,george harrison,George Harrison,while my guitar gently weeps,While My Guitar Gently Weeps - Live At Madison...,Let It Roll - Songs of George Harrison,spotify:track:4Egi6XuC0rbLlXfqmQeuFa


In [None]:
# One row per playlist track, assembled from the parallel lists filled
# while paging through the playlist.
playlist_df = pd.DataFrame(dict(
    artist=artist_list,
    track=track_list,
    album=album_list,
    popularity=popularity_list,
))



In [None]:
# full playlist, ordered by ascending popularity
playlist_by_popularity = playlist_df.sort_values('popularity')
with pd.option_context("display.max_rows", 9999):
    display(playlist_by_popularity)
    

In [None]:
# Index gold_df two ways (uri -> string key, string key -> uri) and collect in
# addlist every [artist, track, uri] row whose string key is not already in
# the playlist.
gold_dict_by_uri = {}
gold_dict_by_str = {}
addlist = []
for artist, track, uri in gold_df[['artist', 'track', 'uri']].itertuples(index=False):
    # same "artist | first 15 chars of title" key format as playlist_dict_by_str,
    # so the two dicts can be compared directly
    track_str = artist + ' | ' + track[:15]
    if track_str not in playlist_dict_by_str:
        addlist.append([artist, track, uri])
        print(artist, track, uri)
    gold_dict_by_uri[uri] = track_str
    # key on the computed string; a constant key would leave a single entry
    gold_dict_by_str[track_str] = uri

print(len(gold_dict_by_str))
print(len(gold_dict_by_uri))

In [None]:
# show the rows queued for addition, each a list of [artist, track, uri]
addlist

In [None]:
# Hard-coded list of [artist, track, uri] entries. Assigning it here replaces
# whatever addlist the comparison loop above computed.
# NOTE(review): looks like a hand-edited paste of an earlier `addlist` output —
# confirm it is still wanted before re-running the upload cell below.
addlist = [['ABBA', 'One Of Us', 'spotify:track:6zgtBUEkAfilJ2YEOvNexR'],
 ['Gregorio Allegri',
  'Miserere mei, Deus',
  'spotify:track:6es7DmrhnDoKj5rsFvh3XU'],
 ['Amy Winehouse',
  'Love Is A Losing Game',
  'spotify:track:3uliGwmB52ZA7brgpZMzyH'],
 ['Barbara',
  "Ma plus belle histoire d'amour",
  'spotify:track:0qBVET4VkHsQAoboWlQ2pJ'],
 ['Ludwig van Beethoven',
  'Symphony No. 5 in C Minor, Op. 67: I. Allegro con brio',
  'spotify:track:2ygeBLTP9uu3OW3VTulD8N'],
 ['Benny Goodman', 'Sing, Sing, Sing', 'spotify:track:5L8ta4ECl5zeA6bGqY7G38'],
 ['Bill Withers', 'Lean on Me', 'spotify:track:3M8FzayQWtkvOhqMn2V4T2'],
 ['Billy Joel', 'Piano Man', 'spotify:track:70C4NyhjD5OZUMzvWZ3njJ'],
 ['Bob Dylan', 'Ballad of a Thin Man', 'spotify:track:0f5N14nB8xi0p3o4BlVvbx'],
 ['Bob Dylan', "Blowin' in the Wind", 'spotify:track:18GiV1BaXzPVYpp9rmOg0E'],
 ['Bob Dylan', 'Desolation Row', 'spotify:track:4n1ZGm3TxYmoYe1YR8cMus'],
 ['Bob Dylan', 'Duquesne Whistle', 'spotify:track:5kKW4bszhKSCYVPDO0sMbX'],
 ['Bob Dylan',
  'Forever Young - Slow Version',
  'spotify:track:4yWl0tnEanf3zmZzl9kbQn'],
 ['Bob Dylan', 'Gotta Serve Somebody', 'spotify:track:760420tYNmNjFgi8bWvbop'],
 ['Bob Dylan', 'Highway 61 Revisited', 'spotify:track:6os5B6xjuke9YfBKH3tu1e'],
 ['Bob Dylan',
  'I Shall Be Released - Studio Outtake - 1971',
  'spotify:track:5vyw005QQ42hrzrLxb3xEX'],
 ['Bob Dylan', 'I Want You', 'spotify:track:7tJQ4Ekp2vN3NlI3vJJW3v'],
 ['Bob Dylan', "It Ain't Me Babe", 'spotify:track:5nbNWAfT1S6V1vqj3snHxS'],
 ['Bob Dylan', 'Jokerman', 'spotify:track:6cuHkcRUqtQhtJ4sWCkd1q'],
 ['Bob Dylan',
  "Knockin' On Heaven's Door",
  'spotify:track:6HSXNV0b4M4cLJ7ljgVVeh'],
 ['Bob Dylan', 'Lay, Lady, Lay', 'spotify:track:4uYwlMp841PLJmj1gJJwIq'],
 ['Bob Dylan', 'Like a Rolling Stone', 'spotify:track:3AhXZa8sUQht0UEdBJgpGc'],
 ['Bob Dylan', 'Love Sick', 'spotify:track:3O1hpSOaJDW4SelgUG2XT3'],
 ['Bob Dylan', "Maggie's Farm", 'spotify:track:5rGD8FFgHw74cp3RPhucyg'],
 ['Bob Dylan',
  'Make You Feel My Love',
  'spotify:track:6rfGPGghQL7SJmZPXprXIc'],
 ['Bob Dylan',
  'Mississippi - Version 2',
  'spotify:track:6JWHNd8QMxTvojYkmZtKGI'],
 ['Bob Dylan', 'Mr. Tambourine Man', 'spotify:track:3RkQ3UwOyPqpIiIvGVewuU'],
 ['Bob Dylan', 'Murder Most Foul', 'spotify:track:1LfTvT9JPYuuZanwxLtZCr'],
 ['Bob Dylan', 'Not Dark Yet', 'spotify:track:1qbn6QrHG8XfnqVFKgNzKP'],
 ['Bob Dylan',
  'Rainy Day Women #12 & 35',
  'spotify:track:7BkAlVpGwXXl3sYNn5OoJ7'],
 ['Bob Dylan',
  'Sad-Eyed Lady of the Lowlands',
  'spotify:track:4jdtLLyEL7wY0TlCdMKhxq'],
 ['Bob Dylan', 'She Belongs to Me', 'spotify:track:2itBkHBUxGl4VfDj4HNyoD'],
 ['Bob Dylan',
  'Stuck Inside of Mobile with the Memphis Blues Again',
  'spotify:track:1NYTj6JEw3IOh4ggiBh82h'],
 ['Bob Dylan',
  'Subterranean Homesick Blues',
  'spotify:track:6k9DUKMJpWvu6eFG3O64Lg'],
 ['Bob Dylan', 'Tangled up in Blue', 'spotify:track:6Vcwr9tb3ZLO63F8DL8cqu'],
 ['Bob Dylan', 'Tempest', 'spotify:track:19scNzd4ogVsHrNWsms8Rg'],
 ['Bob Dylan',
  "The Times They Are A-Changin'",
  'spotify:track:52vA3CYKZqZVdQnzRrdZt6'],
 ['Bob Dylan',
  'Things Have Changed - Single Version',
  'spotify:track:5KOi77ameCimkAdw0DMNoy'],
 ['Bob Dylan',
  'Thunder on the Mountain',
  'spotify:track:4wo2eRp6aHcAlmhmfwiTAH'],
 ['Bob Dylan', 'Visions of Johanna', 'spotify:track:2rslQV48gNv3r9pPrQFPW1'],
 ['Brian Wilson', 'God Only Knows', 'spotify:track:2SznAUigFh6rMdGpcS5d7e'],
 ['Bright Eyes',
  'First Day of My Life',
  'spotify:track:0eBryM7ePQH3Klt3jz8xZd'],
 ['Crowded House',
  'Don’t Dream It’s Over - Home Demo',
  'spotify:track:0fiSpF9mvRFQWy0ca64d1g'],
 ['Léo Delibes', 'Flower Duet', 'spotify:track:5K8jqeLAxZIqHR6e5w5so1'],
 ['Dire Straits', 'Brothers In Arms', 'spotify:track:6XYBbVpu455ZdGWZNRLGbG'],
 ['Don McLean',
  'Vincent (Starry, Starry Night)',
  'spotify:track:2YDyH60Vro33KkDtNZCXIk'],
 ['Ed Sheeran', 'Photograph', 'spotify:track:41xNsY82OWtWbIfnRMK2ky'],
 ['Elvis Presley',
  'Can’t Help Falling in Love - Acoustic Cover',
  'spotify:track:0ghQkNDYLSl4GsqfkjTjWx'],
 ['Enya', 'Amarantine', 'spotify:track:0VmzazQQ0Mo1vJldr5NxTW'],
 ['Evan Rachel Wood', 'If I Fell', 'spotify:track:0gd3hRBQAEAw096YOcUrmR'],
 ['Fleetwood Mac', 'Rhiannon', 'spotify:track:05oETzWbd4SI33qK2gbJfR'],
 ['George Harrison',
  'All Things Must Pass - 2014 Remaster',
  'spotify:track:16OwZQuzMqnwn3FZsCBZly'],
 ['George Harrison',
  'Apple Scruffs - 2014 Remaster',
  'spotify:track:2K7WhpfZX3TCCMiwebp0W7'],
 ['George Harrison',
  'Art of Dying - 2014 Remaster',
  'spotify:track:6Jod7qrtYBhU3HcUmKk4hX'],
 ['George Harrison',
  'Awaiting on You All - 2014 Remaster',
  'spotify:track:0b65WkrBrg2qOkzQeDtQ9d'],
 ['George Harrison',
  'Ballad of Sir Frankie Crisp (Let It Roll) - 2014 Remaster',
  'spotify:track:0FWeRrB8T5R6maHbWQw4Kk'],
 ['George Harrison',
  'Behind That Locked Door',
  'spotify:track:2VVbLn8nMcWJzjcL1tZsUr'],
 ['George Harrison',
  'Beware of Darkness - 2014 Remaster',
  'spotify:track:606MCyZFMBlc52Ojnn1nvU'],
 ['George Harrison',
  'Give Me Love (Give Me Peace on Earth)',
  'spotify:track:71fXxvXqo1zxWDtBmjoEVk'],
 ['George Harrison',
  'Hear Me Lord - 2014 Remaster',
  'spotify:track:3kopbNyRj10XO1actGZexP'],
 ['George Harrison',
  'I Dig Love - 2014 Remaster',
  'spotify:track:42yK1Wy62c7malKSRwy0Qk'],
 ['George Harrison',
  'I Remember Jeep - 2014 Remaster',
  'spotify:track:058AE5M3ifbCh8VWOV7903'],
 ['George Harrison',
  "It's Johnny's Birthday - 2014 Remaster",
  'spotify:track:6Cv05rcW8HWwCC6wyEp1fC'],
 ['George Harrison',
  'Let It Down - 2014 Remaster',
  'spotify:track:5FFruMKbVg8AhwHnX4xBov'],
 ['George Harrison',
  'My Sweet Lord - 2014 Remaster',
  'spotify:track:6vE90mi4yKsQGY3YD2OOv1'],
 ['George Harrison',
  'Out of the Blue - 2014 Remaster',
  'spotify:track:1KHMyFaGvwVQ7ax4yjq4BZ'],
 ['George Harrison',
  'Plug Me In - 2014 Remaster',
  'spotify:track:0tyk2xHVjBd3nk16cGktTG'],
 ['George Harrison',
  'Run of the Mill - 2014 Remaster',
  'spotify:track:4uSlUBg3NVOA77E7wwKFTO'],
 ['George Harrison',
  'Thanks for the Pepperoni - 2014 Remaster',
  'spotify:track:3smkwfPqFsTmwfnBztMXaM'],
 ['George Harrison',
  'The Inner Light (Alternative Take) - Instrumental',
  'spotify:track:7gWPnvhaBFMlQsTBWEGcSC'],
 ['George Harrison',
  'Wah-Wah - 2014 Remaster',
  'spotify:track:5j3aqkMO2fl0s5eaSuVnQ8'],
 ['George Harrison',
  'What Is Life - 2014 Remaster',
  'spotify:track:44fw7RulJyj7dGIi9qR86N'],
 ['George Harrison',
  'While My Guitar Gently Weeps - Live At Madison Square Garden; 2009 Remaster',
  'spotify:track:4Egi6XuC0rbLlXfqmQeuFa'],
 ['Glenn Miller', 'In the Mood', 'spotify:track:1xsY8IFXUrxeet1Fcmk4oC'],
 ['Hans Zimmer', 'Cornfield Chase', 'spotify:track:6pWgRkpqVfxnj3WuIcJ7WP'],
 ['Hans Zimmer',
  'Day One (Interstellar Theme)',
  'spotify:track:4WmB04GBqS4xPMYN9dHgBw'],
 ["Israel Kamakawiwo'ole",
  'Maui Medley',
  'spotify:track:6TSJ3L9pBQsYIlCD5pk7ju'],
 ['James Taylor',
  'You’ve Got a Friend',
  'spotify:track:3nK4hWsTEr7fVXziI5bTmh'],
 ['Jay Ungar', 'Ashoken Farewell', 'spotify:track:2s6pqLeVialgt5l5TTSeas'],
 ['Jeff Buckley',
  'If You Knew - Live at Sin-é, New York, NY - July/August 1993',
  'spotify:track:1nd2JEHXbUuQFDiQzCBpsv'],
 ['Jimi Hendrix', 'One Rainy Wish', 'spotify:track:5Zyv0v4rPcrXjkaeImuodv'],
 ['Jimi Hendrix',
  'Spanish Castle Magic',
  'spotify:track:2KFE98Iw0X23sf4vJYcbLH'],
 ['Jimi Hendrix',
  'Wait Until Tomorrow',
  'spotify:track:2YtVzmZzew1ILUdNueyWd7'],
 ['John Lennon',
  'Imagine - Remastered 2010',
  'spotify:track:7pKfPomDEeI4TPT6EOYjn9'],
 ['John Mayer', 'Queen of California', 'spotify:track:0CETmgFGt8Ne8vLnaLcduU'],
 ['Johnny Cash',
  'I Walk The Line - Single Version',
  'spotify:track:1TKPfF2fvn6gVLVfp3iG4j'],
 ['Joni Mitchell',
  'Mitchell: Urge for Going (Instrumental Arrangement of the B-Side Track of the Joni Mitchell Single "You Turn Me on I\'m a Radio")',
  'spotify:track:1I1u9aTdxxQ7SDLgBB3V7b'],
 ['Kanye West', 'Come to Life', 'spotify:track:5xvXeuxISyXJDRbZZf4uzd'],
 ['Leonard Cohen', 'Chelsea Hotel #2', 'spotify:track:4krhCfJg0znykZoyjeMXRe'],
 ['Leonard Cohen', 'Dear Heather', 'spotify:track:3MTKMphPprAcBFG1uIhzPZ'],
 ['Leonard Cohen',
  "Death of a Ladies' Man",
  'spotify:track:5wrylUGwZugelovhryPYg2'],
 ['Leonard Cohen', 'The Future', 'spotify:track:5l8lYrnPEM1ln3J4XaTcy5'],
 ['Leonard Cohen',
  'You Want It Darker',
  'spotify:track:5zb7npjQqoJ7Kcpq4yD9qn'],
 ['Lingers.On', 'In Lingerie', 'spotify:track:6FH3kGlJbFVJDCG9RcERf7'],
 ['Louis Armstrong',
  'La vie en rose - Single Version',
  'spotify:track:3yYfoYGVpriV4fG9L1ogsD'],
 ['The Lovecats', 'The Lovecats', 'spotify:track:7iJUiiTfnuY5cTIeEBnqHr'],
 ['Ludovico Einaudi', 'Primavera', 'spotify:track:4BMHp3DkI8VLsuB9Kr0pzu'],
 ['Mazzy Star', 'Flowers In December', 'spotify:track:0G6Ws8Gbdt0S7pZeuYmkmm'],
 ['Metallica',
  'Fade To Black (Remastered)',
  'spotify:track:0dqGfCMAGyDgpUAgLNOjWd'],
 ['Wolfgang Amadeus Mozart',
  'Requiem in D Minor, K. 626: III. Sequenz No. 6, Lacrimosa dies illa',
  'spotify:track:4bvzJZXpkI3bkjxMCWOSu1'],
 ['My Chemical Romance',
  'The Light Behind Your Eyes',
  'spotify:track:3HyDpKAuR3e4l6QB7hSB2l'],
 ['Paul McCartney',
  'Here Today - Remixed 2015',
  'spotify:track:0QtnwXDziZN1K55fXuLN6q'],
 ['Paul McCartney',
  'I’ll Follow The Sun - Live At Amoeba 2007',
  'spotify:track:3xT59EeQdq0TPGtOlXXI8t'],
 ['Puscifer', 'The Humbling River', 'spotify:track:69GE6yPZZldvqtgBHrKXxg'],
 ['Ray LaMontagne',
  'Such A Simple Thing',
  'spotify:track:4PuUa8e5s7P3Zv1IdCGIsa'],
 ['Ray Manzarek',
  'Riders on the Storm',
  'spotify:track:3FvYcTXO2QtDY7kZQHku2d'],
 ['Red Hot Chili Peppers', 'Dosed', 'spotify:track:1iFIZUVDBCCkWe705FLXto'],
 ['Sky Cries Mary',
  "Don't Forget The Sky",
  'spotify:track:4sVpjCJRClVetRrdxVBolP'],
 ['Stevie Nicks', 'Landslide', 'spotify:track:5fprEY6WEN1wvFXkgfb22C'],
 ['Stevie Wonder', 'Isn’t She Lovely', 'spotify:track:6wGlAaMfyhKdEPr2zycAnN'],
 ['Taylor Swift',
  'Fearless (Taylor’s Version)',
  'spotify:track:77sMIMlNaSURUAXq5coCxE'],
 ['Taylor Swift',
  'the lakes - bonus track',
  'spotify:track:0eFQWVz0qIxDOvhLpZ40P7'],
 ['The Band',
  'When I Paint My Masterpiece - Remastered',
  'spotify:track:76WChUuOPeIK027IeUgr0l'],
 ['The Beach Boys',
  "I Just Wasn't Made For These Times - Mono",
  'spotify:track:4CuO8TINNqM3D7aUdNQ3zG'],
 ['The Beach Boys',
  "Let's Go Away For A While - Mono",
  'spotify:track:3GsgJI1aBrvUtqX8f3MhKT'],
 ['The Beatles',
  "Don't Let Me Down - Naked Version / Remastered 2013",
  'spotify:track:5BhMoGrz5KzG2fA5uzHjZ1'],
 ['The Beatles',
  'Love Me Do - Remastered 2009',
  'spotify:track:3VbGCXWRiouAq8VyMYN2MI'],
 ['The Chemical Brothers',
  'The Boxer',
  'spotify:track:1EUeDFq2zNP784GPaRs9aH'],
 ['The Cure',
  'A Night like This - 2006 Remaster',
  'spotify:track:7cKCz7gG84i1XLvDeM3ByT'],
 ['The Cure',
  'Disintegration - 2010 Remaster',
  'spotify:track:0zY8t5dC1KQXcPUKByWMJM'],
 ['The Cure',
  'From the Edge of the Deep Green Sea',
  'spotify:track:2vwBL9RVyr0vA4Og5VH0i3'],
 ['The Cure',
  'In Between Days - 2006 Remaster',
  'spotify:track:07CyrZF9eVd02zzIse7tZA'],
 ['The Cure', 'A Letter to Elise', 'spotify:track:4DdXOLc1VMAY34ourCn1Xa'],
 ['The Cure',
  'Lullaby - 2010 Remaster',
  'spotify:track:4d4oXk7O2lEhZ83ivV93li'],
 ['The Cure', 'Underneath The Stars', 'spotify:track:0PKVjYlKw7z3IvKAoxrYTR'],
 ['The Eagles', 'The Desperadoes', 'spotify:track:10ppF835WJMYI5v65gFLZ3'],
 ['The Helio Sequence',
  'Keep Your Eyes Ahead',
  'spotify:track:3yatRBsGMJ7wMoUIgDBzzo'],
 ['The Moldy Peaches',
  'Anyone Else But You',
  'spotify:track:2pKi1lRvXNASy7ybeQIDTy'],
 ['The Strokes', 'Someday', 'spotify:track:7hm4HTk9encxT0LYC0J6oI'],
 ['Traditional',
  'Scarborough Fair (Arr. Parkin)',
  'spotify:track:4wlNPczIullwvmwb4x0ltz'],
 ['Van Morrison',
  'Madame George - 1999 Remaster',
  'spotify:track:1N4MKISvC1ddfRCRQDXDd2'],
 ['Various Artists',
  'The Girl From Ipanema',
  'spotify:track:0JgH7g0kwsIs1THEVqhlUS'],
 ['Víg Mihály',
  'Öreg - From "Werckmeister Harmóniák"',
  'spotify:track:63wMgkXQuomlkW4an4O9b4'],
 ['Willie Nelson', 'Crazy', 'spotify:track:0xqtcLB45iKNfHroi5y1em']]


In [None]:
# number of entries queued for addition
len(addlist)

In [None]:
# Push the queued tracks to the playlist in batches of at most 100, taking
# each batch from the tail of the list until nothing is left.
addlist2 = [entry[2] for entry in addlist]  # third field of each entry is the uri

print (len(addlist2), 'items')

while len(addlist2) > 0:
    batch = addlist2[-100:]
    sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'),
                                playlist_id=playlist_id,
                                tracks=batch)
    # shrink the queue only after the call has returned
    addlist2 = addlist2[:-100]
    print("added items, remaining ", len(addlist2))
