In [1]:
# this version reads a chunk of posts with the score embedded
# ranks by summing scores

import os
import glob
import pickle
from datetime import datetime
import time
import dotenv
import pandas as pd
import re
from tqdm import tqdm

import pandas_dedupe

import requests
import requests.auth

import praw

import openai

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# load secrets from .env into environment variables
dotenv.load_dotenv()

praw.__version__

'7.7.0'

See README.md
 - objective is to use OpenAI for named entity extraction to extract all the songs form [this reddit thread](https://www.reddit.com/r/AskReddit/comments/12viv4v/what_is_the_prettiest_song_you_ever_heard_in_your/) and make Spotify playlist
 - use Reddit PRAW API to download all the comments (get [Reddit API key](https://www.reddit.com/prefs/apps))
 - use OpenAI API with a prompt like, extract all the songs from this text to CSV get ([OpenAI API key](https://platform.openai.com/account/api-keys))
 - use Spotify API to make a playlist (get [Spotify API key](https://developer.spotify.com/documentation/web-api/tutorials/getting-started))
 - works, needed a lot of scrubbing, but about 1 day of work, wouldn't have been possible to do a 700-song playlist manually without a team of Mechanical Turks or something
 - If I wanted to go nuts, would process comments individually, save a file for each comment's extracted songs, would make it easier to track down what OpenAI gets wrong, have a resumable, retryable, repeatable process and 
 - Spotify playist is [here](https://open.spotify.com/playlist/08YFkbtTV6GBfNtjJ4PHDu?si=f4761d983ac84091) 
 
 needs a .env file per dot-env-template
 

In [2]:
# a thread 
submission = "12viv4v"

# minimum karma to process a reply 
minkarma = 2

# an output file to accumulate all the responses
savefile = 'bronze.txt'

prompt_prefix="""You will act as a research assistant finding all the artists and track titles mentioned in a series of messages about music, and returning them in a CSV format.
Define a post delimited below by ===
===
post_id: "abcdefg"
post_score: "6996"
I love Yesterday by the Beatles. Also Hotel California from The Eagles. And Bruce Springsteen's Born To Run!
Define a CSV format delimited below by ---
---
"post_id","post_score","artist","track"
"abcdefg","6996","The Beatles","Yesterday"
"abcdefg","6996","The Eagles","Hotel California"
"abcdefg","6996","Bruce Springsteen","Born To Run"
---

You will extract all artists and tracks from each post below delimited by ~~~ .
You will return a list of records containing the artist and track extracted from the input, and the post_id and post_score of the post the artist and track is mentioned in.
You will return the records in a CSV format.
The header row should contain `"post_id","post_score","artist","track"`. 
The input is:
"""

# an output file to accumulate all the responses
savefile = 'bronze.txt'

# to speed things we'll cumulate posts til we get to nposts posts or maxchars total chars, whichever comes first
max_post_size=300  # redditor needs to put any songs in 1st couple hundred chars
maxchars = 6000  # max tokens (words/fragments) is 4096 but we'll limit each prompt
nposts = 100 # max posts to combine into a chunk



In [3]:
csv_validate_re = re.compile(r'''
    \s*                # Any whitespace.
    (                  # Start capturing here.
      [^,"']+?         # Either a series of non-comma non-quote characters.
      |                # OR
      "(?:             # A double-quote followed by a string of characters...
          [^"\\]|\\.   # That are either non-quotes or escaped...
       )*              # ...repeated any number of times.
      "                # Followed by a closing double-quote.
      |                # OR
      '(?:[^'\\]|\\.)*'# Same as above, for single quotes.
    )                  # Done capturing.
    \s*                # Allow arbitrary space before the comma.
    (?:,|$)            # Followed by a comma or the end of a string.
    ''', re.VERBOSE)


## Get all comments from a reddit posting

In [4]:
def getPraw():
    return praw.Reddit(user_agent="prettiest_song/0.001", 
                       client_id=os.getenv('CLIENT_ID'), 
                       client_secret=os.getenv('CLIENT_SECRET'))


def getAll(r, submissionId, verbose=True):
    submission = r.submission(submissionId)
    submission.comments.replace_more(limit=None)
    commentsList=submission.comments.list()
    return commentsList


In [5]:
# print(datetime.now())
# r = getPraw()
# res = getAll(r, submission)
# print(datetime.now())

# print("retrieved ", len(res), 'comments')


In [6]:
# # we have a list of comment objects
# # filter comments with at least some karma
# res3 = [r for r in res if r.score >= minkarma]
# print('filtered to ', len(res3), 'comments')
# res3[0].body, res3[0].score


In [7]:
# save so we can reload it later without downloading

# with open('reddit_full.pkl', 'wb') as f:
#     pickle.dump(res3, f)
    
with open('reddit_full.pkl', 'rb') as f:
    res3 = pickle.load(f)


## Extract artists and song titles using OpenAI

In [8]:
# check lengths of posts
shorties = []
big_ones = []
for i in range(len(res3)):
    if len(res3[i].body) <3:
        print (i, res3[i].body)
        shorties.append(i)
    if len(res3[i].body) > 4096:
        print(i, len(res3[i].body))
        big_ones.append(i)
        

423 4162
812 26
8405 4399
11597 Up
17225 5225
21450 W
21621 -🤓
21626 W
21977 :)
22240 t
23405 <3
23481 ✨️
24440 4543
24443 Ye


In [9]:
# avg length
sum([len(r.body) for r in res3]) / len(res3)

78.88987480247964

In [10]:
print (res3[big_ones[0]].body[:500])

Saturn by Sleeping at Last:
https://www.youtube.com/watch?v=dzNvk80XY9s

The version they did with Tim Fain is even more beautiful: 
https://www.youtube.com/watch?v=0nRpeAiur9Q

I'm not good at choosing one thing from a list of favorites as the best, so I've got about 30+ answers that are really a 30+ -way tie, and the one that I would consider as "prettiest" at any given moment is heavily influenced by my current mood. So, it could be any one of these from my "Heart Wrenchingly Beautiful" playl


In [50]:
openai.api_key = os.getenv('OPENAI_API_KEY')


def get_response(messages, prompt_prefix="", verbose=False):

    prompt = prompt_prefix
    
    if type(messages) == list:
        for msg in messages:
            prompt += f"""
~~~
{msg}
~~~
"""
    else:
        prompt += messages
        
    if verbose:
        print(prompt)
        
    # retry loop, have received untrapped 502 error
    RETRIES = 3
    success = False    
    for i in range(RETRIES):
        try:
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo-0301',
                messages=[{"role":"user", 
                           "content": prompt}],
                temperature=0,
            )
            # no exception thrown
            success=True
            break   
        except Exception as error:
            print("An exception occurred:", error)
            print("Retrying chunk...")
            time.sleep(5)
            continue  # try again
    if success:
        response_msg = response['choices'][0]['message']
        if verbose:
            print(response_msg)
        return response_msg['content']
    else:
        return None



In [12]:
# for each comment object we will extract the body 
# then submit as part of a prompt to chatgpt
print(datetime.now())

slist = res3.copy()
total_posts = len(slist)
print("processing %d posts" % total_posts)

outdir = 'out'
logdir = 'logs'
# make sure out and logs are empty
for f in glob.glob('%s/*' % outdir):
    os.remove(f)
for f in glob.glob('%s/*' % logdir):
    os.remove(f)
count = 0
c = 0

while(slist):  # still comments to process
    chars_to_date = 0
    reply_ids = []
    messages = []

    for _ in range(nposts):  # add up to 100 posts to the prompt
        if slist:
            # make sure no single post > max_post_size, truncate in place as nec 
            slist[0].body = slist[0].body[:max_post_size]
            # total post content < maxchars
            if chars_to_date + len(slist[0].body) < maxchars:
                reply = slist.pop(0)
                reply_ids.append(reply.id)
                body = reply.body
                
                messages.append(f"""
post_id: "{reply.id}"
post_score: "{reply.score}"
{body}
"""
                )
                chars_to_date += len(messages[-1])
                c += 1
            
    response = get_response(messages, prompt_prefix, verbose=False)
    if response is None:   # FAIL - retries exhausted
        print('Bailing to next chunk')
        continue

    # do basic validation and cleanup
    # should check first line is valid header and doesn't reverse columns
    csv_valid, csv_err = [], []
    for line in response.split("\n"):
        try:
            csv_values = csv_validate_re.findall(line)
            if len(csv_values) == 4:
                csv_valid.append(line)
            else:
                csv_err.append(line)
        except:
            csv_err.append(line)
    csv_output = "\n".join(csv_valid)
        
    with open("%s/%04d.csv" % (outdir, count), 'w') as outfile:
        outfile.write(csv_output)
    
    if csv_err:
        with open("%s/%04d.err" % (outdir, count), 'w') as outfile:
            outfile.write("\n".join(csv_err))
        
    with open("%s/%04d.log" % (logdir, count), 'w') as logfile:
        logfile.write(str(reply_ids))
        logfile.write('\n\n===== raw prompt =====\n\n')        
        logfile.write("\n=====\n".join(messages))
        logfile.write('\n\n===== raw response =====\n\n')
        logfile.write(response)
        logfile.write('\n\n===== failed validation =====\n\n')
        logfile.write("\n".join(csv_err))
 
    count += 1
#     print(c)
    outcount = total_posts-len(slist)
    print(outcount, end=' ')
    
    
print()
print(datetime.now())



2023-05-17 11:34:27.542439
processing 24681 posts
860 1040 1150 1880 1940 2050 2110 An exception occurred: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID fcd374277fea9a9a427fafb0403b06cb in your message.)
Retrying chunk...
2860 An exception occurred: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Wed, 17 May 2023 17:44:35 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7c8da8a2ac60425c-EWR', 'alt

In [None]:
# may still have to tweak the files to get them to load
# should inspect .err files and clean up if possible

# filelist = glob.glob('%s/*.csv' % outdir)

# output_df = None
# count = 0
# for f in sorted(filelist):
#     print(f)
#     try:
#         tempdf = pd.read_csv("%s" % (f), header=None)
#     except Exception as exc:
#         print(str(exc))
#         continue
#     colcount = len(tempdf.columns)
#     if len(tempdf.columns) != 4:
#         print('%s has %d columns, skipped' % (f, colcount))
#         continue
        
#     # ok
#     # truncate header row if it looks like a header
#     if tempdf.iloc[0][0]=='post_id':
#         tempdf = tempdf[1:]
#     # set the header explicitly
#     tempdf.columns=["post_id","post_score","artist","track"]

#     if output_df is not None:        
#         output_df = pd.concat([output_df, tempdf], axis=0)
#     else:
#         output_df = tempdf
#     count += 1
#     if count % 10 == 0:
#         print(count, end=' ')

        
        
        

In [13]:
filelist = glob.glob('%s/*.csv' % outdir)

with open(savefile, 'w') as outfile:
    for f in tqdm(filelist, desc = 'File concat'):
        with open(f, 'r') as infile:
            data = infile.read().strip()
            outfile.write(data)
            outfile.write("\n")
    
    

File concat: 100%|██████████| 448/448 [00:00<00:00, 1426.66it/s]


In [20]:
tempdf = pd.read_csv(savefile, header=None)
tempdf.columns=['post_id','post_score','artist','track']
tempdf

Unnamed: 0,id,score,artist,track
0,post_id,post_score,artist,track
1,jhe5su5,1,,
2,jheae44,1,Watchhouse,cover
3,jheb9ji,1,Poolside,cover
4,jhednlq,1,,
...,...,...,...,...
27558,jhdw22c,1,,
27559,jhe3j2l,1,,
27560,jhefa35,1,,
27561,jhep99z,1,,


In [27]:
tempdf = tempdf.drop_duplicates() \
    .sort_values("post_score", ascending=False)
# drop header row
tempdf = tempdf.loc[~(tempdf['post_id'].str.strip()=='post_id')]
# clean up post_score to valid int
tempdf['post_score'] = tempdf['post_score'].apply(lambda s: "".join([c for c in s if c.isdigit()]))
tempdf['post_score'] = tempdf['post_score'].apply(lambda x: x[-5:])
tempdf['post_score'] = tempdf['post_score'].apply(lambda s: int(s) if s else 1)
# drop missing tracks, cleanup track
tempdf = tempdf.drop(tempdf.loc[tempdf['track'].isna()].index)
tempdf['track'] = tempdf['track'].str.strip()
tempdf = tempdf.drop(tempdf.loc[tempdf['track'].str.lower()=='unknown'].index)
tempdf = tempdf.drop(tempdf.loc[tempdf['track'].str.lower()=='track'].index)
# cleanup artist
tempdf['artist'] = tempdf['artist'].str.strip()
tempdf.loc[tempdf['artist'].isna(), 'artist'] = ''


In [28]:
# save bronze
tempdf.to_csv(savefile, index=False)
len(tempdf)

22919

In [46]:
df = pd.read_csv(savefile) \
    .sort_values(["artist", "track"]) \
    .reset_index(drop=True)

df.to_csv('silver.csv', index=False)

print(len(df))

df


22919


Unnamed: 0,post_id,post_score,artist,track
0,jhfmr74,1,$uicideboy$,My Flaws Burn Through My Skin Like Demonic Fla...
1,jhe9i7a,1,*nsync,selfish
2,jhcvus1,1,-,-
3,jhcp5xc,6,.Hack//SIGN,Key of the Twilight
4,jhcgr19,1,070 Shake,Under the Moon
...,...,...,...,...
22914,jherzqu,1,,yebbas heartbreak
22915,jher576,1,,your mother
22916,jhggg7t,1,,zelda's lullaby
22917,jhcrtes,1,,Águas de março


In [48]:
artist_df = df[['artist','track']].groupby('artist') \
    .count() \
    .sort_values('track', ascending=False) \
    .reset_index()
artist_df = artist_df.drop(artist_df.loc[artist_df['artist'].str.strip().str.lower().str.startswith('unknown')].index)
artist_df = artist_df.drop(artist_df.loc[artist_df['artist']==''].index)

artist_df.head(20)

Unnamed: 0,artist,track
2,The Beatles,360
3,Radiohead,221
4,Fleetwood Mac,148
5,Pink Floyd,145
7,Jeff Buckley,137
8,Enya,127
9,Led Zeppelin,126
10,Bon Iver,124
11,Ludovico Einaudi,86
12,The Beach Boys,84


In [54]:
# proofread / dedupe artists
print(datetime.now())

artist_map={}
nposts = 100
prompt_prefix = """I want you act as a proofreader. I will provide you a list of recording artists or composers.
I would like you to review each input artist for any spelling errors or abbreviations and provide the corrected full artist without abbreviation. 
You will provide them in CSV format, one record per line in the following order: input_artist, corrected_artist.
The input is:

"""
slist = artist_df['artist'].tolist()

while(slist):  # still artists to process

    prompt = ""
    for _ in range(nposts):  # add up to 100 posts to the prompt
        if slist:
            artist = slist.pop(0)
            prompt += f'"{artist}"\n'
        else:
            break
            
    response = get_response(prompt, prompt_prefix, verbose=False)
    if response is None:   # FAIL - retries exhausted
        print('Bailing to next chunk')
        continue
    
    for line in response.split("\n"):
        try:
            csv_values = csv_validate_re.findall(line)
            if len(csv_values) == 2:
                artist_input, artist_correct = csv_values[0], csv_values[1]
                while not artist_input[0].isalnum():
                    artist_input=artist_input[1:]
                while not artist_input[-1].isalnum():
                    artist_input=artist_input[:-1]
                while not artist_correct[0].isalnum():
                    artist_correct=artist_correct[1:]
                while not artist_correct[-1].isalnum():
                    artist_correct=artist_correct[:-1]
                if artist_input.lower() != artist_correct.lower():
                    artist_map[artist_input]=artist_correct
                    print(f'"{artist_input}", "{artist_correct}"')
            else:
                print('error', line)
        except:
            print('error', line)
        
print(datetime.now())


'Sigur Ros': 'Sigur Rós'
'Simon & Garfunkel': 'Simon and Garfunkel'
'Beethoven': 'Ludwig van Beethoven'
'Chopin': 'Frédéric Chopin'
'Debussy': 'Claude Debussy'
'Bjork': 'Björk'
'Tchaikovsky': 'Pyotr Ilyich Tchaikovsky'
'Mozart': 'Wolfgang Amadeus Mozart'
'Bach': 'Johann Sebastian Bach'
'Lana del Rey': 'Lana Del Rey'
'Iron & Wine': 'Iron and Wine'
'Pachelbel': 'Johann Pachelbel'
'Carpenters': 'The Carpenters'
'Beach Boys': 'The Beach Boys'
'Elvis': 'Elvis Presley'
'John Butler': 'John Butler Trio'
'Minnie Ripperton': 'Minnie Riperton'
'Rachmaninoff': 'Sergei Rachmaninoff'
'Cinematic Orchestra': 'The Cinematic Orchestra'
'Joni Mitchel': 'Joni Mitchell'
'Odesza': 'ODESZA'
'Alan Parsons Project': 'The Alan Parsons Project'
'Beatles': 'The Beatles'
error Crosby, Stills & Nash, Crosby, Stills, Nash & Young
'Jose Gonzalez': 'José González'
'Puccini': 'Giacomo Puccini'
'Lakme': 'Léo Delibes'
'RHCP': 'Red Hot Chili Peppers'
'Ravel': 'Maurice Ravel'
'Santo and Johnny': 'Santo & Johnny'
'Rolling 

An exception occurred: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 22a3e9e87679b86022f918592157cf1e in your message.)
Retrying chunk...
'India Arie': 'India.Arie'
'Iron and wine': 'Iron and Wine'
'Its A Beautiful Day': 'It's A Beautiful Day'
'Tears For Fears': 'Tears for Fears'
'Temptations': 'The Temptations'
'Palestrina': 'Giovanni Pierluigi da Palestrina'
'Pachabel': 'Johann Pachelbel'
'PM Dawn': 'P.M. Dawn'
'P!nk': 'Pink'
'Otis': 'Otis Redding'
'Paul Van Dyk': 'Paul van Dyk'
'Tash': 'Tash Sultana'
'Pink floyd': 'Pink Floyd'
'Pinkpantheress': 'PinkPantheress'
'Syml': 'SYML'
'System Of A Down': 'System of a Down'
'TOOL': 'Tool'
'Phillip Glass': 'Philip Glass'
'Mum': 'Múm'
'Motorhead': 'Motörhead'
'National & TS': 'The National & TS'
'Nu Deco Ensemble & Kishi Bashi': 'Nu Deco Ensemble and Kishi Bashi'
'Saint-Saëns': 'Camille Saint-Saëns

'Steins Gate': 'Steins;Gate'
'Sufjan': 'Sufjan Stevens'
'Ayub Ohara': 'Ayub Ogada'
'Stephan': 'Stephan Mathieu'
'B.o.t.a': 'BOTA'
'St. Thomas Aquinas': 'Thomas Aquinas'
'SouthTrees': 'South Trees'
'Summertime Sadness': 'Lana Del Rey'
'Auntie’s Harp': 'Auntie's Harp'
'Sujfan Stevens': 'Sufjan Stevens'
'Babe Soundtrack': 'Various Artists'
'Sparklehourse': 'Sparklehorse'
'Spirit Box': 'Spiritbox'
'BORNS': 'BØRNS'
'BOC': 'Boards of Canada'
'BLUT OWN': 'Blut Own'
'Spritbox': 'Spiritbox'
'Spyair': 'SPYAIR'
'Sujan Stevens': 'Sufjan Stevens'
'St George Choir': 'St. George Choir'
'St Saens': 'Camille Saint-Saëns'
'St. Germain': 'St Germain'
'Sufjam Stevens': 'Sufjan Stevens'
'Stephan moccio': 'Stephan Moccio'
'Stephane grappelli': 'Stéphane Grappelli'
'Avett brothers': 'The Avett Brothers'
'Stevie knicks': 'Stevie Nicks'
'Ave Maria': 'Franz Schubert'
'Ava Cassidy': 'Eva Cassidy'
'Sting/Miller': 'Sting & Andy Summers'
'Stolzel': 'Gottfried Heinrich Stölzel'
'Stone Poneys': 'The Stone Poneys'
'Av

'Phoebe Bridgers ft Noah & Abby Gundersen': 'Phoebe Bridgers featuring Noah and Abby Gundersen'
'Bonnie pink': 'Bonnie Pink'
'Philipp poisel': 'Philipp Poisel'
'Petit biscuit': 'Petit Biscuit'
'Peter, Paul, & Mary': 'Peter, Paul, and Mary'
'Penguin Café Orchestra': 'Penguin Cafe Orchestra'
'Boy On A Dolphin': 'Boy on a Dolphin'
'Boys II Men': 'Boyz II Men'
'Penny & Sparrow': 'Penny and Sparrow'
'Peter sarstedt': 'Peter Sarstedt'
'Peter Hollens and hound + the fox': 'Peter Hollens and Hound and the Fox'
'Peter Hollens and The Fox & The Hound': 'Peter Hollens and The Fox and The Hound'
'Pete yorn': 'Pete Yorn'
'Sarah Brightman & Andrea bocelli': 'Sarah Brightman and Andrea Bocelli'
'Röyksopp feat. Robyn': 'Röyksopp featuring Robyn'
'RÜFÜS DU SOl': 'RÜFÜS DU SOL'
'Russian choral piece': 'Russian Choral Piece'
'Russian choir music': 'Russian Choir Music'
'Rüfüs Du Sol': 'RÜFÜS DU SOL'
'Rüfüs du Sol': 'RÜFÜS DU SOL'
'STP': 'Stone Temple Pilots'
'STAIND': 'Staind'
'SPICE GIRLS': 'Spice Girls

'misterwives': 'MisterWives'
'minami': 'Minami'
'millet': 'Millet'
'mild orange': 'Mild Orange'
'metalium': 'Metalium'
'meshuggah': 'Meshuggah'
'meme': 'Meme'
'melody garrot': 'Melody Gardot'
'melody gardot': 'Melody Gardot'
'my brightest diamond': 'My Brightest Diamond'
'ne obliviscarus': 'Ne Obliviscaris'
'phosphorescences': 'Phosphorescent'
'newjeans': 'New Jeans'
'perfect circle': 'A Perfect Circle'
'pear jam': 'Pearl Jam'
'paranoid dj': 'Paranoid DJ'
'paper kites': 'The Paper Kites'
'paniyolo': 'Paniyolo'
'opera Lakme': 'Lakmé'
'opera': 'Opera'
'oneohtrix point never': 'Oneohtrix Point Never'
'omori': 'Omori'
'oasis': 'Oasis'
'nothing but thieves': 'Nothing But Thieves'
'noah and the whale': 'Noah and the Whale'
'ninomae ina'nis': 'Ninomae Ina'nis'
'nieve': 'Nieve'
'skeler': 'Skeler'
'sleeping at Last': 'Sleeping at Last'
'blackmill': 'Blackmill'
'young thug': 'Young Thug'
'yiruma': 'Yiruma'
'yeule': 'Yeule'
'yerin baek': 'Yerin Baek'
'yeat': 'Yeat'
'yann tiersen': 'Yann Tiersen'


'Andrew Lloyd Webber & Tm Rice': 'Andrew Lloyd Webber and Tim Rice'
'AppleSeed/YouSeeBigGirl/T:T': 'Appleseed, YouSeeBigGirl, T:T'
'Ar Rahman': 'AR Rahman'
'Arcade fire': 'Arcade Fire'
'The Beach Boys / Håkan Hellström': 'The Beach Boys and Håkan Hellström'
'The Beatles/Paul McCartney': 'The Beatles and Paul McCartney'
'The Apendix Out': 'The Appendix Out'
'Apex Twin': 'Aphex Twin'
'Antônio Carlos Jobim': 'Antonio Carlos Jobim'
'The Beegees': 'The Bee Gees'
'Temple Of The Dog': 'Temple of the Dog'
'Tegan & Sarah': 'Tegan and Sara'
'TesserAct': 'Tesseract'
'Arlo parks': 'Arlo Parks'
'Anne murray': 'Anne Murray'
'The Dave Brubeck Quartet': 'Dave Brubeck Quartet'
'The Dance': 'Dance'
'The Dubs': 'Dubs'
'The Dum Dum Girls': 'Dum Dum Girls'
'The Duprees': 'Duprees'
'The Fleetwoods': 'Fleetwoods'
'Anna Netrebko & Elīna Garanča': 'Anna Netrebko and Elina Garanca'
'The Flash': 'Flash'
'The Five Stairsteps': 'Five Stairsteps'
'The Five Blobs': 'Five Blobs'
'The Fifth Dimension': 'Fifth Dimensio

'Jack Black as Bowser': 'Jack Black'
'Delia Derbyshire (Bach': 'Delia Derbyshire'
'JUICE WRLD': 'Juice Wrld'
'Jack black': 'Jack Black'
'Delerium ft Joanna Stevens': 'Delerium ft. Joanna Stevens'
'JP saxe': 'JP Saxe'
'Ivan torrent and lara ausensi': 'Ivan Torrent and Lara Ausensi'
'It's a beautiful day': 'It's a Beautiful Day'
'Denice Williams': 'Deniece Williams'
'Isreal Kamakawiwo’ole': 'Israel Kamakawiwo'ole'
'Israel kamakawiwo’ole': 'Israel Kamakawiwo'ole'
'Israel Kamikawiwol’ole': 'Israel Kamakawiwo'ole'
'Denis kenzo': 'Denis Kenzo'
'Israel Kamakawiwo'Ole': 'Israel Kamakawiwo'ole'
'Isley Brothers': 'The Isley Brothers'
'Denez Prigant and Lisa Gerrard': 'Denez Prigent, Lisa Gerrard'
'JLo': 'Jennifer Lopez'
'Delirium ft. Sara McLaughlin': 'Delirium ft. Sarah McLachlan'
'JAKE': 'Jake'
'J.Views & Rhye': 'J.Views, Rhye'
'J.Cole': 'J. Cole'
'J. S. Bach': 'Johann Sebastian Bach'
'Iz Kamakawiwoʻole': 'Israel Kamakawiwo'ole'
'Pan’s Labyrinth': 'Pan's Labyrinth'
'D E A T H P A C T': 'DEATHP

'Fka Twigs': 'FKA Twigs'
'Fiora & Robot Koch': 'Fiora and Robot Koch'
'Florence and The Machine': 'Florence and the Machine'
'Florence + The machine': 'Florence and the Machine'
'Florence + Machine': 'Florence and the Machine'
'Florence & The Machine': 'Florence and the Machine'
'Floating points & Pharoah Sanders': 'Floating Points and Pharoah Sanders'
'Floating Points, Pharoah Sanders & The London Symphony Orchestra': 'Floating Points, Pharoah Sanders, and the London Symphony Orchestra'
'Flitz & Suppe': 'Flitz and Suppe'
'Fiona apple': 'Fiona Apple'
'Elzabeth Fraser': 'Elizabeth Fraser'
'Final Fantasy 8': 'Final Fantasy VIII'
'Final Fantasy 7': 'Final Fantasy VII'
'Eltom John': 'Elton John'
'Elton john': 'Elton John'
'Fifty fifty': 'Fifty Fifty'
'Gereth Emery': 'Gareth Emery'
'Hayd': 'Haydn'
'Harry Waters Jr., Marvin Berry, and Starlighters': 'Harry Waters Jr., Marvin Berry, and the Starlighters'
'Harry Nillsson': 'Harry Nilsson'
'Dj Okawari': 'DJ Okawari'
'Dj clock': 'DJ Clock'
'Head

'Eric Johnston': 'Eric Johnson'
'Bruce Sprinsteen': 'Bruce Springsteen'
'Oreja de Van Gohn': 'Oreja de Van Gogh'
'Orchestral Manoeuvres In the Dark': 'Orchestral Manoeuvres in the Dark'
'OutKast': 'Outkast'
'Ottis Redding': 'Otis Redding'
'Nearer, My God, To Thee': 'Nearer, My God, to Thee'
'Ne obliviscaris': 'Ne Obliviscaris'
'Native American man': 'Native American Man'
'Nathaniel Song': 'Nathaniel Rateliff & The Night Sweats'
'Neil young': 'Neil Young'
'New Christie Minstrels': 'New Christy Minstrels'
'Neverending White Lights & Dallas Green': 'Neverending White Lights & City and Colour'
'Neutral milk hotel': 'Neutral Milk Hotel'
'Neli Andreeva and the Choir of Filip Kutev Ansamble': 'Neli Andreeva and the Choir of Filip Kutev Ensemble'
'Natidredd': 'Natiruts'
'Camille Saint-Saens': 'Camille Saint-Saëns'
'Nalin and Kane': 'Nalin & Kane'
'Nahko Bear (Medicine for the People': 'Nahko and Medicine for the People'
'NWA': 'N.W.A'
'Canteloube': 'Joseph Canteloube'
'Natalia Laforcade': 'Nat

In [62]:
# check the map for reasonableness
df['artist2'] = df['artist'].apply(lambda s: artist_map[s] if s in artist_map else s)

In [64]:
df.loc[df['artist'] != df['artist2']]

Unnamed: 0,post_id,post_score,artist,track,artist2
9,jhfwqid,1,10000 Maniacs,Verdi Cries,"10,000 Maniacs"
10,jhdzwwe,1,10CC,I'm not in love,10cc
12,jhddxdh,1,1975,Oh Caroline,The 1975
13,jhejnah,1,2 Cellos,Book of Love,2Cellos
18,jhds3zv,1,2 cellos,Benedictus,2Cellos
...,...,...,...,...,...
21036,jhi0oax,1,Группа Кино,Кукушка,Kino
21037,jhec8o2,1,Кино,Спокойная ночь,Kino
21038,jhdari4,1,Наада,dviyzheniye,Naada
21039,jhenqnx,1,Отава Ё,Once upon a Time on a High Hill,Otava Yo


In [65]:
# apply the map
df['artist'] = df['artist'].apply(lambda s: artist_map[s] if s in artist_map else s)

In [66]:
# for missing artists, try to impute the artist based on the track
missing_artist_df = df.loc[df['artist']=='']
missing_artist_df


Unnamed: 0,post_id,post_score,artist,track,artist2
21044,jhe1u2x,2,,#3,
21045,jhc3j8u,21,,*you didn't have to cut me off*,
21046,jhfmea4,1,,"1, 2, Buckle my shoes.",
21047,jhcefso,18,,"10,000 Days (Wings Pt. 2)",
21048,jhcawoc,1,,23 - Jimmy Eat World,
...,...,...,...,...,...
22914,jherzqu,1,,yebbas heartbreak,
22915,jher576,1,,your mother,
22916,jhggg7t,1,,zelda's lullaby,
22917,jhcrtes,1,,Águas de março,


In [70]:
missing_track_map={}
nposts = 100
prompt_prefix = """I will provide you a list of well-known recordings.
I would like you to review each recording, and provide the name of the artist most closely associated with the recording.
You will provide them in CSV format, one record per line in the following order: recording, artist.
The input is:

"""
slist = missing_artist_df['track'] \
    .dropna() \
    .str.lower() \
    .str.strip() \
    .drop_duplicates() \
    .tolist()

slist.sort()

while(slist):  # still artists to process

    prompt = ''
    for _ in range(nposts):  # add up to 100 posts to the prompt
        if slist:
            track = slist.pop(0)
            prompt += f'"{track}"\n'
        else:
            break
    response = get_response(prompt, prompt_prefix, verbose=True)
            
    if response is None:   # FAIL - retries exhausted
        print('Bailing to next chunk')
        continue
        
    for line in response.split("\n"):
        try:
            csv_values = csv_validate_re.findall(line)
            if len(csv_values) == 2:
                track_input, artist_correct = csv_values[0], csv_values[1]
                while not track_input[0].isalnum():
                    track_input=track_input[1:]
                while not track_input[-1].isalnum():
                    track_input=track_input[:-1]
                while not artist_correct[0].isalnum():
                    artist_correct=artist_correct[1:]
                while not artist_correct[-1].isalnum():
                    artist_correct=artist_correct[:-1]
                if artist_correct.lower() != 'unknown':
                    missing_track_map[track_input]=artist_correct
                    print(f'"{track_input}", "{artist_correct}"')                    
            else:
                print('error', line)
        except:
            print('error', line)
            

I will provide you a list of well-known recordings.
I would like you to review each recording, and provide the name of the artist most closely associated with the recording.
You will provide them in CSV format, one record per line in the following order: recording, artist.
The input is:

"#3"
"*you didn't have to cut me off*"
"1, 2, buckle my shoes."
"10,000 days (wings pt. 2)"
"23 - jimmy eat world"
"26"
"2nd part of layla"
"3 little birds"
"4 gatsu"
"50 years"
"832 hz music"
"a clockwork orange"
"a day in the"
"a hermit thrush song"
"a kissed-out red floatboat"
"a long way past the past"
"a mortal heart"
"a nightingale sang in berkeley square"
"a picture no artist could paint"
"a river runs through you"
"a song for a winter’s night"
"a warm place"
"a whiter shade of pale"
"a bitter sweet genesis for him and her"
"a change is gonna come..."
"a la claire fontaine"
"a rendition of blame it on my youth on npr about 14 years ago"
"a ring ding ding ding d-ding baa aramba baa baa barooumba.

{
  "content": "\"aubrey plaza\", \"Aubrey Plaza\"\n\"auld lang syne\", \"Robert Burns\"\n\"aurora\", \"Aurora\"\n\"autumn town leaves\", \"Iron & Wine\"\n\"ava maria in latin\", \"Franz Schubert\"\n\"avatar's love\", \"Leona Lewis\"\n\"ave maria\", \"Franz Schubert\"\n\"ave mar\u00eda\", \"Johann Sebastian Bach\"\n\"avril 14\", \"Aphex Twin\"\n\"avril 14th\", \"Aphex Twin\"\n\"axis: bold as love\", \"Jimi Hendrix\"\n\"baba yetu\", \"Christopher Tin\"\n\"baby, now that i\u2019ve found you\", \"The Foundations\"\n\"bach double violin concerto in d minor\", \"Johann Sebastian Bach\"\n\"back in the 90s...\", \"Grouplove\"\n\"bad/sad day pretty songs to cry to -\", \"Various Artists\"\n\"balapan qaz\", \"Dimash Kudaibergen\"\n\"barbara allen\", \"Traditional\"\n\"barbie girl in german\", \"Aqua\"\n\"battle scars\", \"Guy Sebastian feat. Lupe Fiasco\"\n\"be here now\", \"Ray LaMontagne\"\n\"beautiful boy\", \"John Lennon\"\n\"because\", \"The Beatles\"\n\"bedouin dress\", \"Fleet Foxes\"\n\

{
  "content": "\"chariots of fire \ud83d\udd25\", Vangelis\n\"chasing cars\", Snow Patrol\n\"cherry wine\", Hozier\n\"chevaliers de sangreal\", Hans Zimmer\n\"chicken dance\", Werner Thomas\n\"choking on flowers\", Fox Academy\n\"choot volume 1\", Badshah\n\"christmas ep\", She & Him\n\"circle of steel\", Gordon Lightfoot\n\"circles\", Post Malone\n\"city of tears\", Christopher Larkin\n\"clair de lune\", Claude Debussy\n\"claire de lune\", Flight Facilities\n\"clannad\", Clannad\n\"clark gable\", The Postal Service\n\"cliffs of dover\", Eric Johnson\n\"climb every mountain\", Julie Andrews\n\"close my eyes forever\", Lita Ford and Ozzy Osbourne\n\"close to you\", The Carpenters\n\"closer\", Nine Inch Nails\n\"closing time\", Semisonic\n\"clovis, new mexico\", The American Dollar\n\"cold\", Jorge M\u00e9ndez\n\"colder weather\", Zac Brown Band\n\"colours of infinity part 6\", Carbon Based Lifeforms\n\"come away with me\", Norah Jones\n\"come back to earth\", Mac Miller\n\"come what ma

{
  "content": "\"earth song.\",\"Michael Jackson\"\n\"edelweiss\",\"Richard Rodgers\"\n\"either ludwigs ost from bloodborne or something in the way by nirvana along with nights in white satin by moody blues\",\"Various Artists\"\n\"ekki mukk\",\"Sigur R\u00f3s\"\n\"el condor pasa\",\"Simon & Garfunkel\"\n\"elderly woman behind the counter in a small town\",\"Pearl Jam\"\n\"electronic\",\"Joy Division\"\n\"empty chairs\",\"Don McLean\"\n\"end of the world\",\"R.E.M.\"\n\"ends of the earth\",\"Lord Huron\"\n\"english house\",\"Fleet Foxes\"\n\"enjoy right now, today\",\"Tyler, The Creator\"\n\"erana's peace\",\"Mark Seibert\"\n\"et si ne existe pa\",\"Stromae\"\n\"eternal love ending song (liang liang)\",\"Wei Qi Qi\"\n\"eurydice's song from hades\",\"Ana\u00efs Mitchell\"\n\"everglow\",\"Coldplay\"\n\"everlong\",\"Foo Fighters\"\n\"every time i encounter the prettiest song\",\"Goose house\"\n\"everybody wants to rule the world\",\"Tears for Fears\"\n\"everybody lies.\",\"Jason Isbell a

{
  "content": "\"golden hour\", Kacey Musgraves\n\"golden hour and notions\", Gregory Alan Isakov\n\"goodbye brother\", Ramin Djawadi\n\"goodbye\", Apparat\n\"goodnight moon\", Shivaree\n\"gortoz a ran\", Denez Prigent\n\"gortoz a ran (from \"black hawk down\")\", Lisa Gerrard & Denez Prigent\n\"grapevine fires\", Death Cab for Cutie\n\"gravity\", John Mayer\n\"great fairy fountain (scott pilgrim version or the 25th anniversary orchestral version)\", Koji Kondo\n\"great song.... could never find it what it was.\", Unknown\n\"greensleves\", Traditional\n\"grown ocean\", Fleet Foxes\n\"gumball blue\", Nujabes\n\"gusty garden galaxy\", Mahito Yokota\n\"guts theme\", Susumu Hirasawa\n\"gymnopedie no.1\", Erik Satie\n\"gymnopedies\", Erik Satie\n\"hailies song\", Eminem\n\"hakkka- hackable wo wo this time for africa during fifa word cup \u26bd\ufe0f\u26bd\ufe0f\u26bd\ufe0f\u26bd\ufe0f\ud83e\udd3e\ud83e\udd3e\", Shakira\n\"halah\", Mazzy Star\n\"halcyon + on + on\", Orbital\n\"halcyon and o

{
  "content": "\"i cum blood\", \"Cannibal Corpse\"\n\"i don't think now is the best time\", \"The Arctic Monkeys\"\n\"i dreamed a dream\", \"Anne Hathaway\"\n\"i feel like jericho song\", \"Sufjan Stevens\"\n\"i forgot the title of the song ngl\", \"Unknown\"\n\"i guess i am in love\", \"Unknown\"\n\"i have a little song i made up for my daughter\", \"Unknown\"\n\"i hear a symphony\", \"The Supremes\"\n\"i hear a symphony x pluto projector\", \"Unknown\"\n\"i hope you dance\", \"Lee Ann Womack\"\n\"i lied\", \"Nicki Minaj\"\n\"i like formidable alot.\", \"Stromae\"\n\"i may not always love yoooouuu...\", \"Elvis Presley\"\n\"i really want to stay at your house, any version is amazing\", \"Unknown\"\n\"i see the light\", \"Mandy Moore and Zachary Levi\"\n\"i still believe i hear\", \"Unknown\"\n\"i will never fall in love again untill i found her..\", \"Unknown\"\n\"i will follow you you into the dark\", \"Death Cab for Cutie\"\n\"i wish you love/ que reste-t-il de nos amours ?\", \"N

{
  "content": "\"kenya's national anthem\", \"Graham Hyslop\"\n\"khooneye ma\", \"Andy\"\n\"killing me softly\", \"Roberta Flack\"\n\"kimi no nawa - kataware doki - sweet harp\", \"RADWIMPS\"\n\"kiss to build a dream on\", \"Louis Armstrong\"\n\"koi\", \"Gen Hoshino\"\n\"l'appuntamento\", \"Ornella Vanoni\"\n\"la boheme\", \"Charles Aznavour\"\n\"la champs elysees\", \"Joe Dassin\"\n\"la mer\", \"Charles Trenet\"\n\"la seine and i\", \"Vanessa Paradis\"\n\"la ve en rose\", \"Edith Piaf\"\n\"la vie en rose\", \"Edith Piaf\"\n\"la parfum de fleurs\", \"Yiruma\"\n\"la vie en rose- how i met your mother version\", \"Cristin Milioti\"\n\"lady in red\", \"Chris de Burgh\"\n\"lake shore drive\", \"Aliotta Haynes Jeremiah\"\n\"lakeshore drive\", \"Aliotta Haynes Jeremiah\"\n\"lakme: flower duet\", \"L\u00e9o Delibes\"\n\"lakme\u2019s flower duet\", \"L\u00e9o Delibes\"\n\"landslide\", \"Fleetwood Mac\"\n\"laufey's valentine\", \"Laufey\"\n\"layla\", \"Derek and the Dominos\"\n\"le via en rose

{
  "content": "\"melancholy hill\", \"Gorillaz\"\n\"melodies of life final fantasy ix.\", \"Emiko Shiratori\"\n\"memories\", \"Maroon 5\"\n\"mer de noms\", \"A Perfect Circle\"\n\"merry christmas mr lawrence\", \"Ryuichi Sakamoto\"\n\"merry christmas mr. lawrence \u2026.rip\", \"Ryuichi Sakamoto\"\n\"merry go round of life\", \"Joe Hisaishi\"\n\"michicant\", \"Bon Iver\"\n\"mille lune mille onde\", \"Andrea Bocelli\"\n\"mine forever\", \"Lord Huron\"\n\"misirlou\", \"Dick Dale\"\n\"mockingbird\", \"Eminem\"\n\"mona lisa's and mad hatters\", \"Elton John\"\n\"mona lisas and mad hatters\", \"Elton John\"\n\"moody may\", \"Kendrick Lamar\"\n\"moon bitch get out the way\", \"Thundercat\"\n\"moon in june\", \"Soft Machine\"\n\"moon river\", \"Henry Mancini\"\n\"moonlight densetsu\", \"DALI\"\n\"moonlight serenade\", \"Glenn Miller\"\n\"moonlight sonata\", \"Ludwig van Beethoven\"\n\"moonlight sonata #6\", \"Ludwig van Beethoven\"\n\"moonlight in vermont\", \"Frank Sinatra\"\n\"moonriver\",

{
  "content": "\"oh! tengo suerte\", \"Shakira\"\n\"old friends\", \"Simon & Garfunkel\"\n\"old man\", \"Neil Young\"\n\"old september blues\", \"Dan Auerbach\"\n\"omgjyja-switch7\", \"Meme\"\n\"on melancholy hill\", \"Gorillaz\"\n\"on the nature of daylight\", \"Max Richter\"\n\"once there was the sun\", \"Thomas Bergersen\"\n\"once upon a time\", \"Lana Del Rey\"\n\"once upon a december\", \"Deana Carter\"\n\"once upon a december from anastasia\", \"Anastasia Soundtrack\"\n\"one last kiss\", \"HIM\"\n\"one summer\u2018s day\", \"Joe Hisaishi\"\n\"one from my dream that i would never be able to remember but it made me cry thats how good it was\", \"Unknown\"\n\"one heart x reidenshi snowfall\", \"Miliyah Kato\"\n\"one less pair of footsteps on your floor\", \"Jim Croce\"\n\"one of us\", \"Joan Osborne\"\n\"one summers day\", \"Spirited Away Soundtrack\"\n\"one upon a december\", \"Emile Pandolfi\"\n\"only love can break your heart\", \"Neil Young\"\n\"only you\", \"The Platters\"\n\"

{
  "content": "\"rachel sermanni sang a song\", \"Rachel Sermanni\"\n\"rachmaninoff's vocalise\", \"Sergei Rachmaninoff\"\n\"rachmaninoff\u2019s piano concerto no. 2\", \"Sergei Rachmaninoff\"\n\"rachmaninov 2nd piano concerto\", \"Sergei Rachmaninoff\"\n\"rain (deference for darkness)\", \"Boards of Canada\"\n\"rain song\", \"Led Zeppelin\"\n\"rain on tin\u2014sonic youth\", \"Sonic Youth\"\n\"rainbow connection\", \"Kermit the Frog\"\n\"rainbow road n64 version :3\", \"Nintendo\"\n\"rainy night in soho\", \"The Pogues\"\n\"ram ranch\", \"Grant MacDonald\"\n\"rayons - halfway\", \"Rayons\"\n\"real world\", \"Matchbox Twenty\"\n\"recitation of the quran\", \"Various Artists\"\n\"recorded gospel songs\", \"Various Artists\"\n\"recuerdos de la alhambra\", \"Francisco T\u00e1rrega\"\n\"redemption song\", \"Bob Marley\"\n\"reinbert de leeuw's renditions\", \"Reinbert de Leeuw\"\n\"remember the mountain bed\", \"Woody Guthrie\"\n\"reminds me of growing up. i was in elementary school, bush 

{
  "content": "\"slut me out\", \"marina\"\n\"smile\", \"lily allen\"\n\"snake eater\", \"cynthia harrell\"\n\"snow on the beach\", \"the midnight\"\n\"snowfield\", \"radwimps\"\n\"so cold in ireland\", \"celine dion\"\n\"sofia\", \"clairo\"\n\"solitude\", \"billie holiday\"\n\"some obscure boss battle music from an obscure game.\", \"unknown\"\n\"someone make a spotify playlist of these songs in order of popularity in this thread. so many good ones and so many i havent heard. \\n\\nwhat a joy these are. making my day going through this. currently crying to unchained melody .\", \"unknown\"\n\"someone to watch over me\", \"ella fitzgerald\"\n\"something\", \"the beatles\"\n\"sometimes you can\u2019t make it on your own\", \"u2\"\n\"somewhere over the rainbow\", \"judy garland\"\n\"somewhere over the rainbow - all versions\", \"israel kamakawiwo'ole\"\n\"somewhere only we know\", \"keane\"\n\"sonder\", \"sonder\"\n\"song for the asking\", \"simon & garfunkel\"\n\"song for charley\", \"

{
  "content": "\"that last song rachel mcadams and will ferrel sing at the end of eurovision.\", \"Various Artists\"\n\"that one you sang in the shower the other day\u2026\", \"N/A\"\n\"that piano some from the amile soundtrack\", \"Yann Tiersen\"\n\"that quiet melody from the bacardi commercial that takes place on a peaceful island. if anyone knows the name of that song please let me know\", \"N/A\"\n\"that song in howls moving castle\", \"Joe Hisaishi\"\n\"that's on me\", \"Megan Thee Stallion\"\n\"that\u2019s amore\", \"Dean Martin\"\n\"that\u2019s rough, buddy.\", \"N/A\"\n\"the 59th street bridge song (feeling groovy)\", \"Simon & Garfunkel\"\n\"the ballad of lilith\", \"Amanda Palmer\"\n\"the best is yet to come.\", \"Frank Sinatra\"\n\"the blue danube waltz\", \"Johann Strauss II\"\n\"the bonnie banks o' loch lomond\", \"Traditional Scottish Folk Song\"\n\"the boxer\", \"Simon & Garfunkel\"\n\"the chain\", \"Fleetwood Mac\"\n\"the disappearance of haruhi suzumiya\", \"Various A

{
  "content": "\"this land is your land\",\"Woody Guthrie\"\n\"this must be the place\",\"Talking Heads\"\n\"this is not the greatest song in the world, no, this is just a tribute\",\"Tenacious D\"\n\"this is the end, beautiful friend\",\"The Doors\"\n\"this is the way\",\"E-Type\"\n\"those eyes, that mouth\",\"Cocteau Twins\"\n\"those eyes\",\"Thrice\"\n\"those guys on paris ...\",\"The Flaming Lips\"\n\"three little birds\",\"Bob Marley\"\n\"tiger mountain peasant song\",\"Fleet Foxes\"\n\"time\",\"Pink Floyd\"\n\"time after time\",\"Cyndi Lauper\"\n\"time in a bottle\",\"Jim Croce\"\n\"time waits for no one\",\"The Rolling Stones\"\n\"timing is the most important part of comedy. touch\u00e9\",\"Dan Harmon\"\n\"tin man\",\"America\"\n\"tiny vessels\",\"Death Cab for Cutie\"\n\"tip toe by the window\",\"Tiny Tim\"\n\"titanic song\",\"Celine Dion\"\n\"to be over\",\"Yes\"\n\"to build a home\",\"The Cinematic Orchestra\"\n\"to live is to die\",\"Metallica\"\n\"to zanarkand\",\"Nobuo Ue

{
  "content": "\"westworld version\",Ramin Djawadi\n\"we\u2019re walking in the air\",Peter Auty\n\"what sarah said\",Death Cab for Cutie\n\"what wonderous love is this\",Traditional\n\"what a beautiful name\",Hillsong Worship\n\"what a friend we have in jesus\",Traditional\n\"what a wonderful world\",Louis Armstrong\n\"what a beautiful love story!\",The Jive Aces\n\"what you won\u2019t do for love\",Bobby Caldwell\n\"when i am laid in earth (dido\u2019s lament)\",Henry Purcell\n\"when the moment comes\",Mia Vaile\n\"when you\u2019re gone\",Avril Lavigne\n\"when she loved me\",Sarah McLachlan\n\"when the river meets the sea\",John Denver\n\"when the night is over\",Lord Huron\n\"when your kids/grandkids sing happy birthday to you\",Traditional\n\"where is my mind\",Pixies\n\"while my guitar gently weeps\",The Beatles\n\"whiskey lullaby\",Brad Paisley and Alison Krauss\n\"white ferrari\",Frank Ocean\n\"white sandy beach\",Israel Kamakawiwo'ole\n\"white winter hymnal\",Fleet Foxes\n\"wh

{
  "content": "\"him\", \"Lily Allen\"\n\"how to disappear completely\", \"Radiohead\"\n\"https://m.youtube.com/watch?v=ztaervxi7bw\", \"The Beatles\"\n\"https://open.spotify.com/track/33smtg9zbfmswdyp9hpomn?si=4kpug-zat1g0ufsaasibow\", \"Billie Eilish\"\n\"https://www.youtube.com/watch?v=lz6d60ysb-y\", \"The Rolling Stones\"\n\"https://youtu.be/fshqvq0hauc\", \"The Strokes\"\n\"https://youtu.be/siuf37ewalu\", \"The White Stripes\"\n\"https://youtu.be/iklkhewidws\", \"The Black Keys\"\n\"https://youtu.be/ivykyc8j29m\", \"Arctic Monkeys\"\n\"https://youtube.com/shorts/ceaowoiolgy?feature=share\", \"Unknown\"\n\"hum to deaf son\", \"Unknown\"\n\"in my solitude\", \"Billie Holiday\"\n\"inevitable conflict\", \"Unknown\"\n\"joy of remembrance from celeste\", \"Lena Raine\"\n\"knife dance\", \"Unknown\"\n\"lemon tree\", \"Fool's Garden\"\n\"light shower\", \"Unknown\"\n\"loneliest girl\", \"Carole King\"\n\"long after you're gone\", \"Chris Jones\"\n\"love in outer space\", \"Sun Ra\"\n\"m

In [80]:
# check for reasonableness and apply
df['track']=df['track'].astype(str)
df['artist2'] = df.apply(lambda row: missing_track_map[row.track.lower().strip()] if row.artist=="" and row.track.lower().strip() in missing_track_map else row.artist, axis=1)
df.loc[df['artist'] != df['artist2']]



Unnamed: 0,post_id,post_score,artist,track,artist2
21048,jhcawoc,1,,23 - Jimmy Eat World,Jimmy Eat World
21049,jhc0ab7,2,,26,Paramore
21050,jhgr6v3,1,,2nd part of Layla,Derek and the Dominos
21051,jhinrlz,1,,3 little birds,Bob Marley
21052,jhdmguf,1,,4 gatsu,Joe Hisaishi
...,...,...,...,...,...
22911,jhe4qna,1,,wet ass pussy,Cardi B
22912,jhcr4s6,1,,when you say nothing at all,Alison Krauss
22914,jherzqu,1,,yebbas heartbreak,Yebba
22916,jhggg7t,1,,zelda's lullaby,Koji Kondo


In [82]:
df['artist'] = df.apply(lambda row: missing_track_map[row.track.lower().strip()] if row.artist=="" and row.track.lower().strip() in missing_track_map else row.artist, axis=1)


In [83]:
def fix_leading_trailing(s):
    """First and last should be alphanumeric"""
    # regex prob better if re.match('^\W+(.*)\W+$',playerName): 

    while len(s) and not s[0].isalnum():
        s = s[1:]
        
    while len(s) and not s[-1].isalnum():
        s = s[:-1]
    
    return s.lower().strip()
        

In [84]:
df['artist'] = df['artist'].apply(fix_leading_trailing)

In [89]:
df.groupby('artist') \
    .count() \
    .reset_index() \
    .sort_values('track', ascending=False) \
    .head(20)



Unnamed: 0,artist,post_id,post_score,track,artist2
5144,the beatles,426,426,426,426
4316,radiohead,248,248,248,248
1804,fleetwood mac,175,175,175,175
4208,pink floyd,164,164,164,164
2482,jeff buckley,158,158,158,158
3036,led zeppelin,155,155,155,155
714,bon iver,135,135,135,135
1663,enya,134,134,134,134
1090,claude debussy,131,131,131,131
4754,sigur rós,127,127,127,127


In [88]:
df['artist'] = df['artist'].apply(fix_leading_trailing)
df = df.drop(df.loc[df['artist'].str.startswith('unknown')].index)
df = df.drop(df.loc[df['artist'].str.startswith('various artists')].index)
df = df.drop(df.loc[df['artist']=='none'].index)
df = df.drop(df.loc[df['artist']==''].index)
df = df.drop(df.loc[df['artist']=='post_score'].index)


In [90]:
dedupe_df = df[['artist', 'post_score']] \
    .groupby(['artist']) \
    .count() \
    .sort_values('post_score', ascending=False) \
    .rename({'post_score': 'count'}, axis=1) \
    .reset_index() \
    .reset_index() 

dedupe_df


Unnamed: 0,index,artist,count
0,0,the beatles,426
1,1,radiohead,248
2,2,fleetwood mac,175
3,3,pink floyd,164
4,4,jeff buckley,158
...,...,...,...
5944,5944,james arthur,1
5945,5945,james agee,1
5946,5946,james,1
5947,5947,jal the band,1


In [91]:
# reset dedupe learned settings
# !rm dedupe_dataframe_learned_settings 
# !rm dedupe_dataframe_training.json   
dedupe_df2 = pandas_dedupe.dedupe_dataframe(dedupe_df, ['artist'])


Importing data ...


  dedupe_df2 = pandas_dedupe.dedupe_dataframe(dedupe_df, ['artist'])
artist : the amazing devil

artist : the amazing devils

0/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished


Starting active labeling...
y


artist : luther vandross

artist : luther vandros

1/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : kingdom hearts

artist : kingdom hearts 2

2/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : the greatest showman cast

artist : the greatest showman

3/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : final fantasy

artist : final fantasy xiv

4/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : vince guaraldi

artist : vince guaraldi trio

5/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : thomas newman

artist : thomas new

6/10 positive, 0/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : priscilla ahn

artist : priscilla

6/10 positive, 1/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : destiny 2

artist : destiny

6/10 positive, 2/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : fleetwood mac

artist : fleetwood

7/10 positive, 2/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


u


artist : stratovarius

artist : stratova

7/10 positive, 2/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : bruce springsteen

artist : bruce springsteen/melissa etheridge

7/10 positive, 3/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : traditional

artist : traditional folk song

7/10 positive, 4/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : midnight youth

artist : midnight

8/10 positive, 4/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : desiree

artist : desire

8/10 positive, 5/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


u


artist : johnny ace

artist : johnny

8/10 positive, 5/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : jimin

artist : jimi

8/10 positive, 6/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : cults

artist : cult

8/10 positive, 7/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : bibio

artist : bibi

8/10 positive, 8/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : the las

artist : the last revel

9/10 positive, 8/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : johnny rebel

artist : johnny

9/10 positive, 9/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : prince

artist : prince ralis

9/10 positive, 10/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : justin scott

artist : justin

9/10 positive, 11/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : justin bieber

artist : justin

9/10 positive, 12/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : sara bareilles

artist : sara bareilles and ingrid michaelson

9/10 positive, 13/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : porter robinson

artist : porter robinson, madeon

9/10 positive, 14/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : rusted root

artist : rusted roots

9/10 positive, 15/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : michael buble

artist : michael buble

10/10 positive, 15/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : final fantasy

artist : final fantasy iv

11/10 positive, 15/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : neverending white lights

artist : neverending white lights city and colour

12/10 positive, 15/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : sarah brightman

artist : sarah brightman and andrea bocelli

13/10 positive, 15/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : black

artist : black flag

13/10 positive, 16/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : martin garrix

artist : martin garrix shaun farrugia

13/10 positive, 17/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : the national

artist : the national parks

13/10 positive, 18/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : a perfect circle

artist : a perfect circle/tool

13/10 positive, 19/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : fleetwoods

artist : fleetwood

13/10 positive, 20/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : francisco tarrega

artist : francisco tarrega

13/10 positive, 21/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : rosalia

artist : rosalia

14/10 positive, 21/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : glen hansard and marketa irglova

artist : glen hansard and marketa irglova

15/10 positive, 21/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : passenger

artist : passengers

16/10 positive, 21/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : the national

artist : the national ts

16/10 positive, 22/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


n


artist : tobias jesso

artist : tobias jesso jr

16/10 positive, 23/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : edith piaf

artist : edith piaf

17/10 positive, 23/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : final fantasy vii

artist : final fantasy

18/10 positive, 23/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : les miserables

artist : les miserables cast

19/10 positive, 23/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : bill evans

artist : bill evans trio

20/10 positive, 23/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


y


artist : taylor swift

artist : taylor swift bon iver

21/10 positive, 23/10 negative
Do these records refer to the same thing?
(y)es / (n)o / (u)nsure / (f)inished / (p)revious


f


Finished labeling


Clustering...
# duplicate sets 5818


In [94]:
dedupe_df2

Unnamed: 0,index,artist,count,cluster id,confidence
0,0,the beatles,426,123,1.0
1,1,radiohead,248,124,1.0
2,2,fleetwood mac,175,125,1.0
3,3,pink floyd,164,126,1.0
4,4,jeff buckley,158,127,1.0
...,...,...,...,...,...
5944,5944,james arthur,1,5813,1.0
5945,5945,james agee,1,5814,1.0
5946,5946,james,1,5815,1.0
5947,5947,jal the band,1,5816,1.0


In [95]:
# map index id to most popular version
dedupe_df['cluster id'] = dedupe_df2['cluster id']
name2i = {a: i for i, a in zip(dedupe_df['cluster id'].tolist(), dedupe_df['artist'].tolist())}
df['artist_index'] = df['artist'].apply(lambda s: name2i[s])
df

Unnamed: 0,post_id,post_score,artist,track,artist2,artist_index
0,jhfmr74,1,uicideboy,My Flaws Burn Through My Skin Like Demonic Fla...,$uicideboy$,2381
1,jhe9i7a,1,nsync,selfish,*nsync,1441
3,jhcp5xc,6,hack//sign,Key of the Twilight,.Hack//SIGN,5304
4,jhcgr19,1,070 shake,Under the Moon,070 Shake,3534
5,jhdynba,1,"10,000 maniacs",Verdi Cries,"10,000 Maniacs",819
...,...,...,...,...,...,...
22911,jhe4qna,1,cardi b,wet ass pussy,Cardi B,608
22912,jhcr4s6,1,alison krauss,when you say nothing at all,Alison Krauss,21
22914,jherzqu,1,yebba,yebbas heartbreak,Yebba,636
22916,jhggg7t,1,koji kondo,zelda's lullaby,Koji Kondo,373


In [96]:
tempdf = dedupe_df2[['index', 'artist', 'cluster id']] \
    .groupby('cluster id') \
    .first() \
    .reset_index()

i2name = {i: a for i, a in zip(tempdf['cluster id'].tolist(), tempdf['artist'].tolist())}
df['artist'] = df.apply(lambda r: i2name[r.artist_index], axis=1)
df['artist'] = df['artist'].str.strip()
df

Unnamed: 0,post_id,post_score,artist,track,artist2,artist_index
0,jhfmr74,1,uicideboy,My Flaws Burn Through My Skin Like Demonic Fla...,$uicideboy$,2381
1,jhe9i7a,1,nsync,selfish,*nsync,1441
3,jhcp5xc,6,hack//sign,Key of the Twilight,.Hack//SIGN,5304
4,jhcgr19,1,070 shake,Under the Moon,070 Shake,3534
5,jhdynba,1,"10,000 maniacs",Verdi Cries,"10,000 Maniacs",819
...,...,...,...,...,...,...
22911,jhe4qna,1,cardi b,wet ass pussy,Cardi B,608
22912,jhcr4s6,1,alison krauss,when you say nothing at all,Alison Krauss,21
22914,jherzqu,1,yebba,yebbas heartbreak,Yebba,636
22916,jhggg7t,1,koji kondo,zelda's lullaby,Koji Kondo,373


In [97]:
mymap = {
'temptations' : 'the temptations', 
'ladysmith black mambazo and paul simon' : 'paul simon', 
'isbell' : 'jason isbell', 
'mini riperton' : 'minnie riperton', 
'"moby, sinead oconnor"' : 'moby', 
'ludivcio enaudi' : 'einaudi', 
'bob dillon' : 'bob dylan', 
'dylan' : 'bob dylan', 
'f. liszt' : 'liszt', 
'franz liszt' : 'liszt', 
'edvard greig' : 'edvard grieg', 
'apc' : 'a perfect circle', 
'tomaso albinoni' : 'albinoni', 
'allison krauss' : ' alison krauss', 
'belinda carlisle' : 'belinda carlile', 
'bernie taupin' : 'elton john', 
'camille saint-saens' : 'saint saens', 
'cass elliot' : 'mamas and the papas', 
'city colour' : 'city and colour', 
'claude debussy' : 'debussy', 
'coctaeu twins' : 'cocteau twins', 
'csn' : '"crosby, stills nash"', 
'edward elgar' : 'elgar', 
'elo' : 'electric light orchestra', 
'franz liszt' : 'liszt', 
'frederic chopin' : 'chopin', 
'garfunkel' : 'art garfunkel', 
'gustav holst' : 'holst', 
'gustav mahler' : 'mahler', 
'hanz zimmer' : 'hans zimmer', 
'iron wine' : 'iron and wine', 
'iz kamakawiwo`ole' : 'israel kamakawiwoole',
'johann sebastien bach' : 'johann sebastian bach', 
'j.s. bach' : 'johann sebastian bach', 
'bach' : 'johann sebastian bach', 
'louie armstrong' : 'louis armstrong', 
'ludovico einaudi' : 'einaudi', 
'ludwig van beethoven' : 'beethoven', 
'maurice ravel' : 'ravel', 
'felix mendelssohn-bartholdy' : 'mendelssohn', 
'mick hucknall simply red' : 'simply red', 
'wolfgang amadeus mozart' : 'mozart', 
'wa mozart' : 'mozart', 
'luciano pavarotti' : 'pavarotti', 
'giacomo puccini' : 'puccini', 
'pyotr ilyich tchaikovsky' : 'tchaikovsky', 
'rhcp' : 'red hot chili peppers', 
'satie' : 'erik satie', 
'sergei rachmaninoff' : 'rachmaninoff', 
'tenacious d' : 'jack black', 
'cindy lauper' : 'cyndi lauper', 
'lightfoot' : 'gordon lightfoot', 
'otis' : 'otis redding', 
'henry mancini' : 'mancini', 
'claire torrey' : 'pink floyd', 

}

df['artist'] = df['artist'].apply(lambda s: mymap[s] if s in mymap else s)


In [98]:
df['artist'] = df['artist'].apply(lambda s: s[4:] if s[:4]=='the ' else s)

df.loc[df['artist']=='band', 'artist']='the band'

df.loc[(df['artist']=='beatles') & (df['track']=='god only knows'), 'artist'] ='beach boys'


In [99]:
len(df['artist'].unique())

5781

In [100]:
df.loc[df['artist'].str.find('carp') >=0]

Unnamed: 0,post_id,post_score,artist,track,artist2,artist_index
2795,jhdaead,1,carpenter brut,You're Mine,Carpenter Brut,3732
2796,jhehl7t,1,carpenters,A Song For You,The Carpenters,164
2797,jhcx2si,1,carpenters,Close To You,The Carpenters,164
2798,jhcf53k,8,carpenters,Close to You,The Carpenters,164
2799,jhcyiva,2,carpenters,Close to You,The Carpenters,164
...,...,...,...,...,...,...
17402,jhcff56,1,carpenters,We’ve Only Just Begun,The Carpenters,164
17403,jhdqns9,1,carpenters,Yesterday Once More,The Carpenters,164
17404,jhd0h73,1,carpenters,Yesterday once more,The Carpenters,164
21330,jhd2jhh,4,carpenters,Close to You,The Carpenters,164


In [101]:
df['track'] = df['track'].apply(fix_leading_trailing)


In [104]:
df.groupby('track') \
    .count() \
    .reset_index() \
    .sort_values('artist', ascending=False) \
    .head(20)


Unnamed: 0,track,post_id,post_score,artist,artist2,artist_index
3653,hallelujah,166,166,166,166,166
4484,in my life,66,66,66,66,66
10581,what a wonderful world,65,65,65,65,65
3451,god only knows,62,62,62,62,62
1814,clair de lune,61,61,61,61,61
8366,somewhere over the rainbow,56,56,56,56,56
8417,songbird,56,56,56,56,56
6034,moonlight sonata,52,52,52,52,52
2878,fade into you,52,52,52,52,52
7560,river flows in you,52,52,52,52,52


In [103]:
df = df.drop(df.loc[df['track']=='unknown'].index)
df = df.drop(df.loc[df['track']=='cover'].index)
df = df.drop(df.loc[df['track']=='version'].index)
df = df.drop(df.loc[df['track']=='anything'].index)
df = df.drop(df.loc[df['track']=='none'].index)
df = df.drop(df.loc[df['track']==''].index)
df = df.drop(df.loc[df['track'].str.startswith('no track')].index)
df = df.drop(df.loc[df['track'].str.startswith('no artist')].index)
df = df.drop(df.loc[df['track'].str.startswith('various')].index)


In [None]:
# tempdf = df[['artist', 'post_score']] \
#     .groupby('artist') \
#     .sum() \
#     .reset_index() 

# tempdf.loc[tempdf['post_score']> 2].to_csv('x.csv', index=False)

In [105]:
df = df[['artist', 'track', 'post_score']].groupby(["artist", "track"]) \
    .sum() \
    .reset_index() \
    .sort_values('post_score', ascending=False)

df.head(20)



Unnamed: 0,artist,track,post_score
3210,debussy,claire de lune,13974
1197,beatles,yesterday,7901
3619,eagles,hotel california,7682
4058,erik satie,gymnopédies,7003
10962,simon and garfunkel,scarborough fair,6156
8704,neil young,harvest moon,5371
5562,israel kamakawiwo`ole,over the rainbow,5076
2791,cranberries,dreams,4371
8043,mazzy star,fade into you,3982
1061,beach boys,god only knows,3896


In [108]:
df.loc[df['post_score'] >4].to_csv('silver.csv', index=False)


In [109]:
df=pd.read_csv('silver.csv')
df

Unnamed: 0,artist,track,post_score
0,debussy,claire de lune,13974
1,beatles,yesterday,7901
2,eagles,hotel california,7682
3,erik satie,gymnopédies,7003
4,simon and garfunkel,scarborough fair,6156
...,...,...,...
1451,vance joy,riptide,5
1452,joy division,love will tear us apart,5
1453,aurora,it happened quiet,5
1454,sigur ros,njosnavelin,5


## Load into a Spotify playlist


In [110]:
client_credentials_manager = SpotifyClientCredentials(client_id=os.getenv('SPOTIFY_CLIENT_ID'), 
                                                      client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
                                                      )

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


In [114]:
# check artists
df = pd.read_csv("silver.csv")
df.drop_duplicates() \
    .dropna() \
    .sort_values(["artist", "track"])

dedupe = {}
fail_list = []
artist_map = {}
for index, artist, title, score in df.itertuples():
    artist = str(artist)
    if artist in dedupe:
        continue
    dedupe[artist]=1
    query_str = 'artist:%s' % (artist)
    artist_results = sp.search(q=query_str, type='artist', limit=3, offset=0, market='US')
    artist_names = [artist['name'] for artist in artist_results['artists']['items']]
    if artist_names:
        if artist.lower() != artist_names[0].lower():
            artist_map[artist] = artist_names[0]
            print(artist, '->', artist_names[0])
    else:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)

# then clean up manually as appropriate

debussy -> Claude Debussy
beatles -> The Beatles
simon and garfunkel -> Simon & Garfunkel
israel kamakawiwo`ole -> Israel Kamakawiwo'ole
cranberries -> The Cranberries
beach boys -> The Beach Boys
edith piaf -> Édith Piaf
righteous brothers -> The Righteous Brothers
sigur ros -> Sigur Rós
mamas the papas -> The Mamas & The Papas
cure -> The Cure
einaudi -> Ludovico Einaudi
not found: andrew lloyd webber, sarah brightman - pie jesu
not found: glen hansard and marketa irglova - falling slowly
mozart -> Wolfgang Amadeus Mozart
israel kamakawiwoole -> Israel Kamakawiwo'ole
simon garfunkel -> Simon & Garfunkel
dream academy -> The Dream Academy
mancini -> Henry Mancini
various artists -> Various Pro Artists
sinead oconnor -> Sinéad O'Connor
magnetic fields -> The Magnetic Fields
queensryche -> Queensrÿche
smiths -> The Smiths
hollies -> The Hollies
crosby, stills, nash and young -> Crosby, Stills, Nash & Young
killers -> The Killers
moody blues -> The Moody Blues
verve -> The Verve
beethove

In [115]:
artist_map

{'debussy': 'Claude Debussy',
 'beatles': 'The Beatles',
 'simon and garfunkel': 'Simon & Garfunkel',
 'israel kamakawiwo`ole': "Israel Kamakawiwo'ole",
 'cranberries': 'The Cranberries',
 'beach boys': 'The Beach Boys',
 'edith piaf': 'Édith Piaf',
 'righteous brothers': 'The Righteous Brothers',
 'sigur ros': 'Sigur Rós',
 'mamas the papas': 'The Mamas & The Papas',
 'cure': 'The Cure',
 'einaudi': 'Ludovico Einaudi',
 'mozart': 'Wolfgang Amadeus Mozart',
 'israel kamakawiwoole': "Israel Kamakawiwo'ole",
 'simon garfunkel': 'Simon & Garfunkel',
 'dream academy': 'The Dream Academy',
 'mancini': 'Henry Mancini',
 'various artists': 'Various Pro Artists',
 'sinead oconnor': "Sinéad O'Connor",
 'magnetic fields': 'The Magnetic Fields',
 'queensryche': 'Queensrÿche',
 'smiths': 'The Smiths',
 'hollies': 'The Hollies',
 'crosby, stills, nash and young': 'Crosby, Stills, Nash & Young',
 'killers': 'The Killers',
 'moody blues': 'The Moody Blues',
 'verve': 'The Verve',
 'beethoven': 'Ludwi

In [125]:
ignore_list = [
'seal',  # -> Seals and Crofts
'heart',  # -> Tom Petty and the Heartbreakers
'nan',  # -> Nancy Sinatra
'selena',  # -> Selena Gomez
'babyface',  # -> Babyface Ray
'doors',  # -> 3 Doors Down
'joni jam',  # -> Joni James
'flamingos',  # -> Flamingosis
'phil',  # -> Phil Collins
'eric johnson',  # -> Eric D. Johnson
'las',  # -> Sleeping At Last
'bowser',  # -> Dean Bowser
'jason isbell',  # -> Jason Isbell and the 400 Unit
'enigma',  # -> Enigma Norteño
'in',  # -> Falling In Reverse
'rem',  # -> Rema
'train',  # -> Meghan Trainor
'ole',  # -> Ole-Bjørn Talstad
'death',  # -> Five Finger Death Punch
'la la land',  # -> Landon Cube
'sarah',  # -> Sarah McLachlan
'adeem',  # -> Adeem the Artist
'nico',  # -> Nicki Nicole
'mum',  # -> Mumford & Sons
'berlin',  # -> Berliner Philharmoniker
'ren',  # -> La Arrolladora Banda El Limón De Rene Camacho
'pink',  # -> PinkPantheress
'man man',  # -> Gucci Mane
'south park',  # -> South Park Mexican
'to the moon',  # -> A Rocket To The Moon
'lp',  # -> LP Giobbi
'air',  # -> Air Supply
]

for k in ignore_list:
    artist_map.pop(k)





In [126]:
artist_map.get('air')

In [127]:
df['artist'] = df['artist'].apply(lambda s: artist_map[s] if s in artist_map else s)
df.head(20)


In [129]:
df.head(20)

Unnamed: 0,artist,track,post_score
0,Claude Debussy,claire de lune,13974
1,The Beatles,yesterday,7901
2,eagles,hotel california,7682
3,erik satie,gymnopédies,7003
4,Simon & Garfunkel,scarborough fair,6156
5,neil young,harvest moon,5371
6,Israel Kamakawiwo'ole,over the rainbow,5076
7,The Cranberries,dreams,4371
8,mazzy star,fade into you,3982
9,The Beach Boys,god only knows,3896


In [130]:
df.to_csv('silver.csv', index=False)


In [132]:

# check tracks

df = pd.read_csv("silver.csv")

dedupe = {}
mylist = []
fail_list = []
artist_list, track_list, uri_list, album_list, score_list = [], [], [], [], []
orig_artist, orig_track = [], []

for index, artist, title, score in df.itertuples():
    query_str = 'artist:%s track:%s' % (artist, title)
    track_results = sp.search(q=query_str, type='track', limit=1, offset=0, market='US')
    results = track_results['tracks']['items']
    
    if results:
        r = results[0]
        # failsafe to never put same track twice
        if dedupe.get(r['id']):
            continue
        dedupe[r['id']]=True
        if title.lower() != r['name'].lower():
            print ("%s|%s : %s|%s" % (artist, title, r['artists'][0]['name'], r['name']))
        uri_list.append(r['uri'])
        artist_list.append(r['artists'][0]['name'])
        track_list.append(r['name'])
        album_list.append(r['album']['name'])
        orig_artist.append(artist)
        orig_track.append(title)
        score_list.append(score)
#         print('  ',
#               r['artists'][0]['name'],'|',
#               r['name'], '|',
#               r['album']['name'],'|',
#               r['album']['release_date'],'|',
#               r['popularity'])
    else:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)
        

The Beatles|yesterday : The Beatles|Yesterday - Remastered 2009
eagles|hotel california : Eagles|Hotel California - 2013 Remaster
erik satie|gymnopédies : Erik Satie|3 Gymnopédies: No. 1 Lent et douloureux
Simon & Garfunkel|scarborough fair : Simon & Garfunkel|Scarborough Fair / Canticle
not found: neil young - harvest moon
The Beach Boys|god only knows : The Beach Boys|God Only Knows - Mono
The Beatles|in my life : The Beatles|In My Life - Remastered 2009
don mclean|vincent (starry, starry night : Don McLean|Vincent (Starry, Starry Night)
Sigur Rós|hoppipolla : Sigur Rós|Hoppípolla
The Beatles|blackbird : The Beatles|Blackbird - Remastered 2009
john denver|annie’s song : John Denver|Annie's Song
otis redding|sittin' on) the dock of the bay : Otis Redding|(Sittin' On) the Dock of the Bay
jim croce|i got a name : Jim Croce|I'll Have To Say I Love You In A Song
samuel barber|adagio for strings : Samuel Barber|Barber: Adagio for Strings
Israel Kamakawiwo'ole|somewhere over the rainbow/won

The Beatles|julia : The Beatles|Julia - Remastered 2009
Yusuf / Cat Stevens|the wind : Yusuf / Cat Stevens|The Wind - Remastered 2021
not found: bee gees - inmortality
tina turner|simply the best : Tina Turner|Simply the Best - Live in Arnhem
not found: izone - la vie en rose
not found: uncle lucius - 50 years
mark knopfler|romeo and juliet : Mark Knopfler|Romeo And Juliet - Live At Gibson Amphitheatre / June 28th 2006
Crosby, Stills & Nash|helplessly hoping : Crosby, Stills & Nash|Helplessly Hoping - 2005 Remaster
not found: vv - heartfull of ghosts
babyface|every time i close my eyes : Babyface|Every Time I Close My Eyes (with Kenny G)
not found: etta james - i’d rather go blind
not found: flight facilities - claire de lune
disturbed|sound of silence : Disturbed|The Sound of Silence
hozier|cherry wine : Hozier|Cherry Wine - Live
not found: Les Misérables Cast - i dreamed a dream
The Cure|lovesong : The Cure|Lovesong - 2010 Remaster
nobuo uematsu|to zanarkand : Nobuo Uematsu|To Zanark

procol harum|whiter shade of pale : Procol Harum|A Whiter Shade of Pale - Original Single Version
paul mauriat|love is blue : Paul Mauriat|Love Is Blue - Remastered
not found: Oasis Ministry - jesus built my hotrod
The Beatles|abbey road : Peel, David & The Apple Band|The wonderful world of Abbey Road
The Cure|disintegration : The Cure|Disintegration - 2010 Remaster
johnny mathis|chances are : Johnny Mathis|Chances Are (with Ray Conniff & His Orchestra)
Arvo Pärt|spiegel im spiegel : Arvo Pärt|Spiegel im Spiegel - Version for Violin and Piano
not found: prince - while my guitar gently weeps
stevie wonder|isn't she lovely : Stevie Wonder|Isn’t She Lovely
not found: Rainbow Kitten Surprise - catch the rainbow
R.E.M.|it's the end of the world as we know it (and i feel fine : R.E.M.|It's The End Of The World As We Know It (And I Feel Fine)
The Smiths|that joke isn’t funny anymore : The Smiths|That Joke Isn't Funny Anymore
The Beatles|all you need is love : The Beatles|All You Need Is Love 

The Beatles|hey jude : The Beatles|Hey Jude - Remastered 2015
not found: dire straits - dire straits
not found: neil young - unknown legend
Pyotr Ilyich Tchaikovsky|waltz of the flowers : Pyotr Ilyich Tchaikovsky|Tchaikovsky: The Nutcracker, Op. 71, Act II: No. 13, Waltz of the Flowers
not found: Pyotr Ilyich Tchaikovsky - 5th symphony, movement 2
not found: bon iver - roslyn
grateful dead|attics of my life : Grateful Dead|Attics of My Life - 2013 Remaster
The Beatles|because : The Beatles|Because - Remastered 2009
Simon & Garfunkel|a bridge over troubled water : Simon & Garfunkel|Song for the Asking
taylor swift|all too well : Taylor Swift|All Too Well (10 Minute Version) (Taylor's Version) (From The Vault)
marvin gaye|can i get a witness : Marvin Gaye|Can I Get A Witness - Single Version / Mono
The Beatles|a day in the life : The Beatles|A Day In The Life - Remastered 2009
not found: adeem - white trash revelry
evanescence|even in death : Evanescence|Even In Death - 2016 Version
stev

not found: final fantasy x - to zanarkand
not found: south park - japanese toilet episode song
Giacomo Puccini|o mio babbino caro : Giacomo Puccini|Puccini: Gianni Schicchi, Act 1: "O mio babbino caro" (Lauretta)
peter gabriel|red rain : Peter Gabriel|Red Rain - 2012 Remaster
not found: The Smashing Pumpkins - mayonnaise
The Beatles|norwegian wood : The Beatles|Norwegian Wood (This Bird Has Flown) - Remastered 2009
Pyotr Ilyich Tchaikovsky|pas de deux from the nutcracker : Pyotr Ilyich Tchaikovsky|Pas De Deux Variation (From The Nutcracker, Op. 71, Arr. for Piano by Svetoslav Karparov)
not found: hack//sign - key of the twilight
not found: Les Misérables Cast - bring him home
Pyotr Ilyich Tchaikovsky|hymn of the cherubim : Pyotr Ilyich Tchaikovsky|Liturgy of St. John Chrysostom, Op. 41: VI. Hymn of the Cherubim
not found: Des'ree - i’m kissing you
george harrison|my sweet lord : George Harrison|My Sweet Lord - 2014 Remaster
Johannes Brahms|lullaby : Johannes Brahms|Wiegenlied, Op. 49, 

In [133]:
gold_df = pd.DataFrame({'score': score_list,
                        'input_artist': orig_artist,
                        'artist': artist_list,
                        'input_track': orig_track,
                        'track': track_list,
                        'album': album_list,
                        'uri': uri_list})

with pd.option_context("display.max_rows", 9999):
    display(gold_df)


Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,13974,Claude Debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
1,7901,The Beatles,The Beatles,yesterday,Yesterday - Remastered 2009,Help! (Remastered),spotify:track:3BQHpFgAp4l80e1XslIjNI
2,7682,eagles,Eagles,hotel california,Hotel California - 2013 Remaster,Hotel California (2013 Remaster),spotify:track:40riOy7x9W7GXjyGp4pjAv
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
4,6156,Simon & Garfunkel,Simon & Garfunkel,scarborough fair,Scarborough Fair / Canticle,"Parsley, Sage, Rosemary And Thyme",spotify:track:3g2fYZW5v2od8KIF7VktT0
5,5076,Israel Kamakawiwo'ole,Israel Kamakawiwo'ole,over the rainbow,Over the Rainbow,Alone In Iz World,spotify:track:3oQomOPRNQ5NVFUmLJHbAV
6,4371,The Cranberries,The Cranberries,dreams,Dreams,"Everybody Else Is Doing It, So Why Can't We?",spotify:track:4JGKZS7h4Qa16gOU3oNETV
7,3982,mazzy star,Mazzy Star,fade into you,Fade Into You,So Tonight That I Might See,spotify:track:1LzNfuep1bnAUR9skqdHCK
8,3896,The Beach Boys,The Beach Boys,god only knows,God Only Knows - Mono,Pet Sounds (Original Mono & Stereo Mix),spotify:track:6iGU74CwXuT4XVepjc9Emf
9,3865,The Beatles,The Beatles,in my life,In My Life - Remastered 2009,Rubber Soul (Remastered),spotify:track:3KfbEIOC7YIv90FIfNSZpo


In [134]:
with pd.option_context("display.max_rows", 999):
    display(gold_df.loc[gold_df['input_artist'].str.lower() != gold_df['artist'].str.lower()])

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
41,832,"andrew lloyd webber, sarah brightman",Andrew Lloyd Webber,pie jesu,Pie Jesu,Diva: The Singles Collection,spotify:track:4D2Ha1TUtxuDqHk2Kb9DO4
46,758,glen hansard and marketa irglova,Glen Hansard,falling slowly,Falling Slowly,Perhaps Love,spotify:track:2lpNVkZb7e1k7IeW8MOzLe
87,311,The Verve,The Cover Crew,bittersweet symphony,Bittersweet Symphony (Acoustic Version) [The V...,"Acoustified Hits, Vol. 2",spotify:track:714doH50K9qbrE9py6izzV
106,223,stevie nicks and don henley,Stevie Nicks,leather and lace,Leather and Lace (with Don Henley),Timespace - The Best Of Stevie Nicks,spotify:track:2npStoMbQxG1LcIeF4PZrj
153,100,joni mitchell,Joni Mitchell Tribute by Strings Attached,a case of you,A Case of You,Strings Attached Plays Joni Mitchell,spotify:track:2tjPtYQMmZyvjPTGthh8fS
177,83,john cale,Caleb & John,hallelujah,Hallelujah Feeling,Hallelujah Feeling,spotify:track:5Gyg9RufRKGzTtzafw2bK6
264,45,alison krauss,Alison Krauss & Union Station,when you say nothing at all,When You Say Nothing At All,Now That I've Found You: A Collection,spotify:track:6HveTIMynTMzTc5L0TcS20
294,37,joni mitchell,Joni Mitchell Tribute by Strings Attached,big yellow taxi,Big Yellow Taxi,Strings Attached Plays Joni Mitchell,spotify:track:6UPMrJt8MhYeXPiRDwLZBx
295,37,doors,The Doors,riders on the storm,Riders on the Storm,L.A. Woman,spotify:track:14XWXWv5FoCbFzLksawpEe
305,35,sonder,Sonder Piano,sonder,sonder,The Past Whispers,spotify:track:04VR2LKx0aoE8CLTS9fPNc


In [135]:
# fis these in silver.csv and rerun as necessary
gold_df2=gold_df.copy().reset_index(drop=True)
gold_df2['input_track']=gold_df2['input_track'].str.lower()
gold_df2['input_track']=gold_df2['input_track'].apply(lambda s: s.strip()[:10])

gold_df2['track']=gold_df2['track'].str.lower()
gold_df2['track']=gold_df2['track'].apply(lambda s: s.strip()[:10])


with pd.option_context("display.max_rows", 999):
    display(gold_df2.loc[gold_df2['input_track'] != gold_df2['track']])

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
1,7901,The Beatles,The Beatles,yesterday,yesterday,Help! (Remastered),spotify:track:3BQHpFgAp4l80e1XslIjNI
3,7003,erik satie,Erik Satie,gymnopédie,3 gymnopéd,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
19,2517,Sigur Rós,Sigur Rós,hoppipolla,hoppípolla,Takk...,spotify:track:0yQPpUq5BJyqah5m2Q5Stt
20,2422,The Beatles,The Beatles,blackbird,blackbird,The Beatles (Remastered),spotify:track:5jgFfDIR6FR0gvlA56Nakr
21,2272,john denver,John Denver,annie’s so,annie's so,Back Home Again,spotify:track:4J0DbyODwZJcmIAiTSJfMF
25,1862,otis redding,Otis Redding,sittin' on,(sittin' o,The Dock of the Bay (Mono),spotify:track:3zBhihYUHBmGd2bcQIobrF
30,1435,jim croce,Jim Croce,i got a na,i'll have,I Got a Name,spotify:track:4t8tx6o7oM6Ax66ZUU361y
32,1223,samuel barber,Samuel Barber,adagio for,barber: ad,Samuel Barber - Adagio,spotify:track:1CSaCKPIp2yCIDL3t7Fyau
39,937,Claude Debussy,Claude Debussy,clair de l,suite berg,"Debussy: Suite bergamasque, L. 75, 3. Clair de...",spotify:track:1cmigB9I6IRpFqjIbzvSQB
43,776,jim croce,Jim Croce,operator,operator (,You Don't Mess Around With Jim,spotify:track:3NJzkMApQqAudLSgYb5Bz2


In [None]:
# these are songs that look like covers or otherwise not the expected response from spotify search 
# (which is a bit wonky, doesn't like quotes and such)
# remove from df and add manually
bad_lookups = [
#    25,134,155,160,200,209,422,445,446,557,737,744,755,759,760,761,762,781,785,790,814,815,842
    21,51,61,63,83,145,212,317,322,439,449,575,759,784,
]

for i in bad_lookups:
    print(gold_df.iloc[i])
    
# add manually, plus 'not found'


In [146]:
gold_df

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,13974,Claude Debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
1,7901,The Beatles,The Beatles,yesterday,Yesterday - Remastered 2009,Help! (Remastered),spotify:track:3BQHpFgAp4l80e1XslIjNI
2,7682,eagles,Eagles,hotel california,Hotel California - 2013 Remaster,Hotel California (2013 Remaster),spotify:track:40riOy7x9W7GXjyGp4pjAv
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
4,6156,Simon & Garfunkel,Simon & Garfunkel,scarborough fair,Scarborough Fair / Canticle,"Parsley, Sage, Rosemary And Thyme",spotify:track:3g2fYZW5v2od8KIF7VktT0
...,...,...,...,...,...,...,...
1210,5,avenged sevenfold,Avenged Sevenfold,fiction,Fiction,Nightmare,spotify:track:3b3eu3uMp1b9xPOHbfSwBi
1211,5,vance joy,Vance Joy,riptide,Riptide,Dream Your Life Away (Special Edition),spotify:track:3JvrhDOgAt6p7K8mDyZwRd
1212,5,joy division,Joy Division,love will tear us apart,Love Will Tear Us Apart - 2020 Remaster,Love Will Tear Us Apart,spotify:track:34iOH7LY3vme5rQxsVILZ4
1213,5,aurora,AURORA,it happened quiet,It Happened Quiet,Infections of a Different Kind (Step I),spotify:track:0yXXv7MH5jYTDlOltMbdvt


In [None]:
gold_df = gold_df.drop(
    axis='index',
    labels=bad_lookups)


In [147]:
gold_df[['artist', 'track', 'score']].to_csv('gold.csv', index=False)

with pd.option_context("display.max_rows", 999):
    display(gold_df)

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,13974,Claude Debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
1,7901,The Beatles,The Beatles,yesterday,Yesterday - Remastered 2009,Help! (Remastered),spotify:track:3BQHpFgAp4l80e1XslIjNI
2,7682,eagles,Eagles,hotel california,Hotel California - 2013 Remaster,Hotel California (2013 Remaster),spotify:track:40riOy7x9W7GXjyGp4pjAv
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
4,6156,Simon & Garfunkel,Simon & Garfunkel,scarborough fair,Scarborough Fair / Canticle,"Parsley, Sage, Rosemary And Thyme",spotify:track:3g2fYZW5v2od8KIF7VktT0
...,...,...,...,...,...,...,...
1210,5,avenged sevenfold,Avenged Sevenfold,fiction,Fiction,Nightmare,spotify:track:3b3eu3uMp1b9xPOHbfSwBi
1211,5,vance joy,Vance Joy,riptide,Riptide,Dream Your Life Away (Special Edition),spotify:track:3JvrhDOgAt6p7K8mDyZwRd
1212,5,joy division,Joy Division,love will tear us apart,Love Will Tear Us Apart - 2020 Remaster,Love Will Tear Us Apart,spotify:track:34iOH7LY3vme5rQxsVILZ4
1213,5,aurora,AURORA,it happened quiet,It Happened Quiet,Infections of a Different Kind (Step I),spotify:track:0yXXv7MH5jYTDlOltMbdvt


In [137]:
# get playlist id
# first create a playlist in UI to load songs
playlists = sp.user_playlists(os.getenv('SPOTIFY_USERNAME'))
while playlists:
    for i, playlist in enumerate(playlists['items']):
        if playlist['name'] != 'Reddit Prettiest Songs':
            continue
        print(playlist['id'])
        playlist_id = playlist['id']
        print("%4d %s %s" % (i + 1 + playlists['offset'], playlist['uri'],  playlist['name']))
    if playlists['next']:
        playlists = sp.next(playlists)
    else:
        playlists = None
        

08YFkbtTV6GBfNtjJ4PHDu
   1 spotify:playlist:08YFkbtTV6GBfNtjJ4PHDu Reddit Prettiest Songs


In [138]:
# must follow an oauth workflow to write a playlist in Spotify
# running this cell should request a spotify login and then redirect to an url
# paste whole url with id into form to authenticate

scope = "playlist-modify-public"

sp = spotipy.Spotify(auth_manager=spotipy.SpotifyOAuth(scope=scope,
                                                       client_id=os.getenv('SPOTIFY_CLIENT_ID'),
                                                       client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
                                                       redirect_uri="https://druce.ai"
                                                      ))


In [None]:
# addlist = gold_df['uri'].to_list()
# print (len(addlist))

# while(addlist):
#     sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'), 
#                                 playlist_id=playlist_id, 
#                                 tracks=addlist[-100:])
#     addlist = addlist[:-100]
#     print("added items, remaining ", len(addlist))


In [None]:
# manually add the ones that weren't found for some reason


In [139]:
# can run again and add any new tracks, either because OpenAI is a bit random, or new replies in thread
results = sp.user_playlist(os.getenv('SPOTIFY_USERNAME'), playlist_id,
                                fields='tracks,next,name')
tracks = results['tracks']

playlist_dict_by_uri = {}
playlist_dict_by_str = {}

artist_list = []
track_list = []
uri_list = []
popularity_list = []
album_list=[]

while True:
    for track_item in tracks['items']:
        track_dict = track_item['track']
        track_str = track_dict['artists'][0]['name']  + ' | ' + track_dict['name'][:15]
        uri = track_dict['uri']
        if track_str in playlist_dict_by_str:
            print(track_str)
        playlist_dict_by_str[track_str] = uri
        playlist_dict_by_uri[uri] = track_str
        
        uri_list.append(uri)
        artist_list.append(track_dict['artists'][0]['name'])
        track_list.append(track_dict['name'])
        album_list.append(track_dict['album']['name'])
        popularity_list.append(track_dict['popularity'])
        
    # check if there are more pages
    if tracks['next']:
        tracks = sp.next(tracks)
    else:
        break

print (len(list(playlist_dict_by_str.keys())))
print (len(list(playlist_dict_by_uri.keys())))


Enter the URL you were redirected to: https://druce.ai/?code=AQCUDnYHMEgmgR1zuAj88yJVKInDc83B_FPe6URVa-xtdHWyHlK6ERHCQxnfjWRAM1YuMp03FY8aWkWdfE2WfKWqilXm7n1M1v9lqBhRu9xioKaJEIlWvsVpNL0RFjIZJ3QzfWexXqpJUrm97HWZcnHx18nt-AdMZnuctEi-enL1uAU7Y-3HMqsJi9U
Nine Inch Nails | A Warm Place
Joni Mitchell | Both Sides Now
1140
1142


In [145]:
with pd.option_context("display.max_rows", 9999):
    display(gold_df.loc[~gold_df['uri'].isin(playlist_dict_by_uri.keys())])
    

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,13974,Claude Debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
10,3383,don mclean,Don McLean,"vincent (starry, starry night","Vincent (Starry, Starry Night)",Rearview Mirror: An American Musical Journey,spotify:track:2YDyH60Vro33KkDtNZCXIk
18,2531,joni mitchell,Joni Mitchell,both sides now,Both Sides Now,Feathers.,spotify:track:5EsPLgSs1UQIDJG0U00RuJ
45,760,Simon & Garfunkel,Simon & Garfunkel,"for emily, whenever i may find her","For Emily, Whenever I May Find Her","Parsley, Sage, Rosemary And Thyme",spotify:track:76Fcbx3T7fUgA7phUkmsn7
46,758,glen hansard and marketa irglova,Glen Hansard,falling slowly,Falling Slowly,Perhaps Love,spotify:track:2lpNVkZb7e1k7IeW8MOzLe
48,694,Wolfgang Amadeus Mozart,Wolfgang Amadeus Mozart,lacrimosa,Lacrimosa,Requiem - Music To Die For,spotify:track:1UvaZaHkh3D9AkmBrrnbFg
59,584,louis armstrong,Louis Armstrong,it's a wonderful world,It's Wonderful - Single Version,What A Wonderful World,spotify:track:7tmOKoxLQFLvQWwxzYaodT
62,508,Henry Mancini,Henry Mancini,moon river,Moon River(Vocal Audrey Hepburn),Breakfast At Tiffany's (50th Anniversary Edition),spotify:track:5iGleL7HpEThuuYQ3us2jh
87,311,The Verve,The Cover Crew,bittersweet symphony,Bittersweet Symphony (Acoustic Version) [The V...,"Acoustified Hits, Vol. 2",spotify:track:714doH50K9qbrE9py6izzV


In [141]:
playlist_df = pd.DataFrame({'artist': artist_list,
                           'track': track_list,
                           'album': album_list,
                           'popularity': popularity_list,
                           })



In [142]:
with pd.option_context("display.max_rows", 9999):
    display(playlist_df.sort_values('popularity'))
    

Unnamed: 0,artist,track,album,popularity
909,Yusuf / Cat Stevens,Wild World,The Best Of Cat Stevens 20th Century Masters T...,0
1138,John Williams,Missing You,My World,0
1137,ZaZa,Only You,Nights One and a Thousand,0
769,Jaakko Aukusti,What If All Else Fails?,What If All Else Fails?,0
999,Joni Mitchell,Both Sides Now,Clouds,0
300,Jay-Jay Johanson,Poison,Poison,1
42,The Smiths,Asleep,mental health: look out for yourself,2
31,The Niro,No One Must Find You Here,The Complete Jeff Buckley and Gary Lucas Songbook,2
1008,Johann Sebastian Bach,"Orchestral Suite No. 3 in D Major, BWV 1068: I...",Classical Music In the Background,2
504,Claude Debussy,Debussy: Arabesque no. 2 in G major (Deux Arab...,Debussy: Arabesque no. 2 in G major (Deux Arab...,4


In [143]:
gold_dict_by_uri = {}
gold_dict_by_str = {}
addlist = []
c = 0
for i, artist, track, uri in gold_df[['artist', 'track', 'uri']].itertuples():
    # print(artist, track, uri)
    track_str = artist + ' | ' + track[:15]
    if track_str not in playlist_dict_by_str:
        addlist.append([artist, track, uri])
        print(artist, track, uri)
    gold_dict_by_uri[uri]=track_str
    gold_dict_by_str['track_str']= uri
#     if track_str not in playlist_dict_by_str:
#         c += 1
#         print (c, track_str)
        
print(len(gold_dict_by_str.items()))
print(len(gold_dict_by_uri.items()))

Claude Debussy Claire de lune spotify:track:6kf7ZCJjEbjZXikivKOsvJ
Erik Satie 3 Gymnopédies: No. 1 Lent et douloureux spotify:track:7kTVe6XhIveidvkt8nb7jK
Don McLean Vincent (Starry, Starry Night) spotify:track:2YDyH60Vro33KkDtNZCXIk
Wolfgang Amadeus Mozart Lacrimosa spotify:track:1UvaZaHkh3D9AkmBrrnbFg
Louis Armstrong It's Wonderful - Single Version spotify:track:7tmOKoxLQFLvQWwxzYaodT
Henry Mancini Moon River(Vocal Audrey Hepburn) spotify:track:5iGleL7HpEThuuYQ3us2jh
The Cover Crew Bittersweet Symphony (Acoustic Version) [The Verve Cover] spotify:track:714doH50K9qbrE9py6izzV
The Beatles Golden Slumbers - Remastered 2009 spotify:track:01SfTM5nfCou5gQL70r6gs
Louis Armstrong La vie en rose - Single Version spotify:track:3yYfoYGVpriV4fG9L1ogsD
Johann Sebastian Bach Herz und Mund und Tat und Leben, Cantata BWV 147: Jesu, Joy of Man’s Desiring (Transcr. Hess for Piano) spotify:track:2zl2AqEdHVLhjzStuRulGY
Giacomo Puccini Madama Butterfly, SC 74, Act II Pt. 1: No. 17, Un bel di vedremo (But

In [144]:
addlist

[['Claude Debussy', 'Claire de lune', 'spotify:track:6kf7ZCJjEbjZXikivKOsvJ'],
 ['Erik Satie',
  '3 Gymnopédies: No. 1 Lent et douloureux',
  'spotify:track:7kTVe6XhIveidvkt8nb7jK'],
 ['Don McLean',
  'Vincent (Starry, Starry Night)',
  'spotify:track:2YDyH60Vro33KkDtNZCXIk'],
 ['Wolfgang Amadeus Mozart',
  'Lacrimosa',
  'spotify:track:1UvaZaHkh3D9AkmBrrnbFg'],
 ['Louis Armstrong',
  "It's Wonderful - Single Version",
  'spotify:track:7tmOKoxLQFLvQWwxzYaodT'],
 ['Henry Mancini',
  'Moon River(Vocal Audrey Hepburn)',
  'spotify:track:5iGleL7HpEThuuYQ3us2jh'],
 ['The Cover Crew',
  'Bittersweet Symphony (Acoustic Version) [The Verve Cover]',
  'spotify:track:714doH50K9qbrE9py6izzV'],
 ['The Beatles',
  'Golden Slumbers - Remastered 2009',
  'spotify:track:01SfTM5nfCou5gQL70r6gs'],
 ['Louis Armstrong',
  'La vie en rose - Single Version',
  'spotify:track:3yYfoYGVpriV4fG9L1ogsD'],
 ['Johann Sebastian Bach',
  'Herz und Mund und Tat und Leben, Cantata BWV 147: Jesu, Joy of Man’s Desiring 

In [None]:
addlist = [['ABBA', 'One Of Us', 'spotify:track:6zgtBUEkAfilJ2YEOvNexR'],
 ['Gregorio Allegri',
  'Miserere mei, Deus',
  'spotify:track:6es7DmrhnDoKj5rsFvh3XU'],
 ['Amy Winehouse',
  'Love Is A Losing Game',
  'spotify:track:3uliGwmB52ZA7brgpZMzyH'],
 ['Barbara',
  "Ma plus belle histoire d'amour",
  'spotify:track:0qBVET4VkHsQAoboWlQ2pJ'],
 ['Ludwig van Beethoven',
  'Symphony No. 5 in C Minor, Op. 67: I. Allegro con brio',
  'spotify:track:2ygeBLTP9uu3OW3VTulD8N'],
 ['Benny Goodman', 'Sing, Sing, Sing', 'spotify:track:5L8ta4ECl5zeA6bGqY7G38'],
 ['Bill Withers', 'Lean on Me', 'spotify:track:3M8FzayQWtkvOhqMn2V4T2'],
 ['Billy Joel', 'Piano Man', 'spotify:track:70C4NyhjD5OZUMzvWZ3njJ'],
 ['Bob Dylan', 'Ballad of a Thin Man', 'spotify:track:0f5N14nB8xi0p3o4BlVvbx'],
 ['Bob Dylan', "Blowin' in the Wind", 'spotify:track:18GiV1BaXzPVYpp9rmOg0E'],
 ['Bob Dylan', 'Desolation Row', 'spotify:track:4n1ZGm3TxYmoYe1YR8cMus'],
 ['Bob Dylan', 'Duquesne Whistle', 'spotify:track:5kKW4bszhKSCYVPDO0sMbX'],
 ['Bob Dylan',
  'Forever Young - Slow Version',
  'spotify:track:4yWl0tnEanf3zmZzl9kbQn'],
 ['Bob Dylan', 'Gotta Serve Somebody', 'spotify:track:760420tYNmNjFgi8bWvbop'],
 ['Bob Dylan', 'Highway 61 Revisited', 'spotify:track:6os5B6xjuke9YfBKH3tu1e'],
 ['Bob Dylan',
  'I Shall Be Released - Studio Outtake - 1971',
  'spotify:track:5vyw005QQ42hrzrLxb3xEX'],
 ['Bob Dylan', 'I Want You', 'spotify:track:7tJQ4Ekp2vN3NlI3vJJW3v'],
 ['Bob Dylan', "It Ain't Me Babe", 'spotify:track:5nbNWAfT1S6V1vqj3snHxS'],
 ['Bob Dylan', 'Jokerman', 'spotify:track:6cuHkcRUqtQhtJ4sWCkd1q'],
 ['Bob Dylan',
  "Knockin' On Heaven's Door",
  'spotify:track:6HSXNV0b4M4cLJ7ljgVVeh'],
 ['Bob Dylan', 'Lay, Lady, Lay', 'spotify:track:4uYwlMp841PLJmj1gJJwIq'],
 ['Bob Dylan', 'Like a Rolling Stone', 'spotify:track:3AhXZa8sUQht0UEdBJgpGc'],
 ['Bob Dylan', 'Love Sick', 'spotify:track:3O1hpSOaJDW4SelgUG2XT3'],
 ['Bob Dylan', "Maggie's Farm", 'spotify:track:5rGD8FFgHw74cp3RPhucyg'],
 ['Bob Dylan',
  'Make You Feel My Love',
  'spotify:track:6rfGPGghQL7SJmZPXprXIc'],
 ['Bob Dylan',
  'Mississippi - Version 2',
  'spotify:track:6JWHNd8QMxTvojYkmZtKGI'],
 ['Bob Dylan', 'Mr. Tambourine Man', 'spotify:track:3RkQ3UwOyPqpIiIvGVewuU'],
 ['Bob Dylan', 'Murder Most Foul', 'spotify:track:1LfTvT9JPYuuZanwxLtZCr'],
 ['Bob Dylan', 'Not Dark Yet', 'spotify:track:1qbn6QrHG8XfnqVFKgNzKP'],
 ['Bob Dylan',
  'Rainy Day Women #12 & 35',
  'spotify:track:7BkAlVpGwXXl3sYNn5OoJ7'],
 ['Bob Dylan',
  'Sad-Eyed Lady of the Lowlands',
  'spotify:track:4jdtLLyEL7wY0TlCdMKhxq'],
 ['Bob Dylan', 'She Belongs to Me', 'spotify:track:2itBkHBUxGl4VfDj4HNyoD'],
 ['Bob Dylan',
  'Stuck Inside of Mobile with the Memphis Blues Again',
  'spotify:track:1NYTj6JEw3IOh4ggiBh82h'],
 ['Bob Dylan',
  'Subterranean Homesick Blues',
  'spotify:track:6k9DUKMJpWvu6eFG3O64Lg'],
 ['Bob Dylan', 'Tangled up in Blue', 'spotify:track:6Vcwr9tb3ZLO63F8DL8cqu'],
 ['Bob Dylan', 'Tempest', 'spotify:track:19scNzd4ogVsHrNWsms8Rg'],
 ['Bob Dylan',
  "The Times They Are A-Changin'",
  'spotify:track:52vA3CYKZqZVdQnzRrdZt6'],
 ['Bob Dylan',
  'Things Have Changed - Single Version',
  'spotify:track:5KOi77ameCimkAdw0DMNoy'],
 ['Bob Dylan',
  'Thunder on the Mountain',
  'spotify:track:4wo2eRp6aHcAlmhmfwiTAH'],
 ['Bob Dylan', 'Visions of Johanna', 'spotify:track:2rslQV48gNv3r9pPrQFPW1'],
 ['Brian Wilson', 'God Only Knows', 'spotify:track:2SznAUigFh6rMdGpcS5d7e'],
 ['Bright Eyes',
  'First Day of My Life',
  'spotify:track:0eBryM7ePQH3Klt3jz8xZd'],
 ['Crowded House',
  'Don’t Dream It’s Over - Home Demo',
  'spotify:track:0fiSpF9mvRFQWy0ca64d1g'],
 ['Léo Delibes', 'Flower Duet', 'spotify:track:5K8jqeLAxZIqHR6e5w5so1'],
 ['Dire Straits', 'Brothers In Arms', 'spotify:track:6XYBbVpu455ZdGWZNRLGbG'],
 ['Don McLean',
  'Vincent (Starry, Starry Night)',
  'spotify:track:2YDyH60Vro33KkDtNZCXIk'],
 ['Ed Sheeran', 'Photograph', 'spotify:track:41xNsY82OWtWbIfnRMK2ky'],
 ['Elvis Presley',
  'Can’t Help Falling in Love - Acoustic Cover',
  'spotify:track:0ghQkNDYLSl4GsqfkjTjWx'],
 ['Enya', 'Amarantine', 'spotify:track:0VmzazQQ0Mo1vJldr5NxTW'],
 ['Evan Rachel Wood', 'If I Fell', 'spotify:track:0gd3hRBQAEAw096YOcUrmR'],
 ['Fleetwood Mac', 'Rhiannon', 'spotify:track:05oETzWbd4SI33qK2gbJfR'],
 ['George Harrison',
  'All Things Must Pass - 2014 Remaster',
  'spotify:track:16OwZQuzMqnwn3FZsCBZly'],
 ['George Harrison',
  'Apple Scruffs - 2014 Remaster',
  'spotify:track:2K7WhpfZX3TCCMiwebp0W7'],
 ['George Harrison',
  'Art of Dying - 2014 Remaster',
  'spotify:track:6Jod7qrtYBhU3HcUmKk4hX'],
 ['George Harrison',
  'Awaiting on You All - 2014 Remaster',
  'spotify:track:0b65WkrBrg2qOkzQeDtQ9d'],
 ['George Harrison',
  'Ballad of Sir Frankie Crisp (Let It Roll) - 2014 Remaster',
  'spotify:track:0FWeRrB8T5R6maHbWQw4Kk'],
 ['George Harrison',
  'Behind That Locked Door',
  'spotify:track:2VVbLn8nMcWJzjcL1tZsUr'],
 ['George Harrison',
  'Beware of Darkness - 2014 Remaster',
  'spotify:track:606MCyZFMBlc52Ojnn1nvU'],
 ['George Harrison',
  'Give Me Love (Give Me Peace on Earth)',
  'spotify:track:71fXxvXqo1zxWDtBmjoEVk'],
 ['George Harrison',
  'Hear Me Lord - 2014 Remaster',
  'spotify:track:3kopbNyRj10XO1actGZexP'],
 ['George Harrison',
  'I Dig Love - 2014 Remaster',
  'spotify:track:42yK1Wy62c7malKSRwy0Qk'],
 ['George Harrison',
  'I Remember Jeep - 2014 Remaster',
  'spotify:track:058AE5M3ifbCh8VWOV7903'],
 ['George Harrison',
  "It's Johnny's Birthday - 2014 Remaster",
  'spotify:track:6Cv05rcW8HWwCC6wyEp1fC'],
 ['George Harrison',
  'Let It Down - 2014 Remaster',
  'spotify:track:5FFruMKbVg8AhwHnX4xBov'],
 ['George Harrison',
  'My Sweet Lord - 2014 Remaster',
  'spotify:track:6vE90mi4yKsQGY3YD2OOv1'],
 ['George Harrison',
  'Out of the Blue - 2014 Remaster',
  'spotify:track:1KHMyFaGvwVQ7ax4yjq4BZ'],
 ['George Harrison',
  'Plug Me In - 2014 Remaster',
  'spotify:track:0tyk2xHVjBd3nk16cGktTG'],
 ['George Harrison',
  'Run of the Mill - 2014 Remaster',
  'spotify:track:4uSlUBg3NVOA77E7wwKFTO'],
 ['George Harrison',
  'Thanks for the Pepperoni - 2014 Remaster',
  'spotify:track:3smkwfPqFsTmwfnBztMXaM'],
 ['George Harrison',
  'The Inner Light (Alternative Take) - Instrumental',
  'spotify:track:7gWPnvhaBFMlQsTBWEGcSC'],
 ['George Harrison',
  'Wah-Wah - 2014 Remaster',
  'spotify:track:5j3aqkMO2fl0s5eaSuVnQ8'],
 ['George Harrison',
  'What Is Life - 2014 Remaster',
  'spotify:track:44fw7RulJyj7dGIi9qR86N'],
 ['George Harrison',
  'While My Guitar Gently Weeps - Live At Madison Square Garden; 2009 Remaster',
  'spotify:track:4Egi6XuC0rbLlXfqmQeuFa'],
 ['Glenn Miller', 'In the Mood', 'spotify:track:1xsY8IFXUrxeet1Fcmk4oC'],
 ['Hans Zimmer', 'Cornfield Chase', 'spotify:track:6pWgRkpqVfxnj3WuIcJ7WP'],
 ['Hans Zimmer',
  'Day One (Interstellar Theme)',
  'spotify:track:4WmB04GBqS4xPMYN9dHgBw'],
 ["Israel Kamakawiwo'ole",
  'Maui Medley',
  'spotify:track:6TSJ3L9pBQsYIlCD5pk7ju'],
 ['James Taylor',
  'You’ve Got a Friend',
  'spotify:track:3nK4hWsTEr7fVXziI5bTmh'],
 ['Jay Ungar', 'Ashoken Farewell', 'spotify:track:2s6pqLeVialgt5l5TTSeas'],
 ['Jeff Buckley',
  'If You Knew - Live at Sin-é, New York, NY - July/August 1993',
  'spotify:track:1nd2JEHXbUuQFDiQzCBpsv'],
 ['Jimi Hendrix', 'One Rainy Wish', 'spotify:track:5Zyv0v4rPcrXjkaeImuodv'],
 ['Jimi Hendrix',
  'Spanish Castle Magic',
  'spotify:track:2KFE98Iw0X23sf4vJYcbLH'],
 ['Jimi Hendrix',
  'Wait Until Tomorrow',
  'spotify:track:2YtVzmZzew1ILUdNueyWd7'],
 ['John Lennon',
  'Imagine - Remastered 2010',
  'spotify:track:7pKfPomDEeI4TPT6EOYjn9'],
 ['John Mayer', 'Queen of California', 'spotify:track:0CETmgFGt8Ne8vLnaLcduU'],
 ['Johnny Cash',
  'I Walk The Line - Single Version',
  'spotify:track:1TKPfF2fvn6gVLVfp3iG4j'],
 ['Joni Mitchell',
  'Mitchell: Urge for Going (Instrumental Arrangement of the B-Side Track of the Joni Mitchell Single "You Turn Me on I\'m a Radio")',
  'spotify:track:1I1u9aTdxxQ7SDLgBB3V7b'],
 ['Kanye West', 'Come to Life', 'spotify:track:5xvXeuxISyXJDRbZZf4uzd'],
 ['Leonard Cohen', 'Chelsea Hotel #2', 'spotify:track:4krhCfJg0znykZoyjeMXRe'],
 ['Leonard Cohen', 'Dear Heather', 'spotify:track:3MTKMphPprAcBFG1uIhzPZ'],
 ['Leonard Cohen',
  "Death of a Ladies' Man",
  'spotify:track:5wrylUGwZugelovhryPYg2'],
 ['Leonard Cohen', 'The Future', 'spotify:track:5l8lYrnPEM1ln3J4XaTcy5'],
 ['Leonard Cohen',
  'You Want It Darker',
  'spotify:track:5zb7npjQqoJ7Kcpq4yD9qn'],
 ['Lingers.On', 'In Lingerie', 'spotify:track:6FH3kGlJbFVJDCG9RcERf7'],
 ['Louis Armstrong',
  'La vie en rose - Single Version',
  'spotify:track:3yYfoYGVpriV4fG9L1ogsD'],
 ['The Lovecats', 'The Lovecats', 'spotify:track:7iJUiiTfnuY5cTIeEBnqHr'],
 ['Ludovico Einaudi', 'Primavera', 'spotify:track:4BMHp3DkI8VLsuB9Kr0pzu'],
 ['Mazzy Star', 'Flowers In December', 'spotify:track:0G6Ws8Gbdt0S7pZeuYmkmm'],
 ['Metallica',
  'Fade To Black (Remastered)',
  'spotify:track:0dqGfCMAGyDgpUAgLNOjWd'],
 ['Wolfgang Amadeus Mozart',
  'Requiem in D Minor, K. 626: III. Sequenz No. 6, Lacrimosa dies illa',
  'spotify:track:4bvzJZXpkI3bkjxMCWOSu1'],
 ['My Chemical Romance',
  'The Light Behind Your Eyes',
  'spotify:track:3HyDpKAuR3e4l6QB7hSB2l'],
 ['Paul McCartney',
  'Here Today - Remixed 2015',
  'spotify:track:0QtnwXDziZN1K55fXuLN6q'],
 ['Paul McCartney',
  'I’ll Follow The Sun - Live At Amoeba 2007',
  'spotify:track:3xT59EeQdq0TPGtOlXXI8t'],
 ['Puscifer', 'The Humbling River', 'spotify:track:69GE6yPZZldvqtgBHrKXxg'],
 ['Ray LaMontagne',
  'Such A Simple Thing',
  'spotify:track:4PuUa8e5s7P3Zv1IdCGIsa'],
 ['Ray Manzarek',
  'Riders on the Storm',
  'spotify:track:3FvYcTXO2QtDY7kZQHku2d'],
 ['Red Hot Chili Peppers', 'Dosed', 'spotify:track:1iFIZUVDBCCkWe705FLXto'],
 ['Sky Cries Mary',
  "Don't Forget The Sky",
  'spotify:track:4sVpjCJRClVetRrdxVBolP'],
 ['Stevie Nicks', 'Landslide', 'spotify:track:5fprEY6WEN1wvFXkgfb22C'],
 ['Stevie Wonder', 'Isn’t She Lovely', 'spotify:track:6wGlAaMfyhKdEPr2zycAnN'],
 ['Taylor Swift',
  'Fearless (Taylor’s Version)',
  'spotify:track:77sMIMlNaSURUAXq5coCxE'],
 ['Taylor Swift',
  'the lakes - bonus track',
  'spotify:track:0eFQWVz0qIxDOvhLpZ40P7'],
 ['The Band',
  'When I Paint My Masterpiece - Remastered',
  'spotify:track:76WChUuOPeIK027IeUgr0l'],
 ['The Beach Boys',
  "I Just Wasn't Made For These Times - Mono",
  'spotify:track:4CuO8TINNqM3D7aUdNQ3zG'],
 ['The Beach Boys',
  "Let's Go Away For A While - Mono",
  'spotify:track:3GsgJI1aBrvUtqX8f3MhKT'],
 ['The Beatles',
  "Don't Let Me Down - Naked Version / Remastered 2013",
  'spotify:track:5BhMoGrz5KzG2fA5uzHjZ1'],
 ['The Beatles',
  'Love Me Do - Remastered 2009',
  'spotify:track:3VbGCXWRiouAq8VyMYN2MI'],
 ['The Chemical Brothers',
  'The Boxer',
  'spotify:track:1EUeDFq2zNP784GPaRs9aH'],
 ['The Cure',
  'A Night like This - 2006 Remaster',
  'spotify:track:7cKCz7gG84i1XLvDeM3ByT'],
 ['The Cure',
  'Disintegration - 2010 Remaster',
  'spotify:track:0zY8t5dC1KQXcPUKByWMJM'],
 ['The Cure',
  'From the Edge of the Deep Green Sea',
  'spotify:track:2vwBL9RVyr0vA4Og5VH0i3'],
 ['The Cure',
  'In Between Days - 2006 Remaster',
  'spotify:track:07CyrZF9eVd02zzIse7tZA'],
 ['The Cure', 'A Letter to Elise', 'spotify:track:4DdXOLc1VMAY34ourCn1Xa'],
 ['The Cure',
  'Lullaby - 2010 Remaster',
  'spotify:track:4d4oXk7O2lEhZ83ivV93li'],
 ['The Cure', 'Underneath The Stars', 'spotify:track:0PKVjYlKw7z3IvKAoxrYTR'],
 ['The Eagles', 'The Desperadoes', 'spotify:track:10ppF835WJMYI5v65gFLZ3'],
 ['The Helio Sequence',
  'Keep Your Eyes Ahead',
  'spotify:track:3yatRBsGMJ7wMoUIgDBzzo'],
 ['The Moldy Peaches',
  'Anyone Else But You',
  'spotify:track:2pKi1lRvXNASy7ybeQIDTy'],
 ['The Strokes', 'Someday', 'spotify:track:7hm4HTk9encxT0LYC0J6oI'],
 ['Traditional',
  'Scarborough Fair (Arr. Parkin)',
  'spotify:track:4wlNPczIullwvmwb4x0ltz'],
 ['Van Morrison',
  'Madame George - 1999 Remaster',
  'spotify:track:1N4MKISvC1ddfRCRQDXDd2'],
 ['Various Artists',
  'The Girl From Ipanema',
  'spotify:track:0JgH7g0kwsIs1THEVqhlUS'],
 ['Víg Mihály',
  'Öreg - From "Werckmeister Harmóniák"',
  'spotify:track:63wMgkXQuomlkW4an4O9b4'],
 ['Willie Nelson', 'Crazy', 'spotify:track:0xqtcLB45iKNfHroi5y1em']]


In [None]:
len(addlist)

In [None]:
addlist2 = [a[2] for a in addlist]

print (len(addlist2), 'items')

while(addlist2):
    sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'), 
                                playlist_id=playlist_id, 
                                tracks=addlist2[-100:])
    addlist2 = addlist2[:-100]
    print("added items, remaining ", len(addlist2))
