In [1]:
# this version reads a chunk of posts with the score embedded
# ranks by summing scores

import os
import glob
import pickle
from datetime import datetime
import time
import dotenv
import pandas as pd
import re
from tqdm import tqdm

import pandas_dedupe

import requests
import requests.auth

import praw

import openai

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# load secrets from .env into environment variables
dotenv.load_dotenv()

print(f"{'Praw:':<20} {praw.__version__ :>10}")
print(f"{'OpenAI:':<20} {openai.version.VERSION :>10}")


Praw:                     7.7.0
OpenAI:                  0.27.4


See README.md
 - objective is to use OpenAI for named entity extraction to extract all the songs form [this reddit thread](https://www.reddit.com/r/AskReddit/comments/12viv4v/what_is_the_prettiest_song_you_ever_heard_in_your/) and make Spotify playlist
 - use Reddit PRAW API to download all the comments (get [Reddit API key](https://www.reddit.com/prefs/apps))
 - use OpenAI API with a prompt like, extract all the songs from this text to CSV get ([OpenAI API key](https://platform.openai.com/account/api-keys))
 - use Spotify API to make a playlist (get [Spotify API key](https://developer.spotify.com/documentation/web-api/tutorials/getting-started))
 - works, needed a lot of scrubbing, but about 1 day of work, wouldn't have been possible to do a 700-song playlist manually without a team of Mechanical Turks or something
 - If I wanted to go nuts, would process comments individually, save a file for each comment's extracted songs, would make it easier to track down what OpenAI gets wrong, have a resumable, retryable, repeatable process and 
 - Spotify playist is [here](https://open.spotify.com/playlist/08YFkbtTV6GBfNtjJ4PHDu?si=f4761d983ac84091) 
 
 needs a .env file per dot-env-template
 

In [2]:
# tokenizer to get accurate token count
import tiktoken

# To get the tokeniser corresponding to a specific model in the OpenAI API:
enc = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
assert enc.decode(enc.encode("hello world")) == "hello world"

def count_tokens(s):
    return len(enc.encode(s))

count_tokens('four score and 7 years go our forefathers brought forth')

13

In [35]:
# a thread 
submission = "12viv4v"

# minimum karma to process a reply 
minkarma = 2

# an output file to accumulate all the responses
savefile = 'bronze.txt'

prompt_prefix="""You will act as a research assistant finding all the artists and track titles mentioned in a series of messages about music, and returning them in a CSV format.
Define a post delimited below by ===
===
post_id: "abcdefg"
post_score: "6996"
I love Yesterday by the Beatles. Also Hotel California from The Eagles. And Bruce Springsteen's Born To Run!
===

Define a CSV format delimited below by ---
---
"post_id","post_score","artist","track"
"abcdefg","6996","The Beatles","Yesterday"
"abcdefg","6996","The Eagles","Hotel California"
"abcdefg","6996","Bruce Springsteen","Born To Run"
---

You will extract all artists and tracks from each post below delimited by ~~~ .
You will return a list of records containing the artist and track extracted from the input, and the post_id and post_score of the post the artist and track is mentioned in.
You will return the records in a CSV format.
The header row should contain `"post_id","post_score","artist","track"`. 
The input is:
"""

# an output file to accumulate all the responses
savefile = 'bronze.txt'

# to speed things we'll cumulate posts til we get to nposts posts or maxchars total chars, whichever comes first
max_post_size=300  # redditor needs to put any songs in 1st couple hundred chars
maxtokens = 1024   # max tokens to send to get_response (with room for prefix and a response)
# maxchars = 6000  # max tokens (words/fragments) is 4096 but I think stuffing the prompt maybe reduces quality?
nposts = 1000 # max posts to combine into a chunk



In [3]:
csv_validate_re = re.compile(r'''
    \s*                # Any whitespace.
    (                  # Start capturing here.
      [^,"']+?         # Either a series of non-comma non-quote characters.
      |                # OR
      "(?:             # A double-quote followed by a string of characters...
          [^"\\]|\\.   # That are either non-quotes or escaped...
       )*              # ...repeated any number of times.
      "                # Followed by a closing double-quote.
      |                # OR
      '(?:[^'\\]|\\.)*'# Same as above, for single quotes.
    )                  # Done capturing.
    \s*                # Allow arbitrary space before the comma.
    (?:,|$)            # Followed by a comma or the end of a string.
    ''', re.VERBOSE)


## Get all comments from a reddit posting

In [4]:
def getPraw():
    return praw.Reddit(user_agent="prettiest_song/0.001", 
                       client_id=os.getenv('CLIENT_ID'), 
                       client_secret=os.getenv('CLIENT_SECRET'))


def getAll(r, submissionId, verbose=True):
    submission = r.submission(submissionId)
    submission.comments.replace_more(limit=None)
    commentsList=submission.comments.list()
    return commentsList


In [5]:
# print(datetime.now())
# r = getPraw()
# res = getAll(r, submission)
# print(datetime.now())

# print("retrieved ", len(res), 'comments')


In [6]:
# # we have a list of comment objects
# # filter comments with at least some karma
# res3 = [r for r in res if r.score >= minkarma]
# print('filtered to ', len(res3), 'comments')
# res3[0].body, res3[0].score


In [159]:
# save so we can reload it later without downloading

# with open('reddit_full.pkl', 'wb') as f:
#     pickle.dump(res3, f)
    
with open('reddit_full.pkl', 'rb') as f:
    res3 = pickle.load(f)


## Extract artists and song titles using OpenAI

In [160]:
# check lengths of posts
shorties = []
big_ones = []
for i in range(len(res3)):
    if len(res3[i].body) <3:
        print (i, res3[i].body)
        shorties.append(i)
    if len(res3[i].body) > 4096:
        print(i, len(res3[i].body))
        big_ones.append(i)
        

423 4162
812 26
8405 4399
11597 Up
17225 5225
21450 W
21621 -🤓
21626 W
21977 :)
22240 t
23405 <3
23481 ✨️
24440 4543
24443 Ye


In [161]:
# avg length
sum([len(r.body) for r in res3]) / len(res3)

78.88987480247964

In [162]:
print (res3[big_ones[0]].body[:500])

Saturn by Sleeping at Last:
https://www.youtube.com/watch?v=dzNvk80XY9s

The version they did with Tim Fain is even more beautiful: 
https://www.youtube.com/watch?v=0nRpeAiur9Q

I'm not good at choosing one thing from a list of favorites as the best, so I've got about 30+ answers that are really a 30+ -way tie, and the one that I would consider as "prettiest" at any given moment is heavily influenced by my current mood. So, it could be any one of these from my "Heart Wrenchingly Beautiful" playl


In [7]:
openai.api_key = os.getenv('OPENAI_API_KEY')


def get_response(messages, prompt_prefix="", verbose=False):

    prompt = prompt_prefix
    
    if type(messages) == list:
        for msg in messages:
            prompt += f"""
~~~
{msg}
~~~
"""
    else:
        prompt += messages
        
    if verbose:
        print(prompt)
        
    # retry loop, have received untrapped 502 error
    RETRIES = 3
    success = False    
    for i in range(RETRIES):
        try:
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo-0301',
                messages=[{"role":"user", 
                           "content": prompt}],
                temperature=0,
            )
            # no exception thrown
            success=True
            break   
        except Exception as error:
            print("An exception occurred:", error)
            print("Retrying chunk...")
            time.sleep(5)
            continue  # try again
    if success:
        # check response payload for any error message?
        response_msg = response['choices'][0]['message']
        if len(response_msg['content'])==0:
            print("there was a problem, content is empty, full payload follows:")
            print(response)
        if verbose:
            print(response_msg)
        return response_msg['content']
    else:
        return None



In [168]:
# for each comment object we will extract the body 
# then submit as part of a prompt to chatgpt

nposts = 1000
slist = res3.copy()
total_posts = len(slist)
print("processing %d posts" % total_posts)

outdir = 'out'
logdir = 'logs'
# make sure out and logs are empty
for f in glob.glob('%s/*' % outdir):
    os.remove(f)
for f in glob.glob('%s/*' % logdir):
    os.remove(f)
count = 0
c = 0

while(slist):  # still comments to process
    print(datetime.now(), c)
    tokens_to_date = count_tokens(prompt_prefix1)
    reply_ids = []
    messages = []
    for _ in range(nposts):  # add up to 100 posts to the prompt
        if slist:
            # make sure no single post > max_post_size, truncate in place as nec 
            slist[0].body = slist[0].body[:max_post_size]
            if tokens_to_date + count_tokens(slist[0].body) < maxtokens:
            # total post content < maxchars
            # if chars_to_date + len(slist[0].body) < maxchars:
                reply = slist.pop(0)
                reply_ids.append(reply.id)
                body = reply.body
                
                messages.append(f"""
post_id: "{reply.id}"
post_score: "{reply.score}"
{body}
"""
                )
                tokens_to_date += count_tokens(messages[-1])
                # chars_to_date += len(messages[-1])
                c += 1
            
    response = get_response(messages, prompt_prefix1, verbose=False)
    if response is None:   # FAIL - retries exhausted
        print('Bailing to next chunk')
        continue

    # do basic validation and cleanup
    # should check first line is valid header and doesn't reverse columns
    csv_valid, csv_err = [], []
    for line in response.split("\n"):
        try:
            csv_values = csv_validate_re.findall(line)
            if len(csv_values) == 4:
                csv_valid.append(line)
            else:
                csv_err.append(line)
        except:
            csv_err.append(line)
    csv_output = "\n".join(csv_valid)
        
    with open("%s/%04d.csv" % (outdir, count), 'w') as outfile:
        outfile.write(csv_output)
    
    if csv_err:
        with open("%s/%04d.err" % (outdir, count), 'w') as outfile:
            outfile.write("\n".join(csv_err))
        
    with open("%s/%04d.log" % (logdir, count), 'w') as logfile:
        logfile.write(str(reply_ids))
        logfile.write('\n\n===== raw prompt =====\n\n')        
        logfile.write("\n=====\n".join(messages))
        logfile.write('\n\n===== raw response =====\n\n')
        logfile.write(response)
        logfile.write('\n\n===== failed validation =====\n\n')
        logfile.write("\n".join(csv_err))
 
    count += 1
#     print(c)
    outcount = total_posts-len(slist)
    print(outcount, end=' ')
    
    
print()
print(datetime.now())



2023-05-18 18:54:25.074793
processing 24681 posts
You will act as a research assistant finding all the artists and track titles mentioned in a series of messages about music, and returning them in a CSV format.
Define a post delimited below by ===
===
post_id: "abcdefg"
post_score: "6996"
I love Yesterday by the Beatles. Also Hotel California from The Eagles. And Bruce Springsteen's Born To Run!
===

Define a CSV format delimited below by ---
---
"post_id","post_score","artist","track"
"abcdefg","6996","The Beatles","Yesterday"
"abcdefg","6996","The Eagles","Hotel California"
"abcdefg","6996","Bruce Springsteen","Born To Run"
---

You will extract all artists and tracks from each post below delimited by ~~~ .
You will return a list of records containing the artist and track extracted from the input, and the post_id and post_score of the post the artist and track is mentioned in.
You will return the records in a CSV format.
The header row should contain `"post_id","post_score","artist",

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhc2dyv\",\"6996\",\"Erik Satie\",\"Gymnop\u00e9dies\"\n\"jhc674k\",\"2824\",\"Lord Huron\",\"The night we met\"\n\"jhc7jrt\",\"3383\",\"Don McLean\",\"Vincent (Starry, Starry Night)\"\n\"jhc7zwm\",\"1997\",\"Fleet Foxes\",\"White Winter Hymnal\"\n\"jhcbhk9\",\"3592\",\"Neil Young\",\"Harvest Moon\"\n\"jhc6oud\",\"6144\",\"Simon & Garfunkel\",\"Scarborough Fair\"\n\"jhc9rnl\",\"2212\",\"John Denver\",\"Annie\u2019s Song\"\n\"jhcia7f\",\"2006\",\"Etta James\",\"At last\"\n\"jhc7aon\",\"1175\",\"Samuel Barber\",\"Adagio for Strings\"\n\"jhc5xa6\",\"2076\",\"Joni Mitchell\",\"Both Sides Now\"\n\"jhbm8ne\",\"2461\",\"Sigur Ros\",\"Hoppipolla\"\n\"jhcaeo5\",\"2467\",\"Jim Croce\",\"Time in a Bottle\"\n\"jhc1e1u\",\"3675\",\"The Beatles\",\"In my Life\"\n\"jhcoug4\",\"953\",\"Death Cab for Cutie\",\"I Will Follow You Into the Dark\"\n\"jhbktrn\",\"13941\",\"Claude Debussy\",\"Claire de Lune\"\n\"jhbmw6x\",\"2584\",\"Righteous

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhc0cfu\",\"313\",\"Iron & Wine\",\"\"\n\"jhc0tgd\",\"181\",\"The Beatles\",\"Yesterday\"\n\"jhc0tgd\",\"181\",\"The Beatles\",\"The Long and Winding Road\"\n\"jhc0tgd\",\"181\",\"The Beatles\",\"Golden Slumbers\"\n\"jhbmc53\",\"205\",\"Led Zeppelin\",\"The Rain Song\"\n\"jhbyt7t\",\"70\",\"Enya\",\"Boadicea\"\n\"jhbu0h1\",\"125\",\"Johann Sebastian Bach\",\"Jesu, Joy of Man\u2019s Desiring\"\n\"jhbpoxs\",\"174\",\"Yaz\",\"Only You\"\n\"jhc8h1z\",\"300\",\"The Verve\",\"Bittersweet Symphony\"\n\"jhc8ga6\",\"98\",\"The Cure\",\"Plainsong\"\n\"jhbwxij\",\"190\",\"Tori Amos\",\"Little earthquakes\"\n\"jhbwxij\",\"190\",\"Sinead O\u2019Connor\",\"Troy\"\n\"jhbwxij\",\"190\",\"Pearl Jam\",\"Black\"\n\"jhbwxij\",\"190\",\"Fleetwood Mac\",\"Silver springs\"\n\"jhc9x14\",\"63\",\"The Mamas and the Papas\",\"Dream a little dream\"\n\"jhckgdh\",\"64\",\"Gordon Lightfoot\",\"The Wreck of the Edmund Fitzgerald\"\n\"jhc8ndd\",\"33\"

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhc9zmh\",\"18\",\"Ella Fitzgerald and Louis Armstrong\",\"Summertime\"\n\"jhcgzw1\",\"17\",\"Orbital\",\"Halcyon On and On\"\n\"jhcnm5d\",\"17\",\"Smashing Pumpkins\",\"Mellon Collie and the Infinite Sadness\"\n\"jhcnn05\",\"17\",\"Imogen Heap\",\"Hide and Seek\"\n\"jhct2q3\",\"17\",\"A-Ha\",\"Take on Me (MTV unplugged version)\"\n\"jhct2q3\",\"17\",\"Boz Scaggs\",\"Harbor Lights\"\n\"jhct2q3\",\"17\",\"Fleetwood Mac\",\"Songbird\"\n\"jhct2q3\",\"17\",\"Sixpence None the Richer\",\"Kiss Me\"\n\"jhct2q3\",\"17\",\"Van Morrison\",\"Into the Mystic\"\n\"jhct2q3\",\"17\",\"Adele\",\"Someone Like You\"\n\"jhbn9hi\",\"36\",\"Sigur Ros\",\"Hoppipolla\"\n\"jhc6crs\",\"31\",\"Antonio Vivaldi\",\"Four Seasons\"\n\"jhceijc\",\"35\",\"Prince\",\"Purple Rain\"\n\"jhciah9\",\"35\",\"Van Morrison\",\"Into the Mystic\"\n\"jhc0wti\",\"70\",\"K. D. Lang\",\"Hallelujah\"\n\"jhc9bga\",\"61\",\"The Flaming Lips\",\"Do you realize\"\n\"jhc5

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhcag18\",\"12\",\"Ray Charles\",\"Georgia\"\n\"jhcag18\",\"12\",\"Beethoven\",\"Fur Elise\"\n\"jhcc86f\",\"9\",\"Gustav Holst\",\"Jupiter\"\n\"jhcey9s\",\"11\",\"Blue Rodeo\",\"Try\"\n\"jhcgf2f\",\"10\",\"Bowser\",\"Peaches\"\n\"jhcgwdd\",\"11\",\"Explosions in the Sky\",\"Your Hand in Mine\"\n\"jhcgwdd\",\"11\",\"The Appleseed Cast\",\"A Dream for Us\"\n\"jhckp9m\",\"9\",\"Barry DeVorzon and Perry Botkin Jr.\",\"Nadia's Theme\"\n\"jhcm0at\",\"10\",\"Silversun Pickups\",\"Lazy Eye\"\n\"jhco4h2\",\"10\",\"Bonnie Raitt\",\"I can\u2019t make you love me\"\n\"jhco4h2\",\"10\",\"Ron Sexsmith\",\"Gold in them hills\"\n\"jhco4h2\",\"10\",\"Peter Gabriel\",\"Book of love\"\n\"jhcoau2\",\"11\",\"ABBA\",\"Fernando\"\n\"jhcqyyn\",\"10\",\"Brandi Carlisle\",\"The Story\"\n\"jhcr6xc\",\"10\",\"Beach House\",\"PPP\"\n\"jhd8ati\",\"9\",\"Taylor Swift\",\"All Too Well\"\n\"jhd8ati\",\"9\",\"Taylor Swift\",\"Delicate\"\n\"jhd8ati\",\"9

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhcn691\",\"7\",\"Red Hot Chili Peppers\",\"Wet Sand\"\n\"jhcnos2\",\"8\",\"Minnie Riperton\",\"Les Fleur\"\n\"jhco2lb\",\"7\",\"Ray Peterson\",\"Tell Laura I Love Her\"\n\"jhco2lb\",\"7\",\"John Lennon\",\"Beautiful Boy\"\n\"jhco2lb\",\"7\",\"REM\",\"Everybody Hurts\"\n\"jhctq43\",\"8\",\"Sam Brown\",\"Stop\"\n\"jhctq43\",\"8\",\"Jackie DeShannon\",\"What the world needs now is love\"\n\"jhctq43\",\"8\",\"Ennio Morricone\",\"Un Amico\"\n\"jhctq43\",\"8\",\"ABBA\",\"One of us\"\n\"jhctq43\",\"8\",\"ABBA\",\"The winner takes it all\"\n\"jhctq43\",\"8\",\"Terrence Jay\",\"One blood\"\n\"jhctq43\",\"8\",\"Gary Jules\",\"Mad world\"\n\"jhctq43\",\"8\",\"John Legend\",\"All of me\"\n\"jhctq43\",\"8\",\"Mayer Haw\",\"All better\"\n\"jhc7zvj\",\"43\",\"Aerosmith\",\"Dream On\"\n\"jhfdhn\",\"24\",\"Taylor Swift\",\"Labyrinth\"\n\"jhfdhn\",\"24\",\"Taylor Swift\",\"my tears ricochet\"\n\"jhcb23b\",\"13\",\"Band of Horses\",\"The

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhcky69\",\"4\",\"Astrud Gilberto\",\"Photograph\"\n\"jhcky69\",\"4\",\"Massive Attack\",\"Exchange\"\n\"jhcky69\",\"4\",\"Dido\",\"It Comes and It Goes\"\n\"jhcky69\",\"4\",\"Henri Mancini\",\"Moon River\"\n\"jhcl00i\",\"5\",\"Queen\",\"You Take My Breath Away\"\n\"jhcl0vf\",\"4\",\"Queen\",\"The show must go on\"\n\"jhcl31r\",\"6\",\"Bill Withers\",\"Lovely Day\"\n\"jhcnjr7\",\"6\",\"Billie Joel\",\"She's Got a Way\"\n\"jhcnjr7\",\"6\",\"JVKE\",\"Golden Hour\"\n\"jhcnn5o\",\"5\",\"The Flaming Lips\",\"Do You Realize\"\n\"jhcnprm\",\"4\",\"Elliott Smith\",\"Between The Bars\"\n\"jhcnq00\",\"5\",\"Eagles\",\"New Kid in Town\"\n\"jhcns7l\",\"5\",\"Porcupine Tree\",\"Lazarus\"\n\"jhcnwnt\",\"5\",\"Final Fantasy X\",\"Road to Zanarkand\"\n\"jhcnzdy\",\"6\",\"Unknown\",\"Anything my boyfriend sings\"\n\"jhco5ox\",\"4\",\"Billie Eilish\",\"Ocean Eyes\"\n\"jhcodbb\",\"5\",\"Sufjan Stevens\",\"\"\n\"jhcp0uk\",\"6\",\"Frente\",

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhcnuwh\",\"5\",\"Cocteau Twins\",\"Heaven or alas Vegas\"\n\"jhcnzem\",\"3\",\"NieR Automata Original Soundtrack\",\"Peaceful Sleep\"\n\"jhco51w\",\"2\",\"\",\"Can\u2019t help falling in love\"\n\"jhcog67\",\"4\",\"Gregorio Allegri\",\"Miserere Mei\"\n\"jhcovtp\",\"4\",\"Tchaikovsky\",\"Hymn of the Cherubim\"\n\"jhcpao5\",\"4\",\"Hand Zimmer\",\"Oogway Ascends Kung Fu Panda soundtrack\"\n\"jhcpevu\",\"4\",\"\",\"Married Life\"\n\"jhcpnkv\",\"4\",\"Chopin\",\"Nocturne Op 9 No 2\"\n\"jhcps2k\",\"4\",\"Austrud Gilberto\",\"The Girl From Ipanema\"\n\"jhcpt5q\",\"5\",\"\",\"For River\"\n\"jhcpt5q\",\"5\",\"\",\"To Zanarkand\"\n\"jhcpyns\",\"3\",\"Weyes Blood\",\"Do You Need My Love\"\n\"jhcr23l\",\"5\",\"Kai\",\"Vanilla\"\n\"jhcr23l\",\"5\",\"Alt-J\",\"Bloodflood part ii\"\n\"jhcr23l\",\"5\",\"The cranberries\",\"Linger\"\n\"jhcr23l\",\"5\",\"Palace\",\"Give me the rain\"\n\"jhcr2c1\",\"3\",\"Led Zeppelin\",\"All My Love\"\

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhcc5c8\",\"3\",\"Nick Drake\",\"Northern Sky\"\n\"jhcc5c8\",\"3\",\"The Tremeloes\",\"Silence is Golden\"\n\"jhcc5c8\",\"3\",\"The Left Banke\",\"Walk Away Renee\"\n\"jhcc5c8\",\"3\",\"The Beach Boys\",\"Wouldn\u2019t It Be Nice\"\n\"jhcce6r\",\"3\",\"Josh Ritter\",\"The Curse\"\n\"jhcceah\",\"3\",\"Johann Pachelbel\",\"Canon in D\"\n\"jhccj17\",\"3\",\"Joni Mitchell\",\"A Case of You\"\n\"jhcda4x\",\"3\",\"Eric Whitacre\",\"Sleep\"\n\"jhcda4x\",\"3\",\"VOCES8\",\"Sleep\"\n\"jhcdg2x\",\"3\",\"The Rolling Stones\",\"She\u2019s a rainbow\"\n\"jhcdrnw\",\"3\",\"Philip Glass and the Carducci String Quartet\",\"String Quartet No.3, \u201cMishima\u201d: VI. Mishima/Closing\"\n\"jhce8yx\",\"3\",\"Ivoriancell\",\"Sleep Deprived Machine\"\n\"jhce8yx\",\"3\",\"Ivoriancell\",\"Echo from an Empty Void\"\n\"jhcevp7\",\"3\",\"Queen\",\"Love of my life\"\n\"jhcf7fk\",\"3\",\"The Weeknd\",\"Professional\"\n\"jhcfvpi\",\"3\",\"The Ever

{
  "content": "\"post_id\",\"post_score\",\"artist\",\"track\"\n\"jhcsrql\",\"3\",\"Yann Tiersen\",\"Summer 78\"\n\"jhcsrql\",\"3\",\"Hans Zimmer\",\"\"\n\"jhcsrxb\",\"3\",\"Rolling Stones\",\"She\u2019s A Rainbow\"\n\"jhctc1e\",\"3\",\"Ludovico Einauldi\",\"Experience\"\n\"jhctex8\",\"3\",\"Pentatonix\",\"Imagine\"\n\"jhctex8\",\"3\",\"Cody Fry\",\"I Hear a Symphony\"\n\"jhctjkt\",\"3\",\"Lana Del Rey\",\"Chemtrails over the Country Club\"\n\"jhctjkt\",\"3\",\"Fleetwood Mac\",\"Silver Springs\"\n\"jhctjkt\",\"3\",\"Hozier\",\"Like Real People Do\"\n\"jhctls5\",\"3\",\"Iron and Wine\",\"The Trapeze Swinger\"\n\"jhctzbd\",\"3\",\"Jeff Buckley\",\"Lover, You Should\u2019ve Come Over\"\n\"jhctzq4\",\"3\",\"Ween\",\"The argus\"\n\"jhcumbc\",\"3\",\"Red Hot Chili Peppers\",\"Midnight\"\n\"jhcumbc\",\"3\",\"John Frusciante\",\"Song to Sing When I\u2019m Lonely\"\n\"jhcuxy6\",\"3\",\"Christian hymn\",\"Just as I Am\"\n\"jhcvl3g\",\"3\",\"Childish Gambino\",\"The Night Me and Your Mama Met\"\

KeyboardInterrupt: 

In [None]:
# may still have to tweak the files to get them to load
# should inspect .err files and clean up if possible

# filelist = glob.glob('%s/*.csv' % outdir)

# output_df = None
# count = 0
# for f in sorted(filelist):
#     print(f)
#     try:
#         tempdf = pd.read_csv("%s" % (f), header=None)
#     except Exception as exc:
#         print(str(exc))
#         continue
#     colcount = len(tempdf.columns)
#     if len(tempdf.columns) != 4:
#         print('%s has %d columns, skipped' % (f, colcount))
#         continue
        
#     # ok
#     # truncate header row if it looks like a header
#     if tempdf.iloc[0][0]=='post_id':
#         tempdf = tempdf[1:]
#     # set the header explicitly
#     tempdf.columns=["post_id","post_score","artist","track"]

#     if output_df is not None:        
#         output_df = pd.concat([output_df, tempdf], axis=0)
#     else:
#         output_df = tempdf
#     count += 1
#     if count % 10 == 0:
#         print(count, end=' ')

        
        
        

In [13]:
filelist = glob.glob('%s/*.csv' % outdir)

with open(savefile, 'w') as outfile:
    for f in tqdm(filelist, desc = 'File concat'):
        with open(f, 'r') as infile:
            data = infile.read().strip()
            outfile.write(data)
            outfile.write("\n")
    
    

File concat: 100%|██████████| 448/448 [00:00<00:00, 1426.66it/s]


In [169]:
tempdf = pd.read_csv(savefile, header=None)
tempdf.columns=['post_id','post_score','artist','track']
tempdf

Unnamed: 0,post_id,post_score,artist,track
0,post_id,post_score,artist,track
1,jhc5dm1,99,,Ventura Highway
2,jhc8ga6,98,The Cure,Plainsong
3,jhd1s5a,97,Erik Satie,Gnossienne No. 1
4,jhbq7oi,96,Savage Garden,"Truly, madly, deeply"
...,...,...,...,...
22915,jhd15nl,0,Brandi Carlisle,The Joke
22916,jhd15nl,0,Kelly Clarkson,Run Run Run
22917,jhegm4p,0,,Murmaider
22918,jhd15nl,0,Foo Fighters,Everlong


In [170]:
tempdf = tempdf.drop_duplicates() \
    .sort_values("post_score", ascending=False)
# drop header row
tempdf = tempdf.loc[~(tempdf['post_id'].str.strip()=='post_id')]
# clean up post_score to valid int
tempdf['post_score'] = tempdf['post_score'].apply(lambda s: "".join([c for c in s if c.isdigit()]))
tempdf['post_score'] = tempdf['post_score'].apply(lambda x: x[-5:])
tempdf['post_score'] = tempdf['post_score'].apply(lambda s: int(s) if s else 1)
# drop missing tracks, cleanup track
tempdf = tempdf.drop(tempdf.loc[tempdf['track'].isna()].index)
tempdf['track'] = tempdf['track'].str.strip()
tempdf = tempdf.drop(tempdf.loc[tempdf['track'].str.lower()=='unknown'].index)
tempdf = tempdf.drop(tempdf.loc[tempdf['track'].str.lower()=='track'].index)
# cleanup artist
tempdf['artist'] = tempdf['artist'].str.strip()
tempdf.loc[tempdf['artist'].isna(), 'artist'] = ''
tempdf

Unnamed: 0,post_id,post_score,artist,track
1,jhc5dm1,99,,Ventura Highway
2,jhc8ga6,98,The Cure,Plainsong
3,jhd1s5a,97,Erik Satie,Gnossienne No. 1
4,jhbq7oi,96,Savage Garden,"Truly, madly, deeply"
5,jhcoug4,953,Death Cab for Cutie,I Will Follow You Into the Dark
...,...,...,...,...
22885,jhdm514,0,,Snow on the beach
22886,jhdwbl5,0,,Every time I encounter the prettiest song
22887,jhck57a,0,Jethro Tull,Life’s A Long Song
22888,jhdf3sj,0,The Rolling Stones,Beast of Burden


In [171]:
# save bronze
tempdf.to_csv(savefile, index=False)
len(tempdf)

22918

In [172]:
df = pd.read_csv(savefile) \
    .sort_values(["artist", "track"]) \
    .reset_index(drop=True)

df.to_csv('silver.csv', index=False)

print(len(df))

df


22918


Unnamed: 0,post_id,post_score,artist,track
0,jhfmr74,1,$uicideboy$,My Flaws Burn Through My Skin Like Demonic Fla...
1,jhe9i7a,1,*nsync,selfish
2,jhcvus1,1,-,-
3,jhcp5xc,6,.Hack//SIGN,Key of the Twilight
4,jhcgr19,1,070 Shake,Under the Moon
...,...,...,...,...
22913,jhedblu,1,,xue hua piao piao
22914,jherzqu,1,,yebbas heartbreak
22915,jher576,1,,your mother
22916,jhggg7t,1,,zelda's lullaby


In [33]:
df.loc[df['artist'].isna(), 'artist'] = ''
df.loc[df['track'].isna(), 'track'] = ''

artist_df = df[['artist','track']].groupby('artist') \
    .count() \
    .sort_values('track', ascending=False) \
    .reset_index()
artist_df = artist_df.drop(artist_df.loc[artist_df['artist'].str.strip().str.lower().str.startswith('unknown')].index)
artist_df = artist_df.drop(artist_df.loc[artist_df['artist']==''].index)

artist_df.head(20)

Unnamed: 0,artist,track
0,beatles,425
1,radiohead,248
2,simon garfunkel,181
3,fleetwood mac,176
4,pink floyd,165
5,jeff buckley,156
6,led zeppelin,151
7,bon iver,138
8,enya,134
9,sigur ros,128


In [34]:
# proofread / dedupe artists
# may want to run this whole sequence a couple of times and update df, silver.csv

artist_map={}
nposts = 1000
prompt_prefix2 = """I want you act as a proofreader. I will provide you a list of recording artists or composers.
I would like you to review each input artist for any spelling errors or abbreviations and provide the corrected full artist without abbreviation. 
You will provide them in CSV format, one record per line in the following order: input_artist, corrected_artist. Enclose each field in double-quotes.
The input is:

"""
slist = sorted(artist_df['artist'].tolist())
c=0

while(slist):  # still artists to process
    print(datetime.now(), c)

    prompt = ""
    tokens_to_date = count_tokens(prompt_prefix2)
    for _ in range(nposts):  # add up to 100 posts to the prompt
        if slist:
            if tokens_to_date + count_tokens(slist[0]) < maxtokens:
                artist = f'{slist.pop(0)}\n'
                prompt += artist
                tokens_to_date += count_tokens(artist)
                c+=1
        else:
            break
            
    response = get_response(prompt, prompt_prefix2, verbose=False)
    if response is None:   # FAIL - retries exhausted
        print('Bailing to next chunk')
        continue
    
    if not response:
        print("there was a problem, check the payload")
        
    for line in response.split("\n"):
        try:
            csv_values = csv_validate_re.findall(line)
            if len(csv_values) == 2:
                artist_input, artist_correct = csv_values[0], csv_values[1]
                while not artist_input[0].isalnum():
                    artist_input=artist_input[1:]
                while not artist_input[-1].isalnum():
                    artist_input=artist_input[:-1]
                while not artist_correct[0].isalnum():
                    artist_correct=artist_correct[1:]
                while not artist_correct[-1].isalnum():
                    artist_correct=artist_correct[:-1]
                if artist_input.lower() != artist_correct.lower():
                    artist_map[artist_input]=artist_correct
                    print(f'"{artist_input}", "{artist_correct}"')
            else:
                print('%d values found' % len(csv_values), line)
        except Exception as error:
            print('error', line)
            print(error)
            continue
            
        
print(datetime.now())


2023-05-19 08:16:03.212805
"1975", "The 1975"
"5th Dimension", "The 5th Dimension"
"Above Beyond", "Above & Beyond"
"Airborne Toxic Event", "The Airborne Toxic Event"
"Alan Parsons Project", "The Alan Parsons Project"
"Album Leaf", "The Album Leaf"
"Alina Baraz Galimatias", "Alina Baraz & Galimatias"
"Aline Baraz", "Alina Baraz"
"Alison Krauss Union Station", "Alison Krauss and Union Station"
"Allman Brothers", "The Allman Brothers Band"
2023-05-19 08:19:25.302378
"andy grammar", "Andy Grammer"
"aoife odonovan", "Aoife O'Donovan"
"assassins creed black flag", "Assassin's Creed Black Flag"
2023-05-19 08:22:40.989873
"audra mae the forest rangers", "Audra Mae & The Forest Rangers"
"aunties harp", "Auntie's Harp"
"avalanches", "The Avalanches"
"avett brothers", "The Avett Brothers"
"aviators", "The Aviators"
"b-52s", "The B-52's"
"backseat lovers", "The Backseat Lovers"
"bad bunny and bomba estereo", "Bad Bunny & Bomba Estéreo"
"band perry", "The Band Perry"
"banes world", "Bane's World"


"ludoviczo", "Ludovico"
"ludwig goransson", "Ludwig Göransson"
"ludwig goransson, nirvana, the moody blues", "Ludwig Göransson, Nirvana, The Moody Blues"
"luke sital-singh", "Lukas Sital-Singh"
"lumineers", "The Lumineers"
"lz", "Led Zeppelin"
"magnetic fields", "The Magnetic Fields"
"mago de oz", "Mägo de Oz"
"mairead ni mhaonaigh", "Máiread Ní Mhaonaigh"
"mamas and the papas", "The Mamas & The Papas"
"mamas the papas", "The Mamas & The Papas"
"mamma mia soundtrack", "Mamma Mia! Soundtrack"
"mandalorian", "The Mandalorian"
"maneskin", "Måneskin"
"marcin przybylowicz", "Marcin Przybyłowicz"
"marcos fernandez", "Marcos Fernández"
"margot and the nuclear so and sos", "Margot and the Nuclear So and So's"
"maria gadu", "Maria Gadú"
"mark oconnor and yo-yo ma", "Mark O'Connor and Yo-Yo Ma"
"marmalade", "The Marmalade"
"mars volta", "The Mars Volta"
"marshall tucker band", "The Marshall Tucker Band"
"martin odonnell and michael salvatori", "Martin O'Donnell and Michael Salvatori"
"marty odon

"straylove", "Stray Love"
"strfkr", "Starfucker"
"strokes", "The Strokes"
"stupendium", "The Stupendium"
"style council", "The Style Council"
"stylistics", "The Stylistics"
"submarines", "The Submarines"
"sugarcubes", "The Sugarcubes"
"sundays", "The Sundays"
"suo gan", "Suo Gân"
"super8 tab", "Super8 & Tab"
"supremes", "The Supremes"
"susanne sundfor", "Susanne Sundfør"
"swell season", "The Swell Season"
"tallest man on earth", "The Tallest Man on Earth"
"taxpayers", "The Taxpayers"
"taylor swift the civil wars", "Taylor Swift, The Civil Wars"
"tea party", "The Tea Party"
"telephone", "Téléphone"
"temper trap", "The Temper Trap"
"temptations", "The Temptations"
"teskey brothers", "The Teskey Brothers"
"thedoo", "TheDooo"
"thin lizzie", "Thin Lizzy"
"this moral coil", "This Mortal Coil"
"thrillseekers", "The Thrillseekers"
"thuy", "Thúy"
"tiesto", "Tiësto"
"tiesto and andain", "Tiësto and Andain"
"tito tarantula", "Tito & Tarantula"
2023-05-19 09:59:14.099268
An exception occurred: HTT

In [36]:
artist_map

{'1975': 'The 1975',
 '5th Dimension': 'The 5th Dimension',
 'Above Beyond': 'Above & Beyond',
 'Airborne Toxic Event': 'The Airborne Toxic Event',
 'Alan Parsons Project': 'The Alan Parsons Project',
 'Album Leaf': 'The Album Leaf',
 'Alina Baraz Galimatias': 'Alina Baraz & Galimatias',
 'Aline Baraz': 'Alina Baraz',
 'Alison Krauss Union Station': 'Alison Krauss and Union Station',
 'Allman Brothers': 'The Allman Brothers Band',
 'andy grammar': 'Andy Grammer',
 'aoife odonovan': "Aoife O'Donovan",
 'assassins creed black flag': "Assassin's Creed Black Flag",
 'audra mae the forest rangers': 'Audra Mae & The Forest Rangers',
 'aunties harp': "Auntie's Harp",
 'avalanches': 'The Avalanches',
 'avett brothers': 'The Avett Brothers',
 'aviators': 'The Aviators',
 'b-52s': "The B-52's",
 'backseat lovers': 'The Backseat Lovers',
 'bad bunny and bomba estereo': 'Bad Bunny & Bomba Estéreo',
 'band perry': 'The Band Perry',
 'banes world': "Bane's World",
 'bangles': 'The Bangles',
 'barr b

In [37]:
# check the map for reasonableness
# it does pretty smart stuff like map nin to Nine Inch Nails 
# but if it screws up that artist probably won't show up in spotify
df['artist2'] = df['artist'].apply(lambda s: artist_map[s] if s in artist_map else s)

In [38]:
df.loc[df['artist'] != df['artist2']]

Unnamed: 0,post_id,post_score,artist,track,artist2,artist_dedupe,artist_index
12,jhddxdh,1,1975,Oh Caroline,The 1975,1975,712
366,jhetwhh,1,isaac albeniz-andres segovia,Leyenda,Isaac Albéniz and Andrés Segovia,isaac albeniz-andres segovia,5271
730,jhg9hb1,1,andy grammar,Fresh eyes,Andy Grammer,andy grammar,4082
848,jhe7hbu,1,aoife odonovan,Stanley Park,Aoife O'Donovan,aoife odonovan,4031
1040,jhcpxg4,1,assassins creed black flag,End of the earth,Assassin's Creed Black Flag,assassins creed black flag,4121
...,...,...,...,...,...,...,...
22839,jhftp1n,4,beatles,in my life,The Beatles,beatles,0
22844,jheyb0b,1,edith piaf,la vie en rose,Édith Piaf,edith piaf,74
22859,jhejsy1,2,shins,nobel prize,The Shins,shins,733
22893,jhd43ki,151,simon garfunkel,the 59th street bridge song,Simon & Garfunkel,simon garfunkel,615


In [39]:
# apply the map
df['artist'] = df['artist'].apply(lambda s: artist_map[s] if s in artist_map else s)

In [41]:
# for missing artists, try to impute the artist based on the track
missing_artist_df = df.loc[df['artist'].isna()]
missing_artist_df


Unnamed: 0,post_id,post_score,artist,track,artist2,artist_dedupe,artist_index


In [14]:
missing_track_map={}

prompt_prefix3 = """I will provide you a list of well-known recordings.
I would like you to review each recording, and provide the name of the artist most closely associated with the recording.
You will provide them in CSV format, one record per line in the following order: recording, artist. Enclose each field in double-quotes.
The input is:

"""

slist = missing_artist_df['track'] \
    .dropna() \
    .str.lower() \
    .str.strip() \
    .drop_duplicates() \
    .tolist()

slist.sort()
c=0

while(slist):  # still artists to process
    print(datetime.now(), c)

    tokens_to_date = count_tokens(prompt_prefix3)
    prompt = ''
    for _ in range(nposts):  # add up to nposts posts to the prompt
        if slist:
            if tokens_to_date + count_tokens(slist[0]) < maxtokens:
                track = f'"{slist.pop(0)}"\n'
                prompt += track
                tokens_to_date += count_tokens(track)
                c+=1
        else:
            break
    response = get_response(prompt, prompt_prefix3, verbose=False)
            
    if response is None:   # FAIL - retries exhausted
        print('Bailing to next chunk')
        continue
    
    if not response:
        print("nothing returned ... check returned dict for errors")
        
    for line in response.split("\n"):
        try:
            csv_values = csv_validate_re.findall(line)
            if len(csv_values) == 2:
                track_input, artist_correct = csv_values[0], csv_values[1]
                while not track_input[0].isalnum():
                    track_input=track_input[1:]
                while not track_input[-1].isalnum():
                    track_input=track_input[:-1]
                while not artist_correct[0].isalnum():
                    artist_correct=artist_correct[1:]
                while not artist_correct[-1].isalnum():
                    artist_correct=artist_correct[:-1]
                if artist_correct.lower() != 'unknown' and artist_correct != 'N/A':
                    missing_track_map[track_input]=artist_correct
                    print(f'"{track_input}", "{artist_correct}"')                    
            else:
                print('error', line)
        except Exception as error:
            print('error', line)
            print(error)
            continue
            

"you didn't have to cut me off", "Gotye"
"1, 2, buckle my shoes", "Mother Goose Club"
"10,000 days (wings pt. 2", "Tool"
"23 - jimmy eat world", "Jimmy Eat World"
"2nd part of layla", "Derek and the Dominos"
"3 little birds", "Bob Marley and the Wailers"
"4 gatsu", "Joe Hisaishi"
"50 years", "Uncanny X-Men"
"a bitter sweet genesis for him and her", "Mili"
"a change is gonna come", "Sam Cooke"
"a clockwork orange", "Wendy Carlos"
"a day in the", "The Beatles"
"a hermit thrush song", "Aaron Copland"
"a kissed-out red floatboat", "Elvis Costello"
"a long way past the past", "Pat Metheny"
"a mortal heart", "The Paper Kites"
"a nightingale sang in berkeley square", "Vera Lynn"
"a picture no artist could paint", "Harry Nilsson"
"a rendition of blame it on my youth on npr about 14 years ago", "Jamie Cullum"
"a ring ding ding ding d-ding baa aramba baa baa barooumba. the duck song pretty good also", "Bryant Oden"
"a river runs through you", "Yiruma"
"a song for a winter’s night", "Gordon Light

An exception occurred: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
Retrying chunk...
"deep forest green", "Mumford & Sons"
"deep mutual respect", "The Front Bottoms"
error "define "prettiest"","Aaron West and The Roaring Twenties"
string index out of range
"della's lullaby", "Ryan Gosling & Emma Stone"
"derecho de nacimiento", "Natalia Lafourcade"
"dernière danse", "Indila"
"desolation row", "Bob Dylan"
"diablo tristram theme", "Matt Uelmen"
"diary", "Bread"
"did you get enough love my little dove why do you cry", "The Caretaker"
"die", "Jenny Hval"
"dimash", "Dimash Kudaibergen"
"dimming of the day", "Richard Thompson"
"disintegration", "The Cure"
"dissolved girl", "Massive Attack"
"divenire", "Ludovico Einaudi"
"divine damsel of devastation", "The HU"
"do you love me?,if i can’t love her,evergreen,daylily", "Cast of SpongeBob SquarePants: The Broadway Musical"
"don't break my heart", "UB40"
"don't go", "Yazoo"
"don't sto

"i hear a symphony", "The Supremes"
"i hear a symphony x pluto projector", "Billie Eilish"
"i hope you dance", "Lee Ann Womack"
"i know it's over", "The Smiths"
"i know its over", "The Smiths"
"i know there's an answer", "The Beach Boys"
"i lied", "Joyner Lucas"
"i like formidable alot", "Stromae"
"i love", "Tom T. Hall"
"i may not always love yoooouuu", "The Beatles"
"i see the light", "Mandy Moore and Zachary Levi"
"i stay away", "Alice in Chains"
"i still believe i hear", "Frank Turner"
"i will", "The Beatles"
"i will always love you— dolly parton", "Dolly Parton"
"i will follow you into the dark", "Death Cab for Cutie"
"i will follow you you into the dark", "Death Cab for Cutie"
"i will never fall in love again untill i found her", "Dionne Warwick"
"i will possess your heart", "Death Cab for Cutie"
"i wish you love/ que reste-t-il de nos amours", "Nat King Cole"
"i z us", "Kendrick Lamar"
"i'd rather go blind", "Etta James"
"i'll be seeing you", "Billie Holiday"
"i'll never find an

"nannou", "Aphex Twin"
"naruto shippuden gentle hands", "Toshio Masuda"
"neapolitan dreams", "Lisa Mitchell"
"nearer my god to thee", "The Titanic Band"
"nearly forgot my broken heart", "Chris Cornell"
"nee paratha paarvaiku oru nandri", "Ilaiyaraaja"
"nessun dorma", "Luciano Pavarotti"
"neu roses (transgressor’s song", "Hozier"
"never enough", "Loren Allred"
"never going back again", "Fleetwood Mac"
"never knew i needed", "Ne-Yo"
"never let me go", "Florence + The Machine"
"new favorite", "Alison Krauss & Union Station"
"new york nagaram", "A. R. Rahman"
"night flight", "Vangelis"
"nights in white satin", "The Moody Blues"
"nights in white satin-tmb", "The Moody Blues"
"njosnavelin", "Sigur Rós"
"no big deal and it's reprises from amphibia", "Various Artists"
"no quarter", "Led Zeppelin"
"no surprises", "Radiohead"
"no time for caution", "Hans Zimmer"
"nobel prize", "The Shins"
"nocturne", "Secret Garden"
"nocturne in e minor", "Frédéric Chopin"
"non nobis and te deum", "Patrick Doyle

"someone make a spotify playlist of these songs in order of popularity in this thread. so many good ones and so many i havent heard. \n\nwhat a joy these are. making my day going through this. currently crying to unchained melody", "The Righteous Brothers"
"someone to watch over me", "Ella Fitzgerald"
"something", "The Beatles"
"sometimes you can’t make it on your own", "U2"
"somewhere only we know", "Keane"
"somewhere over the rainbow", "Judy Garland"
"somewhere over the rainbow - all versions", "Israel Kamakawiwo'ole"
"sonata", "Ludwig van Beethoven"
"sonder", "Tash Sultana"
"song", "Phoebe Bridgers"
"song for a winter’s night", "Gordon Lightfoot"
"song for charley", "Bloc Party"
"song for the asking", "Simon & Garfunkel"
"song of the ancients nier gestalt ost", "Keiichi Okabe"
"song to the siren", "Tim Buckley"
"songbird", "Fleetwood Mac"
"sonic space", "Tame Impala"
"sonnet", "The Verve"
"soon", "My Bloody Valentine"
"sora", "Yoko Shimomura"
"soul heart", "Cigarettes After Sex"
"so

"tool-assisted speedcore", "Venetian Snares"
"top 3", "J. Cole"
"total eclipse of the heart", "Bonnie Tyler"
"tough choice between send in the clowns or homeward bound. choir kid lol", "Stephen Sondheim"
"tracks", "Gary Numan"
"trancing pulse", "Minami Kuribayashi"
"transatlantacism", "Death Cab for Cutie"
"transatlanticism", "Death Cab for Cutie"
"trapeze swinger", "Iron & Wine"
"trois gymnopédies", "Erik Satie"
"troubles", "Alicia Keys"
"troy", "Sinead O'Connor"
"true colors", "Cyndi Lauper"
"try a little tenderness", "Otis Redding"
"tu tu picolo iddio", "Giacomo Puccini"
"tupelo honey", "Van Morrison"
"twinkle, twinkle little star", "Jane Taylor"
"two songs", "Lenka"
"two sparrows in a hurricane", "Tanya Tucker"
"two times two", "Bob Seger"
"ultraviolence", "Lana Del Rey"
"un bel di vedremo", "Giacomo Puccini"
"unaccompanied cello suites #1 in g major", "Johann Sebastian Bach"
"unchained melody", "The Righteous Brothers"
"under a blanket of blue / la vie en rose", "Ella Fitzgerald"


In [15]:
missing_track_map 


{"you didn't have to cut me off": 'Gotye',
 '1, 2, buckle my shoes': 'Mother Goose Club',
 '10,000 days (wings pt. 2': 'Tool',
 '23 - jimmy eat world': 'Jimmy Eat World',
 '2nd part of layla': 'Derek and the Dominos',
 '3 little birds': 'Bob Marley and the Wailers',
 '4 gatsu': 'Joe Hisaishi',
 '50 years': 'Uncanny X-Men',
 'a bitter sweet genesis for him and her': 'Mili',
 'a change is gonna come': 'Sam Cooke',
 'a clockwork orange': 'Wendy Carlos',
 'a day in the': 'The Beatles',
 'a hermit thrush song': 'Aaron Copland',
 'a kissed-out red floatboat': 'Elvis Costello',
 'a long way past the past': 'Pat Metheny',
 'a mortal heart': 'The Paper Kites',
 'a nightingale sang in berkeley square': 'Vera Lynn',
 'a picture no artist could paint': 'Harry Nilsson',
 'a rendition of blame it on my youth on npr about 14 years ago': 'Jamie Cullum',
 'a ring ding ding ding d-ding baa aramba baa baa barooumba. the duck song pretty good also': 'Bryant Oden',
 'a river runs through you': 'Yiruma',
 '

In [16]:
missing_track_map = {k: v for k, v in missing_track_map.items() if v != 'N/A'}

In [17]:
# check for reasonableness and apply
df['track']=df['track'].astype(str)
df['artist2'] = df.apply(lambda row: missing_track_map[row.track.lower().strip()] if row.artist=="" and row.track.lower().strip() in missing_track_map else row.artist, axis=1)
df.loc[df['artist'] != df['artist2']]



Unnamed: 0,post_id,post_score,artist,track,artist2
21048,jhcawoc,1,,23 - Jimmy Eat World,Jimmy Eat World
21050,jhgr6v3,1,,2nd part of Layla,Derek and the Dominos
21051,jhinrlz,1,,3 little birds,Bob Marley and the Wailers
21052,jhdmguf,1,,4 gatsu,Joe Hisaishi
21053,jhcxfa0,44,,50 years,Uncanny X-Men
...,...,...,...,...,...
22913,jhedblu,1,,xue hua piao piao,Fei Yu-ching
22914,jherzqu,1,,yebbas heartbreak,Yebba
22915,jher576,1,,your mother,Kendrick Lamar
22916,jhggg7t,1,,zelda's lullaby,Koji Kondo


In [18]:
df['artist'] = df.apply(lambda row: missing_track_map[row.track.lower().strip()] if row.artist=="" and row.track.lower().strip() in missing_track_map else row.artist, axis=1)



In [19]:
def fix_leading_trailing(s):
    """First and last should be alphanumeric"""
    # regex prob better if re.match('^\W+(.*)\W+$',playerName): 

    while len(s) and not s[0].isalnum():
        s = s[1:]
        
    while len(s) and not s[-1].isalnum():
        s = s[:-1]
    
    return s.lower().strip()
        

In [43]:
df[['artist', 'artist2', 'post_score']] \
    .groupby('artist') \
    .agg( \
         count=('post_score', 'count'), \
        ) \
    .reset_index() \
    .sort_values('count', ascending=False) 

Unnamed: 0,artist,count
335,The Beatles,425
4529,radiohead,248
292,Simon & Garfunkel,181
2182,fleetwood mac,176
4437,pink floyd,165
...,...,...
2377,giuseppe allegri,1
2381,glassjaw,1
2382,glay,1
2386,glenn medeiros,1


In [21]:
df['artist_dedupe'] = df['artist'].apply(fix_leading_trailing)
df = df.drop(df.loc[df['artist_dedupe'].str.startswith('unknown')].index)
df = df.drop(df.loc[df['artist_dedupe'].str.startswith('various artists')].index)
df = df.drop(df.loc[df['artist_dedupe']=='none'].index)
df = df.drop(df.loc[df['artist_dedupe']==''].index)
df = df.drop(df.loc[df['artist_dedupe']=='post_score'].index)


In [22]:
df['artist_dedupe'] = df['artist_dedupe'].apply(lambda s: s[4:] if s[:4]=='the ' else s)

df.loc[df['artist_dedupe']=='band', 'artist_dedupe']='the band'



In [44]:
dedupe_df = df[['artist', 'artist_dedupe', 'post_score']] \
    .groupby(['artist', 'artist_dedupe']) \
    .count() \
    .sort_values('post_score', ascending=False) \
    .reset_index() \
    .reset_index() 

dedupe_df


Unnamed: 0,index,artist,artist_dedupe,post_score
0,0,The Beatles,beatles,425
1,1,radiohead,radiohead,248
2,2,Simon & Garfunkel,simon garfunkel,181
3,3,fleetwood mac,fleetwood mac,176
4,4,pink floyd,pink floyd,165
...,...,...,...,...
5716,5716,ghostly kisses,ghostly kisses,1
5717,5717,gianni schicchi,gianni schicchi,1
5718,5718,gilbert and sullivan,gilbert and sullivan,1
5719,5719,ginchy,ginchy,1


In [45]:
# reset dedupe learned settings
# !rm dedupe_dataframe_learned_settings 
# !rm dedupe_dataframe_training.json   
dedupe_df2 = pandas_dedupe.dedupe_dataframe(dedupe_df, ['artist_dedupe'])


Importing data ...
Reading from dedupe_dataframe_learned_settings
Clustering...


  dedupe_df2 = pandas_dedupe.dedupe_dataframe(dedupe_df, ['artist_dedupe'])


# duplicate sets 5706


In [25]:
dedupe_df2

Unnamed: 0,index,artist,artist_dedupe,post_score,cluster id,confidence
0,0,the beatles,beatles,415,0,0.744077
1,1,radiohead,radiohead,234,2,0.532698
2,2,simon garfunkel,simon garfunkel,181,615,1.000000
3,3,fleetwood mac,fleetwood mac,174,4,0.744077
4,4,pink floyd,pink floyd,161,6,0.744077
...,...,...,...,...,...,...
6456,6456,joey pecoraro,joey pecoraro,1,5716,1.000000
6457,6457,joey gx,joey gx,1,5717,1.000000
6458,6458,joey cape,joey cape,1,5718,1.000000
6459,6459,joesef,joesef,1,5719,1.000000


In [46]:
dedupe_df['cluster id'] = dedupe_df2['cluster id']
name2i = {a: i for i, a in zip(dedupe_df['cluster id'].tolist(), dedupe_df['artist_dedupe'].tolist())}
df['artist_index'] = df['artist_dedupe'].apply(lambda s: name2i[s])
df


Unnamed: 0,post_id,post_score,artist,track,artist2,artist_dedupe,artist_index
0,jhfmr74,1,uicideboy,My Flaws Burn Through My Skin Like Demonic Fla...,uicideboy,uicideboy,2919
1,jhe9i7a,1,nsync,selfish,nsync,nsync,1452
3,jhcp5xc,6,hack//sign,Key of the Twilight,hack//sign,hack//sign,5493
4,jhcgr19,1,070 shake,Under the Moon,070 shake,070 shake,2529
5,jhe6tha,1,"10,000 maniacs",Verdi Cries,"10,000 maniacs","10,000 maniacs",694
...,...,...,...,...,...,...,...
22913,jhedblu,1,Fēi Yùchīng,xue hua piao piao,Fēi Yùchīng,fei yu-ching,835
22914,jherzqu,1,yebba,yebbas heartbreak,yebba,yebba,571
22915,jher576,1,kendrick lamar,your mother,kendrick lamar,kendrick lamar,310
22916,jhggg7t,1,koji kondo,zelda's lullaby,koji kondo,koji kondo,276


In [51]:
df.loc[(df['artist_index'].isna())]
df.loc[(df['artist_index']==0)]

Unnamed: 0,post_id,post_score,artist,track,artist2,artist_dedupe,artist_index
16627,jhd8ati,9,taylor swift,All Too Well,taylor swift,taylor swift,0
16628,jhcai6j,1,taylor swift,All of the Girls You Loved Before,taylor swift,taylor swift,0
16629,jhcoq43,5,taylor swift,August,taylor swift,taylor swift,0
16630,jhcfuz1,1,taylor swift,August,taylor swift,taylor swift,0
16631,jhdmlmh,1,taylor swift,Bigger than the whole sky,taylor swift,taylor swift,0
...,...,...,...,...,...,...,...
21469,jhhl7ao,2,taylor swift ft. bon iver,Exile,taylor swift ft. bon iver,taylor swift ft. bon iver,0
21761,jheudy5,1,taylor swift,Invisible String,taylor swift,taylor swift,0
21762,jhd0gda,1,taylor swift,Invisible string,taylor swift,taylor swift,0
22136,jhcsrjq,6,taylor swift,Our song,taylor swift,taylor swift,0


In [57]:
dedupe_df['cluster id']=dedupe_df['cluster id']
dedupe_df

Unnamed: 0,index,artist,artist_dedupe,post_score,cluster id
0,0,The Beatles,beatles,425,15
1,1,radiohead,radiohead,248,16
2,2,Simon & Garfunkel,simon garfunkel,181,17
3,3,fleetwood mac,fleetwood mac,176,18
4,4,pink floyd,pink floyd,165,19
...,...,...,...,...,...
5716,5716,ghostly kisses,ghostly kisses,1,5701
5717,5717,gianni schicchi,gianni schicchi,1,5702
5718,5718,gilbert and sullivan,gilbert and sullivan,1,5703
5719,5719,ginchy,ginchy,1,5704


In [None]:
df.loc[(df['artist_index'].isna())]

In [62]:
tempdf = dedupe_df[['artist_dedupe', 'artist', 'cluster id', 'post_score']] \
    .groupby(['artist_dedupe', 'cluster id']) \
    .agg( \
         count=('post_score', 'count'), \
         artist=('artist', 'first') \
        ) \
    .reset_index() \
    .sort_values('count', ascending=False) 
tempdf

Unnamed: 0,artist_dedupe,cluster id,count,artist
0,070 shake,2529,1,070 shake
3800,muse,284,1,muse
3820,myslovitz,2500,1,myslovitz
3819,myrkur,2498,1,myrkur
3818,mylene farmer,1647,1,Mylène Farmer
...,...,...,...,...
1905,frenzal rhomb,5603,1,frenzal rhomb
1904,frente,341,1,frente
1903,french montana,5604,1,french montana
1902,french 79,5605,1,french 79


In [63]:
i2name = {i: a for i, a in zip(tempdf['cluster id'].tolist(), tempdf['artist'].tolist())}
df['artist'] = df.apply(lambda r: i2name[r.artist_index], axis=1)
df

Unnamed: 0,post_id,post_score,artist,track,artist2,artist_dedupe,artist_index
0,jhfmr74,1,uicideboy,My Flaws Burn Through My Skin Like Demonic Fla...,uicideboy,uicideboy,2919
1,jhe9i7a,1,nsync,selfish,nsync,nsync,1452
3,jhcp5xc,6,hack//sign,Key of the Twilight,hack//sign,hack//sign,5493
4,jhcgr19,1,070 shake,Under the Moon,070 shake,070 shake,2529
5,jhe6tha,1,"10,000 maniacs",Verdi Cries,"10,000 maniacs","10,000 maniacs",694
...,...,...,...,...,...,...,...
22913,jhedblu,1,Fēi Yùchīng,xue hua piao piao,Fēi Yùchīng,fei yu-ching,835
22914,jherzqu,1,yebba,yebbas heartbreak,yebba,yebba,571
22915,jher576,1,kendrick lamar,your mother,kendrick lamar,kendrick lamar,310
22916,jhggg7t,1,koji kondo,zelda's lullaby,koji kondo,koji kondo,276


In [64]:
len(df['artist'].unique())

5691

In [65]:
df.loc[df['artist'].str.find('carp') >=0]

Unnamed: 0,post_id,post_score,artist,track,artist2,artist_dedupe,artist_index
2795,jhdaead,1,carpenter brut,You're Mine,carpenter brut,carpenter brut,3972
9438,jhc74vp,360,karen carpenter,(They Long to Be) Close to You,karen carpenter,karen carpenter,729
9439,jhckvdl,2,karen carpenter,Close to you,karen carpenter,karen carpenter,729
9440,jhd9qxu,3,karen carpenter,Superstar,karen carpenter,karen carpenter,729
9441,jhd5edz,4,karen carpenter,We've Only Just Begun,karen carpenter,karen carpenter,729
9442,jhd4ywl,2,karen carpenter,we've only just begun,karen carpenter,karen carpenter,729
11320,jhctf4h,2,mary chapin carpenter,"10,000 Miles",mary chapin carpenter,mary chapin carpenter,676
11321,jhhffdp,1,mary chapin carpenter,"10,000 Miles",mary chapin carpenter,mary chapin carpenter,676
11322,jhvlx0x,2,mary chapin carpenter,"10,000 miles",mary chapin carpenter,mary chapin carpenter,676
11323,jheilsa,1,mary chapin carpenter,"10,000 miles",mary chapin carpenter,mary chapin carpenter,676


In [66]:
df['track'] = df['track'].apply(fix_leading_trailing)


In [67]:
df.groupby('track') \
    .count() \
    .reset_index() \
    .sort_values('artist', ascending=False) \
    .head(20)


Unnamed: 0,track,post_id,post_score,artist,artist2,artist_dedupe,artist_index
3649,hallelujah,166,166,166,166,166,166
4475,in my life,66,66,66,66,66,66
10558,what a wonderful world,65,65,65,65,65,65
3447,god only knows,62,62,62,62,62,62
1806,clair de lune,61,61,61,61,61,61
8343,somewhere over the rainbow,56,56,56,56,56,56
8394,songbird,56,56,56,56,56,56
10905,yesterday,52,52,52,52,52,52
6022,moonlight sonata,52,52,52,52,52,52
2873,fade into you,52,52,52,52,52,52


In [68]:
df = df.drop(df.loc[df['track']=='unknown'].index)
df = df.drop(df.loc[df['track']=='cover'].index)
df = df.drop(df.loc[df['track']=='version'].index)
df = df.drop(df.loc[df['track']=='anything'].index)
df = df.drop(df.loc[df['track']=='none'].index)
df = df.drop(df.loc[df['track']==''].index)
df = df.drop(df.loc[df['track'].str.startswith('no track')].index)
df = df.drop(df.loc[df['track'].str.startswith('no artist')].index)
df = df.drop(df.loc[df['track'].str.startswith('various')].index)


In [69]:
# tempdf = df[['artist', 'post_score']] \
#     .groupby('artist') \
#     .sum() \
#     .reset_index() 

# tempdf.loc[tempdf['post_score']> 2].to_csv('x.csv', index=False)

In [70]:
df = df[['artist', 'track', 'post_score']].groupby(["artist", "track"]) \
    .sum() \
    .reset_index() \
    .sort_values('post_score', ascending=False)

df.head(20)



Unnamed: 0,artist,track,post_score
3528,claude debussy,claire de lune,14007
923,The Beatles,yesterday,7901
4476,eagles,hotel california,7682
4855,erik satie,gymnopédies,7003
716,Simon & Garfunkel,scarborough fair,6163
9231,neil young,harvest moon,5371
358,Israel Kamakawiwo'ole,over the rainbow,5093
3738,cranberries,dreams,4371
8654,mazzy star,fade into you,3981
820,The Beach Boys,god only knows,3896


In [71]:
df.loc[df['post_score'] >4].to_csv('silver.csv', index=False)


In [72]:
df=pd.read_csv('silver.csv')
df

Unnamed: 0,artist,track,post_score
0,claude debussy,claire de lune,14007
1,The Beatles,yesterday,7901
2,eagles,hotel california,7682
3,erik satie,gymnopédies,7003
4,Simon & Garfunkel,scarborough fair,6163
...,...,...,...
1450,grateful dead,black muddy river,5
1451,metallica,one,5
1452,billie eilish,i love you,5
1453,vance joy,riptide,5


## Load into a Spotify playlist


In [73]:
client_credentials_manager = SpotifyClientCredentials(client_id=os.getenv('SPOTIFY_CLIENT_ID'), 
                                                      client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
                                                      )

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


In [74]:
# check artists
df = pd.read_csv("silver.csv")
df.drop_duplicates() \
    .dropna() \
    .sort_values(["artist", "track"])

dedupe = {}
fail_list = []
artist_map = {}
for index, artist, title, score in df.itertuples():
    artist = str(artist)
    if artist in dedupe:
        continue
    dedupe[artist]=1
    query_str = 'artist:%s' % (artist)
    artist_results = sp.search(q=query_str, type='artist', limit=3, offset=0, market='US')
    artist_names = [artist['name'] for artist in artist_results['artists']['items']]
    if artist_names:
        if artist.lower() != artist_names[0].lower():
            artist_map[artist] = artist_names[0]
            print(artist, '->', artist_names[0])
    else:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)

# then clean up manually as appropriate

cranberries -> The Cranberries
not found: andrew lloyd webber, sarah brightman - pie jesu
not found: hans zimmer and radiohead - interstellar intro
not found: glen hansard and marketa irglova - falling slowly
dream academy -> The Dream Academy
hollies -> The Hollies
not found: harry waters jr., marvin berry, and starlighters - earth angel (will you be mine
goo goo dolls -> The Goo Goo Dolls
alison krauss and union station -> Alison Krauss & Union Station
seal -> Seals and Crofts
yaz -> Yazoo
america -> The All-American Rejects
cat stevens -> Yusuf / Cat Stevens
heart -> Tom Petty and the Heartbreakers
not found: john prine and bonnie raitt - angel from montgomery
frames -> The Frames
selena -> Selena Gomez
not found: orville peck paul cauthen - unchained melody
flaming lips -> The Flaming Lips
moulin rouge -> Original Broadway Cast of Moulin Rouge! The Musical
foundations -> The Foundations
fray -> The Fray
smashing pumpkins -> The Smashing Pumpkins
Crosby, Stills, Nash -> Crosby, Stil

In [75]:
artist_map

{'cranberries': 'The Cranberries',
 'dream academy': 'The Dream Academy',
 'hollies': 'The Hollies',
 'goo goo dolls': 'The Goo Goo Dolls',
 'alison krauss and union station': 'Alison Krauss & Union Station',
 'seal': 'Seals and Crofts',
 'yaz': 'Yazoo',
 'america': 'The All-American Rejects',
 'cat stevens': 'Yusuf / Cat Stevens',
 'heart': 'Tom Petty and the Heartbreakers',
 'frames': 'The Frames',
 'selena': 'Selena Gomez',
 'flaming lips': 'The Flaming Lips',
 'moulin rouge': 'Original Broadway Cast of Moulin Rouge! The Musical',
 'foundations': 'The Foundations',
 'fray': 'The Fray',
 'smashing pumpkins': 'The Smashing Pumpkins',
 'Crosby, Stills, Nash': 'Crosby, Stills & Nash',
 'copland': 'Aaron Copland',
 'cinematic orchestra': 'The Cinematic Orchestra',
 'babyface': 'Babyface Ray',
 'Les Misérables': 'Les Misérables Cast',
 'jose gonzalez': 'José González',
 'doors': '3 Doors Down',
 'mozart': 'Wolfgang Amadeus Mozart',
 'black crowes': 'The Black Crowes',
 'boa': 'bôa',
 'viv

In [77]:
ignore_list = [
'seal',  # -> Seals and Crofts
'america', # -> The All-American Rejects
'heart',  # -> Tom Petty and the Heartbreakers
'nan',  # -> Nancy Sinatra
'selena',  # -> Selena Gomez
'babyface',  # -> Babyface Ray
'doors',  # -> 3 Doors Down
'joni jam',  # -> Joni James
'flamingos',  # -> Flamingosis
'phil',  # -> Phil Collins
'eric johnson',  # -> Eric D. Johnson
'las',  # -> Sleeping At Last
'bowser',  # -> Dean Bowser
'jason isbell',  # -> Jason Isbell and the 400 Unit
'enigma',  # -> Enigma Norteño
'in',  # -> Falling In Reverse
'rem',  # -> Rema
'train',  # -> Meghan Trainor
'ole',  # -> Ole-Bjørn Talstad
'death',  # -> Five Finger Death Punch
'la la land',  # -> Landon Cube
'sarah',  # -> Sarah McLachlan
'adeem',  # -> Adeem the Artist
'nico',  # -> Nicki Nicole
'mum',  # -> Mumford & Sons
'berlin',  # -> Berliner Philharmoniker
'ren',  # -> La Arrolladora Banda El Limón De Rene Camacho
'pink',  # -> PinkPantheress
'man man',  # -> Gucci Mane
'south park',  # -> South Park Mexican
'to the moon',  # -> A Rocket To The Moon
'lp',  # -> LP Giobbi
'air',  # -> Air Supply
]

for k in ignore_list:
    try:
        artist_map.pop(k)
    except:
        pass





In [78]:
artist_map.get('air')

In [79]:
df['artist'] = df['artist'].apply(lambda s: artist_map[s] if s in artist_map else s)
df.head(20)


Unnamed: 0,artist,track,post_score
0,claude debussy,claire de lune,14007
1,The Beatles,yesterday,7901
2,eagles,hotel california,7682
3,erik satie,gymnopédies,7003
4,Simon & Garfunkel,scarborough fair,6163
5,neil young,harvest moon,5371
6,Israel Kamakawiwo'ole,over the rainbow,5093
7,The Cranberries,dreams,4371
8,mazzy star,fade into you,3981
9,The Beach Boys,god only knows,3896


In [80]:
df.to_csv('silver.csv', index=False)


In [81]:

# check tracks

df = pd.read_csv("silver.csv")

dedupe = {}
mylist = []
fail_list = []
artist_list, track_list, uri_list, album_list, score_list = [], [], [], [], []
orig_artist, orig_track = [], []

for index, artist, title, score in df.itertuples():
    query_str = 'artist:%s track:%s' % (artist, title)
    track_results = sp.search(q=query_str, type='track', limit=1, offset=0, market='US')
    results = track_results['tracks']['items']
    
    if results:
        r = results[0]
        # failsafe to never put same track twice
        if dedupe.get(r['id']):
            continue
        dedupe[r['id']]=True
        if title.lower() != r['name'].lower():
            print ("%s|%s : %s|%s" % (artist, title, r['artists'][0]['name'], r['name']))
        uri_list.append(r['uri'])
        artist_list.append(r['artists'][0]['name'])
        track_list.append(r['name'])
        album_list.append(r['album']['name'])
        orig_artist.append(artist)
        orig_track.append(title)
        score_list.append(score)
#         print('  ',
#               r['artists'][0]['name'],'|',
#               r['name'], '|',
#               r['album']['name'],'|',
#               r['album']['release_date'],'|',
#               r['popularity'])
    else:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)
        

The Beatles|yesterday : The Beatles|Yesterday - Remastered 2009
eagles|hotel california : Eagles|Hotel California - 2013 Remaster
erik satie|gymnopédies : Erik Satie|3 Gymnopédies: No. 1 Lent et douloureux
Simon & Garfunkel|scarborough fair : Simon & Garfunkel|Scarborough Fair / Canticle
not found: neil young - harvest moon
The Beach Boys|god only knows : The Beach Boys|God Only Knows - Mono
The Beatles|in my life : The Beatles|In My Life - Remastered 2009
don mclean|vincent (starry, starry night : Don McLean|Vincent (Starry, Starry Night)
Sigur Rós|hoppipolla : Sigur Rós|Hoppípolla
The Beatles|blackbird : The Beatles|Blackbird - Remastered 2009
john denver|annie’s song : John Denver|Annie's Song
otis redding|sittin' on) the dock of the bay : Otis Redding|(Sittin' On) the Dock of the Bay
jim croce|i got a name : Jim Croce|I'll Have To Say I Love You In A Song
samuel barber|adagio for strings : Samuel Barber|Barber: Adagio for Strings
Israel Kamakawiwo'ole|somewhere over the rainbow/won

The Beatles|julia : The Beatles|Julia - Remastered 2009
tina turner|simply the best : Tina Turner|Simply the Best - Live in Arnhem
not found: bee gees - inmortality
mark knopfler|romeo and juliet : Mark Knopfler|Romeo And Juliet - Live At Gibson Amphitheatre / June 28th 2006
not found: etta james - i’d rather go blind
babyface|every time i close my eyes : Babyface|Every Time I Close My Eyes (with Kenny G)
not found: vv - heartfull of ghosts
disturbed|sound of silence : Disturbed|The Sound of Silence
hozier|cherry wine : Hozier|Cherry Wine - Live
not found: Les Misérables Cast - i dreamed a dream
The Cure|lovesong : The Cure|Lovesong - 2010 Remaster
nobuo uematsu|to zanarkand : Nobuo Uematsu|To Zanarkand (Final Fantasy X)
not found: Simon & Garfunkel - for emily wherever i may find her
not found: i prevail - iprevail
eagles|desperado : Eagles|Desperado - 2013 Remaster
not found: tash sultana - sonder
Wolfgang Amadeus Mozart|confutatis : Wolfgang Amadeus Mozart|Requiem in D Minor, K. 626

The Cure|disintegration : The Cure|Disintegration - 2010 Remaster
Arvo Pärt|spiegel im spiegel : Arvo Pärt|Spiegel im Spiegel - Version for Violin and Piano
not found: taylor swift ft. bon iver - delicate
The Beatles|abbey road : Peel, David & The Apple Band|The wonderful world of Abbey Road
not found: Rainbow Kitten Surprise - catch the rainbow
not found: Oasis Ministry - jesus built my hotrod
not found: taylor swift ft. bon iver - invisible string
The Smiths|that joke isn’t funny anymore : The Smiths|That Joke Isn't Funny Anymore
not found: barbra streisand - partners
not found: Tori Kelly - pretty good year
Björk|yoga : Björkliden|Yoga Nidra
The Beatles|pet sounds : The Pocket Gods|Pet Sounds V Sergeant Peppers
The Beatles|dear prudence : The Beatles|Dear Prudence - Remastered 2009
The Beatles|all you need is love : The Beatles|All You Need Is Love - Remastered 2009
not found: The Front Bottoms - deep mutual respect
yes|and you and i : Yes|And You and I - 2003 Remaster
radiohead|str

not found: XXXTENTACION - stars
grateful dead|attics of my life : Grateful Dead|Attics of My Life - 2013 Remaster
not found: dire straits - dire straits
pyotr ilyich tchaikovsky|waltz of the flowers : Pyotr Ilyich Tchaikovsky|Tchaikovsky: The Nutcracker, Op. 71, Act II: No. 13, Waltz of the Flowers
tori amos|winter : Tori Amos|Winter - 2015 Remaster
not found: pyotr ilyich tchaikovsky - 5th symphony, movement 2
dave matthews band|41 : Dave Matthews Band|#41
not found: etta james - i'd rather go blind
Simon & Garfunkel|a bridge over troubled water : Simon & Garfunkel|Song for the Asking
not found: lakme - flower duet
The Beatles|hey jude : The Beatles|Hey Jude - Remastered 2015
not found: Sigur Rós - hoppipola
la la land|mia and sebastian’s theme : Kyle Landry|Mia and Sebastian's Theme
The Beatles|a day in the life : The Beatles|A Day In The Life - Remastered 2009
The Beatles|because : The Beatles|Because - Remastered 2009
Grover Washington, Jr.|just the two of us : Grover Washington, J

not found: The Band CAMINO - atlantic city
not found: joni mitchell - roses blue
yann tiersen|comptine d'un autre été : Yann Tiersen|Comptine d'un autre été, l'après-midi
not found: hack//sign - key of the twilight
not found: Izzamuzzic - somewhere over the rainbow
queen|las palabras de amor : Queen|Las Palabras De Amor (The Words Of Love) - Remastered 2011
joe hisaishi|one summer's day : Joe Hisaishi|One Summer Day
led zeppelin|bron-yr-aur : Led Zeppelin|Bron-Yr-Aur - Remaster
eagles|new kid in town : Eagles|New Kid in Town - 2013 Remaster
paul mccartney|calico skies : Paul McCartney|Calico Skies - 2020 Remaster
george harrison|my sweet lord : George Harrison|My Sweet Lord - 2014 Remaster
van morrison|astral weeks : Van Morrison|Astral Weeks - 1999 Remaster
paul mccartney|junk : Paul McCartney|Junk - 2011 Remaster
not found: reinbert de leeuw - reinbert de leeuw's renditions
The Beatles|michelle : The Beatles|Michelle - Remastered 2009
not found: Minnie Riperton - loving you
The Beatl

In [84]:
gold_df = pd.DataFrame({'score': score_list,
                        'input_artist': orig_artist,
                        'artist': artist_list,
                        'input_track': orig_track,
                        'track': track_list,
                        'album': album_list,
                        'uri': uri_list})

with pd.option_context("display.max_rows", 9999):
    display(gold_df)



Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,14007,claude debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
1,7901,The Beatles,The Beatles,yesterday,Yesterday - Remastered 2009,Help! (Remastered),spotify:track:3BQHpFgAp4l80e1XslIjNI
2,7682,eagles,Eagles,hotel california,Hotel California - 2013 Remaster,Hotel California (2013 Remaster),spotify:track:40riOy7x9W7GXjyGp4pjAv
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
4,6163,Simon & Garfunkel,Simon & Garfunkel,scarborough fair,Scarborough Fair / Canticle,"Parsley, Sage, Rosemary And Thyme",spotify:track:3g2fYZW5v2od8KIF7VktT0
5,5093,Israel Kamakawiwo'ole,Israel Kamakawiwo'ole,over the rainbow,Over the Rainbow,Alone In Iz World,spotify:track:3oQomOPRNQ5NVFUmLJHbAV
6,4371,The Cranberries,The Cranberries,dreams,Dreams,"Everybody Else Is Doing It, So Why Can't We?",spotify:track:4JGKZS7h4Qa16gOU3oNETV
7,3981,mazzy star,Mazzy Star,fade into you,Fade Into You,So Tonight That I Might See,spotify:track:1LzNfuep1bnAUR9skqdHCK
8,3896,The Beach Boys,The Beach Boys,god only knows,God Only Knows - Mono,Pet Sounds (Original Mono & Stereo Mix),spotify:track:6iGU74CwXuT4XVepjc9Emf
9,3865,The Beatles,The Beatles,in my life,In My Life - Remastered 2009,Rubber Soul (Remastered),spotify:track:3KfbEIOC7YIv90FIfNSZpo


In [89]:
# inspect where the track name differs
with pd.option_context("display.max_rows", 999):
    display(gold_df.loc[gold_df['input_artist'].str.lower().str[:8] != gold_df['artist'].str.lower().str[:8]])
    

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
87,311,The Verve,The Cover Crew,bittersweet symphony,Bittersweet Symphony (Acoustic Version) [The V...,"Acoustified Hits, Vol. 2",spotify:track:714doH50K9qbrE9py6izzV
178,83,john cale,Caleb & John,hallelujah,Hallelujah Feeling,Hallelujah Feeling,spotify:track:5Gyg9RufRKGzTtzafw2bK6
210,61,The Carpenters,The Ventures,close to you,(They Long To Be) Close To You,The Ventures Play The Carpenters,spotify:track:5JgDMH2jLvHzdNSr5Yzvj4
297,37,doors,The Doors,riders on the storm,Riders on the Storm,L.A. Woman,spotify:track:14XWXWv5FoCbFzLksawpEe
317,32,america,American Folk Channel,the boxer,The Boxer,American Country Folk for the Trucks 2,spotify:track:6jLXjtanB0Yp82aAV7VGsN
381,23,flamingos,The Flamingos,i only have eyes for you,I Only Have Eyes for You,Flamingo Serenade,spotify:track:3YdKJzcoMZMacISlpY4QoP
396,22,The Cure,The Rain Library,show,A Winter Shower,39 Restless Sleep Rain Cure,spotify:track:2pf8CXAaI4PZPHkBJqccS4
433,19,The Carpenters,The Ventures,superstar,Superstar,The Ventures Play The Carpenters,spotify:track:6UUDZo4OIkvTNOcmgZNdLG
497,16,las,The La's,there she goes,There She Goes,The La's,spotify:track:0SMkzFGJOBFDI9KfYD55L0
501,16,The Beatles,"Peel, David & The Apple Band",abbey road,The wonderful world of Abbey Road,Bring Back The Beatles,spotify:track:6zZU0UM3IWCGqUnEJBI8cT


In [None]:
# these are songs that look like covers or otherwise not the expected response from spotify search 
# (which is a bit wonky, doesn't like quotes and such)
# remove from df and add manually
bad_lookups = [
#    25,134,155,160,200,209,422,445,446,557,737,744,755,759,760,761,762,781,785,790,814,815,842
    21,51,61,63,83,145,212,317,322,439,449,575,759,784,
]

for i in bad_lookups:
    print(gold_df.iloc[i])
    
# add manually, plus 'not found'


In [146]:
gold_df

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,13974,Claude Debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
1,7901,The Beatles,The Beatles,yesterday,Yesterday - Remastered 2009,Help! (Remastered),spotify:track:3BQHpFgAp4l80e1XslIjNI
2,7682,eagles,Eagles,hotel california,Hotel California - 2013 Remaster,Hotel California (2013 Remaster),spotify:track:40riOy7x9W7GXjyGp4pjAv
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
4,6156,Simon & Garfunkel,Simon & Garfunkel,scarborough fair,Scarborough Fair / Canticle,"Parsley, Sage, Rosemary And Thyme",spotify:track:3g2fYZW5v2od8KIF7VktT0
...,...,...,...,...,...,...,...
1210,5,avenged sevenfold,Avenged Sevenfold,fiction,Fiction,Nightmare,spotify:track:3b3eu3uMp1b9xPOHbfSwBi
1211,5,vance joy,Vance Joy,riptide,Riptide,Dream Your Life Away (Special Edition),spotify:track:3JvrhDOgAt6p7K8mDyZwRd
1212,5,joy division,Joy Division,love will tear us apart,Love Will Tear Us Apart - 2020 Remaster,Love Will Tear Us Apart,spotify:track:34iOH7LY3vme5rQxsVILZ4
1213,5,aurora,AURORA,it happened quiet,It Happened Quiet,Infections of a Different Kind (Step I),spotify:track:0yXXv7MH5jYTDlOltMbdvt


In [None]:
gold_df = gold_df.drop(
    axis='index',
    labels=bad_lookups)


In [147]:
# this you could upload and make a new playlist
# my playlist is result of multiple iterations

gold_df[['artist', 'track', 'score']].to_csv('gold.csv', index=False)

with pd.option_context("display.max_rows", 999):
    display(gold_df)

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,13974,Claude Debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
1,7901,The Beatles,The Beatles,yesterday,Yesterday - Remastered 2009,Help! (Remastered),spotify:track:3BQHpFgAp4l80e1XslIjNI
2,7682,eagles,Eagles,hotel california,Hotel California - 2013 Remaster,Hotel California (2013 Remaster),spotify:track:40riOy7x9W7GXjyGp4pjAv
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
4,6156,Simon & Garfunkel,Simon & Garfunkel,scarborough fair,Scarborough Fair / Canticle,"Parsley, Sage, Rosemary And Thyme",spotify:track:3g2fYZW5v2od8KIF7VktT0
...,...,...,...,...,...,...,...
1210,5,avenged sevenfold,Avenged Sevenfold,fiction,Fiction,Nightmare,spotify:track:3b3eu3uMp1b9xPOHbfSwBi
1211,5,vance joy,Vance Joy,riptide,Riptide,Dream Your Life Away (Special Edition),spotify:track:3JvrhDOgAt6p7K8mDyZwRd
1212,5,joy division,Joy Division,love will tear us apart,Love Will Tear Us Apart - 2020 Remaster,Love Will Tear Us Apart,spotify:track:34iOH7LY3vme5rQxsVILZ4
1213,5,aurora,AURORA,it happened quiet,It Happened Quiet,Infections of a Different Kind (Step I),spotify:track:0yXXv7MH5jYTDlOltMbdvt


In [90]:
# get playlist id
# first create a playlist in UI to load songs
playlists = sp.user_playlists(os.getenv('SPOTIFY_USERNAME'))
while playlists:
    for i, playlist in enumerate(playlists['items']):
        if playlist['name'] != 'Reddit Prettiest Songs':
            continue
        print(playlist['id'])
        playlist_id = playlist['id']
        print("%4d %s %s" % (i + 1 + playlists['offset'], playlist['uri'],  playlist['name']))
    if playlists['next']:
        playlists = sp.next(playlists)
    else:
        playlists = None
        

08YFkbtTV6GBfNtjJ4PHDu
   1 spotify:playlist:08YFkbtTV6GBfNtjJ4PHDu Reddit Prettiest Songs


In [138]:
# must follow an oauth workflow to write a playlist in Spotify
# running this cell should request a spotify login and then redirect to an url
# paste whole url with id into form to authenticate

scope = "playlist-modify-public"

sp = spotipy.Spotify(auth_manager=spotipy.SpotifyOAuth(scope=scope,
                                                       client_id=os.getenv('SPOTIFY_CLIENT_ID'),
                                                       client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
                                                       redirect_uri="https://druce.ai"
                                                      ))


In [None]:
# add songs to playlist 

# addlist = gold_df['uri'].to_list()
# print (len(addlist))

# while(addlist):
#     sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'), 
#                                 playlist_id=playlist_id, 
#                                 tracks=addlist[-100:])
#     addlist = addlist[:-100]
#     print("added items, remaining ", len(addlist))


In [None]:
# manually add the ones that weren't found for some reason


In [91]:
# compare to existing playlist
# can run again and add any new tracks, either because OpenAI is a bit random, or new replies in thread
results = sp.user_playlist(os.getenv('SPOTIFY_USERNAME'), playlist_id,
                                fields='tracks,next,name')
tracks = results['tracks']

playlist_dict_by_uri = {}
playlist_dict_by_str = {}

artist_list = []
track_list = []
uri_list = []
popularity_list = []
album_list=[]

while True:
    for track_item in tracks['items']:
        track_dict = track_item['track']
        track_str = track_dict['artists'][0]['name']  + ' | ' + track_dict['name'][:15]
        uri = track_dict['uri']
        if track_str in playlist_dict_by_str:
            print(track_str)
        playlist_dict_by_str[track_str] = uri
        playlist_dict_by_uri[uri] = track_str
        
        uri_list.append(uri)
        artist_list.append(track_dict['artists'][0]['name'])
        track_list.append(track_dict['name'])
        album_list.append(track_dict['album']['name'])
        popularity_list.append(track_dict['popularity'])
        
    # check if there are more pages
    if tracks['next']:
        tracks = sp.next(tracks)
    else:
        break

print (len(list(playlist_dict_by_str.keys())))
print (len(list(playlist_dict_by_uri.keys())))


Nine Inch Nails | A Warm Place
Joni Mitchell | Both Sides Now
1156
1158


In [92]:
with pd.option_context("display.max_rows", 9999):
    display(gold_df.loc[~gold_df['uri'].isin(playlist_dict_by_uri.keys())])
    

Unnamed: 0,score,input_artist,artist,input_track,track,album,uri
0,14007,claude debussy,Claude Debussy,claire de lune,Claire de lune,Träumerei - Liebestraum - Für Elise - Clair de...,spotify:track:6kf7ZCJjEbjZXikivKOsvJ
3,7003,erik satie,Erik Satie,gymnopédies,3 Gymnopédies: No. 1 Lent et douloureux,Satie: The Magic of Satie,spotify:track:7kTVe6XhIveidvkt8nb7jK
10,3383,don mclean,Don McLean,"vincent (starry, starry night","Vincent (Starry, Starry Night)",Rearview Mirror: An American Musical Journey,spotify:track:2YDyH60Vro33KkDtNZCXIk
18,2531,joni mitchell,Joni Mitchell,both sides now,Both Sides Now,Feathers.,spotify:track:5EsPLgSs1UQIDJG0U00RuJ
44,763,Simon & Garfunkel,Simon & Garfunkel,"for emily, whenever i may find her","For Emily, Whenever I May Find Her","Parsley, Sage, Rosemary And Thyme",spotify:track:76Fcbx3T7fUgA7phUkmsn7
45,757,glen hansard and marketa irglova,Glen Hansard,falling slowly,Falling Slowly,Perhaps Love,spotify:track:2lpNVkZb7e1k7IeW8MOzLe
47,693,wolfgang amadeus mozart,Wolfgang Amadeus Mozart,lacrimosa,Lacrimosa,Requiem - Music To Die For,spotify:track:1UvaZaHkh3D9AkmBrrnbFg
58,584,louis armstrong,Louis Armstrong,it's a wonderful world,It's Wonderful - Single Version,What A Wonderful World,spotify:track:7tmOKoxLQFLvQWwxzYaodT
61,504,henry mancini,Henry Mancini,moon river,Moon River(Vocal Audrey Hepburn),Breakfast At Tiffany's (50th Anniversary Edition),spotify:track:5iGleL7HpEThuuYQ3us2jh
87,311,The Verve,The Cover Crew,bittersweet symphony,Bittersweet Symphony (Acoustic Version) [The V...,"Acoustified Hits, Vol. 2",spotify:track:714doH50K9qbrE9py6izzV


In [141]:
playlist_df = pd.DataFrame({'artist': artist_list,
                           'track': track_list,
                           'album': album_list,
                           'popularity': popularity_list,
                           })



In [142]:
with pd.option_context("display.max_rows", 9999):
    display(playlist_df.sort_values('popularity'))
    

Unnamed: 0,artist,track,album,popularity
909,Yusuf / Cat Stevens,Wild World,The Best Of Cat Stevens 20th Century Masters T...,0
1138,John Williams,Missing You,My World,0
1137,ZaZa,Only You,Nights One and a Thousand,0
769,Jaakko Aukusti,What If All Else Fails?,What If All Else Fails?,0
999,Joni Mitchell,Both Sides Now,Clouds,0
300,Jay-Jay Johanson,Poison,Poison,1
42,The Smiths,Asleep,mental health: look out for yourself,2
31,The Niro,No One Must Find You Here,The Complete Jeff Buckley and Gary Lucas Songbook,2
1008,Johann Sebastian Bach,"Orchestral Suite No. 3 in D Major, BWV 1068: I...",Classical Music In the Background,2
504,Claude Debussy,Debussy: Arabesque no. 2 in G major (Deux Arab...,Debussy: Arabesque no. 2 in G major (Deux Arab...,4


In [143]:
gold_dict_by_uri = {}
gold_dict_by_str = {}
addlist = []
c = 0
for i, artist, track, uri in gold_df[['artist', 'track', 'uri']].itertuples():
    # print(artist, track, uri)
    track_str = artist + ' | ' + track[:15]
    if track_str not in playlist_dict_by_str:
        addlist.append([artist, track, uri])
        print(artist, track, uri)
    gold_dict_by_uri[uri]=track_str
    gold_dict_by_str['track_str']= uri
#     if track_str not in playlist_dict_by_str:
#         c += 1
#         print (c, track_str)
        
print(len(gold_dict_by_str.items()))
print(len(gold_dict_by_uri.items()))

Claude Debussy Claire de lune spotify:track:6kf7ZCJjEbjZXikivKOsvJ
Erik Satie 3 Gymnopédies: No. 1 Lent et douloureux spotify:track:7kTVe6XhIveidvkt8nb7jK
Don McLean Vincent (Starry, Starry Night) spotify:track:2YDyH60Vro33KkDtNZCXIk
Wolfgang Amadeus Mozart Lacrimosa spotify:track:1UvaZaHkh3D9AkmBrrnbFg
Louis Armstrong It's Wonderful - Single Version spotify:track:7tmOKoxLQFLvQWwxzYaodT
Henry Mancini Moon River(Vocal Audrey Hepburn) spotify:track:5iGleL7HpEThuuYQ3us2jh
The Cover Crew Bittersweet Symphony (Acoustic Version) [The Verve Cover] spotify:track:714doH50K9qbrE9py6izzV
The Beatles Golden Slumbers - Remastered 2009 spotify:track:01SfTM5nfCou5gQL70r6gs
Louis Armstrong La vie en rose - Single Version spotify:track:3yYfoYGVpriV4fG9L1ogsD
Johann Sebastian Bach Herz und Mund und Tat und Leben, Cantata BWV 147: Jesu, Joy of Man’s Desiring (Transcr. Hess for Piano) spotify:track:2zl2AqEdHVLhjzStuRulGY
Giacomo Puccini Madama Butterfly, SC 74, Act II Pt. 1: No. 17, Un bel di vedremo (But

In [144]:
addlist

[['Claude Debussy', 'Claire de lune', 'spotify:track:6kf7ZCJjEbjZXikivKOsvJ'],
 ['Erik Satie',
  '3 Gymnopédies: No. 1 Lent et douloureux',
  'spotify:track:7kTVe6XhIveidvkt8nb7jK'],
 ['Don McLean',
  'Vincent (Starry, Starry Night)',
  'spotify:track:2YDyH60Vro33KkDtNZCXIk'],
 ['Wolfgang Amadeus Mozart',
  'Lacrimosa',
  'spotify:track:1UvaZaHkh3D9AkmBrrnbFg'],
 ['Louis Armstrong',
  "It's Wonderful - Single Version",
  'spotify:track:7tmOKoxLQFLvQWwxzYaodT'],
 ['Henry Mancini',
  'Moon River(Vocal Audrey Hepburn)',
  'spotify:track:5iGleL7HpEThuuYQ3us2jh'],
 ['The Cover Crew',
  'Bittersweet Symphony (Acoustic Version) [The Verve Cover]',
  'spotify:track:714doH50K9qbrE9py6izzV'],
 ['The Beatles',
  'Golden Slumbers - Remastered 2009',
  'spotify:track:01SfTM5nfCou5gQL70r6gs'],
 ['Louis Armstrong',
  'La vie en rose - Single Version',
  'spotify:track:3yYfoYGVpriV4fG9L1ogsD'],
 ['Johann Sebastian Bach',
  'Herz und Mund und Tat und Leben, Cantata BWV 147: Jesu, Joy of Man’s Desiring 

In [None]:
addlist = [['ABBA', 'One Of Us', 'spotify:track:6zgtBUEkAfilJ2YEOvNexR'],
 ['Gregorio Allegri',
  'Miserere mei, Deus',
  'spotify:track:6es7DmrhnDoKj5rsFvh3XU'],
 ['Amy Winehouse',
  'Love Is A Losing Game',
  'spotify:track:3uliGwmB52ZA7brgpZMzyH'],
 ['Barbara',
  "Ma plus belle histoire d'amour",
  'spotify:track:0qBVET4VkHsQAoboWlQ2pJ'],
 ['Ludwig van Beethoven',
  'Symphony No. 5 in C Minor, Op. 67: I. Allegro con brio',
  'spotify:track:2ygeBLTP9uu3OW3VTulD8N'],
 ['Benny Goodman', 'Sing, Sing, Sing', 'spotify:track:5L8ta4ECl5zeA6bGqY7G38'],
 ['Bill Withers', 'Lean on Me', 'spotify:track:3M8FzayQWtkvOhqMn2V4T2'],
 ['Billy Joel', 'Piano Man', 'spotify:track:70C4NyhjD5OZUMzvWZ3njJ'],
 ['Bob Dylan', 'Ballad of a Thin Man', 'spotify:track:0f5N14nB8xi0p3o4BlVvbx'],
 ['Bob Dylan', "Blowin' in the Wind", 'spotify:track:18GiV1BaXzPVYpp9rmOg0E'],
 ['Bob Dylan', 'Desolation Row', 'spotify:track:4n1ZGm3TxYmoYe1YR8cMus'],
 ['Bob Dylan', 'Duquesne Whistle', 'spotify:track:5kKW4bszhKSCYVPDO0sMbX'],
 ['Bob Dylan',
  'Forever Young - Slow Version',
  'spotify:track:4yWl0tnEanf3zmZzl9kbQn'],
 ['Bob Dylan', 'Gotta Serve Somebody', 'spotify:track:760420tYNmNjFgi8bWvbop'],
 ['Bob Dylan', 'Highway 61 Revisited', 'spotify:track:6os5B6xjuke9YfBKH3tu1e'],
 ['Bob Dylan',
  'I Shall Be Released - Studio Outtake - 1971',
  'spotify:track:5vyw005QQ42hrzrLxb3xEX'],
 ['Bob Dylan', 'I Want You', 'spotify:track:7tJQ4Ekp2vN3NlI3vJJW3v'],
 ['Bob Dylan', "It Ain't Me Babe", 'spotify:track:5nbNWAfT1S6V1vqj3snHxS'],
 ['Bob Dylan', 'Jokerman', 'spotify:track:6cuHkcRUqtQhtJ4sWCkd1q'],
 ['Bob Dylan',
  "Knockin' On Heaven's Door",
  'spotify:track:6HSXNV0b4M4cLJ7ljgVVeh'],
 ['Bob Dylan', 'Lay, Lady, Lay', 'spotify:track:4uYwlMp841PLJmj1gJJwIq'],
 ['Bob Dylan', 'Like a Rolling Stone', 'spotify:track:3AhXZa8sUQht0UEdBJgpGc'],
 ['Bob Dylan', 'Love Sick', 'spotify:track:3O1hpSOaJDW4SelgUG2XT3'],
 ['Bob Dylan', "Maggie's Farm", 'spotify:track:5rGD8FFgHw74cp3RPhucyg'],
 ['Bob Dylan',
  'Make You Feel My Love',
  'spotify:track:6rfGPGghQL7SJmZPXprXIc'],
 ['Bob Dylan',
  'Mississippi - Version 2',
  'spotify:track:6JWHNd8QMxTvojYkmZtKGI'],
 ['Bob Dylan', 'Mr. Tambourine Man', 'spotify:track:3RkQ3UwOyPqpIiIvGVewuU'],
 ['Bob Dylan', 'Murder Most Foul', 'spotify:track:1LfTvT9JPYuuZanwxLtZCr'],
 ['Bob Dylan', 'Not Dark Yet', 'spotify:track:1qbn6QrHG8XfnqVFKgNzKP'],
 ['Bob Dylan',
  'Rainy Day Women #12 & 35',
  'spotify:track:7BkAlVpGwXXl3sYNn5OoJ7'],
 ['Bob Dylan',
  'Sad-Eyed Lady of the Lowlands',
  'spotify:track:4jdtLLyEL7wY0TlCdMKhxq'],
 ['Bob Dylan', 'She Belongs to Me', 'spotify:track:2itBkHBUxGl4VfDj4HNyoD'],
 ['Bob Dylan',
  'Stuck Inside of Mobile with the Memphis Blues Again',
  'spotify:track:1NYTj6JEw3IOh4ggiBh82h'],
 ['Bob Dylan',
  'Subterranean Homesick Blues',
  'spotify:track:6k9DUKMJpWvu6eFG3O64Lg'],
 ['Bob Dylan', 'Tangled up in Blue', 'spotify:track:6Vcwr9tb3ZLO63F8DL8cqu'],
 ['Bob Dylan', 'Tempest', 'spotify:track:19scNzd4ogVsHrNWsms8Rg'],
 ['Bob Dylan',
  "The Times They Are A-Changin'",
  'spotify:track:52vA3CYKZqZVdQnzRrdZt6'],
 ['Bob Dylan',
  'Things Have Changed - Single Version',
  'spotify:track:5KOi77ameCimkAdw0DMNoy'],
 ['Bob Dylan',
  'Thunder on the Mountain',
  'spotify:track:4wo2eRp6aHcAlmhmfwiTAH'],
 ['Bob Dylan', 'Visions of Johanna', 'spotify:track:2rslQV48gNv3r9pPrQFPW1'],
 ['Brian Wilson', 'God Only Knows', 'spotify:track:2SznAUigFh6rMdGpcS5d7e'],
 ['Bright Eyes',
  'First Day of My Life',
  'spotify:track:0eBryM7ePQH3Klt3jz8xZd'],
 ['Crowded House',
  'Don’t Dream It’s Over - Home Demo',
  'spotify:track:0fiSpF9mvRFQWy0ca64d1g'],
 ['Léo Delibes', 'Flower Duet', 'spotify:track:5K8jqeLAxZIqHR6e5w5so1'],
 ['Dire Straits', 'Brothers In Arms', 'spotify:track:6XYBbVpu455ZdGWZNRLGbG'],
 ['Don McLean',
  'Vincent (Starry, Starry Night)',
  'spotify:track:2YDyH60Vro33KkDtNZCXIk'],
 ['Ed Sheeran', 'Photograph', 'spotify:track:41xNsY82OWtWbIfnRMK2ky'],
 ['Elvis Presley',
  'Can’t Help Falling in Love - Acoustic Cover',
  'spotify:track:0ghQkNDYLSl4GsqfkjTjWx'],
 ['Enya', 'Amarantine', 'spotify:track:0VmzazQQ0Mo1vJldr5NxTW'],
 ['Evan Rachel Wood', 'If I Fell', 'spotify:track:0gd3hRBQAEAw096YOcUrmR'],
 ['Fleetwood Mac', 'Rhiannon', 'spotify:track:05oETzWbd4SI33qK2gbJfR'],
 ['George Harrison',
  'All Things Must Pass - 2014 Remaster',
  'spotify:track:16OwZQuzMqnwn3FZsCBZly'],
 ['George Harrison',
  'Apple Scruffs - 2014 Remaster',
  'spotify:track:2K7WhpfZX3TCCMiwebp0W7'],
 ['George Harrison',
  'Art of Dying - 2014 Remaster',
  'spotify:track:6Jod7qrtYBhU3HcUmKk4hX'],
 ['George Harrison',
  'Awaiting on You All - 2014 Remaster',
  'spotify:track:0b65WkrBrg2qOkzQeDtQ9d'],
 ['George Harrison',
  'Ballad of Sir Frankie Crisp (Let It Roll) - 2014 Remaster',
  'spotify:track:0FWeRrB8T5R6maHbWQw4Kk'],
 ['George Harrison',
  'Behind That Locked Door',
  'spotify:track:2VVbLn8nMcWJzjcL1tZsUr'],
 ['George Harrison',
  'Beware of Darkness - 2014 Remaster',
  'spotify:track:606MCyZFMBlc52Ojnn1nvU'],
 ['George Harrison',
  'Give Me Love (Give Me Peace on Earth)',
  'spotify:track:71fXxvXqo1zxWDtBmjoEVk'],
 ['George Harrison',
  'Hear Me Lord - 2014 Remaster',
  'spotify:track:3kopbNyRj10XO1actGZexP'],
 ['George Harrison',
  'I Dig Love - 2014 Remaster',
  'spotify:track:42yK1Wy62c7malKSRwy0Qk'],
 ['George Harrison',
  'I Remember Jeep - 2014 Remaster',
  'spotify:track:058AE5M3ifbCh8VWOV7903'],
 ['George Harrison',
  "It's Johnny's Birthday - 2014 Remaster",
  'spotify:track:6Cv05rcW8HWwCC6wyEp1fC'],
 ['George Harrison',
  'Let It Down - 2014 Remaster',
  'spotify:track:5FFruMKbVg8AhwHnX4xBov'],
 ['George Harrison',
  'My Sweet Lord - 2014 Remaster',
  'spotify:track:6vE90mi4yKsQGY3YD2OOv1'],
 ['George Harrison',
  'Out of the Blue - 2014 Remaster',
  'spotify:track:1KHMyFaGvwVQ7ax4yjq4BZ'],
 ['George Harrison',
  'Plug Me In - 2014 Remaster',
  'spotify:track:0tyk2xHVjBd3nk16cGktTG'],
 ['George Harrison',
  'Run of the Mill - 2014 Remaster',
  'spotify:track:4uSlUBg3NVOA77E7wwKFTO'],
 ['George Harrison',
  'Thanks for the Pepperoni - 2014 Remaster',
  'spotify:track:3smkwfPqFsTmwfnBztMXaM'],
 ['George Harrison',
  'The Inner Light (Alternative Take) - Instrumental',
  'spotify:track:7gWPnvhaBFMlQsTBWEGcSC'],
 ['George Harrison',
  'Wah-Wah - 2014 Remaster',
  'spotify:track:5j3aqkMO2fl0s5eaSuVnQ8'],
 ['George Harrison',
  'What Is Life - 2014 Remaster',
  'spotify:track:44fw7RulJyj7dGIi9qR86N'],
 ['George Harrison',
  'While My Guitar Gently Weeps - Live At Madison Square Garden; 2009 Remaster',
  'spotify:track:4Egi6XuC0rbLlXfqmQeuFa'],
 ['Glenn Miller', 'In the Mood', 'spotify:track:1xsY8IFXUrxeet1Fcmk4oC'],
 ['Hans Zimmer', 'Cornfield Chase', 'spotify:track:6pWgRkpqVfxnj3WuIcJ7WP'],
 ['Hans Zimmer',
  'Day One (Interstellar Theme)',
  'spotify:track:4WmB04GBqS4xPMYN9dHgBw'],
 ["Israel Kamakawiwo'ole",
  'Maui Medley',
  'spotify:track:6TSJ3L9pBQsYIlCD5pk7ju'],
 ['James Taylor',
  'You’ve Got a Friend',
  'spotify:track:3nK4hWsTEr7fVXziI5bTmh'],
 ['Jay Ungar', 'Ashoken Farewell', 'spotify:track:2s6pqLeVialgt5l5TTSeas'],
 ['Jeff Buckley',
  'If You Knew - Live at Sin-é, New York, NY - July/August 1993',
  'spotify:track:1nd2JEHXbUuQFDiQzCBpsv'],
 ['Jimi Hendrix', 'One Rainy Wish', 'spotify:track:5Zyv0v4rPcrXjkaeImuodv'],
 ['Jimi Hendrix',
  'Spanish Castle Magic',
  'spotify:track:2KFE98Iw0X23sf4vJYcbLH'],
 ['Jimi Hendrix',
  'Wait Until Tomorrow',
  'spotify:track:2YtVzmZzew1ILUdNueyWd7'],
 ['John Lennon',
  'Imagine - Remastered 2010',
  'spotify:track:7pKfPomDEeI4TPT6EOYjn9'],
 ['John Mayer', 'Queen of California', 'spotify:track:0CETmgFGt8Ne8vLnaLcduU'],
 ['Johnny Cash',
  'I Walk The Line - Single Version',
  'spotify:track:1TKPfF2fvn6gVLVfp3iG4j'],
 ['Joni Mitchell',
  'Mitchell: Urge for Going (Instrumental Arrangement of the B-Side Track of the Joni Mitchell Single "You Turn Me on I\'m a Radio")',
  'spotify:track:1I1u9aTdxxQ7SDLgBB3V7b'],
 ['Kanye West', 'Come to Life', 'spotify:track:5xvXeuxISyXJDRbZZf4uzd'],
 ['Leonard Cohen', 'Chelsea Hotel #2', 'spotify:track:4krhCfJg0znykZoyjeMXRe'],
 ['Leonard Cohen', 'Dear Heather', 'spotify:track:3MTKMphPprAcBFG1uIhzPZ'],
 ['Leonard Cohen',
  "Death of a Ladies' Man",
  'spotify:track:5wrylUGwZugelovhryPYg2'],
 ['Leonard Cohen', 'The Future', 'spotify:track:5l8lYrnPEM1ln3J4XaTcy5'],
 ['Leonard Cohen',
  'You Want It Darker',
  'spotify:track:5zb7npjQqoJ7Kcpq4yD9qn'],
 ['Lingers.On', 'In Lingerie', 'spotify:track:6FH3kGlJbFVJDCG9RcERf7'],
 ['Louis Armstrong',
  'La vie en rose - Single Version',
  'spotify:track:3yYfoYGVpriV4fG9L1ogsD'],
 ['The Lovecats', 'The Lovecats', 'spotify:track:7iJUiiTfnuY5cTIeEBnqHr'],
 ['Ludovico Einaudi', 'Primavera', 'spotify:track:4BMHp3DkI8VLsuB9Kr0pzu'],
 ['Mazzy Star', 'Flowers In December', 'spotify:track:0G6Ws8Gbdt0S7pZeuYmkmm'],
 ['Metallica',
  'Fade To Black (Remastered)',
  'spotify:track:0dqGfCMAGyDgpUAgLNOjWd'],
 ['Wolfgang Amadeus Mozart',
  'Requiem in D Minor, K. 626: III. Sequenz No. 6, Lacrimosa dies illa',
  'spotify:track:4bvzJZXpkI3bkjxMCWOSu1'],
 ['My Chemical Romance',
  'The Light Behind Your Eyes',
  'spotify:track:3HyDpKAuR3e4l6QB7hSB2l'],
 ['Paul McCartney',
  'Here Today - Remixed 2015',
  'spotify:track:0QtnwXDziZN1K55fXuLN6q'],
 ['Paul McCartney',
  'I’ll Follow The Sun - Live At Amoeba 2007',
  'spotify:track:3xT59EeQdq0TPGtOlXXI8t'],
 ['Puscifer', 'The Humbling River', 'spotify:track:69GE6yPZZldvqtgBHrKXxg'],
 ['Ray LaMontagne',
  'Such A Simple Thing',
  'spotify:track:4PuUa8e5s7P3Zv1IdCGIsa'],
 ['Ray Manzarek',
  'Riders on the Storm',
  'spotify:track:3FvYcTXO2QtDY7kZQHku2d'],
 ['Red Hot Chili Peppers', 'Dosed', 'spotify:track:1iFIZUVDBCCkWe705FLXto'],
 ['Sky Cries Mary',
  "Don't Forget The Sky",
  'spotify:track:4sVpjCJRClVetRrdxVBolP'],
 ['Stevie Nicks', 'Landslide', 'spotify:track:5fprEY6WEN1wvFXkgfb22C'],
 ['Stevie Wonder', 'Isn’t She Lovely', 'spotify:track:6wGlAaMfyhKdEPr2zycAnN'],
 ['Taylor Swift',
  'Fearless (Taylor’s Version)',
  'spotify:track:77sMIMlNaSURUAXq5coCxE'],
 ['Taylor Swift',
  'the lakes - bonus track',
  'spotify:track:0eFQWVz0qIxDOvhLpZ40P7'],
 ['The Band',
  'When I Paint My Masterpiece - Remastered',
  'spotify:track:76WChUuOPeIK027IeUgr0l'],
 ['The Beach Boys',
  "I Just Wasn't Made For These Times - Mono",
  'spotify:track:4CuO8TINNqM3D7aUdNQ3zG'],
 ['The Beach Boys',
  "Let's Go Away For A While - Mono",
  'spotify:track:3GsgJI1aBrvUtqX8f3MhKT'],
 ['The Beatles',
  "Don't Let Me Down - Naked Version / Remastered 2013",
  'spotify:track:5BhMoGrz5KzG2fA5uzHjZ1'],
 ['The Beatles',
  'Love Me Do - Remastered 2009',
  'spotify:track:3VbGCXWRiouAq8VyMYN2MI'],
 ['The Chemical Brothers',
  'The Boxer',
  'spotify:track:1EUeDFq2zNP784GPaRs9aH'],
 ['The Cure',
  'A Night like This - 2006 Remaster',
  'spotify:track:7cKCz7gG84i1XLvDeM3ByT'],
 ['The Cure',
  'Disintegration - 2010 Remaster',
  'spotify:track:0zY8t5dC1KQXcPUKByWMJM'],
 ['The Cure',
  'From the Edge of the Deep Green Sea',
  'spotify:track:2vwBL9RVyr0vA4Og5VH0i3'],
 ['The Cure',
  'In Between Days - 2006 Remaster',
  'spotify:track:07CyrZF9eVd02zzIse7tZA'],
 ['The Cure', 'A Letter to Elise', 'spotify:track:4DdXOLc1VMAY34ourCn1Xa'],
 ['The Cure',
  'Lullaby - 2010 Remaster',
  'spotify:track:4d4oXk7O2lEhZ83ivV93li'],
 ['The Cure', 'Underneath The Stars', 'spotify:track:0PKVjYlKw7z3IvKAoxrYTR'],
 ['The Eagles', 'The Desperadoes', 'spotify:track:10ppF835WJMYI5v65gFLZ3'],
 ['The Helio Sequence',
  'Keep Your Eyes Ahead',
  'spotify:track:3yatRBsGMJ7wMoUIgDBzzo'],
 ['The Moldy Peaches',
  'Anyone Else But You',
  'spotify:track:2pKi1lRvXNASy7ybeQIDTy'],
 ['The Strokes', 'Someday', 'spotify:track:7hm4HTk9encxT0LYC0J6oI'],
 ['Traditional',
  'Scarborough Fair (Arr. Parkin)',
  'spotify:track:4wlNPczIullwvmwb4x0ltz'],
 ['Van Morrison',
  'Madame George - 1999 Remaster',
  'spotify:track:1N4MKISvC1ddfRCRQDXDd2'],
 ['Various Artists',
  'The Girl From Ipanema',
  'spotify:track:0JgH7g0kwsIs1THEVqhlUS'],
 ['Víg Mihály',
  'Öreg - From "Werckmeister Harmóniák"',
  'spotify:track:63wMgkXQuomlkW4an4O9b4'],
 ['Willie Nelson', 'Crazy', 'spotify:track:0xqtcLB45iKNfHroi5y1em']]


In [None]:
len(addlist)

In [None]:
addlist2 = [a[2] for a in addlist]

print (len(addlist2), 'items')

while(addlist2):
    sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'), 
                                playlist_id=playlist_id, 
                                tracks=addlist2[-100:])
    addlist2 = addlist2[:-100]
    print("added items, remaining ", len(addlist2))
