In [1]:
import os
import glob
import pickle
from datetime import datetime
import time
import dotenv
import pandas as pd

import requests
import requests.auth

import praw

import openai

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# load secrets from .env into environment variables
dotenv.load_dotenv()

praw.__version__

'7.7.0'

See README.md
 - objective is to use OpenAI for named entity extraction to extract all the songs form [this reddit thread](https://www.reddit.com/r/AskReddit/comments/12viv4v/what_is_the_prettiest_song_you_ever_heard_in_your/) and make Spotify playlist
 - use Reddit PRAW API to download all the comments (get [Reddit API key](https://www.reddit.com/prefs/apps))
 - use OpenAI API with a prompt like, extract all the songs from this text to CSV get ([OpenAI API key](https://platform.openai.com/account/api-keys))
 - use Spotify API to make a playlist (get [Spotify API key](https://developer.spotify.com/documentation/web-api/tutorials/getting-started))
 - works, needed a lot of scrubbing, but about 1 day of work, wouldn't have been possible to do a 700-song playlist manually without a team of Mechanical Turks or something
 - If I wanted to go nuts, would process comments individually, save a file for each comment's extracted songs, would make it easier to track down what OpenAI gets wrong, have a resumable, retryable, repeatable process and 
 - Spotify playist is [here](https://open.spotify.com/playlist/08YFkbtTV6GBfNtjJ4PHDu?si=f4761d983ac84091) 
 
 needs a .env file per dot-env-template
 

In [117]:
# a thread 
submission = "12viv4v"

# minimum karma to process a reply 
minkarma = 5

# a prompt to apply to replies on the thread
# prefix = """Define an example CSV file output as follows: 
# "artist","song_title"
# "The Beatles","Yesterday"
# "Eagles","Hotel California"

# Extract all song titles and artists from the following input, and return a CSV file output of the artists and song titles you extract from the input. If there were no songs extracted from the input, return "no songs found". the input is:
# """

system_prompt="You will act as a research assistant finding all the songs and artists in a document and returning them a a CSV file"
assistant_prompt="""Define a CSV file output as follows: 
artist,song_title
"The Beatles","Yesterday"
"Eagles","Hotel California" """
user_prefix="Extract all song titles and artists from the following input, and return a CSV file output containing the artists and song titles you extract from the input. The header row should contain 'artist,song_title'. The fields of each record should be enclosed in double-quotes. The input is:"
# an output file to accumulate all the responses
savefile = 'bronze.txt'


## Get all comments from a reddit posting

In [3]:
def getPraw():
    return praw.Reddit(user_agent="prettiest_song/0.001", 
                       client_id=os.getenv('CLIENT_ID'), 
                       client_secret=os.getenv('CLIENT_SECRET'))


def getAll(r, submissionId, verbose=True):
    submission = r.submission(submissionId)
    submission.comments.replace_more(limit=None)
    commentsList=submission.comments.list()
    return commentsList


In [4]:
print(datetime.now())
r = getPraw()
res = getAll(r, submission)
print(datetime.now())

print("retrieved ", len(res), 'comments')

2023-04-30 16:49:10.659259
2023-04-30 17:29:18.276437
retrieved  24839 comments


In [7]:
# we have a list of comment objects
# filter comments with at least some karma
res3 = [r for r in res if r.score >= minkarma]
print('filtered to ', len(res3), 'comments')
res3[0].body, res3[0].score

filtered to  2515 comments


('Gymnopédies - Erik Satie', 6925)

In [8]:
# save so we can reload it later without downloading

with open('reddit.pkl', 'wb') as f:
    pickle.dump(res3, f)
    
with open('reddit.pkl', 'rb') as f:
    res3 = pickle.load(f)
    

## Extract artists and song titles using OpenAI

In [9]:
# check lengths of posts
shorties = []
big_ones = []
for i in range(len(res3)):
    if len(res3[i].body) <3:
        print (i, res3[i].body)
        shorties.append(i)
    if len(res3[i].body) > 4096:
        print(i, len(res3[i].body))
        big_ones.append(i)
        

474 4162
1539 W


In [10]:
# avg length
sum([len(r.body) for r in res3]) / len(res3)

105.34075546719681

In [102]:
print (res3[big_ones[0]].body[:500])

Saturn by Sleeping at Last:
https://www.youtube.com/watch?v=dzNvk80XY9s

The version they did with Tim Fain is even more beautiful: 
https://www.youtube.com/watch?v=0nRpeAiur9Q

I'm not good at choosing one thing from a list of favorites as the best, so I've got about 30+ answers that are really a 30+ -way tie, and the one that I would consider as "prettiest" at any given moment is heavily influenced by my current mood. So, it could be any one of these from my "Heart Wrenchingly Beautiful" playl


In [118]:
# for each comment object we will extract the body 
# then submit as part of a prompt to chatgpt
print(datetime.now())

openai.api_key = os.getenv('OPENAI_API_KEY')

slist = res3.copy()

# to speed things we'll cumulate posts til we get to 100 posts or 5000 chars, whichever comes first
maxchars = 5000  # max tokens is 4096 but we'll limit each prompt to 5000 chars
nposts = 100

# make sure no single post > maxchars + prefix which breaks the logic below
for i in range(len(slist)):
    if len(slist[i].body) > maxchars + len(prefix):
        print ("truncated ", i)
        slist[i].body = slist[i].body[:maxchars + len(prefix)]
        
outdir = 'out'
logdir = 'logs'
# make sure out and logs are empty
for f in glob.glob('%s/*' % outdir):
    os.remove(f)
for f in glob.glob('%s/*' % logdir):
    os.remove(f)
count = 0
c = 0


while(slist):  # still comments to process
    prompt = ""
    reply_ids = []

    for _ in range(nposts):  # add up to 100 posts to the prompt
        if slist:
            if len(prompt) + len(slist[0].body) < maxchars:
                reply = slist.pop(0)
                reply_ids.append(reply.id)
                body = reply.body # in order, for better context 
#                 if len(body) <3:
#                     print (c, body)
                prompt += body
                prompt += " \n \n"
#                 c += 1            
            
    # retry loop, have received untrapped 502 error
    for _ in range(3):
        try:
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo-0301',
                messages=[{"role":"system", "content": system_prompt},
                          {"role":"assistant", "content": assistant_prompt},
                          {"role":"user", "content": user_prefix + prompt}
                         ],
                temperature=0,
            )
        except Exception as error:
            print("An exception occurred:", error)
            print("looping...")
            time.sleep(5)
            continue  # try again
        break   # exception not triggered

    with open("%s/%04d.csv" % (outdir, count), 'w') as outfile:
        outfile.write(response['choices'][0]['message']['content'])
    
    with open("%s/%04d.log" % (logdir, count), 'w') as logfile:
        logfile.write(str(reply_ids))
        logfile.write('\n\n===========\n\n')        
        logfile.write(prompt)
        logfile.write('\n\n===========\n\n')
        logfile.write(response['choices'][0]['message']['content'])
 
    count += 1
#     print(c)
    print('.', end='')

print()
print(datetime.now())



2023-05-01 08:31:38.383844
..........................................................
2023-05-01 08:45:55.078346


In [127]:
# may have to tweak the files to get them to load

filelist = os.listdir(outdir)

output_df = None

for f in sorted(filelist):
    print(f)
    tempdf = pd.read_csv("%s/%s" % (outdir, f))
    colcount = len(tempdf.columns)
    if len(tempdf.columns) != 2:
        print('%s has %d columns' % (f, colcount))
        continue
    elif tempdf.columns[0] != "artist":
        print('%s 1st column is %s' % (f, tempdf.columns[0]))
        continue
    elif tempdf.columns[1] != "song_title":
        print('%s 2nd column is %s' % (f, tempdf.columns[1]))
        continue
    if output_df is not None:
        output_df = pd.concat([output_df, tempdf], axis=0)
    else:
        output_df = tempdf



0000.csv
0001.csv
0002.csv
0003.csv
0004.csv
0005.csv
0006.csv
0007.csv
0008.csv
0009.csv
0010.csv
0011.csv
0012.csv
0013.csv
0014.csv
0015.csv
0016.csv
0017.csv
0018.csv
0019.csv
0020.csv
0021.csv
0022.csv
0023.csv
0024.csv
0025.csv
0026.csv
0027.csv
0028.csv
0029.csv
0030.csv
0031.csv
0032.csv
0033.csv
0034.csv
0035.csv
0036.csv
0037.csv
0038.csv
0039.csv
0040.csv
0041.csv
0042.csv
0043.csv
0044.csv
0045.csv
0046.csv
0047.csv
0048.csv
0049.csv
0050.csv
0051.csv
0052.csv
0053.csv
0054.csv
0055.csv
0056.csv
0057.csv


In [129]:
# save bronze
output_df.to_csv(savefile, index=False)
len(output_df)

1397

In [130]:
df = pd.read_csv(savefile) \
    .drop_duplicates() \
    .dropna() \
    .sort_values(["artist", "song_title"]) \
    .reset_index(drop=True)

df.to_csv('silver.csv', index=False)

print(len(df))
# tweak further to get to gold.csv

df


1084


Unnamed: 0,artist,song_title
0,311,Amber
1,A Flock of Seagulls,I Ran (So Far Away)
2,A Perfect Circle,Blue
3,A Perfect Circle,By and Down the River
4,A Perfect Circle,Feathers
...,...,...
1079,Yo-Yo Ma,Cello Suite No. 1 in G major
1080,alt-J,Dissolve Me
1081,beabadoobee,glue
1082,paper kites,Bloom


In [131]:
df.groupby('artist') \
    .count() \
    .reset_index() \
    .sort_values('song_title', ascending=False) \
    .head(10)



Unnamed: 0,artist,song_title
464,The Beatles,28
389,Radiohead,18
425,Simon & Garfunkel,17
149,Enya,15
474,The Cure,14
165,Fleet Foxes,13
422,Sigur Ros,12
242,Jim Croce,12
426,Simon and Garfunkel,12
374,Pink Floyd,11


In [132]:
df = df.drop(df.loc[df['artist']=='Unknown'].index)
df = df.drop(df.loc[df['artist']=='unknown'].index)

In [133]:
df.groupby('song_title') \
    .count() \
    .reset_index() \
    .sort_values('artist', ascending=False) \
    .head(10)


Unnamed: 0,song_title,artist
320,Hallelujah,6
4,(unknown),5
103,Blackbird,5
949,Your Song,4
213,Dream a Little Dream of Me,4
210,Dream A Little Dream Of Me,4
448,La Vie En Rose,4
579,Nothing Compares 2 U,4
842,Time,4
864,Unchained Melody,3


In [134]:
df = df.drop(df.loc[df['song_title']=='Unknown'].index)
df = df.drop(df.loc[df['song_title']=='unknown'].index)

In [135]:
len(df)

1082

In [136]:
df.to_csv('silver.csv', index=False)


## Load into a Spotify playlist


In [137]:
client_credentials_manager = SpotifyClientCredentials(client_id=os.getenv('SPOTIFY_CLIENT_ID'), 
                                                      client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
                                                      )

sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


In [138]:
# check artists
df = pd.read_csv("silver.csv")
df.drop_duplicates() \
    .dropna() \
    .sort_values(["artist", "song_title"])

dedupe = {}
fail_list = []

for index, artist, title in df.itertuples():
    if artist in dedupe:
        continue
    dedupe[artist]=1
    query_str = 'artist:%s' % (artist)
    artist_results = sp.search(q=query_str, type='artist', limit=3, offset=0, market='US')
    artist_names = [artist['name'] for artist in artist_results['artists']['items']]
    if artist_names:
        if artist.lower() != artist_names[0].lower():
            print(artist, artist_names)
    else:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)

# then clean up manually as appropriate

A-Ha ['Daryl Hall & John Oates', 'a-ha', 'half•alive']
Adeem ['Adeem the Artist', 'Adeem', 'Adeema']
Al Stewart ['Alexander Stewart', 'Al Stewart', 'Alec Lee-Stewart']
Alan Parsons Project ['The Alan Parsons Project', 'The Alan Parsons Symphonic Project', 'The Alan Parsons Tribute Project']
Alison Kraus ['Alison Krauss', 'Alison Krauss & Union Station', 'Alison Kraus']
not found: Allison Kraus - When You Say Nothing at All
America ['The All-American Rejects', 'America', 'American Authors']
not found: Andrew Lloyd Webber, Sarah Brightman, Paul Miles-Kingston - Pie Jesu
Babyface ['Babyface Ray', 'Babyface', 'BabyFaceWood']
Barber ['Sam Barber', 'Jill Barber', 'Samuel Barber']
not found: Barry De Vorzon and Perry Botkin Jr. - Nadia's Theme
Beatles ['The Beatles', 'The Beatles Complete On Ukulele', 'The New Beatles']
Berlin ['Berliner Philharmoniker', 'Berlin', 'Konzerthaus Kammerorchester Berlin']
Beyonce ['Beyoncé', 'Beyonce Smith', 'Mc Beyonce']
not found: Bill Withers & Grover Washingt

Yaz ['Yazoo', 'Yazmin Lacey', 'Yazz']
paper kites ['The Paper Kites', 'Kites And Paper Trees']


In [139]:
# check songs

df = pd.read_csv("silver.csv")
df.drop_duplicates() \
    .dropna() \
    .sort_values(["artist", "song_title"])

dedupe = {}
mylist = []
fail_list = []
artist_list, track_list, uri_list, album_list = [], [], [], []
orig_artist, orig_track = [], []

for index, artist, title in df.itertuples():
    query_str = 'artist:%s track:%s' % (artist, title)
    track_results = sp.search(q=query_str, type='track', limit=1, offset=0, market='US')
    results = track_results['tracks']['items']
    
    if results:
        r = results[0]
        # failsafe to never put same track twice
        if dedupe.get(r['id']):
            continue
        dedupe[r['id']]=True
        if title.lower() != r['name'].lower():
            print ("%s|%s : %s|%s" % (artist, title, r['artists'][0]['name'], r['name']))
        uri_list.append(r['uri'])
        artist_list.append(r['artists'][0]['name'])
        track_list.append(r['name'])
        album_list.append(r['album']['name'])
        orig_artist.append(artist)
        orig_track.append(title)
#         print('  ',
#               r['artists'][0]['name'],'|',
#               r['name'], '|',
#               r['album']['name'],'|',
#               r['album']['release_date'],'|',
#               r['popularity'])
    else:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)

A Flock of Seagulls|I Ran (So Far Away) : A Flock Of Seagulls|I Ran (So Far Away) - Single Edit
not found: A Perfect Circle - The Nurse who Loved me (cover)
A Perfect Circle|Three Libras : A Perfect Circle|3 Libras
not found: Adeem - White Trash Revelry
not found: Alice In Chains - Don't Follow
Alice in Chains|Down in a Hole : Alice In Chains|Down In A Hole (2022 Remaster)
Alice in Chains|Rain When I Die : Alice In Chains|Rain When I Die (2022 Remaster)
not found: Allison Kraus - When You Say Nothing at All
not found: Allison Kraus - Whiskey Lullaby
Andy Williams|Moon River : Andy Williams|Moon River (From "Breakfast at Tiffany's")
not found: Aphex Twin - Avrl 14
Aphex Twin|Come to Daddy : Aphex Twin|Come To Daddy - Pappy Mix
not found: Aphex Twin - Lichen
Aphex Twin|Rhubarb : Aphex Twin|Donkey Rhubarb
not found: Aphex Twin - Untitled #17
not found: Aretha Franklin - (unknown)
Arvo Pärt|Spiegel im Spiegel : Arvo Pärt|Spiegel im Spiegel - Version for Violin and Piano
Audrey Hepburn|Moon

Grateful Dead|Casey Jones : Grateful Dead|Casey Jones - 2013 Remaster
Grateful Dead|Mountains of the Moon : Grateful Dead|Mountains of the Moon - 2013 Remaster
Grateful Dead|Ripple : Grateful Dead|Ripple - 2013 Remaster
Grateful Dead|Ship of Fools : Grateful Dead|Ship of Fools - 2013 Remaster
Grateful Dead|Standing on the Moon : Grateful Dead|Standing on the Moon - 2013 remaster
Grateful Dead|Stella Blue : Grateful Dead|Stella Blue - 2013 Remaster
Grateful Dead|They Love each Other : Grateful Dead|They Love Each Other - Live at Barton Hall, Cornell University, Ithaca, NY 5/8/77
Grateful Dead|Unbroken Chain : Grateful Dead|Unbroken Chain - 2013 Remaster
not found: Gregory Alan Isakov - Flightless bird, American Mouth (Cover)
Grimes|Symphonia IX : Grimes|Symphonia IX (My Wait Is U)
not found: Guitars and Dragons - Concerning Hobbits
Gustav Holst|Jupiter : Gustav Holst|The Planets, Op. 32: 4. Jupiter, the Bringer of Jollity
Hans Zimmer|Chevaliers de Sangreal : Hans Zimmer|Chevaliers De Sa

not found: Mogwai - Mogwai Fear Satan (Kevin Shields remix)
not found: Mono - Formica Blues
not found: Monovision - Monovision
Moody Blues|Nights In White Satin : The Moody Blues|Nights In White Satin - Single Version / Mono
Mozart|Lacrimosa : Wolfgang Amadeus Mozart|Requiem in D Minor, K. 626: III. Sequenz No. 6, Lacrimosa dies illa
Mum|Green Grass of Tunnel : múm|Green Green Grass Of Tunnel
not found: Mumford & Sons - Babel Album
not found: Mumford and Sons - Bridge Over Troubled Water
not found: Muse - Exogenesis Symphony 1-3
not found: Natalis Lafourcade - El Lugar Correcto
Native|By and By : Native American Flute Ensemble & Jessita Reyes|Surrounded By Healing (From "Native American Flutes for Massage, Meditation & Healing")
not found: Neil Young - Beautiful ❤️
not found: Neil Young - Harvest Moon
not found: Neil Young - Helpless
not found: Neil Young - Natural Beauty
not found: Neil Young - Old Man
Nobuo Uematsu|To Zanarkand : Nobuo Uematsu|To Zanarkand (Final Fantasy X)
not found

The Beatles|and I love her : The Beatles|And I Love Her - Remastered 2009
The Beautiful South|Dream a Little Dream of Me : The Karaoke Channel|Dream A Little Dream Of Me [In the Style of Beautiful South] {Karaoke Version}
not found: The Carpenters - We've Only Just Begun
The Chordettes|They Say It’s Wonderful : The Chordettes|They Say It's Wonderful
not found: The Cranberries - Oh, I thought the world of you
The Cure|A Night Like This : The Cure|A Night like This - 2006 Remaster
The Cure|Close to Me : The Cure|Close to Me - 2006 Remaster
The Cure|In Between Days : The Cure|In Between Days - 2006 Remaster
The Cure|Letter to Elise : The Cure|A Letter to Elise
The Cure|Lovesong : The Cure|Lovesong - 2010 Remaster
The Cure|Lullaby : The Cure|Lullaby - 2010 Remaster
The Cure|Pictures of You : The Cure|Pictures of You - 2010 Remaster
The Cure|Plainsong : The Cure|Plainsong - 2010 Remaster
The Cure|The Same Deep Water As You : The Cure|The Same Deep Water as You - 2010 Remaster
not found: The

In [140]:
gold_df = pd.DataFrame({'input_artist': orig_artist,
                        'artist': artist_list,
                        'input_track': orig_track,
                        'track': track_list,
                        'album': album_list,
                        'uri': uri_list})
gold_df

Unnamed: 0,input_artist,artist,input_track,track,album,uri
0,311,311,Amber,Amber,Greatest Hits '93 - '03,spotify:track:51UtgWS4z1eMPuLQOzPtNH
1,A Flock of Seagulls,A Flock Of Seagulls,I Ran (So Far Away),I Ran (So Far Away) - Single Edit,A Flock Of Seagulls (Deluxe),spotify:track:0nIDF0qdk5iAIla7rZxxys
2,A Perfect Circle,A Perfect Circle,Blue,Blue,Thirteenth Step,spotify:track:6tgTTBaIf0tO6lvDhoXfMg
3,A Perfect Circle,A Perfect Circle,By and Down the River,By And Down The River,Eat The Elephant,spotify:track:1gv7dUAsupITkccwEqkJqM
4,A Perfect Circle,A Perfect Circle,Feathers,Feathers,Eat The Elephant,spotify:track:26aVyDKN3MDAMoyO3VDB3g
...,...,...,...,...,...,...
831,Yo La Tengo,Yo La Tengo,My Little Corner of the World,My Little Corner of the World,I Can Hear The Heart Beating As One,spotify:track:2bY66Hf5NbHJ8Ai8eNmJHG
832,Yo-Yo Ma,Victor Yoran,Cello Suite No. 1 in G major,"Cello Suite No. 1 in G Major, BWV 1007: I. Pre...",#1 Classical - Suites for Violoncello I,spotify:track:0kp7C0F3Yp7PVCCWqdLE6T
833,alt-J,alt-J,Dissolve Me,Dissolve Me,An Awesome Wave,spotify:track:2Dv7PTwSoB17f3VFDIKw8m
834,beabadoobee,beabadoobee,glue,Glue Song,Glue Song,spotify:track:3iBgrkexCzVuPy4O9vx7Mf


In [141]:
with pd.option_context("display.max_rows", 999):
    display(gold_df.loc[gold_df['input_artist'].str.lower() != gold_df['artist'].str.lower()])

Unnamed: 0,input_artist,artist,input_track,track,album,uri
16,Alan Parsons Project,The Alan Parsons Project,Old and Wise,Old and Wise,Eye In The Sky (Expanded Edition),spotify:track:5Jt2AQv1c3RUF5ENtAYF1i
17,Alan Parsons Project,The Alan Parsons Project,Silence and I,Silence and I,Eye In The Sky (Expanded Edition),spotify:track:1MXd1awM0A7T7FxUPKAujf
18,Alan Parsons Project,The Alan Parsons Project,Time,Time,The Turn Of A Friendly Card (Expanded Edition),spotify:track:48yJZwYYDZX5GKFND7wDfC
24,Alison Kraus,Alison Krauss & Union Station,New Favorite,New Favorite,New Favorite,spotify:track:4iVeKORDsSUxkLzmme2bbL
32,America,American Folk Channel,The Boxer,The Boxer,American Country Folk for the Trucks 2,spotify:track:6jLXjtanB0Yp82aAV7VGsN
39,"Andrew Lloyd Webber, Sarah Brightman, Paul Mil...",Andrew Lloyd Webber,Pie Jesu,Pie Jesu,Diva: The Singles Collection,spotify:track:4D2Ha1TUtxuDqHk2Kb9DO4
60,Barber,Samuel Barber,Adagio for Strings,Barber: Adagio for Strings,Samuel Barber - Adagio,spotify:track:1CSaCKPIp2yCIDL3t7Fyau
67,Beatles,The Beatles,In My Life,In My Life - Remastered 2009,Rubber Soul (Remastered),spotify:track:3KfbEIOC7YIv90FIfNSZpo
72,Beyonce,Beyoncé,Plastic off the sofa,PLASTIC OFF THE SOFA,RENAISSANCE,spotify:track:6ufcuVInt0ocHrUimDjGlb
83,Bjork,Björk,Hyperballad,Hyperballad,Post,spotify:track:4z1fNs2B7KndCsvyPgrhq5


In [142]:
with pd.option_context("display.max_rows", 999):
    display(gold_df.loc[gold_df['input_track'].str.lower() != gold_df['track'].str.lower()])

Unnamed: 0,input_artist,artist,input_track,track,album,uri
1,A Flock of Seagulls,A Flock Of Seagulls,I Ran (So Far Away),I Ran (So Far Away) - Single Edit,A Flock Of Seagulls (Deluxe),spotify:track:0nIDF0qdk5iAIla7rZxxys
7,A Perfect Circle,A Perfect Circle,Three Libras,3 Libras,Mer De Noms,spotify:track:5kHkaBN8OEQlmXfQkACxSt
21,Alice in Chains,Alice In Chains,Down in a Hole,Down In A Hole (2022 Remaster),Dirt (2022 Remaster),spotify:track:7FRfYOql61DGDp9VPPe2qA
22,Alice in Chains,Alice In Chains,Rain When I Die,Rain When I Die (2022 Remaster),Dirt (2022 Remaster),spotify:track:6a9SPVrXyrlVh5Fh08f8Bz
40,Andy Williams,Andy Williams,Moon River,"Moon River (From ""Breakfast at Tiffany's"")",Moon River And Other Great Movie Themes,spotify:track:24AIahNHzBxm9S12peXbnG
47,Aphex Twin,Aphex Twin,Come to Daddy,Come To Daddy - Pappy Mix,Come To Daddy,spotify:track:5H6cQ9QrYP23R6PALr1KCc
50,Aphex Twin,Aphex Twin,Rhubarb,Donkey Rhubarb,Donkey Rhubarb,spotify:track:3IfmatoAjPtQOqteemdUnp
54,Arvo Pärt,Arvo Pärt,Spiegel im Spiegel,Spiegel im Spiegel - Version for Violin and Piano,Arvo Pärt: Alina,spotify:track:4wudbbceOZKjRlhTtbHvAs
56,Audrey Hepburn,Audrey Hepburn,Moon River,Moon River (From Breakfast at Tiffany's) [Rema...,Moon River (Breakfast at Tiffany's) [Remastered],spotify:track:1XwAKjAZ1xDZOcuyZoqce4
58,Babyface,Babyface,Every Time I Close My Eyes,Every Time I Close My Eyes (with Kenny G),The Day,spotify:track:2ezqQeBiC72gwMJoO4w1hA


In [None]:
gold_df.iloc[29]

In [89]:
# these are songs that look like covers or otherwise not the expected response from spotify search 
# (which is a bit wonky, doesn't like quotes and such)

bad_lookups = [
    28, 79, 110, 286, 675, 693, 696, 697, 698, 699, 721, 726, 754
]

for i in bad_lookups:
    print(gold_df.iloc[i])
    
# add manually, plus 'not found'


input_artist                                   America
artist                           American Folk Channel
input_track                                  The Boxer
track                                        The Boxer
album           American Country Folk for the Trucks 2
uri               spotify:track:6jLXjtanB0Yp82aAV7VGsN
Name: 28, dtype: object
input_artist                                                Bjork
artist                                                 Björkliden
input_track                                                  Yoga
track                                                  Yoga Nidra
album           Meditation Spa yoga healing therapy relaxation...
uri                          spotify:track:3AdIPVFDkqSSvBjKFcN0mr
Name: 79, dtype: object
input_artist                                    Cher
artist                                Cherie Nichole
input_track                               Moonstruck
track                                     Moonstruck
album         

In [90]:
gold_df = gold_df.drop(
    axis='index',
    labels=bad_lookups)

gold_df[['artist', 'track']].to_csv('gold.csv', index=False)

with pd.option_context("display.max_rows", 999):
    display(gold_df)

Unnamed: 0,input_artist,artist,input_track,track,album,uri
0,311,311,Amber,Amber,Greatest Hits '93 - '03,spotify:track:51UtgWS4z1eMPuLQOzPtNH
1,A Perfect Circle,A Perfect Circle,Blue,Blue,Thirteenth Step,spotify:track:6tgTTBaIf0tO6lvDhoXfMg
2,A Perfect Circle,A Perfect Circle,Gravity,Gravity,Thirteenth Step,spotify:track:1CO4BB8CaiQggtJ0R6GwGt
3,A Perfect Circle,A Perfect Circle,Judith,Judith,Mer De Noms,spotify:track:5KDNFlHAdDJ84fhK27c35X
4,A Perfect Circle,A Perfect Circle,Orestes,Orestes,Mer De Noms,spotify:track:6YUKAH1icwkA3U7fVp3amo
5,A Perfect Circle,A Perfect Circle,The Noose,The Noose,Thirteenth Step,spotify:track:6lvNLD1XRU5paMwWH0RGRI
6,A Perfect Circle,A Perfect Circle,Three Libras,3 Libras,Mer De Noms,spotify:track:5kHkaBN8OEQlmXfQkACxSt
7,A-Ha,a-ha,Take on Me,Take on Me,Hunting High and Low,spotify:track:2WfaOiMkCvy7F5fcp2zZ8L
8,ABBA,ABBA,Fernando,Fernando,Arrival,spotify:track:4BM8yJ0PzBi2ZewpMTOxtx
9,ABBA,ABBA,One of us,One Of Us,The Essential Collection,spotify:track:6zgtBUEkAfilJ2YEOvNexR


In [143]:
# get playlist id
# first create a playlist in UI to load songs
playlists = sp.user_playlists(os.getenv('SPOTIFY_USERNAME'))
while playlists:
    for i, playlist in enumerate(playlists['items']):
        if playlist['name'] != 'Reddit Prettiest Songs':
            continue
        print(playlist['id'])
        playlist_id = playlist['id']
        print("%4d %s %s" % (i + 1 + playlists['offset'], playlist['uri'],  playlist['name']))
    if playlists['next']:
        playlists = sp.next(playlists)
    else:
        playlists = None

08YFkbtTV6GBfNtjJ4PHDu
   1 spotify:playlist:08YFkbtTV6GBfNtjJ4PHDu Reddit Prettiest Songs


In [144]:
# must follow an oauth workflow to write a playlist in Spotify
# running this cell should request a spotify login and then redirect to an url
# paste whole url with id into form to authenticate

scope = "playlist-modify-public"

sp = spotipy.Spotify(auth_manager=spotipy.SpotifyOAuth(scope=scope,
                                                       client_id=os.getenv('SPOTIFY_CLIENT_ID'),
                                                       client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
                                                       redirect_uri="https://druce.ai"
                                                      ))

In [None]:
addlist = gold_df['uri'].to_list()
print (len(addlist))

while(addlist):
    sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'), 
                                playlist_id=playlist_id, 
                                tracks=addlist[-100:])
    addlist = addlist[:-100]
    print("added items, remaining ", len(addlist))


In [None]:
# manually add the ones that weren't found for some reason


In [168]:
# can run again and add any new tracks, either because OpenAI is a bit random, or new replies in thread
results = sp.user_playlist(os.getenv('SPOTIFY_USERNAME'), playlist_id,
                                fields='tracks,next,name')
tracks = results['tracks']

playlist_dict_by_uri = {}
playlist_dict_by_str = {}

artist_list = []
track_list = []
uri_list = []
popularity_list = []
album_list=[]

while True:
    for track_item in tracks['items']:
        track_dict = track_item['track']
        track_str = track_dict['artists'][0]['name']  + ' | ' + track_dict['name'][:15]
        uri = track_dict['uri']
        if track_str in playlist_dict_by_str:
            print(track_str)
        playlist_dict_by_str[track_str] = uri
        playlist_dict_by_uri[uri] = track_str
        
        uri_list.append(uri)
        artist_list.append(track_dict['artists'][0]['name'])
        track_list.append(track_dict['name'])
        album_list.append(track_dict['album']['name'])
        popularity_list.append(track_dict['popularity'])
        
    # check if there are more pages
    if tracks['next']:
        tracks = sp.next(tracks)
    else:
        break

print (len(list(playlist_dict_by_str.keys())))
print (len(list(playlist_dict_by_uri.keys())))


1036
1036


In [169]:
playlist_df = pd.DataFrame({'artist': artist_list,
                           'track': track_list,
                           'album': album_list,
                           'popularity': popularity_list,
                           })



In [170]:
with pd.option_context("display.max_rows", 9999):
    display(playlist_df.sort_values('popularity'))
    

Unnamed: 0,artist,track,album,popularity
915,Wolfgang Amadeus Mozart,"Requiem in D Minor, K. 626: III. Seq. 5, Confu...",Classical Goth,0
885,Giacomo Puccini,"Tosca, S. 69, Act II: Vissi d'arte",TikTok Klassische Musik,0
704,Pyotr Ilyich Tchaikovsky,"Romeo and Juliet, TH 42 ""Fantasy Overture""",Classical Love songs,0
789,Jaakko Aukusti,What If All Else Fails?,What If All Else Fails?,0
733,Astrud Gilberto,Photograph,BRAZIL ALL STARS : BOSSA NOVA HITS,0
688,Sergei Rachmaninoff,"14 Songs, Op. 34: No. 14, Vocalise",Piano Musik Zum Studieren,0
792,Johann Sebastian Bach,"Jesu Joy of Man's Desiring, BWV 147 (Arr. Hess)",Klassik zum Studieren,0
741,Frédéric Chopin,"Ballade No. 3 in A-Flat Major, Op. 47","Mozart, Chopin: Best Classical Music",0
609,Johann Sebastian Bach,"Orchestral Suite No. 3 in D Major, BWV 1068: I...",Sleep for Baby,0
937,Yusuf / Cat Stevens,Wild World,The Best Of Cat Stevens 20th Century Masters T...,0


In [146]:
gold_dict_by_uri = {}
gold_dict_by_str = {}
addlist = []
c = 0
for i, artist, track, uri in gold_df[['artist', 'track', 'uri']].itertuples():
    # print(artist, track, uri)
    track_str = artist + ' | ' + track[:15]
    if track_str not in playlist_dict_by_str:
        addlist.append([artist, track, uri])
        print(artist, track, uri)
    gold_dict_by_uri[uri]=track_str
    gold_dict_by_str['track_str']= uri
#     if track_str not in playlist_dict_by_str:
#         c += 1
#         print (c, track_str)
        
print(len(gold_dict_by_str.items()))
print(len(gold_dict_by_uri.items()))

A Flock Of Seagulls I Ran (So Far Away) - Single Edit spotify:track:0nIDF0qdk5iAIla7rZxxys
A Perfect Circle By And Down The River spotify:track:1gv7dUAsupITkccwEqkJqM
A Perfect Circle Feathers spotify:track:26aVyDKN3MDAMoyO3VDB3g
a-ha Take on Me spotify:track:2WfaOiMkCvy7F5fcp2zZ8L
Alice In Chains Down In A Hole (2022 Remaster) spotify:track:7FRfYOql61DGDp9VPPe2qA
Alison Krauss & Union Station New Favorite spotify:track:4iVeKORDsSUxkLzmme2bbL
Alter Bridge In Loving Memory spotify:track:0T9tbj6zsRKLiUx06sbOWX
American Folk Channel The Boxer spotify:track:6jLXjtanB0Yp82aAV7VGsN
Andrew Bird Beyond the Valley of the Three White Horses spotify:track:1DP0urxeM1qc2O9ky2vrDM
Aphex Twin Come To Daddy - Pappy Mix spotify:track:5H6cQ9QrYP23R6PALr1KCc
Audrey Hepburn Moon River (From Breakfast at Tiffany's) [Remastered] spotify:track:1XwAKjAZ1xDZOcuyZoqce4
BTS Dynamite spotify:track:5QDLhrAOJJdNAmCTJ8xMyW
Bob Dylan All Along the Watchtower spotify:track:0Fnb2pfBfu0ka33d6Yki17
Bon Iver Wash. spotify

In [147]:
addlist

[['A Flock Of Seagulls',
  'I Ran (So Far Away) - Single Edit',
  'spotify:track:0nIDF0qdk5iAIla7rZxxys'],
 ['A Perfect Circle',
  'By And Down The River',
  'spotify:track:1gv7dUAsupITkccwEqkJqM'],
 ['A Perfect Circle', 'Feathers', 'spotify:track:26aVyDKN3MDAMoyO3VDB3g'],
 ['a-ha', 'Take on Me', 'spotify:track:2WfaOiMkCvy7F5fcp2zZ8L'],
 ['Alice In Chains',
  'Down In A Hole (2022 Remaster)',
  'spotify:track:7FRfYOql61DGDp9VPPe2qA'],
 ['Alison Krauss & Union Station',
  'New Favorite',
  'spotify:track:4iVeKORDsSUxkLzmme2bbL'],
 ['Alter Bridge', 'In Loving Memory', 'spotify:track:0T9tbj6zsRKLiUx06sbOWX'],
 ['American Folk Channel',
  'The Boxer',
  'spotify:track:6jLXjtanB0Yp82aAV7VGsN'],
 ['Andrew Bird',
  'Beyond the Valley of the Three White Horses',
  'spotify:track:1DP0urxeM1qc2O9ky2vrDM'],
 ['Aphex Twin',
  'Come To Daddy - Pappy Mix',
  'spotify:track:5H6cQ9QrYP23R6PALr1KCc'],
 ['Audrey Hepburn',
  "Moon River (From Breakfast at Tiffany's) [Remastered]",
  'spotify:track:1XwAKj

In [148]:
[
    ['Alice In Chains',
  'Down In A Hole (2022 Remaster)',
  'spotify:track:7FRfYOql61DGDp9VPPe2qA'],
 ['Alter Bridge', 'In Loving Memory', 'spotify:track:0T9tbj6zsRKLiUx06sbOWX'],
 ['Aphex Twin',
  'Come To Daddy - Pappy Mix',
  'spotify:track:5H6cQ9QrYP23R6PALr1KCc'],
 ['Audrey Hepburn',
  "Moon River (From Breakfast at Tiffany's) [Remastered]",
  'spotify:track:1XwAKjAZ1xDZOcuyZoqce4'],
 ['BTS', 'Dynamite', 'spotify:track:5QDLhrAOJJdNAmCTJ8xMyW'],
 ['Bon Iver', 'Wash.', 'spotify:track:7rIhp6EWLNtM8qFIQruJPT'],
 ['Brandi Carlile',
  'I Remember Everything',
  'spotify:track:1J6OHcQjnrD0upP4skciHO'],
 ['Cocteau Twins',
  'Cherry-coloured Funk',
  'spotify:track:37pKTyMwalomKCZjxTc2QZ'],
 ['Cocteau Twins', 'Eperdu', 'spotify:track:1jzjoSLzXwYX3RNWAq5QrY'],
 ['Dave Matthews Band', '#41', 'spotify:track:2nklcPJ3YhYJfC2ibStXz9'],
 ['Dawid Podsiadło', 'Let You Down', 'spotify:track:1qpGMJi0ippCaMUOs7cz2q'],
 ['Ella Fitzgerald',
  'Dream A Little Dream Of Me - Single Version',
  'spotify:track:78MI7mu1LV1k4IA2HzKmHe'],
 ['Felix Mendelssohn',
  'Lieder ohne Worte (Songs without Words), Book 1, Op. 19b: Lied ohne Worte (Song without Words) No. 6 in G Minor, Op. 19, No. 6, "Venezianisches Gondellied" (Venetian Gondola Song)',
  'spotify:track:3awXX3NCCFczArHPXTdrNk'],
 ['Fleet Foxes', 'Can I Believe You', 'spotify:track:3pYB28IRzhtR5cHXLINchp'],
 ['Fleet Foxes', 'Montezuma', 'spotify:track:5Civg4JEWHLe2gqMG5vW6E'],
 ['Foo Fighters',
  'Everlong - Acoustic Version',
  'spotify:track:3QmesrvdbPjwf7i40nht1D'],
 ['George Frideric Handel',
  'Handel / Orch. Hale: Keyboard Suite No. 4 in D Minor, HWV 437: III. Sarabande',
  'spotify:track:1upQiytDIEZfl9ItruoXuC'],
 ['Gotye',
  'Somebody That I Used To Know',
  'spotify:track:4wCmqSrbyCgxEXROQE6vtV'],
 ['Grateful Dead',
  'Black Muddy River - 2013 Remaster',
  'spotify:track:6LpXex2zyOAeBaF6OxXqsZ'],
 ['Grateful Dead',
  'Brokedown Palace - 2013 Remaster',
  'spotify:track:362CS15hE1upuTKoWApzLn'],
 ['Grateful Dead',
  'Ship of Fools - 2013 Remaster',
  'spotify:track:5ks4ht7EDua6UsaI4Dk7Lz'],
 ['Grateful Dead',
  'Stella Blue - 2013 Remaster',
  'spotify:track:3yXTSJKglvmJYPYaQeyTSm'],
 ['Grateful Dead',
  'Unbroken Chain - 2013 Remaster',
  'spotify:track:0QT7prPZzXJKBYMVdWlE1Z'],
 ['Hans Zimmer', 'Oogway Ascends', 'spotify:track:1dZCiUTIbbvwOCALmzBh9y'],
 ['Hellbound Hearts', "Nancy's House", 'spotify:track:4tDlkhY0hD6hOL4RsOK5hC'],
 ['Indigo Girls', 'Kid Fears', 'spotify:track:72EQ2arxZ3o2yM2k3gqX2R'],
 ['Iron & Wine', 'Jezebel', 'spotify:track:66uRb1ytl0l3PHAAAjqySU'],
 ['Jacques Brel', 'Ne me quitte pas', 'spotify:track:6IRA4KOVbtiGiTdYoEThJN'],
 ['Jasmine Thompson',
  'You Are My Sunshine',
  'spotify:track:5qMjtrGnezsafpH4oqg7oU'],
 ['Jason Isbell', 'Cover Me Up', 'spotify:track:5qW6ZYct54PhKliCntyxRX'],
 ['Jeff Buckley', 'Forget Her', 'spotify:track:6UuVONmxXwTKN1ISepuAoQ'],
 ['Caleb & John',
  'Hallelujah Feeling',
  'spotify:track:5Gyg9RufRKGzTtzafw2bK6'],
 ['John Denver',
  'Leaving, On a Jet Plane - "Greatest Hits" Version',
  'spotify:track:3D8dwH690MXQRhtIZTSS9c'],
 ['Josh Groban',
  'Vincent (Starry, Starry Night)',
  'spotify:track:1w8krEiwe6FsADhCDzsazP'],
 ['José González', 'Far Away', 'spotify:track:1pNJhRzMY7QJP4eVC2Y3mz'],
 ['Lord Huron', 'Long Lost', 'spotify:track:3ouRySkzdJhbRmnUffQn3Q'],
 ['Ludwig van Beethoven',
  'Sonata in Fa maggiore, Op. 24 - La Primavera. Allegro',
  'spotify:track:0mmL4zHNljYGC7P76DMw8S'],
 ['Mac Miller', 'Self Care', 'spotify:track:5bJ1DrEM4hNCafcDd1oxHx'],
 ['Jacques Offenbach',
  'Gaîté parisienne: Barcarolle',
  'spotify:track:5PAdbw7X61H3fQ8OwaTuBO'],
 ['Orville Peck', 'Dead of Night', 'spotify:track:08unC8N1V1dEcqiyi06g6W'],
 ['Otis Redding',
  'Try a Little Tenderness',
  'spotify:track:36AlMHDBFwSsD3FQOy1R81'],
 ['Pearl Jam', 'Oceans', 'spotify:track:0LBmvPJYmtEJ7kkWvc3kbT'],
 ['Pearl Jam', 'The End', 'spotify:track:0PkOdIbNj8L3e0p1JGIfLO'],
 ['Pink Floyd', 'Comfortably Numb', 'spotify:track:5HNCy40Ni5BZJFw1TKzRsC'],
 ['Pink Floyd', 'Mother', 'spotify:track:1GEOSS415bZVHNuXWlCT6b'],
 ['Pink Floyd', 'On the Turning Away', 'spotify:track:7KA6U0WOHdGxWWLGPYN2Sb'],
 ['Pink Floyd', 'Time', 'spotify:track:3TO7bbrUKrOSPGRTB5MeCz'],
 ['Pink Floyd', 'Us and Them', 'spotify:track:1TKTiKp3zbNgrBH2IwSwIx'],
 ['Poets of the Fall',
  'Cradled in Love',
  'spotify:track:7saeg1XSIx0RhNMHY5km2e'],
 ['Procol Harum',
  'A Whiter Shade of Pale - Original Single Version',
  'spotify:track:78ZqE2tjAxbqEGGlvGnQfT'],
 ['Puscifer', 'Bedlamite', 'spotify:track:004k1lOcYagbbpUcITtDvx'],
 ['Queen',
  'Bohemian Rhapsody - Remastered 2011',
  'spotify:track:7tFiyTwD0nx5a1eklYtX2J'],
 ['Radiohead', 'No Surprises', 'spotify:track:10nyNJ6zNy2YVYLrcwLccB'],
 ['Simon & Garfunkel', 'Peggy-O', 'spotify:track:07XMAGYqts7KZYeqAx0qie'],
 ['The Black Keys', 'Weight of Love', 'spotify:track:3xMQOd1C3TXsjQ3pmzOmkC'],
 ['The Limeybirds', 'The Flower Duet', 'spotify:track:53dVtSnbWjNl2nIRYv2L6p'],
 ['The Killers', 'Romeo And Juliet', 'spotify:track:1kfrnPViuzKdNwmH21ehLg'],
 ['The Mamas & The Papas',
  'Dedicated To The One I Love',
  'spotify:track:1mFrjW8e8fuAOowlU3Q3Dr'],
 ['Victor Yoran',
  'Cello Suite No. 1 in G Major, BWV 1007: I. Prelude',
  'spotify:track:0kp7C0F3Yp7PVCCWqdLE6T']]


[['Alice In Chains',
  'Down In A Hole (2022 Remaster)',
  'spotify:track:7FRfYOql61DGDp9VPPe2qA'],
 ['Alter Bridge', 'In Loving Memory', 'spotify:track:0T9tbj6zsRKLiUx06sbOWX'],
 ['Aphex Twin',
  'Come To Daddy - Pappy Mix',
  'spotify:track:5H6cQ9QrYP23R6PALr1KCc'],
 ['Audrey Hepburn',
  "Moon River (From Breakfast at Tiffany's) [Remastered]",
  'spotify:track:1XwAKjAZ1xDZOcuyZoqce4'],
 ['BTS', 'Dynamite', 'spotify:track:5QDLhrAOJJdNAmCTJ8xMyW'],
 ['Bon Iver', 'Wash.', 'spotify:track:7rIhp6EWLNtM8qFIQruJPT'],
 ['Brandi Carlile',
  'I Remember Everything',
  'spotify:track:1J6OHcQjnrD0upP4skciHO'],
 ['Cocteau Twins',
  'Cherry-coloured Funk',
  'spotify:track:37pKTyMwalomKCZjxTc2QZ'],
 ['Cocteau Twins', 'Eperdu', 'spotify:track:1jzjoSLzXwYX3RNWAq5QrY'],
 ['Dave Matthews Band', '#41', 'spotify:track:2nklcPJ3YhYJfC2ibStXz9'],
 ['Dawid Podsiadło', 'Let You Down', 'spotify:track:1qpGMJi0ippCaMUOs7cz2q'],
 ['Ella Fitzgerald',
  'Dream A Little Dream Of Me - Single Version',
  'spotify:trac

In [149]:
len(addlist)

115

In [150]:
addlist2 = [a[2] for a in addlist]

print (len(addlist2), 'items')

while(addlist2):
    sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'), 
                                playlist_id=playlist_id, 
                                tracks=addlist2[-100:])
    addlist2 = addlist2[:-100]
    print("added items, remaining ", len(addlist2))


115 items
added items, remaining  15
added items, remaining  0
