In [5]:
import os
from datetime import datetime
import time
import dotenv
import pandas as pd

import requests
import requests.auth

import praw

import openai

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# load secrets from .env into environment variables
# (the os.getenv(...) calls in the cells below read the API credentials from there)
dotenv.load_dotenv()

# display the installed PRAW version as this cell's output
praw.__version__

'7.7.0'

**TODO:** turn these notes into a proper README.
 - The objective is to use OpenAI for named entity extraction to extract all the songs from [this reddit thread](https://www.reddit.com/r/AskReddit/comments/12viv4v/what_is_the_prettiest_song_you_ever_heard_in_your/) and make a Spotify playlist
 - use Reddit PRAW API to download all the comments (get [Reddit API key](https://www.reddit.com/prefs/apps))
 - use the OpenAI API with a prompt like "extract all the songs from this text to CSV" (get an [OpenAI API key](https://platform.openai.com/account/api-keys))
 - use Spotify API to make a playlist (get [Spotify API key](https://developer.spotify.com/documentation/web-api/tutorials/getting-started))
 - works, needed a lot of scrubbing, but about 1 day of work, wouldn't have been possible to do a 700-song playlist manually without a team of Mechanical Turks or something
 - If I wanted to go nuts, I would process comments individually and save a file for each comment's extracted songs; that would make it easier to track down what OpenAI gets wrong and give a resumable, retryable, repeatable process
 - output a big GitHub table of all the songs in the README, with links to the Spotify playlist and the individual songs
 
 Needs a `.env` file holding the API credentials, created from `dot-env-template`.
 

## Get comments from a reddit posting

In [None]:
def getPraw():
    """Create a PRAW ``Reddit`` client.

    The client id and secret are read from the ``CLIENT_ID`` and
    ``CLIENT_SECRET`` environment variables; a variable that is not set is
    passed through as ``None``.

    Returns:
        The ``praw.Reddit`` instance.
    """
    credentials = {
        "client_id": os.getenv('CLIENT_ID'),
        "client_secret": os.getenv('CLIENT_SECRET'),
    }
    return praw.Reddit(user_agent="prettiest_song/0.001", **credentials)


def getAll(r, submissionId, verbose=True):
    """Return every comment of a submission as one flat list.

    Args:
        r: client object exposing ``submission(id)`` (the value returned by
            ``getPraw()`` in this notebook).
        submissionId: id of the submission whose comments are wanted.
        verbose: accepted for compatibility; not used by this function.

    Returns:
        The result of ``.list()`` on the submission's comments, after
        ``replace_more(limit=None)`` has been called on them.
    """
    post = r.submission(submissionId)
    forest = post.comments
    # limit=None: no cap on how many "more comments" placeholders get replaced
    forest.replace_more(limit=None)
    return forest.list()


In [None]:
# id of the AskReddit thread (the ".../comments/12viv4v/..." part of its URL)
submission = "12viv4v"
# timestamps printed before and after the download show how long it took
print(datetime.now())
r = getPraw()
res = getAll(r, submission)
print(datetime.now())

print("retrieved ", len(res), 'comments')

In [None]:
# keep only the comments whose score (karma) is 5 or more
res3 = [comment for comment in res if comment.score >= 5]
# peek at the first surviving comment: its text and its score
res3[0].body, res3[0].score

## Extract all songs using OpenAI

In [None]:
# authenticate the OpenAI client with the key from the environment (.env)
openai.api_key = os.getenv('OPENAI_API_KEY')
# show the entries of the model-list response as a table
pd.DataFrame(openai.Model.list()["data"])


In [None]:
# Send the filtered comments to the chat model in batches and append each reply
# to bronze.txt (raw, un-validated model output; one reply per batch).

COMMENTS_PER_PROMPT = 20  # add up to 20 posts to the prompt

# Instructions placed in front of every batch. The example rows show the model
# the exact header and quoting we want back.
prompt_header = """Define an example CSV file output as follows: 
"artist","song_title"
"The Beatles","Yesterday"
"Eagles","Hotel California"

from the following text, extract all song titles and artists, and return a CSV file output of extracted artists and song titles exactly as defined above:
        
"""

slist = res3.copy()

# `with` closes the file even when an API call raises part-way through, so the
# replies collected so far are not left in an unclosed handle.
# utf-8 is written explicitly because pd.read_csv reads utf-8 by default.
with open('bronze.txt', 'w', encoding='utf-8') as outfile:
    while slist:
        batch = slist[:COMMENTS_PER_PROMPT]
        slist = slist[COMMENTS_PER_PROMPT:]

        # Separate the comments with blank lines; without a delimiter the last
        # word of one comment is glued to the first word of the next.
        prompt = prompt_header + "\n\n".join(comment.body for comment in batch)

        response = openai.ChatCompletion.create(
            model='gpt-3.5-turbo-0301',
            messages=[{"role": "user",
                       "content": prompt}])

        outfile.write(response['choices'][0]['message']['content'])
        outfile.write('\n\n')
        outfile.flush()  # keep finished batches on disk while the loop runs
        print('.', end='')  # one dot per completed batch


In [None]:
# will have to tweak the file to get it to load
# NOTE(review): bronze.txt is the raw concatenation of one model reply per
# batch, separated by blank lines, so it is presumably not a single clean CSV
# (e.g. repeated header rows) until edited by hand — confirm against the file.

df = pd.read_csv("bronze.txt")
df


In [None]:
# Write silver.csv: exact duplicate rows and rows with a missing value removed,
# sorted by artist and then title.
# The sort key must match the CSV header exactly: the column is "song_title"
# (see the example header in the extraction prompt); a misspelled name makes
# sort_values raise KeyError.
(
    df.drop_duplicates()
    .dropna()
    .sort_values(["artist", "song_title"])
    .to_csv('silver.csv', index=False)
)

# tweak further to get to gold.csv



## Load into a Spotify playlist


In [2]:
# Spotify client authenticated with the app's id/secret from the environment.
# It is used for the playlist lookup and track search below; writing to a
# playlist is done later with a separate OAuth-authenticated client.
client_credentials_manager = SpotifyClientCredentials(
    client_id=os.getenv('SPOTIFY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
)

sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)


In [7]:
# get playlist ids
# first create a playlist in UI to load songs
# Pages through the user's playlists and prints the id, position and URI of
# every playlist named 'reddit'.
playlists = sp.user_playlists(os.getenv('SPOTIFY_USERNAME'))
while playlists:
    for i, playlist in enumerate(playlists['items']):
        if playlist['name'] == 'reddit':
            print(playlist['id'])
            # position is 1-based across pages: index in page + page offset
            print("%4d %s %s" % (i + 1 + playlists['offset'], playlist['uri'], playlist['name']))
    # follow the paging link until there is no further page
    playlists = sp.next(playlists) if playlists['next'] else None

5TCTR1JE09PNJU79kiZgHZ
   1 spotify:playlist:5TCTR1JE09PNJU79kiZgHZ reddit


In [51]:
# load gold.csv, the song list obtained by further manual tweaking of silver.csv
df = pd.read_csv("gold.csv")
df


Unnamed: 0,artist,song_title
0,311,Amber
1,A Dream for Us,The Appleseed Cast
2,A Horse with No Name,America
3,A Perfect Circle,Blue
4,A Perfect Circle,Gimme
...,...,...
716,Yazoo,Situation
717,Yazoo,Winter Kills
718,Yeah Yeah Yeahs,Maps
719,Yes,And you and I


In [None]:
# Resolve every (artist, title) row of df to a Spotify track.
#   mylist    -- URIs of the chosen tracks, in row order, without repeats
#   fail_list -- (artist, title) pairs for which the search returned nothing
dedupe = set()  # ids of tracks already placed in mylist
mylist = []
fail_list = []

# Select the two columns by name and drop the index, so the unpacking does not
# depend on how many other columns the CSV happens to carry.
for artist, title in df[["artist", "song_title"]].itertuples(index=False, name=None):
    query_str = 'artist:%s track:%s' % (artist, title)
    track_results = sp.search(q=query_str, type='track', limit=10, offset=0, market='US')
    results = track_results['tracks']['items']

    if not results:
        fail_list.append((artist, title))
        print("not found:", artist, "-", title)
        continue

    # Pick the most popular candidate; on ties the earliest search hit wins.
    # A separate name is used so the notebook-level `r` (the Reddit client)
    # is not overwritten by this loop.
    best = max(results, key=lambda track: track['popularity'])

    # failsafe to never put same track twice
    if best['id'] in dedupe:
        continue
    dedupe.add(best['id'])
    mylist.append(best['uri'])

    # log the requested song followed by what Spotify matched it to
    print(artist, '--', title)
    print('  ',
          best['artists'][0]['name'], '|',
          best['name'], '|',
          best['album']['name'], '|',
          best['album']['release_date'], '|',
          best['popularity'])


In [None]:
# must follow an oauth workflow to write a playlist in Spotify
# running this cell should request a Spotify login and then redirect to a URL
# paste the whole URL with id into the form to authenticate

scope = "playlist-modify-public"

# replaces the earlier `sp` client with one that carries the user's authorization
sp = spotipy.Spotify(
    auth_manager=spotipy.SpotifyOAuth(
        scope=scope,
        client_id=os.getenv('SPOTIFY_CLIENT_ID'),
        client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
        redirect_uri="https://druce.ai",
    )
)

In [None]:
# Upload the collected track URIs to the playlist in batches.
PLAYLIST_ID = '5TCTR1JE09PNJU79kiZgHZ'  # id printed by the playlist lookup cell
TRACKS_PER_REQUEST = 100                # tracks sent per add-tracks call

addlist = mylist.copy()
print (len(addlist))

while addlist:
    # Take batches from the FRONT of the list so the playlist keeps the order
    # of mylist; slicing from the tail would upload the last block first and
    # leave the playlist in reversed 100-track blocks.
    sp.user_playlist_add_tracks(os.getenv('SPOTIFY_USERNAME'),
                                playlist_id=PLAYLIST_ID,
                                tracks=addlist[:TRACKS_PER_REQUEST])
    addlist = addlist[TRACKS_PER_REQUEST:]
    print("added items, remaining ", len(addlist))


In [None]:
# manually add the ones that weren't found for some reason
