In [1]:
import plotly.express as px
from jupyter_dash import JupyterDash

import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output


In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

from time import sleep
from random import randint

In [3]:
# get spotify credentials
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
client_credentials_manager = SpotifyClientCredentials(client_id= '6426e18767ac42d78a9c08f41cacba65',
                                                     client_secret= '4a2cdab50e1c4ecd9fc62cae20664e29')
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
# put together a list of various standard major & minor chords
sl1 = ['A', 'Bb', 'B', 'C', 'C#', 'Db', 'D', 'Eb', 'E', 'F', 'F#', 'G', 'G#', 'Ab' ]
sl2 = [s + 'maj' for s in sl1]
sl3 = [s + 'm' for s in sl1]
sl4 = [s + 'min' for s in sl1]

standardList = sl1+sl2+sl3+sl4

In [5]:
# load the model
import pickle
# load the model from disk
filename = 'dtree_model_0924.sav'
dtree = pickle.load(open(filename, 'rb'))

In [6]:
# load the chord data frame
dfSong = pd.read_csv('playlist_df.csv')

In [7]:
def find_link_from_search(artist, song):
    # url = 'https://www.ultimate-guitar.com/search.php?search_type=title&value=weezer%20island%20in%20the%20sun'
    # search UG for artist and song
    
    if ' - ' in song:
        song = song.split(' - ')[0]
    
    url = 'https://www.ultimate-guitar.com/search.php?search_type=title&value='+artist+'%20'+song

    # get the soup
    sleep(randint(1,4))
    text = requests.get(url).text
    soup = BeautifulSoup(text)

    # do some crafty splitting to get the tab URL
    # first few results are often ads...
    searchResults = str(soup).split('"tab_url":"')[3:6] 

    new_link = str(searchResults).split("},{")[0][2:-1]

    return new_link

In [8]:
def parse_title(url):
    artist_and_title = url.split('tab/')[1]  

    if '/' in artist_and_title:
        artist = artist_and_title.split('/')[0]  
        title  = artist_and_title.split('/')[1]
        title  = title.split('-ch')[0]
    else:
        artist = 'unknown'
        title = 'unknown'
    
    return artist, title

In [9]:
def scrape(url): 
    output_list = []
    chordlist   = []
    
    sleep(randint(1,4))
    response = requests.get(url)
    html_soup = BeautifulSoup(response.text, 'lxml')

    lessSoupy = html_soup.find('div', class_='js-store')
    stringySoup = str(lessSoupy)

    splitSoup = stringySoup.split('[ch]')[1:-1]

    for ielement in range(0, len(splitSoup)):
        chordlist.append(str(splitSoup[ielement]).split('[/ch')[0])
        
    artist_name, song_name = parse_title(url)
    output_list.append(("Song", song_name))
    output_list.append(("Artist", artist_name))

#     allChords = ("All Chords", chords)
    #output_list.append(allChords)

    chords = set(chordlist)
    chords = ",".join(chords)
    chords = ("Chords", chords)
    output_list.append(chords)
#     output_list.append(allChords)

    song_info = dict(output_list)

    return song_info

In [10]:
def getFeatures(dfSong):
    # get feature dataframe for the known song.
    nUniqueChords   = []
    nHardChords     = []
    difficultyLabel = []

    spotFeatures = {}

    # spotFeatures['key'] = []
    spotFeatures['tempo'] = []
    # spotFeatures['duration_ms'] = []
    spotFeatures['time_signature'] = []
    spotFeatures['energy'] = []

    for iSong in range(0, len(dfSong)):
        songName   = dfSong['Song'][iSong].replace('-', ' ').title()
        artistName = dfSong['Artist'][iSong].replace('-', ' ').title()

        # search spotify api
        result = sp.search(songName, limit=10) 
        nItems = len(result['tracks']['items'])

        # match artist info, and get URI
        for iArtist in range(0, nItems):    
            if result['tracks']['items'][iArtist]['artists'][0]['name'] == artistName:
               songURI = result['tracks']['items'][iArtist]['uri']

        # if we successfully found the song in spotify, go ahead and gather featers
        #     if songURI:
        # pull audio features for a songURI
        features = sp.audio_features(songURI)

    #     spotFeatures['key'].append(features[0]['key'])
        spotFeatures['tempo'].append(features[0]['tempo'])
    #     spotFeatures['duration_ms'].append(features[0]['duration_ms'])
        spotFeatures['time_signature'].append(features[0]['time_signature'])
        spotFeatures['energy'].append(features[0]['energy'])

        # append the difficulty label
        # difficultyLabel.append(dfSong[0][3].split(',')[1])

        # get the chord information
    #     uniqChords = dfSong[0][2].split(',')[1:]
        uniqChords = dfSong['Chords'][iSong].split(',')[1:]
        nUniqueChords.append(len(uniqChords))

        tmpHard = []
        for kCh in range(0, nUniqueChords[iSong]):
            tmpHard.append(uniqChords[kCh] not in standardList)

        nHardChords.append(sum(tmpHard));

    # pack things up in a df called allFeatures    
    spotFeatures = pd.DataFrame.from_dict(spotFeatures)        
    allFeatures = spotFeatures

    allFeatures['n_unique_chords']    = nUniqueChords
    allFeatures['n_difficult_chords'] = nHardChords
    
    return allFeatures

In [11]:
def check_db_for_playlist(playlist):
    pl_id   = pl_link.split('playlist/')[1]
    pl_id   = pl_id.split('?')[0]
    
    pl_tracks  = []
    pl_artists = []

    for nItem in range(0, len(results['tracks']['items'])):
        pl_tracks.append(results['tracks']['items'][nItem]['track']['name'])
        pl_artists.append(results['tracks']['items'][nItem]['track']['artists'][0]['name'])

    return pl_tracks, pl_artists

In [12]:
def chordSimilarity(dfSong, novel_song_label):
    clist   = dfSong['Chords'][0].split(',')
    nchords = len(clist)

    nSameChords     = []
    propSameChords  = []
    exactSame       = []
    similar_nchords = []
    sim_nchords_ix  = []
    for iSong in range(1, len(dfSong)):
        similar_nchords.append(len(dfSong['Chords'][iSong].split(',')) <= nchords )

        if similar_nchords[iSong-1]==True & novel_song_label[0] == novel_song_label[iSong]:
            sim_nchords_ix.append(iSong)
            nSameChords.append( len([ele for ele in clist if(ele in dfSong['Chords'][iSong].split(','))]) )
            propSameChords.append( len([ele for ele in clist if(ele in dfSong['Chords'][iSong].split(','))]) / len(dfSong['Chords'][iSong].split(','))  )

    ixMostSame = np.argmax(propSameChords)
    ixMostSame = sim_nchords_ix[ixMostSame]
    
    ch_song_out = dfSong.iloc[ixMostSame]
    
    # fuck it, do it again!
    nSameChords     = []
    propSameChords  = []
    exactSame       = []
    similar_nchords = []
    sim_nchords_ix  = []
    for iSong in range(1, len(dfSong)):
        similar_nchords.append(len(dfSong['Chords'][iSong].split(',')) <= nchords )

        if similar_nchords[iSong-1]==True:
            if float(novel_song_label[0]) == float(novel_song_label[iSong-1]):
                sim_nchords_ix.append(iSong)
                nSameChords.append( len([ele for ele in clist if(ele in dfSong['Chords'][iSong].split(','))]) )
                propSameChords.append( len([ele for ele in clist if(ele in dfSong['Chords'][iSong].split(','))]) / len(dfSong['Chords'][iSong].split(','))  )

    ixMostSame = np.argmax(propSameChords)
    ixMostSame = sim_nchords_ix[ixMostSame]
    
    ez_song_out = dfSong.iloc[ixMostSame]

    return ez_song_out, ch_song_out

In [13]:
# ------------------------------------------------------------------------------
# DASH DASH DASH DASH DASH DASH DASH

app = JupyterDash(__name__)


# ------------------------------------------------------------------------------
# App layout
app.layout = html.Div([
    
    html.H4("Enter a playlist you enjoy"),

    html.Div([#"Input: ",
          dcc.Input(id='playlist', 
                    placeholder='spotify url',
                    debounce= True,
                    type='text')]),
    
    html.Br(),
    
    html.H4("Enter a song you know"),
    
    html.Div([#"Input: ",
              dcc.Input(id='known_song', 
                        placeholder='artist, title',
                        debounce= True,
                        type='text')]),   

    html.Div(id='in_song_chords'),
    html.Br(),

    html.H4("Here's a song at your level"),
    html.Div(id='ez_out_song'),
    html.Div(id='ez_out_artist'),
    html.Div(id='ez_out_chords'),
    
    html.H4("Here's a song to challenge yourself"),
    html.Div(id='ch_out_song'),
    html.Div(id='ch_out_artist'),
    html.Div(id='ch_out_chords'),

])

# ------------------------------------------------------------------------------
# Callbacks
@app.callback(
#     Output("out-all-types", "children"),
#     [Input("input_{}".format(_), "value") for _ in ALLOWED_TYPES],

#    Output(component_id='out_song', component_property='children'),
    [Output(component_id='in_song_chords', component_property='children'),
     Output(component_id='ez_out_song', component_property='children'),
     Output(component_id='ez_out_artist', component_property='children'),
     Output(component_id='ez_out_chords', component_property='children'),
     
     Output(component_id='ch_out_song', component_property='children'),
     Output(component_id='ch_out_artist', component_property='children'),
     Output(component_id='ch_out_chords', component_property='children')
    ],
    
    [Input(component_id='known_song', component_property='value'),
     Input(component_id='playlist', component_property='value')]
)
def getRec(track, playlist):

    song   = str(track).split(', ')[1]
    artist = str(track).split(', ')[0]
    
    new_link = find_link_from_search(artist, song)
    
    if song.replace(' ', '-').lower() in list(dfSong['Song']):
        knownSong_ix = list(dfSong['Song']).index(song.replace(' ', '-').lower())
        allFeatures  = getFeatures(dfSong)
    else:
        song_info    = scrape(new_link)
        df_knownSong = pd.DataFrame.from_dict(song_info, orient='index').transpose()    
        allFeatures  = getFeatures(df_knownSong)
    
    # predict difficulty level
    int_song_label = dtree.predict(allFeatures)

    ez_song_out, ch_song_out = chordSimilarity(dfSong, int_song_label)
#     if int_song_label[knownSong_ix] == 2:
#         str_song_label = 'intermediate'
#     elif int_song_label[knownSong_ix] == 1:
#         str_song_label = 'advanced'
#     else:
#         str_song_label = 'novice'
    
#     return str(str_song_label)
    return dfSong['Chords'][knownSong_ix], ez_song_out['Song'].replace('-', ' ').title(), ez_song_out['Artist'].replace('-', ' ').title(), ez_song_out['Chords'], ch_song_out['Song'].replace('-', ' ').title(), ch_song_out['Artist'].replace('-', ' ').title(), ch_song_out['Chords']
    
if __name__ == "__main__":
     app.run_server(debug=False)
#      app.run_server(mode='inline')

 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [25/Sep/2020 08:21:40] "[37mGET /_alive_11854a0f-38ba-4ea8-b36e-5ee1bfd85088 HTTP/1.1[0m" 200 -


Dash app running on http://127.0.0.1:8050/


127.0.0.1 - - [25/Sep/2020 08:21:41] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:21:41] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:21:41] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:21:42] "[1m[35mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [25/Sep/2020 08:21:55] "[1m[35mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [25/Sep/2020 08:22:11] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:22:36] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:22:36] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:22:36] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:22:36] "[37mGET /_favicon.ico?v=1.16.1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [25/Sep/2020 08:22:36] "[1m[35mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [25/Sep/2020 08:40:56] "[1m[35mPOST /_da