To get this running:

- Function to get the UkuTabs tab for a given song
- Function to get the songs in a Spotify playlist

To improve results:

- Allow for minor keys
- Add 7th chords and chord identification
- Function to get tabs from other websites
- Guess the key based on multiple tabs 

To clean up code:

- Write piano as a class
- Put tests in a reasonable place

In [1]:
from bs4 import BeautifulSoup
from requests import get
from collections import Counter
import re

In [2]:
# Collect every distinct link on the "99 most popular" page whose href
# contains "https://ukutabs".
url = "https://ukutabs.com/top-tabs/99-most-popular-ukulele-songs/all-time/"
response = get(url)
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so the parse can differ between machines.
soup = BeautifulSoup(response.text, "html.parser")
hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
# Anchors without an href yield None and are skipped; sorting the
# de-duplicated set keeps the list order stable from run to run.
urls = sorted({href for href in hrefs if href and "https://ukutabs" in href})

In [43]:
def get_chords(url):
    """Fetch ``url`` and list the chords marked up on the page.

    Args:
        url: address of the page to download.

    Returns:
        A list with the ``name`` attribute of every element matching the CSS
        selector ``.hoverchord``, in the order ``select`` yields them
        (presumably one entry per chord occurrence in the tab -- verify
        against the site's markup).

    Raises:
        KeyError: if a matched element has no ``name`` attribute.
    """
    response = get(url)
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so results can differ
    # between machines.
    soup = BeautifulSoup(response.text, "html.parser")
    return [tag["name"] for tag in soup.select(".hoverchord")]

In [95]:
# The twelve notes of one octave in ascending half-step order, spelled with flats.
keyboard = ["C", "Db", "D", "Eb", "E", "F", "Gb", "G", "Ab", "A", "Bb", "B"]

# Maps every supported spelling onto the keyboard spelling: each flat note is
# also reachable as the note below it plus "#", and every keyboard note maps
# to itself.
sharp_to_flat = {}
for position, note in enumerate(keyboard):
    if "b" in note:
        sharp_to_flat[keyboard[position - 1] + "#"] = note
    sharp_to_flat[note] = note

# Index of each note on the keyboard, i.e. its distance from C in half steps.
halfsteps = {note: position for position, note in enumerate(keyboard)}

# "M1".."M7": half steps from C up to each of C, D, E, F, G, A, B.
major_interval_to_halfstep = {}
for degree, white_key in enumerate("CDEFGAB", start=1):
    major_interval_to_halfstep["M" + str(degree)] = halfsteps[white_key]
# "-M2".."-M7": the same intervals taken downwards.
for degree in range(2, 8):
    upward_name = "M" + str(degree)
    major_interval_to_halfstep["-" + upward_name] = -major_interval_to_halfstep[upward_name]

# "m2", "m3", "m6", "m7": half steps from C up to Db, Eb, Ab, Bb.
minor_interval_to_halfstep = {}
for degree, flat_key in zip((2, 3, 6, 7), ("Db", "Eb", "Ab", "Bb")):
    minor_interval_to_halfstep["m" + str(degree)] = halfsteps[flat_key]
# ... and their downward counterparts.
for degree in (2, 3, 6, 7):
    upward_name = "m" + str(degree)
    minor_interval_to_halfstep["-" + upward_name] = -minor_interval_to_halfstep[upward_name]

# Every named interval, major entries first, then minor.
interval_to_halfstep = dict(major_interval_to_halfstep)
interval_to_halfstep.update(minor_interval_to_halfstep)

# Reverse lookup: signed half-step count -> interval name.
halfstep_to_interval = {steps: name for name, steps in interval_to_halfstep.items()}

# returns key up/down an interval
def get_interval(start, interval_name):
    """Return the keyboard note reached by moving ``interval_name`` from ``start``.

    Args:
        start: a note spelled as in ``keyboard`` (a key of ``halfsteps``).
        interval_name: a key of ``interval_to_halfstep``; names starting with
            "-" move downwards.

    Returns:
        The note at the resulting position, wrapping around the octave.

    Raises:
        KeyError: if ``start`` or ``interval_name`` is not in its table.
    """
    origin = halfsteps[start]
    shift = interval_to_halfstep[interval_name]
    # the modulo wraps positions that run off either end of the keyboard
    return keyboard[(origin + shift) % len(keyboard)]


assert get_interval("A", "M4") == "D"
assert get_interval("A", "-M4") == "E"

def clean_chord(chord):
    """Split a chord name into its normalised root and a minor flag.

    Args:
        chord: chord name such as "A", "Bbm" or "F#7". The root is the first
            character plus an optional "b" or "#" directly after it.

    Returns:
        A ``(root, minor)`` tuple. ``root`` is the root looked up in
        ``sharp_to_flat``. ``minor`` is True when the text after the root
        still contains an "m" once every "maj" has been removed, so "Am",
        "Am7" and "Bdim" count as minor while "Cmaj7" does not.

    Raises:
        IndexError: if ``chord`` is empty.
        KeyError: if the root is not a key of ``sharp_to_flat``.
    """
    if len(chord) > 1 and chord[1] in ("b", "#"):
        root, quality = chord[:2], chord[2:]
    else:
        root, quality = chord[0], chord[1:]
    # "maj" contains an "m" but marks a major chord (e.g. "Cmaj7"); drop it
    # before looking for the minor marker so major sevenths are not misread.
    minor = "m" in quality.replace("maj", "")
    return sharp_to_flat[root], minor
    
    
# hypothesis: identifying I, iii, IV, V, vi is enough
def chord_to_major_key(chord):
    """List the major keys in which ``chord`` could be one of I, iii, IV, V, vi.

    Args:
        chord: chord name accepted by ``clean_chord``.

    Returns:
        A sorted list of key names. A major chord gives three candidates (the
        keys where it is the I, IV or V chord); a minor chord gives two (the
        keys where it is the iii or vi chord).
    """
    root, minor = clean_chord(chord)
    if minor:
        # the key lies a major third (chord is iii) or a major sixth (vi) below
        candidates = [get_interval(root, step) for step in ("-M3", "-M6")]
    else:
        # the key is the root itself (I), or lies four (IV) or five (V)
        # scale steps below it
        candidates = [root, get_interval(root, "-M4"), get_interval(root, "-M5")]
    return sorted(candidates)


assert chord_to_major_key("A") == ["A", "D", "E"]
assert chord_to_major_key("C") == ["C", "F", "G"]
assert chord_to_major_key("Bb") == ["Bb", "Eb", "F"]
assert chord_to_major_key("A#") == ["Bb", "Eb", "F"]
assert chord_to_major_key("Am") == ["C", "F"]

In [121]:
def get_key_from_chords(chords):
    """Guess the major key of a song from its chords by majority vote.

    Every chord casts one vote for each key returned by
    ``chord_to_major_key``; the key with the most votes wins.

    Args:
        chords: iterable of chord names, one entry per occurrence (repeated
            chords vote repeatedly).

    Returns:
        The name of the most-voted key. Among keys with equal votes the one
        that received a vote first is returned.

    Raises:
        IndexError: if ``chords`` is empty, since there is no vote to pick.
    """
    key_frequencies = Counter()
    for chord in chords:
        # Counter.update adds 1 per listed key: one vote per candidate key
        key_frequencies.update(chord_to_major_key(chord))
    return key_frequencies.most_common(1)[0][0]
        
# Check key detection on three tabs. NOTE(review): judge_url, riptide_url and
# delilah_url are not defined in this part of the notebook -- presumably they
# are set in another cell.
urls = [judge_url, riptide_url, delilah_url]
chords = list(map(get_chords, urls))
keys = list(map(get_key_from_chords, chords))


assert keys == ["C", "Db", "D"]

In [115]:
def get_interval_between(low, high):
    """Return the name of the ascending interval from ``low`` up to ``high``.

    Args:
        low: starting note, spelled as in ``keyboard``.
        high: target note, spelled as in ``keyboard``.

    Returns:
        The ``halfstep_to_interval`` name for the upward distance between the
        notes (0-11 half steps), e.g. "M5" or "m3". A distance of six half
        steps that has no table entry is reported as "A4" (augmented fourth).

    Raises:
        ValueError: if either note is not in ``keyboard``.
    """
    # called `steps` so the module-level `halfsteps` table is not shadowed
    steps = (keyboard.index(high) - keyboard.index(low)) % len(keyboard)
    if steps == 6 and steps not in halfstep_to_interval:
        # The tritone is neither a major nor a minor interval; name it
        # explicitly instead of failing with a KeyError. The second character
        # is still a scale-degree digit, like every other interval name.
        return "A4"
    return halfstep_to_interval[steps]

# Scale degree (1-7) -> upper-case Roman numeral.
number_to_roman = dict(enumerate(["I", "II", "III", "IV", "V", "VI", "VII"], start=1))


def get_chord_numbers(chords, key):
    """Translate chord names into Roman-numeral degrees relative to ``key``.

    Args:
        chords: iterable of chord names accepted by ``clean_chord``.
        key: the key note, spelled as in ``keyboard``.

    Returns:
        A list with one numeral per chord: upper case for chords that
        ``clean_chord`` reports as major, lower case for minor ones. Only the
        degree digit of the interval name is used, so e.g. "m3" and "M3" both
        give degree 3.
    """
    def to_numeral(name):
        root, minor = clean_chord(name)
        # interval names look like "M3"/"m7": the second character is the degree
        degree = int(get_interval_between(key, root)[1])
        numeral = number_to_roman[degree]
        return numeral.lower() if minor else numeral

    return [to_numeral(name) for name in chords]
        
    

In [131]:
# should be replaced by pattern mining? 

def get_pattern(chord_numbers):
    best_pattern_freq = 0
    best_pattern = None
    best_pattern_length = 0
    for pattern_length in range(2, 7):
        # looks at chord progressions of length pattern_length
        lines = [chord_numbers[i: i+pattern_length] for i in range(len(chord_numbers) - pattern_length)]
        filtered_lines = []
        # sort each chord progression and keep if distinct
        for line in lines:
            line.sort()
            distinct = True
            for j in range(len(line) - 1):
                distinct = distinct and (line[j] != line[j+1])
            if distinct:
                filtered_lines.append("".join(line))
        # note frequency of most common chord progression
        if len(filtered_lines) > 0:
            freqs = Counter(filtered_lines)
            pattern, freq = freqs.most_common(1)[0]
            if freq > best_pattern_freq:
                best_pattern_freq = freq
                best_pattern = freqs
                best_pattern_length = pattern_length
    return (best_pattern, best_pattern_length)

# Check pattern-length detection on three tabs. NOTE(review): judge_url,
# shape_of_you_url and riptide_url are not defined in this part of the
# notebook -- presumably they are set in another cell.
urls = [judge_url, shape_of_you_url, riptide_url]
chords = list(map(get_chords, urls))
keys = list(map(get_key_from_chords, chords))
chord_numbers = [
    get_chord_numbers(song_chords, song_key)
    for song_chords, song_key in zip(chords, keys)
]
# get_pattern returns a pair; only its second item, the length, is checked
pattern_lengths = [get_pattern(numbers)[1] for numbers in chord_numbers]

assert pattern_lengths == [4, 4, 3]