In [1]:
from wilcoxon.spiderman import *
import re
import json
import numpy as np
import pychord
import operator
import copy
import pandas as pd
import collections
from tqdm import tqdm
import time

In [2]:
# Note names treated as "in key" for each major scale, keyed by the scale's tonic.
# Accidentals are listed under both their sharp and flat spellings (e.g. the D
# entry holds "F#" as well as "Gb"), and enharmonically equal tonics (C#/Db,
# D#/Eb, F#/Gb, G#/Ab, A#/Bb) share identical sets, so a membership test works
# whichever spelling a chord parser emits.
# cleanSong() subtracts these sets from a song's notes to find the best-fitting key.
scales = {
    "C": {"C","D","E","F","G","A","B"},
    "C#": {"Db","Eb","F","Gb","Ab","Bb","C","C#","D#","F#","G#","A#"},
    "Db": {"Db","Eb","F","Gb","Ab","Bb","C","C#","D#","F#","G#","A#"},
    "D": {"D","E","F#","G","A","B","C#","Gb","Db"},
    "D#": {"Eb","F","G","Ab","Bb","C","D","D#","G#","A#"},
    "Eb": {"Eb","F","G","Ab","Bb","C","D","D#","G#","A#"},
    "E": {"E","F#","G#","A","B","C#","D#","Gb","Ab","Db","Eb"},
    "F": {"F","G","A","Bb","C","D","E","A#"},
    "F#": {"F#","G#","A#","B","C#","D#","E#","Gb","Ab","Db","Eb","F"},
    "Gb": {"F#","G#","A#","B","C#","D#","E#","Gb","Ab","Db","Eb","F"},
    "G": {"G","A","B","C","D","E","F#","Gb"},
    "G#": {"Ab","Bb","C","Db","Eb","F","G","G#","A#","C#","D#"},
    "Ab": {"Ab","Bb","C","Db","Eb","F","G","G#","A#","C#","D#"},
    "A": {"A","B","C#","D","E","F#","G#","Db","Gb","Ab"},
    "A#": {"Bb","C","D","Eb","F","G","A","A#","D#"},
    "Bb": {"Bb","C","D","Eb","F","G","A","A#","D#"},
    "B": {"B","C#","D#","E","F#","G#","A#","Db","Eb","Gb","Ab","Bb"}
}

def guessLen(seq):
    """Guess the length of the repeating unit at the start of ``seq``.

    Candidate lengths 2, 3, ... up to half of ``len(seq)`` are tried in order;
    the first length ``size`` whose leading ``size`` items are immediately
    repeated (``seq[:size] == seq[size:2 * size]``) is returned. If no
    candidate repeats, the whole sequence counts as one unit and
    ``len(seq)`` is returned.
    """
    longestCandidate = len(seq) // 2
    repeating = (
        size
        for size in range(2, longestCandidate + 1)
        if seq[:size] == seq[size:size * 2]
    )
    return next(repeating, len(seq))

def _readStore(site):
    """Return the ``store.page.data`` dict embedded in the page's ``js-store`` element."""
    return json.loads(site.html(class_="js-store")[0]["data-content"])["store"]["page"]["data"]


def _splitSections(tabContent):
    """Split raw tab markup into ``{section name: chord list}``.

    Chords are the text between ``[ch]`` and ``[/ch]``; section headers are
    bracketed labels starting with an upper-case letter. Each section's chord
    run is cut down to its guessed repeating unit (see ``guessLen``).
    Repeated header names get a 1-based occurrence suffix ("Chorus 2").
    """
    # NOTE(review): the header pattern is greedy (".+"), so a header followed by
    # more brackets on the same line is captured as one token - confirm intended.
    tokens = re.findall(r"((?<=\[ch\]).*?(?=\[/ch\])|\[[A-Z].+\])", tabContent)
    headers = [
        (position, token.replace("[", "").replace("]", ""))
        for position, token in enumerate(tokens)
        if re.search(r"\[.*\]", token)
    ]
    headerNames = [name for _, name in headers]

    sections = {}
    for number, (position, name) in enumerate(headers):
        # A section runs up to the next header, or to the end for the last one.
        isLast = number + 1 >= len(headers)
        end = len(tokens) if isLast else headers[number + 1][0]
        chords = tokens[position + 1:end]

        label = name
        if headerNames.count(name) != 1:
            label = name + " " + str(headerNames[:number + 1].count(name))

        # A 4-chord unit whose repeat is missing its final chord (A B C D A B C).
        if len(chords) % 7 == 0 and chords[0:3] == chords[4:7]:
            sections[label] = chords[0:4]
            continue

        unitLen = guessLen(chords)
        if unitLen > 8:
            # A stray trailing chord can hide the repetition; retry without it.
            trimmedLen = guessLen(chords[:-1])
            if trimmedLen < 8:
                unitLen = trimmedLen
        sections[label] = chords[:unitLen]

    return sections


def searchSong(song, artist):
    """Scrape every Ultimate Guitar chord sheet found for ``artist`` + ``song``.

    Walks the paginated search results, downloads each tab page and extracts
    its sections and statistics. Tabs that fail to download or parse are
    reported with ``print`` and skipped.

    Parameters
    ----------
    song, artist : str
        Free-text title and artist name; whitespace is normalised and the
        combined query is URL-encoded.

    Returns
    -------
    list of dict
        One dict per usable tab with keys "Song", "Artist", "Tabs"
        (section name -> chord list), "Simplicity" (mean chord count of the
        non-empty sections), "Complexity" (mean chord-name length ignoring
        "m", "b" and "#"), "Views", "Favorites", "Popularity"
        (favorites / views) and "Link". Tabs without any chords, tabs whose
        artist contains "Mashup", and tabs with a section longer than 16
        chords or containing "ch" in its chord text are left out.
    """
    from urllib.parse import quote_plus

    maxSectionChords = 16

    # Encode the query so characters such as "&" or "#" in a name cannot
    # truncate or corrupt the title parameter.
    searchParam = quote_plus(" ".join(f"{artist} {song}".split()))

    fullLinkResults = []
    page = 1
    while True:
        site = website(f"https://www.ultimate-guitar.com/search.php?title={searchParam}&page={page}&type=300")
        try:
            pageResults = _readStore(site)["results"]
            linkResults = [result for result in pageResults if "id" in result]
        except Exception:
            # A page without a parsable result list marks the end of the results.
            break
        if not pageResults:
            # An empty page would otherwise keep the loop requesting pages forever.
            break
        fullLinkResults.extend(linkResults)
        page += 1

    endResults = []
    for result in fullLinkResults:
        try:
            link = result["tab_url"]
            subsite = website(link)

            tabView = _readStore(subsite)["tab_view"]
            tabJson = tabView["wiki_tab"]["content"]
            statsJson = tabView["stats"]

            htmlTitle = subsite.html.find("title").string
            songName = htmlTitle.split("CHORDS")[0].strip().capitalize()
            artistName = htmlTitle.split(" by ", 1)[1].split("@")[0].strip()

            sections = _splitSections(tabJson)

            sectionLengths = [len(chords) for chords in sections.values() if len(chords) > 0]
            if not sectionLengths:
                # No chords at all: the mean would be NaN (with a RuntimeWarning)
                # and the tab would be discarded below anyway.
                continue
            simplicity = np.mean(sectionLengths)

            allChords = [chord for chords in sections.values() for chord in chords]
            complexity = np.mean([
                len(chord.replace("m", "").replace("b", "").replace("#", ""))
                for chord in allChords
            ])

            views = int(statsJson["view_total"])
            favorites = int(statsJson["favorites_count"])
            # A tab nobody has viewed yet must not be lost to a ZeroDivisionError.
            popularity = favorites / views if views else 0.0

            endResults.append({"Song": songName,
                               "Artist": artistName,
                               "Tabs": sections,
                               "Simplicity": simplicity,
                               "Complexity": complexity,
                               "Views": views,
                               "Favorites": favorites,
                               "Popularity": popularity,
                               "Link": link})

        except Exception as e:
            print(e)

    def isUsable(work):
        if "Mashup" in work["Artist"]:
            return False
        return all(
            len(chords) <= maxSectionChords and "ch" not in "".join(chords)
            for chords in work["Tabs"].values()
        )

    return [work for work in endResults if isUsable(work)]

def cleanSong(setOfWorks):
    """Transpose every work to C and replace its "Tabs" with per-section keys.

    For each work the key is estimated from the notes of all its chords: the
    (at most seven) most used notes are compared with every entry of
    ``scales``, and among the best-fitting scales the one whose tonic is the
    most frequently used note wins. All sections are then transposed so that
    this tonic becomes C.

    In the returned copy, "Tabs" is replaced by one key per section: "Intro"
    style names keep their first five letters, other names are shortened to
    their initial plus trailing number ("Verse 1" -> "V1"). Each such key maps
    to a set holding the section's "-"-joined chord progression, and a key
    without the number ("V") collects the progressions of all sections of
    that type.

    Parameters
    ----------
    setOfWorks : list of dict
        Works as returned by ``searchSong``; not modified.

    Returns
    -------
    list of dict
        Deep copy of ``setOfWorks``. A work that cannot be analysed is
        reported with ``print`` and left unchanged (still carrying "Tabs").
    """
    newSetOfWorks = copy.deepcopy(setOfWorks)

    for index, work in enumerate(setOfWorks):

        try:
            allChords = [chord for chords in work["Tabs"].values() for chord in chords]
            progression = pychord.ChordProgression(allChords)
            allNotes = [note for chord in progression for note in chord.components()]
            notesUsed = collections.Counter(allNotes)
            noteSet = set(allNotes)

            # Drop the rarest notes until at most seven remain. The note just
            # removed has to leave the candidate pool too, otherwise the same
            # note is picked again and the loop never ends.
            remaining = dict(notesUsed)
            while len(noteSet) > 7:
                rarest = min(remaining.items(), key=operator.itemgetter(1))[0]
                noteSet.discard(rarest)
                del remaining[rarest]

            # Notes ordered from most to least used (ties keep first-seen order).
            notesUsedSorted = [note for note, _ in notesUsed.most_common()]

            scaleDiff = {name: noteSet - notes for name, notes in scales.items()}
            scaleMinNum = min(len(diff) for diff in scaleDiff.values())
            scaleWithMinNum = [name for name, diff in scaleDiff.items() if len(diff) == scaleMinNum]

            # Pick the best-fitting scale whose tonic is the most used note.
            # The scale name is compared as a whole: matching only its first
            # character would confuse e.g. the note "C" with the scale "C#".
            tonics = [note for note in notesUsedSorted if note in scaleWithMinNum]
            if not tonics:
                raise ValueError("no best-fitting scale has its tonic among the notes used")
            scaleMin = tonics[0]

            toTranspose = pychord.analyzer.notes_to_positions([scaleMin], "C")[0]

            # Build the new keys separately so that a failure part-way through
            # cannot leave the entry half converted.
            cleaned = {}
            for section, chords in work["Tabs"].items():

                if section[0] == "I":
                    cleanedSection = section[:5].capitalize()
                else:
                    cleanedSection = section[0] + re.findall(r"\d*$", section)[0]
                baseSection = re.sub(r"\d*", "", cleanedSection)

                sectionProgression = pychord.ChordProgression(chords)
                sectionProgression.transpose(-toTranspose)
                joined = "-".join(str(chord) for chord in sectionProgression)

                if cleanedSection != baseSection:
                    cleaned[cleanedSection] = {joined}
                # Sets have no append(); add() is what accumulates every
                # progression of this section type under the un-numbered key.
                cleaned.setdefault(baseSection, set()).add(joined)

            entry = newSetOfWorks[index]
            entry.pop("Tabs", None)
            entry.update(cleaned)

        except Exception as e:
            # Best effort: keep the work as it was and move on. KeyboardInterrupt
            # is deliberately not caught so the run can still be stopped.
            print(e)
            continue

    return newSetOfWorks

# def compileSong(newSetOfWorks):
    
    

def findBPM(song, artist):
    """Look up tempo and key of a song on tunebat.com.

    Parameters
    ----------
    song, artist : str
        Free-text title and artist name; whitespace is normalised and the
        combined query is URL-encoded.

    Returns
    -------
    dict
        ``{"BPM": int, "Scale": str}`` read from the third and first
        ``search-attribute-value`` elements of the result page, or
        ``{"BPM": 0, "Scale": ""}`` when they cannot be read.
    """
    from urllib.parse import quote_plus

    # Encode the query so characters such as "&" or "#" in a name cannot
    # truncate or corrupt the q parameter.
    searchParam = quote_plus(" ".join(f"{artist} {song}".split()))
    site = website(f"https://tunebat.com/Search?q={searchParam}")

    try:
        attributes = site.html(class_="search-attribute-value")
        # presumably the attributes of the first hit come in a fixed order
        # (key first, BPM third) - verify against the live page
        bpm = int(attributes[2].string)
        scale = attributes[0].string
        return {"BPM": bpm, "Scale": scale}
    except Exception:
        # Page layout not as expected or no hit: report "unknown".
        return {"BPM": 0, "Scale": ""}

In [14]:
# Manual check of findBPM; in the saved run it returned the fallback value,
# i.e. the BPM/scale could not be read from the page.
findBPM("you belong with me","taylor swift")

{'BPM': 0, 'Scale': ''}

In [12]:
# Manual check of searchSong on a single song.
setOfWorks = searchSong("Mine","Taylor Swift")

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [13]:
# Inspect the works returned by the searchSong check.
setOfWorks

[{'Song': 'Mine',
  'Artist': 'Taylor Swift',
  'Tabs': {'Intro': ['Cadd9',
    'G',
    'Em7',
    'D',
    'Dsus4',
    'D',
    'Dsus2',
    'Cadd9',
    'G',
    'Em7',
    'D'],
   'Verse 1': ['Cadd9', 'G', 'Em7', 'D'],
   'Pre-Chorus 1': ['Cadd9', 'G', 'Em7', 'D'],
   'Chorus 1': ['Cadd9', 'G', 'Em7', 'D'],
   'Verse 2': ['Cadd9', 'G', 'Em7', 'D'],
   'Pre-Chorus 2': ['Cadd9', 'G', 'Em7', 'D'],
   'Chorus 2': ['Cadd9', 'G', 'Em7', 'D'],
   'Bridge': ['Em7', 'Cadd9', 'G', 'D'],
   'Chorus 3': ['Cadd9', 'G', 'Em7', 'D']},
  'Simplicity': 4.777777777777778,
  'Complexity': 2.3488372093023258,
  'Views': 1030629,
  'Favorites': 2769,
  'Popularity': 0.002686708796278777,
  'Link': 'https://tabs.ultimate-guitar.com/tab/taylor-swift/mine-chords-975150'},
 {'Song': 'Mine',
  'Artist': 'Taylor Swift',
  'Tabs': {'Intro': ['Cadd9', 'G', 'D', 'Em7'],
   'Verse 1': ['Cadd9', 'G', 'D', 'Em7'],
   'Pre-Chorus 1': ['Cadd9', 'G', 'D'],
   'Chorus 1': ['Cadd9', 'G', 'D', 'Em7', 'D'],
   'Verse 2

In [3]:
def getBillboard(year):
    """Fetch Billboard's year-end Hot 100 chart for ``year``.

    Parameters
    ----------
    year : int
        Chart year, inserted into the chart URL.

    Returns
    -------
    pandas.DataFrame
        One row per chart entry with columns "Song", "Artist" and "Year".
        Everything from "feat"/"Feat" onwards is cut from the artist so only
        the lead artist remains.
    """
    site = website(f"https://www.billboard.com/charts/year-end/{year}/hot-100-songs")

    rows = []
    for item in site.html(class_="ye-chart-item__text"):
        title = list(item(class_="ye-chart-item__title")[0].stripped_strings)[0]
        artist = list(item(class_="ye-chart-item__artist")[0].stripped_strings)[0]
        # Cutting the "Featuring ..." part leaves the blank that preceded it;
        # strip it so the artist name is clean.
        leadArtist = re.sub("[fF]eat.*$", "", artist).strip()
        rows.append({"Song": title, "Artist": leadArtist, "Year": year})

    return pd.DataFrame(rows)

In [4]:
# Year-end Hot 100 charts for 2006 through 2020, stacked into a single frame.
billboard = pd.concat([getBillboard(year) for year in range(2006, 2021)])

In [139]:
# Index of the first chart row to scrape. Raise it to resume after an
# interrupted run; 0 scrapes the whole chart.
START_ROW = 306

songs = []
for row in tqdm(billboard.to_dict("records")[START_ROW:]):
    try:
        songs.append(cleanSong(searchSong(row["Song"], row["Artist"])))
    except Exception as e:
        # One failing song (e.g. a dropped connection) must not abort a run
        # that takes hours; report it and carry on with the next row.
        print(f"{row['Artist']} - {row['Song']}: {e}")

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
 33%|███▎      | 392/1192 [1:33:32<2:39:30, 11.96s/it] 

('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))


 35%|███▍      | 415/1192 [1:44:29<2:00:58,  9.34s/it]  

list index out of range


 40%|███▉      | 473/1192 [3:15:20<4:56:55, 24.78s/it]  


KeyboardInterrupt: 

In [140]:
# Flatten the per-song result lists into one table and copy it to the clipboard.
pd.DataFrame([work for songWorks in songs for work in songWorks]).to_clipboard()

In [35]:
# Inspect the scale table.
scales.items()

dict_items([('C', {'C', 'B', 'G', 'E', 'D', 'A', 'F'}), ('C#', {'Db', 'C', 'C#', 'Ab', 'Eb', 'F#', 'Bb', 'Gb', 'G#', 'F', 'A#', 'D#'}), ('Db', {'Db', 'C', 'C#', 'Ab', 'Eb', 'F#', 'Bb', 'Gb', 'G#', 'F', 'A#', 'D#'}), ('D', {'Db', 'B', 'C#', 'G', 'F#', 'E', 'A', 'D', 'Gb'}), ('D#', {'C', 'F', 'G', 'Eb', 'Bb', 'D', 'G#', 'Ab', 'A#', 'D#'}), ('Eb', {'C', 'F', 'G', 'Eb', 'Bb', 'D', 'G#', 'Ab', 'A#', 'D#'}), ('E', {'Db', 'B', 'C#', 'F#', 'Eb', 'E', 'A', 'Gb', 'G#', 'Ab', 'D#'}), ('F', {'C', 'G', 'Bb', 'A', 'D', 'E', 'F', 'A#'}), ('F#', {'Db', 'F', 'B', 'C#', 'E#', 'F#', 'Eb', 'Gb', 'G#', 'Ab', 'A#', 'D#'}), ('Gb', {'Db', 'F', 'B', 'C#', 'E#', 'F#', 'Eb', 'Gb', 'G#', 'Ab', 'A#', 'D#'}), ('G', {'C', 'B', 'G', 'F#', 'E', 'A', 'D', 'Gb'}), ('G#', {'Db', 'C', 'F', 'C#', 'G', 'Eb', 'Bb', 'G#', 'Ab', 'A#', 'D#'}), ('Ab', {'Db', 'C', 'F', 'C#', 'G', 'Eb', 'Bb', 'G#', 'Ab', 'A#', 'D#'}), ('A', {'Db', 'B', 'C#', 'F#', 'E', 'D', 'A', 'Gb', 'G#', 'Ab'}), ('A#', {'C', 'G', 'Eb', 'Bb', 'D', 'A', 'F', 'A#'

In [19]:
# Check that pychord accepts slash-chord notation.
pychord.Chord("C/G")

<Chord: C/G>