In [456]:
from urllib.parse import quote_plus

import bs4
import pandas
import requests

In [452]:
def soup_page(web_url, timeout=30):
    """Download ``web_url`` and return its parsed BeautifulSoup tree.

    Parameters:
        web_url: address of the page to fetch.
        timeout: seconds to wait for the server before ``requests`` gives up.
            Without a timeout a single stalled connection would block the
            whole scraping loop indefinitely.

    Returns:
        A ``bs4.BeautifulSoup`` built from the response body with the
        built-in 'html.parser'.
    """
    req = requests.get(web_url, timeout=timeout)
    return bs4.BeautifulSoup(req.content, 'html.parser')

def get_chords(web_url):
    """Scrape the chord names from an Ultimate Guitar tab page.

    The page's <pre class="js-tab-content"> elements are rendered back to
    text and split on whitespace; every token that begins with a bare
    '<span>' tag is kept, with its '<span>' / '</span>' markers removed.

    Returns:
        A list of the extracted tokens, in page order.
    """
    open_tag = '<span>'
    close_tag = '</span>'
    tab_blocks = soup_page(web_url).find_all('pre', {'class':'js-tab-content'})
    chords_list = []
    for token in str(tab_blocks).split():
        if token.startswith(open_tag):
            chords_list.append(token.replace(open_tag, '').replace(close_tag, ''))
    return chords_list

def check_no_result(soup_result):
    """Tell whether a parsed search page is the "no result" page.

    Returns:
        True when the page contains at least one <div class="not_found">,
        False otherwise (i.e. False means the search produced results).
    """
    not_found_blocks = soup_result.find_all('div', {'class':'not_found'})
    return len(not_found_blocks) > 0

def get_table(soup_result):
    """Return the first <table class="tresults"> of a parsed search page.

    Raises:
        IndexError: if the page has no such table.
    """
    result_tables = soup_result.find_all('table', {'class':'tresults'})
    return result_tables[0]

def get_rows(soup_result):
    """Return every <tr> of the results table except the first one.

    Parameters:
        soup_result: parsed search page that contains a results table
            (see ``get_table``).

    Returns:
        The table's <tr> elements with the first row dropped
        (presumably the header row -- confirm against the page markup).
    """
    # The slice is open-ended on purpose: the previous upper bound was
    # len(table), which in BeautifulSoup counts the tag's direct children
    # rather than its rows and could cut the list short.
    return get_table(soup_result).find_all('tr')[1:]

def get_rating(row):
    """Return the rating number shown in a search-result row.

    Parameters:
        row: a <tr> element from the Ultimate Guitar results table.

    Returns:
        The text of the first <b class="ratdig"> inside the row's rating
        cell converted to ``float``, or None when the row has no rating
        cell or the cell holds no rating digit.
    """
    rating_cell = row.find('td', {'class':'gray4 tresults--rating'})
    if rating_cell is None:
        # Rows without a rating cell have nothing to parse; report "no
        # rating" instead of failing on None.find_all(...).
        return None
    rate_list = rating_cell.find_all('b', {'class':'ratdig'})
    if not rate_list:
        return None
    return float(rate_list[0].text.strip())

def get_type(row):
    """Return the type label of a search-result row.

    Returns:
        The stripped text of the row's first <strong> element, or None when
        the row contains no <strong> element.
    """
    strong_tags = row.find_all('strong')
    if len(strong_tags) == 0:
        return None
    return strong_tags[0].text.strip()

def get_url(row):
    """Return the link of a search-result row (one version of a song).

    The link is the ``href`` of the first <a> inside the row's first
    <td class="search-version--td">.

    Raises:
        IndexError: if the row has no such cell or the cell has no <a>.
    """
    version_cell = row.find_all('td',{'class':'search-version--td'})[0]
    first_link = version_cell.find_all('a')[0]
    return first_link.attrs['href']

def get_top100(int_year):
    """Collect the Billboard year-end Hot 100 entries for ``int_year``.

    Every chart row returned by ``update_bbd_url`` is parsed with ``data``
    and the per-row lists are flattened into one list.

    Returns:
        A single list of the (artist, title) tuples produced by ``data``,
        in chart-row order.
    """
    return [
        song
        for chart_row in update_bbd_url(int_year)
        for song in data(chart_row)
    ]

def get_top100_query(int_year):
    """Turn a year's top-100 entries into searchable query strings.

    Years above 2016 are treated as 2016 and years below 2006 as 2006
    before the chart is fetched.

    Returns:
        A list with one cleaned query string (see ``clean_pair``) per
        chart entry.
    """
    # Clamp into the supported 2006-2016 window.
    int_year = max(min(int_year, 2016), 2006)
    return [clean_pair(entry) for entry in get_top100(int_year)]

def clean_pair(pair):
    """Join the cleaned first and second items of ``pair`` with one space."""
    first_part = clean_name(pair[0])
    second_part = clean_name(pair[1])
    return ' '.join([first_part, second_part])

def clean_name(str_input):
    """Normalise an artist name or song title for use in a search query.

    The text is lower-cased, the characters '(', ')' and '&' are removed,
    and everything from the first occurrence of 'featuring' onwards is
    dropped.

    Parameters:
        str_input: artist name or song title, e.g. an item of a
            ``get_top100`` tuple.

    Returns:
        The cleaned text with surrounding whitespace removed and internal
        runs of whitespace collapsed to single spaces.
    """
    lowered = str_input.lower()
    for symbol in ('(', ')', '&'):
        lowered = lowered.replace(symbol, '')
    main_part = lowered.split('featuring', 1)[0]
    # Removing '&' and the "featuring ..." tail leaves doubled / trailing
    # spaces behind (e.g. 'drake '); normalise them so the query is clean.
    return ' '.join(main_part.split())

def search_url(str_query):
    """Build the Ultimate Guitar title-search URL for ``str_query``.

    The resulting page is either a list of versions for the song or the
    site's "No Result" page (see ``check_no_result``).

    Parameters:
        str_query: free-text query, e.g. the output of ``clean_pair``.

    Returns:
        The search URL with the query appended as the ``value`` parameter.
    """
    root = 'https://www.ultimate-guitar.com/search.php?search_type=title&order=&value='
    # quote_plus turns spaces into '+' and also percent-encodes characters
    # such as '&', '#', "'" or non-ASCII letters that would otherwise
    # corrupt the query string.
    return root + quote_plus(str_query)

# -- Puyush's functions --
def get_bbd_year_url(year_input):
    """Return the Billboard year-end Hot 100 chart URL for ``year_input``."""
    prefix = 'http://www.billboard.com/charts/year-end/'
    suffix = '/hot-100-songs'
    return ''.join([prefix, str(year_input), suffix])

def update_bbd_url(int_year):
    """Fetch the Billboard year-end chart page and return its layout rows.

    Returns:
        The <div class="ye-chart__layout-row"> elements found inside the
        first <div data-content-type="yearEndChart">, with the first one
        dropped.

    Raises:
        IndexError: if the page has no year-end chart container.
    """
    response = requests.get(get_bbd_year_url(int_year))
    chart_soup = bs4.BeautifulSoup(response.content, 'html.parser')
    year_end_chart = chart_soup.find_all('div', {'data-content-type': 'yearEndChart'})[0]
    layout_rows = year_end_chart.find_all('div' , {'class': 'ye-chart__layout-row'})
    return layout_rows[1:]

def data(row):
    """Extract the chart entries contained in one chart layout row.

    Each <article id="chart"> in ``row`` contributes one tuple built from
    the stripped text of its item title (<h1>) and subtitle (<h2>).

    Returns:
        A list of (artistName, songTitle) tuples, in document order.
    """
    entries = []
    for article in row.find_all('article', {'id': 'chart'}):
        primary = article.find_all('div', {'class': 'ye-chart__item-primary'})[0]
        title_tag = primary.find_all('h1', {'class':'ye-chart__item-title'})[0]
        artist_tag = primary.find_all('h2', {'class':'ye-chart__item-subtitle'})[0]
        entries.append((artist_tag.text.strip(), title_tag.text.strip()))
    return entries
# -- End of Puyush's functions --

In [450]:
# For every 2016 top-100 query, print the rated "chords" versions that
# Ultimate Guitar lists for it (rating, then URL, one version per line).
query_list = get_top100_query(2016)
for pair in query_list:
    pair_soup = soup_page(search_url(pair))
    if check_no_result(pair_soup):
        continue
    rows = get_rows(pair_soup)
    print(pair)
    for row in rows:
        row_type = get_type(row)
        row_rating = get_rating(row)
        if row_type != 'chords' or row_rating is None:
            continue
        print('\t' + str(row_rating) + ' ' + get_url(row))

love yourself justin bieber
	4510.0 https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_crd.htm
	191.0 https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver2_crd.htm
	76.0 https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver3_crd.htm
	16.0 https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver4_crd.htm
	9.0 https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver5_crd.htm
sorry justin bieber
	734.0 https://tabs.ultimate-guitar.com/j/justin_bieber/sorry_crd.htm
	27.0 https://tabs.ultimate-guitar.com/j/justin_bieber/sorry_ver2_crd.htm
	9.0 https://tabs.ultimate-guitar.com/j/justin_bieber/sorry_ver3_crd.htm
one dance drake 
	113.0 https://tabs.ultimate-guitar.com/d/drake/one_dance_crd.htm
	82.0 https://tabs.ultimate-guitar.com/d/drake/one_dance_ver2_crd.htm
	56.0 https://tabs.ultimate-guitar.com/d/drake/one_dance_ver3_crd.htm
	77.0 https://tabs.ultimate-guitar.com/d/drake/one_dance_acoustic_crd.htm
work rihanna 
	86.0 https://tab

In [461]:
# Raw 2016 chart entries, as returned by get_top100.
top_list = get_top100(2016)
# Empty frame declaring one column each for artist, title, rating and link.
total = pandas.DataFrame(columns=('artist','title','rating','link'))

In [463]:
# Build one list per query that has search results:
# [query, rating_1, url_1, rating_2, url_2, ...] for its rated "chords" rows.
total_list = []
for pair in query_list:
    pair_soup = soup_page(search_url(pair))
    if check_no_result(pair_soup):
        continue
    pair_list = []
    rows = get_rows(pair_soup)
    pair_list.append(pair)
    for row in rows:
        row_type = get_type(row)
        row_rating = get_rating(row)
        if row_type == 'chords' and row_rating is not None:
            pair_list.extend([str(row_rating), str(get_url(row))])
    total_list.append(pair_list)

In [465]:
# One DataFrame row per entry of total_list; the lists have different
# lengths, so shorter rows end in missing values.
kk = pandas.DataFrame(total_list)

In [466]:
# Show the frame: column 0 is the query, followed by alternating
# rating / URL columns for each chords version.
kk

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,love yourself justin bieber,4510.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,191.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,76.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,16.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,9.0,...,,,,,,,,,,
1,sorry justin bieber,734.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,27.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,9.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,,,,...,,,,,,,,,,
2,one dance drake,113.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,82.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,56.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,77.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,,...,,,,,,,,,,
3,work rihanna,86.0,https://tabs.ultimate-guitar.com/r/rihanna/wor...,,,,,,,,...,,,,,,,,,,
4,stressed out twenty one pilots,883.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,10.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,36.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,96.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,15.0,...,,,,,,,,,,
5,panda desiigner,,,,,,,,,,...,,,,,,,,,,
6,hello adele,517.0,https://tabs.ultimate-guitar.com/a/adele/hello...,47.0,https://tabs.ultimate-guitar.com/a/adele/hello...,1021.0,https://tabs.ultimate-guitar.com/a/adele/hello...,148.0,https://tabs.ultimate-guitar.com/a/adele/hello...,,...,,,,,,,,,,
7,don't let me down the chainsmokers,282.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,9.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,69.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,13.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,,...,,,,,,,,,,
8,can't stop the feeling! justin timberlake,46.0,https://tabs.ultimate-guitar.com/j/justin_timb...,155.0,https://tabs.ultimate-guitar.com/j/justin_timb...,5.0,https://tabs.ultimate-guitar.com/j/justin_timb...,31.0,https://tabs.ultimate-guitar.com/j/justin_timb...,4.0,...,,,,,,,,,,
9,closer the chainsmokers,67.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,605.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,138.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,23.0,https://tabs.ultimate-guitar.com/t/the_chainsm...,,...,,,,,,,,,,


In [483]:
# Collect, for the first five 2016 chart entries, every rated "chords"
# version listed on Ultimate Guitar. Each entry with search results yields
# one flat list: [item0, item1, rating, url, item0, item1, rating, url, ...]
# where item0 / item1 are the two members of the chart tuple.
top_list = get_top100(2016)[0:5]
total = pandas.DataFrame(columns=('artist','title','rating','link'))
total_list = list()
for pair in top_list:
    query_pair = clean_pair(pair)
    pair_soup = soup_page(search_url(query_pair))
    if (not check_no_result(pair_soup)):
        rows = get_rows(pair_soup)
        temp_list = list()
        for row in rows:
            row_type = get_type(row)
            row_rating = get_rating(row)
            if row_type == 'chords' and row_rating is not None:
                temp_list.extend([pair[0], pair[1], row_rating, str(get_url(row))])
                # NOTE(review): a `temp_list.sort_values(...)` call used to
                # follow here, but plain lists have no such method, so it
                # raised AttributeError. Versions are kept in page order;
                # if sorting by rating is wanted, collect per-version
                # records first and sort those.
        # Append inside the `if`: for a query without results there is no
        # fresh temp_list, and appending here used to re-add the previous
        # song's list (or fail with NameError on the first iteration).
        total_list.append(temp_list)

In [484]:
total_list

[['Justin Bieber',
  'LOVE YOURSELF',
  4508.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_crd.htm',
  'Justin Bieber',
  'LOVE YOURSELF',
  191.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver2_crd.htm',
  'Justin Bieber',
  'LOVE YOURSELF',
  76.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver3_crd.htm',
  'Justin Bieber',
  'LOVE YOURSELF',
  16.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver4_crd.htm',
  'Justin Bieber',
  'LOVE YOURSELF',
  9.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/love_yourself_ver5_crd.htm'],
 ['Justin Bieber',
  'SORRY',
  734.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/sorry_crd.htm',
  'Justin Bieber',
  'SORRY',
  27.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/sorry_ver2_crd.htm',
  'Justin Bieber',
  'SORRY',
  9.0,
  'https://tabs.ultimate-guitar.com/j/justin_bieber/sorry_ver3_crd.htm'],
 ['Drake Featuring WizKid & Kyla',
  '

In [485]:
# One DataFrame row per chart entry; every four consecutive columns hold one
# chords version (the two chart-tuple items, rating, URL). Shorter rows end
# in missing values.
cc = pandas.DataFrame(total_list)
cc

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,Justin Bieber,LOVE YOURSELF,4508.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,Justin Bieber,LOVE YOURSELF,191.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,Justin Bieber,LOVE YOURSELF,76.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,Justin Bieber,LOVE YOURSELF,16.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,Justin Bieber,LOVE YOURSELF,9.0,https://tabs.ultimate-guitar.com/j/justin_bieb...
1,Justin Bieber,SORRY,734.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,Justin Bieber,SORRY,27.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,Justin Bieber,SORRY,9.0,https://tabs.ultimate-guitar.com/j/justin_bieb...,,,,,,,,
2,Drake Featuring WizKid & Kyla,ONE DANCE,113.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,Drake Featuring WizKid & Kyla,ONE DANCE,82.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,Drake Featuring WizKid & Kyla,ONE DANCE,56.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,Drake Featuring WizKid & Kyla,ONE DANCE,77.0,https://tabs.ultimate-guitar.com/d/drake/one_d...,,,,
3,Rihanna Featuring Drake,WORK,86.0,https://tabs.ultimate-guitar.com/r/rihanna/wor...,,,,,,,,,,,,,,,,
4,twenty one pilots,STRESSED OUT,883.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,twenty one pilots,STRESSED OUT,10.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,twenty one pilots,STRESSED OUT,36.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,twenty one pilots,STRESSED OUT,96.0,https://tabs.ultimate-guitar.com/t/twenty_one_...,twenty one pilots,STRESSED OUT,15.0,https://tabs.ultimate-guitar.com/t/twenty_one_...
