In [113]:
import re
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
pd.set_option("display.max_rows", 101)

In [14]:
def get_soup(url):
    """
    Download a page and parse it into a BeautifulSoup object.

    Small wrapper around urllib and BeautifulSoup. The request is sent
    with a browser-like User-Agent header so that http errors are less
    likely.

    Parameters
    ----------
    url : str
        Address of the page to fetch.

    Returns
    -------
    BeautifulSoup
        The parsed document.

    Raises
    ------
    Whatever ``urlopen`` raises for an unreachable or rejected request
    is propagated unchanged.
    """
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = Request(url, headers=hdr)
    # The response is consumed while the soup is built, so it can be closed
    # right afterwards instead of leaving the connection open for the
    # lifetime of the kernel.
    with urlopen(req) as page:
        # NOTE(review): 'html' names a markup type rather than a specific
        # parser, so the parser actually used depends on what is installed
        # -- confirm whether a fixed parser should be pinned.
        soup = BeautifulSoup(page, 'html')
    return soup

# Pitchfork

In [15]:
# Pitchfork list page to scrape.
url = "https://pitchfork.com/features/lists-and-guides/the-200-best-albums-of-the-2010s/"

# Fetched through get_soup (which sends a User-Agent header) rather than a bare urlopen(url).

soup = get_soup(url)

In [16]:
# Every <h2> on the page is treated as one list entry whose first three
# children hold the artist text, a nested element with the album title, and
# the year text. The slices trim surrounding separator characters
# (presumably punctuation/brackets -- verify against the live markup).
h2_tags = soup.find_all('h2')
artist = [h2.contents[0][:-2] for h2 in h2_tags]
album = [h2.contents[1].contents[0] for h2 in h2_tags]
year = [h2.contents[2][2:-1] for h2 in h2_tags]

# cleaning: two entries get their year set by hand (presumably because the
# slice above does not yield a clean year for them).
year[117], year[135] = '2013', '2015'

# Entries are assigned ranks counting down from 200 to 1.
pitchfork_df = pd.DataFrame({
    'artist': artist,
    'album': album,
    'year': year,
    'pitchfork_rank': list(range(200, 0, -1)),
})

In [17]:
# Preview the first rows to sanity-check the scrape.
pitchfork_df.head()

Unnamed: 0,artist,album,year,pitchfork_rank
0,Ratking,So It Goes,2014,200
1,Wu Lyf,Go Tell Fire to the Mountain,2011,199
2,Jean Grae / Quelle Chris,Everything’s Fine,2018,198
3,Fatima Al Qadiri,Genre-Specific Xperience,2011,197
4,Portal,Vexovoid,2013,196


# Billboard

In [59]:
# Billboard list page to scrape.
url = "https://www.billboard.com/articles/news/list/8543722/best-albums-of-the-2010s-top-100"

soup = get_soup(url)

# Exploratory dump: print the text of every <strong> tag on the page.
# No DataFrame is built for this source in this cell.
for tag in soup.find_all('strong'):
    print(tag.text)

100. Lady Gaga & Bradley Cooper, A Star Is Born Soundtrack (2018) 
99. Lady Antebellum, Need You Now (2010)
98. Japandroids, Celebration Rock (2012)
97. Porter Robinson, Worlds (2014)
96. Ed Sheeran, x (2014)
95. Chris Stapleton, Traveller (2015)
94. Nipsey Hussle, Victory Lap (2018)
93. P!nk, The Truth About Love (2012)
92. Ozuna, Odisea (2017)
91. Miley Cyrus, Bangerz (2013)
90. Against Me!, Transgender Dysphoria Blues (2014)
89. Childish Gambino, "Awaken, My Love!" (2016)
88. Sky Ferreira, Night Time, My Time (2013)
87. Brandi Carlile, By the Way, I Forgive You (2018)
86. Jonas Brothers, Happiness Begins (2019)
85. J. Cole, 2014 Forest Hills Drive (2014)
84. Rihanna, Loud (2010)
83. Travis Scott, Astroworld (2018)
82. The 1975, I Like It When You Sleep, For You Are So Beautiful Yet So Unaware of It (2016)
81. Mumford & Songs, Sigh No More (2010)
80. Camila Cabello, Camila (2017)
79. Courtney Barnett, Sometimes I Sit and Think and Sometimes I Just Sit (2015)
78. Adele, 25 (2015)
77. 

# Stereogum

In [20]:
# Stereogum list page to scrape.
url = "https://www.stereogum.com/featured/best-albums-of-the-2010s-list/"

# Rebinds the shared `soup` name to the Stereogum document.
soup = get_soup(url)

In [21]:
def h2_if_nonempty(tag):
    """
    Filter function for ``soup.find_all``: accept only non-empty <h2> tags.

    Parameters
    ----------
    tag : object with ``name`` and ``contents`` attributes
        The element being tested.

    Returns
    -------
    bool
        True when the tag is named exactly "h2" and has at least one
        child; False otherwise.
    """
    # Compare the name exactly: a substring test would also accept any tag
    # whose name merely contains "h2". Always return a real bool so that no
    # path falls through to an implicit None.
    return bool(tag.contents) and tag.name == "h2"

In [22]:
stereogum_rank, artist, album, label, year = [], [], [], [], []

# Each non-empty <h2> is read positionally: child 0 wraps the rank, child 1
# is the artist text, child 2 wraps the album title and child 4 wraps a
# "label, year" string. The slices strip surrounding separator characters
# (exact characters not visible here -- verify against the live markup).
for tag in soup.find_all(h2_if_nonempty):
    stereogum_rank.append(tag.contents[0].contents[0])
    artist.append(tag.contents[1][1:-3])
    album.append(tag.contents[2].contents[0])

    # Split the "label, year" string once and reuse both halves.
    label_and_year = tag.contents[4].contents[0].split(',')
    label.append(label_and_year[0][1:])
    year.append(label_and_year[1][1:-1])

stereogum_df = pd.DataFrame({
    'artist': artist,
    'album': album,
    'label': label,
    'year': year,
    'stereogum_rank': stereogum_rank,
})

In [23]:
# Preview the first rows to sanity-check the scrape.
stereogum_df.head()

Unnamed: 0,artist,album,label,year,stereogum_rank
0,Future,Pluto,Epic / A1 / Free Bandz,2012,100
1,Car Seat Headrest,Teens Of Denial,Matador,2016,99
2,PUP,The Dream Is Over,SideOneDummy,2016,98
3,Courtney Barnett,"Sometimes I Sit And Think, And Sometimes I Jus...",Mom + Pop,2015,97
4,Colleen Green,I Want To Grow Up,Hardly Art,2015,96


# Consequence of Sound 

In [24]:
# Consequence of Sound list page to scrape.
url =  "https://consequenceofsound.net/2019/11/top-albums-of-the-2010s/full-post/"

# Rebinds the shared `soup` name to the Consequence of Sound document.
soup = get_soup(url)

In [25]:
cos_rank, artist, album, year = [], [], [], []

# The first two <h2> tags and the last one are skipped (presumably they are
# not list entries -- verify against the page).
for tag in soup.find_all('h2')[2:-1]:
    # The leading text reads "<rank>.<artist...>"; split at the first dot.
    rank_and_artist = tag.contents[0].split('.', 1)
    cos_rank.append(rank_and_artist[0])
    artist.append(rank_and_artist[1][1:-3])
    album.append(tag.contents[1].contents[0])
    year.append(tag.contents[2][2:-1])

In [26]:
# Assemble the parallel lists filled by the parsing cell into one frame.
cos_df = pd.DataFrame(
    dict(artist=artist, album=album, year=year, cos_rank=cos_rank)
)

In [27]:
# Preview the first rows to sanity-check the scrape.
cos_df.head()

Unnamed: 0,artist,album,year,cos_rank
0,PJ Harvey,Let England Shake,2011,100
1,Savages,Silence Yourself,2013,99
2,Destroyer,Kaputt,2011,98
3,Ariana Grande,"thank you, next",2019,97
4,Bon Iver,"22, A Million",2016,96


# Time

In [61]:
# Time list page to scrape.
url =  "https://time.com/5725768/best-albums-2010s-decade/"
soup = get_soup(url)

# Exploratory dump: print the text of every <strong> tag on the page. This
# includes <strong> elements that are not album entries (the recorded
# output ends with "Contact us"). No DataFrame is built for this source here.
for tag in soup.find_all("strong"):
    print(tag.text)


Fiona Apple, The Idler Wheel… (2012)
Miguel, Kaleidoscope Dream (2012)
Beauty Pill, Beauty Pill Describes Things As They Are (2015)
Carly Rae Jepsen, E•MO•TION (2015)
Beyoncé, Lemonade (2016)
Leonard Cohen, You Want It Darker (2016)
Miranda Lambert, The Weight of These Wings (2016)
Solange, A Seat At the Table (2016)
Kendrick Lamar, DAMN. (2017)
Ozuna, Aura (2018)
Contact us


# Paste Magazine


In [47]:
# The Paste list is read from four pages: the base article URL, then the
# same URL with ?p=2 .. ?p=4 appended.
urls = [
    "https://www.pastemagazine.com/articles/2019/10/best-albums-of-the-2010s-paste.html" + page_suffix
    for page_suffix in ("", "?p=2", "?p=3", "?p=4")
]

In [51]:
paste_rank, artist, album, year = [], [], [], []

# Gather the <b class="big"> headings from every page, in page order.
tag_list = []
for url in urls:
    soup = get_soup(url)
    tag_list.extend(soup.find_all("b", class_="big"))

# The first collected heading is dropped (presumably not a list entry --
# verify against the page).
tag_list = tag_list[1:]

# Heading text is parsed as "<rank>. <artist>: <album> (<year>)".
for tag in tag_list:
    heading = tag.text
    rank_and_rest = heading.split('.', 1)
    paste_rank.append(rank_and_rest[0])
    artist.append(rank_and_rest[1].split(':', 1)[0][1:])
    # The album is the text after the first ':', minus the trailing 7 chars.
    album.append(heading.split(':', 1)[1][1:-7])
    # The year is the four characters before the final character.
    year.append(heading[-5:-1])

paste_df = pd.DataFrame({
    'artist': artist,
    'album': album,
    'year': year,
    'paste_rank': paste_rank,
})

In [32]:
# Preview the first rows to sanity-check the scrape.
paste_df.head()

Unnamed: 0,artist,album,year,paste_rank
0,Drake,Take Care,2011,100
1,Japanese Breakfast,Soft Sounds from Another Planet,2017,99
2,Nick Cave & The Bad Seeds,Skeleton Tree,2016,98
3,Deafheaven,Sunbather,2013,97
4,"Tyler, The Creator",Flower Boy,2017,96


# Genius

In [73]:
# Genius list page to scrape.
url =  "https://genius.com/a/the-genius-communitys-100-best-albums-of-the-2010s"

# Rebinds the shared `soup` name to the Genius document.
soup = get_soup(url)

In [111]:
genius_rank, artist, album = [], [], []

for tag in soup.find_all('div', class_='g_list-item-header'):
    # Flatten the header text, skip its first 10 characters, then split the
    # rank token from the remainder at the first space.
    header = tag.text.replace('\n', '')[10:].split(' ', 1)
    genius_rank.append(header[0])

    # After a further 17 skipped characters the text reads
    # "<artist> – <album>" (en dash surrounded by spaces).
    artist_and_album = header[1][17:].split(' – ')
    artist.append(artist_and_album[0])
    album.append(artist_and_album[1])


genius_df = pd.DataFrame({
    'artist': artist,
    'album': album,
    'genius_rank': genius_rank,
})

In [114]:
# Display the frame itself rather than a head() preview; display.max_rows
# was raised to 101 in the first cell, presumably so the list is not truncated.
genius_df

Unnamed: 0,artist,album,genius_rank
0,Justin Bieber,Purpose,100
1,Katy Perry,Teenage Dream,99
2,Bon Iver,"Bon Iver, Bon Iver",98
3,Billie Eilish,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",97
4,Schoolboy Q,Oxymoron,96
5,Ariana Grande,Dangerous Woman,95
6,Isaiah Rashad,Cilvia Demo,94
7,alt-j,An Awesome Wave,93
8,Flying Lotus,Cosmogramma,92
9,Logic,Under Pressure,91
