In [1]:
import os
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

# Show up to 500 rows when a DataFrame is displayed, so long tables are not truncated.
pd.set_option("display.max_rows", 500)

from utilities import DataCleaner


def ranks(n):
    """Return the ranks ``n, n - 1, ..., 1`` as a list (empty when ``n < 1``).

    The scraping cells pass this to ``create_df`` so that the first scraped
    entry receives rank ``n`` and the last one receives rank 1.
    """
    return list(range(n, 0, -1))


def remove_parentheses(string):
    """Return ``string`` cut at its first ``(``, with surrounding whitespace stripped.

    A string without any ``(`` is returned whole (still stripped).
    """
    head, _, _ = string.partition("(")
    return head.strip()


def fix(df, idx, col, new):
    """Overwrite column ``col`` with ``new`` on the rows of ``df`` selected by ``idx``.

    ``df`` is modified in place and nothing is returned. ``idx`` is handed
    straight to ``df.loc`` as the row selector (the notebook passes boolean masks).
    """
    selection = (idx, col)
    df.loc[selection] = new

# Scraping

This is a nice wrapper for getting the soup object from a URL.

In [2]:
# Collects one DataCleaner per scraped page; their `.df` frames are concatenated
# in the "Combining Data" section.
websites=[]

## Pitchfork

In [3]:
# Source article: Pitchfork's 200 best albums of the 2010s.
pitchfork_url = (
    "https://pitchfork.com/features/lists-and-guides/the-200-best-albums-of-the-2010s/"
)

# The second argument is the reviewer label; it shows up in the `reviewer` column of the frame.
pitchfork = DataCleaner(pitchfork_url, "Pitchfork")

In [4]:
# Turn the scraped page into an artist / album / year / rank frame.
# NOTE(review): DataCleaner lives in the project's `utilities` module; the per-call notes
# below are inferred from the method names and the displayed output — confirm there.
pitchfork.create_tags("h2")  # presumably collects the text of the page's <h2> tags
pitchfork.split(": ")  # presumably "Artist: Album" -> two fields
pitchfork.transpose()
# Order matters: remove_parentheses throws away everything from the first "(",
# so the year (presumably read from that parenthesised part) is extracted first.
pitchfork.extract_year_from_paren("albums")
pitchfork.apply("albums", remove_parentheses)
pitchfork.create_df(ranks(200))  # ranks run 200, 199, ..., 1 in scraped order

# Preview the first rows only.
pitchfork.df.head()

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,Ratking,So It Goes,200,,2014,Pitchfork,https://pitchfork.com/features/lists-and-guide...
1,Wu Lyf,Go Tell Fire to the Mountain,199,,2011,Pitchfork,https://pitchfork.com/features/lists-and-guide...
2,Jean Grae / Quelle Chris,Everything’s Fine,198,,2018,Pitchfork,https://pitchfork.com/features/lists-and-guide...
3,Fatima Al Qadiri,Genre-Specific Xperience,197,,2011,Pitchfork,https://pitchfork.com/features/lists-and-guide...
4,Portal,Vexovoid,196,,2013,Pitchfork,https://pitchfork.com/features/lists-and-guide...


In [5]:
# Register the Pitchfork data for the final concat.
# NOTE: re-running this cell appends the same object a second time.
websites.append(pitchfork)

## Billboard

In [6]:
# Source article: Billboard's top 100 albums of the 2010s.
bill_url = "https://www.billboard.com/articles/news/list/8543722/best-albums-of-the-2010s-top-100"
bill = DataCleaner(bill_url, "Billboard")
bill.create_tags("strong")
# Three entries are spliced in by hand at fixed positions — presumably ones the <strong>
# scrape misses. The indices depend on the page's markup at scrape time; TODO confirm.
bill.strings.insert(25, "75. Charli XCX, True Romance (2013)")
bill.strings.insert(41, "59. Pusha T, Daytona (2018)")
bill.strings.insert(50, "50. Carly Rae Jepsen, E•MO•TION (2015)")
bill.trim("strings", chars="1234567890. ")  # presumably strips the leading "75. " rank prefix
bill.extract_year_from_paren("strings")
bill.split(",")
bill.transpose()
bill.apply("albums", remove_parentheses)
bill.apply("albums", lambda x: x.replace("\xa0", " "))  # non-breaking spaces -> plain spaces
# NOTE(review): the previous line already replaced every "\xa0", so this trim
# likely has nothing left to remove — confirm against DataCleaner.trim.
bill.trim("albums", chars="\xa0", right=True)
bill.create_df(ranks(100))
# "Tyler, the Creator" contains the "," used as the split separator above, so the row is
# repaired by hand. The second mask still matches because the corrected artist also starts with "Tyler".
fix(bill.df, bill.df["artist"].str.startswith("Tyler"), "artist", "Tyler, the Creator")
fix(bill.df, bill.df["artist"].str.startswith("Tyler"), "album", "IGOR")

# Preview the first rows only.
bill.df.head()

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,Lady Gaga & Bradley Cooper,A Star Is Born Soundtrack,100,,2018,Billboard,https://www.billboard.com/articles/news/list/8...
1,Lady Antebellum,Need You Now,99,,2010,Billboard,https://www.billboard.com/articles/news/list/8...
2,Japandroids,Celebration Rock,98,,2012,Billboard,https://www.billboard.com/articles/news/list/8...
3,Porter Robinson,Worlds,97,,2014,Billboard,https://www.billboard.com/articles/news/list/8...
4,Ed Sheeran,x,96,,2014,Billboard,https://www.billboard.com/articles/news/list/8...


In [7]:
# Register the Billboard data for the final concat.
# NOTE: re-running this cell appends the same object a second time.
websites.append(bill)

## Stereogum

In [8]:
# Source article: Stereogum's best albums of the 2010s.
stereo_url = "https://www.stereogum.com/featured/best-albums-of-the-2010s-list/"
stereo = DataCleaner(stereo_url, "Stereogum")
stereo.create_tags("h2")
# Keep every second scraped string, starting with the second one.
stereo.strings = stereo.strings[1::2]
stereo.trim("strings", "1234567890 ")
stereo.split(" – ")  # separator is an en dash surrounded by spaces
stereo.transpose()
# NOTE(review): the meaning of the extra ",)" argument is defined in DataCleaner — confirm there.
stereo.extract_year_from_paren("albums", ",)")
stereo.apply("albums", remove_parentheses)
stereo.create_df(ranks(100))
# Hand-correct the artist on the row whose album text starts with "Rich Gang".
fix(stereo.df, stereo.df["album"].str.startswith("Rich Gang"), "artist", "Rich Gang")

# Preview the first rows only.
stereo.df.head()

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,Future,Pluto,100,,2012,Stereogum,https://www.stereogum.com/featured/best-albums...
1,Car Seat Headrest,Teens Of Denial,99,,2016,Stereogum,https://www.stereogum.com/featured/best-albums...
2,PUP,The Dream Is Over,98,,2016,Stereogum,https://www.stereogum.com/featured/best-albums...
3,Courtney Barnett,"Sometimes I Sit And Think, And Sometimes I Jus...",97,,2015,Stereogum,https://www.stereogum.com/featured/best-albums...
4,Colleen Green,I Want To Grow Up,96,,2015,Stereogum,https://www.stereogum.com/featured/best-albums...


In [9]:
# Register the Stereogum data for the final concat.
# NOTE: re-running this cell appends the same object a second time.
websites.append(stereo)

## Consequence of Sound 

In [10]:
# The list is spread over article pages 2 through 11; build one URL per page.
cos_urls = [
    "https://consequenceofsound.net/2019/12/top-albums-of-the-2010s/{}/".format(page)
    for page in range(2, 12)
]

In [11]:
# One DataCleaner per article page, labelled "Consequence_of_Sound0", "Consequence_of_Sound1", ...
# by position in `cos_urls`. enumerate() supplies the position directly instead of
# re-searching the list with .index() for every URL (which would also give a repeated
# URL the label of its first occurrence).
cos = [
    DataCleaner(url, "Consequence_of_Sound" + str(page_idx))
    for page_idx, url in enumerate(cos_urls)
]

In [12]:
# Clean every Consequence of Sound page the same way; page `idx` contributes ten ranks.
for idx, cos_page in enumerate(cos):
    cos_page.create_tags("h2")
    cos_page.strings = cos_page.strings[2:]  # drop the first two scraped strings
    # list.remove raises ValueError if this string is not among the scraped strings.
    cos_page.strings.remove("Create your account")
    cos_page.trim("strings", "1234567890. ")
    cos_page.split(" – ")
    cos_page.transpose()
    cos_page.extract_year_from_paren("albums")
    cos_page.apply("albums", remove_parentheses)
    # Page 0 gets ranks 100..91, page 1 gets 90..81, and so on.
    cos_page.create_df(ranks(100)[idx*10:(idx+1)*10])
    print(cos_page.df.head())

          artist              album  rank genre  year               reviewer  \
0      PJ Harvey  Let England Shake   100  None  2011  Consequence_of_Sound0   
1        Savages   Silence Yourself    99  None  2013  Consequence_of_Sound0   
2      Destroyer             Kaputt    98  None  2011  Consequence_of_Sound0   
3  Ariana Grande    thank you, next    97  None  2019  Consequence_of_Sound0   
4       Bon Iver      22, A Million    96  None  2016  Consequence_of_Sound0   

                                        reviewer_url  
0  https://consequenceofsound.net/2019/12/top-alb...  
1  https://consequenceofsound.net/2019/12/top-alb...  
2  https://consequenceofsound.net/2019/12/top-alb...  
3  https://consequenceofsound.net/2019/12/top-alb...  
4  https://consequenceofsound.net/2019/12/top-alb...  
                     artist                                          album  \
0          Carly Rae Jepsen                                      E•MO•TION   
1                   Big Boi  Sir 

In [13]:
# Register all Consequence of Sound pages for the final concat.
# NOTE: re-running this cell adds the same objects a second time.
websites.extend(cos)

## Time

In [14]:
# Source article: Time's best albums of the 2010s.
time_url =  "https://time.com/5725768/best-albums-2010s-decade/"
# NOTE(review): the name `time` would shadow the standard-library module of the same
# name if that module is ever imported in this notebook.
time = DataCleaner(time_url, "Time")
time.create_tags("strong")
time.strings = time.strings[:-1]  # drop the last scraped string
time.split(", ")
time.transpose()
time.extract_year_from_paren("albums")
time.apply("albums", remove_parentheses)
# NOTE(review): a scalar 10 is passed here where the other cells pass a ranks(...) list;
# in the displayed output every row has rank 10. Presumably intentional for an
# unranked list — confirm.
time.create_df(10)

# Show the whole (ten-row) frame.
time.df

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,Fiona Apple,The Idler Wheel…,10,,2012,Time,https://time.com/5725768/best-albums-2010s-dec...
1,Miguel,Kaleidoscope Dream,10,,2012,Time,https://time.com/5725768/best-albums-2010s-dec...
2,Beauty Pill,Beauty Pill Describes Things As They Are,10,,2015,Time,https://time.com/5725768/best-albums-2010s-dec...
3,Carly Rae Jepsen,E•MO•TION,10,,2015,Time,https://time.com/5725768/best-albums-2010s-dec...
4,Beyoncé,Lemonade,10,,2016,Time,https://time.com/5725768/best-albums-2010s-dec...
5,Leonard Cohen,You Want It Darker,10,,2016,Time,https://time.com/5725768/best-albums-2010s-dec...
6,Miranda Lambert,The Weight of These Wings,10,,2016,Time,https://time.com/5725768/best-albums-2010s-dec...
7,Solange,A Seat At the Table,10,,2016,Time,https://time.com/5725768/best-albums-2010s-dec...
8,Kendrick Lamar,DAMN.,10,,2017,Time,https://time.com/5725768/best-albums-2010s-dec...
9,Ozuna,Aura,10,,2018,Time,https://time.com/5725768/best-albums-2010s-dec...


In [15]:
# Register the Time data for the final concat.
# NOTE: re-running this cell appends the same object a second time.
websites.append(time)

## Paste Magazine

In [16]:
# The Paste list is split over four pages (the first page has no ?p= parameter).
paste_urls = [
    "https://www.pastemagazine.com/articles/2019/10/best-albums-of-the-2010s-paste.html",
    "https://www.pastemagazine.com/articles/2019/10/best-albums-of-the-2010s-paste.html?p=2",
    "https://www.pastemagazine.com/articles/2019/10/best-albums-of-the-2010s-paste.html?p=3",
    "https://www.pastemagazine.com/articles/2019/10/best-albums-of-the-2010s-paste.html?p=4",
]

# One DataCleaner per page, labelled "Paste0" .. "Paste3" by position in `paste_urls`.
# enumerate() yields the position directly instead of looking it up with .index() per URL.
paste_1, paste_2, paste_3, paste_4 = [
    DataCleaner(url, "Paste" + str(page_idx)) for page_idx, url in enumerate(paste_urls)
]

In [17]:
# Paste page 1: entries come from <b> tags.
paste_1.create_tags("b")
paste_1.strings = paste_1.strings[2:26]  # keep 24 strings, skipping the first two
paste_1.trim("strings", "1234567890. ")
paste_1.split(": ")
paste_1.transpose()
paste_1.extract_year_from_paren("albums")
paste_1.apply("albums", remove_parentheses)
paste_1.create_df(ranks(100)[:24])  # ranks 100..77

# Show the whole frame for this page.
paste_1.df

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,Drake,Take Care,100,,2011,Paste0,https://www.pastemagazine.com/articles/2019/10...
1,Japanese Breakfast,Soft Sounds from Another Planet,99,,2017,Paste0,https://www.pastemagazine.com/articles/2019/10...
2,Nick Cave & The Bad Seeds,Skeleton Tree,98,,2016,Paste0,https://www.pastemagazine.com/articles/2019/10...
3,Deafheaven,Sunbather,97,,2013,Paste0,https://www.pastemagazine.com/articles/2019/10...
4,"Tyler, The Creator",Flower Boy,96,,2017,Paste0,https://www.pastemagazine.com/articles/2019/10...
5,The Black Keys,Brothers,95,,2010,Paste0,https://www.pastemagazine.com/articles/2019/10...
6,Sky Ferreira,"Night Time, My Time",94,,2013,Paste0,https://www.pastemagazine.com/articles/2019/10...
7,Charles Bradley,Victim of Love,93,,2013,Paste0,https://www.pastemagazine.com/articles/2019/10...
8,Travis Scott,ASTROWORLD,92,,2018,Paste0,https://www.pastemagazine.com/articles/2019/10...
9,Lucius,Wildewoman,91,,2013,Paste0,https://www.pastemagazine.com/articles/2019/10...


In [18]:
# Paste page 2: entries come from <b class="big"> tags, so no manual slicing of the strings.
paste_2.create_tags("b", class_="big")

paste_2.trim("strings", "1234567890. ")
paste_2.split(": ")
paste_2.transpose()
paste_2.extract_year_from_paren("albums")
paste_2.apply("albums", remove_parentheses)
paste_2.create_df(ranks(100)[24:51])  # ranks 76..50

# Show the whole frame for this page.
paste_2.df

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,IDLES,Joy As An Act of Resistance,76,,2018,Paste1,https://www.pastemagazine.com/articles/2019/10...
1,Fiona Apple,The Idler Wheel Is Wiser than the Driver of th...,75,,2012,Paste1,https://www.pastemagazine.com/articles/2019/10...
2,Sturgill Simpson,Metamodern Sounds in Country Music,74,,2014,Paste1,https://www.pastemagazine.com/articles/2019/10...
3,Kevin Morby,City Music,73,,2017,Paste1,https://www.pastemagazine.com/articles/2019/10...
4,Frightened Rabbit,The Winter of Mixed Drinks,72,,2010,Paste1,https://www.pastemagazine.com/articles/2019/10...
5,Caribou,Our Love,71,,2014,Paste1,https://www.pastemagazine.com/articles/2019/10...
6,Destroyer,Kaputt,70,,2011,Paste1,https://www.pastemagazine.com/articles/2019/10...
7,Leon Bridges,Coming Home,69,,2015,Paste1,https://www.pastemagazine.com/articles/2019/10...
8,The Decemberists,The King is Dead,68,,2011,Paste1,https://www.pastemagazine.com/articles/2019/10...
9,Cardi B,Invasion of Privacy,67,,2018,Paste1,https://www.pastemagazine.com/articles/2019/10...


In [19]:
# Paste page 3: same steps as page 2, with the next slice of ranks.
paste_3.create_tags("b", class_="big")

paste_3.trim("strings", "1234567890. ")
paste_3.split(": ")
paste_3.transpose()
paste_3.extract_year_from_paren("albums")
paste_3.apply("albums", remove_parentheses)
paste_3.create_df(ranks(100)[51:75])  # ranks 49..26

# Show the whole frame for this page.
paste_3.df

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,A Tribe Called Quest,We got it from Here… Thank You 4 Your service,49,,2016,Paste2,https://www.pastemagazine.com/articles/2019/10...
1,Alabama Shakes,Boys & Girls,48,,2012,Paste2,https://www.pastemagazine.com/articles/2019/10...
2,Vince Staples,Summertime ‘06,47,,2015,Paste2,https://www.pastemagazine.com/articles/2019/10...
3,Chance the Rapper,Coloring Book,46,,2016,Paste2,https://www.pastemagazine.com/articles/2019/10...
4,Snail Mail,Lush,45,,2018,Paste2,https://www.pastemagazine.com/articles/2019/10...
5,Tame Impala,Lonerism,44,,2012,Paste2,https://www.pastemagazine.com/articles/2019/10...
6,Weyes Blood,Titanic Rising,43,,2019,Paste2,https://www.pastemagazine.com/articles/2019/10...
7,Janelle Monáe,Dirty Computer,42,,2018,Paste2,https://www.pastemagazine.com/articles/2019/10...
8,Japandroids,Celebration Rock,41,,2012,Paste2,https://www.pastemagazine.com/articles/2019/10...
9,Beyoncé,Beyoncé,40,,2013,Paste2,https://www.pastemagazine.com/articles/2019/10...


In [20]:
# Paste page 4: same steps as page 2, with the final slice of ranks.
paste_4.create_tags("b", class_="big")

paste_4.trim("strings", "1234567890. ")
paste_4.split(": ")
paste_4.transpose()
paste_4.extract_year_from_paren("albums")
paste_4.apply("albums", remove_parentheses)
paste_4.create_df(ranks(100)[75:])  # ranks 25..1

# Show the whole frame for this page.
paste_4.df

Unnamed: 0,artist,album,rank,genre,year,reviewer,reviewer_url
0,Kanye West,Yeezus,25,,2013,Paste3,https://www.pastemagazine.com/articles/2019/10...
1,The War on Drugs,Lost in the Dream,24,,2014,Paste3,https://www.pastemagazine.com/articles/2019/10...
2,Mitski,Puberty 2,23,,2016,Paste3,https://www.pastemagazine.com/articles/2019/10...
3,Father John Misty,I Love You Honeybear,22,,2015,Paste3,https://www.pastemagazine.com/articles/2019/10...
4,Arcade Fire,The Suburbs,21,,2010,Paste3,https://www.pastemagazine.com/articles/2019/10...
5,Lucy Dacus,Historian,20,,2018,Paste3,https://www.pastemagazine.com/articles/2019/10...
6,Alvvays,Antisocialites,19,,2017,Paste3,https://www.pastemagazine.com/articles/2019/10...
7,Robyn,Body Talk,18,,2010,Paste3,https://www.pastemagazine.com/articles/2019/10...
8,Radiohead,A Moon Shaped Pool,17,,2016,Paste3,https://www.pastemagazine.com/articles/2019/10...
9,Beach House,Teen Dream,16,,2010,Paste3,https://www.pastemagazine.com/articles/2019/10...


In [21]:
# Register all four Paste pages for the final concat.
# NOTE: re-running this cell adds the same objects a second time.
websites.extend([paste_1, paste_2, paste_3, paste_4])

## Genius

In [22]:
# Source article: the Genius community's 100 best albums of the 2010s.
genius_url = "https://genius.com/a/the-genius-communitys-100-best-albums-of-the-2010s"
genius = DataCleaner(genius_url, "Genius")

genius.create_tags("div", class_="g_list-item-header")
# Two passes with different character sets; the second passes left=False, right=True.
genius.trim("strings", "\n 1234567890")
genius.trim("strings", "\n ", left=False, right=True)
genius.split(" – ")
genius.transpose()
# No extract_year_from_paren call here; the displayed frame has no `year` column.
genius.create_df(ranks(100))

# Preview the first rows only.
genius.df.head()

Unnamed: 0,artist,album,rank,genre,reviewer,reviewer_url
0,Justin Bieber,Purpose,100,,Genius,https://genius.com/a/the-genius-communitys-100...
1,Katy Perry,Teenage Dream,99,,Genius,https://genius.com/a/the-genius-communitys-100...
2,Bon Iver,"Bon Iver, Bon Iver",98,,Genius,https://genius.com/a/the-genius-communitys-100...
3,Billie Eilish,"WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?",97,,Genius,https://genius.com/a/the-genius-communitys-100...
4,Schoolboy Q,Oxymoron,96,,Genius,https://genius.com/a/the-genius-communitys-100...


In [23]:
# Register the Genius data for the final concat.
# NOTE: re-running this cell appends the same object a second time.
websites.append(genius)

## Rolling Stone

In [24]:
# The Rolling Stone list is scraped from two pages.
roll_urls = [
    "https://www.rollingstone.com/music/music-lists/best-albums-2010s-ranked-913997/",
    "https://www.rollingstone.com/music/music-lists/best-albums-2010s-ranked-913997/carly-rae-jepsen-emotion-album-917470/",
]
# One DataCleaner per page, labelled "Rolling_Stone0" / "Rolling_Stone1" by position.
# enumerate() yields the position directly instead of looking it up with .index() per URL.
roll_1, roll_2 = [
    DataCleaner(url, "Rolling_Stone" + str(page_idx))
    for page_idx, url in enumerate(roll_urls)
]

In [25]:
# Rolling Stone page 1: entries come from <header class="c-list__header"> tags.
roll_1.create_tags("header", class_="c-list__header")
roll_1.trim("strings", "\n\t1234567890", right=True)
roll_1.split(', ')
roll_1.transpose()
# Drop the first and last character of each album — presumably the surrounding quote marks.
roll_1.apply("albums", lambda x: x[1:-1])
roll_1.create_df(ranks(100)[:50])  # ranks 100..51

# Preview the first rows only.
roll_1.df.head()

Unnamed: 0,artist,album,rank,genre,reviewer,reviewer_url
0,The War on Drugs,A Deeper Understanding,100,,Rolling_Stone0,https://www.rollingstone.com/music/music-lists...
1,Lana Del Rey,Ultraviolence,99,,Rolling_Stone0,https://www.rollingstone.com/music/music-lists...
2,John Prine,The Tree of Forgiveness,98,,Rolling_Stone0,https://www.rollingstone.com/music/music-lists...
3,Rich Gang,Tha Tour Part 1,97,,Rolling_Stone0,https://www.rollingstone.com/music/music-lists...
4,Low Cut Connie,Call Me Sylvia,96,,Rolling_Stone0,https://www.rollingstone.com/music/music-lists...


In [26]:
# Rolling Stone page 2: same steps as page 1, plus one hand-written entry.
roll_2.create_tags("header", class_="c-list__header")
roll_2.trim("strings", "\n\t1234567890", right=True)
# Overwrite the sixth scraped string so it fits the 'Artist, "Album"' pattern that the
# ', ' split and the [1:-1] slice below rely on.
roll_2.strings[5] = 'Hamilton, "Hamilton" Original Broadway Cast Recording'
roll_2.split(', ')
roll_2.transpose()
roll_2.apply("albums", lambda x: x[1:-1])
roll_2.create_df(ranks(50))  # ranks 50..1

# Preview the first rows only.
roll_2.df.head()

Unnamed: 0,artist,album,rank,genre,reviewer,reviewer_url
0,Carly Rae Jepsen,Emotion,50,,Rolling_Stone1,https://www.rollingstone.com/music/music-lists...
1,Jay-Z and Kanye West,Watch the Throne,49,,Rolling_Stone1,https://www.rollingstone.com/music/music-lists...
2,Leonard Cohen,You Want It Darker,48,,Rolling_Stone1,https://www.rollingstone.com/music/music-lists...
3,Bon Iver,"22, A Million",47,,Rolling_Stone1,https://www.rollingstone.com/music/music-lists...
4,Bruce Springsteen,Wrecking Ball,46,,Rolling_Stone1,https://www.rollingstone.com/music/music-lists...


In [27]:
# Register both Rolling Stone pages for the final concat.
# NOTE: re-running this cell adds the same objects a second time.
websites.extend([roll_1, roll_2])

# Combining Data 

In [28]:
# Stack every scraped frame, add an upper-cased copy of the artist name as a
# helper column, and order the rows by that column.
albums = (
    pd.concat([site.df for site in websites], sort=False)
    .assign(ARTIST=lambda frame: frame["artist"].str.upper())
    .sort_values("ARTIST")
)

In [33]:
# Manual normalisation of artist / album spellings so that the same record listed by
# different reviewers ends up with identical (artist, album) values.
# Each mask is evaluated against the current state of `albums` at the moment of the call,
# so the order of calls matters wherever a later mask matches a value written earlier.
# NOTE(review): masks on the artist alone overwrite every album by that artist in the
# frame — correct only if the artist has a single listed album; confirm against the data.
fix(
    albums,
    albums.artist == "A Tribe Called Quest",
    "album",
    "We Got it From Here... Thank You 4 Your Service",
)
fix(
    albums,
    ((albums.artist == "Angel Olsen") & (albums.album.str.startswith("Burn"))),
    "album",
    "Burn Your Fire for No Witnesses",
)
# Upper-casing the album first makes the prefix match case-insensitive.
fix(
    albums,
    ((albums.artist == "Ariana Grande") & (albums.album.str.upper().str.startswith("THANK"))),
    "album",
    "thank u, next",
)
fix(
    albums, (albums.artist == "Bad Bunny"), "album", "X 100PRE",
)
fix(
    albums,
    (albums.artist == "Billie Eilish"),
    "album",
    "When We All Fall Asleep, Where Do We Go?",
)
# Matches on the album title only, whatever the artist.
fix(
    albums, (albums.album == "Bon Iver, Bon Iver"), "album", "Bon Iver",
)
fix(
    albums, (albums.artist == "Brandi Carlile"), "album", "By the Way, I Forgive You",
)
fix(
    albums,
    ((albums.artist == "Car Seat Headrest") & (albums.album.str.startswith("Teens"))),
    "album",
    "Teens of Denial",
)
fix(
    albums,
    ((albums.artist == "Cardi B") & (albums.album.str.startswith("Invasion"))),
    "album",
    "Invasion of Privacy",
)
fix(
    albums, (albums.artist == "Carly Rae Jepsen"), "album", "Emotion",
)
fix(albums, albums.artist.str.startswith("Chance"), "artist", "Chance the Rapper")
fix(
    albums,
    (albums.artist == "Courtney Barnett"),
    "album",
    "Sometimes I Sit and Think, and Sometimes I Just Sit",
)
# Order matters: first give every "...Black Messiah" row one artist credit, then
# normalise the album title on all rows whose artist starts with "D'Angelo"
# (which now includes the rows just renamed).
fix(
    albums,
    albums.album.str.endswith("Black Messiah"),
    "artist",
    "D'Angelo and the Vanguard",
)
fix(albums, albums.artist.str.startswith("D'Angelo"), "album", "Black Messiah")
fix(albums, albums.artist == "David Bowie", "album", "Blackstar")
# Drake has two albums in the data, told apart by how the title ends.
fix(
    albums,
    ((albums.artist == "Drake") & (albums.album.str.endswith("Late"))),
    "album",
    "If You're Reading This, It's Too Late",
)
fix(
    albums,
    ((albums.artist == "Drake") & (albums.album.str.endswith("Same"))),
    "album",
    "Nothing Was the Same",
)

# Manual spelling fixes, continued. Masks are evaluated at call time, so a call only
# sees the changes made by the calls above it.
fix(albums, albums.artist.str.startswith("FKA"), "artist", "FKA twigs")
fix(albums, albums.artist == "Father John Misty", "album", "I Love You, Honeybear")
fix(albums, albums.artist == "Fiona Apple", "album", "The Idler Wheel...")
fix(
    albums,
    (
        (albums.artist == "Frank Ocean")
        & (albums.album.str.upper().str.startswith("CHANNEL"))
    ),
    "album",
    "Channel Orange",
)
# NOTE(review): overwrites the album on every row whose artist starts with "Freddie Gibbs".
fix(
    albums, albums.artist.str.startswith("Freddie Gibbs"), "album", "Pinata",
)
fix(
    albums,
    ((albums.artist == "Future") & (albums.album.str.startswith("Dirty"))),
    "album",
    "DS2",
)
# Case-insensitive match, so differently capitalised spellings all become "HAIM".
fix(
    albums, albums.artist.str.upper() == "HAIM", "artist", "HAIM",
)
# Only rows whose artist is already "Hamilton" at this point are matched; rows that
# are renamed to "Hamilton" further down in this cell are not touched by this call.
fix(
    albums,
    albums.artist == "Hamilton",
    "album",
    "Hamilton: Original Broadway Cast Recording",
)
fix(
    albums, albums.artist.str.endswith("Balvin"), "artist", "J Balvin",
)
# Order matters for the next three calls: the joint credit is set from the album title,
# solo "Jay-Z" rows are re-capitalised, and then the album title is normalised on the
# rows whose artist starts with "JAY-Z &" (i.e. the joint credit written two calls earlier).
fix(
    albums, albums.album.str.endswith("Throne"), "artist", "JAY-Z & Kanye West",
)
fix(
    albums, albums.artist == "Jay-Z", "artist", "JAY-Z",
)
fix(
    albums, albums.artist.str.startswith("JAY-Z &"), "album", "Watch the Throne",
)
# Artist first, then the album fix matches on the normalised artist name.
# NOTE(review): the "Jamie" prefix matches any artist beginning with that name — confirm.
fix(
    albums, albums.artist.str.startswith("Jamie"), "artist", "Jamie xx",
)
fix(
    albums, albums.artist == "Jamie xx", "album", "In Colour",
)
fix(
    albums, albums.artist == "Jamila Woods", "album", "LEGACY! LEGACY!",
)
# Written without the accent so every spelling starting with "Janelle" groups together.
fix(
    albums, albums.artist.str.startswith("Janelle"), "artist", "Janelle Monae",
)
fix(
    albums,
    albums.artist == "Japanese Breakfast",
    "album",
    "Soft Sounds from Another Planet",
)
fix(
    albums, albums.artist == "Joanna Newsom", "album", "Have One on Me",
)
# Kendrick Lamar has three albums in the data, told apart by a case-insensitive title prefix.
fix(
    albums,
    (
        (albums.artist == "Kendrick Lamar")
        & (albums.album.str.upper().str.startswith("DAMN"))
    ),
    "album",
    "DAMN.",
)
fix(
    albums,
    (
        (albums.artist == "Kendrick Lamar")
        & (albums.album.str.upper().str.startswith("GOOD"))
    ),
    "album",
    "good kid, m.A.A.d city",
)
fix(
    albums,
    (
        (albums.artist == "Kendrick Lamar")
        & (albums.album.str.upper().str.startswith("TO"))
    ),
    "album",
    "To Pimp a Butterfly",
)
fix(
    albums, albums.artist == "Kurt Vile", "album", "Smoke Ring for My Halo",
)
fix(
    albums,
    (
        (albums.artist == "Lana Del Rey")
        & (albums.album.str.upper().str.startswith("BORN"))
    ),
    "album",
    "Born to Die",
)
# Rows credited to Lin-Manuel Miranda are folded into the "Hamilton" artist credit.
# The `artist == "Hamilton"` album fix earlier in this cell has already run by the time
# these rows are renamed, so on a single pass they would keep their un-normalised album
# title and fail to group with the other Hamilton rows. Normalise the album here as well
# (album first, because the mask depends on the old artist name).
# NOTE(review): assumes every Lin-Manuel Miranda row is the Hamilton cast recording,
# which is what re-running the cell would produce anyway — confirm against the data.
fix(
    albums,
    (albums.artist == "Lin-Manuel Miranda"),
    "album",
    "Hamilton: Original Broadway Cast Recording",
)
fix(
    albums, (albums.artist == "Lin-Manuel Miranda"), "artist", "Hamilton",
)
# Manual spelling fixes, continued. Masks are evaluated at call time, so a call only
# sees the changes made by the calls above it.
fix(
    albums, (albums.artist == "Metallica"), "album", "Hardwired... to Self-Destruct",
)
fix(
    albums, (albums.artist == "Mount Eerie"), "album", "A Crow Looked at Me",
)
fix(
    albums, (albums.artist == "My Bloody Valentine"), "album", "m b v",
)
# Artist first; the album fix below then matches the normalised name
# (which still starts with "Nick Cave").
fix(
    albums,
    albums.artist.str.startswith("Nick Cave"),
    "artist",
    "Nick Cave & the Bad Seeds",
)
fix(
    albums,
    ((albums.artist.str.startswith("Nick Cave")) & albums.album.str.endswith("Tree")),
    "album",
    "Skeleton Tree",
)
# NOTE(review): the second call sets the album on every row whose artist starts with "Pusha".
fix(
    albums, albums.artist.str.startswith("Pusha"), "artist", "Pusha-T",
)
fix(
    albums, albums.artist.str.startswith("Pusha"), "album", "DAYTONA",
)
# Matches on the upper-cased ARTIST helper column rather than on `artist`.
# NOTE(review): ARTIST is not updated by any fix in this cell, so it still holds the
# original (pre-fix) spellings.
fix(
    albums, albums.ARTIST.str.startswith("ROSAL"), "artist", "Rosalia",
)
fix(
    albums, albums.artist == "Rae Sremmurd", "album", "SremmLife",
)
fix(
    albums, albums.artist == "Rich Gang", "album", "Rich Gang: Tha Tour Pt. 1",
)
# NOTE(review): any Rihanna album starting with "A" becomes "ANTI" — a broad prefix; confirm.
fix(
    albums,
    ((albums.artist == "Rihanna") & (albums.album.str.startswith("A"))),
    "album",
    "ANTI",
)
# NOTE(review): the second call sets the album on every row whose artist starts with "Run".
fix(
    albums, albums.artist.str.startswith("Run"), "artist", "Run the Jewels",
)
fix(
    albums, albums.artist.str.startswith("Run"), "album", "Run the Jewels 2",
)
fix(
    albums, albums.artist == "SZA", "album", "CTRL",
)
fix(
    albums,
    ((albums.artist == "Solange") & (albums.album.str.startswith("A Seat"))),
    "album",
    "A Seat at the Table",
)
fix(
    albums,
    (
        (albums.artist == "St. Vincent")
        & (albums.album.str.upper().str.startswith("MASS"))
    ),
    "album",
    "MASSEDUCTION",
)
fix(
    albums,
    ((albums.artist == "Sturgill Simpson") & (albums.album.str.startswith("A Sailor"))),
    "album",
    "A Sailor's Guide to Earth",
)
fix(
    albums, albums.artist == "Sufjan Stevens", "album", "Carrie & Lowell",
)
fix(
    albums,
    (
        (albums.artist == "The 1975")
        & (albums.album.str.upper().str.startswith("I LIKE"))
    ),
    "album",
    "I Like It When You Sleep...",
)

# Manual spelling fixes, final part. Masks are evaluated at call time, so a call only
# sees the changes made by the calls above it.
fix(
    albums, albums.artist == "The Knife", "album", "Shaking the Habitual",
)
# Artist first; the exact-match album fix below relies on the normalised artist name.
fix(
    albums, albums.artist.str.startswith("The War"), "artist", "The War on Drugs",
)

fix(
    albums,
    (
        (albums.artist == "The War on Drugs")
        & (albums.album.str.upper().str.startswith("LOST"))
    ),
    "album",
    "Lost in the Dream",
)
fix(
    albums,
    (
        (albums.artist == "Travis Scott")
        & (albums.album.str.upper().str.startswith("ASTRO"))
    ),
    "album",
    "ASTROWORLD",
)
# Unifies the capitalisation of the artist name on rows starting with "Tyler,".
fix(
    albums,
    albums.artist.str.startswith("Tyler,"),
    "artist",
    "Tyler, the Creator",
)
fix(
    albums,
    (
        (albums.artist == "Vampire Weekend")
        & (albums.album.str.upper().str.startswith("FATHER"))
    ),
    "album",
    "Father of the Bride",
)
fix(
    albums,
    (
        (albums.artist == "Vampire Weekend")
        & (albums.album.str.upper().str.startswith("MODERN"))
    ),
    "album",
    "Modern Vampires of the City",
)
# Order matters: both Hamilton masks require artist == "Various Artists", so the album
# is normalised first and the artist is renamed last (the normalised title still
# starts with "Hamilton", so the second mask keeps matching).
fix(
    albums,
    (
        (albums.artist == "Various Artists")
        & (albums.album.str.upper().str.startswith("HAMILTON"))
    ),
    "album",
    "Hamilton: Original Broadway Cast Recording",
)

fix(
    albums,
    (
        (albums.artist == "Various Artists")
        & (albums.album.str.upper().str.startswith("HAMILTON"))
    ),
    "artist",
    "Hamilton",
)
fix(
    albums,
    (
        (albums.artist == "Vince Staples")
        & (albums.album.str.upper().str.startswith("SUMMER"))
    ),
    "album",
    "Summertime '06",
)
# Written without the accent, for both the artist and the self-titled album.
fix(albums, albums.artist=="Beyoncé", "artist", "Beyonce")
fix(albums, albums.album=="Beyoncé", "album", "Beyonce")

In [34]:
# For each (artist, album) pair, count the rows with a non-null rank, i.e. how many
# scraped lists include that record. Useful for spotting spellings that still differ.
albums.groupby(["artist", "album"])[["rank"]].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,rank
artist,album,Unnamed: 2_level_1
(Sandy) Alex G,DSU,2
(Sandy) Alex G,Rocket,1
21 Savage / Metro Boomin,Savage Mode,1
A Tribe Called Quest,We Got it From Here... Thank You 4 Your Service,5
A$AP Rocky,LIVE.LOVE.A$AP,2
ANOHNI,Hopelessness,1
Adele,21,5
Adele,25,1
Against Me!,Transgender Dysphoria Blues,2
Alabama Shakes,Boys & Girls,2


# Old