In [18]:
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go


## Read In and Clean Data

In [19]:
from data.data import AOTY

In [20]:
# Display the long-form AOTY frame imported from data.data (pandas elides the middle rows).
AOTY

Unnamed: 0,Rank,Lister,Artist,Album,album_submission_count,album_score,top_10_album,album_average_rank,unique_album_submission,artist_album_release_count,multi_album_artist
0,1,Aaron,The Dirty Nil,Fuck Art,2,15,False,3.500000,False,1,False
1,2,Aaron,Whitechapel,Kin,1,9,False,2.000000,True,1,False
2,3,Aaron,That Handsome Devil,Your Parents Are Sellouts,1,8,False,3.000000,True,1,False
3,4,Aaron,Billie Eilish,Happier Than Ever,22,137,True,4.772727,False,1,False
4,5,Aaron,Lorna Shore,...And I Return To Nothingness,1,6,False,5.000000,True,1,False
...,...,...,...,...,...,...,...,...,...,...,...
975,6,Zack,Mark Barlow,Hymns & Soul,2,15,False,3.500000,False,1,False
976,7,Zack,Aquilo,A Safe Place To Be,1,4,False,7.000000,True,1,False
977,8,Zack,Tingsek,Home,2,4,False,9.000000,False,1,False
978,9,Zack,Ben Howard,Collections From The Whiteout,2,6,False,8.000000,False,1,False


## Get Extra Metadata

* Total Listers
* Total Artists
* Total Albums

In [21]:
from data.data import TOTAL_LISTERS, TOTAL_ARTISTS, TOTAL_ALBUMS

In [22]:
# Total Listers, as imported from data.data.
TOTAL_LISTERS

98

In [6]:
# Total Artists, as imported from data.data.
TOTAL_ARTISTS

432

In [7]:
# Total Albums, as imported from data.data.
TOTAL_ALBUMS

436

* album_submission_count for each album
* unique_album_submission tag for each submission
* artist_album_release_count
* multi_album_artist

In [8]:
# List each distinct (Artist, Album) pair among submissions tagged as coming
# from an artist with more than one album on the lists.
AOTY.loc[AOTY["multi_album_artist"].eq(True), ["Artist", "Album"]].drop_duplicates()

Unnamed: 0,Artist,Album
49,Hobo Johnson,The Revenge of Hobo Johnson
85,Drake,Certified Lover Boy
102,Benny Sings,Music
213,Drake,Scary Hours 2
243,Lana Del Rey,Chemtrails Over the Country Club
267,Eric Church,Soul
416,Benny Sings,Beat Tape II
534,Various Artists,The Harder They Fall (The Motion Picture Sound...
622,Lana Del Rey,Blue Banisters
643,Eric Church,Heart


In [9]:
# Cross-check: count the distinct album titles straight from the raw CSV.
raw_album_titles = pd.read_csv("data/AOTY-2021-lists.csv")["Album"]
len(set(raw_album_titles))

436

In [10]:
# Same count taken from the processed frame; it should agree with the raw CSV figure.
AOTY.loc[:, "Album"].nunique()

436

### Scrape Genre Metadata?

---

# Dataviz

## Top 10 Album Bar Charts

In [11]:

# Re-display the long-form AOTY frame for reference before building the charts.
AOTY

Unnamed: 0,Rank,Lister,Artist,Album,album_submission_count,album_score,top_10_album,album_average_rank,unique_album_submission,artist_album_release_count,multi_album_artist
0,1,Aaron,The Dirty Nil,Fuck Art,2,15,False,3.500000,False,1,False
1,2,Aaron,Whitechapel,Kin,1,9,False,2.000000,True,1,False
2,3,Aaron,That Handsome Devil,Your Parents Are Sellouts,1,8,False,3.000000,True,1,False
3,4,Aaron,Billie Eilish,Happier Than Ever,22,137,True,4.772727,False,1,False
4,5,Aaron,Lorna Shore,...And I Return To Nothingness,1,6,False,5.000000,True,1,False
...,...,...,...,...,...,...,...,...,...,...,...
975,6,Zack,Mark Barlow,Hymns & Soul,2,15,False,3.500000,False,1,False
976,7,Zack,Aquilo,A Safe Place To Be,1,4,False,7.000000,True,1,False
977,8,Zack,Tingsek,Home,2,4,False,9.000000,False,1,False
978,9,Zack,Ben Howard,Collections From The Whiteout,2,6,False,8.000000,False,1,False


In [12]:
from data.data import AOTY_by_album


In [25]:
# Ten highest-scoring albums, with their submission counts alongside.
(
    AOTY_by_album
    .loc[:, ["Album", "album_score", "album_submission_count"]]
    .sort_values(by="album_score", ascending=False)
    .head(10)
)

Unnamed: 0,Album,album_score,album_submission_count
340,An Evening With Silk Sonic,171,25
6,30,138,21
295,Sour,137,25
43,Happier Than Ever,137,22
402,Call Me If You Get Lost,133,20
239,Montero,102,19
210,Little Oblivions,79,13
53,Inside (The Songs),78,13
245,Solar Power,71,12
300,Day/Night,64,9


In [26]:
# Ten most frequently submitted albums, with their scores alongside.
(
    AOTY_by_album
    .loc[:, ["Album", "album_score", "album_submission_count"]]
    .sort_values(by="album_submission_count", ascending=False)
    .head(10)
)

Unnamed: 0,Album,album_score,album_submission_count
340,An Evening With Silk Sonic,171,25
295,Sour,137,25
43,Happier Than Ever,137,22
6,30,138,21
402,Call Me If You Get Lost,133,20
239,Montero,102,19
53,Inside (The Songs),78,13
210,Little Oblivions,79,13
245,Solar Power,71,12
218,Donda,63,11


In [14]:
# TODO: Make this take in the long-form df to add to the pipeline.


def add_top_10_albums_by_score(df: pd.DataFrame) -> pd.DataFrame:
    """
    Flag the ten highest-scoring albums with a boolean ``top_10_album`` column.

    Parameters
    ----------
    df : pd.DataFrame
        The AOTY dataframe grouped by album (one row per album). It must
        contain the ``Album`` and ``album_score`` columns.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with a boolean ``top_10_album`` column added (or
        overwritten if it already existed). The input frame is not modified.
    """
    new_df = df.copy()
    # Compare against the album titles only. Taking ``.values`` of the whole
    # top-10 slice would test membership against every column, so an album
    # whose title equals e.g. a top-10 artist's name would be flagged too.
    top_10_albums = (
        new_df.sort_values("album_score", ascending=False).head(10)["Album"]
    )
    # ``isin`` is vectorised and always yields a bool column, even when empty.
    new_df["top_10_album"] = new_df["Album"].isin(top_10_albums)
    return new_df



# Tag every album with whether it made the overall top 10 by score.
AOTY_by_album = add_top_10_albums_by_score(AOTY_by_album)

# Keep only the flagged albums, best score first, as the input for the bar charts.
top_10_albums = (
    AOTY_by_album
    .loc[AOTY_by_album["top_10_album"].eq(True)]
    .sort_values(by="album_score", ascending=False)
)


In [15]:
# Interquartile range of scores across *all* albums; drawn as the "usual" band
# so the top-10 bars can be read against a typical album.
score_q25 = AOTY_by_album["album_score"].quantile(0.25)
score_q75 = AOTY_by_album["album_score"].quantile(0.75)

figure = px.bar(
    top_10_albums,
    x="Album",
    y=["album_score"],
    barmode="group",
    text_auto=True,
    hover_name="Album",
    custom_data=["Artist"],
    title="Album Scores for our Top 10 Albums of 2021",
    labels={"value": "Album Score"},
    template="simple_white",
)
figure.update_traces(
    # customdata holds one entry per point (the artist); <extra></extra>
    # suppresses the secondary hover box.
    hovertemplate="<br>".join(
        [
            "<b>%{customdata[0]}</b> - <i>%{x}</i>",
            "",
            "Album Score: %{y}<extra></extra>",
        ]
    )
)
figure.update_layout(hovermode="closest", showlegend=False)

figure.add_hrect(
    y0=score_q25,
    y1=score_q75,
    opacity=0.3,
    # CSS colour names contain no spaces: "dark grey" is not a valid name.
    fillcolor="darkgrey",
    annotation_text=f"'Usual' Album scores: {score_q25:.2f} - {score_q75:.2f}",
    annotation_position="inside right",
    annotation_font_color="antiquewhite",
)

figure.show()


In [16]:
# Interquartile range of submission counts across *all* albums; drawn as the
# "usual" band so the top-10 bars can be read against a typical album.
submissions_q25 = AOTY_by_album["album_submission_count"].quantile(0.25)
submissions_q75 = AOTY_by_album["album_submission_count"].quantile(0.75)

figure = px.bar(
    top_10_albums,
    x="Album",
    y=["album_submission_count"],
    barmode="group",
    text_auto=True,
    hover_name="Album",
    custom_data=["Artist"],
    title="Album Submission Count for our Top 10 Albums of 2021",
    labels={"value": "Album Submissions"},
    template="simple_white",
)
figure.update_traces(
    # customdata holds one entry per point (the artist); <extra></extra>
    # suppresses the secondary hover box.
    hovertemplate="<br>".join(
        [
            "<b>%{customdata[0]}</b> - <i>%{x}</i>",
            "",
            "Submission Count: %{y}<extra></extra>",
        ]
    )
)
figure.update_layout(hovermode="closest", showlegend=False)

figure.add_hrect(
    y0=submissions_q25,
    y1=submissions_q75,
    opacity=0.3,
    # CSS colour names contain no spaces: "dark grey" is not a valid name.
    fillcolor="darkgrey",
    annotation_text=f"'Usual' Submission counts: {submissions_q25:.2f} - {submissions_q75:.2f}",
    annotation_position="inside right",
    annotation_font_color="antiquewhite",
)

figure.show()


In [17]:
# Largest submission count of any album; used to place the guide lines and labels.
max_submissions = AOTY_by_album["album_submission_count"].max()

# Both dashed quadrant dividers share the same look.
quadrant_line_style = dict(color="grey", width=4, dash="dash")

# One bubble per album: x = times submitted, y = average rank, size = score,
# colour = whether the album made the top 10.
figure = px.scatter(
    AOTY_by_album,
    x="album_submission_count",
    y="album_average_rank",
    color="top_10_album",
    size="album_score",
    opacity=0.4,
    color_discrete_sequence=["navy", "darkorchid"],
    custom_data=["Artist", "Album", "album_score"],
    labels={
        "album_average_rank": "Average Album Rank",
        "album_submission_count": "Album Submission Count",
        "top_10_album" : "Top 10?"
    },
    template="simple_white",
)

hover_lines = [
    "<b>%{customdata[0]}</b> - <i>%{customdata[1]}</i>",
    "",
    "Average Album Rank: %{y:.2f}",
    "Album Submission Count : %{x}",
    "Album Score: %{customdata[2]}",
    "<extra></extra>",
]
figure.update_traces(
    hovertemplate="<br>".join(hover_lines),
    selector=dict(mode="markers"),
)

# Vertical divider at half of the maximum submission count.
figure.add_shape(
    type="line",
    x0=max_submissions / 2,
    y0=0,
    x1=max_submissions / 2,
    y1=10.5,
    line=quadrant_line_style,
)
# Horizontal divider at rank 5.5.
figure.add_shape(
    type="line",
    x0=1,
    y0=5.5,
    x1=max_submissions + 1,
    y1=5.5,
    line=quadrant_line_style,
)

# Text-only trace that names each of the four quadrants.
zone_labels = go.Scatter(
    x=[2, 2, max_submissions - 1, max_submissions - 1],
    y=[0, 10.5, 0, 10.5],
    text=["Hidden Gems", "Barely Made It", "Certified Bangers", "Liked, not Loved"],
    mode="text",
    name="Zones",
    textposition="bottom center",
    textfont=dict(color="grey"),
    hoverinfo="skip",
    showlegend=False,
)
figure.add_trace(zone_labels)

# Flip the y-axis so the lowest average-rank values sit at the top.
figure.layout.yaxis.autorange = "reversed"

figure.show()
