In [14]:
from pathlib import Path
from datetime import datetime, timedelta
from functools import reduce
from operator import add
from collections import Counter

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from  matplotlib.colors import LinearSegmentedColormap, TwoSlopeNorm
import matplotlib.dates as mdates

from music_league_graphs.scraper import create_dataframe
from music_league_graphs.spotify_api import SpotifyAPI

In [2]:
# Load every league export and stack the results into one long frame.
# `names` collects the labels returned alongside each frame; they are used
# below as the columns summed into `round_score` (presumably one column of
# awarded points per voter -- confirm against `create_dataframe`).
paths = [Path(f"../league_{i}") for i in range(1, 5)]

dfs: list[pd.DataFrame] = []
names: set[str] = set()

for path in paths:
    # A distinct loop name keeps the combined `df` below from being shadowed.
    league_df, league_names = create_dataframe(path)
    league_df["league"] = path.stem
    dfs.append(league_df)
    names |= set(league_names)

# Sorted so the column order is reproducible across kernel restarts
# (iteration order of a set of strings is not).
names = sorted(names)
df = pd.concat(dfs, axis=0, ignore_index=True)

# Columns missing from a league are NaN after the concat; `sum` skips them.
df["round_score"] = df[names].sum(axis=1)

# Rank inside each (league, round) so that rounds sharing a name in different
# leagues are not ranked together. method="min" gives tied submissions the
# best shared rank as a whole number; the default "average" yields fractional
# ranks that the int cast would truncate (a 3-way tie for first became rank 2).
df["round_rank"] = (
    df.groupby(["league", "round"])["round_score"]
    .rank(method="min", ascending=False)
    .astype(int)
)


In [3]:
# Every submission ordered from the highest to the lowest round score.
score_columns = ["submitter", "round", "round_score"]
df[score_columns].sort_values(by="round_score", ascending=False)

Unnamed: 0,submitter,round,round_score
168,murray,Ich spreche kein Englisch 👽,29.0
617,Tim,👇 Straight to the point,26.0
465,fred,Sp00ky 👻,25.0
227,Jenny Seaborne,Best live version 🎤,24.0
113,Jamie,Film Soundtrack 🎞,23.0
...,...,...,...
267,sam24ahhhhhh,To the polls❌,-11.0
246,Olek,Best live version 🎤,-12.0
576,sam24ahhhhhh,Colours 🎨,-12.0
130,Olek,Film Soundtrack 🎞,-13.0


In [4]:
# Total points per submitter over all loaded leagues, best first.
# NOTE(review): submitters are keyed by display name, so one person using
# different names in different leagues would be counted separately -- confirm.
total_by_submitter = df.groupby("submitter")["round_score"].sum()
total_by_submitter.sort_values(ascending=False)

submitter
Martha Mukungurutse         463.0
fred                        395.0
Jenny                       384.0
Helen Adams                 349.0
Jamie                       295.0
Russell                     283.0
Tim                         282.0
Mel Shallcrass              272.0
James Hardwick              265.0
Jenny Seaborne              242.0
murray                      234.0
Sacha Darwin                214.0
diplodocus.17               213.0
Tim            :)           202.0
Andrej Zacharenkov          149.0
Figataur                    145.0
Bethany Dickens-Devereux    138.0
Rory                        132.0
Sowdagar                    119.0
owainst                      94.0
Una                          78.0
sam24ahhhhhh                 69.0
Victoria Whitehead           50.0
Peter Rowe                   38.0
Olek                          6.0
Name: round_score, dtype: float64

In [6]:
# Look up every submitted track through the Spotify wrapper and attach the
# result to `df` as extra columns.
# NOTE(review): assumes `api.df` is a DataFrame whose index lines up with
# `df`'s index (concat along axis=1 aligns on index) -- confirm in SpotifyAPI.
try:
    api = SpotifyAPI()
    api.get_data(df["song_id"])
    spotify_data = api.df
    # Only add columns `df` does not have yet. Without this guard, re-running
    # the cell appends a second copy of every Spotify column, after which
    # df["artist_names"] returns a DataFrame instead of a Series and the
    # counting cells that call .explode() on it break.
    new_columns = [col for col in spotify_data.columns if col not in df.columns]
    df = pd.concat((df, spotify_data[new_columns]), axis=1)
except RuntimeError:
    # Print a readable hint, then re-raise so the notebook stops here instead
    # of continuing without the Spotify columns.
    print("Couldn't access spotify API")
    raise

  return bound(*args, **kwds)


In [15]:
# Tally how many times each artist appears across all submissions.
# explode() puts every element of a list-valued "artist_names" entry on its
# own row, so each credited artist is counted individually.
all_artists = df["artist_names"].explode()
artist_counts = Counter()
artist_counts.update(all_artists)


In [21]:
# Per-submitter artist tallies, keyed by submitter name. The counters are
# stored (not just printed) so they can be reused without recomputing them.
counters: dict[str, Counter] = {}

# Selecting the column before iterating means each group is a Series of
# "artist_names" values for one submitter, not a DataFrame.
for submitter, artist_lists in df.groupby("submitter")["artist_names"]:
    counters[submitter] = Counter(artist_lists.explode())
    print(f"{submitter:<25s} | {counters[submitter].most_common(3)}")




Andrej Zacharenkov        | [('t.A.T.u.', 2), ('Lady Gaga', 2), ('Cornelia Jakobs', 1)]
Bethany Dickens-Devereux  | [('Charli xcx', 2), ('Eleni Foureira', 1), ('Fugees', 1)]
Figataur                  | [('Carl Douglas', 1), ('The Specials', 1), ('Mirrorball', 1)]
Helen Adams               | [('Stevie Nicks', 2), ('Gina G.', 1), ('Fever Ray', 1)]
James Hardwick            | [('SHY FX', 2), ('Sunstroke', 1), ('Olia Tira', 1)]
Jamie                     | [('Eminem', 2), ('Käärijä', 1), ('The Last Dinner Party', 1)]
Jenny                     | [('Lizzo', 2), ('KEiiNO', 1), ('WARGASM (UK)', 1)]
Jenny Seaborne            | [('Wheatus', 1), ('Crazy Town', 1), ('Nirvana', 1)]
Martha Mukungurutse       | [('Dolly Parton', 2), ('Kraftwerk', 2), ('Nick Cave & The Bad Seeds', 2)]
Mel Shallcrass            | [('The White Stripes', 2), ('Barbara Pravi', 1), ('Wheeler Walker Jr.', 1)]
Olek                      | [('Justice', 3), ('khai dreams', 2), ('Lordi', 1)]
Peter Rowe                | [('The Bea

In [19]:
# Count submissions per song title and show the three most frequent.
# NOTE(review): counting is by title only, so different tracks that share a
# title would be merged -- confirm whether that is intended.
song_titles = df["song_name"].explode()
song_counts = Counter()
song_counts.update(song_titles)
song_counts.most_common(3)

[('Zombie', 3), ('Here Comes the Hotstepper', 3), ('Cha Cha Cha', 2)]