In [None]:
# Minimum percentage share a genre needs to keep its own entry;
# genres below it are pooled into "Other" by calculate_metrics
THRESHOLD = 5

# Columns of the genre statistics that each get a breakdown and a pie chart
METRICS = [
    "Artists",
    "Albums",
    "Tracks",
    "Spend",
]

In [None]:
# Execute the shared helper notebooks in this kernel's namespace.
# The functions used below but not defined in this notebook (connect,
# load_genre_statistics, export_chart, export_to_spreadsheet) presumably
# come from these -- TODO confirm which notebook provides which.
%run database.ipynb
%run pathutils.ipynb
%run export.ipynb

In [None]:
def calculate_metrics(df, metric, threshold=None):
    """Return each genre's percentage share of ``metric``, pooling small genres.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "Genre" column and a numeric ``metric`` column.
        Assumes one row per genre -- TODO confirm against the loader.
    metric : str
        Name of the column to break down.
    threshold : float, optional
        Minimum percentage a genre needs to keep its own entry. When omitted,
        the module-level ``THRESHOLD`` is used.

    Returns
    -------
    pandas.Series
        Percentages indexed by genre. Genres below ``threshold`` are summed
        into a single "Other" entry. If a genre that is itself called "Other"
        already clears the threshold, the pooled share is added to it rather
        than replacing it, so the result still accounts for every genre.
    """
    # Resolve the default at call time so later edits to THRESHOLD take effect
    if threshold is None:
        threshold = THRESHOLD

    # Total for the metric
    total = df[metric].sum()

    # Percentage of the metric contributed by each genre
    metric_pct = (df.set_index("Genre")[metric] / total) * 100

    # Split into large and small genres; copy the large part so that adding
    # "Other" below never writes into a filtered slice of metric_pct
    large_genre_se = metric_pct[metric_pct >= threshold].copy()
    small_genre_se = metric_pct[metric_pct < threshold]

    # Combine small genres into "Other", keeping any share "Other" already has
    if not small_genre_se.empty:
        existing_other = large_genre_se.get("Other", 0)
        large_genre_se["Other"] = existing_other + small_genre_se.sum()

    return large_genre_se

In [None]:
import matplotlib.pyplot as plt


def plot_metrics(series, metric):
    """Draw a pie chart of ``series``, pass it to ``export_chart`` and show it.

    ``series`` supplies the wedge sizes (its values) and the wedge labels
    (its index); the wedge labelled "Other" is offset from the rest of the
    pie. ``metric`` appears in the chart title and, case-folded, in the name
    handed to ``export_chart``.
    """
    # Offset only the "Other" wedge; every other wedge stays in place
    offsets = []
    for name in series.index:
        offsets.append(0.1 if name == "Other" else 0)

    # New figure with a single axes to draw the pie on
    _, ax = plt.subplots()
    ax.pie(
        series,
        explode=offsets,
        labels=series.index,
        autopct="%1.1f%%",
        startangle=90,
    )
    ax.set_title(f"% of {metric} by Genre")
    ax.axis("equal")

    # export_chart receives no figure, so it presumably saves the current
    # pyplot figure -- keep this call ahead of plt.show()
    export_chart(f"{metric.casefold()}-by-genre", "", "png")

    plt.show()

In [None]:
# Load and preview the data.
# connect() and load_genre_statistics() are not defined in this notebook;
# presumably they come from the %run helper notebooks -- TODO confirm.
connection = connect()
genre_stats_df = load_genre_statistics(connection)
# Later cells read the "Genre" column and one column per entry in METRICS
display(genre_stats_df)

In [None]:
import pandas as pd

# Everything handed to export_to_spreadsheet, keyed by a descriptive label;
# the unmodified statistics go in first
export = {"Data": genre_stats_df}

for metric in METRICS:
    # Percentage share per genre for this metric, queued for the spreadsheet
    genre_series = calculate_metrics(genre_stats_df, metric)
    export[f"{metric} Statistics"] = genre_series

    # Plot and export the matching pie chart
    plot_metrics(genre_series, metric)

# Write the collected data out in one call
export_to_spreadsheet("genre-statistics", export)