In [None]:
from pathlib import Path

# Directory the result pickles are read from, resolved against the working
# directory the kernel was started in.
BASE_DIR = Path.cwd().joinpath("data", "musicgen")

In [None]:
import pandas as pd

def _load_tagged(filename, edit_type):
    """Read one pickled result table from ``BASE_DIR`` and tag its edit type.

    Args:
        filename: Name of the pickle file inside ``BASE_DIR``.
        edit_type: Value written to every row of the ``"Type"`` column.

    Returns:
        The unpickled table (presumably a DataFrame; it is later passed to
        ``pd.concat``) with its ``"Type"`` column set to ``edit_type``.
    """
    # NOTE(review): unpickling can execute arbitrary code, so these files must
    # come from a trusted source.
    table = pd.read_pickle(BASE_DIR / filename)
    table["Type"] = edit_type
    return table


df_individual_hard = _load_tagged("individual_hard.pkl", "Hard")
df_individual_soft = _load_tagged("individual_soft.pkl", "Soft")

df_leave_one_out_hard = _load_tagged("leave_one_out_hard.pkl", "Hard")
df_leave_one_out_soft = _load_tagged("leave_one_out_soft.pkl", "Soft")

df_incremental_hard = _load_tagged("incremental_hard.pkl", "Hard")
df_incremental_soft = _load_tagged("incremental_soft.pkl", "Soft")

# Stack the six tables row-wise; each table keeps its own index labels.
df = pd.concat(
    [
        df_individual_hard,
        df_individual_soft,
        df_leave_one_out_hard,
        df_leave_one_out_soft,
        df_incremental_hard,
        df_incremental_soft,
    ]
)

In [None]:
from IPython.display import HTML, display, Audio
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import hmean

# Render kind for each DataFrame column. `showcase_results` pairs this list
# positionally with `df.columns`, so it must follow the column order of the
# frame being displayed.
types = (
    [str] * 4  # text cells
    + [Audio] * 2  # embedded audio players
    + [float] * 7  # numbers rendered with two decimals
    + [str]  # trailing text cell (presumably the appended "Type" column; confirm)
)


def showcase_results(df, dtypes, columns=None, sampling_rate=32000):
    """Display ``df`` as an HTML table with playable audio cells.

    Two outputs are displayed: a ``<style>`` block, then the table itself.
    Text cells and column headers are HTML-escaped so that characters such as
    ``<`` or ``&`` in the data cannot break the table markup.

    Args:
        df: DataFrame whose rows become table rows.
        dtypes: Sequence of render kinds (``str``, ``Audio`` or ``float``),
            paired positionally with ``df.columns``.
        columns: Names of the columns to show, in display order. Defaults to
            every column of ``df``.
        sampling_rate: Sample rate handed to ``Audio`` for audio cells.

    Raises:
        KeyError: If a displayed column has no entry in ``dtypes``.
        NotImplementedError: If a displayed column is paired with a render
            kind other than ``str``, ``Audio`` or ``float``.
    """
    # Imported locally so the function keeps working even if a notebook cell
    # rebinds the global name `html`.
    from html import escape

    kind_by_column = dict(zip(df.columns.values, dtypes))
    if columns is None:
        columns = df.columns

    def render_cell(kind, value):
        """Return the ``<td>`` markup for one value of the given render kind."""
        if kind is str:
            return f"<td class='scrollable-text'>{escape(str(value))}</td>"
        if kind is Audio:
            return f"<td>{Audio(data=value, rate=sampling_rate)._repr_html_()}</td>"
        if kind is float:
            return f"<td>{value:.2f}</td>"
        raise NotImplementedError(kind)

    def render_row(row):
        """Return the ``<tr>`` markup for one DataFrame row."""
        cells = "".join(render_cell(kind_by_column[name], row[name]) for name in columns)
        return f"<tr>{cells}</tr>"

    # Plain CSS for the table (no external stylesheet is involved).
    # NOTE(review): `word-wrap: keep-all` is not a valid declaration (keep-all
    # is a `word-break` value) and the CSS comment on `overflow` does not match
    # its value; the stylesheet is left untouched to keep the current rendering.
    table_styles = """
        <style>
            table {
                font-family: Arial, sans-serif;
                width: 100%;
                border-collapse: separate;  /* Separate borders between cells */
                border-spacing: 0;  /* Remove spacing between cells */
            }

            th, td {
                text-align: center;
                padding: 10px;
                white-space: nowrap;  /* Prevent text from wrapping */
                overflow: scroll;  /* Hide overflowed content */
                border: none;
            }

            .scrollable-text {
                max-width: 250px;
                word-wrap: keep-all;  /* Wrap long words */
            }

            audio { width: 250px; height: 50px; }
        </style>
    """

    # The stylesheet is emitted as its own output, before the table.
    display(HTML(table_styles))

    # Join once instead of growing a string row by row.
    header_html = "".join(f"<th>{escape(str(name))}</th>" for name in columns)
    body_html = "".join(render_row(row) for _, row in df.iterrows())

    table_html = (
        "<table>"
        f"<thead><tr>{header_html}</tr></thead>"
        f"{body_html}"
        "</table>"
    )

    display(HTML(table_html))

def calculate_score(df, columns=None):
    """Add a ``"Score"`` column that combines several metric columns of ``df``.

    Every selected metric column is min-max scaled over the rows of ``df``;
    the scaled values of each row are then merged with a weighted harmonic
    mean.

    Args:
        df: DataFrame holding the metric columns. It is modified in place.
        columns: Mapping of metric column name to its weight. Defaults to a
            weight of 0.25 for each of the four metrics listed below.

    Returns:
        The same ``df`` object, now carrying the ``"Score"`` column.
    """
    if columns is None:
        columns = {
            "Text-Audio Cosine Similarity": 0.25,
            "Audio-Audio Cosine Similarity": 0.25,
            "Melody Accuracy": 0.25,
            "Signal to Noise Ratio": 0.25,
        }

    def rescale(values):
        # Min-max scaling; a column whose values are all equal divides 0 by 0.
        lowest = np.min(values)
        return (values - lowest) / (np.max(values) - lowest)

    metric_names = list(columns.keys())
    weights = list(columns.values())
    scaled = df[metric_names].apply(rescale)

    # Row-wise weighted harmonic mean of the scaled metrics.
    df["Score"] = scaled.apply(lambda row: hmean(row, weights=weights), axis=1)

    return df

def top_across_edits(df, column="Score", k=1):
    """List, per edit, the rows belonging to the overall best-scoring layers.

    The ``k`` values of ``"Layers"`` with the highest mean ``column`` over all
    rows of ``df`` are selected. The rows of those layers are grouped by
    ``("Edit", "Type")`` and at most ``k`` rows are kept per group, taken in
    order of layer rank and then descending ``column``.

    The caller's DataFrame is left untouched: the temporary ranking column is
    attached to a filtered copy rather than assigned on ``df``.

    Args:
        df: DataFrame with ``"Layers"``, ``"Edit"``, ``"Type"`` and ``column``
            columns.
        column: Name of the score column used for ranking.
        k: Number of top layers to select, and also the maximum number of
            rows kept per ``("Edit", "Type")`` group.

    Returns:
        A new DataFrame with the same columns as ``df`` and a fresh
        ``0..n-1`` index, ordered by ``"Edit"``, ``"Type"``, layer rank and
        descending ``column``.
    """
    # Rank layers by their mean score across every edit.
    mean_by_layer = df.groupby("Layers")[column].mean()
    top_layers = mean_by_layer.sort_values(ascending=False).head(k).index
    layer_rank = {layer: rank for rank, layer in enumerate(top_layers)}

    # `assign` works on the filtered copy, so no helper column leaks into `df`.
    ranked = (
        df[df["Layers"].isin(top_layers)]
        .assign(_LayerOrder=lambda frame: frame["Layers"].map(layer_rank))
        .sort_values(
            by=["Edit", "Type", "_LayerOrder", column],
            ascending=[True, True, True, False],
        )
        .drop(columns="_LayerOrder")
    )

    # `GroupBy.head` keeps the row order established above and, unlike
    # `GroupBy.apply`, does not depend on apply operating on the grouping
    # columns (deprecated since pandas 2.2).
    return ranked.groupby(["Edit", "Type"], sort=False).head(k).reset_index(drop=True)


def plot_against(distribution_a, distribution_b, labels=None, title=None):
    """Draw two value sequences as line plots on one new figure.

    Args:
        distribution_a: Values of the first line. Its length defines the x
            axis, which runs from 1 to ``len(distribution_a)``.
        distribution_b: Values of the second line, plotted against the same
            x values (so it is expected to have the same length).
        labels: Optional pair ``(label_a, label_b)`` of legend labels. When
            omitted, the lines are unlabeled and no legend is drawn.
        title: Optional figure title.
    """
    # Shared 1-based x positions for both lines.
    x_values = np.arange(1, len(distribution_a) + 1)

    # `labels` defaults to None, so it must not be indexed unconditionally.
    label_a, label_b = (labels[0], labels[1]) if labels is not None else (None, None)

    plt.figure(figsize=(10, 6))
    plt.plot(x_values, distribution_a, color="#1f77b4", alpha=0.7, label=label_a)
    plt.plot(x_values, distribution_b, color="#ff7f0e", alpha=0.7, label=label_b)

    plt.title(title)
    plt.xlabel("Group #")
    if labels is not None:
        plt.legend()
    plt.grid(True)

In [None]:
# Columns shown in the results table, in display order.
columns = [
    # What was edited and how
    "Edit",
    "Type",
    "Layers",
    # Combined score
    "Score",
    # Audio before and after the edit
    "Source Audio",
    "Editted Audio",
    # Individual metrics
    "Text-Audio Cosine Similarity",
    "Audio-Audio Cosine Similarity",
    "Melody Accuracy",
    "Beat Consistency Score",
    "Signal to Noise Ratio",
    "Structural Similarity Index",
]

# Score every row, keep the rows of the five best layers per edit, then render.
showcase_results(
    df=top_across_edits(calculate_score(df), k=5),
    dtypes=types,
    columns=columns,
)

In [None]:
# Scores of the incremental soft "Replace" edits, divided by their maximum.
# NOTE(review): `calculate_score` above is applied to the concatenated `df`
# only, so this relies on `df_incremental_soft` already having a "Score"
# column; confirm.
plt.plot(
    df_incremental_soft.loc[df_incremental_soft["Edit"] == "Replace", "Score"].pipe(
        lambda scores: scores / max(scores)
    )
)