In [None]:
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.spatial.distance import euclidean
from scipy.stats import pearsonr, spearmanr

### 1. Load and Preview Data

In [None]:
# Load the semicolon-separated dataset; index_col=False keeps pandas from
# using the first column as the index.
df = pd.read_csv("dataset.csv", sep=";", quotechar='"', index_col=False)

def compute_euclidean_pattern(row):
    """Return the Euclidean distance between a row's two patterns.

    ``row["patternA"]`` and ``row["patternB"]`` are whitespace-separated
    strings of numbers; each is parsed into a float vector before the
    distance is computed.
    """
    vec_a, vec_b = (
        np.array(list(map(float, row[field].split())))
        for field in ("patternA", "patternB")
    )
    return euclidean(vec_a, vec_b)

# Row-wise pattern distance, scaled by sqrt(16) and flipped so that
# larger values mean "more similar".
# NOTE(review): sqrt(16) is presumably the largest possible distance between
# two 16-step patterns with values in [0, 1] — confirm against the data.
pattern_distance = df.apply(compute_euclidean_pattern, axis=1)
df["objectiveSimilarity"] = 1 - pattern_distance / np.sqrt(16)

def compute_euclidean_descriptors(row):
    """Return the Euclidean distance between a row's two descriptor vectors.

    ``row["descriptorsA"]`` and ``row["descriptorsB"]`` are whitespace-separated
    strings of numbers. Every value is divided by 127 before the distance is
    computed.
    """
    def scaled_vector(field):
        # Parse the string into floats, then rescale the whole vector by 1/127.
        return np.array([float(token) for token in row[field].split()]) / 127

    return euclidean(scaled_vector("descriptorsA"), scaled_vector("descriptorsB"))

# Row-wise descriptor distance, scaled by sqrt(5) and flipped into a similarity.
# NOTE(review): sqrt(5) presumably corresponds to five descriptors each scaled
# into [0, 1] — confirm.
descriptor_distance = df.apply(compute_euclidean_descriptors, axis=1)
df["parametricSimilarity"] = 1 - descriptor_distance / np.sqrt(5)

# Rescale the participants' ratings by 1/5.
df["subjectiveSimilarity"] = df["subjectiveSimilarity"].div(5)

In [None]:
def plot_participant_data(participant_index):
    """Draw an 8-row overview table for eight consecutive rows of ``df``.

    Each figure row shows, left to right: a running example number (1-8),
    pattern A and pattern B as greyscale strips, the elapsed time, and three
    colour-coded cells with the objective, subjective and parametric
    similarity values.

    ``participant_index`` is used directly as the position of the first row
    in the module-level ``df``; rows ``participant_index`` through
    ``participant_index + 7`` are plotted.
    NOTE(review): the index is not multiplied by 8, so this is a row offset
    rather than a participant number — confirm that this is intended.
    """
    norm = mcolors.Normalize(vmin=0, vmax=1)
    cmap = plt.colormaps["coolwarm"]

    def header(label, row_idx):
        # Column headers appear on the first figure row only.
        return label if row_idx == 0 else ""

    def strip_decorations(ax, title):
        # The subplots share their x-axis, so the order of tick calls is kept
        # exactly as in every cell: x ticks, y ticks, frame, title.
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_frame_on(False)
        ax.set_title(title)

    def text_cell(ax, label, title):
        ax.text(0.5, 0.5, label, ha="center", va="center", fontsize=12, transform=ax.transAxes)
        strip_decorations(ax, title)

    def pattern_cell(ax, steps, title):
        ax.imshow(steps[np.newaxis, :], aspect="equal", cmap="Greys", vmin=0, vmax=1)
        ax.set_yticks([])
        ax.set_xticks(range(16))
        ax.set_title(title)

    def colored_cell(ax, value, title):
        fill = cmap(norm(value))
        background = patches.Rectangle(
            (0, 0), 1, 1,
            transform=ax.transAxes, facecolor=fill, edgecolor="none", zorder=0,
        )
        ax.add_patch(background)

        # Weighted RGB brightness decides between black and white text so the
        # number stays readable on the cell colour.
        brightness = np.dot(fill[:3], [0.299, 0.587, 0.114])
        if brightness > 0.5:
            text_color = "black"
        else:
            text_color = "white"

        ax.text(
            0.5, 0.5, f"{value:.2f}",
            ha="center", va="center", fontsize=12,
            color=text_color, transform=ax.transAxes, zorder=1,
        )
        strip_decorations(ax, title)

    fig, axes = plt.subplots(
        8, 7,
        figsize=(18, 6),
        sharex=True,
        gridspec_kw={'width_ratios': [1, 6, 6, 1, 1, 1, 1]},
    )

    for i in range(8):
        cells = axes[i]
        text_cell(cells[0], f"{i+1}", header("Ex.", i))

        record = df.iloc[participant_index + i]
        patternA = np.array([float(tok) for tok in record["patternA"].split()])
        patternB = np.array([float(tok) for tok in record["patternB"].split()])
        elapsed_time = record["elapsedTime"]

        pattern_cell(cells[1], patternA, header("Pattern A", i))
        pattern_cell(cells[2], patternB, header("Pattern B", i))
        text_cell(cells[3], f"{elapsed_time} s", header("Time (s)", i))

        colored_cell(cells[4], record["objectiveSimilarity"], header("Obj. Sim.", i))
        colored_cell(cells[5], record["subjectiveSimilarity"], header("Subj. Sim.", i))
        colored_cell(cells[6], record["parametricSimilarity"], header("Param. Sim.", i))

    plt.tight_layout()
    plt.show()

In [None]:
# Overview table for the eight dataset rows starting at row position 2.
plot_participant_data(participant_index=2)

### 2. Age and Music Experience Distribution

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 2.5))

bar_color = "gray"

# Parallel lists: one entry per panel, left to right.
columns = ["ageRange", "yearsStudying", "yearsPerforming", "yearsPercussion"]
xlabels = ["Age Range", "Years Studying Music", "Years Performing Music", "Years Performing Percussion"]
titles = ["Age Range Distribution", "Years Studying Distribution", "Years Performing Distribution", "Years Percussion Distribution"]

for panel_idx, col in enumerate(columns):
    ax = axes[panel_idx]

    # Row counts per category, divided by 8.
    # NOTE(review): presumably 8 rows per participant, so bars show
    # participants rather than rows — confirm.
    counts = df[col].value_counts().sort_index().div(8)
    counts.plot.bar(ax=ax, color=bar_color, edgecolor="black")

    ax.set_title(titles[panel_idx], fontsize=10)
    ax.set_ylabel("Count")
    ax.set_xlabel(xlabels[panel_idx], fontsize=9)
    ax.tick_params(axis='both', labelsize=8)
    ax.set_xticklabels(counts.index, rotation=0)

plt.tight_layout()
plt.show()

### 3. Similarity Distribution Analysis

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(10, 2.5))

# (column, display name) for each panel, left to right.
similarity_panels = [
    ("objectiveSimilarity", "Objective"),
    ("parametricSimilarity", "Parametric"),
    ("subjectiveSimilarity", "Subjective"),
]

kde_obj, kde_param, kde_subj = [
    sns.kdeplot(df[column], ax=panel, fill=True, color='gray', alpha=1, edgecolor='black')
    for panel, (column, display_name) in zip(axes, similarity_panels)
]

# Give all three panels the same y-range so the densities are comparable.
ymax = max(panel.get_ylim()[1] for panel in axes)
for panel in axes:
    panel.set_ylim(0, ymax)

for panel, (column, display_name) in zip(axes, similarity_panels):
    panel.set_title(f"{display_name} Similarity Distribution")
    panel.set_xlabel(f"{display_name} Similarity (0-1)")
    panel.set_ylabel("Density")
    panel.set_xlim(0, 1)

plt.tight_layout()
plt.show()

def _similarity_overview(series):
    """Collect mean, std, min, max and median of ``series`` into a dict."""
    return {
        stat: getattr(series, stat)()
        for stat in ("mean", "std", "min", "max", "median")
    }


objective_stats = _similarity_overview(df["objectiveSimilarity"])
print("Objective Similarity Overview:", objective_stats)

parametric_stats = _similarity_overview(df["parametricSimilarity"])
print("Parametric Similarity Overview:", parametric_stats)

subjective_stats = _similarity_overview(df["subjectiveSimilarity"])
print("Subjective Similarity Overview:", subjective_stats)

### 4. Similarity Correlation Analysis

subjective vs. objective:

- given the high correlation, we can say that the metric we have defined to compare patterns (Euclidean distance) is effective in capturing the similarities perceived by participants
- the participants are capable of evaluating how similar the patterns they generate are to the initial pattern
- the model is aligned with human judgment in terms of pattern similarity

In [None]:
# One row of three panels for the pairwise similarity correlations.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(10, 2.5))

def plot_corr(ax, x, y, data, method="spearman", xlabel=None, ylabel=None, title=None):
    """Scatter ``data[x]`` against ``data[y]`` with a regression line and
    annotate the panel with the correlation coefficient and p-value.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    x, y : column names in ``data``.
    data : table indexable by column name (passed on to ``sns.regplot``).
    method : ``"spearman"`` (coefficient labelled ρ) or ``"pearson"``
        (labelled r); matched case-insensitively.
    xlabel, ylabel : axis labels; fall back to ``x`` / ``y`` when empty.
    title : optional panel title, also used as the prefix of the printed line.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"spearman"`` nor ``"pearson"``. Previously
        any other value silently ran a Pearson test, so a typo produced a
        mislabelled statistic.
    """
    correlation_tests = {
        "spearman": (spearmanr, "ρ"),
        "pearson": (pearsonr, "r"),
    }
    method_key = method.lower() if isinstance(method, str) else method
    if method_key not in correlation_tests:
        raise ValueError(
            f"Unknown correlation method {method!r}; expected 'spearman' or 'pearson'"
        )
    corr_func, corr_label = correlation_tests[method_key]
    corr, p_value = corr_func(data[x], data[y])

    sns.regplot(ax=ax, data=data, x=x, y=y, scatter_kws={"alpha":0.5, "color": "gray"}, line_kws={"color":"red"})

    ax.set_xlabel(xlabel if xlabel else x)
    ax.set_ylabel(ylabel if ylabel else y)
    # Fixed limits with a small margin; the plotted similarity columns are on a 0-1 scale.
    ax.set_xlim(-0.05, 1.05)
    ax.set_ylim(-0.05, 1.05)

    ax.text(0.05, 0.95, f"{corr_label}={corr:.2f}\np={p_value:.1e}", transform=ax.transAxes, ha="left", va="top", fontsize=12, bbox=dict(boxstyle="round", fc="white", ec="black"))
    if title:
        ax.set_title(title)

    # Without a title, label the printed line with the variables instead of "None".
    summary_label = title if title else f"{ylabel if ylabel else y} vs {xlabel if xlabel else x}"
    print(f"{summary_label}: {corr_label}={corr:.2f}, p={p_value:.1e}")

# One entry per panel, left to right:
# (x column, y column, method, x label, y label, title).
correlation_panels = [
    ("objectiveSimilarity", "subjectiveSimilarity", "spearman",
     "Objective Similarity", "Subjective Similarity", "Subjective vs Objective Similarity"),
    ("parametricSimilarity", "subjectiveSimilarity", "spearman",
     "Parametric Similarity", "Subjective Similarity", "Subjective vs Parametric Similarity"),
    ("parametricSimilarity", "objectiveSimilarity", "pearson",
     "Parametric Similarity", "Objective Similarity", "Objective vs Parametric Similarity"),
]

for panel, (x_col, y_col, corr_method, x_label, y_label, panel_title) in zip(axes, correlation_panels):
    plot_corr(panel, x_col, y_col, df, method=corr_method, xlabel=x_label, ylabel=y_label, title=panel_title)

plt.subplots_adjust(wspace=0.3)
plt.show()

### 5. Elapsed Time / Background and Similarity Correlation

In [None]:
# Similarity measures vs. elapsed time: (display name, column, test, test name).
elapsed_time_checks = [
    ("Subjective", "subjectiveSimilarity", spearmanr, "Spearman"),
    ("Objective", "objectiveSimilarity", pearsonr, "Pearson"),
    ("Parametric", "parametricSimilarity", pearsonr, "Pearson"),
]
for sim_name, sim_col, corr_test, test_name in elapsed_time_checks:
    corr, p_value = corr_test(df[sim_col], df["elapsedTime"])
    print(f"{sim_name} Similarity vs Elapsed Time: {test_name} r={corr:.2f}, p={p_value:.1e}")

# Background variables vs. each similarity measure, all with Spearman.
background_factors = [
    ("Age Range", "ageRange"),
    ("Music Study", "yearsStudying"),
    ("Music Performance", "yearsPerforming"),
    ("Percussion Performance", "yearsPercussion"),
]
similarity_measures = [
    ("Objective", "objectiveSimilarity"),
    ("Subjective", "subjectiveSimilarity"),
    ("Parametric", "parametricSimilarity"),
]
for factor_name, factor_col in background_factors:
    print('---')
    for sim_name, sim_col in similarity_measures:
        corr, p_value = spearmanr(df[factor_col], df[sim_col])
        print(f"{factor_name} vs {sim_name} Similarity: Spearman r={corr:.2f}, p={p_value:.1e}")

### 6. Descriptor Difficulty Distribution

In [None]:
difficulty_cols = ["onsetCountDifficulty", "startDifficulty", "centerDifficulty", "syncopationDifficulty", "balanceDifficulty"]
difficulty_labels = ["Onset count", "Start", "Center", "Syncopation", "Balance"]
label_for_column = dict(zip(difficulty_cols, difficulty_labels))

# Summed ratings per descriptor as a percentage of len(df) * 5.
# NOTE(review): the factor 5 is presumably the highest difficulty rating — confirm.
difficulty_sums = df[difficulty_cols].sum()
max_possible = len(df) * 5
difficulty_rel = (difficulty_sums / max_possible) * 100
difficulty_rel_sorted = difficulty_rel.sort_values()
sorted_labels = [label_for_column[col] for col in difficulty_rel_sorted.index]

fig = plt.figure(figsize=(7.5, 5))
ax = fig.add_axes([0.15, 0.15, 0.7, 0.7])

ax.bar(sorted_labels, difficulty_rel_sorted.values, color="gray", edgecolor="black")
ax.set(
    ylabel="Relative Difficulty (% of maximum)",
    xlabel="Descriptor",
    ylim=(0, 100),
)
ax.set_xticks(range(len(sorted_labels)))
ax.set_xticklabels(sorted_labels, rotation=0)

for spine in ax.spines.values():
    spine.set_visible(True)

plt.show()

levels = list(range(6))
bar_color = "gray"

# Per descriptor: number of rows at each level 0-5, divided by 8.
# NOTE(review): presumably 8 rows per participant — confirm.
scaled_counts_list = [
    df[col].value_counts().sort_index().reindex(levels, fill_value=0).values / 8
    for col in difficulty_cols
]

# Common y-limit for all panels, with 5% headroom above the tallest bar.
ymax = max(level_counts.max() for level_counts in scaled_counts_list) * 1.05

fig = plt.figure(figsize=(10, 5))

top_width = 0.25
top_height = 0.4
top_bottom = 0.55
spacing = 0.05


def _draw_difficulty_panel(rect, descriptor_idx):
    """Add an axes at ``rect`` to ``fig`` and draw one descriptor's level histogram."""
    panel = fig.add_axes(rect)
    panel.bar(levels, scaled_counts_list[descriptor_idx], color=bar_color, edgecolor="black")
    panel.set_title(difficulty_labels[descriptor_idx])
    panel.set_xlabel("Difficulty Level")
    panel.set_ylabel("Count")
    panel.set_xticks(levels)
    panel.set_xticklabels(levels, rotation=0)
    panel.set_ylim(0, ymax)
    for spine in panel.spines.values():
        spine.set_visible(True)
    return panel


# Top row: the first three descriptors, laid out left to right.
for i in range(3):
    left = 0.05 + i*(top_width + spacing)
    ax = _draw_difficulty_panel([left, top_bottom, top_width, top_height], i)

bottom_width = top_width
bottom_height = top_height
bottom_bottom = 0.0005
# Centre the two bottom panels horizontally in the figure.
left_start = 0.5 - (2*bottom_width + spacing)/2

# Bottom row: the remaining two descriptors.
for i in range(2):
    left = left_start + i*(bottom_width + spacing)
    ax = _draw_difficulty_panel([left, bottom_bottom, bottom_width, bottom_height], 3+i)

plt.show()