In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

import os

import config



In [None]:
# Both expert-feedback workbooks are looked up inside the processed-data folder.
proc = config.PROC_DATA_PATH

wclusterpath = os.path.join(proc, "ExpertFeedback_WithinCluster.xlsx")
bwclusterpath = os.path.join(proc, "ExpertFeedback_BetweenCluster.xlsx")

# Within-cluster ratings: default (first) sheet of their workbook.
wcluster = pd.read_excel(wclusterpath)

# Between-cluster ratings live in one sheet per KL score; 'KL0' is read here.
clusterkl0 = pd.read_excel(bwclusterpath, sheet_name='KL0')

In [None]:
# Read the remaining between-cluster sheets (KL1 to KL4) from the same workbook.
clusterkl1, clusterkl2, clusterkl3, clusterkl4 = (
    pd.read_excel(bwclusterpath, sheet_name=sheet)
    for sheet in ('KL1', 'KL2', 'KL3', 'KL4')
)

In [None]:
# Give the "Unnamed: 0" column its real meaning: the KL score of each row.
wcluster = wcluster.rename(columns={"Unnamed: 0": "KL-Score"})

In [None]:
# Display the within-cluster ratings table (first column now named "KL-Score").
wcluster

In [None]:
sns.set_theme(style="whitegrid", font_scale=1.2)

# Annotated heatmap of the within-cluster ratings: one row per KL-Score,
# one column per remaining column of the sheet. Axis labels are left at
# seaborn's defaults.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    wcluster.set_index("KL-Score"),
    annot=True,
    cmap="flare",
    cbar_kws={'label': 'Similarity Rating'},
    ax=ax,
)
ax.set_title("Expert Feedback Within Clusters")
fig.tight_layout()
plt.show()

In [None]:
# Long form: one row per (KL-Score, cluster) pair carrying its rating.
wcluster_unpivot = pd.melt(
    wcluster,
    id_vars=["KL-Score"],
    var_name="cluster",
    value_name="rating",
)

In [None]:
# Remove rows with any missing value, then rows whose rating equals -1
# (presumably a "not rated" marker -- confirm against the workbook).
wcluster_unpivot = (
    wcluster_unpivot
    .dropna()
    .loc[lambda frame: frame["rating"] != -1]
)

In [None]:
# Distribution of the within-cluster ratings, one box per cluster.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(
    data=wcluster_unpivot,
    x="cluster",
    y="rating",
    # seaborn >= 0.13 deprecates `palette` without `hue`. Colouring by the
    # x variable keeps one Set2 colour per box; the legend would only repeat
    # the x tick labels, so it is switched off.
    hue="cluster",
    palette="Set2",
    legend=False,
    ax=ax,
)
fig.tight_layout()
plt.show()

In [None]:
# Label the "Unnamed: 0" column of every between-cluster sheet as "Cluster".
for sheet_df in (clusterkl0, clusterkl1, clusterkl2, clusterkl3, clusterkl4):
    sheet_df.rename(columns={"Unnamed: 0": "Cluster"}, inplace=True)

In [None]:
kl = [0, 1, 2, 3, 4]

# Explicit KL -> frame lookup; replaces eval() on a constructed variable name.
between_by_kl = {
    0: clusterkl0,
    1: clusterkl1,
    2: clusterkl2,
    3: clusterkl3,
    4: clusterkl4,
}
matrices = {k: between_by_kl[k].set_index("Cluster") for k in kl}

# A single colorbar is only meaningful if every panel uses the same colour
# scale, so the limits are taken over all sheets together (NaNs ignored).
all_ratings = np.concatenate(
    [m.to_numpy(dtype=float).ravel() for m in matrices.values()]
)
vmin, vmax = np.nanmin(all_ratings), np.nanmax(all_ratings)

fig = plt.figure(figsize=(15, 15))
axes = fig.subplots(3, 2)

for pos, k in enumerate(kl):
    # One heatmap per KL score, filling the grid row by row.
    ax = axes.flat[pos]
    heatmap = sns.heatmap(
        matrices[k],
        annot=True,
        cmap="flare",
        vmin=vmin,
        vmax=vmax,
        # Only show one colorbar (on the last panel); the original removed all.
        cbar=(k == kl[-1]),
        cbar_kws={'label': 'Similarity Rating'},
        ax=ax,
    )
    heatmap.set_title(f"Expert Feedback Between Clusters for KL-{k}")
    heatmap.set_ylabel("Cluster")
    heatmap.set_xlabel("Cluster")

# The 3x2 grid has more cells than KL scores; hide the unused ones instead of
# leaving empty frames in the figure.
for unused_ax in axes.flat[len(kl):]:
    unused_ax.set_axis_off()

plt.tight_layout()
plt.show()

In [None]:

# One "flare" colour per cluster label, keyed by the label itself.
clusters = ["Cluster 0", "Cluster 1", "Cluster 2", "Cluster 3"]
flare_colors = sns.color_palette("flare", n_colors=len(clusters))
palette = {name: color for name, color in zip(clusters, flare_colors)}

In [None]:
# Explicit KL -> frame lookup; replaces eval() on a constructed variable name.
between_by_kl = {
    0: clusterkl0,
    1: clusterkl1,
    2: clusterkl2,
    3: clusterkl3,
    4: clusterkl4,
}

dfs_long = []

for k in kl:
    df = between_by_kl[k].set_index("Cluster")

    # Name both axes before stacking so the long-form columns come out as
    # cluster_a / cluster_b / rating directly, instead of depending on the
    # auto-generated "level_1" and 0 labels that reset_index() would produce.
    df_long = (
        df
        .rename_axis(index="cluster_a", columns="cluster_b")
        .stack()
        .rename("rating")
        .reset_index()
        .dropna()
        .assign(KL=k)
    )

    dfs_long.append(df_long)

# One row per (cluster_a, cluster_b, KL) pair that has a rating.
long_df = pd.concat(dfs_long, ignore_index=True)


In [None]:
# Mirrored copy of every pair: cluster_a and cluster_b trade values.
# cluster_b2 holds the original cluster_b while the swap is carried out.
long_df_copy = long_df.assign(
    cluster_b2=long_df["cluster_b"],
    cluster_b=long_df["cluster_a"],
)
long_df_copy["cluster_a"] = long_df_copy["cluster_b2"]

In [None]:
# The temporary swap column is no longer needed.
long_df_copy = long_df_copy.drop(columns=['cluster_b2'])

In [None]:
# Append the mirrored pairs to the original ones, printing the shapes before
# and after so the row count can be checked.
shape_original, shape_mirrored = long_df.shape, long_df_copy.shape
print(shape_original, shape_mirrored)
long_df = pd.concat((long_df, long_df_copy), ignore_index=True)
print(long_df.shape)

In [None]:
# Display the column labels of the combined long-form table.
long_df.columns

In [None]:
#remove duplicate rows
# Keep only the first occurrence of each (KL, cluster_a, cluster_b, rating)
# combination; mirroring can produce repeated rows.
dedup_keys = ['KL', 'cluster_a', 'cluster_b', 'rating']
is_repeat = long_df.duplicated(subset=dedup_keys)
long_df = long_df[~is_repeat]

In [None]:
# Display (rows, columns) of long_df after duplicates were dropped.
long_df.shape

In [None]:
# One fixed "Set3" colour per KL score, shared by the bars and the legend.
kl_values = sorted(long_df["KL"].unique())

kl_palette = dict(
    zip(
        kl_values,
        sns.color_palette("Set3", n_colors=len(kl_values))
    )
)

# Same category order on the y-axis of every panel.
cluster_b_order = sorted(long_df["cluster_b"].unique())

# One panel per reference cluster.
clusters = sorted(long_df["cluster_a"].unique())

# Shared x-axis limits. Bars are anchored at 0, so 0 must stay inside the
# range; starting the axis at the minimum rating would cut off the baseline
# and leave bars at that minimum with no visible length.
xmin = min(0, long_df["rating"].min())
xmax = max(0, long_df["rating"].max())

fig, axes = plt.subplots(
    1, len(clusters),
    figsize=(4 * len(clusters), 6),
    sharey=True,
    sharex=True   # identical x-axis in every panel
)
# plt.subplots returns a bare Axes (not an array) for a single panel;
# normalise so the loop below also works with exactly one cluster.
axes = np.atleast_1d(axes)

for ax, cluster in zip(axes, clusters):
    subset = long_df[long_df["cluster_a"] == cluster]

    sns.barplot(
        data=subset,
        x="rating",
        y="cluster_b",
        hue="KL",
        palette=kl_palette,
        order=cluster_b_order,
        ax=ax,
        legend=False
    )

    ax.set_title(cluster)
    ax.set_xlabel("Similarity rating")
    ax.set_ylabel("")
    ax.set_xlim(xmin, xmax)

    # Light horizontal separators between neighbouring categories.
    n_cats = len(cluster_b_order)
    for y in range(1, n_cats):
        ax.axhline(y=y - 0.5, color="lightgray", lw=1, zorder=0)

# Single figure-level legend built by hand from the KL palette
# (the per-axes legends are disabled above).
handles = [
    Patch(facecolor=kl_palette[k], label=f"KL {k}")
    for k in kl_values
]

fig.legend(
    handles=handles,
    title="KL score",
    loc="upper center",
    ncol=len(kl_values),
    frameon=False
)

# Leave the top 10% of the figure free for the legend.
plt.tight_layout(rect=[0, 0, 1, 0.9])
plt.show()

In [None]:
# Display the combined long-form table (columns: cluster_a, cluster_b, rating, KL).
long_df

In [None]:
# Point plots of the between-cluster ratings: one panel per reference
# cluster, one marker colour per KL score.
fig, axes = plt.subplots(
    1, len(clusters),
    figsize=(4 * len(clusters), 4),
    sharey=True
)
# plt.subplots returns a bare Axes for a single panel; keep `axes` a 1-D array
# so that zip() below works for any number of clusters.
axes = np.atleast_1d(axes)

for ax, cluster in zip(axes, clusters):
    subset = long_df[long_df["cluster_a"] == cluster]

    subset = subset.sort_values(by="cluster_b", ascending=True)

    sns.pointplot(
        data=subset,
        x="rating",
        y="cluster_b",
        hue="KL",
        palette=kl_palette,
        dodge=True,
        # `join=False` is deprecated since seaborn 0.13; this is the
        # documented replacement for drawing markers without connecting lines.
        linestyle="none",
        ax=ax,
        legend=False
    )

    # Without a title the panels cannot be told apart.
    ax.set_title(cluster)

plt.tight_layout()
plt.show()


In [None]:
# This cell gets its own figure with one panel per KL score. The original
# reused `axes` from the previous cell, which drew the heatmaps on top of the
# point plots and provided only len(clusters) panels for len(kl) KL scores.
fig, axes = plt.subplots(1, len(kl), figsize=(4 * len(kl), 4))
axes = np.atleast_1d(axes)

# Shared colour scale, so the single colorbar on the last panel is valid
# for every panel.
vmin = long_df["rating"].min()
vmax = long_df["rating"].max()

for ax, k in zip(axes, kl):
    df = long_df[long_df["KL"] == k]

    # pivot_table (mean) instead of pivot: after mirroring, a pair can occur
    # twice with different ratings, which makes DataFrame.pivot raise.
    # For unique pairs the result is the same as pivot.
    pivot = df.pivot_table(
        index="cluster_a",
        columns="cluster_b",
        values="rating",
        aggfunc="mean"
    )

    sns.heatmap(
        pivot,
        cmap="viridis",
        annot=True,
        vmin=vmin,
        vmax=vmax,
        ax=ax,
        cbar=(ax is axes[-1])
    )

    ax.set_title(f"KL {k}")

plt.tight_layout()
plt.show()