Cosine similarity calculation
Gender-related school context, student name, and pronouns are integrated into the prompts, which fall into three groups: **male clues (Group A)**, **female clues (Group B)**, and **neutral-gender clues (Group C)**.

In [None]:
# Install necessary packages
!pip install openai pandas scipy numpy openpyxl tqdm matplotlib pingouin statsmodels seaborn

# Import required libraries
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist, pdist, squareform
from tqdm import tqdm
import matplotlib.pyplot as plt
import pingouin as pg
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from itertools import combinations

# Read the saved embedding matrix of each prompt group (A, B, C) from the
# current working directory.
embeddings_a, embeddings_b, embeddings_c = (
    np.load(npy_path)
    for npy_path in ("embeddings_a.npy", "embeddings_b.npy", "embeddings_c.npy")
)

# Report the array shape of every group, one line per group.
print(
    f"Group A shape: {embeddings_a.shape}",
    f"Group B shape: {embeddings_b.shape}",
    f"Group C shape: {embeddings_c.shape}",
    sep="\n",
)

# ============================================
# 1. Pairwise Similarity Assessment
# ============================================
def assess_similarity(matrix1, matrix2):
    """Compute row-aligned cosine similarity and Euclidean distance.

    Row ``k`` of ``matrix1`` is compared with row ``k`` of ``matrix2``; no
    cross-row comparisons are made.

    Parameters
    ----------
    matrix1, matrix2 : array-like of shape (n_samples, n_features)
        The two sets of vectors. Anything ``np.asarray`` accepts (NumPy
        arrays, nested lists, ...) is allowed.

    Returns
    -------
    pandas.DataFrame
        One row per input row, with the columns ``cosine_similarity`` and
        ``euclidean_distance``. A row whose vector has zero norm gets ``NaN``
        as its cosine similarity.

    Raises
    ------
    ValueError
        If the inputs are not 2-D or their shapes differ.
    """
    first = np.asarray(matrix1)
    second = np.asarray(matrix2)
    if first.ndim != 2 or first.shape != second.shape:
        raise ValueError(
            "matrix1 and matrix2 must be 2-D arrays of identical shape, "
            f"got {first.shape} and {second.shape}"
        )

    # Row-wise dot products and norms in one vectorised pass instead of a
    # Python-level loop over row pairs.
    row_dots = np.einsum('ij,ij->i', first, second)
    norm_product = np.linalg.norm(first, axis=1) * np.linalg.norm(second, axis=1)
    cosine_sim = row_dots / norm_product
    euclidean_dist = np.linalg.norm(first - second, axis=1)

    return pd.DataFrame({
        'cosine_similarity': cosine_sim,
        'euclidean_distance': euclidean_dist
    })

# ============================================
# 2. Enhanced Permutation Test with Original Plots
# ============================================
def permutation_test(groups, group_names, n_permutations=5000, metric='cosine', seed=42):
    """
    Permutation-test the mean cross-group distance for every pair of groups.

    For each pair the observed statistic is the mean of all distances between
    a row of the first group and a row of the second. The null distribution is
    built by shuffling the rows of those two groups only and recomputing the
    statistic, so rows of groups outside the pair never enter that pair's
    null distribution. For every pair a summary is printed and a histogram of
    the null distribution is shown.

    Parameters
    ----------
    groups : sequence of 2-D arrays
        One (n_samples, n_features) array per group; all groups must share
        the same number of features so that they can be stacked.
    group_names : sequence of str
        Display name for each entry of ``groups``.
    n_permutations : int
        Number of random relabelings per pair.
    metric : str
        Distance metric name understood by ``scipy.spatial.distance.cdist``.
        With ``'cosine'`` the distance is ``1 - cosine similarity``.
    seed : int
        Seed for a local ``numpy.random.Generator``; the global NumPy random
        state is left untouched.

    Returns
    -------
    dict
        ``'observed_stats'``: observed statistic per ``"<name_i>-<name_j>"``;
        ``'pairwise_results'``: per pair, a dict with ``observed``,
        ``perm_mean``, ``p_value``, ``effect_size``, ``cohen_d`` and
        ``perm_dist``;
        ``'full_perm_stats'``: the permutation statistics per pair.

    Notes
    -----
    One (n_i + n_j) x (n_i + n_j) distance matrix is held in memory per pair
    so that each permutation is an index lookup rather than a fresh distance
    computation.
    """
    rng = np.random.default_rng(seed)

    # Store all permutation distributions for plotting
    full_perm_stats = {}

    # Observed statistics and pairwise results
    observed_stats = {}
    pairwise_results = {}

    for i, j in combinations(range(len(groups)), 2):
        key = f"{group_names[i]}-{group_names[j]}"
        size_i, size_j = len(groups[i]), len(groups[j])

        # Pool only the two groups under comparison and compute every
        # pairwise distance once; all later quantities are slices of it.
        pooled = np.vstack([groups[i], groups[j]])
        pool_dist = cdist(pooled, pooled, metric=metric)

        between = pool_dist[:size_i, size_i:]
        obs_stat = np.mean(between)

        perm_stats = np.empty(n_permutations)
        for k in range(n_permutations):
            order = rng.permutation(size_i + size_j)
            relabeled = pool_dist[np.ix_(order[:size_i], order[size_i:])]
            perm_stats[k] = relabeled.mean()
        full_perm_stats[key] = perm_stats

        # Two-sided p-value around the permutation mean. The +1 in numerator
        # and denominator counts the observed labeling itself, so the
        # estimate can never be exactly zero.
        perm_mean = np.mean(perm_stats)
        n_extreme = np.count_nonzero(
            np.abs(perm_stats - perm_mean) >= np.abs(obs_stat - perm_mean)
        )
        p_value = (n_extreme + 1) / (n_permutations + 1)

        # Shift of the observed statistic from the permutation mean, scaled
        # by the spread of the observed between-group distances.
        pooled_std = np.std(between)
        effect_size = (obs_stat - perm_mean) / pooled_std if pooled_std > 0 else np.nan

        # Cohen's d: between-group distances vs within-group distances.
        # Only the strict upper triangle of each within-group block is used,
        # so zero self-distances and mirrored duplicates are excluded.
        within_dists = np.concatenate([
            pool_dist[:size_i, :size_i][np.triu_indices(size_i, k=1)],
            pool_dist[size_i:, size_i:][np.triu_indices(size_j, k=1)],
        ])
        cohen_d = pg.compute_effsize(between.ravel(), within_dists, eftype='cohen')

        # Store results
        observed_stats[key] = obs_stat
        pairwise_results[key] = {
            'observed': obs_stat,
            'perm_mean': perm_mean,
            'p_value': p_value,
            'effect_size': effect_size,
            'cohen_d': cohen_d,
            'perm_dist': perm_stats
        }

        # Print results for each pair
        print(f"\n=== {group_names[i]} vs {group_names[j]} ({metric}) ===")
        print(f"Observed Statistic: {obs_stat:.4f}")
        print(f"Permutation Mean: {perm_mean:.4f}")
        print(f"p-value (two-tailed): {p_value:.4f}")
        print(f"Effect Size (Corrected Z): {effect_size:.4f}")
        print(f"Cohen's d (between vs within): {cohen_d:.4f}")

        # Plot permutation distribution
        plt.figure(figsize=(10, 6))
        plt.hist(perm_stats, bins=50, alpha=0.7, label='Permutation Distribution')
        plt.axvline(obs_stat, color='red', linestyle='--', linewidth=2, label='Observed')
        plt.axvline(perm_mean, color='green', linestyle=':', linewidth=2, label='Perm Mean')
        plt.legend()
        plt.title(f"Permutation Test: {group_names[i]} vs {group_names[j]} ({metric}, n_perm={n_permutations})")
        plt.xlabel("Distance" if metric != 'cosine' else "1 - Cosine Similarity")
        plt.ylabel("Frequency")
        plt.show()

    return {
        'observed_stats': observed_stats,
        'pairwise_results': pairwise_results,
        'full_perm_stats': full_perm_stats
    }

# ============================================
# 3. Enhanced Visualization Functions
# ============================================
def plot_similarity_distributions(groups, group_names):
    """Plot row-aligned similarity/distance distributions for every group pair.

    For each pair of groups, row ``k`` of one group is compared with row ``k``
    of the other. One figure is produced for cosine similarity and one for
    Euclidean distance; each figure has one row per pair with a histogram
    (with KDE) on the left and a box plot on the right.

    Parameters
    ----------
    groups : sequence of 2-D arrays
        One (n_samples, n_features) array per group. The Euclidean branch
        subtracts the arrays element-wise, so paired groups need matching
        shapes.
    group_names : sequence of str
        Display name for each entry of ``groups``.
    """
    # Create all pairwise combinations
    pairs = list(combinations(range(len(groups)), 2))
    if not pairs:
        # Fewer than two groups: nothing to compare, and a zero-height
        # figure cannot be drawn.
        return

    for metric in ['cosine', 'euclidean']:
        plt.figure(figsize=(15, 5*len(pairs)))
        for idx, (i, j) in enumerate(pairs, 1):
            first = np.asarray(groups[i])
            second = np.asarray(groups[j])

            if metric == 'cosine':
                # Row-wise cosine similarity computed directly, instead of
                # building the full n x n similarity matrix just to read its
                # diagonal. Zero norms are replaced by 1 so a zero vector
                # yields similarity 0, the same convention as sklearn's
                # cosine_similarity.
                norm_first = np.linalg.norm(first, axis=1)
                norm_second = np.linalg.norm(second, axis=1)
                norm_first = np.where(norm_first == 0, 1.0, norm_first)
                norm_second = np.where(norm_second == 0, 1.0, norm_second)
                values = np.einsum('ij,ij->i', first, second) / (norm_first * norm_second)
                xlabel = "Cosine Similarity"
            else:
                values = np.linalg.norm(first - second, axis=1)
                xlabel = "Euclidean Distance"

            # Left column: histogram with KDE
            plt.subplot(len(pairs), 2, 2*idx-1)
            sns.histplot(values, bins=30, kde=True)
            plt.title(f"{group_names[i]} vs {group_names[j]} {xlabel} Distribution")
            plt.xlabel(xlabel)

            # Right column: box plot of the same values
            plt.subplot(len(pairs), 2, 2*idx)
            sns.boxplot(x=values)
            plt.title(f"{group_names[i]} vs {group_names[j]} {xlabel} Spread")
            plt.xlabel(xlabel)

        plt.tight_layout()
        plt.show()

def _significance_stars(p_value):
    """Return '***', '**' or '*' for p < 0.001, 0.01, 0.05; '' otherwise."""
    if p_value < 0.001:
        return "***"
    if p_value < 0.01:
        return "**"
    if p_value < 0.05:
        return "*"
    return ""


def plot_multi_group_comparison(results_dict, metric):
    """Visualize all pairwise comparisons together.

    Draws two bar charts side by side: the observed statistic per comparison
    (with significance stars above significant bars) and the effect size per
    comparison (with reference lines at 0.2 and 0.5).

    Parameters
    ----------
    results_dict : dict
        Must contain ``'pairwise_results'``, a mapping from comparison label
        to a dict with the keys ``'observed'``, ``'p_value'`` and
        ``'effect_size'``.
    metric : str
        Metric name used in titles; ``'euclidean'`` selects the
        "Mean Distance" axis label, anything else the cosine label.
    """
    comparisons = []
    obs_values = []
    p_values = []
    effect_sizes = []

    for key, res in results_dict['pairwise_results'].items():
        comparisons.append(key)
        obs_values.append(res['observed'])
        p_values.append(res['p_value'])
        effect_sizes.append(res['effect_size'])

    # Create figure
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # Plot observed statistics
    sns.barplot(x=comparisons, y=obs_values, ax=ax1, palette="viridis")
    ax1.set_title(f"Observed {metric} Statistics")
    ax1.set_ylabel("Mean Distance" if metric == 'euclidean' else "1 - Mean Cosine Similarity")

    # Plot effect sizes
    sns.barplot(x=comparisons, y=effect_sizes, ax=ax2, palette="magma")
    ax2.axhline(0.2, color='red', linestyle='--', alpha=0.5, label='Small effect')
    ax2.axhline(0.5, color='red', linestyle=':', alpha=0.5, label='Medium effect')
    ax2.set_title(f"Effect Sizes ({metric})")
    ax2.set_ylabel("Corrected Z-score Effect Size")
    ax2.legend()

    # Add significance markers. The strictest threshold is tested first so
    # that "**" and "***" are actually reachable.
    y_max = max(obs_values) * 1.1
    for i, pval in enumerate(p_values):
        stars = _significance_stars(pval)
        if stars:
            ax1.text(i, y_max, stars,
                     ha='center', va='bottom', color='red', fontsize=14)

    plt.tight_layout()
    plt.show()

# ============================================
# 4. Main Analysis Pipeline
# ============================================
if __name__ == "__main__":
    # f_oneway is used for the ANOVA below but is not among the imports at
    # the top of this cell, so it is brought into scope here.
    from scipy.stats import f_oneway

    groups = [embeddings_a, embeddings_b, embeddings_c]
    group_names = ['A', 'B', 'C']

    # 1. Pairwise distribution visualization
    print("\n=== Pairwise Distribution Visualizations ===")
    plot_similarity_distributions(groups, group_names)

    # 2. Calculate and print descriptive statistics for all pairs
    print("\n=== Pairwise Descriptive Statistics ===")
    for i, j in combinations(range(len(groups)), 2):
        print(f"\nGroup {group_names[i]} vs Group {group_names[j]}:")
        sim_results = assess_similarity(groups[i], groups[j])
        desc = sim_results.describe()
        for col in sim_results.columns:
            print(f"\n--- {col.replace('_', ' ').title()} ---")
            print(desc[col].to_string(float_format="%.4f"))

    # 3. Run permutation tests
    print("\n=== Cosine Similarity Analysis ===")
    cos_results = permutation_test(groups, group_names, metric='cosine')
    plot_multi_group_comparison(cos_results, 'cosine')

    print("\n=== Euclidean Distance Analysis ===")
    euc_results = permutation_test(groups, group_names, metric='euclidean')
    plot_multi_group_comparison(euc_results, 'euclidean')

    # 4. ANOVA and post-hoc tests
    print("\n=== ANOVA Results ===")
    for metric in ['cosine', 'euclidean']:
        # Within-group pairwise distances, one sample per group. pdist
        # returns each unordered pair exactly once and never a point against
        # itself, so no pair is counted twice and genuinely identical
        # embeddings (distance 0) are kept. For 'cosine' the value is
        # 1 - cosine similarity.
        # NOTE(review): distances that share a point are not independent
        # observations; treat the ANOVA p-values as descriptive.
        data = [pdist(group, metric=metric) for group in groups]

        # Perform ANOVA
        f_val, p_val = f_oneway(*data)
        print(f"\n{metric.upper()} Distance:")
        print(f"F-statistic: {f_val:.2f}, p-value: {p_val:.4f}")

        if p_val < 0.05:
            # Post-hoc Tukey test
            print("\nPost-hoc Tukey HSD:")
            all_data = np.concatenate(data)
            group_labels = np.repeat(
                [f"Group{name}" for name in group_names],
                [len(arr) for arr in data],
            )
            tukey = pairwise_tukeyhsd(all_data, group_labels)
            print(tukey)

Dimensionality Reduction Visualization

In [None]:
!pip install umap-learn
import umap
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import mean_squared_error
from sklearn.manifold import trustworthiness
from sklearn.neighbors import NearestNeighbors
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import combinations

def _scatter_projection(points, group_labels, color_dict, title, xlabel, ylabel):
    """Draw one 2-D scatter plot of projected embeddings coloured by group."""
    plt.figure(figsize=(10, 8))
    sns.scatterplot(x=points[:, 0], y=points[:, 1],
                    hue=group_labels, palette=color_dict,
                    alpha=0.7, s=50)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.legend(title='Groups')
    plt.show()


def _knn_preservation(reference_neighbors, projected, n_neighbors):
    """Return the mean fraction of each point's reference neighbours kept in ``projected``."""
    projected_neighbors = (
        NearestNeighbors(n_neighbors=n_neighbors)
        .fit(projected)
        .kneighbors(return_distance=False)
    )
    return np.mean([
        len(np.intersect1d(ref_row, proj_row)) / n_neighbors
        for ref_row, proj_row in zip(reference_neighbors, projected_neighbors)
    ])


def analyze_and_visualize_embeddings(embeddings_list, group_names=('GroupA', 'GroupB', 'GroupC'),
                                     n_neighbors=5):
    """
    Perform dimensionality reduction and comparative analysis of multiple sets of embeddings.

    All groups are stacked and projected to 2-D with PCA, t-SNE and UMAP; each
    projection is plotted and a few quality metrics are printed. Finally, for
    every pair of groups, the row-aligned dot products and Euclidean distances
    are plotted and summarised.

    Parameters:
        embeddings_list: List of embedding arrays [emb1, emb2, ...]. The
            arrays are stacked with np.vstack, and the pairwise section
            combines them element-wise, so paired groups need matching shapes.
        group_names: Tuple of group labels, one per entry of embeddings_list
        n_neighbors: Neighbourhood size used for the trustworthiness score and
            the k-NN preservation rates (default 5)

    Raises:
        ValueError: If group_names and embeddings_list differ in length.
    """
    if len(group_names) != len(embeddings_list):
        raise ValueError(
            f"got {len(embeddings_list)} embedding arrays but {len(group_names)} group names"
        )

    # Combine all embeddings and create group labels
    combined = np.vstack(embeddings_list)
    group_labels = np.concatenate([[name]*len(emb) for name, emb in zip(group_names, embeddings_list)])

    # Color palette for visualization
    palette = sns.color_palette("husl", len(embeddings_list))
    color_dict = {name: palette[i] for i, name in enumerate(group_names)}

    # ============================================
    # 1. PCA Dimensionality Reduction
    # ============================================
    pca = PCA(n_components=2)
    reduced_pca = pca.fit_transform(combined)

    _scatter_projection(reduced_pca, group_labels, color_dict,
                        "PCA: Principal Component Analysis (2D Projection)",
                        "Principal Component 1", "Principal Component 2")

    # PCA diagnostic metrics
    explained_var = pca.explained_variance_ratio_
    eigenvalues = pca.explained_variance_
    # Map the 2-D points back to the original space to measure what was lost.
    projected = pca.inverse_transform(reduced_pca)
    reconstruction_error = mean_squared_error(combined, projected)

    print("PCA Diagnostic Metrics:")
    print(f"- Eigenvalues: {eigenvalues}")
    print(f"- Explained Variance Ratio: {explained_var}")
    print(f"- Cumulative Explained Variance: {np.sum(explained_var):.4f}")
    print(f"- Reconstruction MSE: {reconstruction_error:.4f}")
    print("""Statistical Interpretation:
    Eigenvalues indicate the magnitude of variance captured by each principal component.
    Explained variance ratios show the proportion of total variance accounted for by each component.
    Reconstruction error measures information loss during dimensionality reduction.\n""")

    # ============================================
    # 2. t-SNE Visualization
    # ============================================
    tsne = TSNE(n_components=2, perplexity=30, random_state=42, init='pca')
    reduced_tsne = tsne.fit_transform(combined)

    _scatter_projection(reduced_tsne, group_labels, color_dict,
                        "t-SNE: t-distributed Stochastic Neighbor Embedding",
                        "t-SNE Dimension 1", "t-SNE Dimension 2")

    # t-SNE quality metrics
    trust = trustworthiness(combined, reduced_tsne, n_neighbors=n_neighbors)

    # Neighbour indices in the original space; reused as the reference for
    # both the t-SNE and the UMAP preservation rates.
    knn_original = (
        NearestNeighbors(n_neighbors=n_neighbors)
        .fit(combined)
        .kneighbors(return_distance=False)
    )
    preserved = _knn_preservation(knn_original, reduced_tsne, n_neighbors)

    print("t-SNE Quality Metrics:")
    print(f"- KL Divergence: {tsne.kl_divergence_:.4f}")
    print(f"- Trustworthiness: {trust:.4f}")
    print(f"- k-NN Preservation Rate: {preserved:.4f}")
    print("""Statistical Interpretation:
    KL divergence quantifies the difference between high-dimensional and low-dimensional distributions.
    Trustworthiness measures the preservation of neighborhood relationships (higher is better).
    k-NN preservation rate indicates local structure maintenance during dimensionality reduction.\n""")

    # ============================================
    # 3. UMAP Projection
    # ============================================
    reducer = umap.UMAP(n_components=2, n_neighbors=15, min_dist=0.1, metric='euclidean', random_state=42)
    reduced_umap = reducer.fit_transform(combined)

    _scatter_projection(reduced_umap, group_labels, color_dict,
                        "UMAP: Uniform Manifold Approximation and Projection",
                        "UMAP Dimension 1", "UMAP Dimension 2")

    # UMAP quality assessment
    preserved_umap = _knn_preservation(knn_original, reduced_umap, n_neighbors)

    print("UMAP Quality Metrics:")
    print(f"- k-NN Preservation Rate: {preserved_umap:.4f}")
    print("""Statistical Interpretation:
    The k-NN preservation rate quantifies how well UMAP maintains local neighborhood structures
    from the original high-dimensional space in the 2D projection.\n""")

    # ============================================
    # 4. Pairwise Similarity Analysis
    # ============================================
    print("\n=== Pairwise Similarity Analysis ===")

    # Create all possible group pairs
    group_pairs = list(combinations(range(len(embeddings_list)), 2))

    for i, j in group_pairs:
        # Dot product of row k in group i with row k in group j
        dot_products = np.sum(embeddings_list[i] * embeddings_list[j], axis=1)
        mean_dot = np.mean(dot_products)
        std_dot = np.std(dot_products)

        # Euclidean distance between the same aligned rows
        euc_distances = np.linalg.norm(embeddings_list[i] - embeddings_list[j], axis=1)
        mean_euc = np.mean(euc_distances)

        # Plotting
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

        # Dot product plot
        sns.histplot(dot_products, bins=30, color=palette[i], alpha=0.7, ax=ax1)
        ax1.axvline(mean_dot, color='red', linestyle='--', label=f"Mean: {mean_dot:.4f}")
        ax1.set_title(f"Dot Products: {group_names[i]} vs {group_names[j]}")
        ax1.set_xlabel("Dot Product Value")
        ax1.legend()

        # Euclidean distance plot
        sns.histplot(euc_distances, bins=30, color=palette[j], alpha=0.7, ax=ax2)
        ax2.axvline(mean_euc, color='red', linestyle='--',
                    label=f"Mean: {mean_euc:.4f}")
        ax2.set_title(f"Euclidean Distances: {group_names[i]} vs {group_names[j]}")
        ax2.set_xlabel("Distance")
        ax2.legend()

        plt.tight_layout()
        plt.show()

        print(f"\n{group_names[i]} vs {group_names[j]} Statistics:")
        print(f"- Dot Product Mean ± Std: {mean_dot:.4f} ± {std_dot:.4f}")
        print(f"- Euclidean Distance Mean ± Std: {mean_euc:.4f} ± {np.std(euc_distances):.4f}")
        print("""Statistical Interpretation:
        Dot products measure vector alignment (higher = more similar).
        Euclidean distances measure absolute separation (lower = more similar).\n""")

# Example usage with three groups: reload the saved embeddings, then run the
# full reduction-and-comparison routine on them.
embeddings_a, embeddings_b, embeddings_c = (
    np.load(npy_path)
    for npy_path in ("embeddings_a.npy", "embeddings_b.npy", "embeddings_c.npy")
)

analyze_and_visualize_embeddings(
    [embeddings_a, embeddings_b, embeddings_c],
    group_names=('GroupA', 'GroupB', 'GroupC'),
)

Score data analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_rel
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, cohen_kappa_score, classification_report
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import MultiComparison

# Read the score sheet and pull out one Series per prompt group.
df = pd.read_excel("/content/explicitextracted_scores_only.xlsx")
a_scores, b_scores, c_scores = (
    df[column] for column in ('group a score', 'group b score', 'group c score')
)

# Summary statistics of the three score columns.
print("Descriptive Statistics:")
score_table = df[['group a score', 'group b score', 'group c score']]
print(score_table.describe())

# Paired t-test for each pair of groups; rows are treated as matched items.
print("\nPaired t-tests:")
paired_comparisons = {
    'A vs B': (a_scores, b_scores),
    'A vs C': (a_scores, c_scores),
    'B vs C': (b_scores, c_scores),
}
for label, (first, second) in paired_comparisons.items():
    t_stat, p_val = ttest_rel(first, second)
    verdict = 'significant' if p_val < 0.05 else 'not significant'
    print(f"{label}: t = {t_stat:.4f}, p = {p_val:.4f} -> {verdict}")

# One figure, two panels: box plots on the left, overlaid densities on the right.
plt.figure(figsize=(14, 6))

# Left panel: box plot of the three score columns.
plt.subplot(1, 2, 1)
score_columns = ['group a score', 'group b score', 'group c score']
sns.boxplot(data=df[score_columns], palette="Set2")
plt.title("Boxplot of Scores")

# Right panel: one filled KDE curve per group.
plt.subplot(1, 2, 2)
for series, legend_label in (
    (a_scores, 'Group A'),
    (b_scores, 'Group B'),
    (c_scores, 'Group C'),
):
    sns.kdeplot(series, label=legend_label, fill=True)
plt.title("Density Plot of Scores")
plt.legend()

plt.tight_layout()
plt.show()

# Repeated Measures ANOVA
# Reshape to long format: one row per (subject, group) score. The melt stacks
# the three columns one after another, so the subject ids 0..n-1 are repeated
# once per column.
wide_scores = df[['group a score', 'group b score', 'group c score']]
df_long = wide_scores.melt(var_name='Group', value_name='Score')
subject_ids = np.arange(len(df))
df_long['Subject'] = np.tile(subject_ids, 3)

# OLS with group and subject as categorical factors, then a type-II ANOVA table.
rm_formula = 'Score ~ C(Group) + C(Subject)'
model = ols(rm_formula, data=df_long).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("\nRepeated Measures ANOVA:")
print(anova_table)

# Post-hoc test: Tukey HSD across the three groups.
print("\nPost-hoc Tukey HSD test:")
mc = MultiComparison(df_long['Score'], df_long['Group'])
result = mc.tukeyhsd()
print(result.summary())

# Scatter-plot matrix of the three score columns.
pair_columns = ['group a score', 'group b score', 'group c score']
sns.pairplot(df[pair_columns])
plt.suptitle("Pairwise Relationships Among Group Scores", y=1.02)
plt.show()

# Per-row score differences, stored as new columns of df.
score_pairs = {
    'b - a': (b_scores, a_scores),
    'c - a': (c_scores, a_scores),
    'c - b': (c_scores, b_scores),
}
for diff_name, (minuend, subtrahend) in score_pairs.items():
    df[diff_name] = minuend - subtrahend

# One histogram (with KDE) per difference column; the red line marks zero.
for col in score_pairs:
    plt.figure(figsize=(6, 4))
    sns.histplot(df[col], kde=True)
    plt.axvline(0, color='red', linestyle='--')
    plt.title(f"Distribution of Score Differences ({col.upper()})")
    plt.xlabel("Score Difference")
    plt.grid(True)
    plt.show()

# Cohen's Kappa Analysis
# Bin the scores into half-point categories. Intervals are left-closed
# (right=False), e.g. [2.25, 2.75) maps to the label '2.5'.
# NOTE(review): pd.cut turns values outside [1.9, 4.75) into NaN — confirm
# that no score falls outside that range.
bins = [1.9, 2.25, 2.75, 3.25, 3.75, 4.25, 4.75]
labels = ['2', '2.5', '3', '3.5', '4', '4.5']

a_cat = pd.cut(a_scores, bins=bins, labels=labels, right=False)
b_cat = pd.cut(b_scores, bins=bins, labels=labels, right=False)
c_cat = pd.cut(c_scores, bins=bins, labels=labels, right=False)

for (true, pred, label) in [(a_cat, b_cat, "A vs B"), (a_cat, c_cat, "A vs C"), (b_cat, c_cat, "B vs C")]:
    print(f"\nCohen’s Kappa Score for {label}:")
    kappa = cohen_kappa_score(true, pred)
    print(f"  Score: {kappa:.3f}")
    # Map kappa onto a verbal agreement level.
    level = ("Almost no consistency" if kappa < 0.2 else
             "Less consistency" if kappa < 0.4 else
             "Medium consistency" if kappa < 0.6 else
             "Good consistency" if kappa < 0.8 else
             "Almost perfect consistency")
    print(f"  → Consistency level: {level}")

    conf_mat = confusion_matrix(true, pred, labels=labels)
    disp = ConfusionMatrixDisplay(conf_mat, display_labels=labels)
    disp.plot(cmap="Blues", values_format="d")
    plt.title(f"Confusion Matrix ({label})")
    plt.grid(False)
    plt.show()

    print("Classification Report:")
    # Pass labels= explicitly: with target_names alone the report raises when
    # a category is absent from the data, because the number of observed
    # classes no longer matches the number of names.
    print(classification_report(true, pred, labels=labels, target_names=labels, zero_division=0))

# Classify each row's score change as Increase / Decrease / No change for
# every group comparison, then print and plot the counts.
direction_specs = (('B vs A', 'b - a'), ('C vs A', 'c - a'), ('C vs B', 'c - b'))
for label, diff_col in direction_specs:
    delta = df[diff_col]
    # Rows that are neither above nor below zero fall through to the default.
    df['direction'] = np.select(
        [delta > 0, delta < 0],
        ['Increase', 'Decrease'],
        default='No change',
    )
    direction_counts = df['direction'].value_counts()
    print(f"\nScore change direction for {label}:")
    print(direction_counts)

    plt.figure(figsize=(6, 4))
    sns.barplot(x=direction_counts.index, y=direction_counts.values, palette="viridis")
    plt.title(f"Score Change Direction: {label}")
    plt.ylabel("Count")
    plt.show()

# Mean difference of Group B and Group C from Group A, reported separately
# for each distinct Group A score.
print("\nAnalysis of variances by specific Group A scores:")
score_levels = sorted(df['group a score'].unique())
comparison_columns = (('Group B', 'group b score'), ('Group C', 'group c score'))
for score in score_levels:
    at_level = df['group a score'] == score
    subset = df[at_level]
    count = len(subset)
    for label, col in comparison_columns:
        mean_diff = (subset[col] - subset['group a score']).mean()
        print(f"When Group A is {score:.1f}（n={count}）, {label} avg diff: {mean_diff:.3f}")

# For every difference column, overlay one KDE curve per Group A score level.
for diff_col in ('b - a', 'c - a', 'c - b'):
    plt.figure(figsize=(10, 6))
    for score in sorted(df['group a score'].unique()):
        level_rows = df[df['group a score'] == score]
        sns.kdeplot(level_rows[diff_col], label=f'A={score}', fill=True)
    plt.axvline(0, color='red', linestyle='--')
    plt.title(f"Distribution of {diff_col.upper()} by Group A Score")
    plt.xlabel("Score Difference")
    plt.legend(title="Group A Score")
    plt.grid(True)
    plt.show()

# Mean of each difference column over all rows.
print("\nAverage Score Differences:")
overall_specs = {'B - A': 'b - a', 'C - A': 'c - a', 'C - B': 'c - b'}
for label, col in overall_specs.items():
    overall_mean = df[col].mean()
    print(f"{label}: {overall_mean:.3f}")