## Utils

In [None]:
import warnings
# Suppress all warnings for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
from functools import lru_cache

import torch
from transformers import BertModel, BertTokenizer, DistilBertModel, DistilBertTokenizer

@lru_cache(maxsize=None)
def _load_encoder(use_bert):
    """Load the pretrained ``(tokenizer, encoder)`` pair once and memoize it.

    Parameters:
    use_bert (bool): True for "bert-base-uncased", False for "distilbert-base-uncased".

    Returns:
    tuple: ``(tokenizer, encoder)``.
    """
    if use_bert:
        return (
            BertTokenizer.from_pretrained("bert-base-uncased"),
            BertModel.from_pretrained("bert-base-uncased"),
        )
    return (
        DistilBertTokenizer.from_pretrained('distilbert-base-uncased'),
        DistilBertModel.from_pretrained('distilbert-base-uncased'),
    )


def get_embeddings(texts, model='bert', token='cls'):
    """
    Embeds a batch of texts with BERT or DistilBERT.

    Parameters:
    texts (str or list of str): Text(s) to embed; passed to the tokenizer with padding and truncation.
    model (str): 'bert' selects bert-base-uncased; any other value selects distilbert-base-uncased.
    token (str): 'cls' returns the hidden state of the first token; any other value returns
        the mean of the hidden states over the real (non-padding) tokens.

    Returns:
    torch.Tensor: One embedding per input text, shape (batch_size, hidden_size).
    """
    # The loader is cached, so calling this function once per sample does not
    # reload the pretrained weights each time. The key is normalised to a bool so
    # that every non-'bert' name shares the single DistilBERT entry.
    tokenizer, encoder = _load_encoder(model == 'bert')

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = encoder(**inputs)

    hidden_states = outputs.last_hidden_state
    if token == 'cls':
        return hidden_states[:, 0, :]  # Shape: (batch_size, hidden_size)

    # Mean pooling must skip padding positions, otherwise the embedding of a text
    # depends on how long the other texts in the same batch are.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)
    # No squeeze here: a single-text batch stays 2-D, like the 'cls' branch.
    return (hidden_states * mask).sum(dim=1) / token_counts

In [None]:
def get_answers_for_mistake_and_profile(df, knowledge_profile, mistake_type=None, profile_column='Knowledge Profile', mistake_column='Comments - Eylül', answer_column='CONTRAPOSITION task'):
    """
    Collects the answers of all rows that belong to one knowledge profile,
    optionally restricted to rows whose mistake annotation mentions a given mistake type.

    Parameters:
    df (pd.DataFrame): Source table.
    knowledge_profile (str): Value that `profile_column` must equal.
    mistake_type (str, optional): Pattern searched (case-insensitively) in `mistake_column`;
        rows with a missing annotation never match. When None, no mistake filter is applied.
    profile_column (str): Name of the knowledge-profile column.
    mistake_column (str): Name of the mistake-annotation column.
    answer_column (str): Name of the column holding the answers.

    Returns:
    list: Answers of the selected rows, in DataFrame order.
    """
    in_profile = df[profile_column] == knowledge_profile

    # Without a mistake filter the profile mask alone selects the rows.
    if mistake_type is None:
        return df.loc[in_profile, answer_column].tolist()

    profile_rows = df[in_profile]
    mentions_mistake = profile_rows[mistake_column].str.contains(mistake_type, case=False, na=False)
    return profile_rows.loc[mentions_mistake, answer_column].tolist()

In [None]:
import os
def read_txt_files_to_array(directory):
    """
    Reads every ``.txt`` file directly inside `directory` and returns the stripped contents.

    Parameters:
    directory (str): Path of the directory to scan (not recursive).

    Returns:
    list: One stripped string per ``.txt`` file, ordered by file name.
    """
    content_array = []

    # os.listdir returns entries in arbitrary order; sorting makes the result
    # reproducible across runs and platforms.
    for file_name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file_name)
        # Skip non-.txt entries and directories that merely end in ".txt".
        if not file_name.endswith('.txt') or not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            content_array.append(file.read().strip())

    print(f"Total files read: {len(content_array)}")
    return content_array

In [None]:
import math
import random

def plot_original_and_generated_kp(knowledge_profile, original_embeddings, generated_data_dir, model='bert', token='cls', csv_path=None):
    """
    Plots a 2-D PCA projection of original and generated answer embeddings for one knowledge profile.

    100 generated samples matching the profile are drawn from the CSV (fixed random_state),
    labelled by the mistake-type sub-directory whose text files contain them, embedded,
    projected together with the original embeddings, plotted, and saved under
    ``diversity_figures/``.

    Parameters:
    knowledge_profile (str): Digit string; digit i is compared with CSV column ``rubric{i}`` (1-based).
    original_embeddings: Embeddings of the original answers, one row per answer.
    generated_data_dir (str): Directory containing one sub-directory per knowledge profile,
        each with one sub-directory per mistake type holding ``.txt`` files.
    model (str): Encoder name forwarded to `get_embeddings`.
    token (str): Pooling mode forwarded to `get_embeddings`.
    csv_path (str): CSV with the generated samples (columns ``rubric1..``, ``text``).

    Returns:
    list or None: Embeddings of the labelled generated samples (one entry per match),
        or None when the CSV path is missing, fewer than 100 samples match the profile,
        or no generated-data directory exists for the profile.
    """
    if csv_path is None:
        print("CSV path must be provided.")
        return

    data = pd.read_csv(csv_path)

    rubric_conditions = [data[f"rubric{i}"] == int(k) for i, k in enumerate(knowledge_profile, start=1)]
    filtered_data = data.loc[np.logical_and.reduce(rubric_conditions)]

    if len(filtered_data) < 100:
        print(f"Not enough samples for knowledge profile {knowledge_profile}.")
        return

    selected_samples = filtered_data.sample(n=100, random_state=42)['text'].tolist()

    generated_embeddings = []
    labels = []

    generated_profile_dir = os.path.join(generated_data_dir, knowledge_profile)
    if not os.path.exists(generated_profile_dir):
        print(f"No generated data found for knowledge profile: {knowledge_profile}")
        return

    # Match samples with mistake directories for labels
    for mistake_type in os.listdir(generated_profile_dir):
        if ".ipynb_checkpoints" in mistake_type:
            continue

        mistake_dir = os.path.join(generated_profile_dir, mistake_type)
        if not os.path.isdir(mistake_dir):
            continue

        # A set gives constant-time membership tests for the 100 samples.
        mistake_texts = set(read_txt_files_to_array(mistake_dir))
        for sample in selected_samples:
            if sample in mistake_texts:
                labels.append(mistake_type)
                generated_embeddings.append(get_embeddings([sample], model, token))

    # Ensure all selected samples have labels
    if len(labels) < len(selected_samples):
        print(f"Warning: Some samples for profile {knowledge_profile} could not be labeled.")

    # Sorted labels keep the label -> colour assignment stable between runs.
    unique_labels = sorted(set(labels))
    print(len(unique_labels))
    # plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases removed.
    cmap = plt.get_cmap('tab10', len(unique_labels) + 2)
    colors = {label: cmap(i + 1) for i, label in enumerate(unique_labels)}

    # Combine embeddings for PCA
    all_embeddings = np.vstack([original_embeddings] + generated_embeddings)
    reduced = PCA(n_components=2).fit_transform(all_embeddings)

    # Plot original data (colour index 0 is reserved for it)
    n_original = len(original_embeddings)
    plt.figure(figsize=(12, 8))
    plt.scatter(
        reduced[:n_original, 0],
        reduced[:n_original, 1],
        label='Original Data',
        color=cmap(0),  # `color=` avoids the RGBA-vs-data ambiguity of `c=` with a single tuple
        marker='o',
        alpha=0.7
    )

    # Plot generated data, one series per mistake type; generated rows follow the original rows
    for label in unique_labels:
        rows = n_original + np.array([i for i, l in enumerate(labels) if l == label])
        plt.scatter(
            reduced[rows, 0],
            reduced[rows, 1],
            label=f'Generated ({label})',
            color=colors[label],
            marker='x',
            alpha=0.7
        )

    plt.title(f'Original and Generated (Simple) Embeddings for Knowledge Profile {knowledge_profile}')
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.legend()

    figure_name = f"simple_{knowledge_profile}_{model}_{token}.png"
    # savefig does not create missing directories.
    os.makedirs("diversity_figures", exist_ok=True)
    plt.savefig(os.path.join("diversity_figures", figure_name))

    plt.show()
    return generated_embeddings


In [None]:
import numpy as np
from itertools import combinations

def _as_sample_matrix(embeddings):
    """Return `embeddings` as a 2-D float64 NumPy array with one row per sample."""
    if hasattr(embeddings, "detach"):
        # Torch tensors: drop autograd/device state before handing over to NumPy.
        embeddings = embeddings.detach().cpu().numpy()
    matrix = np.atleast_2d(np.asarray(embeddings, dtype=np.float64))
    if matrix.ndim != 2:
        raise ValueError("Embeddings must have shape [n_samples, embedding_dim].")
    return matrix


def compute_similarity(embeddings1, embeddings2=None, metric='cosine'):
    """
    Computes the average pairwise cosine similarity or Euclidean distance within a
    single group of embeddings, or between two groups of embeddings.

    Parameters:
    embeddings1 (array-like or torch.Tensor): First group (shape: [n_samples1, embedding_dim])
    embeddings2 (array-like or torch.Tensor, optional): Second group (shape: [n_samples2, embedding_dim]).
        When given, every row of the first group is paired with every row of the second;
        otherwise all unordered pairs of distinct rows of the first group are used.
    metric (str): 'cosine' (similarity) or 'euclidean' (distance)

    Returns:
    float: Mean over all pairs; NaN when there is no pair to average.

    Raises:
    ValueError: If `metric` is neither 'cosine' nor 'euclidean', or an input is not 2-D.
    """
    # Validate up front so a bad metric is reported even when there are no pairs.
    if metric not in ('cosine', 'euclidean'):
        raise ValueError("Invalid metric. Use 'cosine' or 'euclidean'.")

    group_a = _as_sample_matrix(embeddings1)
    group_b = None if embeddings2 is None else _as_sample_matrix(embeddings2)

    if metric == 'cosine':
        # Cosine similarity is the dot product of unit-length vectors.
        group_a = group_a / np.linalg.norm(group_a, axis=1, keepdims=True)
        if group_b is None:
            upper_rows, upper_cols = np.triu_indices(len(group_a), k=1)
            values = (group_a @ group_a.T)[upper_rows, upper_cols]
        else:
            group_b = group_b / np.linalg.norm(group_b, axis=1, keepdims=True)
            values = (group_a @ group_b.T).ravel()
    else:
        # One vectorised norm per row keeps memory at O(n * dim) while avoiding
        # the cancellation error of the |a|^2 + |b|^2 - 2ab shortcut.
        if group_b is None:
            chunks = [
                np.linalg.norm(group_a[i + 1:] - group_a[i], axis=1)
                for i in range(len(group_a) - 1)
            ]
        else:
            chunks = [np.linalg.norm(group_b - row, axis=1) for row in group_a]
        values = np.concatenate(chunks) if chunks else np.empty(0)

    if values.size == 0:
        return np.float64(np.nan)
    return values.mean()

In [None]:
def generate_latex_table(metrics, target_knowledge_profile, method_name='Simple'):
    """
    Renders the diversity metrics as a LaTeX ``table`` environment.

    Args:
        metrics (dict): Metric values keyed by
            "cosine_within_original", "euclidean_within_original",
            "cosine_within_generated", "euclidean_within_generated",
            "cosine_between", "euclidean_between" and "energy_between".
            Each value is formatted with four decimal places.
        target_knowledge_profile (str): Knowledge profile shown in the caption.
        method_name (str): Generation method shown in the row labels and the caption.

    Returns:
        str: The table source, starting and ending with a newline.
    """
    def cell(key):
        # Every numeric cell uses the same fixed four-decimal format.
        return f"{metrics[key]:.4f}"

    # The empty first/last entries reproduce the leading and trailing newline.
    table_lines = [
        "",
        r"\begin{table}[h!]",
        r"\centering",
        r"\begin{tabular}{cccc}",
        r"\hline",
        r"\textbf{Dataset}              & \textbf{Cosine} & \textbf{Euclidean} & \textbf{Energy} \\ \hline",
        rf"Original                      & {cell('cosine_within_original')}          & {cell('euclidean_within_original')}             & -               \\",
        rf"Generated ({method_name})            & {cell('cosine_within_generated')}          & {cell('euclidean_within_generated')}             & -               \\",
        rf"Original - Generated ({method_name}) & {cell('cosine_between')}          & {cell('euclidean_between')}             & {cell('energy_between')}          \\ \hline",
        r"\end{tabular}",
        rf"\caption{{{target_knowledge_profile} {method_name}}}",
        r"\end{table}",
        "",
    ]
    return "\n".join(table_lines)

In [None]:
import torch
from geomloss import SamplesLoss

# Define the energy distance loss once; the metric cells below call it with the
# generated and original embedding tensors.
# "energy" type is used to compute the Energy Distance (also known as C2 distance, per the original note)
energy_distance_nd = SamplesLoss(loss="energy")


In [None]:
# Placeholder: set this to the Excel workbook that holds the original answers.
dataset_path = 'path/to/original/data'

import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import torch
from transformers import BertTokenizer, BertModel

# Only the 'contraposition' sheet of the workbook is used.
df = pd.read_excel(dataset_path, sheet_name='contraposition')

# Rubrics of the original dataset. Each string is used verbatim as a column name of
# `df`, so the exact text (including trailing spaces) must match the sheet header.
rubrics = [
    'Statement of what should be proven: A proof by contraposition of an implication consists in showing that if x rational, then x^2 is rational. ',
    'Correct assumption: x is rational [Assumption] ',
    'Correct proof reasoning',
    'Proof conclusion: By contraposition, if x^2 is irrational, then x is irrational.'
]

# Knowledge profile column: the string forms of the four rubric values, concatenated per row.
# NOTE(review): presumably each rubric cell is an integer 0/1 so profiles look like '0110'
# (the format used by the cells below) — confirm the column dtype.
df['Knowledge Profile'] = df[rubrics].astype(str).agg(''.join, axis=1)

In [None]:
# Root directory of the generated answers; the plotting helper joins it with the
# knowledge profile and reads the mistake-type sub-directories inside.
generated_data_dir = "generation/generation_simple/correct_responses/temp_1"
# CSV with the generated samples; passed to the plotting helper, which reads it with pd.read_csv.
csv_path = "generated_datasets/simple_mixed_data_500.csv"


In [None]:
models = ['bert', 'distilbert']
tokens = ['cls', 'mean']

# Two independent accumulators (generated / original), each shaped
# {model name: {pooling mode: [per-profile embedding tensors]}}.
generated_embeddings_dict, original_embeddings_dict = (
    {m: {t: [] for t in tokens} for m in models} for _ in range(2)
)

## 0000

In [None]:
model = 'distilbert'
token = 'cls'
target_knowledge_profile = '0000'

print(f'Results for {model} - {token} - {target_knowledge_profile}')

# Original answers for this knowledge profile and their embeddings.
original_kp_data = get_answers_for_mistake_and_profile(df, target_knowledge_profile)
embeddings_original_kp_data = get_embeddings(original_kp_data, model, token)

# Plot original vs. generated answers. The helper returns the per-sample generated
# embeddings, or None when it bails out early (missing CSV path, too few samples,
# or no generated-data directory).
generated_embeddings = plot_original_and_generated_kp(
    target_knowledge_profile, embeddings_original_kp_data, generated_data_dir, model, token, csv_path
)
if not generated_embeddings:
    # Fail with a readable message instead of an opaque torch.vstack error.
    raise RuntimeError(f"No generated embeddings available for knowledge profile {target_knowledge_profile}.")
generated_embeddings = torch.vstack(generated_embeddings)

# Accumulate for the dataset-level comparison at the end of the notebook.
# NOTE: re-running this cell appends the same embeddings a second time.
generated_embeddings_dict[model][token].append(generated_embeddings)
original_embeddings_dict[model][token].append(embeddings_original_kp_data)

# Every metric is computed exactly once and reused for both the table and the printout.
metrics = {
    "cosine_within_original": compute_similarity(embeddings_original_kp_data),
    "euclidean_within_original": compute_similarity(embeddings_original_kp_data, metric="euclidean"),
    "cosine_within_generated": compute_similarity(generated_embeddings),
    "euclidean_within_generated": compute_similarity(generated_embeddings, metric="euclidean"),
    "cosine_between": compute_similarity(generated_embeddings, embeddings_original_kp_data),
    "euclidean_between": compute_similarity(generated_embeddings, embeddings_original_kp_data, metric="euclidean"),
    "energy_between": energy_distance_nd(generated_embeddings, embeddings_original_kp_data),
}

latex_table = generate_latex_table(metrics, target_knowledge_profile)

# Save the LaTeX table to a file
output_dir = "./latex_tables/"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, f"simple_{target_knowledge_profile}_div_table.tex")
with open(output_file, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"LaTeX table saved to {output_file}")

#### METRICS #####
# Within each set
print(f'Cosine similarity within generated: {metrics["cosine_within_generated"]}')
print(f'Euclidean distance within generated: {metrics["euclidean_within_generated"]}')

print(f'Cosine similarity within original: {metrics["cosine_within_original"]}')
print(f'Euclidean distance within original: {metrics["euclidean_within_original"]}')

# Between the two sets
print(f'Cosine similarity between original - generated: {metrics["cosine_between"]}')
print(f'Euclidean similarity between original - generated: {metrics["euclidean_between"]}')
print(f'Energy distance between original - generated: {metrics["energy_between"]}')


## 1000

In [None]:
model = 'distilbert'
token = 'cls'
target_knowledge_profile = '1000'

print(f'Results for {model} - {token} - {target_knowledge_profile}')

# Original answers for this knowledge profile and their embeddings.
original_kp_data = get_answers_for_mistake_and_profile(df, target_knowledge_profile)
embeddings_original_kp_data = get_embeddings(original_kp_data, model, token)

# Plot original vs. generated answers. The helper returns the per-sample generated
# embeddings, or None when it bails out early (missing CSV path, too few samples,
# or no generated-data directory).
generated_embeddings = plot_original_and_generated_kp(
    target_knowledge_profile, embeddings_original_kp_data, generated_data_dir, model, token, csv_path
)
if not generated_embeddings:
    # Fail with a readable message instead of an opaque torch.vstack error.
    raise RuntimeError(f"No generated embeddings available for knowledge profile {target_knowledge_profile}.")
generated_embeddings = torch.vstack(generated_embeddings)

# Accumulate for the dataset-level comparison at the end of the notebook.
# NOTE: re-running this cell appends the same embeddings a second time.
generated_embeddings_dict[model][token].append(generated_embeddings)
original_embeddings_dict[model][token].append(embeddings_original_kp_data)

# Every metric is computed exactly once and reused for both the table and the printout.
metrics = {
    "cosine_within_original": compute_similarity(embeddings_original_kp_data),
    "euclidean_within_original": compute_similarity(embeddings_original_kp_data, metric="euclidean"),
    "cosine_within_generated": compute_similarity(generated_embeddings),
    "euclidean_within_generated": compute_similarity(generated_embeddings, metric="euclidean"),
    "cosine_between": compute_similarity(generated_embeddings, embeddings_original_kp_data),
    "euclidean_between": compute_similarity(generated_embeddings, embeddings_original_kp_data, metric="euclidean"),
    "energy_between": energy_distance_nd(generated_embeddings, embeddings_original_kp_data),
}

latex_table = generate_latex_table(metrics, target_knowledge_profile)

# Save the LaTeX table to a file
output_dir = "./latex_tables/"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, f"simple_{target_knowledge_profile}_div_table.tex")
with open(output_file, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"LaTeX table saved to {output_file}")

#### METRICS #####
# Within each set
print(f'Cosine similarity within generated: {metrics["cosine_within_generated"]}')
print(f'Euclidean distance within generated: {metrics["euclidean_within_generated"]}')

print(f'Cosine similarity within original: {metrics["cosine_within_original"]}')
print(f'Euclidean distance within original: {metrics["euclidean_within_original"]}')

# Between the two sets
print(f'Cosine similarity between original - generated: {metrics["cosine_between"]}')
print(f'Euclidean similarity between original - generated: {metrics["euclidean_between"]}')
print(f'Energy distance between original - generated: {metrics["energy_between"]}')


## 1110

In [None]:
model = 'distilbert'
token = 'cls'
target_knowledge_profile = '1110'

print(f'Results for {model} - {token} - {target_knowledge_profile}')

# Original answers for this knowledge profile and their embeddings.
original_kp_data = get_answers_for_mistake_and_profile(df, target_knowledge_profile)
embeddings_original_kp_data = get_embeddings(original_kp_data, model, token)

# Plot original vs. generated answers. The helper returns the per-sample generated
# embeddings, or None when it bails out early (missing CSV path, too few samples,
# or no generated-data directory).
generated_embeddings = plot_original_and_generated_kp(
    target_knowledge_profile, embeddings_original_kp_data, generated_data_dir, model, token, csv_path
)
if not generated_embeddings:
    # Fail with a readable message instead of an opaque torch.vstack error.
    raise RuntimeError(f"No generated embeddings available for knowledge profile {target_knowledge_profile}.")
generated_embeddings = torch.vstack(generated_embeddings)

# Accumulate for the dataset-level comparison at the end of the notebook.
# NOTE: re-running this cell appends the same embeddings a second time.
generated_embeddings_dict[model][token].append(generated_embeddings)
original_embeddings_dict[model][token].append(embeddings_original_kp_data)

# Every metric is computed exactly once and reused for both the table and the printout.
metrics = {
    "cosine_within_original": compute_similarity(embeddings_original_kp_data),
    "euclidean_within_original": compute_similarity(embeddings_original_kp_data, metric="euclidean"),
    "cosine_within_generated": compute_similarity(generated_embeddings),
    "euclidean_within_generated": compute_similarity(generated_embeddings, metric="euclidean"),
    "cosine_between": compute_similarity(generated_embeddings, embeddings_original_kp_data),
    "euclidean_between": compute_similarity(generated_embeddings, embeddings_original_kp_data, metric="euclidean"),
    "energy_between": energy_distance_nd(generated_embeddings, embeddings_original_kp_data),
}

latex_table = generate_latex_table(metrics, target_knowledge_profile)

# Save the LaTeX table to a file
output_dir = "./latex_tables/"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, f"simple_{target_knowledge_profile}_div_table.tex")
with open(output_file, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"LaTeX table saved to {output_file}")

#### METRICS #####
# Within each set
print(f'Cosine similarity within generated: {metrics["cosine_within_generated"]}')
print(f'Euclidean distance within generated: {metrics["euclidean_within_generated"]}')

print(f'Cosine similarity within original: {metrics["cosine_within_original"]}')
print(f'Euclidean distance within original: {metrics["euclidean_within_original"]}')

# Between the two sets
print(f'Cosine similarity between original - generated: {metrics["cosine_between"]}')
print(f'Euclidean similarity between original - generated: {metrics["euclidean_between"]}')
print(f'Energy distance between original - generated: {metrics["energy_between"]}')


## 0110

In [None]:
model = 'distilbert'
token = 'cls'
target_knowledge_profile = '0110'

print(f'Results for {model} - {token} - {target_knowledge_profile}')

# Original answers for this knowledge profile and their embeddings.
original_kp_data = get_answers_for_mistake_and_profile(df, target_knowledge_profile)
embeddings_original_kp_data = get_embeddings(original_kp_data, model, token)

# Plot original vs. generated answers. The helper returns the per-sample generated
# embeddings, or None when it bails out early (missing CSV path, too few samples,
# or no generated-data directory).
generated_embeddings = plot_original_and_generated_kp(
    target_knowledge_profile, embeddings_original_kp_data, generated_data_dir, model, token, csv_path
)
if not generated_embeddings:
    # Fail with a readable message instead of an opaque torch.vstack error.
    raise RuntimeError(f"No generated embeddings available for knowledge profile {target_knowledge_profile}.")
generated_embeddings = torch.vstack(generated_embeddings)

# Accumulate for the dataset-level comparison at the end of the notebook.
# NOTE: re-running this cell appends the same embeddings a second time.
generated_embeddings_dict[model][token].append(generated_embeddings)
original_embeddings_dict[model][token].append(embeddings_original_kp_data)

# Every metric is computed exactly once and reused for both the table and the printout.
metrics = {
    "cosine_within_original": compute_similarity(embeddings_original_kp_data),
    "euclidean_within_original": compute_similarity(embeddings_original_kp_data, metric="euclidean"),
    "cosine_within_generated": compute_similarity(generated_embeddings),
    "euclidean_within_generated": compute_similarity(generated_embeddings, metric="euclidean"),
    "cosine_between": compute_similarity(generated_embeddings, embeddings_original_kp_data),
    "euclidean_between": compute_similarity(generated_embeddings, embeddings_original_kp_data, metric="euclidean"),
    "energy_between": energy_distance_nd(generated_embeddings, embeddings_original_kp_data),
}

latex_table = generate_latex_table(metrics, target_knowledge_profile)

# Save the LaTeX table to a file
output_dir = "./latex_tables/"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, f"simple_{target_knowledge_profile}_div_table.tex")
with open(output_file, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"LaTeX table saved to {output_file}")

#### METRICS #####
# Within each set
print(f'Cosine similarity within generated: {metrics["cosine_within_generated"]}')
print(f'Euclidean distance within generated: {metrics["euclidean_within_generated"]}')

print(f'Cosine similarity within original: {metrics["cosine_within_original"]}')
print(f'Euclidean distance within original: {metrics["euclidean_within_original"]}')

# Between the two sets
print(f'Cosine similarity between original - generated: {metrics["cosine_between"]}')
print(f'Euclidean similarity between original - generated: {metrics["euclidean_between"]}')
print(f'Energy distance between original - generated: {metrics["energy_between"]}')


## 0111

In [None]:
model = 'distilbert'
token = 'cls'
target_knowledge_profile = '0111'

print(f'Results for {model} - {token} - {target_knowledge_profile}')

# Original answers for this knowledge profile and their embeddings.
original_kp_data = get_answers_for_mistake_and_profile(df, target_knowledge_profile)
embeddings_original_kp_data = get_embeddings(original_kp_data, model, token)

# Plot original vs. generated answers. The helper returns the per-sample generated
# embeddings, or None when it bails out early (missing CSV path, too few samples,
# or no generated-data directory).
generated_embeddings = plot_original_and_generated_kp(
    target_knowledge_profile, embeddings_original_kp_data, generated_data_dir, model, token, csv_path
)
if not generated_embeddings:
    # Fail with a readable message instead of an opaque torch.vstack error.
    raise RuntimeError(f"No generated embeddings available for knowledge profile {target_knowledge_profile}.")
generated_embeddings = torch.vstack(generated_embeddings)

# Accumulate for the dataset-level comparison at the end of the notebook.
# NOTE: re-running this cell appends the same embeddings a second time.
generated_embeddings_dict[model][token].append(generated_embeddings)
original_embeddings_dict[model][token].append(embeddings_original_kp_data)

# Every metric is computed exactly once and reused for both the table and the printout.
metrics = {
    "cosine_within_original": compute_similarity(embeddings_original_kp_data),
    "euclidean_within_original": compute_similarity(embeddings_original_kp_data, metric="euclidean"),
    "cosine_within_generated": compute_similarity(generated_embeddings),
    "euclidean_within_generated": compute_similarity(generated_embeddings, metric="euclidean"),
    "cosine_between": compute_similarity(generated_embeddings, embeddings_original_kp_data),
    "euclidean_between": compute_similarity(generated_embeddings, embeddings_original_kp_data, metric="euclidean"),
    "energy_between": energy_distance_nd(generated_embeddings, embeddings_original_kp_data),
}

latex_table = generate_latex_table(metrics, target_knowledge_profile)

# Save the LaTeX table to a file
output_dir = "./latex_tables/"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, f"simple_{target_knowledge_profile}_div_table.tex")
with open(output_file, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"LaTeX table saved to {output_file}")

#### METRICS #####
# Within each set
print(f'Cosine similarity within generated: {metrics["cosine_within_generated"]}')
print(f'Euclidean distance within generated: {metrics["euclidean_within_generated"]}')

print(f'Cosine similarity within original: {metrics["cosine_within_original"]}')
print(f'Euclidean distance within original: {metrics["euclidean_within_original"]}')

# Between the two sets
print(f'Cosine similarity between original - generated: {metrics["cosine_between"]}')
print(f'Euclidean similarity between original - generated: {metrics["euclidean_between"]}')
print(f'Energy distance between original - generated: {metrics["energy_between"]}')


## Datasets

In [None]:
model = 'distilbert'
token = 'cls'

print(f"### {model} - {token} ###") 
# Stack the per-profile tensors accumulated by the cells above into one matrix per set.
generated = torch.vstack(generated_embeddings_dict[model][token])
original = torch.vstack(original_embeddings_dict[model][token])

# Combine embeddings for PCA so both sets share the same projection
all_embeddings = torch.cat([original, generated], dim=0).numpy()
pca = PCA(n_components=2)
reduced = pca.fit_transform(all_embeddings)

# Split the reduced embeddings back (original rows come first)
original_reduced = reduced[:len(original)]
generated_reduced = reduced[len(original):]

# plt.get_cmap replaces plt.cm.get_cmap, which recent Matplotlib releases removed.
cmap = plt.get_cmap('tab10', 4)
# Plot original and generated data
plt.figure(figsize=(12, 8))
plt.scatter(
    original_reduced[:, 0],
    original_reduced[:, 1],
    label='Original',
    color=cmap(0),  # `color=` avoids the RGBA-vs-data ambiguity of `c=` with a single tuple
    marker='o',
    alpha=0.7
)

plt.scatter(
    generated_reduced[:, 0],
    generated_reduced[:, 1],
    label='Generated',
    color=cmap(1),
    marker='x',
    alpha=0.7
)

plt.title('Original and Generated (Simple) Embeddings (Dataset)')
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.legend()

# Save the figure
figure_name = "simple_dataset.png"
os.makedirs("diversity_figures", exist_ok=True)
plt.savefig(os.path.join("diversity_figures", figure_name))
print(f"Figure saved as {figure_name}")

plt.show()

# Metric computation over the whole dataset
metrics = {
    "cosine_within_original": compute_similarity(original),
    "euclidean_within_original": compute_similarity(original, metric="euclidean"),
    "cosine_within_generated": compute_similarity(generated),
    "euclidean_within_generated": compute_similarity(generated, metric="euclidean"),
    "cosine_between": compute_similarity(generated, original),
    "euclidean_between": compute_similarity(generated, original, metric="euclidean"),
    "energy_between": energy_distance_nd(generated, original),
}

# Caption the table as dataset-level. `target_knowledge_profile` still holds whatever
# profile cell ran last, which would mislabel this table as a single profile.
latex_table = generate_latex_table(metrics, 'Dataset')

# Save the LaTeX table to a file
output_dir = "./latex_tables/"
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, "simple_ds_div_table.tex")
with open(output_file, "w", encoding="utf-8") as f:
    f.write(latex_table)