In [4]:

import pandas as pd

# Load the uploaded Excel files
file_paths = [
    'results/gmm_adam_vs_em_varying_datasizes.xlsx',
    'results/gmm_adam_vs_em_varying_clusters.xlsx',
    'results/gmm_adam_vs_em_varying_dimensions.xlsx',
    'results/gmm_experiment_random_vs_points_initialization.xlsx',
]

# Load each file into a dataframe
df_varying_datasizes = pd.read_excel(file_paths[0])
df_varying_clusters = pd.read_excel(file_paths[1])
df_varying_dimensions = pd.read_excel(file_paths[2])
df_random_vs_points = pd.read_excel(file_paths[3])

# List to store dataframes and experiment names
experiments = [
    (df_varying_datasizes, "Varying Datasizes"),
    (df_varying_clusters, "Varying Clusters"),
    (df_varying_dimensions, "Varying Dimensions"),
    (df_random_vs_points, "Random Initializations"),
]

compact_column_names = {
    'method': 'Method, LR',
    'data_size': 'Size',
    'n_features': 'N',
    'n_clusters': 'K',
    'iterations': 'Iters',
    'log_likelihood': 'LL',
    'rand_score': 'RI',  # Rand Index
    'adjusted_mutual_info_score': 'AMI',  # Adjusted Mutual Info
    'v_measure_score': 'V',  # V-Measure
    'time_taken': 'Time (s)',
}

# Function to update the method column with learning rate for Adam-based methods
def update_method_column(row):
    if row['method'] == 'Adam':
        return f"Adam, {row['learning_rate']:.0e}"
    return row['method']

# Function to create a LaTeX table from a dataframe
def create_latex_table(df, experiment_name):
    metrics_columns = ['iterations', 'log_likelihood', 'rand_score', 'adjusted_mutual_info_score', 'v_measure_score', 'time_taken']
    
    # Apply the method update to add learning rates for Adam
    df['method'] = df.apply(update_method_column, axis=1)
    
    # Filter the dataframe to include only the necessary columns
    df_filtered = df[['method', 'data_size', 'n_features', 'n_clusters'] + metrics_columns]

    # Rename columns to more compact names
    df_filtered = df_filtered.rename(columns=compact_column_names)
    
    # Convert to LaTeX table format
    latex_table = df_filtered.to_latex(index=False, float_format="%.2f", caption=f"Results for {experiment_name}")
    
    return latex_table


# Create LaTeX tables for each experiment
latex_tables = {experiment_name: create_latex_table(df, experiment_name) for df, experiment_name in experiments}

print(latex_tables["Varying Datasizes"])
print(latex_tables["Varying Clusters"])
print(latex_tables["Varying Dimensions"])

\begin{table}
\caption{Results for Varying Datasizes}
\begin{tabular}{lrrrrrrrrr}
\toprule
Method, LR & Size & N & K & Iters & LL & RI & AMI & V & Time (s) \\
\midrule
EM & 100 & 4 & 4 & 2 & -6.78 & 1.00 & 1.00 & 1.00 & 0.02 \\
Adam, 1e-01 & 100 & 4 & 4 & 46 & -6.78 & 1.00 & 1.00 & 1.00 & 0.56 \\
Adam, 1e-02 & 100 & 4 & 4 & 82 & -6.78 & 1.00 & 1.00 & 1.00 & 0.93 \\
Adam, 1e-03 & 100 & 4 & 4 & 526 & -6.78 & 1.00 & 1.00 & 1.00 & 3.34 \\
EM & 500 & 4 & 4 & 2 & -6.93 & 1.00 & 1.00 & 1.00 & 0.01 \\
Adam, 1e-01 & 500 & 4 & 4 & 44 & -6.93 & 1.00 & 1.00 & 1.00 & 0.27 \\
Adam, 1e-02 & 500 & 4 & 4 & 51 & -6.93 & 1.00 & 1.00 & 1.00 & 0.31 \\
Adam, 1e-03 & 500 & 4 & 4 & 210 & -6.93 & 1.00 & 1.00 & 1.00 & 1.23 \\
EM & 1000 & 4 & 4 & 2 & -6.96 & 1.00 & 1.00 & 1.00 & 0.01 \\
Adam, 1e-01 & 1000 & 4 & 4 & 28 & -6.97 & 1.00 & 1.00 & 1.00 & 0.14 \\
Adam, 1e-02 & 1000 & 4 & 4 & 14 & -6.96 & 1.00 & 1.00 & 1.00 & 0.09 \\
Adam, 1e-03 & 1000 & 4 & 4 & 152 & -6.96 & 1.00 & 1.00 & 1.00 & 1.27 \\
EM & 5000 & 4 &

In [9]:
import pandas as pd

# Load the uploaded Excel files
file_paths = [
    'results/gmm_adam_vs_em_varying_datasizes.xlsx',
    'results/gmm_adam_vs_em_varying_clusters.xlsx',
    'results/gmm_adam_vs_em_varying_dimensions.xlsx',
    'results/gmm_experiment_random_vs_points_initialization.xlsx',
]

# Load each file into a dataframe
df_varying_datasizes = pd.read_excel(file_paths[0])
df_varying_clusters = pd.read_excel(file_paths[1])
df_varying_dimensions = pd.read_excel(file_paths[2])
df_random_vs_points = pd.read_excel(file_paths[3])

# List to store dataframes and experiment names
experiments = [
    (df_varying_datasizes, "Varying Datasizes"),
    (df_varying_clusters, "Varying Clusters"),
    (df_varying_dimensions, "Varying Dimensions"),
    (df_random_vs_points, "Random Initializations"),
]

# Compact column names for LaTeX
compact_column_names = {
    'method': 'Method, LR',
    'data_size': 'Size',
    'n_features': 'N',
    'n_clusters': 'K',
    'iterations': 'Iters',
    'log_likelihood': 'LL',
    'rand_score': 'RI',  # Rand Index
    'adjusted_mutual_info_score': 'AMI',  # Adjusted Mutual Info
    'v_measure_score': 'V',  # V-Measure
    'time_taken': 'Time (s)',
}

# Function to update the method column with learning rate for Adam-based methods
def update_method_column(row):
    if row['method'] == 'Adam':
        return f"Adam, {row['learning_rate']:.0e}"
    return row['method']

# Function to calculate mean ± std and format it for LaTeX
def calculate_mean_std_for_metrics(df_group):
    # Dictionary to hold mean ± std formatted strings for each metric
    result = {}
    for metric in ['iterations', 'log_likelihood', 'rand_score', 'time_taken']:
        mean_val = df_group[metric].mean()
        std_val = df_group[metric].std()
        result[metric] = f"${mean_val:.2f} \pm {std_val:.2f}$"
    return pd.Series(result)

# Function to create a LaTeX table from a dataframe
def create_latex_table(df, experiment_name):
    # Apply the method update to add learning rates for Adam
    df['method'] = df.apply(update_method_column, axis=1)
    
    # Add the 'init_method' column based on the row index
    df['init_method'] = ['random' if i < 10 else 'points' for i in range(len(df))]
    
    # Group by method, data size, number of features, number of clusters, and init method
    grouped = df.groupby(['method', 'init_method']).apply(calculate_mean_std_for_metrics)
    
    # Rename columns to more compact names
    grouped = grouped.rename(columns=compact_column_names).reset_index()
    
    # Convert to LaTeX table format
    latex_table = grouped.to_latex(index=False, caption=f"Results for {experiment_name}", escape=False)
    
    return latex_table

# Create LaTeX tables for each experiment
latex_tables = {experiment_name: create_latex_table(df, experiment_name) for df, experiment_name in experiments}

# Print the LaTeX table for the "Random Initializations" experiment
print(latex_tables["Random Initializations"])


\begin{table}
\caption{Results for Random Initializations}
\begin{tabular}{llllll}
\toprule
method & init_method & Iters & LL & RI & Time (s) \\
\midrule
Adam, 1e+00 & points & $56.00 \pm 41.34$ & $-8.94 \pm 0.99$ & $0.43 \pm 0.21$ & $0.64 \pm 0.44$ \\
Adam, 1e+00 & random & $53.00 \pm 72.12$ & $-8.99 \pm 0.72$ & $0.57 \pm 0.38$ & $0.67 \pm 0.83$ \\
Adam, 1e+01 & points & $132.11 \pm 126.27$ & $-17.86 \pm 1.51$ & $0.41 \pm 0.19$ & $1.51 \pm 1.71$ \\
Adam, 1e+01 & random & $208.00 \pm 251.73$ & $-19.21 \pm 0.49$ & $0.25 \pm 0.00$ & $1.75 \pm 1.98$ \\
Adam, 1e-01 & points & $199.67 \pm 159.40$ & $-7.30 \pm 0.33$ & $0.90 \pm 0.14$ & $2.54 \pm 2.63$ \\
Adam, 1e-01 & random & $181.00 \pm 183.85$ & $-7.58 \pm 0.77$ & $0.82 \pm 0.26$ & $1.89 \pm 1.80$ \\
Adam, 1e-02 & points & $630.17 \pm 246.43$ & $-7.57 \pm 0.39$ & $0.85 \pm 0.17$ & $7.40 \pm 3.74$ \\
Adam, 1e-02 & random & $1065.50 \pm 116.67$ & $-7.41 \pm 0.13$ & $0.77 \pm 0.14$ & $12.53 \pm 1.09$ \\
EM & points & $8.94 \pm 4.02$ & $-7.20

  result[metric] = f"${mean_val:.2f} \pm {std_val:.2f}$"
  grouped = df.groupby(['method', 'init_method']).apply(calculate_mean_std_for_metrics)
  grouped = df.groupby(['method', 'init_method']).apply(calculate_mean_std_for_metrics)
  grouped = df.groupby(['method', 'init_method']).apply(calculate_mean_std_for_metrics)
  grouped = df.groupby(['method', 'init_method']).apply(calculate_mean_std_for_metrics)
