# Model Training Results - Domain-Shifted Variant

This notebook analyzes the training results for the domain-shifted variant of the Caltech-256 2-domain synthetic taxonomy experiment. It plots the training and validation accuracy curves for each domain and generates a LaTeX table of the test-set evaluation results, mirroring the original model training analysis.

In [1]:
from csv import DictReader
import matplotlib
import numpy as np

matplotlib.use("pgf")
import matplotlib.pyplot as plt
import pandas as pd
import os

# LaTeX settings, applied one key at a time so each choice is documented.
# Text is typeset by LaTeX instead of matplotlib's built-in text renderer.
plt.rcParams["text.usetex"] = True
# Font used for figure text (presumably chosen to match the thesis body font).
plt.rcParams["font.family"] = "EB Garamond"
plt.rcParams["font.size"] = 11
# TeX engine invoked by the pgf backend selected via matplotlib.use("pgf").
plt.rcParams["pgf.texsystem"] = "lualatex"

In [2]:
# Load training and validation data for domain-shifted variant
def load_csv_data(filename):
    """Load a (step, value) series from a CSV file.

    The file is read with ``csv.DictReader`` and must provide ``Step`` and
    ``Value`` columns; any other columns are ignored.

    Errors are reported with ``print`` instead of being raised, so a missing
    or malformed file does not abort the notebook run.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(steps, values)`` tuple of two lists that always have the same
        length: ``steps`` holds the ``Step`` column converted to ``int`` and
        ``values`` the ``Value`` column converted to ``float``. Both lists are
        empty when the file does not exist. When a row cannot be parsed,
        reading stops there and only the rows before it are returned.
    """
    steps = []
    values = []
    try:
        # newline="" lets the csv module do its own line-ending handling.
        with open(filename, "r", newline="") as f:
            for row in DictReader(f):
                # Convert both fields before storing either one, so a bad
                # value can never leave `steps` one element longer than
                # `values` (which would break plotting later on).
                step = int(row["Step"])
                value = float(row["Value"])
                steps.append(step)
                values.append(value)
        print(f"Loaded {filename}: {len(steps)} data points")
    except FileNotFoundError:
        print(f"Warning: {filename} not found")
    except Exception as e:
        print(f"Error loading {filename}: {e}")
    return steps, values

In [3]:
# Plot training curves for domain-shifted variant.
# Each entry describes one figure: "files" lists the per-domain CSV prefixes
# (suffixed with _train.csv / _val.csv when loading), "title" is the plot
# title, and "save_name" is the stem of the .pgf file that gets written.
datasets = [
    {
        "name": "Caltech-256 2-Domain Variant Domain Shifted",
        "files": [
            "caltech256_synthetic_variant_domain_shifted_A",
            "caltech256_synthetic_variant_domain_shifted_B",
        ],
        "title": "Caltech-256 2-Domain Variant Domain Shifted",
        "save_name": "caltech256_2domain_variant_domain_shifted",
    }
]

# Paths of the figures that were actually written, used for the final report.
saved_figures = []

# Create plot for domain-shifted variant
for dataset in datasets:
    # Explicit figure/axes handles keep the loop independent of pyplot's
    # implicit "current figure" state.
    fig, ax = plt.subplots(figsize=(8, 6))
    curves_plotted = 0

    for domain_file in dataset["files"]:
        # Load training data
        steps_train, train = load_csv_data(f"{domain_file}_train.csv")

        # Load validation data
        steps_val, val = load_csv_data(f"{domain_file}_val.csv")

        # Extract domain label (A or B): the part after the last underscore
        domain_label = domain_file.rsplit("_", 1)[-1]

        # Plot training and validation curves (skipped when a file was
        # missing or empty, in which case the loader returned empty lists)
        if steps_train and train:
            ax.plot(steps_train, train, label=f"Train Domain {domain_label}")
            curves_plotted += 1
        if steps_val and val:
            ax.plot(steps_val, val, label=f"Validation Domain {domain_label}")
            curves_plotted += 1

    if not curves_plotted:
        # Do not overwrite an existing figure with an empty plot.
        print(f"Warning: no curves loaded for {dataset['name']}; figure not saved")
        plt.close(fig)
        continue

    ax.set_xlabel("Steps")
    ax.set_ylabel("Accuracy")
    ax.set_title(dataset["title"])
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()

    output_path = f"../../thesis/figures/{dataset['save_name']}.pgf"
    fig.savefig(output_path, bbox_inches="tight")
    saved_figures.append(output_path)

    # The pgf backend is non-interactive, so plt.show() would only emit a
    # warning; close the figure instead to release its memory.
    plt.close(fig)

# Report success only when at least one figure was really written.
if saved_figures:
    print("Training curves plotted and saved successfully!")
else:
    print("Warning: no training curve figures were saved")

Loaded caltech256_synthetic_variant_domain_shifted_A_train.csv: 289 data points
Loaded caltech256_synthetic_variant_domain_shifted_A_val.csv: 50 data points
Loaded caltech256_synthetic_variant_domain_shifted_B_train.csv: 294 data points
Loaded caltech256_synthetic_variant_domain_shifted_B_val.csv: 50 data points
Training curves plotted and saved successfully!
Training curves plotted and saved successfully!


  plt.show()


In [4]:
# Create evaluation results table for domain-shifted variant.
# Reads one evaluation CSV per domain, combines them into a single frame,
# writes a LaTeX table for the thesis, and prints the results.
# pandas is used as `pd` from the notebook's import cell.

# Load the evaluation data from available CSV files
eval_data_list = []

# Domain-shifted variant evaluation data - using actual file names.
# Each entry is (CSV file name, domain label, dataset variant label).
eval_files = [
    (
        "caltech256_synthetic_variant_domain_shifted_A_eval.csv",
        "A",
        "Caltech-256 2-Domain Variant Domain Shifted",
    ),
    (
        "caltech256_synthetic_variant_domain_shifted_B_eval.csv",
        "B",
        "Caltech-256 2-Domain Variant Domain Shifted",
    ),
]

for filename, domain, dataset_variant in eval_files:
    try:
        # Tag every row with its domain and variant so the rows stay
        # identifiable after concatenation.
        eval_df = pd.read_csv(filename).assign(
            **{"Domain": domain, "Dataset Variant": dataset_variant}
        )
        eval_data_list.append(eval_df)
        print(f"Loaded {filename}: {eval_df.shape}")
    except FileNotFoundError:
        print(f"Warning: {filename} not found")
    except Exception as e:
        print(f"Error loading {filename}: {e}")

if eval_data_list:
    # Build the final frame in one chain without in-place mutation, so
    # re-running the cell always starts from the freshly loaded data.
    # Selecting the four table columns also discards every other column
    # (e.g. wall-time columns), so no separate drop step is needed.
    eval_data_domain_shifted = (
        pd.concat(eval_data_list, ignore_index=True)
        .loc[:, ["Dataset Variant", "Domain", "Step", "Value"]]
        .astype({"Value": float})
        .rename(columns={"Value": "Accuracy", "Step": "Steps"})
    )

    # Generate LaTeX table
    latex_table_domain_shifted = (
        eval_data_domain_shifted.style.hide(axis="index")
        .format(precision=3)
        .to_latex(
            caption="Evaluation results on test sets for domain-shifted variant. Models were checkpointed after every epoch and evaluated on the validation loss. The model with the lowest validation loss was selected for evaluation on the test set.",
            label="tab:evaluation_results_domain_shifted",
            column_format="cccc",
            position="ht",
            position_float="centering",
            hrules=True,
        )
    )

    # Save to file
    with open("../../thesis/figures/evaluation_results_domain_shifted.tex", "w") as f:
        f.write(latex_table_domain_shifted)

    print(
        "Results table saved to ../../thesis/figures/evaluation_results_domain_shifted.tex"
    )
    print("\nEvaluation Results:")
    print(eval_data_domain_shifted.to_string(index=False))

else:
    # Always define the result frame, so a later cell never picks up a stale
    # value left in the kernel by an earlier run of this cell.
    eval_data_domain_shifted = pd.DataFrame(
        columns=["Dataset Variant", "Domain", "Steps", "Accuracy"]
    )
    print("No evaluation data found for domain-shifted variant")

Loaded caltech256_synthetic_variant_domain_shifted_A_eval.csv: (1, 5)
Loaded caltech256_synthetic_variant_domain_shifted_B_eval.csv: (1, 5)
Results table saved to ../../thesis/figures/evaluation_results_domain_shifted.tex

Evaluation Results:
                            Dataset Variant Domain  Steps  Accuracy
Caltech-256 2-Domain Variant Domain Shifted      A  14450  0.712738
Caltech-256 2-Domain Variant Domain Shifted      B  14700  0.738055


In [5]:
# Analysis and comparison of domain-shifted variant results.
# Prints per-domain accuracy, summary statistics, a short interpretation, and
# the list of output files together with whether each one exists on disk.
print("=== Domain-Shifted Variant Analysis ===")

# The name check guards against running this cell before the evaluation cell
# has defined the results frame.
if "eval_data_domain_shifted" in locals() and not eval_data_domain_shifted.empty:
    print("\nDomain-Shifted Variant Results:")
    for domain, accuracy in zip(
        eval_data_domain_shifted["Domain"], eval_data_domain_shifted["Accuracy"]
    ):
        print(f"Domain {domain}: Accuracy = {accuracy:.3f}")

    # Calculate statistics
    mean_accuracy = eval_data_domain_shifted["Accuracy"].mean()

    print("\nSummary Statistics:")
    print(f"Mean Accuracy: {mean_accuracy:.3f}")

    # A spread and a per-domain breakdown are only meaningful with more than
    # one row (the standard deviation of a single value is NaN).
    if len(eval_data_domain_shifted) > 1:
        std_accuracy = eval_data_domain_shifted["Accuracy"].std()
        print(f"Standard Deviation: {std_accuracy:.3f}")

        # Domain-wise analysis
        domain_analysis = eval_data_domain_shifted.groupby("Domain")["Accuracy"].agg(
            ["mean", "count"]
        )
        print("\nDomain-wise Analysis:")
        print(domain_analysis)

    print("\n=== Training Characteristics ===")
    print("The domain-shifted variant tests the robustness of the synthetic taxonomy")
    print("approach when domain boundaries are not perfectly aligned with the")
    print("underlying data distribution. This provides insights into:")
    print("1. Model generalization across shifted domains")
    print("2. Stability of the taxonomy construction method")
    print("3. Impact of domain shift on cross-domain prediction accuracy")

else:
    print("No evaluation data available for analysis")

# Output files of this notebook as (description, path). Each path is checked
# on disk so the report does not claim a file that was never written.
generated_files = [
    (
        "Training curve plot",
        "../../thesis/figures/caltech256_2domain_variant_domain_shifted.pgf",
    ),
    (
        "Evaluation results table",
        "../../thesis/figures/evaluation_results_domain_shifted.tex",
    ),
]

print("\n=== Files Generated ===")
for description, path in generated_files:
    status = "" if os.path.isfile(path) else " (not found)"
    print(f"- {description}: {path}{status}")

=== Domain-Shifted Variant Analysis ===

Domain-Shifted Variant Results:
Domain A: Accuracy = 0.713
Domain B: Accuracy = 0.738

Summary Statistics:
Mean Accuracy: 0.725
Standard Deviation: 0.018

Domain-wise Analysis:
            mean  count
Domain                 
A       0.712738      1
B       0.738055      1

=== Training Characteristics ===
The domain-shifted variant tests the robustness of the synthetic taxonomy
approach when domain boundaries are not perfectly aligned with the
underlying data distribution. This provides insights into:
1. Model generalization across shifted domains
2. Stability of the taxonomy construction method
3. Impact of domain shift on cross-domain prediction accuracy

=== Files Generated ===
- Training curve plot: ../../thesis/figures/caltech256_2domain_variant_domain_shifted.pgf
- Evaluation results table: ../../thesis/figures/evaluation_results_domain_shifted.tex
