# Test CORAL Loss Implementation

This notebook tests the CORAL (Consistent Rank Logits) ordinal-regression loss implementation on CPU, using a small subset of the data for quick validation.

In [11]:
import autogluon
import autogluon.multimodal
import autogluon.tabular

# Show where AutoGluon was imported from, so the reader can check which
# installation this kernel is actually using.
print(autogluon.__path__)  # search paths of the top-level namespace package
for loaded_subpackage in (autogluon.multimodal, autogluon.tabular):
    print(loaded_subpackage.__file__)  # file each subpackage was loaded from

_NamespacePath(['c:\\Users\\z5489720\\OneDrive - UNSW\\TERM 1\\Github\\aglone\\autogluon\\autogluon', 'autogluon', '__editable__.autogluon-1.4.1b20251114.finder.__path_hook__', 'autogluon', '__editable__.autogluon_common-1.4.1b20251114.finder.__path_hook__', 'autogluon', '__editable__.autogluon_core-1.4.1b20251114.finder.__path_hook__', 'autogluon', '__editable__.autogluon_eda-1.4.1b20251114.finder.__path_hook__', 'autogluon', '__editable__.autogluon_features-1.4.1b20251114.finder.__path_hook__', 'autogluon', '__editable__.autogluon_multimodal-1.4.1b20251114.finder.__path_hook__', 'autogluon', '__editable__.autogluon_tabular-1.4.1b20251114.finder.__path_hook__', 'autogluon', '__editable__.autogluon_timeseries-1.4.1b20251114.finder.__path_hook__'])
C:\Users\z5489720\OneDrive - UNSW\TERM 1\Github\aglone\autogluon\multimodal\src\autogluon\multimodal\__init__.py
C:\Users\z5489720\OneDrive - UNSW\TERM 1\Github\aglone\autogluon\tabular\src\autogluon\tabular\__init__.py


In [13]:
import os
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from autogluon.multimodal import MultiModalPredictor

# ---------------------------
# Config: CPU Testing with CORAL
# ---------------------------
# Every data location hangs off a single project directory, so the notebook can
# be pointed at another checkout without editing three separate paths. Set the
# AGLONE_PROJECT_DIR environment variable to override the default location.
PROJECT_DIR = os.environ.get(
    "AGLONE_PROJECT_DIR",
    r"C:\Users\z5489720\OneDrive - UNSW\TERM 1\Github\aglone",
)
TRAIN_CSV = os.path.join(PROJECT_DIR, "train_data.csv")
TEST_CSV  = os.path.join(PROJECT_DIR, "test_data.csv")
IMG_DIR   = os.path.join(PROJECT_DIR, "images")
LABEL_COL = "rating"      # target column
IMAGE_COL = "image_path"  # column holding the image file paths

# CPU-only testing
# NOTE(review): autogluon is imported before this line runs; hiding GPUs through
# CUDA_VISIBLE_DEVICES is presumably only reliable when it is set before CUDA is
# initialised -- confirm, or move this above the autogluon import.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# ---------------------------
# Helpers
# ---------------------------
def to_abs(path: str, base: str) -> str:
    """Return ``path`` as an absolute path string.

    The value is converted with ``str()`` and stripped of surrounding
    whitespace. If the result is already absolute it is returned unchanged;
    otherwise it is joined onto ``base`` and passed through ``os.path.abspath``.
    """
    cleaned = str(path).strip()
    if os.path.isabs(cleaned):
        return cleaned
    joined = os.path.join(base, cleaned)
    return os.path.abspath(joined)

def add_abs_image_paths(df: pd.DataFrame, image_col: str, base_dir: str) -> pd.DataFrame:
    """Return a copy of ``df`` whose ``image_col`` holds absolute path strings.

    Each value of ``image_col`` is cast to ``str`` and resolved with ``to_abs``
    against ``base_dir``. The input frame itself is not modified.
    """
    resolved = df.copy()
    as_text = resolved[image_col].astype(str)
    resolved[image_col] = as_text.map(lambda rel: to_abs(rel, base_dir))
    return resolved

# ---------------------------
# Main Test Script
# ---------------------------
np.random.seed(123)

# Load data
print("Loading data...")
train_df = pd.read_csv(TRAIN_CSV, index_col=0)
test_df  = pd.read_csv(TEST_CSV,  index_col=0)

# Use small subsets for quick CPU testing
SAMPLE_SIZE = 50       # Reduce to 50 samples for fast testing
TEST_SAMPLE_SIZE = 20  # Upper bound on test rows kept for the prediction check
print(f"Using {SAMPLE_SIZE} training samples for quick CPU test")
train_df = train_df.sample(n=min(SAMPLE_SIZE, len(train_df)), random_state=42).reset_index(drop=True)
test_df = test_df.sample(n=min(TEST_SAMPLE_SIZE, len(test_df)), random_state=42).reset_index(drop=True)

# Absolute image paths
train_df = add_abs_image_paths(train_df, IMAGE_COL, IMG_DIR)
test_df  = add_abs_image_paths(test_df,  IMAGE_COL, IMG_DIR)

# Sanity check: count image paths that do not exist on disk
missing_train = (~train_df[IMAGE_COL].map(os.path.exists)).sum()
missing_test = (~test_df[IMAGE_COL].map(os.path.exists)).sum()
print(f"Missing files - Train: {missing_train}, Test: {missing_test}")

# Check label distribution
print("\nLabel distribution in training data:")
print(train_df[LABEL_COL].value_counts().sort_index())
print(f"\nUnique classes: {sorted(train_df[LABEL_COL].unique())}")

# CORAL Loss Configuration for CPU Testing
# CRITICAL: The config key is "optim.loss_func" not "optim.loss_function"!
coral_config = {
    # Loss function - CORAL (use loss_func not loss_function!)
    "optim.loss_func": "coral",  # ⚠️ CORRECT KEY!

    # Minimal training for quick test
    "optim.max_epochs": 3,
    # "optim.lr" replaces "optim.learning_rate", which AutoGluon reports as
    # deprecated in its training log.
    "optim.lr": 1e-4,

    # CPU configuration
    "env.num_gpus": 0,  # Force CPU
    "env.per_gpu_batch_size": 4,  # Small batch size for CPU
    "env.num_workers": 0,  # 0 workers on CPU to avoid multiprocessing issues
    "env.num_workers_inference": 0,

    # Use smaller model for faster CPU testing
    "model.hf_text.checkpoint_name": "prajjwal1/bert-tiny",  # Tiny BERT model
    "model.timm_image.checkpoint_name": "resnet18",  # Small ResNet
}

print("\n" + "="*60)
print("TESTING CORAL LOSS ON CPU")
print("="*60)
print(f"Loss function: {coral_config['optim.loss_func']}")
print(f"Epochs: {coral_config['optim.max_epochs']}")
print(f"Batch size: {coral_config['env.per_gpu_batch_size']}")
print(f"Training samples: {len(train_df)}")
print(f"Test samples: {len(test_df)}")
print("="*60 + "\n")

# Create predictor
predictor = MultiModalPredictor(
    label=LABEL_COL,
    problem_type="multiclass",  # Treat as multiclass (ordinal)
    eval_metric="accuracy"
)

# Fit with CORAL loss
print("Starting training with CORAL loss...")
print("⚠️ LOOK FOR DEBUG MESSAGES ABOUT LOSS FUNCTION SELECTION ⚠️\n")
predictor.fit(
    train_data=train_df,
    hyperparameters=coral_config,
    time_limit=600,  # 10 minute timeout for safety
)

print("\n" + "="*60)
print("Training completed!")
print("="*60)

No path specified. Models will be saved in: "AutogluonModels\ag-20251116_014835"
AutoGluon Version:  1.4.1b20251114
Python Version:     3.11.14
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Pytorch Version:    2.7.1+cpu
CUDA Version:       CUDA is not available
GPU Memory:         
Total GPU Memory:   Free: 0.00 GB, Allocated: 0.00 GB, Total: 0.00 GB
GPU Count:          0
Memory Avail:       7.15 GB / 31.66 GB (22.6%)
Disk Space Avail:   68.40 GB / 950.87 GB (7.2%)
AutoGluon Version:  1.4.1b20251114
Python Version:     3.11.14
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Pytorch Version:    2.7.1+cpu
CUDA Version:       CUDA is not available
GPU Memory:         
Total GPU Memory:   Free: 0.00 GB, Allocated: 0.00 GB, Total: 0.00 GB
GPU Count:          0
Memory Avail:       7.15 GB / 31.66 GB (22.6%)
Disk Space Avail:   68.40 GB / 950.87 GB (7.2%)


Loading data...
Using 50 training samples for quick CPU test
Missing files - Train: 0, Test: 0

Label distribution in training data:
rating
1     4
2     5
3    16
4    17
5     8
Name: count, dtype: int64

Unique classes: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5)]

TESTING CORAL LOSS ON CPU
Loss function: coral
Epochs: 3
Batch size: 4
Training samples: 50
Test samples: 20

Starting training with CORAL loss...
⚠️ LOOK FOR DEBUG MESSAGES ABOUT LOSS FUNCTION SELECTION ⚠️




AutoMM starts to create your model. ✨✨✨

To track the learning progress, you can open a terminal and launch Tensorboard:
    ```shell
    # Assume you have installed tensorboard
    tensorboard --logdir c:\Users\z5489720\OneDrive - UNSW\TERM 1\Github\aglone\autogluon\AutogluonModels\ag-20251116_014835
    ```

Seed set to 0
Seed set to 0
The hyperparameter name optim.learning_rate is depreciated. We recommend using the new name optim.lr instead.The deprecated hyperparameter will raise an exception starting in AutoGluon 1.4.0
The hyperparameter name optim.learning_rate is depreciated. We recommend using the new name optim.lr instead.The deprecated hyperparameter will raise an exception starting in AutoGluon 1.4.0
GPU Count: 0
GPU Count to be Used: 0

GPU Count: 0
GPU Count to be Used: 0

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
GPU available: False, used: False
TPU available: False, using: 0 TPU cores

  | Name              | Type                

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0, global step 1: 'val_accuracy' reached 0.30000 (best 0.30000), saving model to 'C:\\Users\\z5489720\\OneDrive - UNSW\\TERM 1\\Github\\aglone\\autogluon\\AutogluonModels\\ag-20251116_014835\\epoch=0-step=1.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1, global step 2: 'val_accuracy' reached 0.30000 (best 0.30000), saving model to 'C:\\Users\\z5489720\\OneDrive - UNSW\\TERM 1\\Github\\aglone\\autogluon\\AutogluonModels\\ag-20251116_014835\\epoch=1-step=2.ckpt' as top 3


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2, global step 3: 'val_accuracy' reached 0.20000 (best 0.30000), saving model to 'C:\\Users\\z5489720\\OneDrive - UNSW\\TERM 1\\Github\\aglone\\autogluon\\AutogluonModels\\ag-20251116_014835\\epoch=2-step=3.ckpt' as top 3
`Trainer.fit` stopped: `max_epochs=3` reached.
`Trainer.fit` stopped: `max_epochs=3` reached.
Start to fuse 3 checkpoints via the greedy soup algorithm.
Start to fuse 3 checkpoints via the greedy soup algorithm.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


Predicting: |          | 0/? [00:00<?, ?it/s]

AutoMM has created your model. 🎉🎉🎉

To load the model, use the code below:
    ```python
    from autogluon.multimodal import MultiModalPredictor
    predictor = MultiModalPredictor.load("c:\Users\z5489720\OneDrive - UNSW\TERM 1\Github\aglone\autogluon\AutogluonModels\ag-20251116_014835")
    ```

If you are not satisfied with the model, try to increase the training time, 
adjust the hyperparameters (https://auto.gluon.ai/stable/tutorials/multimodal/advanced_topics/customization.html),
or post issues on GitHub (https://github.com/autogluon/autogluon/issues).





Training completed!


## Test Predictions with CORAL

Now let's verify that predictions work correctly with CORAL loss.

In [None]:
# Test predictions
print("\nTesting predictions on test data...")
# Drop the label once and reuse the frame for both predict() and predict_proba().
test_features = test_df.drop(columns=[LABEL_COL])
predictions = predictor.predict(test_features)
print(f"\nPredictions shape: {predictions.shape}")
print(f"Predictions:\n{predictions.head(10)}")
print(f"\nUnique predicted values: {sorted(predictions.unique())}")

# Test predict_proba
print("\n" + "="*60)
print("Testing predict_proba (probability outputs)...")
probabilities = predictor.predict_proba(test_features)
print(f"Probabilities shape: {probabilities.shape}")
print("\nFirst 5 probability distributions:")
print(probabilities.head())
probs_sum_to_one = np.allclose(probabilities.sum(axis=1), 1.0)
print(f"\nProbabilities sum to 1? {probs_sum_to_one}")

# Compare predictions with ground truth
print("\n" + "="*60)
print("Comparing predictions with ground truth...")
comparison = pd.DataFrame({
    'True': test_df[LABEL_COL].values,
    'Predicted': predictions.values
})
print(comparison.head(10))

# Calculate accuracy
accuracy = (comparison['True'] == comparison['Predicted']).mean()
print(f"\nTest Accuracy: {accuracy:.2%}")

# Calculate mean absolute error (useful for ordinal data)
mae = np.abs(comparison['True'] - comparison['Predicted']).mean()
print(f"Mean Absolute Error: {mae:.3f}")

# Fail loudly if the outputs are malformed, instead of printing the success
# banner unconditionally. The checks run last so the diagnostics above are
# still shown when one of them trips.
assert len(predictions) == len(test_df), (
    f"Expected {len(test_df)} predictions, got {len(predictions)}"
)
assert probs_sum_to_one, "predict_proba rows do not sum to 1"
unexpected_labels = set(predictions.unique()) - set(train_df[LABEL_COL].unique())
assert not unexpected_labels, (
    f"Predicted labels not present in the training data: {sorted(unexpected_labels)}"
)

print("\n" + "="*60)
print("CORAL LOSS TEST COMPLETED SUCCESSFULLY!")
print("="*60)

## Compare CORAL vs Standard Cross-Entropy (Optional)

If you want to compare CORAL with standard cross-entropy loss, run this cell. It reuses `test_df` and the CORAL `predictions` from the cells above, so run those first.

In [None]:
# Optional: Train with standard CrossEntropyLoss for comparison
baseline_config = {
    # No CORAL - standard cross-entropy (default)
    # "optim.loss_func": "cross_entropy",  # Default, no need to specify

    "optim.max_epochs": 3,
    # "optim.lr" replaces "optim.learning_rate", which AutoGluon reports as
    # deprecated in its training log.
    "optim.lr": 1e-4,

    "env.num_gpus": 0,
    "env.per_gpu_batch_size": 4,
    "env.num_workers": 0,
    "env.num_workers_inference": 0,

    "model.hf_text.checkpoint_name": "prajjwal1/bert-tiny",
    "model.timm_image.checkpoint_name": "resnet18",
}

print("\n" + "="*60)
print("TRAINING WITH STANDARD CROSS-ENTROPY (BASELINE)")
print("="*60)

# NOTE(review): this save path is fixed, so it may already exist when the cell
# is re-run -- confirm how MultiModalPredictor treats a non-empty save path.
predictor_baseline = MultiModalPredictor(
    label=LABEL_COL,
    problem_type="multiclass",
    eval_metric="accuracy",
    path="./baseline_model"  # Different path to avoid conflict
)

predictor_baseline.fit(
    train_data=train_df,
    hyperparameters=baseline_config,
    time_limit=600,
)

# Compare predictions
# Drop the label once and reuse the frame for both inference calls.
baseline_features = test_df.drop(columns=[LABEL_COL])
baseline_preds = predictor_baseline.predict(baseline_features)
baseline_probs = predictor_baseline.predict_proba(baseline_features)  # not used further in this cell

print("\n" + "="*60)
print("COMPARISON: CORAL vs BASELINE")
print("="*60)

# `predictions` holds the CORAL model's outputs computed in the previous cell.
comparison_df = pd.DataFrame({
    'True': test_df[LABEL_COL].values,
    'CORAL': predictions.values,
    'Baseline': baseline_preds.values
})
print(comparison_df.head(10))

coral_acc = (comparison_df['True'] == comparison_df['CORAL']).mean()
baseline_acc = (comparison_df['True'] == comparison_df['Baseline']).mean()

# Mean absolute error treats the labels as ordered numeric values.
coral_mae = np.abs(comparison_df['True'] - comparison_df['CORAL']).mean()
baseline_mae = np.abs(comparison_df['True'] - comparison_df['Baseline']).mean()

print(f"\nCORAL Accuracy: {coral_acc:.2%}")
print(f"Baseline Accuracy: {baseline_acc:.2%}")
print(f"\nCORAL MAE: {coral_mae:.3f}")
print(f"Baseline MAE: {baseline_mae:.3f}")