In [109]:
import os
import copy
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
import torch.nn.functional as F

Transformation Function

In [110]:
def get_tta_transforms():
    # Define a transformation that randomly masks some features
    class RandomFeatureMasking:
        def __init__(self, p=0.2):
            self.p = p

        def __call__(self, x):
            # x is a tensor of shape (batch_size, num_features)
            # Generate a mask with probability p of masking each feature
            mask = (torch.rand_like(x.float()) > self.p).long()
            x_transformed = x * mask
            return x_transformed

    return RandomFeatureMasking(p=0.2)

In [111]:
def weight_init(layers):
    for layer in layers:
        if isinstance(layer, nn.BatchNorm1d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()
        elif isinstance(layer, nn.Linear):
            n = layer.in_features
            y = 1.0 / np.sqrt(n)
            layer.weight.data.uniform_(-y, y)
            if layer.bias is not None:
                layer.bias.data.fill_(0)
            # nn.init.kaiming_normal_(layer.weight.data, nonlinearity='relu')

CoTTA Utility Functions

In [112]:
def copy_model_and_optimizer(model, optimizer):
    """Copy the model and optimizer states for resetting after adaptation."""
    model_state = deepcopy(model.state_dict())
    model_anchor = deepcopy(model)
    optimizer_state = deepcopy(optimizer.state_dict())
    ema_model = deepcopy(model)
    for param in ema_model.parameters():
        param.detach_()
    return model_state, optimizer_state, ema_model, model_anchor

def load_model_and_optimizer(model, optimizer, model_state, optimizer_state):
    """Restore the model and optimizer states from copies."""
    model.load_state_dict(model_state, strict=True)
    optimizer.load_state_dict(optimizer_state)

def configure_model(model):
    """Configure model for use with CoTTA."""
    model.train()
    model.requires_grad_(False)
    for m in model.modules():
        if isinstance(m, (nn.Embedding, nn.BatchNorm1d)):
            m.requires_grad_(True)
            if isinstance(m, nn.BatchNorm1d):
                m.track_running_stats = False
                m.running_mean = None
                m.running_var = None
    return model

def collect_params(model):
    """Collect all trainable parameters."""
    params = []
    names = []
    for nm, m in model.named_modules():
        for np_, p in m.named_parameters():
            if p.requires_grad:
                params.append(p)
                key = f"{nm}.{np_}" if nm else np_
                names.append(key)
    return params, names

Custom Binary Entropy Function

In [113]:
def binary_entropy(logits, logits_ema):
    """Entropy for binary classification."""
    p = torch.sigmoid(logits)
    p_ema = torch.sigmoid(logits_ema)
    entropy = -0.5 * (p_ema * torch.log(p + 1e-8) + (1 - p_ema) * torch.log(1 - p + 1e-8))
    entropy -= 0.5 * (p * torch.log(p_ema + 1e-8) + (1 - p) * torch.log(1 - p_ema + 1e-8))
    return entropy.squeeze()

def update_ema_variables(ema_model, model, alpha_teacher):
    """Update EMA model parameters."""
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha_teacher).add_(param.data * (1 - alpha_teacher))
    return ema_model

CoTTA Class

In [114]:
class CoTTA(nn.Module):
    """CoTTA adapts a model by entropy minimization during testing."""
    def __init__(self, model, optimizer, steps=1, episodic=False):
        super().__init__()
        self.model = model
        self.optimizer = optimizer
        self.steps = steps
        assert steps > 0, "CoTTA requires >= 1 step(s) to forward and update"
        self.episodic = episodic

        self.model_state, self.optimizer_state, self.model_ema, self.model_anchor = \
            copy_model_and_optimizer(self.model, self.optimizer)
        self.transform = get_tta_transforms()

    def forward(self, x):
        if self.episodic:
            self.reset()
        for _ in range(self.steps):
            outputs = self.forward_and_adapt(x, self.model, self.optimizer)
        return outputs

    def reset(self):
        if self.model_state is None or self.optimizer_state is None:
            raise Exception("Cannot reset without saved model/optimizer state")
        load_model_and_optimizer(self.model, self.optimizer,
                                 self.model_state, self.optimizer_state)
        self.model_state, self.optimizer_state, self.model_ema, self.model_anchor = \
            copy_model_and_optimizer(self.model, self.optimizer)

    @torch.enable_grad()
    def forward_and_adapt(self, x, model, optimizer):
        outputs = self.model(x)
        self.model_ema.train()
        # Teacher Prediction
        anchor_prob = torch.sigmoid(self.model_anchor(x)).detach()
        standard_ema = self.model_ema(x)
        # Augmentation-averaged Prediction
        N = 32
        outputs_emas = []
        to_aug = anchor_prob.mean() < 0.1  # Adjusted for binary classification
        if to_aug:
            for i in range(N):
                outputs_ = self.model_ema(self.transform(x)).detach()
                outputs_emas.append(outputs_)
            outputs_ema = torch.stack(outputs_emas).mean(0)
        else:
            outputs_ema = standard_ema
        # Student update
        loss = binary_entropy(outputs, outputs_ema.detach()).mean()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # Teacher update
        self.model_ema = update_ema_variables(self.model_ema, self.model, alpha_teacher=0.999)
        # Stochastic restore
        for nm, m in self.model.named_modules():
            for np_, p in m.named_parameters():
                if p.requires_grad:
                    mask = (torch.rand(p.shape) < 0.001).float().to(p.device)
                    with torch.no_grad():
                        key = f"{nm}.{np_}" if nm else np_
                        p.data = self.model_state[key] * mask + p * (1. - mask)
        return outputs_ema

Data Loading and Processing

In [115]:
def load_data(data_path):
    # Read feature mapping
    with open(os.path.join(data_path, 'feat.bid.txt'), 'r') as f:
        lines = f.readlines()
    feature_nums = int(lines[0].strip())
    # Assuming features are already mapped to integers in the data files

    # Read train data
    train_data = pd.read_csv(os.path.join(data_path, 'train.bid.txt'), header=None)
    # Read test data
    test_data = pd.read_csv(os.path.join(data_path, 'test.bid.txt'), header=None)

    return train_data, test_data, feature_nums

Dataset Class

In [116]:
class CTRDataset(Dataset):
    def __init__(self, data):
        # Data columns: click + winning price + hour + time_fraction + timestamp + features
        self.labels = data.iloc[:, 0].values.astype(np.float32)  # click labels
        self.features = data.iloc[:, 5:].values.astype(np.int64)  # features start from column 5

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = self.features[idx]
        y = self.labels[idx]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.float32)

## Function to expand embedding layers

In [117]:
def expand_embedding_layer(old_embedding, new_num_embeddings):
    old_num_embeddings, embedding_dim = old_embedding.weight.size()
    if new_num_embeddings <= old_num_embeddings:
        return old_embedding  # No need to expand
    # Create new embedding layer
    new_embedding = nn.Embedding(new_num_embeddings, embedding_dim)
    # Copy weights from old embedding
    with torch.no_grad():
        new_embedding.weight[:old_num_embeddings] = old_embedding.weight
        # Initialize new embeddings
        nn.init.xavier_uniform_(new_embedding.weight[old_num_embeddings:])
    return new_embedding

## Model Implementations
### DeepFM Model

In [118]:
class DeepFM(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims,
                 output_dim=1):
        super(DeepFM, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Linear part
        self.linear = nn.Embedding(self.feature_nums, output_dim)

        # FM embedding
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # MLP
        deep_input_dims = self.field_nums * self.latent_dims
        layers = []

        neuron_nums = [300, 300, 300]
        for neuron_num in neuron_nums:
            layers.append(nn.Linear(deep_input_dims, neuron_num))
            # layers.append(nn.BatchNorm1d(neuron_num))  # Uncomment if needed
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p=0.2))
            deep_input_dims = neuron_num

        layers.append(nn.Linear(deep_input_dims, 1))

        # Initialize weights
        weight_init(layers)

        self.mlp = nn.Sequential(*layers)

    def expand_embeddings(self, new_feature_nums):
        self.linear = expand_embedding_layer(self.linear, new_feature_nums)
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.feature_nums = new_feature_nums

    def forward(self, x):
        # Linear Part
        linear_out = torch.sum(self.linear(x), dim=1)  # Shape: (batch_size, output_dim)

        # FM Part
        embedding_x = self.feature_embedding(x)
        square_of_sum = torch.sum(embedding_x, dim=1) ** 2
        sum_of_square = torch.sum(embedding_x ** 2, dim=1)
        ix = 0.5 * torch.sum(square_of_sum - sum_of_square, dim=1, keepdim=True)  # Shape: (batch_size, 1)

        # Deep Part
        deep_out = self.mlp(embedding_x.view(-1, self.field_nums * self.latent_dims))  # Shape: (batch_size, 1)

        # Output
        out = linear_out + ix + deep_out  # Shape: (batch_size, 1)

        return out  # Return logits

FNN Model

In [119]:
class FNN(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims):
        super(FNN, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Embedding layer
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # MLP
        deep_input_dims = self.field_nums * self.latent_dims
        layers = []

        neuron_nums = [300, 300, 300]
        for neuron_num in neuron_nums:
            layers.append(nn.Linear(deep_input_dims, neuron_num))
            # layers.append(nn.BatchNorm1d(neuron_num))  # Uncomment if needed
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p=0.2))
            deep_input_dims = neuron_num

        layers.append(nn.Linear(deep_input_dims, 1))

        # Initialize weights
        weight_init(layers)

        self.mlp = nn.Sequential(*layers)

    def expand_embeddings(self, new_feature_nums):
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.feature_nums = new_feature_nums

    def forward(self, x):
        embedding_x = self.feature_embedding(x)
        out = self.mlp(embedding_x.view(-1, self.field_nums * self.latent_dims))

        return out  # Return logits


DCN Model

In [120]:
class DCN(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims,
                 output_dim=1):
        super(DCN, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Embedding layer
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # Deep Network
        deep_input_dims = self.field_nums * self.latent_dims
        deep_net_layers = []
        neural_nums = [300, 300, 300]
        self.num_neural_layers = 5  # Number of layers in the cross network

        for neural_num in neural_nums:
            deep_net_layers.append(nn.Linear(deep_input_dims, neural_num))
            # deep_net_layers.append(nn.BatchNorm1d(neural_num))  # Uncomment if needed
            deep_net_layers.append(nn.ReLU())
            deep_net_layers.append(nn.Dropout(p=0.2))
            deep_input_dims = neural_num

        # Initialize weights
        weight_init(deep_net_layers)

        self.DN = nn.Sequential(*deep_net_layers)

        # Cross Network
        cross_input_dims = self.field_nums * self.latent_dims
        self.cross_net_w = nn.ModuleList([
            nn.Linear(cross_input_dims, cross_input_dims) for _ in range(self.num_neural_layers)
        ])

        # Initialize weights for cross network
        weight_init(self.cross_net_w)

        self.cross_net_b = nn.ParameterList([
            nn.Parameter(torch.zeros(cross_input_dims)) for _ in range(self.num_neural_layers)
        ])

        # Final Linear Layer
        self.linear = nn.Linear(deep_input_dims + cross_input_dims, output_dim)
        # nn.init.xavier_normal_(self.linear.weight)

    def expand_embeddings(self, new_feature_nums):
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.feature_nums = new_feature_nums
        # Adjust input dimensions if latent_dims remain the same
        cross_input_dims = self.field_nums * self.latent_dims
        for layer in self.cross_net_w:
            layer.in_features = cross_input_dims
            layer.out_features = cross_input_dims
            layer.weight.data = torch.randn(cross_input_dims, cross_input_dims) * np.sqrt(2.0 / cross_input_dims)
        for i in range(len(self.cross_net_b)):
            self.cross_net_b[i] = nn.Parameter(torch.zeros(cross_input_dims))
        self.linear = nn.Linear(cross_input_dims + self.DN[-2].out_features, 1)

    def forward(self, x):
        embedding_x = self.feature_embedding(x).view(-1, self.field_nums * self.latent_dims)

        # Cross Network
        cn_x0 = embedding_x
        cn_x = embedding_x
        for i in range(self.num_neural_layers):
            cn_x_w = self.cross_net_w[i](cn_x)
            cn_x = cn_x0 * cn_x_w + self.cross_net_b[i] + cn_x

        # Deep Network
        dn_x = self.DN(embedding_x)

        # Concatenate
        x_stack = torch.cat([cn_x, dn_x], dim=1)

        # Final output
        out = self.linear(x_stack)

        return out  # Return logits

AFM Model

In [121]:
class AFM(nn.Module):
    def __init__(self,
                 feature_nums,
                 field_nums,
                 latent_dims,
                 output_dim=1):
        super(AFM, self).__init__()
        self.feature_nums = feature_nums
        self.field_nums = field_nums
        self.latent_dims = latent_dims

        # Embedding layer
        self.feature_embedding = nn.Embedding(self.feature_nums, self.latent_dims)
        nn.init.xavier_uniform_(self.feature_embedding.weight.data)

        # Prepare index pairs for interactions
        self.row, self.col = [], []
        for i in range(self.field_nums - 1):
            for j in range(i + 1, self.field_nums):
                self.row.append(i)
                self.col.append(j)

        attention_factor = self.latent_dims

        # Attention network
        self.attention_net = nn.Linear(self.latent_dims, attention_factor)
        n = self.attention_net.in_features
        y = 1.0 / np.sqrt(n)
        self.attention_net.weight.data.uniform_(-y, y)
        self.attention_net.bias.data.fill_(0)

        self.attention_softmax = nn.Linear(attention_factor, 1)

        # Output layers
        self.fc = nn.Linear(self.latent_dims, output_dim)

        # Linear part
        self.linear = nn.Embedding(self.feature_nums, output_dim)
        self.bias = nn.Parameter(torch.zeros((output_dim,)))

    def expand_embeddings(self, new_feature_nums):
        self.feature_embedding = expand_embedding_layer(self.feature_embedding, new_feature_nums)
        self.linear = expand_embedding_layer(self.linear, new_feature_nums)
        self.feature_nums = new_feature_nums

    def forward(self, x):
        embedding_x = self.feature_embedding(x)

        # Pairwise interactions
        row_emb = embedding_x[:, self.row]  # Shape: (batch_size, num_pairs, latent_dims)
        col_emb = embedding_x[:, self.col]  # Shape: (batch_size, num_pairs, latent_dims)
        inner_product = row_emb * col_emb  # Element-wise multiplication

        # Attention mechanism
        attn_scores = F.relu(self.attention_net(inner_product))  # Shape: (batch_size, num_pairs, attention_factor)
        attn_scores = F.softmax(self.attention_softmax(attn_scores), dim=1)  # Shape: (batch_size, num_pairs, 1)
        attn_scores = F.dropout(attn_scores, p=0.2)

        # Weighted sum of interactions
        attn_output = torch.sum(attn_scores * inner_product, dim=1)  # Shape: (batch_size, latent_dims)
        attn_output = F.dropout(attn_output, p=0.2)

        # Output
        linear_part = self.bias + torch.sum(self.linear(x), dim=1)  # Shape: (batch_size, output_dim)
        out = linear_part + self.fc(attn_output)  # Shape: (batch_size, output_dim)

        return out  # Return logits

## Training and Testing with CoTTA
### Main Function

In [122]:
def main():
    dataset_ids = ['1458', '2259', '2261', '2821', '2997', '3358', '3386', '3427', '3476']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = 1024

    # Model parameters
    model_name = 'AFM'  # Change as needed
    latent_dims = 10
    dropout = 0.2
    num_layers = 5  # For DCN
    attn_size = 32  # For AFM
    epochs_per_dataset = 1  # Train for 1 epoch per dataset

    # Paths
    base_data_path = '/home/vladplyusnin/tftest/Deep-Learning-COPSCI764/Project/data/ipinyou'  # Adjust this path as needed

    # Initialize the model (we need to know feature_nums and field_nums)
    # For that, we need to load the first dataset to get feature_nums and field_nums

    # Initialize variables
    max_feature_nums = 0
    field_nums = None  # Will be set after loading the first dataset

    # Initialize model as None
    model = None

    # Loss and optimizer (initialized later)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = None

    for dataset_id in dataset_ids:
        data_path = os.path.join(base_data_path, dataset_id)

        # Load data
        train_data, test_data, feature_nums_dataset = load_data(data_path)
        field_nums_dataset = train_data.shape[1] - 5

        # Update max_feature_nums if needed
        if feature_nums_dataset > max_feature_nums:
            max_feature_nums = feature_nums_dataset

        # Initialize field_nums if not set
        if field_nums is None:
            field_nums = field_nums_dataset

        # Check that field_nums are the same
        if field_nums != field_nums_dataset:
            print(f"Dataset {dataset_id} has different field_nums.")
            # Handle this case as needed
            continue  # Skip this dataset for now

        # Expand model's embeddings if feature_nums increased
        if model is None:
            # Initialize the model
            if model_name == 'DeepFM':
                model = DeepFM(max_feature_nums, field_nums, latent_dims).to(device)
            elif model_name == 'FNN':
                model = FNN(max_feature_nums, field_nums, latent_dims).to(device)
            elif model_name == 'DCN':
                model = DCN(max_feature_nums, field_nums, latent_dims).to(device)
            elif model_name == 'AFM':
                model = AFM(max_feature_nums, field_nums, latent_dims).to(device)
            else:
                raise ValueError('Unknown model name')
            # Initialize optimizer
            optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
        else:
            # Expand embeddings if needed
            if feature_nums_dataset > model.feature_nums:
                model.expand_embeddings(feature_nums_dataset)
                # Update optimizer with new parameters
                optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

        # Split train data into training and validation sets
        from sklearn.model_selection import train_test_split
        train_df, val_df = train_test_split(train_data, test_size=0.2, random_state=42)

        # Create datasets and loaders
        train_dataset = CTRDataset(train_df)
        val_dataset = CTRDataset(val_df)
        test_dataset = CTRDataset(test_data)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=4)

        print(f"\nTraining on dataset {dataset_id}")
        # Train the model for 1 epoch on the current dataset
        train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=1, early_stopping_patience=None)

        # After training, re-initialize CoTTA with the updated model
        # Configure model for CoTTA
        cotta_model = deepcopy(model)
        cotta_model = configure_model(cotta_model)
        params, param_names = collect_params(cotta_model)
        cotta_optimizer = torch.optim.SGD(params, lr=0.0001)
        cotta = CoTTA(cotta_model, cotta_optimizer, steps=1, episodic=False)

        # Testing with CoTTA
        cotta_model.eval()
        y_true = []
        y_scores = []

        for x_batch, y_batch in test_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            outputs = cotta(x_batch)  # Outputs are logits
            probabilities = torch.sigmoid(outputs).detach().cpu().numpy().flatten()

            y_true.extend(y_batch.cpu().numpy())
            y_scores.extend(probabilities)

        test_auc = roc_auc_score(y_true, y_scores)
        print(f'Dataset {dataset_id}, Test AUC with CoTTA: {test_auc:.4f}')

Training Function

In [123]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, device, epochs, early_stopping_patience=None):
    best_loss = float('inf')
    epochs_no_improve = 0
    early_stop = False

    for epoch in range(epochs):
        if early_stop:
            print("Early stopping")
            break

        model.train()
        total_loss = 0
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            logits = model(x_batch).squeeze()
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)

        # Validation
        model.eval()
        val_total_loss = 0
        y_true = []
        y_scores = []
        with torch.no_grad():
            for x_val, y_val in valid_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                logits = model(x_val).squeeze()
                loss = criterion(logits, y_val)
                val_total_loss += loss.item()
                y_pred = torch.sigmoid(logits)
                y_true.extend(y_val.cpu().numpy())
                y_scores.extend(y_pred.cpu().numpy())
        val_avg_loss = val_total_loss / len(valid_loader)
        val_auc = roc_auc_score(y_true, y_scores)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Val Loss: {val_avg_loss:.4f}, Validation AUC: {val_auc:.4f}')

        # Early stopping not used since epochs=1
        if early_stopping_patience is not None:
            if val_avg_loss < best_loss:
                best_loss = val_avg_loss
                epochs_no_improve = 0
                # Save best model
                torch.save(model.state_dict(), 'best_model.pth')
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= early_stopping_patience:
                    print(f"Early stopping after {epoch+1} epochs")
                    early_stop = True

    if early_stopping_patience is not None:
        print(f'Best Validation Loss: {best_loss:.4f}')

In [124]:
if __name__ == '__main__':
    main()


Training on dataset 1458
Epoch 1/1, Loss: 0.2963, Val Loss: 0.1011, Validation AUC: 0.5419


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 1458, Test AUC with CoTTA: 0.5300

Training on dataset 2259
Epoch 1/1, Loss: 1.0594, Val Loss: 0.5556, Validation AUC: 0.5793


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 2259, Test AUC with CoTTA: 0.5103

Training on dataset 2261
Epoch 1/1, Loss: 0.2766, Val Loss: 0.1582, Validation AUC: 0.5979


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 2261, Test AUC with CoTTA: 0.5001

Training on dataset 2821
Epoch 1/1, Loss: 0.0608, Val Loss: 0.0395, Validation AUC: 0.5471


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 2821, Test AUC with CoTTA: 0.5345

Training on dataset 2997
Epoch 1/1, Loss: 0.0463, Val Loss: 0.0447, Validation AUC: 0.4665


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 2997, Test AUC with CoTTA: 0.4907

Training on dataset 3358
Epoch 1/1, Loss: 0.0450, Val Loss: 0.0245, Validation AUC: 0.5731


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 3358, Test AUC with CoTTA: 0.6089

Training on dataset 3386
Epoch 1/1, Loss: 0.0220, Val Loss: 0.0145, Validation AUC: 0.5573


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 3386, Test AUC with CoTTA: 0.5412

Training on dataset 3427
Epoch 1/1, Loss: 0.0106, Val Loss: 0.0092, Validation AUC: 0.4969


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 3427, Test AUC with CoTTA: 0.4577

Training on dataset 3476
Epoch 1/1, Loss: 0.0071, Val Loss: 0.0056, Validation AUC: 0.5297


  return torch._dynamo.disable(fn, recursive)(*args, **kwargs)


Dataset 3476, Test AUC with CoTTA: 0.5289
