In [1]:
import pandas as pd 
import numpy as np
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [2]:
# class LSTMFeatureExtractor(nn.Module):
#     def __init__(self, input_dim, hidden_dim, num_layers):
#         super(LSTMFeatureExtractor, self).__init__()
#         self.hidden_dim = hidden_dim
#         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

#     def forward(self, x):
#         # x shape: (batch, seq_len, features)
#         lstm_out, (hn, cn) = self.lstm(x)  # lstm_out shape: (batch, seq_len, hidden_dim)
#         return torch.squeeze(lstm_out)


# class LSTMFeatureExtractor(nn.Module):
#     def __init__(self, input_dim, hidden_dim, num_layers):
#         super(LSTMFeatureExtractor, self).__init__()
#         self.hidden_dim = hidden_dim
#         self.num_layers = num_layers
#         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        
#     def forward(self, x):
#         # x shape: (batch, seq_len, features)
#         _, (hn, _) = self.lstm(x)  # hn shape: (num_layers, batch, hidden_dim)
        
#         # Take the last layer's hidden state
#         hn_last_layer = hn[-1, :, :]  # Shape: (batch, hidden_dim)
        
#         # If you specifically need the output to be 2048 dimensions but your hidden_dim is different,
#         # you might consider adding a linear layer to map hidden_dim to 2048.
#         # Ensure hidden_dim is set to 2048 if you want the output directly without transformation.
#         return torch.squeeze(hn_last_layer)


# class LSTMFeatureExtractorWithPooling(nn.Module):
#     def __init__(self, input_dim, hidden_dim, num_layers):
#         super(LSTMFeatureExtractorWithPooling, self).__init__()
#         self.hidden_dim = hidden_dim
#         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        
#     def forward(self, x):
#         # x shape: (batch, seq_len, features)
#         lstm_out, _ = self.lstm(x)  # lstm_out shape: (batch, seq_len, hidden_dim)
        
#         # Apply mean pooling across timesteps
#         # lstm_out shape after mean: (batch, hidden_dim)
#         mean_pooled = torch.mean(lstm_out, dim=1)
        
#         return torch.squeeze(mean_pooled)

# input_dim_vis = 2048  
# input_dim_aud = 2048  
# hidden_dim = 2048  
# num_layers = 1    

# feature_extractor_vis = LSTMFeatureExtractor(input_dim_vis, hidden_dim, num_layers)

In [3]:
# class EmotionDataset(Dataset):
#     def __init__(self, X, y):
#         self.X = X
#         self.y = y

#     def __len__(self):
#         return len(self.X)

#     def __getitem__(self, idx):
#         return self.X[idx], self.y[idx]

# def mean_pooling(batch_features_padded):
#     return torch.mean(batch_features_padded, dim=1)

# class FeatureDataset(Dataset):
#     def __init__(self, dataframe, feature_type, base_path="../data/"):
#         """
#         feature_type: 'visual_features' or 'audio_features'
#         """
#         self.dataframe = dataframe
#         self.feature_type = feature_type
#         self.base_path = base_path

#     def __len__(self):
#         return len(self.dataframe)

#     def __getitem__(self, idx):
#         file_path = self.dataframe.iloc[idx][self.feature_type]
#         features = np.load(self.base_path + file_path)
#         return torch.tensor(features, dtype=torch.float)
# def collate_fn(batch):
#     batch_features = [item for item in batch]
#     batch_features_padded = pad_sequence(batch_features, batch_first=True)
#     return mean_pooling(batch_features_padded)
# # Emotion Classifier Model
# class VisualClassifier(nn.Module):
#     def __init__(self):
#         super(VisualClassifier, self).__init__()
#         self.fc1 = nn.Linear(2048, 1024)
#         self.bn1 = nn.BatchNorm1d(1024)
#         self.dropout1 = nn.Dropout(0.5)
#         self.fc2 = nn.Linear(1024, 256)
#         self.bn2 = nn.BatchNorm1d(256)
#         self.dropout2 = nn.Dropout(0.5)
#         self.fc4 = nn.Linear(256, 4)  # Assuming 4 classes

#     def forward(self, x):
#         x = F.relu(self.bn1(self.fc1(x)))
#         x = self.dropout1(x)
#         x = F.relu(self.bn2(self.fc2(x)))
#         x = self.dropout2(x)
#         x = self.fc4(x)
#         return x
# def calculate_accuracy(model, data_loader):
#     model.eval()
#     correct, total = 0, 0
#     with torch.no_grad():
#         for inputs, labels in data_loader:
#             outputs = model(inputs)
#             _, predicted = torch.max(outputs, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
#     return correct / total

In [4]:
class EmotionDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [5]:
def mean_pooling(batch_features_padded):
    return torch.mean(batch_features_padded, dim=1)

In [6]:
class FeatureDataset(Dataset):
    def __init__(self, dataframe, feature_type, base_path="../data/"):
        """
        feature_type: 'visual_features' or 'audio_features'
        """
        self.dataframe = dataframe
        self.feature_type = feature_type
        self.base_path = base_path

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        file_path = self.dataframe.iloc[idx][self.feature_type]
        features = np.load(self.base_path + file_path)
        return torch.tensor(features, dtype=torch.float)

In [7]:
def collate_fn(batch):
    batch_features = [item for item in batch]
    batch_features_padded = pad_sequence(batch_features, batch_first=True)
    return mean_pooling(batch_features_padded)

In [8]:
# Emotion Classifier Model
class VisualClassifier(nn.Module):
    def __init__(self):
        super(VisualClassifier, self).__init__()
        self.fc1 = nn.Linear(2048, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(0.7)
        self.fc2 = nn.Linear(256, 4)
        self.bn2 = nn.BatchNorm1d(4)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        return x

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TextClassifier(nn.Module):
    def __init__(self, input_dim=768, output_dim=4, hidden_dim=256):
        super(TextClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.bn2 = nn.BatchNorm1d(hidden_dim // 2)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(hidden_dim // 2, output_dim)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x


In [10]:
# Emotion Classifier Model
class AudialClassifier(nn.Module):
    def __init__(self):
        super(AudialClassifier, self).__init__()
        self.fc1 = nn.Linear(128, 16)
        self.bn1 = nn.BatchNorm1d(16)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(16, 4)
        self.bn2 = nn.BatchNorm1d(4)
   
    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        return x

In [11]:
def calculate_accuracy(model, data_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [12]:
def train_model(model, dataloaders, optimizer, criterion, device, num_epochs=50, patience=10):

    """
    Trains and validates the model.
    
    Args:
    - model (torch.nn.Module): The PyTorch model to train.
    - dataloaders (dict): A dictionary containing 'train' and 'val' DataLoaders.
    - optimizer (torch.optim.Optimizer): The optimizer to use for training.
    - criterion (torch.nn.Module): The loss function.
    - num_epochs (int): The number of epochs to train for.
    - patience (int): The patience for early stopping.
    """
    
    best_val_loss = float('inf')
    patience_counter = 0
    model = model.to(device)
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        train_accuracy = calculate_accuracy(model, dataloaders['train'])
        
        # Validation phase
        val_loss = 0.0
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloaders['val']:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_accuracy = calculate_accuracy(model, dataloaders['val'])
        val_loss /= len(dataloaders['val'])
        
        # Early Stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break
        
    print(f'Epoch {epoch+1}, Train Loss: {running_loss/len(dataloaders["train"]):.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')
    return val_accuracy


In [13]:
def extract_and_pool_features(df, feature_types, base_path="../data/"):

    """
    Extracts features from specified columns in the DataFrame, applies mean pooling,
    and updates the DataFrame with new columns for these processed features.
    
    Args:
    - df (DataFrame): The pandas DataFrame containing the features.
    - feature_types (dict): A dictionary mapping from 'visual' and 'audio' to their respective column names in df.
    - base_path (str): Base path where the feature files are stored.
    """
    
    for key, column in feature_types.items():
        pooled_features = []
        for _, row in df.iterrows():
            file_path = row[column]
            features = np.load(f"{base_path}{file_path}")
            pooled_feature = np.mean(features, axis=0)
            pooled_features.append(pooled_feature)
        
        # Update DataFrame with new columns for pooled features
        df[f'extracted_{key}_features'] = pooled_features




In [14]:
def prepare_datasets_and_loaders(df, feature_columns, label_column='emotion_labels', batch_size=4, test_size=0.2):
    
    """
    Prepares datasets and dataloaders for training and validation.
    
    Args:
    - df (DataFrame): The pandas DataFrame containing the pooled features and labels.
    - feature_columns (list): List of column names for the features to be used.
    - label_column (str): The column name where the label data is stored.
    - batch_size (int): Batch size for the dataloaders.
    - test_size (float): Proportion of the dataset to include in the test split.
    
    Returns:
    - A dictionary of dataloaders for training and validation for each feature type.
    """

    dataloaders = {}
    y = torch.tensor(df[label_column].values, dtype=torch.long)

    for feature_type in feature_columns:
        X = np.array(df[feature_type].tolist(), dtype=np.float32)
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42)
        
        train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train), y_train)
        val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val), y_val)
        
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        
        dataloaders[f'{feature_type}_train'] = train_loader
        dataloaders[f'{feature_type}_val'] = val_loader

    return dataloaders



### RUN CLASSIFIER

In [15]:
df = pd.read_csv('../data/csv/dataset.csv')

model_aud = AudialClassifier()
model_aud = model_aud.to(device)

model_vis = VisualClassifier()
model_vis = model_vis.to(device)

model_text = TextClassifier()

criterion = nn.CrossEntropyLoss()

feature_types = {'visual': 'visual_features', 'audio': 'acoustic_features'}#, 'text':'text_features'}
extract_and_pool_features(df, feature_types)

feature_columns = ['extracted_visual_features', 'extracted_audio_features']
dataloaders = prepare_datasets_and_loaders(df, feature_columns)

optimizer_aud = optim.Adam(model_aud.parameters(), lr=0.001, weight_decay=1e-4)
optimizer_vis = optim.Adam(model_vis.parameters(), lr=0.001, weight_decay=1e-4)

models_optimizers = {
    'extracted_visual_features': (model_vis, optimizer_vis),
    'extracted_audio_features': (model_aud, optimizer_aud),
}

# Train each model with its corresponding feature type
for feature_type, (model, optimizer) in models_optimizers.items():
    print(f"Training with {feature_type}:")
    break
    train_model(model, 
                {'train': dataloaders[f'{feature_type}_train'], 'val': dataloaders[f'{feature_type}_val']}, 
                optimizer, criterion)


Training with extracted_visual_features:


### PARAM GRID SEARCH

In [16]:
from itertools import product
import torch.optim as optim
import torch.nn as nn

feature_columns = {
    'extracted_visual_features'
}
# Define your grid of hyperparameters to search over
param_grid = {
    'learning_rate': [0.001, 0.01, 0.1],
    'optimizer': [optim.Adam],
    'criterion': [nn.CrossEntropyLoss],
    'epochs': [30, 50],
    'batch_size': [4, 16, 32, 64],
    'patience': [5, 10, 15],
    'weight_decay': [0, 1e-4, 1e-2],
}


def get_optimizer(optimizer_class, parameters, lr, weight_decay, momentum=None):
    if optimizer_class == optim.Adam:
        return optim.Adam(parameters, lr=lr, weight_decay=weight_decay)
    elif optimizer_class == optim.SGD:
        # Ensure momentum is provided for SGD; otherwise, default to 0
        return optim.SGD(parameters, lr=lr, momentum=momentum if momentum is not None else 0, weight_decay=weight_decay)


def get_criterion(criterion_class):
    if criterion_class == nn.CrossEntropyLoss:
        return nn.CrossEntropyLoss()
    elif criterion_class == nn.NLLLoss:
        return nn.NLLLoss()


max_val_acc = -np.inf
combinations = list(product(*param_grid.values()))
best_params = None
for combination in tqdm(combinations):
    lr, optimizer_class, criterion_class, epochs, batch_size, patience, wd = combination
    
    dataloaders = prepare_datasets_and_loaders(df, feature_columns, batch_size=batch_size)
    
    # Initialize models, optimizers, and criterion for each combination
    model_vis = VisualClassifier()
    model_aud = AudialClassifier()

    model_vis = model_vis.to(device)
    model_aud = model_aud.to(device)

    optimizer_vis = get_optimizer(optimizer_class, model_vis.parameters(), lr, wd )
    optimizer_aud = get_optimizer(optimizer_class, model_aud.parameters(), lr, wd)
    criterion = get_criterion(criterion_class)
    
    # Training visual model
    print(f"Training Visual Model with lr={lr}, optimizer={optimizer_class.__name__}, criterion={criterion_class.__name__}, epochs={epochs}, batch_size={batch_size}, Patience={patience}, Weight decay={wd} ")
    val_acc = train_model(model_vis, {'train': dataloaders['extracted_visual_features_train'], 'val': dataloaders['extracted_visual_features_val']}, optimizer_vis, criterion, num_epochs=epochs, patience=patience, device=device)
    
    # Update best_params if current combination gives better validation accuracy
    if val_acc > max_val_acc:
        max_val_acc = val_acc
        best_params = {
            'learning_rate': lr,
            'optimizer': optimizer_class.__name__,
            'criterion': criterion_class.__name__,
            'epochs': epochs,
            'batch_size': batch_size,
            'patience': patience,
            'weight_decay': wd,
            'validation_accuracy': val_acc  # Include validation accuracy for reference
        }

    
    
    # # Training audio model
    # print(f"Training Audio Model with lr={lr}, optimizer={optimizer_class.__name__}, criterion={criterion_class.__name__}, epochs={epochs}, batch_size={batch_size}")
    # train_model(model_aud, {'train': dataloaders['extracted_audio_features_train'], 'val': dataloaders['extracted_audio_features_val']}, optimizer_aud, criterion, num_epochs=epochs)

    # Note: Add any additional logging or saving of model performance metrics as needed


  0%|          | 0/216 [00:00<?, ?it/s]

Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0 


  0%|          | 1/216 [00:23<1:25:29, 23.86s/it]

Early stopping triggered
Epoch 19, Train Loss: 1.1110, Train Accuracy: 0.7360, Val Loss: 1.2029, Val Accuracy: 0.4664
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0.0001 


  1%|          | 2/216 [00:39<1:06:58, 18.78s/it]

Early stopping triggered
Epoch 12, Train Loss: 1.1680, Train Accuracy: 0.6985, Val Loss: 1.2660, Val Accuracy: 0.4254
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0.01 


  1%|▏         | 3/216 [00:50<54:47, 15.43s/it]  

Early stopping triggered
Epoch 10, Train Loss: 1.2834, Train Accuracy: 0.5599, Val Loss: 1.2935, Val Accuracy: 0.4254
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0 


  2%|▏         | 4/216 [01:15<1:07:37, 19.14s/it]

Early stopping triggered
Epoch 22, Train Loss: 1.0647, Train Accuracy: 0.7865, Val Loss: 1.2121, Val Accuracy: 0.4963
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0.0001 


  2%|▏         | 5/216 [01:50<1:27:36, 24.91s/it]

Epoch 30, Train Loss: 1.0388, Train Accuracy: 0.8268, Val Loss: 1.1921, Val Accuracy: 0.4851
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0.01 


  3%|▎         | 6/216 [02:13<1:24:26, 24.13s/it]

Early stopping triggered
Epoch 19, Train Loss: 1.2733, Train Accuracy: 0.5337, Val Loss: 1.3033, Val Accuracy: 0.3955
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0 


  3%|▎         | 7/216 [02:46<1:34:39, 27.18s/it]

Epoch 30, Train Loss: 0.9651, Train Accuracy: 0.8399, Val Loss: 1.1646, Val Accuracy: 0.5149
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0.0001 


  4%|▎         | 8/216 [03:19<1:41:01, 29.14s/it]

Epoch 30, Train Loss: 0.9838, Train Accuracy: 0.8680, Val Loss: 1.2061, Val Accuracy: 0.4590
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0.01 


  4%|▍         | 9/216 [03:53<1:45:11, 30.49s/it]

Epoch 30, Train Loss: 1.2535, Train Accuracy: 0.5908, Val Loss: 1.2439, Val Accuracy: 0.4552
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0 


  5%|▍         | 10/216 [03:59<1:18:35, 22.89s/it]

Early stopping triggered
Epoch 23, Train Loss: 0.4439, Train Accuracy: 0.9897, Val Loss: 1.1367, Val Accuracy: 0.5224
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0.0001 


  5%|▌         | 11/216 [04:03<59:09, 17.32s/it]  

Early stopping triggered
Epoch 18, Train Loss: 0.5891, Train Accuracy: 0.9775, Val Loss: 1.1676, Val Accuracy: 0.4925
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0.01 


  6%|▌         | 12/216 [32:29<30:05:03, 530.90s/it]

Early stopping triggered
Epoch 14, Train Loss: 0.8820, Train Accuracy: 0.8614, Val Loss: 1.1748, Val Accuracy: 0.5149
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0 


  6%|▌         | 13/216 [32:36<20:59:39, 372.31s/it]

Early stopping triggered
Epoch 27, Train Loss: 0.3585, Train Accuracy: 0.9916, Val Loss: 1.1558, Val Accuracy: 0.5224
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0.0001 


  6%|▋         | 14/216 [32:44<14:42:23, 262.09s/it]

Epoch 30, Train Loss: 0.3670, Train Accuracy: 0.9897, Val Loss: 1.1713, Val Accuracy: 0.5149
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0.01 


  7%|▋         | 15/216 [32:51<10:20:49, 185.32s/it]

Epoch 30, Train Loss: 0.8189, Train Accuracy: 0.8998, Val Loss: 1.1605, Val Accuracy: 0.4813
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0 


  7%|▋         | 16/216 [32:59<7:19:18, 131.79s/it] 

Epoch 30, Train Loss: 0.3457, Train Accuracy: 0.9925, Val Loss: 1.1363, Val Accuracy: 0.5634
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0.0001 


  8%|▊         | 17/216 [33:06<5:12:39, 94.27s/it] 

Early stopping triggered
Epoch 30, Train Loss: 0.3839, Train Accuracy: 0.9934, Val Loss: 1.1664, Val Accuracy: 0.5075
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0.01 


  8%|▊         | 18/216 [33:12<3:43:38, 67.77s/it]

Epoch 30, Train Loss: 0.8118, Train Accuracy: 0.8858, Val Loss: 1.1597, Val Accuracy: 0.5075
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0 


  9%|▉         | 19/216 [33:14<2:38:15, 48.20s/it]

Early stopping triggered
Epoch 24, Train Loss: 0.3993, Train Accuracy: 0.9906, Val Loss: 1.1344, Val Accuracy: 0.5224
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0.0001 


  9%|▉         | 20/216 [33:17<1:53:15, 34.67s/it]

Early stopping triggered
Epoch 28, Train Loss: 0.3626, Train Accuracy: 0.9953, Val Loss: 1.1348, Val Accuracy: 0.5485
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0.01 


 10%|▉         | 21/216 [33:21<1:21:49, 25.18s/it]

Early stopping triggered
Epoch 27, Train Loss: 0.6632, Train Accuracy: 0.9457, Val Loss: 1.1539, Val Accuracy: 0.5112
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0 


 10%|█         | 22/216 [33:24<1:00:22, 18.67s/it]

Early stopping triggered
Epoch 30, Train Loss: 0.3448, Train Accuracy: 0.9934, Val Loss: 1.1354, Val Accuracy: 0.4963
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0.0001 


 11%|█         | 23/216 [33:28<45:40, 14.20s/it]  

Early stopping triggered
Epoch 30, Train Loss: 0.3315, Train Accuracy: 0.9925, Val Loss: 1.1571, Val Accuracy: 0.5037
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0.01 


 11%|█         | 24/216 [33:31<35:00, 10.94s/it]

Epoch 30, Train Loss: 0.6222, Train Accuracy: 0.9691, Val Loss: 1.1651, Val Accuracy: 0.4813
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0 


 12%|█▏        | 25/216 [33:35<27:48,  8.74s/it]

Epoch 30, Train Loss: 0.3151, Train Accuracy: 0.9934, Val Loss: 1.1445, Val Accuracy: 0.5261
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0.0001 


 12%|█▏        | 26/216 [33:38<22:46,  7.19s/it]

Epoch 30, Train Loss: 0.3263, Train Accuracy: 0.9925, Val Loss: 1.1482, Val Accuracy: 0.5299
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0.01 


 12%|█▎        | 27/216 [33:42<18:59,  6.03s/it]

Epoch 30, Train Loss: 0.5624, Train Accuracy: 0.9616, Val Loss: 1.1112, Val Accuracy: 0.5336
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0 


 13%|█▎        | 28/216 [33:43<14:56,  4.77s/it]

Early stopping triggered
Epoch 27, Train Loss: 0.4098, Train Accuracy: 0.9916, Val Loss: 1.1442, Val Accuracy: 0.5410
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0.0001 


 13%|█▎        | 29/216 [33:46<12:17,  3.94s/it]

Epoch 30, Train Loss: 0.3712, Train Accuracy: 0.9953, Val Loss: 1.1469, Val Accuracy: 0.5075
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0.01 


 14%|█▍        | 30/216 [33:47<10:19,  3.33s/it]

Early stopping triggered
Epoch 28, Train Loss: 0.5613, Train Accuracy: 0.9803, Val Loss: 1.1768, Val Accuracy: 0.5112
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0 


 14%|█▍        | 31/216 [33:49<09:01,  2.93s/it]

Epoch 30, Train Loss: 0.3677, Train Accuracy: 0.9925, Val Loss: 1.1533, Val Accuracy: 0.5336
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0.0001 


 15%|█▍        | 32/216 [33:51<08:08,  2.66s/it]

Early stopping triggered
Epoch 30, Train Loss: 0.3858, Train Accuracy: 0.9953, Val Loss: 1.1381, Val Accuracy: 0.5299
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0.01 


 15%|█▌        | 33/216 [33:54<07:36,  2.49s/it]

Epoch 30, Train Loss: 0.5535, Train Accuracy: 0.9794, Val Loss: 1.1525, Val Accuracy: 0.5149
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0 


 16%|█▌        | 34/216 [33:56<07:06,  2.34s/it]

Epoch 30, Train Loss: 0.3702, Train Accuracy: 0.9953, Val Loss: 1.1188, Val Accuracy: 0.5299
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0.0001 


 16%|█▌        | 35/216 [33:58<06:47,  2.25s/it]

Epoch 30, Train Loss: 0.3718, Train Accuracy: 0.9934, Val Loss: 1.1639, Val Accuracy: 0.5261
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0.01 


 17%|█▋        | 36/216 [34:00<06:33,  2.18s/it]

Epoch 30, Train Loss: 0.5367, Train Accuracy: 0.9841, Val Loss: 1.1607, Val Accuracy: 0.5187
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0 


 17%|█▋        | 37/216 [34:22<24:46,  8.30s/it]

Early stopping triggered
Epoch 32, Train Loss: 0.9326, Train Accuracy: 0.8951, Val Loss: 1.1950, Val Accuracy: 0.4888
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0.0001 


 18%|█▊        | 38/216 [34:44<36:34, 12.33s/it]

Early stopping triggered
Epoch 30, Train Loss: 0.9761, Train Accuracy: 0.8511, Val Loss: 1.1724, Val Accuracy: 0.4813
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0.01 


 18%|█▊        | 39/216 [34:53<33:20, 11.30s/it]

Early stopping triggered
Epoch 12, Train Loss: 1.2695, Train Accuracy: 0.5506, Val Loss: 1.2745, Val Accuracy: 0.4366
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0 


 19%|█▊        | 40/216 [35:30<55:34, 18.95s/it]

Epoch 50, Train Loss: 0.8290, Train Accuracy: 0.9251, Val Loss: 1.1705, Val Accuracy: 0.4664
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0.0001 


 19%|█▉        | 41/216 [36:06<1:10:41, 24.24s/it]

Epoch 50, Train Loss: 0.8439, Train Accuracy: 0.9242, Val Loss: 1.1323, Val Accuracy: 0.5261
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0.01 


 19%|█▉        | 42/216 [36:43<1:21:07, 27.97s/it]

Early stopping triggered
Epoch 50, Train Loss: 1.2523, Train Accuracy: 0.5571, Val Loss: 1.2663, Val Accuracy: 0.4478
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0 


 20%|█▉        | 43/216 [37:18<1:27:00, 30.18s/it]

Epoch 50, Train Loss: 0.8950, Train Accuracy: 0.9232, Val Loss: 1.1773, Val Accuracy: 0.4888
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0.0001 


 20%|██        | 44/216 [37:46<1:24:26, 29.46s/it]

Early stopping triggered
Epoch 38, Train Loss: 0.9456, Train Accuracy: 0.8736, Val Loss: 1.2230, Val Accuracy: 0.4813
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0.01 


 21%|██        | 45/216 [38:12<1:20:44, 28.33s/it]

Early stopping triggered
Epoch 35, Train Loss: 1.2636, Train Accuracy: 0.5272, Val Loss: 1.2822, Val Accuracy: 0.4440
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0 


 21%|██▏       | 46/216 [38:15<58:50, 20.77s/it]  

Early stopping triggered
Epoch 16, Train Loss: 0.6043, Train Accuracy: 0.9494, Val Loss: 1.1473, Val Accuracy: 0.5224
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0.0001 


 22%|██▏       | 47/216 [38:19<44:40, 15.86s/it]

Early stopping triggered
Epoch 22, Train Loss: 0.4423, Train Accuracy: 0.9860, Val Loss: 1.1370, Val Accuracy: 0.5037
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0.01 


 22%|██▏       | 48/216 [38:22<33:36, 12.00s/it]

Early stopping triggered
Epoch 15, Train Loss: 0.8629, Train Accuracy: 0.8689, Val Loss: 1.1780, Val Accuracy: 0.4851
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0 


 23%|██▎       | 49/216 [38:27<27:29,  9.87s/it]

Early stopping triggered
Epoch 25, Train Loss: 0.4183, Train Accuracy: 0.9916, Val Loss: 1.1308, Val Accuracy: 0.5336
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0.0001 


 23%|██▎       | 50/216 [38:33<23:44,  8.58s/it]

Early stopping triggered
Epoch 28, Train Loss: 0.3890, Train Accuracy: 0.9916, Val Loss: 1.1416, Val Accuracy: 0.5560
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0.01 


 24%|██▎       | 51/216 [38:42<24:38,  8.96s/it]

Epoch 50, Train Loss: 0.7459, Train Accuracy: 0.9195, Val Loss: 1.1843, Val Accuracy: 0.5187
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0 


 24%|██▍       | 52/216 [38:48<21:34,  7.89s/it]

Early stopping triggered
Epoch 28, Train Loss: 0.3633, Train Accuracy: 0.9916, Val Loss: 1.1789, Val Accuracy: 0.5261
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0.0001 


 25%|██▍       | 53/216 [38:55<21:00,  7.73s/it]

Early stopping triggered
Epoch 36, Train Loss: 0.3230, Train Accuracy: 0.9934, Val Loss: 1.1484, Val Accuracy: 0.5448
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0.01 


 25%|██▌       | 54/216 [39:02<19:58,  7.40s/it]

Early stopping triggered
Epoch 33, Train Loss: 0.7678, Train Accuracy: 0.9204, Val Loss: 1.1418, Val Accuracy: 0.5187
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0 


 25%|██▌       | 55/216 [39:04<15:39,  5.83s/it]

Early stopping triggered
Epoch 20, Train Loss: 0.4726, Train Accuracy: 0.9803, Val Loss: 1.1552, Val Accuracy: 0.5187
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0.0001 


 26%|██▌       | 56/216 [39:07<13:23,  5.02s/it]

Early stopping triggered
Epoch 28, Train Loss: 0.3518, Train Accuracy: 0.9906, Val Loss: 1.1474, Val Accuracy: 0.5075
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0.01 


 26%|██▋       | 57/216 [39:11<12:07,  4.58s/it]

Early stopping triggered
Epoch 32, Train Loss: 0.5945, Train Accuracy: 0.9710, Val Loss: 1.1285, Val Accuracy: 0.5410
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0 


 27%|██▋       | 58/216 [39:15<11:40,  4.43s/it]

Early stopping triggered
Epoch 38, Train Loss: 0.2586, Train Accuracy: 0.9953, Val Loss: 1.1828, Val Accuracy: 0.5224
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0.0001 


 27%|██▋       | 59/216 [39:19<11:26,  4.37s/it]

Early stopping triggered
Epoch 38, Train Loss: 0.2449, Train Accuracy: 0.9963, Val Loss: 1.1588, Val Accuracy: 0.5224
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0.01 


 28%|██▊       | 60/216 [39:23<11:23,  4.38s/it]

Early stopping triggered
Epoch 40, Train Loss: 0.5761, Train Accuracy: 0.9682, Val Loss: 1.1152, Val Accuracy: 0.5224
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0 


 28%|██▊       | 61/216 [39:27<10:46,  4.17s/it]

Early stopping triggered
Epoch 34, Train Loss: 0.3025, Train Accuracy: 0.9944, Val Loss: 1.1464, Val Accuracy: 0.5261
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0.0001 


 29%|██▊       | 62/216 [39:31<10:47,  4.21s/it]

Early stopping triggered
Epoch 38, Train Loss: 0.2543, Train Accuracy: 0.9944, Val Loss: 1.1439, Val Accuracy: 0.5522
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0.01 


 29%|██▉       | 63/216 [39:35<10:07,  3.97s/it]

Early stopping triggered
Epoch 31, Train Loss: 0.5915, Train Accuracy: 0.9569, Val Loss: 1.1572, Val Accuracy: 0.5000
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0 


 30%|██▉       | 64/216 [39:37<08:43,  3.44s/it]

Early stopping triggered
Epoch 33, Train Loss: 0.3521, Train Accuracy: 0.9963, Val Loss: 1.1449, Val Accuracy: 0.5261
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0.0001 


 30%|███       | 65/216 [39:40<08:21,  3.32s/it]

Early stopping triggered
Epoch 44, Train Loss: 0.2730, Train Accuracy: 0.9944, Val Loss: 1.1565, Val Accuracy: 0.5187
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0.01 


 31%|███       | 66/216 [39:41<06:45,  2.71s/it]

Early stopping triggered
Epoch 18, Train Loss: 0.6421, Train Accuracy: 0.9579, Val Loss: 1.1708, Val Accuracy: 0.5037
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0 


 31%|███       | 67/216 [39:44<06:39,  2.68s/it]

Early stopping triggered
Epoch 40, Train Loss: 0.2785, Train Accuracy: 0.9934, Val Loss: 1.1311, Val Accuracy: 0.5112
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0.0001 


 31%|███▏      | 68/216 [39:47<06:45,  2.74s/it]

Early stopping triggered
Epoch 43, Train Loss: 0.2791, Train Accuracy: 0.9944, Val Loss: 1.1371, Val Accuracy: 0.5261
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0.01 


 32%|███▏      | 69/216 [39:49<06:34,  2.68s/it]

Early stopping triggered
Epoch 38, Train Loss: 0.5142, Train Accuracy: 0.9906, Val Loss: 1.1597, Val Accuracy: 0.5149
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0 


 32%|███▏      | 70/216 [39:53<06:58,  2.87s/it]

Epoch 50, Train Loss: 0.2310, Train Accuracy: 0.9963, Val Loss: 1.1390, Val Accuracy: 0.5299
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0.0001 


 33%|███▎      | 71/216 [39:56<07:21,  3.05s/it]

Early stopping triggered
Epoch 45, Train Loss: 0.2605, Train Accuracy: 0.9944, Val Loss: 1.1329, Val Accuracy: 0.5373
Training Visual Model with lr=0.001, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0.01 


 33%|███▎      | 72/216 [40:00<07:31,  3.13s/it]

Early stopping triggered
Epoch 49, Train Loss: 0.4507, Train Accuracy: 0.9916, Val Loss: 1.1309, Val Accuracy: 0.5037
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0 


 34%|███▍      | 73/216 [40:10<12:51,  5.39s/it]

Early stopping triggered
Epoch 15, Train Loss: 1.1465, Train Accuracy: 0.6704, Val Loss: 1.2190, Val Accuracy: 0.4701
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0.0001 


 34%|███▍      | 74/216 [40:24<18:42,  7.91s/it]

Early stopping triggered
Epoch 19, Train Loss: 1.2491, Train Accuracy: 0.5730, Val Loss: 1.2568, Val Accuracy: 0.4701
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0.01 


 35%|███▍      | 75/216 [40:35<20:38,  8.78s/it]

Early stopping triggered
Epoch 15, Train Loss: 1.3292, Train Accuracy: 0.3876, Val Loss: 1.3131, Val Accuracy: 0.3955
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0 


 35%|███▌      | 76/216 [40:56<29:07, 12.48s/it]

Epoch 30, Train Loss: 1.0052, Train Accuracy: 0.7772, Val Loss: 1.1908, Val Accuracy: 0.5000
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0.0001 


 36%|███▌      | 77/216 [41:18<35:36, 15.37s/it]

Epoch 30, Train Loss: 1.2426, Train Accuracy: 0.5571, Val Loss: 1.2513, Val Accuracy: 0.4366
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0.01 


 36%|███▌      | 78/216 [41:27<30:45, 13.37s/it]

Early stopping triggered
Epoch 12, Train Loss: 1.3422, Train Accuracy: 0.3876, Val Loss: 1.3354, Val Accuracy: 0.3955
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0 


 37%|███▋      | 79/216 [41:48<35:56, 15.74s/it]

Epoch 30, Train Loss: 1.0284, Train Accuracy: 0.7640, Val Loss: 1.1921, Val Accuracy: 0.4851
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0.0001 


 37%|███▋      | 80/216 [42:10<39:50, 17.58s/it]

Epoch 30, Train Loss: 1.2335, Train Accuracy: 0.5478, Val Loss: 1.2367, Val Accuracy: 0.4701
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0.01 


 38%|███▊      | 81/216 [42:23<36:32, 16.24s/it]

Early stopping triggered
Epoch 18, Train Loss: 1.3415, Train Accuracy: 0.3876, Val Loss: 1.3340, Val Accuracy: 0.3955
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0 


 38%|███▊      | 82/216 [42:26<27:26, 12.28s/it]

Early stopping triggered
Epoch 15, Train Loss: 0.5925, Train Accuracy: 0.9476, Val Loss: 1.1852, Val Accuracy: 0.5187
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0.0001 


 38%|███▊      | 83/216 [42:30<21:35,  9.74s/it]

Early stopping triggered
Epoch 19, Train Loss: 0.7170, Train Accuracy: 0.9073, Val Loss: 1.2222, Val Accuracy: 0.5149
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0.01 


 39%|███▉      | 84/216 [42:32<16:23,  7.45s/it]

Early stopping triggered
Epoch 10, Train Loss: 1.3122, Train Accuracy: 0.4036, Val Loss: 1.2911, Val Accuracy: 0.4104
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0 


 39%|███▉      | 85/216 [42:36<14:13,  6.51s/it]

Early stopping triggered
Epoch 22, Train Loss: 0.4261, Train Accuracy: 0.9794, Val Loss: 1.2415, Val Accuracy: 0.5187
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0.0001 


 40%|███▉      | 86/216 [42:41<12:53,  5.95s/it]

Early stopping triggered
Epoch 23, Train Loss: 0.6799, Train Accuracy: 0.9419, Val Loss: 1.1928, Val Accuracy: 0.5224
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0.01 


 40%|████      | 87/216 [42:46<12:08,  5.65s/it]

Early stopping triggered
Epoch 24, Train Loss: 1.3178, Train Accuracy: 0.3923, Val Loss: 1.2930, Val Accuracy: 0.4030
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0 


 41%|████      | 88/216 [42:50<11:19,  5.31s/it]

Early stopping triggered
Epoch 23, Train Loss: 0.4946, Train Accuracy: 0.9813, Val Loss: 1.2960, Val Accuracy: 0.5299
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0.0001 


 41%|████      | 89/216 [42:56<11:42,  5.53s/it]

Epoch 30, Train Loss: 0.6710, Train Accuracy: 0.9316, Val Loss: 1.1863, Val Accuracy: 0.5261
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0.01 


 42%|████▏     | 90/216 [43:02<11:38,  5.55s/it]

Early stopping triggered
Epoch 27, Train Loss: 1.3155, Train Accuracy: 0.3904, Val Loss: 1.2998, Val Accuracy: 0.3993
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0 


 42%|████▏     | 91/216 [43:04<09:08,  4.39s/it]

Early stopping triggered
Epoch 15, Train Loss: 0.4971, Train Accuracy: 0.9682, Val Loss: 1.1626, Val Accuracy: 0.5299
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0.0001 


 43%|████▎     | 92/216 [43:05<07:22,  3.57s/it]

Early stopping triggered
Epoch 14, Train Loss: 0.6359, Train Accuracy: 0.9354, Val Loss: 1.2212, Val Accuracy: 0.5299
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0.01 


 43%|████▎     | 93/216 [43:06<05:50,  2.85s/it]

Early stopping triggered
Epoch 9, Train Loss: 1.2678, Train Accuracy: 0.4728, Val Loss: 1.3005, Val Accuracy: 0.3993
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0 


 44%|████▎     | 94/216 [43:09<05:41,  2.80s/it]

Early stopping triggered
Epoch 24, Train Loss: 0.2965, Train Accuracy: 0.9916, Val Loss: 1.2799, Val Accuracy: 0.5261
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0.0001 


 44%|████▍     | 95/216 [43:12<05:54,  2.93s/it]

Early stopping triggered
Epoch 29, Train Loss: 0.4861, Train Accuracy: 0.9738, Val Loss: 1.2672, Val Accuracy: 0.4888
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0.01 


 44%|████▍     | 96/216 [43:15<05:38,  2.82s/it]

Early stopping triggered
Epoch 23, Train Loss: 1.2963, Train Accuracy: 0.4288, Val Loss: 1.2910, Val Accuracy: 0.3993
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0 


 45%|████▍     | 97/216 [43:18<05:37,  2.84s/it]

Early stopping triggered
Epoch 26, Train Loss: 0.3078, Train Accuracy: 0.9944, Val Loss: 1.2731, Val Accuracy: 0.5224
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0.0001 


 45%|████▌     | 98/216 [43:21<05:37,  2.86s/it]

Early stopping triggered
Epoch 26, Train Loss: 0.4635, Train Accuracy: 0.9766, Val Loss: 1.2313, Val Accuracy: 0.5075
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0.01 


 46%|████▌     | 99/216 [43:23<05:10,  2.65s/it]

Early stopping triggered
Epoch 18, Train Loss: 1.2796, Train Accuracy: 0.4616, Val Loss: 1.2794, Val Accuracy: 0.4216
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0 


 46%|████▋     | 100/216 [43:24<04:11,  2.17s/it]

Early stopping triggered
Epoch 15, Train Loss: 0.4642, Train Accuracy: 0.9607, Val Loss: 1.1306, Val Accuracy: 0.5522
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0.0001 


 47%|████▋     | 101/216 [43:25<03:32,  1.85s/it]

Early stopping triggered
Epoch 15, Train Loss: 0.5289, Train Accuracy: 0.9560, Val Loss: 1.1505, Val Accuracy: 0.5037
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0.01 


 47%|████▋     | 102/216 [43:26<02:51,  1.50s/it]

Early stopping triggered
Epoch 9, Train Loss: 1.2142, Train Accuracy: 0.5178, Val Loss: 1.2843, Val Accuracy: 0.4328
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0 


 48%|████▊     | 103/216 [43:27<02:48,  1.49s/it]

Early stopping triggered
Epoch 21, Train Loss: 0.3118, Train Accuracy: 0.9850, Val Loss: 1.2137, Val Accuracy: 0.5336
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0.0001 


 48%|████▊     | 104/216 [43:29<02:45,  1.48s/it]

Early stopping triggered
Epoch 21, Train Loss: 0.3967, Train Accuracy: 0.9897, Val Loss: 1.1929, Val Accuracy: 0.5299
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0.01 


 49%|████▊     | 105/216 [43:30<02:28,  1.34s/it]

Early stopping triggered
Epoch 14, Train Loss: 1.2322, Train Accuracy: 0.5262, Val Loss: 1.2749, Val Accuracy: 0.4030
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0 


 49%|████▉     | 106/216 [43:31<02:40,  1.46s/it]

Early stopping triggered
Epoch 26, Train Loss: 0.2378, Train Accuracy: 0.9934, Val Loss: 1.2572, Val Accuracy: 0.5373
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0.0001 


 50%|████▉     | 107/216 [43:33<02:57,  1.63s/it]

Early stopping triggered
Epoch 28, Train Loss: 0.2901, Train Accuracy: 0.9897, Val Loss: 1.2573, Val Accuracy: 0.5410
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0.01 


 50%|█████     | 108/216 [43:35<02:57,  1.64s/it]

Early stopping triggered
Epoch 24, Train Loss: 1.2182, Train Accuracy: 0.5206, Val Loss: 1.2738, Val Accuracy: 0.4254
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0 


 50%|█████     | 109/216 [43:57<13:48,  7.74s/it]

Early stopping triggered
Epoch 31, Train Loss: 1.0241, Train Accuracy: 0.7884, Val Loss: 1.1981, Val Accuracy: 0.4888
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0.0001 


 51%|█████     | 110/216 [44:09<15:52,  8.99s/it]

Early stopping triggered
Epoch 16, Train Loss: 1.2494, Train Accuracy: 0.5290, Val Loss: 1.2641, Val Accuracy: 0.4291
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0.01 


 51%|█████▏    | 111/216 [44:15<14:04,  8.05s/it]

Early stopping triggered
Epoch 8, Train Loss: 1.3434, Train Accuracy: 0.3876, Val Loss: 1.3346, Val Accuracy: 0.3955
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0 


 52%|█████▏    | 112/216 [44:50<28:09, 16.25s/it]

Epoch 50, Train Loss: 0.9133, Train Accuracy: 0.8305, Val Loss: 1.1444, Val Accuracy: 0.5075
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0.0001 


 52%|█████▏    | 113/216 [45:11<30:02, 17.50s/it]

Early stopping triggered
Epoch 28, Train Loss: 1.2606, Train Accuracy: 0.4981, Val Loss: 1.2716, Val Accuracy: 0.4254
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0.01 


 53%|█████▎    | 114/216 [45:20<25:24, 14.95s/it]

Early stopping triggered
Epoch 12, Train Loss: 1.3399, Train Accuracy: 0.3876, Val Loss: 1.3354, Val Accuracy: 0.3955
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0 


 53%|█████▎    | 115/216 [45:47<31:14, 18.56s/it]

Early stopping triggered
Epoch 34, Train Loss: 1.0141, Train Accuracy: 0.7893, Val Loss: 1.1920, Val Accuracy: 0.4963
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0.0001 


 54%|█████▎    | 116/216 [46:19<37:57, 22.77s/it]

Early stopping triggered
Epoch 43, Train Loss: 1.2508, Train Accuracy: 0.5272, Val Loss: 1.2394, Val Accuracy: 0.4440
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0.01 


 54%|█████▍    | 117/216 [46:38<35:43, 21.65s/it]

Early stopping triggered
Epoch 24, Train Loss: 1.3299, Train Accuracy: 0.3876, Val Loss: 1.3102, Val Accuracy: 0.3955
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0 


 55%|█████▍    | 118/216 [46:41<26:08, 16.00s/it]

Early stopping triggered
Epoch 13, Train Loss: 0.6508, Train Accuracy: 0.9129, Val Loss: 1.1883, Val Accuracy: 0.5149
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0.0001 


 55%|█████▌    | 119/216 [46:45<19:57, 12.35s/it]

Early stopping triggered
Epoch 18, Train Loss: 0.7211, Train Accuracy: 0.9007, Val Loss: 1.1797, Val Accuracy: 0.4776
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0.01 


 56%|█████▌    | 120/216 [46:46<14:28,  9.04s/it]

Early stopping triggered
Epoch 6, Train Loss: 1.3087, Train Accuracy: 0.4438, Val Loss: 1.3018, Val Accuracy: 0.3806
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0 


 56%|█████▌    | 121/216 [46:51<12:05,  7.63s/it]

Early stopping triggered
Epoch 21, Train Loss: 0.4923, Train Accuracy: 0.9728, Val Loss: 1.2828, Val Accuracy: 0.4888
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0.0001 


 56%|█████▋    | 122/216 [46:55<10:23,  6.64s/it]

Early stopping triggered
Epoch 21, Train Loss: 0.7111, Train Accuracy: 0.9279, Val Loss: 1.1836, Val Accuracy: 0.5224
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0.01 


 57%|█████▋    | 123/216 [46:59<09:16,  5.98s/it]

Early stopping triggered
Epoch 21, Train Loss: 1.3225, Train Accuracy: 0.3895, Val Loss: 1.2925, Val Accuracy: 0.3955
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0 


 57%|█████▋    | 124/216 [47:04<08:41,  5.67s/it]

Early stopping triggered
Epoch 23, Train Loss: 0.4163, Train Accuracy: 0.9785, Val Loss: 1.2882, Val Accuracy: 0.5299
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0.0001 


 58%|█████▊    | 125/216 [47:13<09:56,  6.56s/it]

Early stopping triggered
Epoch 42, Train Loss: 0.6810, Train Accuracy: 0.9476, Val Loss: 1.2183, Val Accuracy: 0.4813
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0.01 


 58%|█████▊    | 126/216 [47:17<08:31,  5.68s/it]

Early stopping triggered
Epoch 17, Train Loss: 1.3166, Train Accuracy: 0.4054, Val Loss: 1.2869, Val Accuracy: 0.4179
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0 


 59%|█████▉    | 127/216 [47:18<06:42,  4.52s/it]

Early stopping triggered
Epoch 15, Train Loss: 0.5096, Train Accuracy: 0.9569, Val Loss: 1.1497, Val Accuracy: 0.5112
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0.0001 


 59%|█████▉    | 128/216 [47:20<05:29,  3.74s/it]

Early stopping triggered
Epoch 16, Train Loss: 0.6308, Train Accuracy: 0.9448, Val Loss: 1.2030, Val Accuracy: 0.5000
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0.01 


 60%|█████▉    | 129/216 [47:21<04:14,  2.92s/it]

Early stopping triggered
Epoch 7, Train Loss: 1.2810, Train Accuracy: 0.4532, Val Loss: 1.3049, Val Accuracy: 0.3918
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0 


 60%|██████    | 130/216 [47:24<04:05,  2.86s/it]

Early stopping triggered
Epoch 23, Train Loss: 0.3194, Train Accuracy: 0.9888, Val Loss: 1.2507, Val Accuracy: 0.5373
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0.0001 


 61%|██████    | 131/216 [47:26<03:51,  2.72s/it]

Early stopping triggered
Epoch 20, Train Loss: 0.5481, Train Accuracy: 0.9541, Val Loss: 1.1989, Val Accuracy: 0.5187
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0.01 


 61%|██████    | 132/216 [47:28<03:20,  2.39s/it]

Early stopping triggered
Epoch 13, Train Loss: 1.2809, Train Accuracy: 0.4476, Val Loss: 1.2822, Val Accuracy: 0.3918
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0 


 62%|██████▏   | 133/216 [47:32<03:48,  2.76s/it]

Early stopping triggered
Epoch 31, Train Loss: 0.2784, Train Accuracy: 0.9934, Val Loss: 1.2945, Val Accuracy: 0.5410
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0.0001 


 62%|██████▏   | 134/216 [47:35<03:58,  2.91s/it]

Early stopping triggered
Epoch 27, Train Loss: 0.4887, Train Accuracy: 0.9785, Val Loss: 1.2669, Val Accuracy: 0.5336
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0.01 


 62%|██████▎   | 135/216 [47:37<03:41,  2.74s/it]

Early stopping triggered
Epoch 19, Train Loss: 1.2887, Train Accuracy: 0.4494, Val Loss: 1.2839, Val Accuracy: 0.3993
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0 


 63%|██████▎   | 136/216 [47:38<02:57,  2.21s/it]

Early stopping triggered
Epoch 13, Train Loss: 0.5505, Train Accuracy: 0.9504, Val Loss: 1.1517, Val Accuracy: 0.5485
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0.0001 


 63%|██████▎   | 137/216 [47:40<02:34,  1.95s/it]

Early stopping triggered
Epoch 18, Train Loss: 0.4321, Train Accuracy: 0.9766, Val Loss: 1.1994, Val Accuracy: 0.5261
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0.01 


 64%|██████▍   | 138/216 [47:40<02:05,  1.60s/it]

Early stopping triggered
Epoch 10, Train Loss: 1.2055, Train Accuracy: 0.5824, Val Loss: 1.2421, Val Accuracy: 0.4552
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0 


 64%|██████▍   | 139/216 [47:42<02:09,  1.68s/it]

Early stopping triggered
Epoch 26, Train Loss: 0.2115, Train Accuracy: 0.9944, Val Loss: 1.3052, Val Accuracy: 0.5261
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0.0001 


 65%|██████▍   | 140/216 [47:44<02:10,  1.72s/it]

Early stopping triggered
Epoch 23, Train Loss: 0.3122, Train Accuracy: 0.9897, Val Loss: 1.1915, Val Accuracy: 0.5448
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0.01 


 65%|██████▌   | 141/216 [47:45<01:58,  1.58s/it]

Early stopping triggered
Epoch 17, Train Loss: 1.2049, Train Accuracy: 0.5646, Val Loss: 1.2675, Val Accuracy: 0.3993
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0 


 66%|██████▌   | 142/216 [47:47<02:04,  1.68s/it]

Early stopping triggered
Epoch 27, Train Loss: 0.2193, Train Accuracy: 0.9916, Val Loss: 1.2498, Val Accuracy: 0.5485
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0.0001 


 66%|██████▌   | 143/216 [47:49<02:13,  1.82s/it]

Early stopping triggered
Epoch 30, Train Loss: 0.3378, Train Accuracy: 0.9822, Val Loss: 1.3243, Val Accuracy: 0.5187
Training Visual Model with lr=0.01, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0.01 


 67%|██████▋   | 144/216 [47:52<02:24,  2.00s/it]

Early stopping triggered
Epoch 34, Train Loss: 1.2368, Train Accuracy: 0.5459, Val Loss: 1.2778, Val Accuracy: 0.4478
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0 


 67%|██████▋   | 145/216 [47:56<03:17,  2.79s/it]

Early stopping triggered
Epoch 6, Train Loss: 1.3863, Train Accuracy: 0.2331, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0.0001 


 68%|██████▊   | 146/216 [48:03<04:31,  3.88s/it]

Early stopping triggered
Epoch 8, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=5, Weight decay=0.01 


 68%|██████▊   | 147/216 [48:08<04:55,  4.28s/it]

Early stopping triggered
Epoch 6, Train Loss: 1.3872, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0 


 69%|██████▊   | 148/216 [48:32<11:31, 10.17s/it]

Epoch 30, Train Loss: 1.2495, Train Accuracy: 0.5037, Val Loss: 1.2859, Val Accuracy: 0.4142
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0.0001 


 69%|██████▉   | 149/216 [48:41<10:59,  9.84s/it]

Early stopping triggered
Epoch 11, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=10, Weight decay=0.01 


 69%|██████▉   | 150/216 [48:53<11:28, 10.44s/it]

Early stopping triggered
Epoch 15, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0 


 70%|██████▉   | 151/216 [49:16<15:26, 14.25s/it]

Epoch 30, Train Loss: 1.2429, Train Accuracy: 0.4906, Val Loss: 1.2930, Val Accuracy: 0.4067
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0.0001 


 70%|███████   | 152/216 [49:31<15:26, 14.47s/it]

Early stopping triggered
Epoch 18, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=4, Patience=15, Weight decay=0.01 


 71%|███████   | 153/216 [49:47<15:31, 14.78s/it]

Early stopping triggered
Epoch 20, Train Loss: 1.3635, Train Accuracy: 0.3876, Val Loss: 1.3355, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0 


 71%|███████▏  | 154/216 [49:51<11:58, 11.59s/it]

Early stopping triggered
Epoch 20, Train Loss: 1.1200, Train Accuracy: 0.5496, Val Loss: 1.2938, Val Accuracy: 0.4328
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0.0001 


 72%|███████▏  | 155/216 [49:55<09:27,  9.31s/it]

Early stopping triggered
Epoch 19, Train Loss: 1.3212, Train Accuracy: 0.3830, Val Loss: 1.3391, Val Accuracy: 0.3582
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=5, Weight decay=0.01 


 72%|███████▏  | 156/216 [49:58<07:30,  7.51s/it]

Early stopping triggered
Epoch 16, Train Loss: 1.3482, Train Accuracy: 0.3876, Val Loss: 1.3367, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0 


 73%|███████▎  | 157/216 [50:04<07:00,  7.12s/it]

Early stopping triggered
Epoch 29, Train Loss: 0.8910, Train Accuracy: 0.8006, Val Loss: 1.2851, Val Accuracy: 0.4888
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0.0001 


 73%|███████▎  | 158/216 [50:07<05:43,  5.93s/it]

Early stopping triggered
Epoch 15, Train Loss: 1.3209, Train Accuracy: 0.4110, Val Loss: 1.3203, Val Accuracy: 0.4067
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=10, Weight decay=0.01 


 74%|███████▎  | 159/216 [50:11<04:53,  5.15s/it]

Early stopping triggered
Epoch 16, Train Loss: 1.3553, Train Accuracy: 0.3876, Val Loss: 1.3419, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0 


 74%|███████▍  | 160/216 [50:17<05:05,  5.45s/it]

Early stopping triggered
Epoch 30, Train Loss: 1.0618, Train Accuracy: 0.5974, Val Loss: 1.2935, Val Accuracy: 0.4590
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0.0001 


 75%|███████▍  | 161/216 [50:20<04:29,  4.90s/it]

Early stopping triggered
Epoch 17, Train Loss: 1.3224, Train Accuracy: 0.3914, Val Loss: 1.3275, Val Accuracy: 0.3881
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=16, Patience=15, Weight decay=0.01 


 75%|███████▌  | 162/216 [50:25<04:16,  4.75s/it]

Early stopping triggered
Epoch 21, Train Loss: 1.3505, Train Accuracy: 0.3876, Val Loss: 1.3363, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0 


 75%|███████▌  | 163/216 [50:27<03:28,  3.93s/it]

Early stopping triggered
Epoch 16, Train Loss: 1.1144, Train Accuracy: 0.5365, Val Loss: 1.3076, Val Accuracy: 0.4142
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0.0001 


 76%|███████▌  | 164/216 [50:28<02:46,  3.20s/it]

Early stopping triggered
Epoch 12, Train Loss: 1.2833, Train Accuracy: 0.4579, Val Loss: 1.2910, Val Accuracy: 0.3507
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=5, Weight decay=0.01 


 76%|███████▋  | 165/216 [50:29<02:08,  2.51s/it]

Early stopping triggered
Epoch 7, Train Loss: 1.3319, Train Accuracy: 0.2846, Val Loss: 1.4285, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0 


 77%|███████▋  | 166/216 [50:32<02:07,  2.54s/it]

Early stopping triggered
Epoch 22, Train Loss: 0.9225, Train Accuracy: 0.5936, Val Loss: 1.2098, Val Accuracy: 0.4552
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0.0001 


 77%|███████▋  | 167/216 [50:34<02:02,  2.49s/it]

Early stopping triggered
Epoch 20, Train Loss: 1.2424, Train Accuracy: 0.4794, Val Loss: 1.3115, Val Accuracy: 0.3806
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=10, Weight decay=0.01 


 78%|███████▊  | 168/216 [50:37<02:00,  2.50s/it]

Early stopping triggered
Epoch 21, Train Loss: 1.3507, Train Accuracy: 0.3876, Val Loss: 1.3399, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0 


 78%|███████▊  | 169/216 [50:40<02:04,  2.66s/it]

Early stopping triggered
Epoch 25, Train Loss: 1.0750, Train Accuracy: 0.5665, Val Loss: 1.3164, Val Accuracy: 0.4590
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0.0001 


 79%|███████▊  | 170/216 [50:42<02:02,  2.65s/it]

Early stopping triggered
Epoch 22, Train Loss: 1.2887, Train Accuracy: 0.4073, Val Loss: 1.3258, Val Accuracy: 0.3731
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=32, Patience=15, Weight decay=0.01 


 79%|███████▉  | 171/216 [50:45<01:54,  2.54s/it]

Early stopping triggered
Epoch 19, Train Loss: 1.3448, Train Accuracy: 0.3876, Val Loss: 1.3324, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0 


 80%|███████▉  | 172/216 [50:46<01:39,  2.26s/it]

Early stopping triggered
Epoch 22, Train Loss: 1.0463, Train Accuracy: 0.5833, Val Loss: 1.2638, Val Accuracy: 0.4701
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0.0001 


 80%|████████  | 173/216 [50:47<01:23,  1.93s/it]

Early stopping triggered
Epoch 15, Train Loss: 1.1496, Train Accuracy: 0.6058, Val Loss: 1.3236, Val Accuracy: 0.4142
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=5, Weight decay=0.01 


 81%|████████  | 174/216 [50:48<01:05,  1.55s/it]

Early stopping triggered
Epoch 8, Train Loss: 1.3158, Train Accuracy: 0.3970, Val Loss: 1.3393, Val Accuracy: 0.4291
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0 


 81%|████████  | 175/216 [50:50<01:08,  1.67s/it]

Early stopping triggered
Epoch 26, Train Loss: 0.8363, Train Accuracy: 0.5890, Val Loss: 1.2505, Val Accuracy: 0.4590
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0.0001 


 81%|████████▏ | 176/216 [50:52<01:07,  1.68s/it]

Early stopping triggered
Epoch 21, Train Loss: 1.2025, Train Accuracy: 0.4785, Val Loss: 1.2431, Val Accuracy: 0.3993
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=10, Weight decay=0.01 


 82%|████████▏ | 177/216 [50:54<01:11,  1.83s/it]

Epoch 30, Train Loss: 1.3364, Train Accuracy: 0.4045, Val Loss: 1.3169, Val Accuracy: 0.4067
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0 


 82%|████████▏ | 178/216 [50:56<01:13,  1.92s/it]

Epoch 30, Train Loss: 0.8033, Train Accuracy: 0.5927, Val Loss: 1.2349, Val Accuracy: 0.4664
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0.0001 


 83%|████████▎ | 179/216 [50:58<01:14,  2.01s/it]

Early stopping triggered
Epoch 30, Train Loss: 1.1669, Train Accuracy: 0.6386, Val Loss: 1.2548, Val Accuracy: 0.4515
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=30, batch_size=64, Patience=15, Weight decay=0.01 


 83%|████████▎ | 180/216 [51:01<01:14,  2.07s/it]

Epoch 30, Train Loss: 1.3313, Train Accuracy: 0.4026, Val Loss: 1.3215, Val Accuracy: 0.4067
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0 


 84%|████████▍ | 181/216 [51:12<02:53,  4.95s/it]

Early stopping triggered
Epoch 15, Train Loss: 1.2707, Train Accuracy: 0.4457, Val Loss: 1.3206, Val Accuracy: 0.3843
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0.0001 


 84%|████████▍ | 182/216 [51:17<02:46,  4.90s/it]

Early stopping triggered
Epoch 6, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=5, Weight decay=0.01 


 85%|████████▍ | 183/216 [51:22<02:40,  4.86s/it]

Early stopping triggered
Epoch 6, Train Loss: 1.3870, Train Accuracy: 0.2341, Val Loss: 1.3824, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0 


 85%|████████▌ | 184/216 [51:30<03:10,  5.97s/it]

Early stopping triggered
Epoch 11, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0.0001 


 86%|████████▌ | 185/216 [51:39<03:30,  6.78s/it]

Early stopping triggered
Epoch 11, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=10, Weight decay=0.01 


 86%|████████▌ | 186/216 [51:47<03:38,  7.30s/it]

Early stopping triggered
Epoch 11, Train Loss: 1.3897, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0 


 87%|████████▋ | 187/216 [52:25<07:58, 16.50s/it]

Epoch 50, Train Loss: 1.2072, Train Accuracy: 0.5599, Val Loss: 1.2827, Val Accuracy: 0.4142
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0.0001 


 87%|████████▋ | 188/216 [52:39<07:20, 15.72s/it]

Early stopping triggered
Epoch 18, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=4, Patience=15, Weight decay=0.01 


 88%|████████▊ | 189/216 [53:01<07:54, 17.57s/it]

Early stopping triggered
Epoch 28, Train Loss: 1.3874, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0 


 88%|████████▊ | 190/216 [53:03<05:31, 12.76s/it]

Early stopping triggered
Epoch 7, Train Loss: 1.2304, Train Accuracy: 0.4813, Val Loss: 1.3202, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0.0001 


 88%|████████▊ | 191/216 [53:05<04:01,  9.66s/it]

Early stopping triggered
Epoch 11, Train Loss: 1.3066, Train Accuracy: 0.3783, Val Loss: 1.3351, Val Accuracy: 0.3470
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=5, Weight decay=0.01 


 89%|████████▉ | 192/216 [53:08<02:59,  7.50s/it]

Early stopping triggered
Epoch 11, Train Loss: 1.3465, Train Accuracy: 0.3876, Val Loss: 1.3356, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0 


 89%|████████▉ | 193/216 [53:10<02:16,  5.94s/it]

Early stopping triggered
Epoch 11, Train Loss: 1.3863, Train Accuracy: 0.2341, Val Loss: 1.3863, Val Accuracy: 0.2910
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0.0001 


 90%|████████▉ | 194/216 [53:14<02:00,  5.49s/it]

Early stopping triggered
Epoch 21, Train Loss: 1.3255, Train Accuracy: 0.3867, Val Loss: 1.3363, Val Accuracy: 0.3918
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=10, Weight decay=0.01 


 90%|█████████ | 195/216 [53:19<01:50,  5.28s/it]

Early stopping triggered
Epoch 23, Train Loss: 1.3457, Train Accuracy: 0.3876, Val Loss: 1.3359, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0 


 91%|█████████ | 196/216 [53:27<01:59,  5.99s/it]

Early stopping triggered
Epoch 37, Train Loss: 1.0606, Train Accuracy: 0.5908, Val Loss: 1.3020, Val Accuracy: 0.4701
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0.0001 


 91%|█████████ | 197/216 [53:31<01:42,  5.37s/it]

Early stopping triggered
Epoch 19, Train Loss: 1.3233, Train Accuracy: 0.4120, Val Loss: 1.3352, Val Accuracy: 0.3619
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=16, Patience=15, Weight decay=0.01 


 92%|█████████▏| 198/216 [53:34<01:27,  4.88s/it]

Early stopping triggered
Epoch 18, Train Loss: 1.3484, Train Accuracy: 0.3876, Val Loss: 1.3437, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0 


 92%|█████████▏| 199/216 [53:37<01:09,  4.09s/it]

Early stopping triggered
Epoch 20, Train Loss: 1.0691, Train Accuracy: 0.5684, Val Loss: 1.3157, Val Accuracy: 0.4142
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0.0001 


 93%|█████████▎| 200/216 [53:40<00:59,  3.73s/it]

Early stopping triggered
Epoch 25, Train Loss: 1.2911, Train Accuracy: 0.4316, Val Loss: 1.3181, Val Accuracy: 0.4067
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=5, Weight decay=0.01 


 93%|█████████▎| 201/216 [53:40<00:43,  2.87s/it]

Early stopping triggered
Epoch 7, Train Loss: 1.3374, Train Accuracy: 0.3876, Val Loss: 1.3470, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0 


 94%|█████████▎| 202/216 [53:44<00:42,  3.00s/it]

Early stopping triggered
Epoch 29, Train Loss: 1.0547, Train Accuracy: 0.5890, Val Loss: 1.3267, Val Accuracy: 0.4701
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0.0001 


 94%|█████████▍| 203/216 [53:46<00:37,  2.90s/it]

Early stopping triggered
Epoch 21, Train Loss: 1.3066, Train Accuracy: 0.3933, Val Loss: 1.3241, Val Accuracy: 0.3694
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=10, Weight decay=0.01 


 94%|█████████▍| 204/216 [53:49<00:33,  2.82s/it]

Early stopping triggered
Epoch 22, Train Loss: 1.3427, Train Accuracy: 0.3876, Val Loss: 1.3398, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0 


 95%|█████████▍| 205/216 [53:53<00:33,  3.02s/it]

Early stopping triggered
Epoch 30, Train Loss: 0.8382, Train Accuracy: 0.8034, Val Loss: 1.2884, Val Accuracy: 0.5112
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0.0001 


 95%|█████████▌| 206/216 [53:56<00:30,  3.04s/it]

Early stopping triggered
Epoch 26, Train Loss: 1.2923, Train Accuracy: 0.4654, Val Loss: 1.3203, Val Accuracy: 0.4030
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=32, Patience=15, Weight decay=0.01 


 96%|█████████▌| 207/216 [53:59<00:28,  3.12s/it]

Early stopping triggered
Epoch 27, Train Loss: 1.3438, Train Accuracy: 0.3876, Val Loss: 1.3379, Val Accuracy: 0.3955
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0 


 96%|█████████▋| 208/216 [54:01<00:21,  2.70s/it]

Early stopping triggered
Epoch 24, Train Loss: 0.8272, Train Accuracy: 0.5871, Val Loss: 1.2029, Val Accuracy: 0.4552
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0.0001 


 97%|█████████▋| 209/216 [54:02<00:15,  2.22s/it]

Early stopping triggered
Epoch 14, Train Loss: 1.1990, Train Accuracy: 0.5197, Val Loss: 1.3621, Val Accuracy: 0.3881
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=5, Weight decay=0.01 


 97%|█████████▋| 210/216 [54:03<00:11,  1.85s/it]

Early stopping triggered
Epoch 13, Train Loss: 1.3127, Train Accuracy: 0.4148, Val Loss: 1.3205, Val Accuracy: 0.4030
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0 


 98%|█████████▊| 211/216 [54:05<00:09,  1.98s/it]

Early stopping triggered
Epoch 32, Train Loss: 0.9861, Train Accuracy: 0.5936, Val Loss: 1.3292, Val Accuracy: 0.4664
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0.0001 


 98%|█████████▊| 212/216 [54:07<00:08,  2.02s/it]

Early stopping triggered
Epoch 29, Train Loss: 1.1944, Train Accuracy: 0.4794, Val Loss: 1.2860, Val Accuracy: 0.3993
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=10, Weight decay=0.01 


 99%|█████████▊| 213/216 [54:08<00:05,  1.69s/it]

Early stopping triggered
Epoch 12, Train Loss: 1.3358, Train Accuracy: 0.4185, Val Loss: 1.3603, Val Accuracy: 0.3806
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0 


 99%|█████████▉| 214/216 [54:10<00:03,  1.85s/it]

Early stopping triggered
Epoch 30, Train Loss: 0.5474, Train Accuracy: 0.8324, Val Loss: 1.3412, Val Accuracy: 0.5037
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0.0001 


100%|█████████▉| 215/216 [54:12<00:01,  1.90s/it]

Early stopping triggered
Epoch 28, Train Loss: 1.1872, Train Accuracy: 0.5543, Val Loss: 1.2880, Val Accuracy: 0.4366
Training Visual Model with lr=0.1, optimizer=Adam, criterion=CrossEntropyLoss, epochs=50, batch_size=64, Patience=15, Weight decay=0.01 


100%|██████████| 216/216 [54:14<00:00, 15.07s/it]

Early stopping triggered
Epoch 23, Train Loss: 1.3329, Train Accuracy: 0.3923, Val Loss: 1.3257, Val Accuracy: 0.3918





In [18]:
best_params

{'learning_rate': 0.001,
 'optimizer': 'Adam',
 'criterion': 'CrossEntropyLoss',
 'epochs': 30,
 'batch_size': 16,
 'patience': 15,
 'weight_decay': 0,
 'validation_accuracy': 0.5634328358208955}