<a href="https://colab.research.google.com/github/hsun26/CS445-Project/blob/main/Dual_CNN_(use_generated_mask).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install torchsummary



In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [None]:
from torchsummary import summary
import os
import numpy as np
from PIL import Image
import torch
from torchvision import transforms
from torch.utils.data import Dataset
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from sklearn.metrics import precision_score, recall_score, f1_score
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class HandGestureDataset(Dataset):
    def __init__(self, rgb_dir, mask_dir, transform=None):
        """
        Args:
            rgb_dir (string): Directory with all the RGB images divided into subdirectories.
            mask_dir (string): Directory with all the mask images divided into the same subdirectories as RGB images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.rgb_dir = rgb_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.samples = self._load_samples()

    def _load_samples(self):
        samples = []
        label_mapping = {'one': 0, 'two': 1, 'three': 2, 'four': 3, 'five': 4, 'like': 5, 'ok': 6}

        # Iterate over all categories
        for category in label_mapping:
            rgb_path = os.path.join(self.rgb_dir, category)
            mask_path = os.path.join(self.mask_dir, category)
            for filename in os.listdir(rgb_path):
                if filename.endswith('.jpg'):
                    # Constructing the mask filename based on the RGB filename
                    # RGB filename like '2_Original_046.jpg' corresponds to mask '2_Mask_046.jpg'
                    mask_filename = filename.replace('Original', 'Mask')   # so here I
                    file_rgb_path = os.path.join(rgb_path, filename)
                    file_mask_path = os.path.join(mask_path, mask_filename)
                    samples.append((file_rgb_path, file_mask_path, label_mapping[category]))
        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        rgb_path, mask_path, label = self.samples[idx]
        rgb_image = Image.open(rgb_path).convert('RGB')
        mask_image = Image.open(mask_path).convert('L')  # Assuming masks are grayscale

        if self.transform:
            rgb_image = self.transform(rgb_image)
            mask_image = self.transform(mask_image)

        return rgb_image, mask_image, label

# Transformation to apply
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])



In [None]:
dataset = HandGestureDataset('/content/drive/MyDrive/CS445/Final Project/rgb', '/content/drive/MyDrive/CS445/Final Project/generate', transform=transform)
for i in range(5):  # Check first 5 samples
    print(dataset.samples[i])


('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_018.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_018.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_022.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_022.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_010.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_010.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_021.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_021.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_024.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_024.jpg', 0)


In [None]:
# hand_dir = '/content/drive/MyDrive/CS445/Final Project/rgb'
# mask_dir = '/content/drive/MyDrive/CS445/Final Project/generate'
# dataset = HandGestureDataset(hand_dir, mask_dir, transform=transform)

# Split the dataset into training and testing
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# DataLoader setup
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)


### DC-CNN (pytorch)

In [None]:
class DualCNN(nn.Module):
    def __init__(self):
        super(DualCNN, self).__init__()
        # Define the first branch for the RGB image
        self.branch1 = nn.Sequential(
            nn.Conv2d(3, 20, kernel_size=5, padding=2),  # Padding=2 to keep size constant
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),  # Larger kernel and padding
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Define the second branch for the mask image
        self.branch2 = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Define the fully connected layers
        self.fc1 = nn.Linear(20 * 56 * 56 * 2, 224)  # calculated
        self.fc2 = nn.Linear(224, 7)

    def forward(self, x_img, x_mask):
        out_img = self.branch1(x_img)
        out_mask = self.branch2(x_mask)

        # print("Output size after branch1:", out_img.shape)  # Debug: Check output size
        # print("Output size after branch2:", out_mask.shape)

        out_img = out_img.view(out_img.size(0), -1)
        out_mask = out_mask.view(out_mask.size(0), -1)
        out = torch.cat((out_img, out_mask), dim=1)

        # print("Concatenated output size:", out.shape)  # Debug: Check concatenated size

        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return out

# Initialize model
model = DualCNN()
print(model)


DualCNN(
  (branch1): Sequential(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (branch2): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=125440, out_features=224, bias=True)
  (fc2): Linear(in_features=224, out_features=7, bias=True)
)


In [None]:
model = DualCNN()
summary(model, [(3, 224, 224), (1, 224, 224)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 20, 224, 224]           1,520
              ReLU-2         [-1, 20, 224, 224]               0
         MaxPool2d-3         [-1, 20, 112, 112]               0
            Conv2d-4         [-1, 20, 112, 112]          19,620
              ReLU-5         [-1, 20, 112, 112]               0
         MaxPool2d-6           [-1, 20, 56, 56]               0
            Conv2d-7         [-1, 20, 224, 224]             520
              ReLU-8         [-1, 20, 224, 224]               0
         MaxPool2d-9         [-1, 20, 112, 112]               0
           Conv2d-10         [-1, 20, 112, 112]          19,620
             ReLU-11         [-1, 20, 112, 112]               0
        MaxPool2d-12           [-1, 20, 56, 56]               0
           Linear-13                  [-1, 224]      28,098,784
           Linear-14                   

In [None]:
def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Training Phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_counter = 0

        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Calculate loss and accuracy
            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == labels).sum().item()
            train_counter += labels.size(0)

            train_tqdm.set_postfix(loss=train_loss/(1+len(train_tqdm)), accuracy=100.0 * train_correct / train_counter)

        # Validation Phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_counter = 0

        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_counter += labels.size(0)

                val_tqdm.set_postfix(loss=val_loss/(1+len(val_tqdm)), accuracy=100.0 * val_correct / val_counter)

        # End of Epoch Summary
        train_loss /= len(train_loader)
        train_accuracy = 100.0 * train_correct / train_counter
        val_loss /= len(val_loader)
        val_accuracy = 100.0 * val_correct / val_counter
        print(f"Epoch {epoch+1}: Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%")
        print(f"Epoch {epoch+1}: Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")


In [None]:

def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Initialize metrics
        train_losses, val_losses = [], []
        train_preds, train_targets = [], []
        val_preds, val_targets = [], []

        # Training Phase
        model.train()
        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate loss and predictions
            train_losses.append(loss.item())
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_targets.extend(labels.cpu().numpy())

            # Update progress bar
            train_tqdm.set_postfix(loss=np.mean(train_losses))

        # Validation Phase
        model.eval()
        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                # Accumulate loss and predictions
                val_losses.append(loss.item())
                _, predicted = torch.max(outputs, 1)
                val_preds.extend(predicted.cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

                # Update progress bar
                val_tqdm.set_postfix(loss=np.mean(val_losses))

        # Calculate metrics for training
        train_precision = precision_score(train_targets, train_preds, average='macro')
        train_recall = recall_score(train_targets, train_preds, average='macro')
        train_f1 = f1_score(train_targets, train_preds, average='macro')

        # Calculate metrics for validation
        val_precision = precision_score(val_targets, val_preds, average='macro')
        val_recall = recall_score(val_targets, val_preds, average='macro')
        val_f1 = f1_score(val_targets, val_preds, average='macro')

        # End of Epoch Summary
        print(f'Epoch {epoch+1}: Training Loss: {np.mean(train_losses):.4f}')
        print(f'Training Precision: {train_precision:.2f}, Recall: {train_recall:.2f}, F1: {train_f1:.2f}')
        print(f'Validation Loss: {np.mean(val_losses):.4f}')
        print(f'Validation Precision: {val_precision:.2f}, Recall: {val_recall:.2f}, F1: {val_f1:.2f}')


In [None]:
model = DualCNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [None]:
# 原始设置
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [03:59<00:00, 11.42s/it, loss=1.91]
Validation Epoch 1: 100%|██████████| 6/6 [00:52<00:00,  8.69s/it, loss=1.84]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 1.9150
Training Precision: 0.12, Recall: 0.16, F1: 0.11
Validation Loss: 1.8355
Validation Precision: 0.06, Recall: 0.14, F1: 0.08


Training Epoch 2: 100%|██████████| 21/21 [01:31<00:00,  4.34s/it, loss=1.82]
Validation Epoch 2: 100%|██████████| 6/6 [00:18<00:00,  3.14s/it, loss=1.76]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.8194
Training Precision: 0.37, Recall: 0.28, F1: 0.29
Validation Loss: 1.7610
Validation Precision: 0.08, Recall: 0.18, F1: 0.11


Training Epoch 3: 100%|██████████| 21/21 [01:29<00:00,  4.26s/it, loss=1.62]
Validation Epoch 3: 100%|██████████| 6/6 [00:24<00:00,  4.10s/it, loss=1.69]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 1.6242
Training Precision: 0.34, Recall: 0.30, F1: 0.30
Validation Loss: 1.6890
Validation Precision: 0.21, Recall: 0.28, F1: 0.23


Training Epoch 4: 100%|██████████| 21/21 [01:28<00:00,  4.22s/it, loss=1.44]
Validation Epoch 4: 100%|██████████| 6/6 [00:16<00:00,  2.76s/it, loss=1.77]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4: Training Loss: 1.4388
Training Precision: 0.49, Recall: 0.43, F1: 0.44
Validation Loss: 1.7654
Validation Precision: 0.34, Recall: 0.35, F1: 0.28


Training Epoch 5: 100%|██████████| 21/21 [01:37<00:00,  4.64s/it, loss=1.52]
Validation Epoch 5: 100%|██████████| 6/6 [00:15<00:00,  2.58s/it, loss=1.92]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5: Training Loss: 1.5162
Training Precision: 0.46, Recall: 0.39, F1: 0.39
Validation Loss: 1.9203
Validation Precision: 0.35, Recall: 0.44, F1: 0.33


Training Epoch 6: 100%|██████████| 21/21 [01:43<00:00,  4.94s/it, loss=1.23]
Validation Epoch 6: 100%|██████████| 6/6 [00:16<00:00,  2.70s/it, loss=1.65]


Epoch 6: Training Loss: 1.2251
Training Precision: 0.54, Recall: 0.51, F1: 0.51
Validation Loss: 1.6498
Validation Precision: 0.36, Recall: 0.44, F1: 0.36


Training Epoch 7: 100%|██████████| 21/21 [01:30<00:00,  4.32s/it, loss=1.01]
Validation Epoch 7: 100%|██████████| 6/6 [00:17<00:00,  2.95s/it, loss=1.83]


Epoch 7: Training Loss: 1.0111
Training Precision: 0.68, Recall: 0.62, F1: 0.64
Validation Loss: 1.8263
Validation Precision: 0.45, Recall: 0.45, F1: 0.35


Training Epoch 8: 100%|██████████| 21/21 [01:33<00:00,  4.44s/it, loss=0.703]
Validation Epoch 8: 100%|██████████| 6/6 [00:16<00:00,  2.74s/it, loss=1.92]


Epoch 8: Training Loss: 0.7032
Training Precision: 0.74, Recall: 0.73, F1: 0.73
Validation Loss: 1.9244
Validation Precision: 0.34, Recall: 0.37, F1: 0.34


Training Epoch 9: 100%|██████████| 21/21 [01:30<00:00,  4.30s/it, loss=0.57]
Validation Epoch 9: 100%|██████████| 6/6 [00:13<00:00,  2.22s/it, loss=1.97]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 9: Training Loss: 0.5699
Training Precision: 0.82, Recall: 0.81, F1: 0.81
Validation Loss: 1.9652
Validation Precision: 0.28, Recall: 0.31, F1: 0.27


Training Epoch 10: 100%|██████████| 21/21 [01:31<00:00,  4.35s/it, loss=0.526]
Validation Epoch 10: 100%|██████████| 6/6 [00:16<00:00,  2.68s/it, loss=1.92]

Epoch 10: Training Loss: 0.5261
Training Precision: 0.86, Recall: 0.87, F1: 0.86
Validation Loss: 1.9196
Validation Precision: 0.43, Recall: 0.40, F1: 0.39





Test

In [None]:
from PIL import Image
import torchvision.transforms as transforms
import torch

def load_image(image_path, mask_path=None, image_size=224):
    # Image transformations
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        # Normalize with the same parameters used in training

    ])

    # Load image
    image = Image.open(image_path).convert('RGB')
    image = transform(image)

    # If a mask is required
    if mask_path:
        mask = Image.open(mask_path).convert('L')
        mask = transform(mask)
    else:
        mask = None

    return image, mask

def test_model(model, image_path, mask_path=None):
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        image, mask = load_image(image_path, mask_path)
        if mask is not None:
            image, mask = image.unsqueeze(0), mask.unsqueeze(0)  # Add batch dimension
            outputs = model(image, mask)
        else:
            image = image.unsqueeze(0)  # Add batch dimension
            outputs = model(image)

        # the output is class scores
        _, predicted = torch.max(outputs, 1)
        return predicted.item()


model = DualCNN()

predicted_class = test_model(model, '/content/drive/MyDrive/CS445/Final Project/1_A_hgr2A1_id02_1.jpg', '/content/drive/MyDrive/CS445/Final Project/1_A_hgr2A1_id02_1.bmp')
print("Predicted Class:", predicted_class)


Predicted Class: 0


In [None]:
predicted_class = test_model(model, 'path_to_test_image.jpg', 'path_to_test_mask.bmp')
print("Predicted Class:", predicted_class)

### DCCNN (use different regularization)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DualCNN(nn.Module):
    def __init__(self):
        super(DualCNN, self).__init__()
        # Define the first branch for the RGB image
        self.branch1 = nn.Sequential(
            nn.Conv2d(3, 20, kernel_size=5, padding=2),  # Padding=2 to keep size constant
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),  # Larger kernel and padding
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.5)  # Dropout added after pooling
        )

        # Define the second branch for the mask image
        self.branch2 = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.5)  # Dropout added after pooling
        )

        # Fully connected layers
        self.fc1 = nn.Linear(20 * 56 * 56 * 2, 224)
        self.fc2 = nn.Dropout(0.5)  # Dropout before the final classification layer
        self.fc3 = nn.Linear(224, 7)

    def forward(self, x_img, x_mask):
        out_img = self.branch1(x_img)
        out_mask = self.branch2(x_mask)

        out_img = out_img.view(out_img.size(0), -1)
        out_mask = out_mask.view(out_mask.size(0), -1)
        out = torch.cat((out_img, out_mask), dim=1)

        out = F.relu(self.fc1(out))
        out = self.fc2(out)  # Applying dropout
        out = self.fc3(out)  # Final output layer
        return out

# Initialize model
model = DualCNN()
print(model)


DualCNN(
  (branch1): Sequential(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Dropout(p=0.5, inplace=False)
  )
  (branch2): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Dropout(p=0.5, inplace=False)
  )
  (fc1): Linear(in_features=125440, out_features=224, bias=True)
  (fc2): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=224, out_features=7, bias

In [None]:

def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Initialize metrics
        train_losses, val_losses = [], []
        train_preds, train_targets = [], []
        val_preds, val_targets = [], []

        # Training Phase
        model.train()
        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate loss and predictions
            train_losses.append(loss.item())
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_targets.extend(labels.cpu().numpy())

            # Update progress bar
            train_tqdm.set_postfix(loss=np.mean(train_losses))

        # Validation Phase
        model.eval()
        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                # Accumulate loss and predictions
                val_losses.append(loss.item())
                _, predicted = torch.max(outputs, 1)
                val_preds.extend(predicted.cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

                # Update progress bar
                val_tqdm.set_postfix(loss=np.mean(val_losses))

        # Calculate metrics for training
        train_precision = precision_score(train_targets, train_preds, average='macro')
        train_recall = recall_score(train_targets, train_preds, average='macro')
        train_f1 = f1_score(train_targets, train_preds, average='macro')

        # Calculate metrics for validation
        val_precision = precision_score(val_targets, val_preds, average='macro')
        val_recall = recall_score(val_targets, val_preds, average='macro')
        val_f1 = f1_score(val_targets, val_preds, average='macro')

        # End of Epoch Summary
        print(f'Epoch {epoch+1}: Training Loss: {np.mean(train_losses):.4f}')
        print(f'Training Precision: {train_precision:.2f}, Recall: {train_recall:.2f}, F1: {train_f1:.2f}')
        print(f'Validation Loss: {np.mean(val_losses):.4f}')
        print(f'Validation Precision: {val_precision:.2f}, Recall: {val_recall:.2f}, F1: {val_f1:.2f}')


In [None]:
model = DualCNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [None]:
# 原始设置  dropout =0.5
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [01:27<00:00,  4.18s/it, loss=1.9]
Validation Epoch 1: 100%|██████████| 6/6 [00:13<00:00,  2.30s/it, loss=1.86]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 1.9040
Training Precision: 0.17, Recall: 0.15, F1: 0.13
Validation Loss: 1.8615
Validation Precision: 0.04, Recall: 0.14, F1: 0.06


Training Epoch 2: 100%|██████████| 21/21 [01:25<00:00,  4.05s/it, loss=1.88]
Validation Epoch 2: 100%|██████████| 6/6 [00:14<00:00,  2.41s/it, loss=1.94]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.8791
Training Precision: 0.14, Recall: 0.14, F1: 0.13
Validation Loss: 1.9357
Validation Precision: 0.08, Recall: 0.16, F1: 0.10


Training Epoch 3: 100%|██████████| 21/21 [01:39<00:00,  4.75s/it, loss=1.87]
Validation Epoch 3: 100%|██████████| 6/6 [00:18<00:00,  3.05s/it, loss=1.91]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 1.8678
Training Precision: 0.08, Recall: 0.17, F1: 0.11
Validation Loss: 1.9112
Validation Precision: 0.08, Recall: 0.15, F1: 0.08


Training Epoch 4: 100%|██████████| 21/21 [01:27<00:00,  4.17s/it, loss=1.8]
Validation Epoch 4: 100%|██████████| 6/6 [00:14<00:00,  2.42s/it, loss=1.87]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4: Training Loss: 1.8011
Training Precision: 0.12, Recall: 0.15, F1: 0.12
Validation Loss: 1.8691
Validation Precision: 0.07, Recall: 0.15, F1: 0.08


Training Epoch 5: 100%|██████████| 21/21 [01:34<00:00,  4.50s/it, loss=1.79]
Validation Epoch 5: 100%|██████████| 6/6 [00:15<00:00,  2.52s/it, loss=1.76]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5: Training Loss: 1.7911
Training Precision: 0.17, Recall: 0.18, F1: 0.14
Validation Loss: 1.7594
Validation Precision: 0.35, Recall: 0.35, F1: 0.31


Training Epoch 6: 100%|██████████| 21/21 [01:27<00:00,  4.15s/it, loss=1.75]
Validation Epoch 6: 100%|██████████| 6/6 [00:13<00:00,  2.33s/it, loss=1.68]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6: Training Loss: 1.7520
Training Precision: 0.39, Recall: 0.29, F1: 0.31
Validation Loss: 1.6752
Validation Precision: 0.52, Recall: 0.38, F1: 0.38


Training Epoch 7: 100%|██████████| 21/21 [01:25<00:00,  4.09s/it, loss=1.65]
Validation Epoch 7: 100%|██████████| 6/6 [00:16<00:00,  2.83s/it, loss=1.55]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7: Training Loss: 1.6477
Training Precision: 0.39, Recall: 0.27, F1: 0.26
Validation Loss: 1.5507
Validation Precision: 0.26, Recall: 0.32, F1: 0.28


Training Epoch 8: 100%|██████████| 21/21 [01:26<00:00,  4.11s/it, loss=1.6]
Validation Epoch 8: 100%|██████████| 6/6 [00:15<00:00,  2.53s/it, loss=1.49]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8: Training Loss: 1.6002
Training Precision: 0.31, Recall: 0.31, F1: 0.31
Validation Loss: 1.4929
Validation Precision: 0.31, Recall: 0.37, F1: 0.32


Training Epoch 9: 100%|██████████| 21/21 [01:26<00:00,  4.14s/it, loss=1.6]
Validation Epoch 9: 100%|██████████| 6/6 [00:14<00:00,  2.48s/it, loss=1.59]


Epoch 9: Training Loss: 1.5990
Training Precision: 0.33, Recall: 0.32, F1: 0.32
Validation Loss: 1.5914
Validation Precision: 0.48, Recall: 0.43, F1: 0.42


Training Epoch 10: 100%|██████████| 21/21 [01:25<00:00,  4.07s/it, loss=1.45]
Validation Epoch 10: 100%|██████████| 6/6 [00:15<00:00,  2.65s/it, loss=1.29]

Epoch 10: Training Loss: 1.4528
Training Precision: 0.46, Recall: 0.40, F1: 0.41
Validation Loss: 1.2939
Validation Precision: 0.55, Recall: 0.40, F1: 0.39





batchnorm + dropout

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DualCNN(nn.Module):
    def __init__(self):
        super(DualCNN, self).__init__()
        # Define the first branch for the RGB image
        self.branch1 = nn.Sequential(
            nn.Conv2d(3, 20, kernel_size=5, padding=2),  # Padding=2 to keep size constant
            nn.BatchNorm2d(20),  # Batch Normalization after convolution
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),  # Dropout after pooling

            nn.Conv2d(20, 20, kernel_size=7, padding=3),  # Larger kernel and padding
            nn.BatchNorm2d(20),  # Batch Normalization after convolution
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)  # Dropout after pooling
        )

        # Define the second branch for the mask image
        self.branch2 = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5, padding=2),
            nn.BatchNorm2d(20),  # Batch Normalization after convolution
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),  # Dropout after pooling

            nn.Conv2d(20, 20, kernel_size=7, padding=3),
            nn.BatchNorm2d(20),  # Batch Normalization after convolution
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)  # Dropout after pooling
        )

        # Fully connected layers
        self.fc1 = nn.Linear(20 * 56 * 56 * 2, 224)
        self.fc2 = nn.Dropout(0.5)  # Dropout before the final classification layer
        self.fc3 = nn.Linear(224, 7)

    def forward(self, x_img, x_mask):
        out_img = self.branch1(x_img)
        out_mask = self.branch2(x_mask)

        out_img = out_img.view(out_img.size(0), -1)
        out_mask = out_mask.view(out_mask.size(0), -1)
        out = torch.cat((out_img, out_mask), dim=1)

        out = F.relu(self.fc1(out))
        out = self.fc2(out)  # Applying dropout
        out = self.fc3(out)  # Final output layer
        return out

# Initialize model
model = DualCNN()
print(model)


DualCNN(
  (branch1): Sequential(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.25, inplace=False)
    (5): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (6): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Dropout(p=0.25, inplace=False)
  )
  (branch2): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.25, inplace=False)
    (5): Conv2d(20, 20, ker

In [None]:
model = DualCNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [None]:
# 原始设置  lr=0.001, momentum=0.9
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [01:30<00:00,  4.29s/it, loss=2.64]
Validation Epoch 1: 100%|██████████| 6/6 [00:13<00:00,  2.32s/it, loss=1.94]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 2.6386
Training Precision: 0.15, Recall: 0.15, F1: 0.14
Validation Loss: 1.9390
Validation Precision: 0.07, Recall: 0.14, F1: 0.09


Training Epoch 2: 100%|██████████| 21/21 [01:28<00:00,  4.19s/it, loss=2]
Validation Epoch 2: 100%|██████████| 6/6 [00:13<00:00,  2.28s/it, loss=1.82]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.9952
Training Precision: 0.28, Recall: 0.23, F1: 0.24
Validation Loss: 1.8172
Validation Precision: 0.08, Recall: 0.15, F1: 0.10


Training Epoch 3: 100%|██████████| 21/21 [01:27<00:00,  4.18s/it, loss=1.87]
Validation Epoch 3: 100%|██████████| 6/6 [00:13<00:00,  2.18s/it, loss=1.78]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 1.8730
Training Precision: 0.15, Recall: 0.17, F1: 0.16
Validation Loss: 1.7782
Validation Precision: 0.07, Recall: 0.15, F1: 0.10


Training Epoch 4: 100%|██████████| 21/21 [01:27<00:00,  4.16s/it, loss=1.86]
Validation Epoch 4: 100%|██████████| 6/6 [00:13<00:00,  2.24s/it, loss=1.8]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4: Training Loss: 1.8619
Training Precision: 0.21, Recall: 0.18, F1: 0.17
Validation Loss: 1.8040
Validation Precision: 0.30, Recall: 0.25, F1: 0.19


Training Epoch 5: 100%|██████████| 21/21 [01:26<00:00,  4.10s/it, loss=1.81]
Validation Epoch 5: 100%|██████████| 6/6 [00:13<00:00,  2.33s/it, loss=1.71]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5: Training Loss: 1.8091
Training Precision: 0.14, Recall: 0.18, F1: 0.15
Validation Loss: 1.7135
Validation Precision: 0.22, Recall: 0.21, F1: 0.18


Training Epoch 6: 100%|██████████| 21/21 [01:25<00:00,  4.07s/it, loss=1.81]
Validation Epoch 6: 100%|██████████| 6/6 [00:13<00:00,  2.28s/it, loss=1.75]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6: Training Loss: 1.8111
Training Precision: 0.11, Recall: 0.16, F1: 0.11
Validation Loss: 1.7523
Validation Precision: 0.07, Recall: 0.15, F1: 0.10


Training Epoch 7: 100%|██████████| 21/21 [01:26<00:00,  4.11s/it, loss=1.8]
Validation Epoch 7: 100%|██████████| 6/6 [00:13<00:00,  2.32s/it, loss=1.81]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7: Training Loss: 1.8026
Training Precision: 0.06, Recall: 0.13, F1: 0.08
Validation Loss: 1.8079
Validation Precision: 0.09, Recall: 0.16, F1: 0.11


Training Epoch 8: 100%|██████████| 21/21 [01:28<00:00,  4.20s/it, loss=1.83]
Validation Epoch 8: 100%|██████████| 6/6 [00:13<00:00,  2.25s/it, loss=1.73]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8: Training Loss: 1.8300
Training Precision: 0.11, Recall: 0.13, F1: 0.11
Validation Loss: 1.7331
Validation Precision: 0.09, Recall: 0.16, F1: 0.11


Training Epoch 9: 100%|██████████| 21/21 [01:32<00:00,  4.40s/it, loss=1.78]
Validation Epoch 9: 100%|██████████| 6/6 [00:13<00:00,  2.25s/it, loss=1.79]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 9: Training Loss: 1.7790
Training Precision: 0.20, Recall: 0.17, F1: 0.15
Validation Loss: 1.7915
Validation Precision: 0.06, Recall: 0.15, F1: 0.07


Training Epoch 10: 100%|██████████| 21/21 [01:28<00:00,  4.20s/it, loss=1.79]
Validation Epoch 10: 100%|██████████| 6/6 [00:13<00:00,  2.27s/it, loss=1.7]

Epoch 10: Training Loss: 1.7921
Training Precision: 0.17, Recall: 0.16, F1: 0.15
Validation Loss: 1.6996
Validation Precision: 0.22, Recall: 0.18, F1: 0.15



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Use Data augmentation

In [None]:
class HandGestureDataset(Dataset):
    def __init__(self, rgb_dir, mask_dir, transform=None):
        """
        Args:
            rgb_dir (string): Directory with all the RGB images divided into subdirectories.
            mask_dir (string): Directory with all the mask images divided into the same subdirectories as RGB images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.rgb_dir = rgb_dir
        self.mask_dir = mask_dir
        self.rgb_transform = rgb_transform
        self.mask_transform = mask_transform
        self.samples = self._load_samples()

    def _load_samples(self):
        samples = []
        label_mapping = {'one': 0, 'two': 1, 'three': 2, 'four': 3, 'five': 4, 'like': 5, 'ok': 6}

        # Iterate over all categories (e.g., 'one', 'two', etc.)
        for category in label_mapping:
            rgb_path = os.path.join(self.rgb_dir, category)
            mask_path = os.path.join(self.mask_dir, category)
            for filename in os.listdir(rgb_path):
                if filename.endswith('.jpg'):
                    # Constructing the mask filename based on the RGB filename
                    # RGB filename like '2_Original_046.jpg' corresponds to mask '2_Mask_046.jpg'
                    mask_filename = filename.replace('Original', 'Mask')
                    file_rgb_path = os.path.join(rgb_path, filename)
                    file_mask_path = os.path.join(mask_path, mask_filename)
                    samples.append((file_rgb_path, file_mask_path, label_mapping[category]))
        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        rgb_path, mask_path, label = self.samples[idx]
        rgb_image = Image.open(rgb_path).convert('RGB')
        mask_image = Image.open(mask_path).convert('L')  # 确保掩码是单通道的

        if self.rgb_transform:
            rgb_image = self.rgb_transform(rgb_image)
        if self.mask_transform:
            mask_image = self.mask_transform(mask_image)

        return rgb_image, mask_image, label

# Transformation to apply
# transform = transforms.Compose([
#     transforms.Resize((224, 224)),  # Resize all images to the same size for model consistency
#     transforms.RandomHorizontalFlip(),  # Randomly flip images horizontally
#     transforms.RandomRotation(15),  # Randomly rotate images by up to 15 degrees
#     transforms.ColorJitter(brightness=0.1, contrast=0.1),  # Randomly jitter brightness and contrast
#     transforms.ToTensor(),  # Convert images to PyTorch tensors
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize tensors
# ])


rgb_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 保持大小一致
    transforms.RandomHorizontalFlip(),  # 随机水平翻转
    transforms.RandomRotation(15),  # 随机旋转
    transforms.ColorJitter(brightness=0.1, contrast=0.1),  # 调整亮度和对比度
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 归一化
])

mask_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),  # 掩码也需要翻转以匹配图像的变换
    transforms.RandomRotation(15),  # 掩码也需要旋转
    transforms.ToTensor()  # 直接转为张量，不需要归一化
])

In [None]:
dataset = HandGestureDataset('/content/drive/MyDrive/CS445/Final Project/rgb', '/content/drive/MyDrive/CS445/Final Project/generate', transform=transform)
for i in range(5):  # Check first 5 samples
    print(dataset.samples[i])


('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_018.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_018.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_022.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_022.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_010.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_010.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_021.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_021.jpg', 0)
('/content/drive/MyDrive/CS445/Final Project/rgb/one/1_Original_024.jpg', '/content/drive/MyDrive/CS445/Final Project/generate/one/1_Mask_024.jpg', 0)


In [None]:
# hand_dir = '/content/drive/MyDrive/CS445/Final Project/rgb'
# mask_dir = '/content/drive/MyDrive/CS445/Final Project/generate'
# dataset = HandGestureDataset(hand_dir, mask_dir, transform=transform)

# Split the dataset into training and testing
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# DataLoader setup
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)


In [None]:
class DualCNN(nn.Module):
    def __init__(self):
        super(DualCNN, self).__init__()
        # Define the first branch for the RGB image
        self.branch1 = nn.Sequential(
            nn.Conv2d(3, 20, kernel_size=5, padding=2),  # Padding=2 to keep size constant
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),  # Larger kernel and padding
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Define the second branch for the mask image
        self.branch2 = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Define the fully connected layers
        self.fc1 = nn.Linear(20 * 56 * 56 * 2, 224)  # calculated
        self.fc2 = nn.Linear(224, 7)

    def forward(self, x_img, x_mask):
        out_img = self.branch1(x_img)
        out_mask = self.branch2(x_mask)

        # print("Output size after branch1:", out_img.shape)  # Debug: Check output size
        # print("Output size after branch2:", out_mask.shape)

        out_img = out_img.view(out_img.size(0), -1)
        out_mask = out_mask.view(out_mask.size(0), -1)
        out = torch.cat((out_img, out_mask), dim=1)

        # print("Concatenated output size:", out.shape)  # Debug: Check concatenated size

        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return out

# Initialize model
model = DualCNN()
print(model)


DualCNN(
  (branch1): Sequential(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (branch2): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=125440, out_features=224, bias=True)
  (fc2): Linear(in_features=224, out_features=7, bias=True)
)


In [None]:

def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Initialize metrics
        train_losses, val_losses = [], []
        train_preds, train_targets = [], []
        val_preds, val_targets = [], []

        # Training Phase
        model.train()
        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate loss and predictions
            train_losses.append(loss.item())
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_targets.extend(labels.cpu().numpy())

            # Update progress bar
            train_tqdm.set_postfix(loss=np.mean(train_losses))

        # Validation Phase
        model.eval()
        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                # Accumulate loss and predictions
                val_losses.append(loss.item())
                _, predicted = torch.max(outputs, 1)
                val_preds.extend(predicted.cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

                # Update progress bar
                val_tqdm.set_postfix(loss=np.mean(val_losses))

        # Calculate metrics for training
        train_precision = precision_score(train_targets, train_preds, average='macro')
        train_recall = recall_score(train_targets, train_preds, average='macro')
        train_f1 = f1_score(train_targets, train_preds, average='macro')

        # Calculate metrics for validation
        val_precision = precision_score(val_targets, val_preds, average='macro')
        val_recall = recall_score(val_targets, val_preds, average='macro')
        val_f1 = f1_score(val_targets, val_preds, average='macro')

        # End of Epoch Summary
        print(f'Epoch {epoch+1}: Training Loss: {np.mean(train_losses):.4f}')
        print(f'Training Precision: {train_precision:.2f}, Recall: {train_recall:.2f}, F1: {train_f1:.2f}')
        print(f'Validation Loss: {np.mean(val_losses):.4f}')
        print(f'Validation Precision: {val_precision:.2f}, Recall: {val_recall:.2f}, F1: {val_f1:.2f}')


In [None]:
model = DualCNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [None]:
# 原始设置
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [01:25<00:00,  4.05s/it, loss=1.87]
Validation Epoch 1: 100%|██████████| 6/6 [00:14<00:00,  2.40s/it, loss=1.84]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 1.8749
Training Precision: 0.13, Recall: 0.15, F1: 0.13
Validation Loss: 1.8356
Validation Precision: 0.09, Recall: 0.18, F1: 0.12


Training Epoch 2: 100%|██████████| 21/21 [01:24<00:00,  4.03s/it, loss=1.82]
Validation Epoch 2: 100%|██████████| 6/6 [00:16<00:00,  2.77s/it, loss=1.87]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.8159
Training Precision: 0.11, Recall: 0.18, F1: 0.13
Validation Loss: 1.8692
Validation Precision: 0.04, Recall: 0.13, F1: 0.06


Training Epoch 3: 100%|██████████| 21/21 [01:24<00:00,  4.03s/it, loss=1.79]
Validation Epoch 3: 100%|██████████| 6/6 [00:14<00:00,  2.48s/it, loss=1.76]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 1.7855
Training Precision: 0.26, Recall: 0.19, F1: 0.16
Validation Loss: 1.7585
Validation Precision: 0.32, Recall: 0.27, F1: 0.23


Training Epoch 4: 100%|██████████| 21/21 [01:23<00:00,  3.98s/it, loss=1.77]
Validation Epoch 4: 100%|██████████| 6/6 [00:13<00:00,  2.30s/it, loss=1.69]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4: Training Loss: 1.7683
Training Precision: 0.25, Recall: 0.21, F1: 0.20
Validation Loss: 1.6888
Validation Precision: 0.33, Recall: 0.31, F1: 0.25


Training Epoch 5: 100%|██████████| 21/21 [01:25<00:00,  4.05s/it, loss=1.66]
Validation Epoch 5: 100%|██████████| 6/6 [00:13<00:00,  2.30s/it, loss=1.49]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5: Training Loss: 1.6606
Training Precision: 0.44, Recall: 0.31, F1: 0.28
Validation Loss: 1.4938
Validation Precision: 0.16, Recall: 0.28, F1: 0.18


Training Epoch 6: 100%|██████████| 21/21 [01:22<00:00,  3.95s/it, loss=1.58]
Validation Epoch 6: 100%|██████████| 6/6 [00:13<00:00,  2.28s/it, loss=1.51]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6: Training Loss: 1.5826
Training Precision: 0.29, Recall: 0.27, F1: 0.25
Validation Loss: 1.5051
Validation Precision: 0.36, Recall: 0.33, F1: 0.31


Training Epoch 7: 100%|██████████| 21/21 [01:23<00:00,  3.97s/it, loss=1.61]
Validation Epoch 7: 100%|██████████| 6/6 [00:13<00:00,  2.32s/it, loss=1.57]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7: Training Loss: 1.6080
Training Precision: 0.40, Recall: 0.35, F1: 0.36
Validation Loss: 1.5667
Validation Precision: 0.15, Recall: 0.35, F1: 0.21


Training Epoch 8: 100%|██████████| 21/21 [01:21<00:00,  3.88s/it, loss=1.57]
Validation Epoch 8: 100%|██████████| 6/6 [00:14<00:00,  2.35s/it, loss=1.72]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8: Training Loss: 1.5689
Training Precision: 0.45, Recall: 0.29, F1: 0.27
Validation Loss: 1.7211
Validation Precision: 0.45, Recall: 0.34, F1: 0.33


Training Epoch 9: 100%|██████████| 21/21 [01:20<00:00,  3.84s/it, loss=1.44]
Validation Epoch 9: 100%|██████████| 6/6 [00:15<00:00,  2.57s/it, loss=1.36]


Epoch 9: Training Loss: 1.4419
Training Precision: 0.43, Recall: 0.36, F1: 0.37
Validation Loss: 1.3568
Validation Precision: 0.48, Recall: 0.34, F1: 0.28


Training Epoch 10: 100%|██████████| 21/21 [01:20<00:00,  3.85s/it, loss=1.39]
Validation Epoch 10: 100%|██████████| 6/6 [00:13<00:00,  2.28s/it, loss=1.47]

Epoch 10: Training Loss: 1.3852
Training Precision: 0.49, Recall: 0.44, F1: 0.45
Validation Loss: 1.4690
Validation Precision: 0.29, Recall: 0.40, F1: 0.30



  _warn_prf(average, modifier, msg_start, len(result))


### Hyperparameter tuning （没整）

In [None]:
class DualCNN(nn.Module):
    def __init__(self, num_filters=20, kernel_size1=5, kernel_size2=7, padding1=2, padding2=3):
        super(DualCNN, self).__init__()
        # First branch for the RGB image
        self.branch1 = nn.Sequential(
            nn.Conv2d(3, num_filters, kernel_size=kernel_size1, padding=padding1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(num_filters, num_filters, kernel_size=kernel_size2, padding=padding2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Second branch for the mask image
        self.branch2 = nn.Sequential(
            nn.Conv2d(1, num_filters, kernel_size=kernel_size1, padding=padding1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(num_filters, num_filters, kernel_size=kernel_size2, padding=padding2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Fully connected layers
        self.fc1 = nn.Linear(num_filters * 56 * 56 * 2, 224)  # Adjust size calculation as necessary
        self.fc2 = nn.Linear(224, 7)

    def forward(self, x_img, x_mask):
        out_img = self.branch1(x_img)
        out_mask = self.branch2(x_mask)

        out_img = out_img.view(out_img.size(0), -1)
        out_mask = out_mask.view(out_mask.size(0), -1)
        out = torch.cat((out_img, out_mask), dim=1)

        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return out


In [None]:
import optuna
import torch.optim as optim

def objective(trial):
    # Hyperparameters to tune
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    num_filters = trial.suggest_categorical("num_filters", [16, 32, 64])
    kernel_size1 = trial.suggest_categorical("kernel_size1", [3, 5, 7])

    model = DualCNN(num_filters=num_filters, kernel_size1=kernel_size1)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(10):  # Reduced number of epochs for quick tuning
        model.train()
        for data_img, data_mask, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation phase
        model.eval()
        accuracy = evaluate(model, val_loader)  # Define a function to calculate validation accuracy

    return accuracy  # Objective value to maximize

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

print("Best hyperparameters:", study.best_trial.params)


[I 2024-05-03 20:32:36,911] A new study created in memory with name: no-name-af2aee04-8f52-43d3-8139-44a75cd0bb36
[W 2024-05-03 20:33:17,104] Trial 0 failed with parameters: {'lr': 0.00039582641463206545, 'num_filters': 16, 'kernel_size1': 5} because of the following error: NameError("name 'evaluate' is not defined").
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-36-58233ae8c4dd>", line 25, in objective
    accuracy = evaluate(model, val_loader)  # Define a function to calculate validation accuracy
NameError: name 'evaluate' is not defined
[W 2024-05-03 20:33:17,106] Trial 0 failed with value None.


NameError: name 'evaluate' is not defined

### DC CNN + ATTENTION

In [None]:
class AttentionGate(nn.Module):
    def __init__(self, F_g, F_l, F_int):
        super(AttentionGate, self).__init__()
        self.W_g = nn.Sequential(
            nn.Conv2d(F_g, F_int, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(F_int)
        )
        self.W_x = nn.Sequential(
            nn.Conv2d(F_l, F_int, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(F_int)
        )
        self.psi = nn.Sequential(
            nn.Conv2d(F_int, 1, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(1),
            nn.Sigmoid()
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        g1 = self.W_g(g)
        x1 = self.W_x(x)
        psi = self.relu(g1 + x1)
        psi = self.psi(psi)
        return x * psi

class DualCNN_attention(nn.Module):
    def __init__(self):
        super(DualCNN_attention, self).__init__()
        # Define the first branch for the RGB image
        self.branch1 = nn.Sequential(
            nn.Conv2d(3, 20, kernel_size=5, padding=2),  # Padding=2 to keep size constant
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),  # Larger kernel and padding
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Define the second branch for the mask image
        self.branch2 = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Attention Gate
        self.attention_gate = AttentionGate(F_g=20, F_l=20, F_int=10)

        # Define the fully connected layers
        self.fc1 = nn.Linear(20 * 56 * 56 * 2, 224)  # calculated
        self.fc2 = nn.Linear(224, 7)

    def forward(self, x_img, x_mask):
        out_img = self.branch1(x_img)
        out_mask = self.branch2(x_mask)

        # Apply attention
        out_mask = self.attention_gate(out_img, out_mask)

        out_img = out_img.view(out_img.size(0), -1)
        out_mask = out_mask.view(out_mask.size(0), -1)
        out = torch.cat((out_img, out_mask), dim=1)

        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return out


In [None]:
model = DualCNN_attention()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [None]:
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [01:34<00:00,  4.49s/it, loss=1.9]
Validation Epoch 1: 100%|██████████| 6/6 [00:20<00:00,  3.40s/it, loss=1.87]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 1.8974
Training Precision: 0.12, Recall: 0.15, F1: 0.12
Validation Loss: 1.8740
Validation Precision: 0.03, Recall: 0.14, F1: 0.05


Training Epoch 2: 100%|██████████| 21/21 [01:29<00:00,  4.25s/it, loss=1.78]
Validation Epoch 2: 100%|██████████| 6/6 [00:16<00:00,  2.69s/it, loss=1.57]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.7784
Training Precision: 0.31, Recall: 0.22, F1: 0.20
Validation Loss: 1.5694
Validation Precision: 0.37, Recall: 0.29, F1: 0.25


Training Epoch 3: 100%|██████████| 21/21 [01:32<00:00,  4.41s/it, loss=1.63]
Validation Epoch 3: 100%|██████████| 6/6 [00:17<00:00,  2.89s/it, loss=1.74]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 1.6263
Training Precision: 0.38, Recall: 0.37, F1: 0.37
Validation Loss: 1.7401
Validation Precision: 0.22, Recall: 0.19, F1: 0.15


Training Epoch 4: 100%|██████████| 21/21 [01:32<00:00,  4.41s/it, loss=1.51]
Validation Epoch 4: 100%|██████████| 6/6 [00:16<00:00,  2.77s/it, loss=1.57]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4: Training Loss: 1.5110
Training Precision: 0.37, Recall: 0.37, F1: 0.36
Validation Loss: 1.5689
Validation Precision: 0.41, Recall: 0.29, F1: 0.31


Training Epoch 5: 100%|██████████| 21/21 [01:32<00:00,  4.38s/it, loss=1.44]
Validation Epoch 5: 100%|██████████| 6/6 [00:16<00:00,  2.72s/it, loss=1.6]


Epoch 5: Training Loss: 1.4430
Training Precision: 0.49, Recall: 0.44, F1: 0.46
Validation Loss: 1.5979
Validation Precision: 0.37, Recall: 0.31, F1: 0.29


Training Epoch 6: 100%|██████████| 21/21 [01:30<00:00,  4.30s/it, loss=1.26]
Validation Epoch 6: 100%|██████████| 6/6 [00:16<00:00,  2.76s/it, loss=2.25]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6: Training Loss: 1.2640
Training Precision: 0.59, Recall: 0.52, F1: 0.54
Validation Loss: 2.2527
Validation Precision: 0.26, Recall: 0.35, F1: 0.27


Training Epoch 7: 100%|██████████| 21/21 [01:40<00:00,  4.79s/it, loss=1.3]
Validation Epoch 7: 100%|██████████| 6/6 [00:17<00:00,  2.90s/it, loss=2.11]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7: Training Loss: 1.3042
Training Precision: 0.53, Recall: 0.51, F1: 0.51
Validation Loss: 2.1095
Validation Precision: 0.18, Recall: 0.34, F1: 0.22


Training Epoch 8: 100%|██████████| 21/21 [01:34<00:00,  4.51s/it, loss=1.11]
Validation Epoch 8: 100%|██████████| 6/6 [00:16<00:00,  2.77s/it, loss=2.03]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8: Training Loss: 1.1100
Training Precision: 0.63, Recall: 0.55, F1: 0.57
Validation Loss: 2.0338
Validation Precision: 0.22, Recall: 0.36, F1: 0.26


Training Epoch 9: 100%|██████████| 21/21 [01:29<00:00,  4.28s/it, loss=1.04]
Validation Epoch 9: 100%|██████████| 6/6 [00:16<00:00,  2.75s/it, loss=1.79]


Epoch 9: Training Loss: 1.0383
Training Precision: 0.64, Recall: 0.57, F1: 0.59
Validation Loss: 1.7899
Validation Precision: 0.33, Recall: 0.36, F1: 0.32


Training Epoch 10: 100%|██████████| 21/21 [01:32<00:00,  4.39s/it, loss=0.672]
Validation Epoch 10: 100%|██████████| 6/6 [00:16<00:00,  2.79s/it, loss=2.07]

Epoch 10: Training Loss: 0.6715
Training Precision: 0.76, Recall: 0.75, F1: 0.75
Validation Loss: 2.0661
Validation Precision: 0.35, Recall: 0.43, F1: 0.32





### DC + ResNet18 (LIGHTWEIGHT)

In [151]:
class DualResNet(nn.Module):
    def __init__(self, num_classes=7):
        super(DualResNet, self).__init__()

        # Initialize ResNet backbones
        self.resnet_branch1 = models.resnet18(pretrained=True)  # pre-trained on imagenet
        self.resnet_branch1.fc = nn.Identity()

        self.resnet_branch2 = models.resnet18(pretrained=True)
        self.resnet_branch2.fc = nn.Identity()

        # Modify the first convolution layer of the second branch to accept 1-channel input
        self.resnet_branch2.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

        # Fully connected layers
        self.fc1 = nn.Linear(512 * 2, 224)  # the output of each ResNet is 512 features
        self.fc2 = nn.Linear(224, num_classes)

    def forward(self, x_img, x_mask):
        # Process inputs through each ResNet branch
        out_img = self.resnet_branch1(x_img)
        out_mask = self.resnet_branch2(x_mask)

        # Concatenate features from both branches
        out = torch.cat((out_img, out_mask), dim=1)

        # Pass through fully connected layers
        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return out

# Initialize and print model
model = DualResNet()
print(model)




DualResNet(
  (resnet_branch1): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,

In [147]:

def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Initialize metrics
        train_losses, val_losses = [], []
        train_preds, train_targets = [], []
        val_preds, val_targets = [], []

        # Training Phase
        model.train()
        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate loss and predictions
            train_losses.append(loss.item())
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_targets.extend(labels.cpu().numpy())

            # Update progress bar
            train_tqdm.set_postfix(loss=np.mean(train_losses))

        # Validation Phase
        model.eval()
        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                # Accumulate loss and predictions
                val_losses.append(loss.item())
                _, predicted = torch.max(outputs, 1)
                val_preds.extend(predicted.cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

                # Update progress bar
                val_tqdm.set_postfix(loss=np.mean(val_losses))

        # Calculate metrics for training
        train_precision = precision_score(train_targets, train_preds, average='macro')
        train_recall = recall_score(train_targets, train_preds, average='macro')
        train_f1 = f1_score(train_targets, train_preds, average='macro')

        # Calculate metrics for validation
        val_precision = precision_score(val_targets, val_preds, average='macro')
        val_recall = recall_score(val_targets, val_preds, average='macro')
        val_f1 = f1_score(val_targets, val_preds, average='macro')

        # End of Epoch Summary
        print(f'Epoch {epoch+1}: Training Loss: {np.mean(train_losses):.4f}')
        print(f'Training Precision: {train_precision:.2f}, Recall: {train_recall:.2f}, F1: {train_f1:.2f}')
        print(f'Validation Loss: {np.mean(val_losses):.4f}')
        print(f'Validation Precision: {val_precision:.2f}, Recall: {val_recall:.2f}, F1: {val_f1:.2f}')


In [149]:
model = DualResNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [150]:
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [02:51<00:00,  8.17s/it, loss=1.8]
Validation Epoch 1: 100%|██████████| 6/6 [00:22<00:00,  3.70s/it, loss=1.97]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 1.8014
Training Precision: 0.25, Recall: 0.20, F1: 0.20
Validation Loss: 1.9747
Validation Precision: 0.09, Recall: 0.15, F1: 0.06


Training Epoch 2: 100%|██████████| 21/21 [02:42<00:00,  7.72s/it, loss=1.28]
Validation Epoch 2: 100%|██████████| 6/6 [00:19<00:00,  3.25s/it, loss=1.42]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.2844
Training Precision: 0.55, Recall: 0.44, F1: 0.43
Validation Loss: 1.4238
Validation Precision: 0.50, Recall: 0.51, F1: 0.44


Training Epoch 3: 100%|██████████| 21/21 [02:43<00:00,  7.80s/it, loss=0.918]
Validation Epoch 3: 100%|██████████| 6/6 [00:20<00:00,  3.44s/it, loss=1.95]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 0.9180
Training Precision: 0.62, Recall: 0.60, F1: 0.60
Validation Loss: 1.9499
Validation Precision: 0.43, Recall: 0.40, F1: 0.38


Training Epoch 4: 100%|██████████| 21/21 [02:41<00:00,  7.68s/it, loss=1.13]
Validation Epoch 4: 100%|██████████| 6/6 [00:21<00:00,  3.57s/it, loss=1.17]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4: Training Loss: 1.1297
Training Precision: 0.62, Recall: 0.59, F1: 0.60
Validation Loss: 1.1745
Validation Precision: 0.47, Recall: 0.56, F1: 0.49


Training Epoch 5: 100%|██████████| 21/21 [02:43<00:00,  7.78s/it, loss=1.03]
Validation Epoch 5: 100%|██████████| 6/6 [00:19<00:00,  3.19s/it, loss=1.15]


Epoch 5: Training Loss: 1.0345
Training Precision: 0.64, Recall: 0.63, F1: 0.63
Validation Loss: 1.1539
Validation Precision: 0.52, Recall: 0.55, F1: 0.52


Training Epoch 6: 100%|██████████| 21/21 [02:43<00:00,  7.79s/it, loss=1.12]
Validation Epoch 6: 100%|██████████| 6/6 [00:19<00:00,  3.23s/it, loss=1.13]


Epoch 6: Training Loss: 1.1164
Training Precision: 0.62, Recall: 0.65, F1: 0.63
Validation Loss: 1.1285
Validation Precision: 0.65, Recall: 0.61, F1: 0.55


Training Epoch 7: 100%|██████████| 21/21 [02:43<00:00,  7.79s/it, loss=0.709]
Validation Epoch 7: 100%|██████████| 6/6 [00:20<00:00,  3.43s/it, loss=1.54]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7: Training Loss: 0.7092
Training Precision: 0.73, Recall: 0.74, F1: 0.73
Validation Loss: 1.5393
Validation Precision: 0.55, Recall: 0.58, F1: 0.55


Training Epoch 8: 100%|██████████| 21/21 [02:40<00:00,  7.65s/it, loss=0.986]
Validation Epoch 8: 100%|██████████| 6/6 [00:20<00:00,  3.42s/it, loss=1.09]


Epoch 8: Training Loss: 0.9857
Training Precision: 0.67, Recall: 0.65, F1: 0.66
Validation Loss: 1.0948
Validation Precision: 0.61, Recall: 0.53, F1: 0.53


Training Epoch 9: 100%|██████████| 21/21 [02:44<00:00,  7.81s/it, loss=0.559]
Validation Epoch 9: 100%|██████████| 6/6 [00:24<00:00,  4.08s/it, loss=2.21]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 9: Training Loss: 0.5594
Training Precision: 0.83, Recall: 0.83, F1: 0.83
Validation Loss: 2.2094
Validation Precision: 0.44, Recall: 0.47, F1: 0.44


Training Epoch 10: 100%|██████████| 21/21 [02:51<00:00,  8.18s/it, loss=0.815]
Validation Epoch 10: 100%|██████████| 6/6 [00:20<00:00,  3.46s/it, loss=0.77]

Epoch 10: Training Loss: 0.8154
Training Precision: 0.75, Recall: 0.75, F1: 0.75
Validation Loss: 0.7703
Validation Precision: 0.64, Recall: 0.64, F1: 0.59





### DC + VGG 19

In [153]:
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F

class DualVGG(nn.Module):
    def __init__(self, num_classes=7):
        super(DualVGG, self).__init__()

        # Load pre-trained VGG16 for the first branch (handling RGB images)
        base_model1 = models.vgg16(pretrained=True)
        # Correctly handling the avgpool
        self.vgg_branch1 = nn.Sequential(
            *base_model1.features,
            base_model1.avgpool,
            nn.Flatten()
        )

        # Load pre-trained VGG16 for the second branch (handling mask images)
        base_model2 = models.vgg16(pretrained=True)
        # Modify the first convolutional layer to accept 1-channel input
        base_model2.features[0] = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        # Correctly handling the avgpool
        self.vgg_branch2 = nn.Sequential(
            *base_model2.features,
            base_model2.avgpool,
            nn.Flatten()
        )

        # Define the classifier
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7 * 2, 4096),  # Adjust based on the output size from the avgpool layer
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, num_classes)  # Outputs for 7 classes
        )

    def forward(self, x_img, x_mask):
        # Process inputs through each VGG branch
        x_img = self.vgg_branch1(x_img)
        x_mask = self.vgg_branch2(x_mask)

        # Concatenate features from both branches
        x = torch.cat((x_img, x_mask), dim=1)

        # Classification
        x = self.classifier(x)
        return x

# Initialize and print model
model = DualVGG(num_classes=7)  # Explicitly setting number of classes to 7
print(model)


DualVGG(
  (vgg_branch1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dil

In [154]:

def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Initialize metrics
        train_losses, val_losses = [], []
        train_preds, train_targets = [], []
        val_preds, val_targets = [], []

        # Training Phase
        model.train()
        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate loss and predictions
            train_losses.append(loss.item())
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_targets.extend(labels.cpu().numpy())

            # Update progress bar
            train_tqdm.set_postfix(loss=np.mean(train_losses))

        # Validation Phase
        model.eval()
        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                # Accumulate loss and predictions
                val_losses.append(loss.item())
                _, predicted = torch.max(outputs, 1)
                val_preds.extend(predicted.cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

                # Update progress bar
                val_tqdm.set_postfix(loss=np.mean(val_losses))

        # Calculate metrics for training
        train_precision = precision_score(train_targets, train_preds, average='macro')
        train_recall = recall_score(train_targets, train_preds, average='macro')
        train_f1 = f1_score(train_targets, train_preds, average='macro')

        # Calculate metrics for validation
        val_precision = precision_score(val_targets, val_preds, average='macro')
        val_recall = recall_score(val_targets, val_preds, average='macro')
        val_f1 = f1_score(val_targets, val_preds, average='macro')

        # End of Epoch Summary
        print(f'Epoch {epoch+1}: Training Loss: {np.mean(train_losses):.4f}')
        print(f'Training Precision: {train_precision:.2f}, Recall: {train_recall:.2f}, F1: {train_f1:.2f}')
        print(f'Validation Loss: {np.mean(val_losses):.4f}')
        print(f'Validation Precision: {val_precision:.2f}, Recall: {val_recall:.2f}, F1: {val_f1:.2f}')


In [158]:
model = DualVGG()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [159]:
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [16:30<00:00, 47.16s/it, loss=1.92]
Validation Epoch 1: 100%|██████████| 6/6 [01:24<00:00, 14.02s/it, loss=1.86]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 1.9186
Training Precision: 0.19, Recall: 0.14, F1: 0.10
Validation Loss: 1.8608
Validation Precision: 0.07, Recall: 0.18, F1: 0.10


Training Epoch 2: 100%|██████████| 21/21 [16:02<00:00, 45.85s/it, loss=1.76]
Validation Epoch 2: 100%|██████████| 6/6 [01:14<00:00, 12.44s/it, loss=1.67]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.7553
Training Precision: 0.26, Recall: 0.20, F1: 0.16
Validation Loss: 1.6705
Validation Precision: 0.49, Recall: 0.39, F1: 0.38


Training Epoch 3: 100%|██████████| 21/21 [15:39<00:00, 44.73s/it, loss=1.48]
Validation Epoch 3: 100%|██████████| 6/6 [01:16<00:00, 12.79s/it, loss=1.04]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 1.4776
Training Precision: 0.45, Recall: 0.36, F1: 0.37
Validation Loss: 1.0381
Validation Precision: 0.56, Recall: 0.57, F1: 0.55


Training Epoch 4: 100%|██████████| 21/21 [15:40<00:00, 44.76s/it, loss=1.16]
Validation Epoch 4: 100%|██████████| 6/6 [01:15<00:00, 12.52s/it, loss=1.04]


Epoch 4: Training Loss: 1.1562
Training Precision: 0.61, Recall: 0.54, F1: 0.57
Validation Loss: 1.0388
Validation Precision: 0.51, Recall: 0.61, F1: 0.54


Training Epoch 5: 100%|██████████| 21/21 [15:38<00:00, 44.67s/it, loss=0.991]
Validation Epoch 5: 100%|██████████| 6/6 [01:17<00:00, 12.86s/it, loss=0.638]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5: Training Loss: 0.9905
Training Precision: 0.69, Recall: 0.66, F1: 0.67
Validation Loss: 0.6381
Validation Precision: 0.66, Recall: 0.71, F1: 0.67


Training Epoch 6: 100%|██████████| 21/21 [15:38<00:00, 44.68s/it, loss=0.657]
Validation Epoch 6: 100%|██████████| 6/6 [01:16<00:00, 12.67s/it, loss=0.84]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6: Training Loss: 0.6570
Training Precision: 0.80, Recall: 0.79, F1: 0.80
Validation Loss: 0.8402
Validation Precision: 0.75, Recall: 0.73, F1: 0.71


Training Epoch 7: 100%|██████████| 21/21 [15:37<00:00, 44.65s/it, loss=0.616]
Validation Epoch 7: 100%|██████████| 6/6 [01:15<00:00, 12.54s/it, loss=0.873]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7: Training Loss: 0.6157
Training Precision: 0.81, Recall: 0.73, F1: 0.76
Validation Loss: 0.8732
Validation Precision: 0.64, Recall: 0.67, F1: 0.64


Training Epoch 8: 100%|██████████| 21/21 [15:35<00:00, 44.55s/it, loss=0.595]
Validation Epoch 8: 100%|██████████| 6/6 [01:16<00:00, 12.69s/it, loss=0.805]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8: Training Loss: 0.5952
Training Precision: 0.80, Recall: 0.79, F1: 0.79
Validation Loss: 0.8048
Validation Precision: 0.67, Recall: 0.71, F1: 0.68


Training Epoch 9: 100%|██████████| 21/21 [15:43<00:00, 44.91s/it, loss=0.474]
Validation Epoch 9: 100%|██████████| 6/6 [01:16<00:00, 12.76s/it, loss=0.715]


Epoch 9: Training Loss: 0.4744
Training Precision: 0.88, Recall: 0.86, F1: 0.87
Validation Loss: 0.7152
Validation Precision: 0.79, Recall: 0.81, F1: 0.79


Training Epoch 10: 100%|██████████| 21/21 [15:48<00:00, 45.15s/it, loss=0.371]
Validation Epoch 10: 100%|██████████| 6/6 [01:16<00:00, 12.81s/it, loss=0.318]

Epoch 10: Training Loss: 0.3706
Training Precision: 0.88, Recall: 0.87, F1: 0.88
Validation Loss: 0.3183
Validation Precision: 0.87, Recall: 0.84, F1: 0.85





### DC+ DenseNet

In [160]:
class DualDenseNet(nn.Module):
    def __init__(self, num_classes=7):
        super(DualDenseNet, self).__init__()

        # Load pre-trained DenseNet121 for the first branch (handling RGB images)
        base_model1 = models.densenet121(pretrained=True)
        self.dense_branch1 = nn.Sequential(*list(base_model1.features), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten())

        # Load pre-trained DenseNet121 for the second branch (handling mask images)
        base_model2 = models.densenet121(pretrained=True)
        # Modify the first convolutional layer to accept 1-channel input
        base_model2.features.conv0 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.dense_branch2 = nn.Sequential(*list(base_model2.features), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten())

        # Define the classifier
        self.classifier = nn.Sequential(
            nn.Linear(1024 * 2, 512),  # DenseNet121 outputs 1024 features, concatenated from both branches
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, num_classes)
        )

    def forward(self, x_img, x_mask):
        # Process inputs through each DenseNet branch
        x_img = self.dense_branch1(x_img)
        x_mask = self.dense_branch2(x_mask)

        # Concatenate features from both branches
        x = torch.cat((x_img, x_mask), dim=1)

        # Classification
        x = self.classifier(x)
        return x

# Initialize and print model
model = DualDenseNet(num_classes=7)
print(model)


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 82.1MB/s]


DualDenseNet(
  (dense_branch1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=

In [161]:

def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Initialize metrics
        train_losses, val_losses = [], []
        train_preds, train_targets = [], []
        val_preds, val_targets = [], []

        # Training Phase
        model.train()
        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate loss and predictions
            train_losses.append(loss.item())
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_targets.extend(labels.cpu().numpy())

            # Update progress bar
            train_tqdm.set_postfix(loss=np.mean(train_losses))

        # Validation Phase
        model.eval()
        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                # Accumulate loss and predictions
                val_losses.append(loss.item())
                _, predicted = torch.max(outputs, 1)
                val_preds.extend(predicted.cpu().numpy())
                val_targets.extend(labels.cpu().numpy())

                # Update progress bar
                val_tqdm.set_postfix(loss=np.mean(val_losses))

        # Calculate metrics for training
        train_precision = precision_score(train_targets, train_preds, average='macro')
        train_recall = recall_score(train_targets, train_preds, average='macro')
        train_f1 = f1_score(train_targets, train_preds, average='macro')

        # Calculate metrics for validation
        val_precision = precision_score(val_targets, val_preds, average='macro')
        val_recall = recall_score(val_targets, val_preds, average='macro')
        val_f1 = f1_score(val_targets, val_preds, average='macro')

        # End of Epoch Summary
        print(f'Epoch {epoch+1}: Training Loss: {np.mean(train_losses):.4f}')
        print(f'Training Precision: {train_precision:.2f}, Recall: {train_recall:.2f}, F1: {train_f1:.2f}')
        print(f'Validation Loss: {np.mean(val_losses):.4f}')
        print(f'Validation Precision: {val_precision:.2f}, Recall: {val_recall:.2f}, F1: {val_f1:.2f}')


In [162]:
model = DualDenseNet()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [163]:
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 21/21 [04:35<00:00, 13.12s/it, loss=1.93]
Validation Epoch 1: 100%|██████████| 6/6 [00:28<00:00,  4.68s/it, loss=1.9]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1: Training Loss: 1.9310
Training Precision: 0.09, Recall: 0.10, F1: 0.09
Validation Loss: 1.9030
Validation Precision: 0.14, Recall: 0.22, F1: 0.16


Training Epoch 2: 100%|██████████| 21/21 [04:37<00:00, 13.20s/it, loss=1.77]
Validation Epoch 2: 100%|██████████| 6/6 [00:31<00:00,  5.29s/it, loss=1.81]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2: Training Loss: 1.7738
Training Precision: 0.18, Recall: 0.16, F1: 0.12
Validation Loss: 1.8092
Validation Precision: 0.25, Recall: 0.22, F1: 0.18


Training Epoch 3: 100%|██████████| 21/21 [04:37<00:00, 13.24s/it, loss=1.67]
Validation Epoch 3: 100%|██████████| 6/6 [00:27<00:00,  4.65s/it, loss=1.72]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3: Training Loss: 1.6656
Training Precision: 0.43, Recall: 0.28, F1: 0.29
Validation Loss: 1.7209
Validation Precision: 0.23, Recall: 0.25, F1: 0.22


Training Epoch 4: 100%|██████████| 21/21 [04:35<00:00, 13.10s/it, loss=1.53]
Validation Epoch 4: 100%|██████████| 6/6 [00:27<00:00,  4.63s/it, loss=1.47]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4: Training Loss: 1.5300
Training Precision: 0.41, Recall: 0.36, F1: 0.34
Validation Loss: 1.4742
Validation Precision: 0.51, Recall: 0.48, F1: 0.48


Training Epoch 5: 100%|██████████| 21/21 [04:35<00:00, 13.11s/it, loss=1.37]
Validation Epoch 5: 100%|██████████| 6/6 [00:27<00:00,  4.65s/it, loss=1.28]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5: Training Loss: 1.3686
Training Precision: 0.54, Recall: 0.44, F1: 0.43
Validation Loss: 1.2803
Validation Precision: 0.39, Recall: 0.55, F1: 0.44


Training Epoch 6: 100%|██████████| 21/21 [04:35<00:00, 13.14s/it, loss=1.17]
Validation Epoch 6: 100%|██████████| 6/6 [00:28<00:00,  4.67s/it, loss=1.02]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6: Training Loss: 1.1743
Training Precision: 0.70, Recall: 0.55, F1: 0.57
Validation Loss: 1.0151
Validation Precision: 0.51, Recall: 0.63, F1: 0.56


Training Epoch 7: 100%|██████████| 21/21 [04:39<00:00, 13.30s/it, loss=0.919]
Validation Epoch 7: 100%|██████████| 6/6 [00:28<00:00,  4.73s/it, loss=0.802]


Epoch 7: Training Loss: 0.9187
Training Precision: 0.76, Recall: 0.65, F1: 0.64
Validation Loss: 0.8020
Validation Precision: 0.80, Recall: 0.68, F1: 0.70


Training Epoch 8: 100%|██████████| 21/21 [04:34<00:00, 13.08s/it, loss=0.769]
Validation Epoch 8: 100%|██████████| 6/6 [00:28<00:00,  4.74s/it, loss=0.633]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8: Training Loss: 0.7692
Training Precision: 0.78, Recall: 0.76, F1: 0.76
Validation Loss: 0.6334
Validation Precision: 0.77, Recall: 0.75, F1: 0.73


Training Epoch 9: 100%|██████████| 21/21 [04:34<00:00, 13.07s/it, loss=0.567]
Validation Epoch 9: 100%|██████████| 6/6 [00:28<00:00,  4.68s/it, loss=0.545]


Epoch 9: Training Loss: 0.5673
Training Precision: 0.86, Recall: 0.82, F1: 0.83
Validation Loss: 0.5448
Validation Precision: 0.94, Recall: 0.77, F1: 0.78


Training Epoch 10: 100%|██████████| 21/21 [04:34<00:00, 13.09s/it, loss=0.522]
Validation Epoch 10: 100%|██████████| 6/6 [00:27<00:00,  4.63s/it, loss=0.677]

Epoch 10: Training Loss: 0.5219
Training Precision: 0.87, Recall: 0.83, F1: 0.85
Validation Loss: 0.6772
Validation Precision: 0.82, Recall: 0.73, F1: 0.74



