<a href="https://colab.research.google.com/github/hsun26/CS445-Project/blob/main/Dual_CNN_(2_inputs_required).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install torchsummary



In [49]:
from torchsummary import summary
import os
import numpy as np
from PIL import Image
import torch
from torchvision import transforms
from torch.utils.data import Dataset
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.utils.data import random_split

In [40]:
# Load dataset

class HandGestureDataset(Dataset):
    def __init__(self, hand_dir, mask_dir, transform=None):
        """
        Args:
            hand_dir (string): Directory with all the hand images divided into subdirectories.
            mask_dir (string): Directory with all the mask images divided into the same subdirectories as hand images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.hand_dir = hand_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.samples = self._load_samples()

    def _load_samples(self):
        samples = []
        # Labels could be dynamically assigned based on folder names if needed
        label_mapping = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4, 'like': 5, 'ok': 6}

        categories = os.listdir(self.hand_dir)
        for category in categories:
            hand_path = os.path.join(self.hand_dir, category)
            mask_path = os.path.join(self.mask_dir, category)
            for filename in os.listdir(hand_path):
                if filename.endswith('.jpg'):  # Make sure to match your image file extensions
                    file_hand_path = os.path.join(hand_path, filename)
                    file_mask_path = os.path.join(mask_path, filename.replace('.jpg', '.bmp'))
                    samples.append((file_hand_path, file_mask_path, label_mapping[category]))
        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        hand_path, mask_path, label = self.samples[idx]

        hand_image = Image.open(hand_path).convert('RGB')
        mask_image = Image.open(mask_path).convert('L')  # Ensure mask is in grayscale

        if self.transform:
            hand_image = self.transform(hand_image)
            mask_image = self.transform(mask_image)

        return hand_image, mask_image, label

# Transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])


In [41]:
hand_dir = '/content/drive/MyDrive/CS445/Final Project/filtered_hands (1)'
mask_dir = '/content/drive/MyDrive/CS445/Final Project/filtered_masks (1)'
dataset = HandGestureDataset(hand_dir, mask_dir, transform=transform)

# Split the dataset into training and testing
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# DataLoader setup
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)


DC-CNN (pytorch)

In [46]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DualCNN(nn.Module):
    def __init__(self):
        super(DualCNN, self).__init__()
        # Define the first branch for the RGB image
        self.branch1 = nn.Sequential(
            nn.Conv2d(3, 20, kernel_size=5, padding=2),  # Padding=2 to keep size constant
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),  # Larger kernel and padding
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Define the second branch for the mask image
        self.branch2 = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 20, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        # Define the fully connected layers
        self.fc1 = nn.Linear(20 * 56 * 56 * 2, 224)  # calculated
        self.fc2 = nn.Linear(224, 7)

    def forward(self, x_img, x_mask):
        out_img = self.branch1(x_img)
        out_mask = self.branch2(x_mask)

        # print("Output size after branch1:", out_img.shape)  # Debug: Check output size
        # print("Output size after branch2:", out_mask.shape)

        out_img = out_img.view(out_img.size(0), -1)
        out_mask = out_mask.view(out_mask.size(0), -1)
        out = torch.cat((out_img, out_mask), dim=1)

        # print("Concatenated output size:", out.shape)  # Debug: Check concatenated size

        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        return out

# Initialize model
model = DualCNN()
print(model)


DualCNN(
  (branch1): Sequential(
    (0): Conv2d(3, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (branch2): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(20, 20, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=125440, out_features=224, bias=True)
  (fc2): Linear(in_features=224, out_features=7, bias=True)
)


In [47]:
model = DualCNN()
summary(model, [(3, 224, 224), (1, 224, 224)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 20, 224, 224]           1,520
              ReLU-2         [-1, 20, 224, 224]               0
         MaxPool2d-3         [-1, 20, 112, 112]               0
            Conv2d-4         [-1, 20, 112, 112]          19,620
              ReLU-5         [-1, 20, 112, 112]               0
         MaxPool2d-6           [-1, 20, 56, 56]               0
            Conv2d-7         [-1, 20, 224, 224]             520
              ReLU-8         [-1, 20, 224, 224]               0
         MaxPool2d-9         [-1, 20, 112, 112]               0
           Conv2d-10         [-1, 20, 112, 112]          19,620
             ReLU-11         [-1, 20, 112, 112]               0
        MaxPool2d-12           [-1, 20, 56, 56]               0
           Linear-13                  [-1, 224]      28,098,784
           Linear-14                   

In [52]:
# def train(model, criterion, optimizer, train_loader, epochs=10):
#     model.train()
#     for epoch in range(epochs):
#         for data_img, data_mask, labels in train_loader:
#             optimizer.zero_grad()   # 清除过去的梯度
#             outputs = model(data_img, data_mask)   # get output
#             loss = criterion(outputs, labels)   # calculate loss
#             loss.backward()    # backpropogation
#             optimizer.step()   # 更新模型的参数
#         print(f'Epoch {epoch+1}, Loss: {loss.item()}')

In [53]:
# def evaluate(model, test_loader):
#     model.eval()
#     total = 0
#     correct = 0
#     with torch.no_grad():
#         for data_img, data_mask, labels in test_loader:
#             outputs = model(data_img, data_mask)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
#     accuracy = 100 * correct / total
#     return accuracy


In [54]:
# def train_and_evaluate(model, criterion, optimizer, train_loader, test_loader, epochs=10):
#     for epoch in range(epochs):
#         model.train()
#         train_loss = 0
#         total_batches = len(train_loader)
#         loop = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}', leave=True)

#         for data_img, data_mask, labels in loop:
#             optimizer.zero_grad()
#             outputs = model(data_img, data_mask)
#             loss = criterion(outputs, labels)
#             loss.backward()
#             optimizer.step()

#             train_loss += loss.item()
#             loop.set_postfix(loss=loss.item())

#         train_loss /= total_batches
#         validation_accuracy = evaluate(model, test_loader)

#         # Print training and validation results
#         print(f'Epoch {epoch + 1}: Training Loss: {train_loss:.4f}, Validation Accuracy: {validation_accuracy:.2f}%')


In [56]:
def train_and_validate(model, criterion, optimizer, train_loader, val_loader, n_epochs=10):
    for epoch in range(n_epochs):
        # Training Phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_counter = 0

        train_tqdm = tqdm(train_loader, desc=f'Training Epoch {epoch+1}')
        for data_img, data_mask, labels in train_tqdm:
            optimizer.zero_grad()
            outputs = model(data_img, data_mask)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Calculate loss and accuracy
            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == labels).sum().item()
            train_counter += labels.size(0)

            train_tqdm.set_postfix(loss=train_loss/(1+len(train_tqdm)), accuracy=100.0 * train_correct / train_counter)

        # Validation Phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_counter = 0

        val_tqdm = tqdm(val_loader, desc=f'Validation Epoch {epoch+1}')
        with torch.no_grad():
            for data_img, data_mask, labels in val_tqdm:
                outputs = model(data_img, data_mask)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_counter += labels.size(0)

                val_tqdm.set_postfix(loss=val_loss/(1+len(val_tqdm)), accuracy=100.0 * val_correct / val_counter)

        # End of Epoch Summary
        train_loss /= len(train_loader)
        train_accuracy = 100.0 * train_correct / train_counter
        val_loss /= len(val_loader)
        val_accuracy = 100.0 * val_correct / val_counter
        print(f"Epoch {epoch+1}: Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%")
        print(f"Epoch {epoch+1}: Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")


In [57]:
model = DualCNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)   # update the model's parameters (weights and biases) during the training
criterion = nn.CrossEntropyLoss()  #define loss function

In [59]:
train_and_validate(model, criterion, optimizer, train_loader, test_loader, n_epochs=10)

Training Epoch 1: 100%|██████████| 27/27 [00:44<00:00,  1.65s/it, accuracy=29.9, loss=1.69]
Validation Epoch 1: 100%|██████████| 7/7 [00:04<00:00,  1.49it/s, accuracy=47, loss=1.16]


Epoch 1: Training Loss: 1.7496, Training Accuracy: 29.89%
Epoch 1: Validation Loss: 1.3312, Validation Accuracy: 46.97%


Training Epoch 2: 100%|██████████| 27/27 [00:43<00:00,  1.61s/it, accuracy=44.4, loss=1.3]
Validation Epoch 2: 100%|██████████| 7/7 [00:04<00:00,  1.74it/s, accuracy=28.8, loss=1.6]


Epoch 2: Training Loss: 1.3512, Training Accuracy: 44.44%
Epoch 2: Validation Loss: 1.8239, Validation Accuracy: 28.79%


Training Epoch 3: 100%|██████████| 27/27 [00:41<00:00,  1.55s/it, accuracy=41, loss=1.66]
Validation Epoch 3: 100%|██████████| 7/7 [00:05<00:00,  1.33it/s, accuracy=39.4, loss=1.44]


Epoch 3: Training Loss: 1.7249, Training Accuracy: 41.00%
Epoch 3: Validation Loss: 1.6407, Validation Accuracy: 39.39%


Training Epoch 4: 100%|██████████| 27/27 [00:42<00:00,  1.57s/it, accuracy=47.1, loss=1.29]
Validation Epoch 4: 100%|██████████| 7/7 [00:04<00:00,  1.74it/s, accuracy=36.4, loss=1.21]


Epoch 4: Training Loss: 1.3411, Training Accuracy: 47.13%
Epoch 4: Validation Loss: 1.3844, Validation Accuracy: 36.36%


Training Epoch 5: 100%|██████████| 27/27 [00:45<00:00,  1.67s/it, accuracy=62.5, loss=1.1]
Validation Epoch 5: 100%|██████████| 7/7 [00:04<00:00,  1.51it/s, accuracy=53, loss=0.97]


Epoch 5: Training Loss: 1.1414, Training Accuracy: 62.45%
Epoch 5: Validation Loss: 1.1087, Validation Accuracy: 53.03%


Training Epoch 6: 100%|██████████| 27/27 [00:43<00:00,  1.60s/it, accuracy=75.1, loss=0.659]
Validation Epoch 6: 100%|██████████| 7/7 [00:04<00:00,  1.74it/s, accuracy=48.5, loss=1.4]


Epoch 6: Training Loss: 0.6835, Training Accuracy: 75.10%
Epoch 6: Validation Loss: 1.5985, Validation Accuracy: 48.48%


Training Epoch 7: 100%|██████████| 27/27 [00:42<00:00,  1.58s/it, accuracy=73.9, loss=0.647]
Validation Epoch 7: 100%|██████████| 7/7 [00:04<00:00,  1.74it/s, accuracy=54.5, loss=1.07]


Epoch 7: Training Loss: 0.6712, Training Accuracy: 73.95%
Epoch 7: Validation Loss: 1.2189, Validation Accuracy: 54.55%


Training Epoch 8: 100%|██████████| 27/27 [00:42<00:00,  1.57s/it, accuracy=90.4, loss=0.298]
Validation Epoch 8: 100%|██████████| 7/7 [00:05<00:00,  1.36it/s, accuracy=59.1, loss=0.998]


Epoch 8: Training Loss: 0.3086, Training Accuracy: 90.42%
Epoch 8: Validation Loss: 1.1405, Validation Accuracy: 59.09%


Training Epoch 9: 100%|██████████| 27/27 [00:45<00:00,  1.67s/it, accuracy=96.2, loss=0.0996]
Validation Epoch 9: 100%|██████████| 7/7 [00:03<00:00,  1.76it/s, accuracy=63.6, loss=1.15]


Epoch 9: Training Loss: 0.1032, Training Accuracy: 96.17%
Epoch 9: Validation Loss: 1.3145, Validation Accuracy: 63.64%


Training Epoch 10: 100%|██████████| 27/27 [00:42<00:00,  1.56s/it, accuracy=100, loss=0.0109]
Validation Epoch 10: 100%|██████████| 7/7 [00:04<00:00,  1.54it/s, accuracy=62.1, loss=1.32]

Epoch 10: Training Loss: 0.0113, Training Accuracy: 100.00%
Epoch 10: Validation Loss: 1.5093, Validation Accuracy: 62.12%





In [61]:
from PIL import Image
import torchvision.transforms as transforms
import torch

def load_image(image_path, mask_path=None, image_size=224):
    # Image transformations
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        # Normalize with the same parameters used in training

    ])

    # Load image
    image = Image.open(image_path).convert('RGB')
    image = transform(image)

    # If a mask is required
    if mask_path:
        mask = Image.open(mask_path).convert('L')
        mask = transform(mask)
    else:
        mask = None

    return image, mask

def test_model(model, image_path, mask_path=None):
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        image, mask = load_image(image_path, mask_path)
        if mask is not None:
            image, mask = image.unsqueeze(0), mask.unsqueeze(0)  # Add batch dimension
            outputs = model(image, mask)
        else:
            image = image.unsqueeze(0)  # Add batch dimension
            outputs = model(image)

        # Assuming the output is class scores
        _, predicted = torch.max(outputs, 1)
        return predicted.item()


model = DualCNN()

predicted_class = test_model(model, '/content/drive/MyDrive/CS445/Final Project/1_A_hgr2A1_id02_1.jpg', '/content/drive/MyDrive/CS445/Final Project/1_A_hgr2A1_id02_1.bmp')
print("Predicted Class:", predicted_class)


Predicted Class: 0


In [None]:
predicted_class = test_model(model, 'path_to_test_image.jpg', 'path_to_test_mask.bmp')
print("Predicted Class:", predicted_class)