In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision.transforms import AutoAugment, ColorJitter, Compose

In [None]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class CustomDataset_train(Dataset):
    """Training image dataset read from a ``root_dir/<class_name>/...`` folder tree.

    Every sub-directory of ``root_dir`` is one class; class indices follow the
    sorted order of the directory names.  Each class directory is walked
    recursively and every ``.jpg`` / ``.png`` file (extension matched
    case-insensitively) becomes one ``(path, class_index)`` sample.

    Args:
        root_dir: Directory containing one sub-directory per class.
        transform: Optional extra callable applied *after* the built-in
            resize -> tensor -> normalise pipeline (it receives the
            normalised tensor).
        mean: Per-channel mean passed to ``transforms.Normalize``.
        std: Per-channel standard deviation passed to ``transforms.Normalize``.
    """

    # Normalisation statistics recorded in the notebook:
    #   ResNet with landmarks    : mean=[0.4514, 0.3094, 0.2865], std=[0.1353, 0.1066, 0.0966]
    #   ResNet without landmarks : mean=[0.4429, 0.3043, 0.2806], std=[0.1187, 0.0874, 0.0728]
    # The "with landmarks" values stay the default; pass `mean=` / `std=` to switch.
    DEFAULT_MEAN = (0.4514, 0.3094, 0.2865)
    DEFAULT_STD = (0.1353, 0.1066, 0.0966)

    # File extensions (lower-case) that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png')

    def __init__(self, root_dir, transform=None, mean=DEFAULT_MEAN, std=DEFAULT_STD):
        self.classes = self._find_classes(root_dir)  # sorted class (sub-directory) names
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}  # map class names to indices
        self.samples = self._collect_samples(root_dir)  # list of (image path, class index)

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # resize the image
            transforms.ToTensor(),  # convert PIL image to a PyTorch tensor
            transforms.Normalize(mean=list(mean), std=list(std)),
        ])

        if transform is not None:
            # The caller-supplied transform runs on the already normalised tensor.
            self.transform = transforms.Compose([self.transform, transform])

    @staticmethod
    def _find_classes(root_dir):
        """Return the sorted names of the sub-directories of ``root_dir``.

        Plain files in ``root_dir`` are ignored so that they can neither
        receive a class index nor shift the indices of the real classes.
        """
        return sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )

    def _collect_samples(self, root_dir):
        """Walk every class directory and return ``(path, class_index)`` pairs in sorted order."""
        samples = []
        for target_class in self.classes:
            class_dir = os.path.join(root_dir, target_class)
            for root, _, fnames in sorted(os.walk(class_dir)):
                for fname in sorted(fnames):
                    # Lower-case the name so '.JPG' / '.PNG' files are not silently skipped.
                    if fname.lower().endswith(self.IMAGE_EXTENSIONS):
                        path = os.path.join(root, fname)
                        samples.append((path, self.class_to_idx[target_class]))
        return samples

    def __getitem__(self, index):
        """Load the image at ``index`` as RGB, transform it and return ``(tensor, class_index)``."""
        path, target = self.samples[index]
        with open(path, 'rb') as f:
            # convert() forces the pixel data to be read while the file is still open.
            sample = Image.open(f).convert('RGB')
        sample = self.transform(sample)
        return sample, target

    def __len__(self):
        """Number of image samples found under ``root_dir``."""
        return len(self.samples)


In [None]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class CustomDataset_valid(Dataset):
    """Validation image dataset read from a ``root_dir/<class_name>/...`` folder tree.

    Every sub-directory of ``root_dir`` is one class; class indices follow the
    sorted order of the directory names.  Each class directory is walked
    recursively and every ``.jpg`` / ``.png`` file (extension matched
    case-insensitively) becomes one ``(path, class_index)`` sample.

    Args:
        root_dir: Directory containing one sub-directory per class.
        transform: Optional extra callable applied *after* the built-in
            resize -> tensor -> normalise pipeline (it receives the
            normalised tensor).
        mean: Per-channel mean passed to ``transforms.Normalize``.
        std: Per-channel standard deviation passed to ``transforms.Normalize``.
    """

    # NOTE(review): the original comment labelled these values
    # "Inception_v3 without landmarks", but they are identical to the
    # statistics the training dataset labels "ResNet with Landmarks" --
    # confirm which description is intended.  Pass `mean=` / `std=` to override.
    DEFAULT_MEAN = (0.4514, 0.3094, 0.2865)
    DEFAULT_STD = (0.1353, 0.1066, 0.0966)

    # File extensions (lower-case) that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png')

    def __init__(self, root_dir, transform=None, mean=DEFAULT_MEAN, std=DEFAULT_STD):
        self.classes = self._find_classes(root_dir)  # sorted class (sub-directory) names
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}  # map class names to indices
        self.samples = self._collect_samples(root_dir)  # list of (image path, class index)

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # resize the image
            transforms.ToTensor(),  # convert PIL image to a PyTorch tensor
            transforms.Normalize(mean=list(mean), std=list(std)),
        ])

        if transform is not None:
            # The caller-supplied transform runs on the already normalised tensor.
            self.transform = transforms.Compose([self.transform, transform])

    @staticmethod
    def _find_classes(root_dir):
        """Return the sorted names of the sub-directories of ``root_dir``.

        Plain files in ``root_dir`` are ignored so that they can neither
        receive a class index nor shift the indices of the real classes.
        """
        return sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )

    def _collect_samples(self, root_dir):
        """Walk every class directory and return ``(path, class_index)`` pairs in sorted order."""
        samples = []
        for target_class in self.classes:
            class_dir = os.path.join(root_dir, target_class)
            for root, _, fnames in sorted(os.walk(class_dir)):
                for fname in sorted(fnames):
                    # Lower-case the name so '.JPG' / '.PNG' files are not silently skipped.
                    if fname.lower().endswith(self.IMAGE_EXTENSIONS):
                        path = os.path.join(root, fname)
                        samples.append((path, self.class_to_idx[target_class]))
        return samples

    def __getitem__(self, index):
        """Load the image at ``index`` as RGB, transform it and return ``(tensor, class_index)``."""
        path, target = self.samples[index]
        with open(path, 'rb') as f:
            # convert() forces the pixel data to be read while the file is still open.
            sample = Image.open(f).convert('RGB')
        sample = self.transform(sample)
        return sample, target

    def __len__(self):
        """Number of image samples found under ``root_dir``."""
        return len(self.samples)


In [None]:
import os

# Root folder holding the `training_data` and `testing_data` sub-folders.
# Set the SPECIALE_DATA_ROOT environment variable to run the notebook on another
# machine; the default is the original hard-coded location.
DATA_ROOT = os.environ.get('SPECIALE_DATA_ROOT', 'C:/Users/Bruss/Desktop/Speciale/data/used_data_5')

# Training split, and the held-out split used for validation during training.
train_dataset = CustomDataset_train(root_dir=f'{DATA_ROOT}/training_data')
validation_data = CustomDataset_valid(root_dir=f'{DATA_ROOT}/testing_data')

In [None]:
# Mini-batch loaders with 26 images per batch: the training split is reshuffled
# on every pass, the validation split keeps its on-disk order.
train_loader, val_loader = (
    DataLoader(split, batch_size=26, shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (validation_data, False))
)

In [None]:
# Use a loop to calculate the mean and standard deviation of your training data
def calculate_mean_std(train_loader):
    """Estimate per-channel mean and standard deviation of the images in a loader.

    For every image the mean and the standard deviation over its pixels are
    computed per channel; both are then averaged over all images that the
    loader actually yields.  Note that the returned ``std`` is therefore the
    *average of per-image standard deviations*, not the standard deviation of
    all pixels of the data set taken together.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` batches where
            ``images`` is a tensor whose first two dimensions are
            (batch, channels).

    Returns:
        Tuple ``(mean, std)`` of 1-D tensors with one entry per channel.

    Raises:
        ValueError: If the loader yields no images.
    """
    mean_sum = 0.
    std_sum = 0.
    num_images = 0
    for images, _ in train_loader:
        batch_samples = images.size(0)
        # Collapse the spatial dimensions: (batch, channels, pixels).
        pixels = images.reshape(batch_samples, images.size(1), -1)
        mean_sum += pixels.mean(2).sum(0)
        std_sum += pixels.std(2).sum(0)
        num_images += batch_samples

    if num_images == 0:
        raise ValueError('calculate_mean_std: the loader did not yield any images')

    # Divide by the number of images actually seen; len(loader.dataset) is wrong
    # whenever the loader drops the last batch or draws through a sampler.
    mean = mean_sum / num_images
    std = std_sum / num_images
    print(f'Mean: {mean}')
    print(f'Standard deviation: {std}')
    return mean, std

In [None]:
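# Uncomment to recompute the per-channel statistics.
# Note: the datasets above already apply transforms.Normalize, so run this on
# un-normalized images to obtain the raw image statistics.
#calculate_mean_std(train_loader)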

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class ResNetWithLSTM(nn.Module):
    """Frozen ResNet-18 backbone followed by a linear layer, an LSTM and a classifier.

    Args:
        num_classes: Number of output classes.
        hidden_size: Width of the projection fed to the LSTM and of the LSTM
            hidden state.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        dropout: Dropout probability used after the projection and after the LSTM.

    ``forward`` returns class probabilities (a softmax is applied as the last
    step).  Losses that expect raw logits, such as ``nn.CrossEntropyLoss``,
    would apply a second softmax on top of this output.
    """

    def __init__(self, num_classes, hidden_size=512, num_layers=2, bidirectional=True, dropout=0.5):
        super(ResNetWithLSTM, self).__init__()

        # Load pre-trained ResNet-18 model
        self.resnet = models.resnet18(pretrained=True)

        # Freeze the whole backbone (including its own final fc layer); only the
        # layers defined below are trained.
        for param in self.resnet.parameters():
            param.requires_grad = False
        # Project the 1000 ResNet outputs to the LSTM input width.  Using
        # `hidden_size` (instead of a hard-coded 512) keeps the layer compatible
        # with the LSTM for any `hidden_size`; the default is unchanged.
        self.fc1 = nn.Linear(1000, hidden_size)
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.lstm = nn.LSTM(input_size=hidden_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            bidirectional=bidirectional,
                            batch_first=True)
        self.dropout2 = nn.Dropout(dropout)
        self.relu2 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a batch of images to class probabilities of shape (batch, num_classes)."""
        x = self.resnet(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout1(x)  # Apply dropout after the first fully connected layer
        # nn.LSTM interprets a 2-D input as ONE unbatched sequence, which would turn
        # the mini-batch into time steps and let samples influence each other.
        # Add an explicit sequence dimension of length 1: (batch, 1, hidden_size).
        x = x.unsqueeze(1)
        x, _ = self.lstm(x)
        x = x.squeeze(1)  # back to (batch, lstm_features)
        x = self.dropout2(x)  # Apply dropout after the LSTM layer
        x = self.relu2(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
from torchvision.transforms import transforms
from torchsummary import summary

class EfficientNetCNN(nn.Module):
    """EfficientNet-B0 feature extractor whose spatial positions are read by a bidirectional LSTM.

    Args:
        num_classes: Number of output classes.
        hidden_size: Hidden width of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability, used inside the LSTM and before each
            fully connected layer.

    ``forward`` returns class probabilities (softmax over dimension 1).
    """

    def __init__(self, num_classes, hidden_size=256, num_layers=2, dropout=0.25):
        super().__init__()
        self.efficientnet = EfficientNet.from_pretrained('efficientnet-b0')
        # The freezing loop below is commented out, so the backbone parameters stay trainable.
        #for param in self.efficientnet.parameters():
        #    param.requires_grad = False
        self.lstm = nn.LSTM(
            input_size=1280,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        self.dropout1 = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_size * 2, 256)
        self.dropout2 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(256, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities for a batch of images."""
        feature_maps = self.efficientnet.extract_features(x)

        # Merge all dimensions after the channel axis into one and move it in front
        # of the channels, so the LSTM (batch_first) steps over the positions.
        sequence = torch.flatten(feature_maps, start_dim=2).transpose(1, 2)
        lstm_out, _ = self.lstm(sequence)

        # Final state of each direction: the forward half at the last step and
        # the backward half at the first step.
        width = self.lstm.hidden_size
        forward_last = lstm_out[:, -1, :width]
        backward_first = lstm_out[:, 0, width:]
        summary_vec = torch.cat((forward_last, backward_first), dim=1)

        hidden = self.fc1(self.dropout1(summary_vec))
        scores = self.fc2(self.dropout2(hidden))
        return self.softmax(scores)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, confusion_matrix
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
# set seed for reproducibility
torch.manual_seed(1234)
from sklearn.metrics import roc_auc_score, confusion_matrix
directory = 'C:/Users/Bruss/Desktop/Speciale/models'


# Define training parameters
batch_size = 26
#9 epochs

#EFFECIENTNET WITH LANDMARKS: 7 EPOCHS - LOWEST LOSS SECOND HIGHEST VAL lr = 0.0001 /TRAINED ON FOR 20
#EFFECIENTNET WITHOUT LANDMARKS: 10 EPOCHS - LOWEST LOSS SECOND HIGHEST VAL lr = 0.0005 /TRAIN ON FOR 20 #EPOCH 5 BEST YESTERDAY
num_epochs = 20
learning_rate = 0.0005


# Instantiate the model and optimizer
model = ResNetWithLSTM(num_classes=5)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# The model's forward() already ends in a softmax, while nn.CrossEntropyLoss applies
# log-softmax itself.  Feeding it log(probabilities) makes the two cancel out
# (log_softmax(log p) == log p when p sums to 1), giving the true cross-entropy.
# The clamp avoids log(0).
LOG_EPS = 1e-12

# Per-batch history (loss value / number of correct predictions of every mini-batch)
train_loss_list, val_loss_list = [], []
train_acc_list, val_acc_list = [], []
val_auc_list = []

# Per-epoch history (mean loss per sample / accuracy in percent), intended for plotting
train_loss_list_graph, val_loss_list_graph = [], []
train_acc_list_graph, val_acc_list_graph = [], []

for epoch in range(num_epochs):
    # Train the model
    model.train()
    train_loss_sum, train_correct, train_seen = 0.0, 0, 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(torch.log(outputs.clamp_min(LOG_EPS)), labels)
        loss.backward()
        optimizer.step()

        predicted = outputs.argmax(dim=1)
        n_correct = (predicted == labels).sum().item()
        n_samples = labels.size(0)
        train_loss_list.append(loss.item())
        train_acc_list.append(n_correct)
        # Weight the batch-mean loss by the batch size so a smaller last batch
        # does not distort the epoch average.
        train_loss_sum += loss.item() * n_samples
        train_correct += n_correct
        train_seen += n_samples

    # Validate the model
    model.eval()
    val_loss_sum, val_correct, val_seen = 0.0, 0, 0
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(torch.log(outputs.clamp_min(LOG_EPS)), labels)

            predicted = outputs.argmax(dim=1)
            n_correct = (predicted == labels).sum().item()
            n_samples = labels.size(0)
            val_loss_list.append(loss.item())
            val_acc_list.append(n_correct)
            val_loss_sum += loss.item() * n_samples
            val_correct += n_correct
            val_seen += n_samples
            all_labels.extend(labels.tolist())
            all_predictions.extend(predicted.tolist())

    # Epoch-level metrics over every sample seen (not just the last mini-batch)
    train_loss = train_loss_sum / max(train_seen, 1)
    train_acc = 100.0 * train_correct / max(train_seen, 1)
    val_loss = val_loss_sum / max(val_seen, 1)
    val_acc = 100.0 * val_correct / max(val_seen, 1)

    # Print the results for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {train_loss:.4f}, "
          f"Train Acc: {train_acc:.2f}%, "
          f"Val Loss: {val_loss:.4f}, "
          f"Val Acc: {val_acc:.2f}%, ")
    train_loss_list_graph.append(train_loss)
    val_loss_list_graph.append(val_loss)
    train_acc_list_graph.append(train_acc)
    val_acc_list_graph.append(val_acc)

    #torch.save(model, os.path.join(directory,f'current_model_resnet_wo_landmarks_{epoch}.pt'))
   

## Deployment of ResNet without landmarks


In [None]:
import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image

# Load the saved model and move it to the GPU (CPU when CUDA is unavailable).
# map_location lets a checkpoint that was saved on a GPU be opened on a CPU-only machine.
# NOTE: torch.load unpickles a full model object, which can execute arbitrary code --
# only open checkpoints you trust.  Recent PyTorch releases may additionally require
# weights_only=False for such whole-model files (check the torch.load docs for your version).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.load('C:/Users/Bruss/Desktop/Speciale/models/resnet_models/current_model_resnet_without_landmarks_15.pt',
                   map_location=device)
model = model.to(device)
model.eval()  # inference mode: disables dropout regardless of the state the model was saved in

# Define the preprocessing transforms
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4429, 0.3043, 0.2806],
                          std=[0.1187, 0.0874, 0.0728]),])

# Define the class names (index = class id predicted by the model)
class_names = ['afraid',  'alone', 'boss', 'hello', 'tough']

# Capture images from the webcam
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Without this check the loop below would spin forever with no window to press 'q' in.
        raise RuntimeError('Could not open the default camera (index 0)')

    while True:
        ret, frame = cap.read()
        if ret:
            # The model expects RGB input; keep the original BGR frame for display,
            # because cv2.imshow interprets images as BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Preprocess the image and add the batch dimension
            pil_image = Image.fromarray(rgb_frame)
            image_tensor = preprocess(pil_image).unsqueeze(0)

            # Move the data to the GPU
            image_tensor = image_tensor.to(device)

            # Pass the image through the model
            with torch.no_grad():
                output = model(image_tensor)

            # Move the output to the CPU
            output = output.cpu()

            # Get the predicted class
            _, predicted = torch.max(output.data, 1)
            predicted_class = predicted.item()

            # Display the predicted class on the frame
            predicted_class_name = class_names[predicted_class]
            cv2.putText(frame, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Show the frame
            cv2.imshow('frame', frame)

        # Mask to the low byte so the comparison also works on platforms where
        # waitKey returns extra modifier bits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if inference raised.
    cap.release()
    cv2.destroyAllWindows()


## Deployment of ResNet with landmarks

In [None]:
import cv2
import torch
import mediapipe as mp
import time
import torchvision.transforms as transforms
from PIL import Image


# Load the saved model and move it to the GPU (CPU when CUDA is unavailable).
# map_location lets a checkpoint that was saved on a GPU be opened on a CPU-only machine.
# NOTE: torch.load unpickles a full model object, which can execute arbitrary code --
# only open checkpoints you trust.  Recent PyTorch releases may additionally require
# weights_only=False for such whole-model files (check the torch.load docs for your version).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.load('C:/Users/Bruss/Desktop/Speciale/models/resnet_models/current_model_resnet_with_landmarks_14.pt',
                   map_location=device)
model = model.to(device)
model.eval()  # inference mode: disables dropout regardless of the state the model was saved in


# Define the preprocessing transforms.
# These are the statistics the training dataset labels "ResNet with Landmarks".
# NOTE(review): this cell previously used the "without landmarks" values
# (mean=[0.4429, 0.3043, 0.2806], std=[0.1187, 0.0874, 0.0728]) -- confirm that the
# loaded checkpoint was trained with the "with landmarks" statistics.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4514, 0.3094, 0.2865],
                          std=[0.1353, 0.1066, 0.0966]),])

# Define the class names (index = class id predicted by the model)
class_names = ['afraid',  'alone', 'boss', 'hello', 'tough']

# Mediapipe solution modules for hand landmarks and face detection
mp_hands = mp.solutions.hands
mp_face_detection = mp.solutions.face_detection

# Create a VideoCapture object to capture the video from the default camera
cap = cv2.VideoCapture(0)

# Initialize the frame counter and the start time (used for the FPS overlay)
frame_count = 0
start_time = time.time()

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open the default camera (index 0)')

    # The context managers release the Mediapipe resources when the loop ends.
    with mp_hands.Hands() as hands, mp_face_detection.FaceDetection() as face_detection:
        # Loop through each frame in the video stream
        while True:
            # Read a frame from the video stream
            ret, frame = cap.read()
            if not ret:
                # No frame available: skip processing (cv2.flip would fail on None)
                # but keep polling for the quit key.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                continue

            # Count only frames that were actually delivered
            frame_count += 1

            # Flip the frame horizontally for a mirror-like effect
            frame = cv2.flip(frame, 1)

            # Convert the image to RGB for Mediapipe
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect hands
            hands_results = hands.process(frame_rgb)

            # Detect faces
            face_detection_results = face_detection.process(frame_rgb)

            # Draw landmarks for hands
            if hands_results.multi_hand_landmarks:
                for hand_landmarks in hands_results.multi_hand_landmarks:
                    mp.solutions.drawing_utils.draw_landmarks(
                        frame,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS)

            # Draw landmarks for faces
            if face_detection_results.detections:
                for detection in face_detection_results.detections:
                    mp.solutions.drawing_utils.draw_detection(
                        frame,
                        detection)

            # Preprocess the annotated frame.  OpenCV frames are BGR, so convert to RGB
            # first (as the deployment cell without landmarks does).  This happens
            # BEFORE the text overlays are drawn so the model never sees the FPS counter.
            model_input = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(model_input)
            image_tensor = preprocess(pil_image)
            image_tensor = image_tensor.unsqueeze(0)

            # Move the data to the GPU
            image_tensor = image_tensor.to(device)

            # Pass the image through the model
            with torch.no_grad():
                output = model(image_tensor)

            # Move the output to the CPU
            output = output.cpu()

            # Get the predicted class
            _, predicted = torch.max(output.data, 1)
            predicted_class = predicted.item()
            predicted_class_name = class_names[predicted_class]

            # Overlays (display only): FPS on the first text line, prediction on the
            # second, so the two no longer overwrite each other.
            elapsed_time = time.time() - start_time
            fps = frame_count / max(elapsed_time, 1e-6)
            cv2.putText(frame, "FPS: {:.2f}".format(fps), (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.putText(frame, predicted_class_name, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Display the resulting image
            cv2.imshow('frame', frame)

            # Exit if the user presses the 'q' key
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Release the VideoCapture object and close the OpenCV window, even on errors
    cap.release()
    cv2.destroyAllWindows()
