In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from model import GhostFaceNetsV2

In [2]:
data = np.load('data/arr/X2_train.npy')

In [3]:
data.shape

(9964, 478, 3)

In [4]:
#find min and max values of data
min_val = np.min(data)
max_val = np.max(data) 
print(min_val, max_val)

-0.12879282740036513 0.6114155876520542


In [5]:
if torch.cuda.is_available():  
    device = torch.device("cuda")
    print('We will use the GPU:', torch.cuda.get_device_name(0))
    
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

No GPU available, using the CPU instead.


In [6]:
class FaceDataset(Dataset):
    def __init__(self, images_path, landmarks_path, labels_path, transform=None):
        self.images = np.load(images_path).astype(np.float32)
        self.landmarks = np.load(landmarks_path).astype(np.float32)
        self.labels = np.load(labels_path)
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.images[idx]
        landmarks = self.landmarks[idx]
        label = self.labels[idx]
        image = image.transpose(2, 0, 1)
        # Transform the image to match the expected input shape [C, H, W]
        if self.transform:
            image = self.transform(image)

        # Convert landmarks to a flat tensor
        landmarks = torch.tensor(landmarks, dtype=torch.float32).view(-1)
        label = torch.tensor(label, dtype=torch.long)
        return image, landmarks, label

# Define transformations for images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize the image
])

In [7]:
# Paths to the saved numpy files
X1_train_path = 'data/arr/X1_train.npy'
X2_train_path = 'data/arr/X2_train.npy'
y_train_path = 'data/arr/y_train.npy'
X1_test_path = 'data/arr/X1_test.npy'
X2_test_path = 'data/arr/X2_test.npy'
y_test_path = 'data/arr/y_test.npy'

# Create datasets
train_dataset = FaceDataset(X1_train_path, X2_train_path, y_train_path)
test_dataset = FaceDataset(X1_test_path, X2_test_path, y_test_path)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


TRAIN WITH JUST IMAGES

TRY WITH GPU

In [8]:
# Initialize model, loss function, and optimizer
model = GhostFaceNetsV2(image_size=224, num_classes=5749, width=1, dropout=0., combined='Image')

model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for images, landmarks, labels in train_loader:
        images, landmarks, labels = images.to(device), landmarks.to(device), labels.to(device)
        
        optimizer.zero_grad()
        if model.combined == 'Image':
            outputs = model(x=images)
        elif model.combined == 'Landmark':
            outputs = model(landmarks=landmarks)
        elif model.combined == 'Combined':
            outputs = model(x=images, landmarks=landmarks)        
        print("1")
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader)}")

    # Validation step
    model.eval()  # Set the model to evaluation mode for validation
    correct = 0
    total = 0
    with torch.no_grad():
        for images, landmarks, labels in test_loader:
            images, landmarks, labels = images.to(device), landmarks.to(device), labels.to(device)
            if model.combined == 'Image':
                outputs = model(x=images)
            elif model.combined == 'Landmark':
                outputs = model(landmarks=landmarks)
            elif model.combined == 'Combined':
                outputs = model(x=images, landmarks=landmarks)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy: {100 * correct / total}%')

# Save the trained model
torch.save(model.state_dict(), 'combined_model.pth')


1
1
1
1
1
1
1
1
1
1


KeyboardInterrupt: 

TRY WITH CPU

In [9]:
import torch.optim as optim
CUDA_LAUNCH_BLOCKING=1
# Initialize model, loss function, and optimizer
model = GhostFaceNetsV2(image_size=224, num_classes=5749, width=1, dropout=0., combined='Image')
model.to('cpu')  # Use CPU for debugging
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, landmarks, labels in train_loader:
        images, landmarks, labels = images.to('cpu'), landmarks.to('cpu'), labels.to('cpu')
        
        optimizer.zero_grad()
        if model.combined == 'Image':
            outputs = model(x=images)
        elif model.combined == 'Landmark':
            outputs = model(landmarks=landmarks)
        elif model.combined == 'Combined':
            outputs = model(x=images, landmarks=landmarks)
        print("1")
        print(outputs.shape)
        print(labels.shape)
        loss = criterion(outputs, labels)
        print(loss)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader)}")

    # Validation step
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, landmarks, labels in test_loader:
            images, landmarks, labels = images.to('cpu'), landmarks.to('cpu'), labels.to('cpu')
            if model.combined == 'Image':
                outputs = model(x=images)
            elif model.combined == 'Landmark':
                outputs = model(landmarks=landmarks)
            elif model.combined == 'Combined':
                outputs = model(x=images, landmarks=landmarks)
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy: {100 * correct / total}%')

# Save the trained model
torch.save(model.state_dict(), 'combined_model.pth')


1
torch.Size([32, 5749])
torch.Size([32])
tensor(9.4536, grad_fn=<NllLossBackward0>)


KeyboardInterrupt: 