In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, ConcatDataset, Subset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.io import read_image
import matplotlib.pyplot as plt
import time
import os
import pandas as pd
from torch.utils.tensorboard import SummaryWriter

In [2]:
%load_ext tensorboard

In [3]:
def train_val_dataset(dataset, val_split=0.25):
    train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size=val_split)
    datasets = {}
    datasets['train'] = Subset(dataset, train_idx)
    datasets['valid'] = Subset(dataset, val_idx)
    return datasets

In [4]:
def train_and_validate(model, loss_criterion, optimizer, epochs, train_data_loader, valid_data_loader, device):
    '''
    Function to train and validate
    Parameters
        :param model: Model to train and validate
        :param loss_criterion: Loss Criterion to minimize
        :param optimizer: Optimizer for computing gradients
        :param epochs: Number of epochs (default=25)
  
    Returns
        model: Trained Model with best validation accuracy
        history: (dict object): Having training loss, accuracy and validation loss, accuracy
    '''
    
    writer = SummaryWriter()
    model = model.to(device)
    start = time.time()
    history = []
    best_acc = 0.0

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))
        
        # Set to training mode
        model.train()
        
        # Loss and Accuracy within the epoch
        train_loss = 0.0
        train_acc = 0.0
        
        valid_loss = 0.0
        valid_acc = 0.0
        
        for i, (inputs, labels) in enumerate(train_data_loader):
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Clean existing gradients
            optimizer.zero_grad()
            
            # Forward pass - compute outputs on input data using the model
            outputs = model(inputs.cuda())

            #print(f"Output shape: {outputs.shape} \t Label shape: {labels.shape}")
            
            # Compute loss
            loss = loss_criterion(outputs, labels) #.to(torch.float32)
            
            #print(f"Outputs: {outputs} \t type: {outputs.dtype}")
            #print(f"Loss: {loss} \t type: {loss.dtype}")
            
            # Backpropagate the gradients
            loss.backward()
            
            # Update the parameters
            optimizer.step()
            
            # Compute the total loss for the batch and add it to train_loss
            train_loss += loss.item() * inputs.size(0)

            

            # Compute the accuracy
            #ret, predictions = torch.max(outputs.data, 1)
            #correct_counts = predictions.eq(labels.data.view_as(predictions))
            
            # Convert correct_counts to float and then compute the mean
            #acc = torch.mean(correct_counts.type(torch.FloatTensor))
            
            # Compute total accuracy in the whole batch and add to train_acc
            #train_acc += acc.item() * inputs.size(0)
            
            #print("Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item()))
        writer.add_scalar("Train_loss x epoch", train_loss/len(train_data_loader), epoch)
        
        # Validation - No gradient tracking needed
        with torch.no_grad():

            # Set to evaluation mode
            model.eval()

            # Validation loop
            for j, (inputs, labels) in enumerate(valid_data_loader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Forward pass - compute outputs on input data using the model
                outputs = model(inputs.cuda())

                # Compute loss
                loss = loss_criterion(outputs, labels)

                # Compute the total loss for the batch and add it to valid_loss
                valid_loss += loss.item() * inputs.size(0)

                # Calculate validation accuracy
                #ret, predictions = torch.max(outputs.data, 1)
                #correct_counts = predictions.eq(labels.data.view_as(predictions))

                # Convert correct_counts to float and then compute the mean
                #acc = torch.mean(correct_counts.type(torch.FloatTensor))

                # Compute total accuracy in the whole batch and add to valid_acc
                #valid_acc += acc.item() * inputs.size(0)

                #print("Validation Batch number: {:03d}, Validation: Loss: {:.4f}, Accuracy: {:.4f}".format(j, loss.item(), acc.item()))
            
        writer.add_scalar("Valid_loss x epoch", valid_loss/len(valid_data_loader), epoch)
        
        # Find average training loss and training accuracy
        avg_train_loss = train_loss/len(train_data_loader) 
        #avg_train_acc = train_acc/train_data_size
        # Find average training loss and training accuracy
        avg_valid_loss = valid_loss/len(valid_data_loader) 
        #avg_valid_acc = valid_acc/valid_data_size

        history.append([avg_train_loss, avg_valid_loss])#, avg_train_acc, avg_valid_acc])
                
        epoch_end = time.time()
    
        #print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch+1, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start))
        print("Epoch : {:03d}, Training: Loss: {:.4f}, \n\t\tValidation : Loss : {:.4f}, Time: {:.4f}s".format(epoch+1, avg_train_loss, avg_valid_loss, epoch_end-epoch_start))

        # Save if the model has best accuracy till now
        #torch.save(model, dataset+'_model_'+str(epoch)+'.pt')
    writer.close()      
    return model, history

In [5]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for inputs, labels in trainloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / dataset_sizes
            epoch_acc = running_corrects.double() / dataset_sizes
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            # deep copy the model
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [6]:
# Custom datasets for each shape
class SquareImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_name = "square_img_" + str(self.img_labels.iloc[idx, 0]) + ".jpg"
        img_path = os.path.join(self.img_dir, img_name)
        image = read_image(img_path)#.float()
        label = self.img_labels.iloc[idx, 1:4].to_numpy()
        label = np.sqrt((label*label).sum())
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

class SphereImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_name = "sphere_img_" + str(self.img_labels.iloc[idx, 0]) + ".jpg"
        img_path = os.path.join(self.img_dir, img_name)
        image = read_image(img_path)#.float()
        label = self.img_labels.iloc[idx, 1:4].to_numpy()
        label = np.sqrt((label*label).sum())
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

class RomboidImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_name = "romboid_img_" + str(self.img_labels.iloc[idx, 0]) + ".jpg"
        img_path = os.path.join(self.img_dir, img_name)
        image = read_image(img_path)#.float()
        label = self.img_labels.iloc[idx, 1:4].to_numpy()
        label = np.sqrt((label*label).sum())
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [7]:
# Dataset: imgs and labels paths
dataset_path = "/home/corcasta/Documents/ati/dataset"
square_imgs_dir = dataset_path + "/images/square"
sphere_imgs_dir = dataset_path + "/images/sphere"
romboid_imgs_dir = dataset_path + "/images/romboid"

square_labels_dir = dataset_path + "/labels/square"
sphere_labels_dir = dataset_path + "/labels/sphere"
romboid_labels_dir = dataset_path + "/labels/romboid"

In [8]:
square_df = pd.read_csv(square_labels_dir + "/square_data.csv")
sphere_df = pd.read_csv(sphere_labels_dir + "/sphere_data.csv")
romboid_df = pd.read_csv(romboid_labels_dir + "/romboid_data.csv")
total_df = pd.concat([square_df, sphere_df], ignore_index=True)

# Getting all the labels in ORDER
labels = total_df[["fx", "fy", "fz"]]

In [9]:
square_df.head()

Unnamed: 0.1,Unnamed: 0,fx,fy,fz,x,y,z,x_shear,y_shear
0,0,-0.000228,0.000324,-4.7e-05,0.0,0.0,0.0,,
1,1,-0.000644,-0.000226,0.001464,0.0,0.0,0.0,0.0,0.0
2,2,0.019416,0.034281,-0.470873,0.0,0.0,1.7,0.0,0.0
3,3,0.020019,0.034907,-0.463959,0.0,0.0,1.7,0.0,0.2
4,4,0.019774,0.035586,-0.46022,0.0,0.0,1.7,0.0,0.5


In [10]:
# Applying image transforms
image_transforms = {
    "train": transforms.Compose([
        transforms.Resize(size=224),
        transforms.ConvertImageDtype(dtype=torch.float32)
        #transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406],
        #                     [0.229, 0.224, 0.225])
    ]),
    "valid": transforms.Compose([
        transforms.Resize(size=224),
        transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406],
        #                     [0.229, 0.224, 0.225])
    ]),
    "test": transforms.Compose([
        transforms.Resize(size=224),
        transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406],
        #                     [0.229, 0.224, 0.225])
    ])
}

In [11]:
square_labels_file = square_labels_dir + "/square_data.csv"
sphere_labels_file = sphere_labels_dir + "/sphere_data.csv"
romboid_labels_file = romboid_labels_dir + "/romboid_data.csv"

# Loading individual datasets
square_dataset = SquareImageDataset(annotations_file=square_labels_file, img_dir=square_imgs_dir, transform=image_transforms["train"])#, target_transform=transforms.ToTensor())
sphere_dataset = SphereImageDataset(annotations_file=sphere_labels_file, img_dir=sphere_imgs_dir, transform=image_transforms["train"])#, target_transform=transforms.ToTensor())
romboid_dataset = RomboidImageDataset(annotations_file=romboid_labels_file, img_dir=romboid_imgs_dir, transform=image_transforms["train"])#, target_transform=transforms.ToTensor())



# Dataset containing square and sphere imgs including labels
full_dataset = ConcatDataset([square_dataset, romboid_dataset])

In [12]:
# Viz DEMO
img, label = full_dataset[0]
label

0.0003988280889054832

In [13]:
img

tensor([[[0.0745, 0.0745, 0.0706,  ..., 0.1098, 0.0980, 0.0745],
         [0.0824, 0.0784, 0.0706,  ..., 0.0902, 0.1020, 0.0902],
         [0.0824, 0.0784, 0.0745,  ..., 0.0745, 0.0980, 0.0863],
         ...,
         [0.0784, 0.0745, 0.0706,  ..., 0.0941, 0.0902, 0.0784],
         [0.0784, 0.0706, 0.0667,  ..., 0.0863, 0.0902, 0.0824],
         [0.0706, 0.0627, 0.0627,  ..., 0.0824, 0.0902, 0.0824]],

        [[0.1373, 0.1529, 0.1569,  ..., 0.2863, 0.2784, 0.2471],
         [0.1373, 0.1451, 0.1490,  ..., 0.2627, 0.2667, 0.2392],
         [0.1255, 0.1333, 0.1451,  ..., 0.2510, 0.2431, 0.2039],
         ...,
         [0.1176, 0.1176, 0.1216,  ..., 0.1686, 0.1608, 0.1490],
         [0.1216, 0.1176, 0.1176,  ..., 0.1529, 0.1490, 0.1412],
         [0.1176, 0.1176, 0.1176,  ..., 0.1451, 0.1412, 0.1333]],

        [[0.0667, 0.0784, 0.0824,  ..., 0.1373, 0.1333, 0.1255],
         [0.0706, 0.0745, 0.0784,  ..., 0.1569, 0.1490, 0.1255],
         [0.0588, 0.0667, 0.0824,  ..., 0.2235, 0.1765, 0.

In [14]:
img

tensor([[[0.0745, 0.0745, 0.0706,  ..., 0.1098, 0.0980, 0.0745],
         [0.0824, 0.0784, 0.0706,  ..., 0.0902, 0.1020, 0.0902],
         [0.0824, 0.0784, 0.0745,  ..., 0.0745, 0.0980, 0.0863],
         ...,
         [0.0784, 0.0745, 0.0706,  ..., 0.0941, 0.0902, 0.0784],
         [0.0784, 0.0706, 0.0667,  ..., 0.0863, 0.0902, 0.0824],
         [0.0706, 0.0627, 0.0627,  ..., 0.0824, 0.0902, 0.0824]],

        [[0.1373, 0.1529, 0.1569,  ..., 0.2863, 0.2784, 0.2471],
         [0.1373, 0.1451, 0.1490,  ..., 0.2627, 0.2667, 0.2392],
         [0.1255, 0.1333, 0.1451,  ..., 0.2510, 0.2431, 0.2039],
         ...,
         [0.1176, 0.1176, 0.1216,  ..., 0.1686, 0.1608, 0.1490],
         [0.1216, 0.1176, 0.1176,  ..., 0.1529, 0.1490, 0.1412],
         [0.1176, 0.1176, 0.1176,  ..., 0.1451, 0.1412, 0.1333]],

        [[0.0667, 0.0784, 0.0824,  ..., 0.1373, 0.1333, 0.1255],
         [0.0706, 0.0745, 0.0784,  ..., 0.1569, 0.1490, 0.1255],
         [0.0588, 0.0667, 0.0824,  ..., 0.2235, 0.1765, 0.

In [15]:
split_datasets = train_val_dataset(full_dataset)

# Size of Data, to be used for calculating Average Loss and Accuracy
train_data_size = len(split_datasets["train"])
valid_data_size = len(split_datasets["valid"])

# bath size
bs = 32

train_data_loader = DataLoader(split_datasets["train"], batch_size=bs, shuffle=True)
valid_data_loader = DataLoader(split_datasets["valid"], batch_size=bs, shuffle=True)

In [16]:
# Choose whatever GPU device number you want
device = "cuda" #torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

alexnet = models.alexnet(pretrained=True).to(device)
vgg16 = models.vgg16(pretrained=True)
model = vgg16
print(model)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [17]:
## Choose a model
#model = vgg16
#PATH = "model.pt"
#torch.save(model.state_dict(), PATH)

## Choose whatever GPU device number you want
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#model.load_state_dict(torch.load(PATH, map_location="cuda:0"))
## Make sure to call input = input.to(device) on any input tensors that you feed to the model
#model.to(device)



# Each output represents a Force (fx, fy, fz)
num_output = 3

# A single output representing the magnitud force (fx, fy, fz)
num_output = 1

# Change the final layer of AlexNet Model for Transfer Learning
model.classifier[-1] = nn.Linear(4096, num_output)


# Freeze model parameters
for param in model.parameters():
    param.requires_grad = False

# Unfreeze last layer parameters
for param in model.classifier.parameters():
    param.requires_grad = True

In [18]:
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [21]:
class MyMSELoss(nn.Module):
    def __init__(self):
        super(MyMSELoss, self).__init__()

    def forward(self, inputs, targets):
        # Calculate the loss for each ourput neuron
        # in this case 3 losses one for each force
        individual_force_mean_square = ((inputs - targets)**2).mean(0)
        loss = individual_force_mean_square.sum()
        return loss


class MSELoss(nn.Module):
    def __init__(self):
        super(MSELoss, self).__init__()

    def forward(self, inputs, targets):
        # Calculate the loss for each ourput neuron
        # in this case 3 losses one for each force
        loss = ((inputs - targets)**2).mean()
        return loss

In [22]:
#loss_func = nn.L1Loss()
#loss_func = nn.MSELoss()
loss_func = MSELoss()

optimizer = optim.Adam(model.parameters())

In [24]:
num_epochs = 5
trained_model, history = train_and_validate(model, loss_func, optimizer, num_epochs, train_data_loader, valid_data_loader, device)

torch.save(trained_model.state_dict(), 'trained_model_e2.pt')

Epoch: 1/5
Epoch : 001, Training: Loss: 2568.7776, 
		Validation : Loss : 2791.0606, Time: 44.9692s
Epoch: 2/5
Epoch : 002, Training: Loss: 2541.7895, 
		Validation : Loss : 2818.1861, Time: 44.8988s
Epoch: 3/5
Epoch : 003, Training: Loss: 2546.0413, 
		Validation : Loss : 3039.5365, Time: 44.8968s
Epoch: 4/5
Epoch : 004, Training: Loss: 2558.3024, 
		Validation : Loss : 2813.5336, Time: 44.8613s
Epoch: 5/5
Epoch : 005, Training: Loss: 2506.7424, 
		Validation : Loss : 2817.8927, Time: 45.1570s


# Load Model

In [26]:
model.load_state_dict(torch.load("trained_model_e2.pt"))
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [58]:
img, label = split_datasets["valid"][150]
print(f"Output: {label}")

Output: 0.8102925533710709


In [59]:
img.shape

torch.Size([3, 224, 224])

In [60]:
dummy = img[None, :, :, :]
dummy.shape

torch.Size([1, 3, 224, 224])

In [61]:
predicted_output = model(dummy.cuda())

In [62]:
predicted_output

tensor([[8.7374]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [None]:
a = torch.Tensor([[1,2,3,], [4,5,6]])
b = torch.Tensor([[10,20,30,], [40,50,60]])

In [None]:
(a-b)

In [None]:
((a-b)**2)

In [None]:
((a-b)**2).mean(axis=0)

In [None]:
((a-b)**2).mean(axis=0).sum()