The aim is to convert our images into tensors so that they can later be fed to a convolutional neural network.

In [10]:
# Import libraries
import torch # an open source ML library used for creating neural networks
from torch.utils.data import Dataset 
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import pytorch_lightning as pl
from torchmetrics import R2Score

In [11]:
# More imports
from torchvision.datasets import ImageFolder # to load the dataset
from torchvision import transforms # to transform the dataset (the images) 

import pytorch_lightning as pl  # to help to write more scalable and maintainable code
import torchmetrics #for evaluating and reporting metrics

# install torchmetrics
!pip install torchmetrics





In [12]:
def getData(path='archive/OriginalDataset', RNN=False):
    """Build an ``ImageFolder`` dataset of normalized 128x128 image tensors.

    Args:
        path: Root directory handed to ``ImageFolder`` (one sub-folder per
            class, as ``ImageFolder`` requires).
        RNN: When True, a single-channel grayscale conversion is appended
            after normalization, so each sample has 1 channel instead of 3.

    Returns:
        The ``ImageFolder`` dataset with the transform pipeline attached.
    """
    steps = [
        transforms.Resize((128, 128)),  # resize the image to 128x128
        transforms.ToTensor(),  # convert the image to a tensor
        # normalize with the mean and standard deviation of the ImageNet dataset
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]
    if RNN:
        # Appended only when needed; avoids an identity Lambda, which cannot
        # be pickled for multi-process data loading.
        steps.append(transforms.Grayscale(num_output_channels=1))

    # Honor the caller-supplied root instead of a hard-coded directory.
    return ImageFolder(root=path, transform=transforms.Compose(steps))

In [13]:
# index of the first data point shown as an image

Once the data is organized correctly and the images have been converted to tensors, it is time to implement a CNN with four output classes. It uses three Conv2d-ReLU-MaxPool blocks followed by a fully connected classification layer.

In [14]:
# simple cnn model with 4 classes

class SimpleCNN(nn.Module):
    """Small CNN classifier: three Conv-ReLU-MaxPool stages plus a linear head.

    The head is sized for 3-channel 128x128 inputs: each of the three pooling
    stages halves height and width, so the feature map entering the head is
    128 channels x 16 x 16.

    The network returns raw logits. ``cross_entropy`` applies log-softmax
    internally, so a Softmax layer here would normalize twice and flatten the
    gradients. Apply ``softmax`` to the output at inference time if
    probabilities are needed.

    Args:
        num_classes: Number of output classes (defaults to 4).
    """

    def __init__(self, num_classes=4):
        super(SimpleCNN, self).__init__()
        self.cnn = torch.nn.Sequential(
            torch.nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Kept as a Sequential so the parameter names (fc.0.weight, fc.0.bias)
        # stay the same as before.
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(128 * 16 * 16, num_classes),
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for an image batch."""
        x = self.cnn(x)
        # flatten() also handles non-contiguous feature maps, unlike view().
        x = torch.flatten(x, 1)
        return self.fc(x)

In [15]:

class ImageRNN(nn.Module):
    """Single-layer RNN classifier that reads an image one channel at a time.

    Args:
        input_size: Length of each flattened channel (height * width).
        hidden_size: Size of the RNN hidden state.
        num_classes: Number of output scores produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()

        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a (batch, channels, height, width) tensor to class scores."""
        n_batch, n_channels, _, _ = x.size()

        # Each channel becomes one time step holding its flattened pixels,
        # so the sequence length equals the number of channels.
        sequence = x.view(n_batch, n_channels, -1)

        # Zero initial hidden state, created on the same device as the input.
        initial_hidden = torch.zeros(
            1, n_batch, self.hidden_size, device=sequence.device)
        outputs, _ = self.rnn(sequence, initial_hidden)

        # Classify from the RNN output at the final time step.
        return self.fc(outputs[:, -1, :])

In [16]:

class PLModel(pl.LightningModule):
    """Lightning wrapper that trains a classifier with cross-entropy and Adam.

    Args:
        model: The network to train; called as ``model(x)`` on each batch.
        num_classes: Number of classes, passed to the multiclass accuracy metric.
    """

    def __init__(self, model, num_classes):
        super().__init__()
        self.model = model
        # NOTE(review): this single Accuracy instance is used by both the
        # training and the validation step.
        self.accuracy = torchmetrics.Accuracy('multiclass', num_classes=num_classes)
        self.loss = torch.nn.functional.cross_entropy

    def forward(self, x):
        """Delegate to the wrapped model."""
        return self.model(x)

    def _evaluate_and_log(self, batch, loss_key, accuracy_key):
        """Run one batch, log loss and accuracy under the given keys, return the loss."""
        inputs, targets = batch
        predictions = self(inputs)
        batch_loss = self.loss(predictions, targets)
        log_options = dict(on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log(loss_key, batch_loss, **log_options)
        self.log(accuracy_key, self.accuracy(predictions, targets), **log_options)
        return batch_loss

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss and accuracy for one batch."""
        return self._evaluate_and_log(batch, 'train_loss', 'train_accuracy')

    def configure_optimizers(self):
        """Optimize all parameters with Adam at a learning rate of 0.001."""
        return torch.optim.Adam(self.parameters(), lr=0.001)

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and accuracy for one batch."""
        return self._evaluate_and_log(batch, 'val_loss', 'val_accuracy')
    

In [17]:
class DataModule(pl.LightningDataModule):
    """Split one dataset 80/10/10 into train/val/test and serve DataLoaders.

    Args:
        Dataset: The full dataset instance to split (must support ``len``).
        batch_size: Batch size used by all three loaders.
        seed: Seed for the split, so the same samples land in the same subset
            on every run.
    """

    def __init__(self, Dataset, batch_size=32, seed=42):
        super().__init__()
        self.Dataset = Dataset
        self.batch_size = batch_size
        self.seed = seed
        # Filled in by setup(); None means the split has not happened yet.
        self.train_dataset = None
        self.val_dataset = None
        self.test_dataset = None

    def setup(self, stage=None):
        """Create the train/val/test subsets once; later calls are no-ops."""
        # Lightning calls setup() for each stage (fit, validate, test, ...).
        # Splitting again would move samples between subsets and leak
        # training images into the test set, so keep the first split.
        if self.train_dataset is not None:
            return

        dataset = self.Dataset
        train_size = int(0.8 * len(dataset))
        val_size = int(0.1 * len(dataset))
        # The remainder goes to test so the three sizes always sum to len(dataset).
        test_size = len(dataset) - train_size - val_size
        self.train_dataset, self.val_dataset, self.test_dataset = torch.utils.data.random_split(
            dataset,
            [train_size, val_size, test_size],
            generator=torch.Generator().manual_seed(self.seed),
        )

    def train_dataloader(self):
        """Shuffled loader over the training subset."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Ordered loader over the validation subset."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        """Ordered loader over the test subset."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
    



In [18]:
# Train the recurrent baseline on single-channel (grayscale) images.
dataset = DataModule(getData(RNN=True), batch_size=32)
# 16384 = 128 * 128: each resized grayscale image is flattened into one RNN input vector.
m = ImageRNN(16384, 128, 4)
model = PLModel(m, 4)

# Fit on the GPU for at most 10 epochs.
trainer = pl.Trainer(accelerator="gpu", max_epochs=10)
trainer.fit(model, dataset)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type     | Params
--------------------------------------
0 | model    | ImageRNN | 2.1 M 
1 | accuracy | Accuracy | 0     
--------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.457     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [19]:
# Train the convolutional baseline on the 3-channel images.
m = SimpleCNN()
dataset = DataModule(getData(), batch_size=32)
model = PLModel(m, 4)

# Fit on the GPU for at most 10 epochs.
trainer = pl.Trainer(accelerator="gpu", max_epochs=10)
trainer.fit(model, dataset)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type      | Params
---------------------------------------
0 | model    | SimpleCNN | 224 K 
1 | accuracy | Accuracy  | 0     
---------------------------------------
224 K     Trainable params
0         Non-trainable params
224 K     Total params
0.897     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

In [23]:
class Resnet(nn.Module):
    """ResNet-18 loaded from torch.hub with its final layer resized.

    Args:
        num_classes: Number of outputs of the replacement final layer.
    """

    def __init__(self, num_classes=4):
        super().__init__()
        # Pretrained backbone from the pinned torchvision v0.10.0 hub entry.
        backbone = torch.hub.load(
            'pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
        # Replace the classification head with a freshly initialized one.
        backbone.fc = nn.Linear(512, num_classes)
        self.resnet = backbone

    def forward(self, x):
        """Run the batch through the backbone and the new head."""
        return self.resnet(x)

In [25]:
# Fine-tune the pretrained ResNet-18 on the 3-channel images.
m = Resnet()
dataset = getData()
model = PLModel(m, 4)
dataset = DataModule(dataset, batch_size=32)

# Fit on the GPU for at most 10 epochs.
trainer = pl.Trainer(accelerator="gpu", max_epochs=10)
trainer.fit(model, dataset)

Using cache found in C:\Users\franz/.cache\torch\hub\pytorch_vision_v0.10.0
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type     | Params
--------------------------------------
0 | model    | Resnet   | 11.2 M
1 | accuracy | Accuracy | 0     
--------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.714    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]