In [31]:
import torch
import kagglehub
import numpy as np
import pandas as pd
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

from PIL import Image
from tqdm import tqdm
from pathlib import Path
from matplotlib import cm
from torch import nn, optim
from __future__ import annotations
from torch.utils.data import DataLoader, Dataset

from IPython.display import clear_output

import warnings

In [32]:
path = kagglehub.dataset_download("bloodlaac/products-dataset")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\Юля\.cache\kagglehub\datasets\bloodlaac\products-dataset\versions\1


In [33]:
warnings.filterwarnings("ignore")

In [34]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [35]:
food_dir = f"{path}\products_dataset"

FOOD = [
    'FreshApple', 'FreshBanana', 'FreshMango', 'FreshOrange', 'FreshStrawberry',
    'RottenApple', 'RottenBanana', 'RottenMango', 'RottenOrange', 'RottenStrawberry',
    'FreshBellpepper', 'FreshCarrot', 'FreshCucumber', 'FreshPotato', 'FreshTomato',
    'RottenBellpepper', 'RottenCarrot', 'RottenCucumber', 'RottenPotato', 'RottenTomato'
]

In [36]:
class LabeledDataset():
    def __init__(self, food_dir: Path, food_classes: list[str], transform=None) -> LabeledDataset:
        self.food_dir = food_dir
        self.food_classes = food_classes
        self.transform = transform
        self.images_paths = []
        self.labels = []

        for cls_name in food_classes:
            class_path = Path(food_dir)
            class_path /= cls_name

            for image_name in class_path.iterdir():
                image_path = class_path / image_name
                self.images_paths.append(image_path)
                self.labels.append(food_classes.index(cls_name))
        
    def __len__(self) -> int:
        return len(self.images_paths)
    
    def __getitem__(self, index: int):
        image = Image.open(self.images_paths[index]).convert("RGB")
        label = self.labels[index]

        if self.transform:
            image = self.transform(image)

        return image, label

In [37]:
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.3),
    transforms.RandomVerticalFlip(p=0.3),
    transforms.RandomCrop([200, 200]),
    transforms.ColorJitter(brightness=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.3, 0.3)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
])

In [38]:
food_dataset = LabeledDataset(food_dir, FOOD, transform=data_transforms)

In [39]:
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(food_dataset, [0.6, 0.2, 0.2])

In [40]:
train_dataloader = DataLoader(
            train_dataset,
            batch_size=16,
            shuffle=True,
            pin_memory=True  # TODO: fix
        )

In [41]:
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [42]:
class Block(nn.Module):
    """
    Create basic unit of ResNet.

    Consists of two convolutional layers.
    
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            stride: int = 1,
            downsampling=None
        ) -> Block:

        super().__init__()
        
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            padding=1
        )
        self.batch_norm = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,  # TODO: Replace with padding="same"
            padding=1
        )
        self.downsampling = downsampling

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        input = x

        pred = self.batch_norm(self.conv1(x))
        pred = self.relu(pred)
        pred = self.batch_norm(self.conv2(pred))
        
        if self.downsampling is not None:
            input = self.downsampling(x)
        
        pred += input
        pred = self.relu(pred)

        return pred

In [43]:
class ResNet(nn.Module):
    """
    Build model ResNet and return prediction

    """

    def __init__(self, blocks_num_list: list[int]) -> ResNet:
        """
        ResNet init.

        Parameters
        ----------
        blocks_num_list : list[int] 
                          Number of basic blocks for each layer.

        """
        super().__init__()

        self.in_channels = 64  # Default number of channels for first layer. Mutable!

        # Reduce resolution of picture by 2
        # 224 -> 112
        self.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3
        )
        self.batch_norm = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.pooling = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # 112 -> 56

        self.layer1 = self.create_layer(  # Default stride. No resolution reduction.
            out_channels=64,
            num_blocks=blocks_num_list[0]
        )
        self.layer2 = self.create_layer(  # Resolution reduction. 56 -> 28
            out_channels=128,
            num_blocks=blocks_num_list[1],
            stride=2
        )
        self.layer3 = self.create_layer(  # Resolution reduction. 28 -> 14
            out_channels=256,
            num_blocks=blocks_num_list[2],
            stride=2
        )
        self.layer4 = self.create_layer(  # Resolution reduction. 14 -> 7
            out_channels=512,
            num_blocks=blocks_num_list[3],
            stride=2
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, 20)
    
    def create_layer(
            self,
            out_channels: int,
            num_blocks: int,
            stride: int = 1
        ) -> nn.Sequential:
        """
        Create ResNet layer.

        Parameters
        ----------
        out_channels : int
            Number of output channels per block
        num_blocks : int
            Number of blocks per layer
        stride : int, default=1
            Step of filter in conv layer

        """
        downsampling = None

        if stride != 1:
            downsampling = nn.Sequential(
                nn.Conv2d(
                    in_channels=self.in_channels,
                    out_channels=out_channels,
                    kernel_size=1,
                    stride=stride
                ),
                nn.BatchNorm2d(out_channels)
            )

        blocks: list[Block] = []
        
        blocks.append(Block(
            in_channels=self.in_channels,
            out_channels=out_channels,
            stride=stride,
            downsampling=downsampling
        ))

        self.in_channels = out_channels

        for _ in range(num_blocks - 1):
            blocks.append(Block(out_channels, out_channels))

        return nn.Sequential(*blocks)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        pred = self.batch_norm(self.conv1(x))
        pred = self.relu(pred)
        pred = self.pooling(pred)

        pred = self.layer1(pred)
        pred = self.layer2(pred)
        pred = self.layer3(pred)
        pred = self.layer4(pred)

        pred = self.avgpool(pred)
        pred = torch.flatten(pred, 1)
        pred = self.fc(pred)

        return pred

In [44]:
def plot_history(
        epochs: int,
        train_history: list,
        val_history: list,
        optimizer_name: str,
        label: str
    ):
    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 10))
    ax1.plot(np.arange(1, epochs + 1), train_history, label=label)
    ax2.plot(np.arange(1, epochs + 1), val_history, label=label)

    for ax in (ax1, ax2):
        ax.set_xlabel('Epochs')
        ax.set_ylabel('Accuracy')
        ax.legend(loc='lower right')
        ax.grid(True)

    ax1.set_title(f'{optimizer_name} Training accuracy')
    ax2.set_title(f'{optimizer_name} Validation accuracy')

    plt.tight_layout()
    plt.show()

In [45]:
def validate(model, loader):
    model.eval()

    correct, total = 0, 0

    for batch in loader:
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            pred = model(images)
        pred = torch.argmax(pred, dim=1)

        total += len(pred)
        correct += (pred == labels).sum().item()

    return correct / total

In [46]:
def train(model, criterion, train_loader, val_loader, optimizer, epochs=10):
    train_acc, val_acc = [], []

    model.train()

    for epoch in tqdm(range(epochs), leave=False):
        correct, total = 0, 0
        train_loss = 0.0

        for batch in train_loader:
            images, labels = batch

            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            pred = model(images)
            loss = criterion(pred, labels)
                
            loss.backward()
            optimizer.step()

            pred = torch.argmax(pred, dim=1)

            total += len(pred)
            correct += (pred == labels).sum().item()
            train_loss += loss.item()

        train_acc.append(correct / total)
        val_acc.append(validate(model, val_loader))
        epoch_loss = train_loss / len(train_loader)

        print(f"Epoch: [{epoch + 1}/{epochs}]")
        print(f"Train accuracy: {train_acc[-1]:.4f}")
        print(f"Train loss: {epoch_loss:.4f}")
        print(f"Val accuracy: {val_acc[-1]:.4f}\n")

    return train_acc, val_acc

In [47]:
def test(model, loader):
    model.eval()

    correct, total = 0, 0

    for batch in loader:
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            pred = model(images)
        pred = torch.argmax(pred, dim=1)

        total += len(pred)
        correct += (pred == labels).sum().item()

    return correct / total

In [26]:
criterion = nn.CrossEntropyLoss()

In [27]:
blocks_num_list = [2, 2, 2, 2]

model = ResNet(blocks_num_list).to(device)

In [28]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [48]:
# TODO: add plotting graphs and cycle over epochs

In [30]:
print(f"Training ResNet18 with SGD\n")
    
train_acc, val_acc = train(
    model,
    criterion,
    train_dataloader,
    val_dataloader,
    optimizer=optimizer,
    epochs=20
)

test_acc = test(model, test_dataloader)

print(f"\nTest accuracy: {test_acc:.4f}")

Training ResNet18 with SGD



  5%|▌         | 1/20 [00:56<17:49, 56.27s/it]

Epoch: [1/20]
Train accuracy: 0.5521
Train loss: 1.3687
Val accuracy: 0.4933



 10%|█         | 2/20 [01:51<16:45, 55.83s/it]

Epoch: [2/20]
Train accuracy: 0.6071
Train loss: 1.2162
Val accuracy: 0.6600



 15%|█▌        | 3/20 [02:47<15:47, 55.74s/it]

Epoch: [3/20]
Train accuracy: 0.6400
Train loss: 1.1028
Val accuracy: 0.6150



 20%|██        | 4/20 [03:42<14:49, 55.59s/it]

Epoch: [4/20]
Train accuracy: 0.6721
Train loss: 0.9964
Val accuracy: 0.6404



 25%|██▌       | 5/20 [04:38<13:52, 55.47s/it]

Epoch: [5/20]
Train accuracy: 0.6929
Train loss: 0.9442
Val accuracy: 0.6958



 30%|███       | 6/20 [05:33<12:56, 55.49s/it]

Epoch: [6/20]
Train accuracy: 0.7265
Train loss: 0.8412
Val accuracy: 0.7379



 35%|███▌      | 7/20 [06:28<12:00, 55.39s/it]

Epoch: [7/20]
Train accuracy: 0.7328
Train loss: 0.8069
Val accuracy: 0.6837



 40%|████      | 8/20 [07:22<10:59, 54.95s/it]

Epoch: [8/20]
Train accuracy: 0.7432
Train loss: 0.7781
Val accuracy: 0.7488



 45%|████▌     | 9/20 [08:17<10:02, 54.82s/it]

Epoch: [9/20]
Train accuracy: 0.7631
Train loss: 0.7262
Val accuracy: 0.7262



 50%|█████     | 10/20 [09:11<09:06, 54.69s/it]

Epoch: [10/20]
Train accuracy: 0.7700
Train loss: 0.6820
Val accuracy: 0.7775



 55%|█████▌    | 11/20 [10:06<08:11, 54.59s/it]

Epoch: [11/20]
Train accuracy: 0.7832
Train loss: 0.6703
Val accuracy: 0.7858



 60%|██████    | 12/20 [11:00<07:15, 54.48s/it]

Epoch: [12/20]
Train accuracy: 0.7889
Train loss: 0.6500
Val accuracy: 0.7846



 65%|██████▌   | 13/20 [11:54<06:21, 54.48s/it]

Epoch: [13/20]
Train accuracy: 0.7935
Train loss: 0.6318
Val accuracy: 0.7925



 70%|███████   | 14/20 [12:48<05:26, 54.40s/it]

Epoch: [14/20]
Train accuracy: 0.7979
Train loss: 0.6054
Val accuracy: 0.8117



 75%|███████▌  | 15/20 [13:43<04:31, 54.39s/it]

Epoch: [15/20]
Train accuracy: 0.8122
Train loss: 0.5653
Val accuracy: 0.7987



 80%|████████  | 16/20 [14:37<03:36, 54.23s/it]

Epoch: [16/20]
Train accuracy: 0.8136
Train loss: 0.5617
Val accuracy: 0.7992



 85%|████████▌ | 17/20 [15:31<02:42, 54.22s/it]

Epoch: [17/20]
Train accuracy: 0.8263
Train loss: 0.5256
Val accuracy: 0.8221



 90%|█████████ | 18/20 [16:25<01:48, 54.19s/it]

Epoch: [18/20]
Train accuracy: 0.8336
Train loss: 0.5009
Val accuracy: 0.8025



 95%|█████████▌| 19/20 [17:20<00:54, 54.36s/it]

Epoch: [19/20]
Train accuracy: 0.8343
Train loss: 0.5024
Val accuracy: 0.8208



                                               

Epoch: [20/20]
Train accuracy: 0.8462
Train loss: 0.4713
Val accuracy: 0.8154






Test accuracy: 0.8108
