In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

from sklearn.model_selection import train_test_split
from PIL import Image

import os
import pandas as pd
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from util.cnn_parameters import *
from util.data_preprocessing import *

### Preprocessing

Przygotowanie danych i podział na dane treningowe i walidacyje -> w pliku data_preprocessing.py. 

In [28]:
class CatDogDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.class_to_idx = {DOG_LABEL: 1, CAT_LABEL: 0}

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.dataframe.iloc[idx][IMAGE_LABEL])
        image = Image.open(img_name).convert("RGB")
        label = self.class_to_idx[self.dataframe.iloc[idx][CLASS_LABEL]]

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.float32)  # [batch_size, 1]

# Edycja danych treningowych i walidacyjnych:
train_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop((128, 128), scale=(0.8, 1.5)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.5, contrast=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

val_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Zbiory danych:
train_dataset = CatDogDataset(train_df, TRAIN_IMAGES_PATH, transform=train_transforms)
val_dataset = CatDogDataset(val_df, TRAIN_IMAGES_PATH, transform=val_transforms)

# generators:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


Dataloaders created successfully!


In [29]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=KERNEL_SIZE_CONV, padding=PADDING),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=POOL_SIZE, stride=MAXPOOL_STRIDE),
            nn.Dropout(0.25),

            nn.Conv2d(32, 64, kernel_size=KERNEL_SIZE_CONV, padding=PADDING),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=POOL_SIZE, stride=MAXPOOL_STRIDE),
            nn.Dropout(0.25),

            nn.Conv2d(64, 128, kernel_size=KERNEL_SIZE_CONV, padding=PADDING),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=POOL_SIZE, stride=MAXPOOL_STRIDE),
            nn.Dropout(0.25),

            nn.Conv2d(128, 256, kernel_size=KERNEL_SIZE_CONV, padding=PADDING),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(kernel_size=POOL_SIZE, stride=MAXPOOL_STRIDE),
            nn.Dropout(0.25),

            nn.Conv2d(256, 512, kernel_size=KERNEL_SIZE_CONV, padding=PADDING),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(kernel_size=POOL_SIZE, stride=MAXPOOL_STRIDE),

            nn.Flatten(),

            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.5),

            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

model = CNN().to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.float().to(device).unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        predicted = (outputs > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {train_acc:.2f}%")


torch.save(model.state_dict(), "cats_dogs_cnn.pth")

print("Model training complete and saved as 'cats_dogs_cnn.pth'.")

Epoch [1/10], Loss: 0.6872, Accuracy: 55.00%
Epoch [2/10], Loss: 0.6208, Accuracy: 65.75%
Epoch [3/10], Loss: 0.5610, Accuracy: 71.86%
Epoch [4/10], Loss: 0.4971, Accuracy: 76.41%
Epoch [5/10], Loss: 0.4210, Accuracy: 81.03%
Epoch [6/10], Loss: 0.3556, Accuracy: 84.19%
Epoch [7/10], Loss: 0.3181, Accuracy: 86.75%
Epoch [8/10], Loss: 0.2936, Accuracy: 87.62%
Epoch [9/10], Loss: 0.2688, Accuracy: 88.98%
Epoch [10/10], Loss: 0.2565, Accuracy: 89.39%
Model training complete and saved as 'cats_dogs_cnn.pth'.
