In [16]:
import torch
from torchvision import datasets
from torchvision.transforms import v2
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F

In [17]:
# Preprocessing + augmentation pipeline applied to every image loaded
# through the dataset that is given this transform.
transform = v2.Compose(
    [
        # --- deterministic preprocessing ---------------------------------
        v2.Grayscale(num_output_channels=1),      # collapse to one channel
        v2.Resize(size=(128, 128)),               # fixed spatial size
        v2.ToImage(),                             # convert to a tensor image
        v2.ToDtype(dtype=torch.float32, scale=True),  # float32 with value scaling
        # --- random augmentation ------------------------------------------
        v2.RandomRotation(degrees=30),            # angle drawn from [-30, 30]
        v2.ElasticTransform(alpha=350.0),         # elastic displacement
        # --- normalisation ------------------------------------------------
        v2.Normalize(mean=[0.5], std=[0.5]),      # (x - 0.5) / 0.5
    ]
)

In [18]:
# Build the image dataset from the "data" directory; every sample is passed
# through `transform` when it is loaded.
dataset = datasets.ImageFolder(
    "data",
    transform=transform,
)

In [19]:
# Show the dataset summary (sample count, root location, transform pipeline).
dataset

Dataset ImageFolder
    Number of datapoints: 3985
    Root location: data
    StandardTransform
Transform: Compose(
                 Grayscale(num_output_channels=1)
                 Resize(size=[128, 128], interpolation=InterpolationMode.BILINEAR, antialias=True)
                 ToImage()
                 ToDtype(scale=True)
                 RandomRotation(degrees=[-30.0, 30.0], interpolation=InterpolationMode.NEAREST, expand=False, fill=0)
                 ElasticTransform(alpha=[350.0, 350.0], sigma=[5.0, 5.0], interpolation=InterpolationMode.BILINEAR, fill=0)
                 Normalize(mean=[0.5], std=[0.5], inplace=False)
           )

In [20]:
# 70 % of the samples go to training and 15 % to validation (both truncated
# to whole numbers); the test split receives the remainder so the three
# sizes always add up to the dataset length.
totalsize = len(dataset)
train_size, val_size = (int(share * totalsize) for share in (0.7, 0.15))
test_size = totalsize - (train_size + val_size)

In [21]:
# Inspect the computed validation-split size.
val_size

597

In [22]:
# Partition the dataset into train / validation / test subsets.
# A seeded generator makes the partition identical on every
# "Restart Kernel -> Run All"; without it each run would train and evaluate
# on a different split, so results could not be reproduced or compared.
# NOTE(review): all three subsets wrap the same `dataset`, so the random
# augmentations in its transform are also applied to validation and test
# samples — consider a separate, augmentation-free transform for evaluation.
train_df, val_df, test_df = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

In [23]:
# One loader per split. Only the training loader shuffles; validation and
# test keep a fixed order.
# Each loader must wrap its own subset: previously the validation and test
# loaders were both built from `train_df`, which would have reported
# training-set performance as validation/test performance.
train_loader = DataLoader(train_df, batch_size=32, shuffle=True)
val_loader = DataLoader(val_df, batch_size=32, shuffle=False)
test_loader = DataLoader(test_df, batch_size=32, shuffle=False)

In [24]:
# Report how many samples ended up in each split.
print(f"Train: {len(train_df)}, Validation: {len(val_df)}, Test: {len(test_df)}")

Train: 2789, Validation: 597, Test: 599


In [27]:
class CNN(nn.Module):
    """Small two-block convolutional classifier for single-channel images.

    Architecture: (conv3x3 -> ReLU -> 2x2 max-pool) twice, followed by an
    adaptive max-pool to a fixed 8x8 grid and two fully connected layers.

    Because of the adaptive pool, the size of the flattened feature vector
    (64 * 8 * 8) does not depend on the input resolution. The previous
    version skipped that layer and hard-coded ``64 * 37 * 37`` input features
    for ``fc1``, which only matches 150x150 inputs and raises a shape error
    for any other size (e.g. 128x128).

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # Attribute name (including its spelling) is kept unchanged so any
        # code that already refers to it keeps working.
        self.adative_pool = nn.AdaptiveMaxPool2d((8, 8))
        # 64 channels on the fixed 8x8 grid produced by the adaptive pool.
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Tensor of shape (batch, 1, height, width).

        Returns:
            Tensor of shape (batch, num_classes) holding raw, unnormalised
            scores (no softmax/sigmoid is applied).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Collapse whatever spatial size is left to 8x8 so fc1 always
        # receives 64 * 8 * 8 features.
        x = self.adative_pool(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [29]:
# Instantiate the two-class network and push one random single-channel
# image through it as a quick shape check.
# NOTE(review): this probe is 150x150, whereas the data pipeline resizes
# images to 128x128 — confirm the model is exercised at the size it will
# actually receive.
model = CNN(num_classes=2)

dummy = torch.randn((1, 1, 150, 150))
out = model(dummy)

print("Output:", out.shape)

Output: torch.Size([1, 2])


In [None]:
# Loss function for training. The network emits one raw logit per class, so
# multi-class cross-entropy (which applies log-softmax internally and takes
# integer class indices as targets) is the matching criterion.
# The previous `F.binary_cross_entropy()` call was invoked with no arguments
# and raised a TypeError; it also expects probabilities, not logits.
# Usage: loss(logits, targets) -> scalar tensor.
loss = nn.CrossEntropyLoss()