In [95]:
import os
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torchvision.datasets import ImageFolder
from torch.utils.data import Subset
import torch.nn.functional as F
import gc

In [96]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [97]:
data_dir = './trafic_32/' 

In [98]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [99]:
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

print("Number of classes:", len(dataset.classes))
print("Classes:", dataset.classes)


Number of classes: 43
Classes: ['00000', '00001', '00002', '00003', '00004', '00005', '00006', '00007', '00008', '00009', '00010', '00011', '00012', '00013', '00014', '00015', '00016', '00017', '00018', '00019', '00020', '00021', '00022', '00023', '00024', '00025', '00026', '00027', '00028', '00029', '00030', '00031', '00032', '00033', '00034', '00035', '00036', '00037', '00038', '00039', '00040', '00041', '00042']


In [100]:
def corrupt(x, amount):
    if not torch.is_tensor(amount):
        amount = torch.tensor(amount, device=x.device, dtype=x.dtype)
    amount = amount.view(-1, 1, 1, 1)
    noise = torch.rand_like(x)
    return x * (1 - amount) + noise * amount

In [101]:
class ResidualBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.block = nn.Sequential(
            nn.LayerNorm(dim),
            nn.ReLU(),
            nn.Linear(dim, dim),
            nn.LayerNorm(dim),
            nn.ReLU(),
            nn.Linear(dim, dim)
        )

    def forward(self, x):
        return x + self.block(x)

In [102]:
class SequentialNet(nn.Module):
    def __init__(self, input_channels=3, patch_size=4, embed_dim=128, num_blocks=4):
        super().__init__()
        self.patch_size = patch_size
        self.embed_dim = embed_dim

        self.patch_embed = nn.Conv2d(input_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.flatten = nn.Flatten(2)
        self.transpose = lambda x: x.transpose(1,2)

        self.res_blocks = nn.Sequential(
            *[ResidualBlock(embed_dim) for _ in range(num_blocks)]
        )

        self.unflatten = lambda x: x.transpose(1, 2).reshape(x.size(0), embed_dim, 8, 8)
        self.decode = nn.Sequential(
            nn.ConvTranspose2d(embed_dim, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid() # do wywalenia jeśli mse
        )

    def forward(self, x):
        x = self.patch_embed(x)
        B, C, H, W = x.shape
        x = x.view(B, C, H * W).transpose(1, 2)
        x = self.res_blocks(x)
        x = x.transpose(1, 2).view(B, C, H, W)
        x = self.decode(x)
        return x
        
        

In [94]:
# sanity check żeby sprawdzic czy sie uczy
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SequentialNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.MSELoss()

x, _ = next(iter(dataloader))
x = x[0].unsqueeze(0).to(device)

def corrupt(x, noise_amount):
    noise = noise_amount[:, None, None, None] * torch.randn_like(x)
    return x + noise

noise_amount = torch.tensor([0.5], device=device)
noisy_x = corrupt(x, noise_amount)

for step in range(200):
    model.train()
    pred = model(noisy_x)
    loss = loss_fn(pred, x)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % 10 == 0 or step == 199:
        print(f"[{step:03d}] Loss: {loss.item():.6f}")

[000] Loss: 2.234373
[010] Loss: 1.774741
[020] Loss: 1.460139
[030] Loss: 1.314956
[040] Loss: 1.244955
[050] Loss: 1.204571
[060] Loss: 1.178227
[070] Loss: 1.156814
[080] Loss: 1.148518
[090] Loss: 1.144521
[100] Loss: 1.141918
[110] Loss: 1.140125
[120] Loss: 1.138762
[130] Loss: 1.137631
[140] Loss: 1.136658
[150] Loss: 1.135828
[160] Loss: 1.135144
[170] Loss: 1.134579
[180] Loss: 1.134099
[190] Loss: 1.133683
[199] Loss: 1.133349


In [103]:
evice = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SequentialNet().to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

def corrupt(x, noise_amount):
    noise = noise_amount[:, None, None, None] * torch.randn_like(x)
    return x + noise

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    losses = []

    for x, _ in dataloader:
        x = x.to(device)

        noise_amount = torch.rand(x.size(0), device=device)
        noisy_x = corrupt(x, noise_amount)

        pred = model(noisy_x)
        loss = loss_fn(pred, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    avg_loss = sum(losses) / len(losses)
    print(f"Epoch {epoch+1}/{num_epochs} | Avg Loss: {avg_loss:.6f}")

Epoch 1/10 | Avg Loss: 1.427083
Epoch 2/10 | Avg Loss: 1.417355
Epoch 3/10 | Avg Loss: 1.417129
Epoch 4/10 | Avg Loss: 1.416930


KeyboardInterrupt: 

In [75]:
class BasicUNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=3):
        super().__init__()
        self.down_layers = nn.ModuleList([
            nn.Conv2d(in_channels, 32, kernel_size=5, padding=2),
            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.Conv2d(64, 64, kernel_size=5, padding=2),
        ])
        self.up_layers = nn.ModuleList([
            nn.Conv2d(64 + 1, 64, kernel_size=5, padding=2),
            nn.Conv2d(64, 32, kernel_size=5, padding=2),
            nn.Conv2d(32, out_channels, kernel_size=5, padding=2),
        ])
        self.act = nn.SiLU()
        self.downscale = nn.MaxPool2d(2)
        self.upscale = nn.Upsample(scale_factor=2, mode='nearest', align_corners=False)

    def forward(self, x, t):
        h = []
        for i, l in enumerate(self.down_layers):
            x = self.act(l(x))
            if i < 2:
                h.append(x)
                x = self.downscale(x)

        t = t.view(-1, 1, 1, 1)
        t = t.expand(-1, 1, x.size(2), x.size(3))
        x = torch.cat([x, t], dim=1)

        for i, l in enumerate(self.up_layers):
            if i > 0:
                x = F.interpolate(x, size=h[-1].shape[2:], mode='bilinear', align_corners=False)
                x = x + h.pop()
            x = self.act(l(x))

        return x

In [78]:
from tqdm import tqdm
import time

net = BasicUNet().to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)
loss_fn = nn.MSELoss()
num_epochs = 20
losses = []

for epoch in range(num_epochs):
    torch.cuda.empty_cache()
    gc.collect()
    for x, _ in dataloader:
        x = x.to(device)
        noise_amount = torch.rand(x.size(0), device=device)
        noisy_x = corrupt(x, noise_amount)
        
        pred = net(noisy_x, noise_amount)
        loss = loss_fn(pred, x)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
    
    torch.cuda.empty_cache()
    gc.collect()
    avg_loss = sum(losses) / len(losses)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

Epoch 1/20, Loss: 0.9094
Epoch 2/20, Loss: 0.8983
Epoch 3/20, Loss: 0.8931
Epoch 4/20, Loss: 0.8933
Epoch 5/20, Loss: 0.8911
Epoch 6/20, Loss: 0.9383
Epoch 7/20, Loss: 0.9425
Epoch 8/20, Loss: 0.9452
Epoch 9/20, Loss: 0.9510
Epoch 10/20, Loss: 0.9584
Epoch 11/20, Loss: 0.9572
Epoch 12/20, Loss: 0.9561
Epoch 13/20, Loss: 0.9559
Epoch 14/20, Loss: 0.9556
Epoch 15/20, Loss: 0.9569
Epoch 16/20, Loss: 1.0083
Epoch 17/20, Loss: 1.0536
Epoch 18/20, Loss: 1.0939
Epoch 19/20, Loss: 1.1299
Epoch 20/20, Loss: 1.1623


In [None]:
import matplotlib.pyplot as plt
plt.plot(losses)
plt.ylim(0, 0.1)