In [None]:
# Basic imports
import torch
import torchvision
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
import subprocess
from matplotlib import pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import ConcatDataset, DataLoader
import os

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    """Three-stage convolutional classifier for single-channel 28x28 digit images.

    Every stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    Dropout2d(0.25) -> MaxPool2d(2)`` with 32, 64 and 128 output channels.
    The 3x3 convolutions keep the spatial size and each pooling halves it
    (28 -> 14 -> 7 -> 3), so the flattened feature vector has ``128 * 3 * 3``
    entries. The head is ``Linear(1152, 512) -> ReLU -> Linear(512, 10)``.

    ``forward`` returns raw logits of shape ``(batch, 10)``; no softmax is
    applied, so pair it with ``nn.CrossEntropyLoss`` / ``F.cross_entropy``.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stage 1: 1 -> 32 channels
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(32)
        self.dropout1 = nn.Dropout2d(0.25)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 2x2 max pooling

        # Convolutional stage 2: 32 -> 64 channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(64)
        self.dropout2 = nn.Dropout2d(0.25)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 2x2 max pooling

        # Convolutional stage 3: 64 -> 128 channels
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.batch_norm3 = nn.BatchNorm2d(128)
        self.dropout3 = nn.Dropout2d(0.25)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 2x2 max pooling

        # Fully connected head; after three poolings a 28x28 image is 3x3.
        self.fc1 = nn.Linear(128 * 3 * 3, 512)
        self.fc2 = nn.Linear(512, 10)  # 10 output classes (digits 0-9)

    def forward(self, x):
        """Return class logits for a batch ``x`` of shape ``(batch, 1, 28, 28)``.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to 3x3,
                because the flattened features then no longer match ``fc1``.
        """
        # Stage 1: conv -> batch norm -> ReLU -> dropout -> max pool
        x = F.relu(self.batch_norm1(self.conv1(x)))
        x = self.dropout1(x)
        x = self.pool1(x)

        # Stage 2: conv -> batch norm -> ReLU -> dropout -> max pool
        x = F.relu(self.batch_norm2(self.conv2(x)))
        x = self.dropout2(x)
        x = self.pool2(x)

        # Stage 3: conv -> batch norm -> ReLU -> dropout -> max pool
        x = F.relu(self.batch_norm3(self.conv3(x)))
        x = self.dropout3(x)
        x = self.pool3(x)

        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 128 * 3 * 3)`` this never moves values between samples:
        # a wrongly sized input fails loudly in ``fc1`` instead of silently
        # producing a different number of rows than there were images.
        x = torch.flatten(x, 1)

        # Fully connected head
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Global network instance; the optimizer, training, evaluation and inference
# cells below all refer to this `model`.
model = CNNModel()

In [None]:
# please consult torchvision here: https://pytorch.org/vision/stable/index.html
def to_binary(image, threshold=0.0):
    """Binarise an image tensor to 0.0 (black) / 1.0 (white).

    Args:
        image: tensor of pixel values.
        threshold: values strictly greater than this become 1.0, all others
            0.0. The default of 0.0 is meant for input that has already been
            through ``Normalize((0.5,), (0.5,))``: that maps [0, 1] to
            [-1, 1], so 0.0 here corresponds to 0.5 on the original scale.

    Returns:
        A float tensor with the same shape as ``image`` containing only 0.0
        and 1.0.
    """
    return (image > threshold).float()

# Preprocessing shared by all datasets:
# grayscale -> tensor in [0, 1] -> rescale to [-1, 1] -> binarise to {0, 1}.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # force a single channel
    transforms.ToTensor(),  # PIL image -> float tensor
    transforms.Normalize((0.5,), (0.5,)),  # (x - 0.5) / 0.5
    transforms.Lambda(to_binary),
])

# Training data from torchvision: the MNIST train and test splits concatenated.
# Because the MNIST test split is used for training, it must NOT be reused
# for evaluation below.
training_data_mnist = ConcatDataset([
    torchvision.datasets.MNIST(root="../data", train=True, transform=transform, download=True),
    torchvision.datasets.MNIST(root="../data", train=False, transform=transform, download=True),
])

# EMNIST dataset ("digits" split, classes 0-9); only its test split is used.
#training_data_emnist = torchvision.datasets.EMNIST(root="../data", split="digits", train=True, transform=transform, download=True)
# NOTE(review): torchvision's EMNIST images are commonly reported to be stored
# transposed relative to MNIST - confirm visually and add a transpose to this
# dataset's transform if the previews look rotated.
test_data_emnist = torchvision.datasets.EMNIST(root="../data", split="digits", train=False, transform=transform, download=True)

# Final train / test datasets. The original cell assigned the undefined name
# `test_data_mnist` here, which raises NameError on a fresh kernel; the EMNIST
# test split loaded above is the only held-out data available.
full_dataset_train = training_data_mnist
full_dataset_test = test_data_emnist

# Data loaders
batch_size = 64
data_loader_train = DataLoader(full_dataset_train, batch_size=batch_size, shuffle=True)
data_loader_test = DataLoader(full_dataset_test, batch_size=batch_size, shuffle=True)

In [None]:
# Loss function and optimizer used by the training loop.

# NOTE: nn.CrossEntropyLoss applies log-softmax internally, so it must be fed
# the raw logits that CNNModel.forward returns (no softmax in the model).
loss_function = nn.CrossEntropyLoss()
# AdamW over every parameter of the global `model`, learning rate 1e-3.
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

In [None]:
# Train the global `model` for `Epochs` passes over `data_loader_train`,
# printing the mean batch loss and the accuracy of each epoch.
Epochs = 3
for epoch in range(Epochs):
  # Put the model in training mode explicitly (dropout active, BatchNorm
  # using batch statistics). Without this, re-running the cell after the
  # evaluation cell's `model.eval()` would train with those layers frozen.
  model.train()
  training_loss = 0.0
  correct = 0
  total = 0
  num_batches = 0
  for inputs, labels in data_loader_train:
    # Reset gradients accumulated by the previous step.
    optimizer.zero_grad()

    # Forward pass, loss, backward pass, parameter update.
    outputs = model(inputs)
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()

    # Running statistics for this epoch.
    training_loss += loss.item()
    predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
    total += labels.size(0)
    correct += predicted.eq(labels).sum().item()
    num_batches += 1

  # Averages are computed once per epoch; max(..., 1) keeps an empty loader
  # from raising instead of reporting zeros.
  avg_loss = training_loss / max(num_batches, 1)
  avg_acc = 100. * correct / max(total, 1)
  print(f'Training Loss: {avg_loss:.3f} | Training acc: {avg_acc:.3f}', 'for epoch: ', epoch)

KeyboardInterrupt: 

In [None]:
# Save only the model's parameters and buffers (its state_dict) to 'cnn.pth',
# a path relative to the current working directory.
torch.save(model.state_dict(), 'cnn.pth')
print("Model saved to 'cnn.pth'")

In [None]:
import time
# Evaluate the global `model` on `data_loader_test`: summed cross-entropy loss
# and number of correct predictions, plus a few sample previews.
model.eval()  # inference mode: dropout off, BatchNorm uses running statistics
max_previews = 5  # plot the first sample of only this many batches
test_loss = 0.0
correct = 0
with torch.no_grad():
    for i, (image, label) in enumerate(data_loader_test):
        output = model(image)
        # The model returns raw logits, so use cross_entropy (log-softmax +
        # NLL). F.nll_loss expects log-probabilities and would report a
        # meaningless value here.
        test_loss += F.cross_entropy(output, label, reduction='sum').item()
        pred = output.argmax(dim=1, keepdim=True)
        # .item() keeps `correct` a plain int so it prints as a number
        # rather than as `tensor(...)`.
        correct += pred.eq(label.view_as(pred)).sum().item()
        # Previewing every batch (with a pause after each) made the cell run
        # far longer than the evaluation itself; a handful is enough for a
        # visual sanity check.
        if i < max_previews:
            plt.imshow(image[0].squeeze(0), cmap="gray")
            plt.title(f"Prediction: {pred[0].item()}, Label: {label[0].item()}")
            plt.axis("off")
            plt.show()

num_test_samples = len(data_loader_test.dataset)
test_loss /= num_test_samples
print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, num_test_samples, 100. * correct / num_test_samples))


In [None]:
from PIL import Image
def preprocess_image(image_path):
    """Load an image file and convert it into a model-ready batch of one.

    The pipeline mirrors the training transform (grayscale, tensor,
    normalise, binarise with ``to_binary``) and additionally resizes to
    28x28, so the network sees input in the same value range it was
    trained on.

    Args:
        image_path: path of the image file to open with PIL.

    Returns:
        Tensor of shape ``[1, 1, 28, 28]`` containing only 0.0 and 1.0.
    """
    transform = transforms.Compose([
        transforms.Grayscale(),  # convert to grayscale (if the image is in colour)
        transforms.Resize((28, 28)),  # resize to 28x28
        transforms.ToTensor(),  # convert to a [C, H, W] tensor
        transforms.Normalize((0.5,), (0.5,)),  # normalise pixel values
        transforms.Lambda(to_binary),  # same binarisation as the training data
    ])

    # Image.open is lazy; running the transform inside the `with` block makes
    # sure the pixel data is read before the file handle is closed.
    with Image.open(image_path) as image:
        tensor = transform(image).unsqueeze(0)  # add the batch dimension
    return tensor



# Main function
def main(image_path='1.png'):
    """Classify a single image file with the global ``model`` and print the result.

    Args:
        image_path: path of the image to classify; defaults to ``'1.png'``.
            An empty value prints a message and returns without predicting.
    """
    if not image_path:
        print("Nie wybrano żadnego obrazu.")
        return

    # Preprocess the image
    image_tensor = preprocess_image(image_path)
    print(f"Obraz przetworzony: {image_tensor.shape}")

    # Predict. eval() is set here rather than relying on an earlier cell
    # having done it: in training mode dropout is active and BatchNorm
    # normalises with the statistics of this single image.
    model.eval()
    with torch.no_grad():
        output = model(image_tensor)  # raw logits
        prediction = torch.argmax(output, dim=1).item()  # class with the highest score
    print(f"Model przewiduje: {prediction}")

# Run the program: call main() when this code is executed as the main module.
if __name__ == "__main__":
    main()