# Импорт библиотек

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv

import numpy as np
import matplotlib.pyplot as plt
import cv2
from torch.utils.data import DataLoader, random_split, Dataset
from tqdm import tqdm

# Загрузка датасета

In [None]:
# Convert each image to a tensor.
trans = tv.transforms.Compose([
    tv.transforms.ToTensor()
])

ds_mnist = tv.datasets.MNIST('./data', download=True, transform=trans)

proportions = [0.6, 0.2, 0.2]
size = len(ds_mnist)

# random_split requires the lengths to sum exactly to len(dataset).
# Truncating every share independently can leave a few samples unassigned
# (and raise), so the last split simply takes whatever remains.
split_sizes = [int(x * size) for x in proportions[:-1]]
split_sizes.append(size - sum(split_sizes))

train_ds, valid_ds, test_ds = random_split(ds_mnist, split_sizes)

print("Размер тренировочного датасета:", len(train_ds))
print("Размер валидационного датасета:", len(valid_ds))
print("Размер тестового датасета:", len(test_ds))

In [None]:
# Sample 9 is an (image, label) pair; take channel 0 of the image tensor
# to obtain a 2-D array that matplotlib can draw.
img = train_ds[9][0][0].numpy()
plt.imshow(img, cmap="gray")

# Даталоадер

In [None]:
batch_size = 16

# Settings shared by all three loaders.
loader_kwargs = dict(batch_size=batch_size, shuffle=True, num_workers=1)

# Only the training loader drops a trailing incomplete batch.
train_loader = DataLoader(train_ds, drop_last=True, **loader_kwargs)
valid_loader = DataLoader(valid_ds, drop_last=False, **loader_kwargs)
test_loader = DataLoader(test_ds, drop_last=False, **loader_kwargs)

# Архитектура нейронной сети

### Полносвязная нейронная сеть

In [None]:
# Fully Connected Neural Network
class FCNN(nn.Module):
    """Three-layer perceptron: flatten -> 784-16 -> 16-16 -> 16-10.

    ReLU follows the first two linear layers; the last layer's output is
    returned without an activation.
    """

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_classes = 28 * 28, 16, 10

        self.flat = nn.Flatten()
        self.linear1 = nn.Linear(n_inputs, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, n_classes)
        self.act = nn.ReLU()

    def forward(self, x):
        """Return the 10 output scores for every sample in the batch ``x``."""
        hidden = self.act(self.linear1(self.flat(x)))
        hidden = self.act(self.linear2(hidden))
        return self.linear3(hidden)

### Сверточная нейронная сеть

In [None]:
# Convolutional Neural Network
class CNN(nn.Module):
    """Four 2x2 convolutions followed by a two-layer classifier head.

    The first three convolutions are each followed by LeakyReLU(0.2) and a
    2x2 max-pool; the fourth by LeakyReLU only. The feature map is then
    average-pooled to 1x1, flattened and passed through two linear layers
    that produce 10 output scores.
    """

    def __init__(self):
        super().__init__()
        n_features = 28 * 28

        self.act = nn.LeakyReLU(negative_slope=0.2)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv0 = nn.Conv2d(in_channels=1, out_channels=128, kernel_size=2, stride=1, padding=0)
        self.conv1 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=2, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=2, stride=1, padding=0)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=n_features, kernel_size=2, stride=1, padding=0)

        self.adaptivepool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(in_features=n_features, out_features=20)
        self.linear2 = nn.Linear(in_features=20, out_features=10)

    def forward(self, x):
        """Return the 10 output scores for every sample in the batch ``x``."""
        out = x
        # Three identical conv -> activation -> max-pool stages.
        for conv in (self.conv0, self.conv1, self.conv2):
            out = self.maxpool(self.act(conv(out)))

        # The last convolution is not followed by max-pooling.
        out = self.act(self.conv3(out))

        # Collapse the spatial dimensions and classify.
        out = self.flatten(self.adaptivepool(out))
        out = self.act(self.linear1(out))
        return self.linear2(out)

In [None]:
# Instantiate the fully connected network; use CNN() here instead to train
# the convolutional model defined in this file.
model = FCNN()

# Оптимайзер, функция потерь, метрики

In [None]:
# SGD with momentum over all parameters of the model.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Cross-entropy criterion applied to the raw model outputs.
loss_fn = nn.CrossEntropyLoss()

def accuracy(pred, label):
    """Return the fraction of samples whose predicted class matches the label.

    Args:
        pred: tensor of per-class scores, classes along dimension 1.
        label: tensor of the same layout as ``pred`` (e.g. one-hot rows);
            the true class is the arg-max along dimension 1.

    Returns:
        A Python float in [0, 1], or NaN for an empty batch.
    """
    # Softmax is monotonic, so the arg-max of the raw scores already is the
    # predicted class. Staying in torch (no .numpy()) also lets this work
    # for tensors on any device.
    pred_classes = pred.detach().argmax(dim=1)
    true_classes = label.argmax(dim=1).to(pred_classes.device)
    matches = pred_classes == true_classes

    total = matches.numel()
    if total == 0:
        return float("nan")
    return matches.sum().item() / total

# Обучение

In [None]:
# Pick the compute device: CUDA if present, otherwise MPS (Apple Silicon
# Macs, M1/M2), otherwise fall back to the CPU. A hard-coded "mps" fails on
# machines without that backend.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

model = model.to(device)
loss_fn = loss_fn.to(device)

In [None]:
epochs = 10

# Per-epoch history, used afterwards to plot the learning curves.
accuracy_train = []
accuracy_valid = []
losses_train = []
losses_valid = []

for epoch in range(epochs):
    print(f"{epoch+1}/{epochs}")

    # Running sums of the per-batch loss / accuracy for this epoch.
    loss_train = 0
    acc_train = 0
    loss_valid = 0
    acc_valid = 0

    # ---- Training pass ----
    # Explicitly enter training mode: the model may have been left in eval
    # mode (e.g. after loading a checkpoint and calling model.eval()).
    model.train()
    for img, label in tqdm(train_loader):
        optimizer.zero_grad()
        img = img.to(device)
        label = label.to(device)

        # One-hot float targets are used by both the loss and accuracy().
        label = F.one_hot(label, 10).float()
        pred = model(img)

        loss = loss_fn(pred, label)
        loss.backward()
        optimizer.step()

        loss_train += loss.item()
        acc_train += accuracy(pred.cpu(), label.cpu())

    # Epoch metrics are the mean of the per-batch values.
    accuracy_train.append(acc_train/len(train_loader))
    losses_train.append(loss_train/len(train_loader))
    print(f"train acc: {accuracy_train[-1]}")
    print(f"train loss: {losses_train[-1]}")

    # ---- Validation pass ----
    # Eval mode and no gradient tracking: nothing is updated here.
    model.eval()
    with torch.no_grad():
        for img, label in tqdm(valid_loader):
            img = img.to(device)
            label = label.to(device)
            pred = model(img)
            label = F.one_hot(label, 10).float()

            loss_valid += loss_fn(pred, label).item()
            acc_valid += accuracy(pred.cpu(), label.cpu())

    accuracy_valid.append(acc_valid/len(valid_loader))
    losses_valid.append(loss_valid/len(valid_loader))

    print(f"valid acc: {accuracy_valid[-1]}")
    print(f"valid loss: {losses_valid[-1]}")

### График обучения

In [None]:
x = range(1, epochs + 1)

# One figure per metric:
# (title, y-axis label, train series, train legend, validation series, validation legend).
figure_specs = [
    ('График точности обучения по эпохам', 'Точность',
     accuracy_train, 'Точность на обучении',
     accuracy_valid, 'Точность на валидации'),
    ('График потерь обучения по эпохам', 'Потери',
     losses_train, 'Потери на обучении',
     losses_valid, 'Потери на валидации'),
]

for title, y_label, train_values, train_legend, valid_values, valid_legend in figure_specs:
    plt.figure(figsize=(8, 4))
    plt.plot(x, train_values, 'b', label=train_legend)
    plt.plot(x, valid_values, 'r', label=valid_legend)
    plt.title(title)
    plt.xlabel('Эпохи')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)

# Render both figures.
plt.show()

### Сохранение модели

In [None]:
import os

save_path = './models/model1.pth'
# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing the weights.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
torch.save(model.state_dict(), save_path)

# Использование модели

### Загрузка модели из файла

In [None]:
model = FCNN()
# Saved models live in the `models` folder; the most accurate one is 'cnn1.pth'.
# To use it, change the model type to model = CNN().
load_path = './models/model1.pth'
# map_location='cpu': the weights may have been saved from an accelerator
# (the training cell moves the model to `device`), which would make loading
# fail on a machine that lacks that device.
model.load_state_dict(torch.load(load_path, map_location='cpu'))
model.eval();

In [None]:
# Keep the model on the CPU: the cells below feed it tensors that are never
# moved to another device.
model = model.to('cpu')  # Optional

### Тест

In [None]:
loss_test = 0
acc_test = 0

# Evaluation only, so gradient tracking is switched off for the whole pass.
with torch.no_grad():
    pbar = tqdm(test_loader)
    for img, label in pbar:
        pred = model(img)
        label = F.one_hot(label, 10).float()
        loss_test += loss_fn(pred, label).item()
        acc_test += accuracy(pred.cpu(), label.cpu())

# Report the mean of the per-batch values.
n_batches = len(test_loader)
print(f"Средние потери: {loss_test/n_batches}")
print(f"Средняя точность: {acc_test/n_batches}")

### Пеинт

In [None]:
path = './data/test/3_Paint.png'
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
plt.imshow(img, cmap='gray')

# Prepend two singleton axes, convert to float32 and scale by 1/255,
# then wrap the array in a tensor.
img = img[np.newaxis, np.newaxis, ...].astype(np.float32) / 255.0
img = torch.from_numpy(img)

In [None]:
predict = model(img)
# Softmax over the class dimension, computed once and reused below.
probabilities = F.softmax(predict, dim=1).detach().numpy()
print(probabilities.argmax())
probabilities

### Реальное фото

In [None]:
# Read the photo in colour, then convert from BGR to RGB channel order
# before displaying it.
bgr_img = cv2.imread('./data/test/4_Real.png', cv2.IMREAD_COLOR)
img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
plt.imshow(img)

In [None]:
# Grayscale conversion. `img` was converted to RGB (no alpha channel) in the
# previous cell, so the matching conversion code is RGB2GRAY, not RGBA2GRAY.
gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
plt.imshow(gray_img, cmap='gray')

In [None]:
# Увеличение контраста с помощью выравнивания гистограммы
# contrasted_img = cv2.equalizeHist(gray_img)
# plt.imshow(contrasted_img, cmap='gray')

In [None]:
# Invert the intensities of the grayscale image.
inverted_img = cv2.bitwise_not(src=gray_img)
plt.imshow(inverted_img, cmap="gray")

In [None]:
# Noise removal via morphological closing is currently disabled (kept below
# as commented-out code), so the inverted image is passed on unchanged.
# kernel = np.ones((4, 4), np.uint8)
# cleared_img = cv2.morphologyEx(inverted_img, cv2.MORPH_CLOSE, kernel)
# plt.imshow(cleared_img, cmap='gray')
cleared_img = inverted_img

In [None]:
# Binarize with Otsu's method: the threshold argument (0) is a placeholder,
# pixels above the threshold Otsu selects are set to 255.
otsu_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
_, binary_img = cv2.threshold(cleared_img, 0, 255, otsu_flags)
plt.imshow(binary_img, cmap="gray")

In [None]:
# Resize to the 28x28 input size used by the networks in this file.
desired_size = (28, 28)
resized_img = cv2.resize(
    src=binary_img,
    dsize=desired_size,
    interpolation=cv2.INTER_CUBIC,
)
plt.imshow(resized_img, cmap="gray")

In [None]:
# Prepend two singleton axes, convert to float32 and scale by 1/255,
# then wrap the array in a tensor.
img = resized_img[np.newaxis, np.newaxis, ...].astype(np.float32) / 255.0
img = torch.from_numpy(img)

predict = model(img)
# Softmax over the class dimension, computed once and reused below.
probabilities = F.softmax(predict, dim=1).detach().numpy()
print(probabilities.argmax())
probabilities

# Kaggle

In [None]:
# Build the Kaggle submission table using our model.
import pandas as pd

data = pd.read_csv('./data/test.csv')
# Scale the raw pixel values by 1/255 so the inputs are on the same scale the
# model sees elsewhere in this notebook (training goes through ToTensor, and
# the other inference cells divide by 255.0). Without this the model gets
# inputs far outside the range it was trained on.
data_tensor = torch.tensor(data.values, dtype=torch.float32) / 255.0

class CustomDataset(Dataset):
    """Dataset over a pre-loaded tensor; each item is one entry along dim 0.

    Items are returned on their own, without labels.
    """

    def __init__(self, data_tensor):
        # Keep a reference to the backing tensor (no copy is made).
        self.data = data_tensor

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        return self.data[idx]

batch_size = 64
test_dataset = CustomDataset(data_tensor)
test_ds_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Inference only: gradient tracking is disabled for the whole pass.
predicted_labels = []
with torch.no_grad():
    for batch in tqdm(test_ds_loader):
        # Reshape each row into a single-channel 28x28 image.
        predictions = model(batch.view(batch.size(0), 1, 28, 28))
        predicted_labels.extend(predictions.argmax(dim=1).tolist())

# Image ids are 1-based and follow the row order of the input table.
image_id_label_pairs = list(enumerate(predicted_labels, start=1))
image_ids = [image_id for image_id, _ in image_id_label_pairs]

df = pd.DataFrame(image_id_label_pairs, columns=["ImageId", "Label"])
df.to_csv('./data/output.csv', index=False)
df