In [1]:
import os 
import pickle
import random
import shutil
import zipfile
import numpy as np
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torchvision
from torchvision import transforms, models

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split


# clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

Setup complete. Using torch 1.13.0+cu116 _CudaDeviceProperties(name='Tesla T4', major=7, minor=5, total_memory=15109MB, multi_processor_count=40)


## Загрузка данных

Данные  - фотографии документов, которые выравнены верно 

In [2]:
from google.colab import drive

In [3]:
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [4]:
img_zip = '/content/gdrive/MyDrive/FlipNet/img.zip'
zipfile_img = zipfile.ZipFile(img_zip, 'r')
zipfile_img.extractall()

## Генерация классов

создаем копиb каждой фотографии переварачивая ее на 90 градусов

In [5]:
! mv img flip0

In [6]:
!mkdir flip90 flip180 flip270

In [7]:
def rotate_img(list_imgs, rot):
    for img in list_imgs:
        if len(img) > 10:
            im = Image.open(os.path.join('flip0', img))
            im_rotate = im.rotate(rot, expand=True)
            im_rotate.save(os.path.join(f"flip{rot}", img))
            im.close()
 
images = [x for x in os.listdir('flip0')]
rotate_img(images, 90)
rotate_img(images, 180)
rotate_img(images, 270)

## Разделим данные на Train и Test

In [None]:
# all_images = [[os.path.join('flip0', x), os.path.join('flip90', x), os.path.join('flip180', x), os.path.join('flip270', x)] for x in os.listdir('flip0')]
# all_images = np.concatenate(np.array(all_images))
# train_images, test_images = train_test_split(all_images, test_size=0.3, random_state=42)

In [8]:
# with open('/content/gdrive/My Drive/FlipNet/train_images.pickle', 'wb') as f:
#     pickle.dump(train_images, f)

# with open('/content/gdrive/My Drive/FlipNet/test_images.pickle', 'wb') as f:
#     pickle.dump(test_images, f)

with open('/content/gdrive/My Drive/FlipNet/train_images.pickle', 'rb') as f:
    train_images = pickle.load(f)

with open('/content/gdrive/My Drive/FlipNet/test_images.pickle', 'rb') as f:
    test_images = pickle.load(f)

In [11]:
!mkdir images images/train images/train/flip0 images/train/flip90 images/train/flip180 images/train/flip270
!mkdir images/test images/test/flip0 images/test/flip90 images/test/flip180 images/test/flip270

In [12]:
#Utility function to move images 
def move_files_to_folder(list_of_files, destination_folder):
    for f in list_of_files:
        try:
            if f.find('.DS_Store') != -1:
                continue
            prefix = f.split('/', 1)[0]
            shutil.move(f, os.path.join(destination_folder, prefix))
        except:
            print(f)
            assert False

# перетащим файлы по нужным папкам
move_files_to_folder(train_images, 'images/train')
move_files_to_folder(test_images, 'images/test/')

In [13]:
! rm -rf flip0 flip90 flip180 flip270  img

In [14]:
len(os.listdir('images/train/flip0')), len(os.listdir('images/train/flip90')), len(os.listdir('images/train/flip180')), len(os.listdir('images/train/flip270'))

(652, 671, 666, 689)

## Подготовка данных 

разобьем данные на бачи

In [29]:
def augmentation(train_dir = 'train'):
    # mas = [transforms.ColorJitter(brightness=.5, hue=.3), transforms.RandomRotation(degrees=(-5, 5)),
    # transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.3), scale=(0.5, 0.75)), transforms.RandomEqualize(p=1),
    # transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 3)), transforms.AugMix(),
    # transforms.RandomPerspective(distortion_scale=0.3, p=1.0)]

    mas = [transforms.ColorJitter(brightness=.5, hue=.3),
    transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.3), scale=(0.5, 0.75)), transforms.RandomEqualize(p=1)]
    head = [transforms.Resize((224, 224))]
    end = [transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
    com_mas = []
    for i in range(len(mas)):
        com_mas.append(transforms.Compose(head + [mas[i]] + end))
        for j in range(i+1, len(mas)):
            com_mas.append(transforms.Compose(head + [mas[i], mas[j]] + end))
            for l in range(j+1, len(mas)):
                if l > j+2:
                    break
                com_mas.append(transforms.Compose(head + [mas[i], mas[j], mas[l]] + end))
    dataset_mas = []
    for com in com_mas:
        dataset_mas.append(torchvision.datasets.ImageFolder(train_dir, com))
        
    return torch.utils.data.ConcatDataset(dataset_mas)

# ColorJitter и RandomAffine мб два раза? 

In [30]:
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_dir = 'images/train'
val_dir = 'images/test'

train_dataset = augmentation(train_dir)
val_dataset = torchvision.datasets.ImageFolder(val_dir, val_transforms)

batch_size = 100
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=8)



In [31]:
print(len(train_dataset)) ## кол-во всех фотографий
print(len(train_dataloader)) ## кол-во батчей
print(train_dataset[0][0].shape) ## размерность изображения

18746
188
torch.Size([3, 224, 224])


 #### Посмотрим как теперь выглядят наши фотографии

In [32]:
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
class_names = ['flip0', 'flip180', 'flip90', 'flip270']
def show_input(input_tensor, title=''):
    image = input_tensor.permute(1, 2, 0).numpy()
    image = std * image + mean
    plt.imshow(image.clip(0, 1))
    plt.title(title)
    plt.show()
    plt.pause(0.001)

X_batch, y_batch = next(iter(train_dataloader))

for x_item, y_item in zip(X_batch, y_batch):
    show_input(x_item, title=class_names[y_item])

KeyboardInterrupt: ignored

## Объявление модели

In [33]:
model = models.resnet50(pretrained=True)

"""отключить рассчет градиента для всех слоев сети
сеть хорошо предобучена и мы не хотим, что бы веса менялись во время обучения
"""
for param in model.parameters():
    param.requires_grad = False

"""меняем последний слой, вместо 1000 классов, как в оригинале, у нас будет 4
"""
model.fc = torch.nn.Linear(model.fc.in_features, 4)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
loss = torch.nn.CrossEntropyLoss() # функция активации 
optimizer = torch.optim.Adam(model.parameters(), lr=1.0e-3) # метод оптимизации
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1) # планировщик



## Тренировка модели

In [35]:
def train_model(model, loss, optimizer, scheduler, num_epochs):
    seed = np.random.randint(1000)
    for epochs in range(1, num_epochs):
        print(f'Epoch {epochs + 1} / {num_epochs}')
        for phase in ['train', 'val']:
            if phase == 'train':
                dataloader = train_dataloader
                scheduler.step()
                model.train()
            else:
                dataloader = val_dataloader
                model.eval()
            true_ans, total, all_loss = 0, 0, 0
            for inputs, label in tqdm(dataloader):
                inputs, label = inputs.to(device), label.to(device)
                optimizer.zero_grad() # обнуляем градиент, что бы он не накапливался
                with torch.set_grad_enabled(phase == 'train'):
                    preds = model(inputs)
                    loss_value = loss(preds, label)
                    preds_class = preds.argmax(dim=1)
                    if phase == 'train':
                        loss_value.backward()
                        optimizer.step()
                true_ans += (preds_class == label).sum().item()
                total += label.size(0)
                all_loss += loss_value.item()

            print(f"{phase} accuracy of the network {100 * true_ans / total}, Loss {all_loss}")
        with open(f'/content/gdrive/My Drive/FlipNet/epochs_{epochs}_model_s{seed}.pickle', 'wb') as f:
            pickle.dump(model, f)
    return model

In [None]:
flipmodel = train_model(model, loss, optimizer, scheduler, num_epochs=100);

Epoch 2 / 100


 13%|█▎        | 24/188 [00:44<01:52,  1.46it/s]

In [23]:
# flipmodel = train_model(flipmodel, loss, optimizer, scheduler, num_epochs=100);



Epoch 14 / 100


100%|██████████| 1420/1420 [48:48<00:00,  2.06s/it]


train accuracy of the network 87.28563980441614, Loss 458.94686557352543


100%|██████████| 12/12 [00:18<00:00,  1.57s/it]


val accuracy of the network 94.95652173913044, Loss 1.7190782576799393
Epoch 15 / 100


100%|██████████| 1420/1420 [51:33<00:00,  2.18s/it]


train accuracy of the network 87.32157199825271, Loss 457.6569448709488


100%|██████████| 12/12 [00:19<00:00,  1.58s/it]


val accuracy of the network 94.8695652173913, Loss 1.7360921949148178
Epoch 16 / 100


100%|██████████| 1420/1420 [49:37<00:00,  2.10s/it]


train accuracy of the network 87.4483915059112, Loss 457.0504651516676


100%|██████████| 12/12 [00:19<00:00,  1.59s/it]


val accuracy of the network 94.78260869565217, Loss 1.7307384610176086
Epoch 17 / 100


100%|██████████| 1420/1420 [48:57<00:00,  2.07s/it]


train accuracy of the network 87.33707216029985, Loss 458.45509503781796


100%|██████████| 12/12 [00:18<00:00,  1.56s/it]


val accuracy of the network 95.1304347826087, Loss 1.7279200591146946
Epoch 18 / 100


100%|██████████| 1420/1420 [48:37<00:00,  2.05s/it]


train accuracy of the network 87.324390209534, Loss 458.1702328622341


100%|██████████| 12/12 [00:18<00:00,  1.55s/it]


val accuracy of the network 94.6086956521739, Loss 1.7282644137740135
Epoch 19 / 100


100%|██████████| 1420/1420 [48:33<00:00,  2.05s/it]


train accuracy of the network 87.4314822382234, Loss 458.06220154464245


100%|██████████| 12/12 [00:18<00:00,  1.58s/it]


val accuracy of the network 95.04347826086956, Loss 1.7193243950605392
Epoch 20 / 100


  6%|▌         | 88/1420 [03:14<48:59,  2.21s/it]


KeyboardInterrupt: ignored

## Сохранение модели

In [24]:
# with open('/content/gdrive/My Drive/FlipNet/model3.pickle', 'wb') as f:
#     pickle.dump(flipmodel, f)

with open('/content/gdrive/My Drive/FlipNet/epochs_18_model_s489.pickle', 'rb') as f:
    flipmodel = pickle.load(f)

In [None]:
# %cp model2.pickle /content/gdrive/My\ Drive/FlipNet/

cp: cannot create regular file '/content/gdrive/My Drive/FlipNet/': No such file or directory


## Тестирование 

In [25]:
def predict(model):
    model.eval() ## фиксируем модельку 
    test_predictions, true_predictions = [], []
    for inputs, labels in tqdm(val_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        with torch.set_grad_enabled(False):
            preds = model(inputs)
        test_predictions.append(
            np.argmax(torch.nn.functional.softmax(preds, dim=1).data.cpu().numpy(), axis=1))
        true_predictions.append(labels.data.cpu().numpy())
    return np.concatenate(true_predictions), np.concatenate(test_predictions)

In [26]:
true_predict, my_predict = predict(flipmodel)

100%|██████████| 12/12 [00:18<00:00,  1.57s/it]


In [27]:
print(classification_report(true_predict, my_predict))

              precision    recall  f1-score   support

           0       0.95      0.94      0.95       305
           1       0.94      0.94      0.94       291
           2       0.96      0.96      0.96       268
           3       0.96      0.97      0.96       286

    accuracy                           0.95      1150
   macro avg       0.95      0.95      0.95      1150
weighted avg       0.95      0.95      0.95      1150



In [28]:
true_predict

array([0, 0, 0, ..., 3, 3, 3])

In [None]:
my_predict

array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 3, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 2,
       2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 3, 2, 2, 2, 3, 2, 1, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 3, 2, 2,