# Обучить сеть, попробовать попрунить, сконвертировать в onnx и запустить в юнити.

In [1]:
import os
import time
import copy
import numpy as np
import random
import pandas
import torch

import cv2
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch import device
from torch import load
from torch.cuda import is_available
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
from PIL import Image
from torchvision import transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pandas
import timm
from tqdm import tqdm
from datetime import datetime

# Ф-ии Обучения

## Гиперпараметры

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
N_class = 11  # Количество классов в задаче
num_epochs = 10  # Количество эпох обучения
lr = 0.0008  # Коэффициент скорости обучения (Learning rate)
momentum = 0.8
momentumB = 0.9
step_size = 10
gamma = 0.9
label_smoothing = 0.0
batch_size = 300  # Размер бача
snp_path_0 = "/workspace/prj/snp"  # Путь к папке, в которую сохранять готовые модели
model_path = None  # "/workspace/prj/snp/Caltech256/mobilenetV2/30_05_2023/15_58_51/mobilenetV2_Caltech256_292_ACC top1-0.5907_checkpoint.tar"
pretrained = True  # True - загрузить предобученную модель
num_workers = 5  # Cколько подпроцессов использовать для загрузки данных
pin_memory = True  # Ускорить ли загрузки данных с CPU на GPU False если очень маленький набор данных
obj_transforms = (
    None  # Аугментации val_transforms, train_transforms на основе Albumentation
)
SIZE = 224  # Размер входа (SIZE*SIZE)
model_name = "resnet18"
Dataset_name = "Imaginette"
rasp_file_train = None
shuffle_train = False
drop_last_train = True
rasp_file_val = None
shuffle_val = False
drop_last_val = True

### Фиксация рандома

In [4]:
def set_seed(seed=10):
    """Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY."""
    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    return random_state


random_state = set_seed(99)

## Датасет

### Класс датасета

In [5]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageOps
import pandas


class GetData(Dataset):
    def __init__(self, Root, annotation, Valid=False, Transform=None):

        self.landmarks_frame = pandas.read_csv(annotation).query(
            "isval == " + str(Valid)
        )
        self.transform = Transform
        self.root = Root
        self.valid = Valid

    def __len__(self):
        return len(self.landmarks_frame)

    def __getitem__(self, index):
        img_name = os.path.join(
            self.root, str(self.landmarks_frame["impath"].iloc[index])
        )
        image = cv2.imread(img_name)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image)
        image = self.transform(image)

        landmarks = self.landmarks_frame["label"].iloc[index]
        landmarks = np.array(landmarks)

        return image, landmarks

    def getSempler(self, name_class="label"):
        rasp_sampler_list = []
        class_count = self.landmarks_frame[name_class].value_counts()
        rasp_sampler_list = [
            1 / class_count[i] for i in self.landmarks_frame[name_class].values
        ]

        if self.__len__() != len(rasp_sampler_list):
            raise ValueError("sampler does not converge with the map")
        sempler = WeightedRandomSampler(rasp_sampler_list, len(rasp_sampler_list))
        return sempler

### Аугментации

In [6]:
class OneOf:
    def __init__(self, transforms, p: float = 0.5):
        self.p = p
        self.transforms = transforms
        transforms_ps = [1 for t in self.transforms]
        s = sum(transforms_ps)
        self.transforms_ps = [t / s for t in transforms_ps]

    def __call__(self, img):

        if self.transforms_ps and (random.random() < self.p):
            t = random.choices(population=self.transforms, weights=self.transforms_ps)
            data = t[0](img)
        return data


train_transforms = transforms.Compose(
    [
        OneOf(
            [
                transforms.Compose(
                    [
                        transforms.Resize((int(SIZE), int(SIZE))),
                        transforms.RandomCrop((int(SIZE), int(SIZE))),
                    ]
                ),
                transforms.RandomResizedCrop((SIZE, SIZE)),
                transforms.Resize((SIZE, SIZE)),
            ],
            p=1,
        ),
        OneOf([transforms.AutoAugment(), transforms.RandAugment(3)], p=1),
        transforms.ToTensor(),  # преобразуем изображение в тензор
        transforms.Normalize(
            [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
        ),  # Нормируем получившийся тезор с мат ожиданием и стандартным отклонением для каждого канала тензора
    ]
)

val_transforms = transforms.Compose(
    [
        transforms.Resize((SIZE, SIZE)),
        transforms.ToTensor(),  # преобразуем изображение в тензор
        transforms.Normalize(
            [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
        ),  # Нормируем получившийся тезор с мат ожиданием и стандартным отклонением для каждого канала тензора
    ]
)

### Датасет

In [7]:
Data_dir = "/workspace/db_labs/paradigma/ImageNette"
annotation_imagenette = Data_dir + "/data.csv"
train_dataset = GetData(Data_dir, annotation_imagenette, False, train_transforms)
val_dataset = GetData(Data_dir, annotation_imagenette, True, val_transforms)
print(len(train_dataset))
print(len(val_dataset))
# dataset_test=MyDatasetCaltech256(dirr_path,test_annotation,val_transforms)
sampler_train = train_dataset.getSempler()
sampler_val = None

11376
2845


### Даталоадер

In [8]:
dataloaders = {
    "train": DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=shuffle_train,
        sampler=train_dataset.getSempler(),
        num_workers=num_workers,
        drop_last=drop_last_train,
        pin_memory=pin_memory,
    ),
    "val": DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=shuffle_val,
        sampler=sampler_val,
        num_workers=num_workers,
        drop_last=drop_last_val,
        pin_memory=pin_memory,
    ),
}

## Ф-ия обучения

In [22]:
def train_model(
    model,
    criterion,
    optimizer,
    dataloaders=dataloaders,
    scheduler=exp_lr_scheduler,
    batch_size=50,
    snp_path="./",
    Name_experement="None",
    num_epochs=10,
):
    # Запомнить время начала обучения
    devices = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    since = time.time()
    mass = [[], [], []]
    # Копировать параметры поданной модели
    best_model_the_loss_classification = model.state_dict()
    best_model_the_acc_classification = model.state_dict()
    # print("GPU inference time: {:8f},CPU inference time:{:8f}".format(SeachInferensModel(model),SeachInferensModel(model,"cpu")))
    for epoch in range(num_epochs):
        # У каждой эпохи есть этап обучения и проверки

        for phase in ["train", "val"]:
            if phase == "train":
                model.train()  # Установить модель в режим обучения
            elif phase == "val":
                model.eval()  # Установить модель в режим оценки

            # Обнуление параметров
            running_classification_loss = 0.0
            running_corrects = 0
            dataset_sizes = 0
            # Получать порции картинок и иx классов из датасета

            pbar = tqdm(
                enumerate(dataloaders[phase]),
                total=len(dataloaders[phase]),
                desc="Epocha " + phase + " " + str(epoch + 1) + "/" + str(num_epochs),
            )
            for step, (inputs, labels) in pbar:

                # считать все на видеокарте или ЦП
                inputs = inputs.to(devices)
                labels = labels.to(devices)
                # обнулить градиенты параметра
                optimizer.zero_grad()
                # Пока градиент можно поcчитать, cчитать только на учимся
                with torch.set_grad_enabled(phase == "train"):
                    # Проход картинок через модель
                    classification = model(inputs)
                    total_classification_loss = criterion(
                        classification, labels.to(dtype=torch.long)
                    )

                    # Если учимся
                    if phase == "train":
                        # Вычислить градиенты
                        total_classification_loss.backward()
                        # Обновить веса
                        optimizer.step()

                # Статистика
                # for i in range(batch_size):# Колличество правильных ответов
                #     running_corrects += float(torch.sum(torch.argmax(classification[i]) == labels[i]))
                running_corrects += float(
                    torch.sum(
                        labels.unsqueeze(1)
                        == torch.topk(
                            input=classification, k=1, dim=1, largest=True, sorted=True
                        )[1]
                    )
                )
                running_classification_loss += (
                    total_classification_loss.item() * inputs.size(0)
                )
                dataset_sizes = dataset_sizes + batch_size

                epoch_classification_loss = running_classification_loss / dataset_sizes
                epoch_acc = running_corrects / dataset_sizes
                mem = (
                    torch.cuda.memory_reserved() / 1e9
                    if torch.cuda.is_available()
                    else 0
                )
                current_lr = optimizer.param_groups[0]["lr"]
                pbar.set_postfix(
                    valid_loss=f"{epoch_classification_loss:0.4f}",
                    acc=f"{epoch_acc:0.5f}",
                    lr=f"{current_lr:0.5f}",
                    gpu_memory=f"{mem:0.2f} GB",
                )

            if epoch == 0 and phase == "train":
                with open(
                    os.path.join(snp_path + Name_experement + "_lock.csv"), "w"
                ) as rez_file:
                    rez_file.write("Epoch,train_loss,train_acc,val_loss,val_acc\n")
                best_acc = epoch_acc
                best_Loss_classification = epoch_classification_loss
                best_epoch_acc = 1
                best_epoch_classification = 1

            # Обновить скорость обучения
            if (
                phase == "val"
                and type(scheduler) == torch.optim.lr_scheduler.ReduceLROnPlateau
            ):
                scheduler.step(epoch_acc)
            elif (
                phase == "train"
                and type(scheduler) != torch.optim.lr_scheduler.ReduceLROnPlateau
            ):
                scheduler.step()

            if phase == "train":

                with open(
                    os.path.join(snp_path + Name_experement + "_lock.csv"), "a"
                ) as rez_file:
                    rez_file.write(
                        str(epoch + 1)
                        + ","
                        + str(epoch_classification_loss)
                        + ","
                        + str(epoch_acc)
                    )
            else:
                with open(
                    os.path.join(snp_path + Name_experement + "_lock.csv"), "a"
                ) as rez_file:
                    rez_file.write(
                        ","
                        + str(round(epoch_classification_loss, 4))
                        + ","
                        + str(round(epoch_acc, 4))
                        + "\n"
                    )

                mass[0].append(epoch)
                mass[1].append(epoch_classification_loss)
                mass[2].append(epoch_acc)

            # Копироование весов успешной модели на вэйле
            if phase == "val" and best_acc < epoch_acc:
                if epoch_classification_loss < best_Loss_classification:
                    best_Loss_classification = epoch_classification_loss
                best_acc = epoch_acc
                best_epoch_acc = epoch + 1
                best_model_the_acc_classification = model.state_dict()
                save_name = (
                    snp_path
                    + Name_experement
                    + "_"
                    + str(epoch + 1)
                    + "_ACC top1-"
                    + str(round(best_acc, 4))
                    + "_checkpoint.tar"
                )
                torch.save(
                    {
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                    },
                    save_name,
                )
                print("Best val Acc classification:{:4f}".format(best_acc))
            elif (
                phase == "val" and epoch_classification_loss < best_Loss_classification
            ):
                best_Loss_classification = epoch_classification_loss
                best_epoch_classification = epoch + 1
                best_model_the_loss_classification = model.state_dict()
                save_name = (
                    snp_path
                    + Name_experement
                    + "_"
                    + str(epoch + 1)
                    + "_CrossEntropyLoss-"
                    + str(round(best_Loss_classification, 4))
                    + "_checkpoint.tar"
                )
                torch.save({
                            'epoch': epoch + 1,
                            'state_dict': model.state_dict(),
                            }, save_name)
                print(
                    "Best Loss classification: {:4f}".format(best_Loss_classification)
                )

    # Конечное время и печать времени работы
    time_elapsed = time.time() - since
    print(
        "Training complete in {:.0f}m {:.0f}s".format(
            time_elapsed // 60, time_elapsed % 60
        )
    )
    print(
        "Best val Loss classification: {:.4f} epoch {:.0f}  ".format(
            best_Loss_classification, best_epoch_classification
        )
    )
    print(
        "Best val Loss accuracy: {:.4f} epoch {:.0f}".format(best_acc, best_epoch_acc)
    )

    overfit_model = model
    modelLoss = copy.deepcopy(model)
    modelAcc = copy.deepcopy(model)
    modelLoss.load_state_dict(best_model_the_loss_classification)
    modelAcc.load_state_dict(best_model_the_acc_classification)
    return modelAcc, modelLoss, overfit_model

## Модель

In [10]:
def search(list, platform):
    for i in range(len(list)):
        if list[i] == platform:
            return True
    return False


def get_model(model_name, N_class=256, path=None, model_old=None, pretrained=False):
    model_list_names = timm.list_models(pretrained=pretrained)
    if search(model_list_names, model_name):
        model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=N_class
        )  # ,drop_rate=0.2)
    else:
        print("Модель не найдена")

    devices = torch.device("cuda:0" if is_available() else "cpu")
    model = model.to(devices)

    if path != None:
        weights = load(path)
        model.load_state_dict(weights["state_dict"], strict=True)
        model = model.eval()
    elif model_old != None:
        weights = model_old.state_dict()
        model.load_state_dict(weights, strict=True)
        model = model.eval()

    return model

In [11]:
model_ft = get_model(model_name, N_class, model_path, pretrained=pretrained)
model_ft = model_ft.to(torch.device("cuda:0" if is_available() else "cpu"))
model_ft.eval()
optimizer_ft = optim.AdamW(
    model_ft.parameters(),
    lr=lr,
)  # betas=(momentum,momentumB),)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, "max", patience=5)
classification_criterion = nn.CrossEntropyLoss()  # label_smoothing=label_smoothing)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/Vio/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


# Обучение

In [12]:
Name_experement = model_name + "_" + Dataset_name
snp_path = snp_path_0 + Dataset_name + "/" + model_name + "/"
os.makedirs(snp_path, exist_ok=True)
now = datetime.now()
dt_string = now.strftime("%d_%m_%Y")
tm_string = now.strftime("/%H_%M_%S/")
snp_path = snp_path + dt_string + tm_string
os.makedirs(snp_path)
print(snp_path)

model1, model2, overfit_model = train_model(
    model_ft,
    classification_criterion,
    optimizer_ft,
    dataloaders,
    exp_lr_scheduler,
    batch_size,
    snp_path,
    Name_experement,
    num_epochs,
)

/workspace/prj/snpImaginette/resnet18/16_06_2023/18_47_00/


Epocha train 1/10: 100%|██████████| 37/37 [03:22<00:00,  5.48s/it, acc=0.83441, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.5383]
Epocha val 1/10: 100%|██████████| 9/9 [01:07<00:00,  7.53s/it, acc=0.89185, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.3446]


Best val Acc classification:0.891852


Epocha train 2/10: 100%|██████████| 37/37 [02:50<00:00,  4.61s/it, acc=0.89982, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.3260]
Epocha val 2/10: 100%|██████████| 9/9 [00:48<00:00,  5.40s/it, acc=0.93037, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2244]


Best val Acc classification:0.930370


Epocha train 3/10: 100%|██████████| 37/37 [02:38<00:00,  4.28s/it, acc=0.90946, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2991]
Epocha val 3/10: 100%|██████████| 9/9 [00:58<00:00,  6.51s/it, acc=0.88889, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.3900]
Epocha train 4/10: 100%|██████████| 37/37 [03:35<00:00,  5.82s/it, acc=0.91559, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2685]
Epocha val 4/10: 100%|██████████| 9/9 [00:56<00:00,  6.26s/it, acc=0.93222, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2274]


Best val Acc classification:0.932222


Epocha train 5/10: 100%|██████████| 37/37 [02:41<00:00,  4.35s/it, acc=0.91577, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2767]
Epocha val 5/10: 100%|██████████| 9/9 [00:35<00:00,  3.90s/it, acc=0.88333, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.4400]
Epocha train 6/10: 100%|██████████| 37/37 [01:56<00:00,  3.14s/it, acc=0.91766, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2558]
Epocha val 6/10: 100%|██████████| 9/9 [01:44<00:00, 11.57s/it, acc=0.90111, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.3890]
Epocha train 7/10: 100%|██████████| 37/37 [03:18<00:00,  5.36s/it, acc=0.92423, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2404]
Epocha val 7/10: 100%|██████████| 9/9 [01:12<00:00,  8.06s/it, acc=0.93037, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2322]
Epocha train 8/10: 100%|██████████| 37/37 [03:07<00:00,  5.08s/it, acc=0.92703, gpu_memory=10.14 GB, lr=0.00080, valid_loss=0.2359]
Epocha val 8/10: 100%|██████████| 9/9 [01:39<00:00, 11.08s/it, acc=0.93000, gpu_memory=1

Training complete in 40m 52s
Best val Loss classification: 0.2244 epoch 1  
Best val Loss accuracy: 0.9322 epoch 4


In [16]:
torch.save({
            'epoch': 3 + 1,
            'state_dict': model2.state_dict(),
            }, "save_name_checkpoint.tar")

# Конвертация в Onnx

In [13]:
dummy_input = torch.randn(1, 3, SIZE, SIZE)
model1.eval()
model1.to("cpu")
torch.onnx.export(model1,
                 dummy_input,
                 "resnet18_origen.onnx",
                 verbose=False,
                 input_names=[ "actual_input" ],
                 output_names=[ "output" ],
                 export_params=True,
                 )

# Прунинг

In [14]:
import nni
from nni.algorithms.compression.v2.pytorch.pruning import TaylorFOWeightPruner
from nni.compression.pytorch import ModelSpeedup
import os

In [23]:
devices = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
traced_optimizer = nni.trace(torch.optim.Adam)(model1.parameters())
config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
model1.to(devices)
pruner = TaylorFOWeightPruner(model1, config_list, train_model, traced_optimizer, classification_criterion, training_batches=batch_size)
masked_model, masks = pruner.compress()
pruner._unwrap_model()
model1.eval()
ms = ModelSpeedup(model1.to(devices), dummy_input.to(devices), masks)
ms.speedup_model()

Epocha train 1/10:   0%|          | 0/37 [00:00<?, ?it/s]

## Дообучение

In [None]:
Name_experement = model_name + "_" + Dataset_name
snp_path = snp_path_0 + Dataset_name + "/" + model_name + "/"
os.makedirs(snp_path, exist_ok=True)
now = datetime.now()
dt_string = now.strftime("%d_%m_%Y")
tm_string = now.strftime("/%H_%M_%S/")
snp_path = snp_path + dt_string + tm_string
os.makedirs(snp_path)
print(snp_path)

model1, model2, overfit_model = train_model(
    model1,
    classification_criterion,
    optimizer_ft,
    dataloaders,
    exp_lr_scheduler,
    batch_size,
    snp_path,
    Name_experement,
    num_epochs,
)


## Конвертация в ONNX


In [None]:
model1.eval()
model1.to("cpu")
torch.onnx.export(model1,
                 dummy_input,
                 "resnet18_prun_80p.onnx",
                 verbose=False,
                 input_names=[ "actual_input" ],
                 output_names=[ "output" ],
                 export_params=True,
                 )

# Сравнение результатов

In [None]:
stats1 = os.stat('resnet18_origen.onnx')
stats2 = os.stat('resnet18_prun_80p.onnx')

print(f"Размер сети до прунинга = {stats1.st_size / 1024 / 1024:0.0f} mb, после = {stats2.st_size / 1024 / 1024:0.0f} mb")