In [1]:
import torch
import torchvision
import tqdm
import sys
import os

from IPython.display import clear_output
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, precision_score, recall_score, auc, roc_curve
from tqdm.notebook import tqdm_notebook

import numpy as np
import torch.nn as nn
import seaborn as sns
import matplotlib.pyplot as plt

clear_output()

### Разделение на обучающую и тестовую выборку в отношении 1 к 4

In [2]:
dataset = torchvision.datasets.ImageFolder(root="C:/Users/Wxei/fitoClean/Data400/")

split_ratio = 0.8

train_size = int(split_ratio * len(dataset))
test_size = len(dataset) - train_size
train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])

### Добавление аугментаций и фильтра imagenet

In [3]:
train_set.dataset.transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.AutoAugment(torchvision.transforms.AutoAugmentPolicy.CIFAR10),
    torchvision.transforms.RandomHorizontalFlip(p=0.5),
    torchvision.transforms.RandomPosterize(bits=2),
    torchvision.transforms.RandomPerspective(distortion_scale=0.05, p=1.0),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_set.dataset.transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [4]:
BATCH_SIZE = 128

train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True,  num_workers=6)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True,  num_workers=6)

### Загрузка предобученной сети

In [5]:
def load_model(path):
    model = torch.load(path)
    model.eval()
    return model

cnn_model = load_model("C:/Users/Wxei/fitoBack/savedModels/basic-noSoftmax-pretrained-95.5.pt").cpu()

In [6]:
cnn_model.classifier = nn.Flatten()

In [7]:
cnn_model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Flatten(start_dim=1, end_dim=-1)
)

### Преобразование обучающей выборки в двумерный numpy массив

In [8]:
train_X, train_Y = None, None
for imgs, true_labels in train_loader:
    if train_X is None and train_Y is None:
        train_X, train_Y = cnn_model(imgs).detach().numpy(), true_labels.numpy()
    else:
        train_X, train_Y = np.vstack((train_X, cnn_model(imgs).detach().numpy())), np.hstack((train_Y, true_labels))

### Обучение SVM

In [9]:
from sklearn.svm import SVC, NuSVC, LinearSVC

In [10]:
clf = LinearSVC()
clf.fit(train_X, train_Y)

### Тестирование модели

In [11]:
test_X, test_Y = None, None
for imgs, true_labels in test_loader:
    if test_X is None and test_Y is None:
        test_X, test_Y = cnn_model(imgs).detach().numpy(), true_labels.numpy()
    else:
        test_X, test_Y = np.vstack((test_X, cnn_model(imgs).detach().numpy())), np.hstack((test_Y, true_labels))

In [12]:
accuracy_score(clf.predict(test_X), test_Y)

0.9874686716791979

### Перебор параметров с GridSearch

In [27]:
from sklearn.model_selection import GridSearchCV

In [28]:
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf', 'poly', 'sigmoid']}

grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=2)
grid.fit(train_X, train_Y)
print(grid.best_estimator_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  10.4s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  10.4s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  10.4s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  10.5s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=  10.4s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   0.7s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   0.7s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   0.7s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   0.7s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   0.7s
[CV] END .....................C=0.1, gamma=1, kernel=sigmoid; total time=   8.0s
[CV] END .....................C=0.1, gamma=1, k

### Сохранение Модели

In [46]:
import pickle
file = open("savedModels/SVMclf.pckl", 'wb')
pickle.dump(clf, file)
file = open("savedModels/SVMCNN.pckl", 'wb')
pickle.dump(cnn_model, file)