Импорт необходимых модулей, классов и функций.

In [1]:
import os
from typing import Tuple, Any

from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.optim as optim
from torch import nn
import pandas as pd
import numpy as np
import cv2
import torch

Разделение загруженного набора на обучающую, тестовую и валидационную выборки.

In [None]:
# Split the full annotation table into training, test and validation subsets
# and store each subset as its own ';'-separated CSV file (no header, no index)
# in the same directory as the source file.
PATH = "/content/drive/MyDrive/Colab Notebooks/"
main_df = pd.read_csv(os.path.join(PATH, "ML.csv"), header=None, sep=";")

# Each subset is assembled from two row ranges of the source table
# (presumably one range per class -- TODO confirm against ML.csv).
# NOTE(review): rows 1000-1030 and every row from 2031 onward are not assigned
# to any subset.
learn_frames = [main_df.iloc[:800], main_df.iloc[1031:1831]]
learn_df = pd.concat(learn_frames)

test_frames = [main_df.iloc[800:900], main_df.iloc[1831:1931]]
test_df = pd.concat(test_frames)

valid_frames = [main_df.iloc[900:1000], main_df.iloc[1931:2031]]
valid_df = pd.concat(valid_frames)

# index=False / header=False keep the files in the same layout as ML.csv,
# so they can be read back with header=None.
learn_df.to_csv(os.path.join(PATH, "ML_learn.csv"), sep=";", index=False, header=False)
test_df.to_csv(os.path.join(PATH, "ML_test.csv"), sep=";", index=False, header=False)
valid_df.to_csv(os.path.join(PATH, "ML_valid.csv"), sep=";", index=False, header=False)

Свёрточная нейронная сеть для решения задач классификации.

In [2]:
class CNN(nn.Module):
    """Convolutional network that outputs one sigmoid value per sample.

    Three 3x3, stride-2, unpadded convolutions (3 -> 16 -> 32 -> 64 channels),
    each followed by ReLU and 2x2 max pooling, feed a two-layer fully
    connected head (576 -> 10 -> 1) whose output is passed through a sigmoid.

    The first linear layer takes 576 features, i.e. a flattened 64 x 3 x 3
    feature map, which is what the convolutional stack produces for
    3 x 224 x 224 inputs.
    """

    def __init__(self) -> None:
        super().__init__()

        self.conv_1 = nn.Conv2d(3, 16, kernel_size=3, padding=0, stride=2)
        self.conv_2 = nn.Conv2d(16, 32, kernel_size=3, padding=0, stride=2)
        self.conv_3 = nn.Conv2d(32, 64, kernel_size=3, padding=0, stride=2)

        self.relu = nn.ReLU()
        # NOTE(review): this dropout layer is registered but forward() never
        # applies it; kept so the module's attributes stay unchanged.
        self.dropout = nn.Dropout(0.1)
        self.max_pool = nn.MaxPool2d(2)

        self.fc_1 = nn.Linear(576, 10)
        self.fc_2 = nn.Linear(10, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Batch of 3-channel images; the spatial size must flatten to
                576 features after the convolutional stack (e.g. 224 x 224).

        Returns:
            Tensor with one column per sample holding the sigmoid of the
            final linear layer's output.
        """
        output = self.max_pool(self.relu(self.conv_1(x)))
        output = self.max_pool(self.relu(self.conv_2(output)))
        output = self.max_pool(self.relu(self.conv_3(output)))

        # Functional calls avoid building new Flatten/Sigmoid modules on
        # every forward pass; start_dim=1 keeps the batch dimension.
        output = torch.flatten(output, start_dim=1)
        output = self.relu(self.fc_1(output))
        return torch.sigmoid(self.fc_2(output))

Описание пайплайна предобработки данных.

In [3]:
# Preprocessing and augmentation pipeline: resize to 224 x 224, take a random
# resized crop of the same size, flip horizontally at random and convert the
# result to a tensor.
custom_transforms = transforms.Compose([
        # The dataset in this notebook loads images with OpenCV, i.e. as numpy
        # arrays, while the geometric transforms below accept only PIL images
        # or tensors -- convert to PIL first.
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

Класс для работы с набором данных.

In [4]:
class CustomImageDataset(Dataset):
  """Image dataset described by a ';'-separated annotation file without a header.

  Column 0 of the annotation file holds the path to an image, column 1 holds
  its label.
  """

  def __init__(self, path_to_annotation_file: str, transform: Any=None, target_transform: Any=None) -> None:
    """Read the annotation file and remember the optional transforms.

    Args:
      path_to_annotation_file: Path to the ';'-separated annotation CSV.
      transform: Optional callable applied to every loaded image.
      target_transform: Optional callable applied to every label.
    """
    self.path_to_annotation_file = path_to_annotation_file
    self.dataset_info = pd.read_csv(path_to_annotation_file, sep=";", header=None)
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self) -> int:
    """Return the number of rows in the annotation file."""
    return len(self.dataset_info)

  def __getitem__(self, index: int) -> Tuple[Any, Any]:
    """Load the image and label stored at row ``index``.

    Returns:
      ``(image, label)``. Without a transform the image is the RGB array
      produced by OpenCV; otherwise it is whatever ``transform`` returns.

    Raises:
      FileNotFoundError: If OpenCV cannot read the image file.
    """
    path_to_image = self.dataset_info.iloc[index, 0]
    bgr_image = cv2.imread(path_to_image)
    # cv2.imread signals an unreadable file by returning None instead of
    # raising, so fail here with a message that names the offending path.
    if bgr_image is None:
      raise FileNotFoundError(f"Cannot read image: {path_to_image}")
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    label = self.dataset_info.iloc[index, 1]

    if self.transform is not None:
      image = self.transform(image)
    if self.target_transform is not None:
      label = self.target_transform(label)

    return image, label

Train loop (цикл обучения)

In [5]:
# Use the first CUDA device when one is available, otherwise fall back to the
# CPU, and place a freshly initialised network on that device.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model = CNN()
model = model.to(device)

In [7]:
# Annotation files written by the dataset-splitting cell.
learn_annotation = r"/content/drive/MyDrive/Colab Notebooks/ML_learn.csv"
test_annotation = r"/content/drive/MyDrive/Colab Notebooks/ML_test.csv"
valid_annotation = r"/content/drive/MyDrive/Colab Notebooks/ML_valid.csv"

# Evaluation must be deterministic, so the test and validation sets get a
# pipeline without the random crop / flip used for training. ToPILImage comes
# first because CustomImageDataset yields OpenCV (numpy) images and Resize
# accepts only PIL images or tensors.
eval_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

learn_dataset = CustomImageDataset(learn_annotation, custom_transforms)
test_dataset = CustomImageDataset(test_annotation, eval_transforms)
valid_dataset = CustomImageDataset(valid_annotation, eval_transforms)

BATCH_SIZE = 4

# Only the training data is shuffled; test and validation keep file order so
# repeated evaluations see the samples in the same sequence.
learn_dataloader = DataLoader(learn_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)