In [52]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import os
from tqdm import tqdm
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [53]:
classes = ['airplane', 'car', 'cat', 'dog', 'flower', 'fruit', 'motorbike', 'person']
data_path = './data/natural_images/'

In [54]:
class CustomDataset(Dataset):
    def __init__(self, data, targets, transform=None):
        self.data = data
        self.targets = torch.LongTensor(targets)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data[idx]
        label = self.targets[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

In [55]:
class ConvNet(nn.Module):
    def __init__(self, num_classes=8):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=5, padding=2)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(8)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(16)
        self.conv4 = nn.Conv2d(16, 100, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(100)
        self.conv5 = nn.Conv2d(100, 100, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(100)
        self.fc1 = nn.Linear(100 * 12 * 12, 128) # размер после последнего пулинга
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        out = self.pool(self.relu(self.bn1(self.conv1(x))))
        out = self.pool(self.relu(self.bn3(self.conv3(self.relu(self.bn2(self.conv2(out)))))))
        out = self.pool(self.relu(self.bn5(self.conv5(self.relu(self.bn4(self.conv4(out)))))))
        out = out.view(out.size(0), -1) # Flatten
        out = self.dropout(self.relu(self.fc1(out)))
        out = self.fc2(out)
        return out

In [56]:
def create_dataset(data_path, input_size=(100, 100)):
    data = []
    labels = []
    for idx, class_ in enumerate(classes):
        class_folder = os.path.join(data_path, class_)
        for image_name in tqdm(os.listdir(class_folder), desc=class_):
            image_path = os.path.join(class_folder, image_name)
            image = cv2.imread(image_path)
            image = cv2.resize(image, input_size)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Перевод изображения из BGR в RGB
            data.append(image)
            labels.append(idx)
    return np.array(data), np.array(labels)

In [57]:
batch_size = 32
input_size = (100, 100)

In [58]:
X, y = create_dataset(data_path, input_size=input_size)

airplane: 100%|██████████| 727/727 [00:00<00:00, 2628.86it/s]
car: 100%|██████████| 968/968 [00:00<00:00, 3528.08it/s]
cat: 100%|██████████| 885/885 [00:00<00:00, 1189.02it/s]
dog: 100%|██████████| 702/702 [00:00<00:00, 1257.33it/s]
flower: 100%|██████████| 843/843 [00:01<00:00, 779.77it/s]
fruit: 100%|██████████| 1000/1000 [00:00<00:00, 4606.13it/s]
motorbike: 100%|██████████| 788/788 [00:00<00:00, 2884.05it/s]
person: 100%|██████████| 986/986 [00:00<00:00, 2195.86it/s]


In [59]:
X = X / 255.0

In [60]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

In [61]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])

In [62]:
train_dataset = CustomDataset(X_train, y_train, transform=transform)
test_dataset = CustomDataset(X_test, y_test, transform=transform)

In [63]:
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [64]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [65]:
device

device(type='cpu')

In [66]:
model = ConvNet(num_classes=len(classes)).to(device)

In [67]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [81]:
from tqdm import tqdm

num_epochs = 30
n_total_steps = len(train_loader.dataset)
n_correct = 0
n_samples = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    n_correct = 0
    n_samples = 0
    
    for i, (images, labels) in enumerate(tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}')):
        images = images.to(device)
        labels = labels.to(device)

        # Прямое распространение
        outputs = model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()

        # Обратное распространение и оптимизация
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Подсчет верных предсказаний
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    # Вывод метрик после эпохи
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100.0 * n_correct / n_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')

Epoch 1/10: 100%|██████████| 184/184 [00:38<00:00,  4.73it/s]


Epoch [1/10], Loss: 0.0997, Accuracy: 96.18%


Epoch 2/10: 100%|██████████| 184/184 [00:39<00:00,  4.63it/s]


Epoch [2/10], Loss: 0.0941, Accuracy: 96.13%


Epoch 3/10: 100%|██████████| 184/184 [00:39<00:00,  4.62it/s]


Epoch [3/10], Loss: 0.0991, Accuracy: 96.04%


Epoch 4/10: 100%|██████████| 184/184 [00:39<00:00,  4.68it/s]


Epoch [4/10], Loss: 0.0974, Accuracy: 96.32%


Epoch 5/10: 100%|██████████| 184/184 [00:39<00:00,  4.60it/s]


Epoch [5/10], Loss: 0.0982, Accuracy: 96.40%


Epoch 6/10: 100%|██████████| 184/184 [00:39<00:00,  4.62it/s]


Epoch [6/10], Loss: 0.0808, Accuracy: 96.98%


Epoch 7/10: 100%|██████████| 184/184 [00:40<00:00,  4.60it/s]


Epoch [7/10], Loss: 0.0821, Accuracy: 96.96%


Epoch 8/10: 100%|██████████| 184/184 [00:39<00:00,  4.60it/s]


Epoch [8/10], Loss: 0.0831, Accuracy: 96.61%


Epoch 9/10: 100%|██████████| 184/184 [00:39<00:00,  4.67it/s]


Epoch [9/10], Loss: 0.0753, Accuracy: 96.96%


Epoch 10/10: 100%|██████████| 184/184 [00:39<00:00,  4.65it/s]

Epoch [10/10], Loss: 0.0940, Accuracy: 96.49%





In [82]:
model.eval()
y_true = []
y_pred = []

In [83]:
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc='Testing'):
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

Testing: 100%|██████████| 33/33 [00:02<00:00, 13.88it/s]


In [85]:
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy of the network on the test images: {accuracy:.4f}')

Accuracy of the network on the test images: 0.9324


In [80]:
torch.save(model.state_dict(), 'model.pth')