In [1]:
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
import torch.nn as nn

In [2]:
from torchvision.transforms import functional as func

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
import os, sys

In [5]:
import numpy as np

In [33]:
# NOTE(review): this cell's execution count (33) is out of sequence with the
# cells around it, and the star import hides which names it brings in.
# It also sits above the cell that appends the parent directory to sys.path;
# if model_Net lives in that directory, a fresh Restart & Run All would fail
# here. TODO confirm, then reorder and import the needed names explicitly.
from model_Net import *

In [6]:
# Put the parent of the current working directory on the import path (once),
# so modules stored one level up can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [7]:
# Read the image stack and the matching class labels from their .npy files.
with open('image_bin.npy', 'rb') as image_file, \
        open('class_bin.npy', 'rb') as label_file:
    img_data = np.load(image_file)
    class_data = np.load(label_file)

In [8]:
# Dimensions of the loaded image array; the recorded output is (20000, 240, 640),
# presumably (samples, height, width) — confirm against the data source.
np.shape(img_data)

(20000, 240, 640)

In [9]:
# Dimensions of the label array; the recorded output is (20000,), one entry per image.
np.shape(class_data)

(20000,)

In [10]:
# Largest value present in the image data (recorded output: 255); the dataset
# class divides pixel values by 255 when building tensors.
img_data.max()

255

In [11]:
class CustomDatasetFromImages(Dataset):
    """Dataset pairing in-memory single-channel images with class labels.

    Each item is an ``(image, label)`` tuple: ``image`` is a float tensor
    scaled by 1/255, given a leading channel axis and resized to
    ``image_size``; ``label`` is an ``int64`` tensor holding the stored label
    minus one (presumably converting 1-based labels to 0-based — confirm
    against the label file).

    Args:
        img_data: Indexable collection of 2-D image arrays.
        cl_data: Array of integer labels; must support ``cl_data - 1``.
        image_size: ``(height, width)`` every image is resized to.
            Defaults to ``(256, 256)``.

    Raises:
        ValueError: If ``img_data`` and ``cl_data`` differ in length.
    """

    def __init__(self, img_data, cl_data, image_size=(256, 256)):
        # Fail at construction time instead of with an index error mid-epoch.
        if len(img_data) != len(cl_data):
            raise ValueError(
                f'img_data has {len(img_data)} items but cl_data has {len(cl_data)}'
            )
        self.image_arr = img_data
        self.cl_arr = cl_data - 1
        self.image_size = list(image_size)
        self.data_len = len(self.image_arr)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        img = torch.as_tensor(np.asarray(self.image_arr[index])) / 255
        # Add the channel axis before resizing: (H, W) -> (1, H, W).
        img = img.unsqueeze(0)
        img = func.resize(img, size=self.image_size)

        cl = torch.as_tensor(np.asarray(self.cl_arr[index]))

        return (img.float(), cl.long())

    def __len__(self):
        """Return the number of images in the dataset."""
        return self.data_len

In [12]:
# Hold out 20% of the samples for evaluation. A fixed random_state makes the
# split reproducible, so the reported test metrics are comparable between
# runs; without it every run evaluates on a different subset.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    img_data, class_data, test_size=0.2, random_state=SPLIT_SEED
)

In [13]:
# Wrap each half of the split in the dataset class defined above.
train_data = CustomDatasetFromImages(img_data=x_train, cl_data=y_train)
test_data = CustomDatasetFromImages(img_data=x_test, cl_data=y_test)

In [14]:
# Serve both splits in batches of 100; only the training loader shuffles,
# the test loader keeps a fixed sample order.
train_data_loader = DataLoader(
    dataset=train_data,
    batch_size=100,
    shuffle=True,
)
test_data_loader = DataLoader(
    dataset=test_data,
    batch_size=100,
    shuffle=False,
)

In [15]:
# Number of full passes over the training loader made by the training loop.
epochs = 10

In [16]:
class Net(nn.Module):
    """Small CNN classifier for single-channel images.

    Architecture: conv(5x5, 7 ch) -> ReLU -> 2x2 max-pool ->
    conv(3x3, 10 ch) -> ReLU -> 2x2 max-pool -> three fully connected layers
    (120, 84, ``num_classes``). The forward pass returns raw logits.

    Args:
        num_classes: Width of the output layer. Defaults to 10.
        input_size: ``(height, width)`` of the images fed to ``forward``.
            Used to size the first fully connected layer; the default
            ``(256, 256)`` gives 10 * 62 * 62 = 38440 input features.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution/pooling stages.
    """

    def __init__(self, num_classes=10, input_size=(256, 256)):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 7, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(7, 10, 3)

        # Derive the flattened feature count from the input size instead of
        # hard-coding the value that only holds for 256x256 images.
        height, width = input_size
        flat_features = (
            self.conv2.out_channels
            * self._size_after_convs(height)
            * self._size_after_convs(width)
        )

        self.fc1 = nn.Linear(flat_features, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    @staticmethod
    def _size_after_convs(size):
        """Return one spatial dimension after both conv + pool stages."""
        after_first = (size - 5 + 1) // 2          # 5x5 conv (no padding), then 2x2 pool
        after_second = (after_first - 3 + 1) // 2  # 3x3 conv (no padding), then 2x2 pool
        if after_second < 1:
            raise ValueError(f'input dimension {size} is too small for this network')
        return after_second

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [17]:
from torchsummary import summary

In [18]:
# Print a per-layer table of output shapes and parameter counts for a
# single-channel 256x256 input.
summary(net, (1, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 7, 252, 252]             182
         MaxPool2d-2          [-1, 7, 126, 126]               0
            Conv2d-3         [-1, 10, 124, 124]             640
         MaxPool2d-4           [-1, 10, 62, 62]               0
            Linear-5                  [-1, 120]       4,612,920
            Linear-6                   [-1, 84]          10,164
            Linear-7                   [-1, 10]             850
Total params: 4,624,756
Trainable params: 4,624,756
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.25
Forward/backward pass size (MB): 5.71
Params size (MB): 17.64
Estimated Total Size (MB): 23.60
----------------------------------------------------------------


In [19]:
# Cross-entropy loss on the raw logits, optimised by SGD with momentum.
# NOTE(review): LEARNING_RATE is not assigned in any visible cell; presumably
# it comes from the star import of model_Net — confirm, and consider defining
# it explicitly next to `epochs`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=net.parameters(),
    momentum=0.9,
    lr=LEARNING_RATE,
)

In [20]:
# Train for `epochs` passes over the training loader, reporting the mean
# loss of every 50 consecutive mini-batches.
for epoch in range(epochs):
    running_loss = 0.0  # loss accumulated since the last report; reset each epoch

    for i, (inputs, labels) in enumerate(train_data_loader):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 50 != 0:
            continue

        # Every 50th mini-batch: report the running mean and start over.
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 50:.3f}')
        running_loss = 0.0

print('Finished Training')



[1,    50] loss: 2.306
[1,   100] loss: 2.304
[1,   150] loss: 2.304
[2,    50] loss: 2.302
[2,   100] loss: 2.299
[2,   150] loss: 2.298
[3,    50] loss: 2.292
[3,   100] loss: 2.284
[3,   150] loss: 2.271
[4,    50] loss: 2.234
[4,   100] loss: 2.137
[4,   150] loss: 1.750
[5,    50] loss: 0.935
[5,   100] loss: 0.571
[5,   150] loss: 0.338
[6,    50] loss: 0.191
[6,   100] loss: 0.119
[6,   150] loss: 0.081
[7,    50] loss: 0.064
[7,   100] loss: 0.061
[7,   150] loss: 0.042
[8,    50] loss: 0.035
[8,   100] loss: 0.025
[8,   150] loss: 0.029
[9,    50] loss: 0.018
[9,   100] loss: 0.016
[9,   150] loss: 0.014
[10,    50] loss: 0.010
[10,   100] loss: 0.008
[10,   150] loss: 0.010
Finished Training


In [28]:
# Evaluate the trained network on the held-out split.
#
# Changes from the earlier version of this cell:
#   * the loss is accumulated over all batches (it used to be overwritten, so
#     the printed value was the loss of the last batch only);
#   * inference runs under torch.no_grad(), so no autograd graph is built and
#     the stored predictions do not keep one alive;
#   * the model is switched to eval mode first. The Net defined in this
#     notebook has no dropout or batch-norm layers, so this changes nothing
#     today, but it keeps the cell correct if such layers are added.
net.eval()

preds = []
test_running_right, test_running_total, test_loss_sum = 0, 0, 0.0
with torch.no_grad():
    for j, data in enumerate(test_data_loader, 0):
        test_img, test_labels = data
        test_outputs = net(test_img)
        test_preds = torch.max(test_outputs, dim=1)
        batch_size = len(test_labels)

        # compute the loss on the test set; the criterion returns a batch
        # mean, so weight it by the batch size to get a per-sample average
        test_loss_sum += criterion(test_outputs, test_labels).item() * batch_size

        # compute the metric (accuracy) on the test set
        test_running_total += batch_size
        pred_test_labels = test_preds.indices
        test_running_right += (test_labels == pred_test_labels).sum().item()
        preds.append(pred_test_labels)

test_loss = test_loss_sum / test_running_total
print(f'Test loss: {test_loss:.3f}. Test acc: {test_running_right / test_running_total:.3f}')

Test loss: 0.003. Test acc: 0.998


In [31]:
# Persist only the learned parameters (the state dict), not the module object.
torch.save(
    obj=net.state_dict(),
    f='gesture_classification_model.pt',
)