In [40]:
import zipfile

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms

<div class="alert alert-info">

<h3> Задание (выполнять в отдельном файле)</h3>
<p></p>
Реализовать сверточную нейронную сеть заданной архитектуры для решения задачи бинарной классификации: снаружи или внутри помещения сделана фотография.
 <p></p>
</div>

In [41]:
# Location of the zipped training images (Kaggle input mount).
base_dir = '/kaggle/input/cian-datafest-2019/train.zip'

# Unpack the archive into the current working directory. The dataset cell
# reads from ./train afterwards (assumes the archive holds a top-level
# train/ folder -- TODO confirm). `zipfile` is imported with the other
# imports at the top of the notebook.
with zipfile.ZipFile(base_dir, 'r') as archive:
    archive.extractall()

In [42]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to CPU.
dev = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(dev)

In [71]:
# Preprocessing applied to every image: resize to 56x56, convert to a tensor,
# then normalise each of the three channels with mean 0.5 and std 0.5
# (assumes ToTensor yields values in [0, 1], so the result lies in [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((56, 56)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Images are read from ./train; ImageFolder derives the class label of each
# image from the sub-folder it is stored in.
train_data = torchvision.datasets.ImageFolder(root="./train", transform=transform)

# Keep only every 6th image to work with a smaller dataset.
every_sixth = list(range(0, len(train_data), 6))
trainset = torch.utils.data.Subset(train_data, every_sixth)

In [72]:
# Number of images that remain after keeping every 6th sample.
len(trainset)

9221

In [73]:
# Hold out part of the subsample for validation.
# The validation size is derived from len(trainset) instead of being
# hard-coded, so the cell keeps working if the subsample size changes, and
# the split is seeded so that a re-run produces the same partition.
TRAIN_SIZE = 6000
SPLIT_SEED = 42

n_train = min(TRAIN_SIZE, len(trainset))
train_set, val_set = torch.utils.data.random_split(
    trainset,
    [n_train, len(trainset) - n_train],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [141]:
# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 100

# Training batches are drawn in a new random order every epoch.
train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True)

# Evaluation only aggregates over the whole set, so shuffling adds nothing;
# a fixed order keeps per-batch results comparable between runs.
test_loader = torch.utils.data.DataLoader(dataset=val_set,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False)

In [142]:
class CustomNet(nn.Module):
    """Small CNN that produces a single logit per image.

    Pipeline: conv 3x3 -> ReLU -> avg-pool 2 -> conv 4x4 -> ReLU -> avg-pool 3
    -> conv 8x8 -> ReLU -> flatten -> linear 128-64 -> ReLU -> linear 64-32
    -> ReLU -> linear 32-1. No sigmoid is applied to the output.

    The first linear layer expects 128 features, so the last convolution has
    to produce a 128 x 1 x 1 map. That is the case for 56 x 56 inputs
    (spatial size 56 -> 54 -> 27 -> 24 -> 8 -> 1).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (no padding, stride 1).
        self.conv_layer1 = nn.Conv2d(3, 32, kernel_size=3, stride=1)
        self.conv_layer2 = nn.Conv2d(32, 64, kernel_size=4, stride=1)
        self.conv_layer3 = nn.Conv2d(64, 128, kernel_size=8, stride=1)
        # Average pooling applied after the first and second convolution.
        self.pooling_layer1 = nn.AvgPool2d(kernel_size=2)
        self.pooling_layer2 = nn.AvgPool2d(kernel_size=3)

        # Fully connected head ending in one output unit.
        self.linear_layer1 = nn.Linear(128, 64)
        self.linear_layer2 = nn.Linear(64, 32)
        self.linear_layer3 = nn.Linear(32, 1)

        # One stateless ReLU module reused after every hidden layer.
        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Map a batch of images to a (batch, 1) tensor of logits."""
        features = self.pooling_layer1(self.relu(self.conv_layer1(inputs)))
        features = self.pooling_layer2(self.relu(self.conv_layer2(features)))
        features = self.relu(self.conv_layer3(features))

        # Collapse everything except the batch dimension.
        hidden = torch.flatten(features, start_dim=1)
        for fc in (self.linear_layer1, self.linear_layer2):
            hidden = self.relu(fc(hidden))

        return self.linear_layer3(hidden)

In [152]:
class NewCustomNet(nn.Module):
    """Wider CNN variant that produces a single logit per image.

    Three blocks of (padded conv 3x3 -> ReLU -> max-pool 2) are followed by an
    unpadded 7x7 convolution with 4096 output channels, a flatten, and a
    4096-1000-1 fully connected head. No sigmoid is applied to the output.

    The first linear layer expects 4096 features, so the 7x7 convolution has
    to reduce the map to 1 x 1. That is the case for 56 x 56 inputs
    (spatial size 56 -> 28 -> 14 -> 7 -> 1).
    """

    def __init__(self):
        super().__init__()
        # Size-preserving 3x3 convolutions (padding 1, stride 1).
        self.conv_layer1 = nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1)
        self.conv_layer2 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv_layer3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        # Unpadded 7x7 convolution applied after the last pooling step.
        self.conv_layer4 = nn.Conv2d(256, 4096, kernel_size=7, stride=1)
        # One 2x2 max-pool per 3x3 convolution.
        self.pooling_layer1 = nn.MaxPool2d(kernel_size=2)
        self.pooling_layer2 = nn.MaxPool2d(kernel_size=2)
        self.pooling_layer3 = nn.MaxPool2d(kernel_size=2)

        # Fully connected head ending in one output unit.
        self.linear_layer1 = nn.Linear(4096, 1000)
        self.linear_layer2 = nn.Linear(1000, 1)

        # One stateless ReLU module reused after every hidden layer.
        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Map a batch of images to a (batch, 1) tensor of logits."""
        conv_pool_stages = (
            (self.conv_layer1, self.pooling_layer1),
            (self.conv_layer2, self.pooling_layer2),
            (self.conv_layer3, self.pooling_layer3),
        )

        features = inputs
        for conv, pool in conv_pool_stages:
            features = pool(self.relu(conv(features)))
        features = self.relu(self.conv_layer4(features))

        # Collapse everything except the batch dimension.
        flat = torch.flatten(features, start_dim=1)
        hidden = self.relu(self.linear_layer1(flat))
        return self.linear_layer2(hidden)

In [163]:
# Build the network and move its parameters to the selected device.
# To train the larger variant instead, construct NewCustomNet() here.
model = CustomNet()
model = model.to(device)
print(model)

CustomNet(
  (conv_layer1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv_layer2): Conv2d(32, 64, kernel_size=(4, 4), stride=(1, 1))
  (conv_layer3): Conv2d(64, 128, kernel_size=(8, 8), stride=(1, 1))
  (pooling_layer1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (pooling_layer2): AvgPool2d(kernel_size=3, stride=3, padding=0)
  (linear_layer1): Linear(in_features=128, out_features=64, bias=True)
  (linear_layer2): Linear(in_features=64, out_features=32, bias=True)
  (linear_layer3): Linear(in_features=32, out_features=1, bias=True)
  (relu): ReLU()
)


In [160]:
# Binary cross-entropy computed directly on the raw logits the model returns
# (the loss applies the sigmoid internally), optimised with Adam at lr = 1e-3.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [161]:
# Training schedule. The epoch count is defined once and used both by the
# loop and by the progress message, so the two cannot drift apart.
NUM_EPOCHS = 5
LOG_EVERY = 20  # print the current batch loss every LOG_EVERY steps

total_step = len(train_loader)
model.train()
for epoch in range(NUM_EPOCHS):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        # The loss needs float targets with the same (batch, 1) shape as the
        # model output, so reshape and cast the labels once here.
        labels = labels.reshape(-1, 1).float().to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # Standard step: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % LOG_EVERY == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, NUM_EPOCHS, i + 1, total_step, loss.item()))

Epoch [1/5], Step [20/60], Loss: 0.3718
Epoch [1/5], Step [40/60], Loss: 0.2948
Epoch [1/5], Step [60/60], Loss: 0.3631
Epoch [2/5], Step [20/60], Loss: 0.2362
Epoch [2/5], Step [40/60], Loss: 0.2094
Epoch [2/5], Step [60/60], Loss: 0.2878
Epoch [3/5], Step [20/60], Loss: 0.2462
Epoch [3/5], Step [40/60], Loss: 0.3681
Epoch [3/5], Step [60/60], Loss: 0.2548
Epoch [4/5], Step [20/60], Loss: 0.3076
Epoch [4/5], Step [40/60], Loss: 0.2443
Epoch [4/5], Step [60/60], Loss: 0.2464
Epoch [5/5], Step [20/60], Loss: 0.2155
Epoch [5/5], Step [40/60], Loss: 0.2453
Epoch [5/5], Step [60/60], Loss: 0.2163


In [162]:
# Measure classification accuracy on the held-out loader.
model.eval()

correct = 0
total = 0
# Gradients are not needed for evaluation; no_grad() already detaches the
# outputs, so the legacy `.data` access is unnecessary.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.reshape(-1, 1).to(device)
        outputs = model(images)
        # Logit -> probability -> hard 0/1 decision at a 0.5 threshold.
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Fail with a clear message instead of a bare ZeroDivisionError.
if total == 0:
    raise ValueError('test_loader yielded no samples; accuracy is undefined')

print('Accuracy: {} %'.format(100 * correct / total))

Accuracy: 91.12076994722136 %
