### Import libraries

In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### Class for data preparation

In [2]:
REBUILD_DATA = True # Set to true to build dataset
IMG_SIZE = 50

class DataBuider():
    male_folder = "internship_data/male"  # path to male images folder
    female_folder = "internship_data/female" # path to female images folder
    labels = {male_folder: 0, female_folder: 1}
    training_data = []
    test_data = []

    male_count = 0
    female_count = 0

    def make_training_data(self):
        for label in self.labels:
            print(label)
            for i, f in enumerate(tqdm(os.listdir(label))):
                if "jpg" in f:
                    try:
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                        if i%10 == 0:
                            self.test_data.append([np.array(img), np.eye(2)[self.labels[label]]])
                            continue
                        img_flipped = cv2.flip(img, 1)
                        self.training_data.append([np.array(img), np.eye(2)[self.labels[label]]])
                        self.training_data.append([np.array(img_flipped), np.eye(2)[self.labels[label]]])

                        if label == self.male_folder:
                            self.male_count += 1
                        elif label == self.female_folder:
                            self.female_count += 1

                    except Exception as e:
                        print(e)

        np.random.shuffle(self.training_data)
        np.random.shuffle(self.test_data)
        np.save("training_data.npy", self.training_data)
        np.save("test_data.npy", self.test_data)

### Data preparation

In [3]:
if REBUILD_DATA:
    data_builder = DataBuider()
    data_builder.make_training_data()
    print('Male images count:',data_builder.male_count)
    print('Female images count:',data_builder.female_count)
    
training_data = np.load("training_data.npy", allow_pickle=True)
test_data = np.load("test_data.npy", allow_pickle=True)

  1%|          | 378/50002 [00:00<00:13, 3768.05it/s]

internship_data/male


100%|██████████| 50002/50002 [00:13<00:00, 3710.55it/s]
  0%|          | 221/50002 [00:00<00:24, 2041.42it/s]

internship_data/female


100%|██████████| 50002/50002 [00:16<00:00, 2950.44it/s]


Male images count: 45000
Female images count: 45001


### Neural network architerture

In [4]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(IMG_SIZE, IMG_SIZE).view(-1,1,IMG_SIZE,IMG_SIZE)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 2)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2,2))

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)

### Checking availability of cuda

In [5]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU


### Сreation of neural network

In [6]:
net = Net().to(device)

### Chouse optimiser and loss function

In [7]:
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

### Split data for train and test

In [8]:
train_X = torch.Tensor([i[0] for i in training_data]).view(-1, IMG_SIZE, IMG_SIZE)
train_X = train_X/255.0
train_y = torch.Tensor([i[1] for i in training_data])

test_X = torch.Tensor([i[0] for i in test_data]).view(-1, IMG_SIZE, IMG_SIZE)
test_X = test_X/255.0
test_y = torch.Tensor([i[1] for i in test_data])

print("Len of train data:", len(train_X))
print("Len of test data:", len(test_X))

Len of train data: 180002
Len of test data: 10001


### Train function

In [9]:
def train(net):
    BATCH_SIZE = 100
    EPOCHS = 3
    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
            batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,IMG_SIZE,IMG_SIZE)
            batch_y = train_y[i:i+BATCH_SIZE]

            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            net.zero_grad()
            outputs = net(batch_X)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()
        print(loss)

### Test function

In [10]:
def test(net):
    correct = 0
    total = 0
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            real_class = torch.argmax(test_y[i]).to(device)
            net_out = net(test_X[i].view(-1, 1, IMG_SIZE, IMG_SIZE).to(device))[0]

            predicted_class = torch.argmax(net_out)
            if predicted_class == real_class:
                correct += 1
            total += 1
    print("Accuracy:", round(correct/total,3))

In [16]:
train(net)

100%|██████████| 1801/1801 [00:13<00:00, 131.15it/s]
  1%|          | 14/1801 [00:00<00:13, 134.74it/s]

tensor(3.7780e-07, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 1801/1801 [00:13<00:00, 131.37it/s]
  1%|          | 14/1801 [00:00<00:13, 134.05it/s]

tensor(3.7780e-07, device='cuda:0', grad_fn=<MseLossBackward>)


100%|██████████| 1801/1801 [00:13<00:00, 131.07it/s]

tensor(3.7780e-07, device='cuda:0', grad_fn=<MseLossBackward>)





In [17]:
test(net)

100%|██████████| 10001/10001 [00:05<00:00, 1969.13it/s]

Accuracy: 0.958



