In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import zipfile
with zipfile.ZipFile('/content/drive/MyDrive/DLS Face recognition/celebA_train_500.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/drive/MyDrive/DLS Face recognition')

In [2]:
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
from tqdm import tqdm, tqdm_notebook

In [34]:
a = [1, 2, 3]
a += [] + [111]
a

[1, 2, 3, 111]

In [100]:
from torch.utils.data import Dataset
from torchvision import transforms
import torch
from PIL import Image

class CelebADataSet(Dataset):
    def __init__(self, ptest=80, pval=5):
        """
        Параметр p определяет какой процент данных будет отдан для тренировочной выборки
        """
        super().__init__()

        self._path_to_file_name = '/content/drive/MyDrive/DLS Face recognition/celebA_train_500/celebA_imgs'
        self._path_to_annot = '/content/drive/MyDrive/DLS Face recognition/celebA_train_500/celebA_anno.txt'

        self._table = pd.read_csv(self._path_to_annot, header=None, sep=' ')
        self._table.columns = ['File_name', 'Class']

        self._len = self._table.shape[0]

        # Получаем индексы для разделения данных на train и test
        idx = np.random.choice(self._len, self._len, False)
        p1 = self._len//100 * ptest
        p2 = self._len//100 * (100 - pval)
        self.train_idx, self.test_idx, self.val_idx = np.split(idx, [p1, p2])

    class _Data(Dataset):
        """
        Класс, который будет подаваться в Dataloader
        """
        def __init__(self, upper, idx, train=True):
            super().__init__()
            self._upper = upper
            self.train = train

            self._table = self._upper._table.iloc[idx]

        def __len__(self):
            return len(self._table)

        def __getitem__(self, index):
            path_to_img = os.path.join(self._upper._path_to_file_name,
                                       self._table['File_name'].iloc[index])
            image = cv2.cvtColor(cv2.imread(path_to_img),
                                 cv2.COLOR_BGR2RGB)

            make_transforms = {True: transforms.Compose([transforms.ToTensor(),
                                                        transforms.Resize(224, antialias=True),
                                                        transforms.RandomAutocontrast(),
                                                        transforms.RandomHorizontalFlip(),
                                                        transforms.ColorJitter(hue=0.1, saturation=0.1),
                                                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
                              False: transforms.Compose([transforms.ToTensor(),
                                                        transforms.Resize(224, antialias=True),
                                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

            image = make_transforms[self.train](image)
            label =  self._table['Class'].iloc[index]

            return image, label

    def get_train(self):
        return self._Data(self, self.train_idx)
    def get_test(self):
        return self._Data(self, self.test_idx, False)
    def get_val(self):
        return self._Data(self, self.val_idx, False)
    def get_train_and_test(self):
        return self.get_train(), self.get_test()

In [6]:
def deatech(data):
    return data.cpu().numpy()

In [43]:
def fit_epoch(model, train_loader, loss_fn, optimaizer, DEVICE):
    loss_per_epoch = 0
    accuracy_per_epoch = 0
    processed = 0

    log_template = "\ntrain loss: {t_loss:0.4f} train acc {t_acc:0.4f}"

    model.train()

    with tqdm(desc="train", total=len(train_loader)) as pbar_outer:
        for images, labels in train_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            # forward and backward
            optimaizer.zero_grad()

            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()

            optimaizer.step()

            #statistic
            preds = torch.argmax(outputs, 1)
            loss_per_epoch += loss.item() * images.size(0)
            accuracy_per_epoch += torch.sum(preds == labels.data)
            processed += images.size(0)

            torch.cuda.empty_cache()

            pbar_outer.update(1)
            tqdm.write(log_template.format(t_loss=loss_per_epoch/processed, t_acc=accuracy_per_epoch/processed))

    loss_per_epoch = loss_per_epoch / processed
    accuracy_per_epoch = accuracy_per_epoch / processed
    torch.cuda.empty_cache()
    return loss_per_epoch, accuracy_per_epoch

In [44]:
def test_epoch(model, test_loader, loss_fn, DEVICE):
    loss_per_epoch = 0
    accuracy_per_epoch = 0
    processed = 0

    model.eval()
    for images, labels in test_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)
        logits = []

        with torch.no_grad():
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            logits.append(outputs)

            #statistic
            processed += images.shape[0]
            loss_per_epoch += loss.item()

            pred = torch.nn.functional.softmax(torch.cat(logits), dim=-1).to(DEVICE)
            accuracy_per_epoch += torch.sum(torch.argmax(pred, 1) == labels.data)
        torch.cuda.empty_cache()

    loss_per_epoch = loss_per_epoch / processed
    accuracy_per_epoch = accuracy_per_epoch / processed
    torch.cuda.empty_cache()
    return loss_per_epoch, accuracy_per_epoch

In [45]:
def train_model(model, train_loader, test_loader, num_epoch, loss_fn, optimaizer, scheduler, DEVICE):
    history = {'train loss':[], 'test loss': [],
               'train accuracy': [], 'test accuracy': []}

    log_template = "\nEpoch {ep:03d} train loss: {t_loss:0.4f} \
    test loss {v_loss:0.4f} train acc {t_acc:0.4f} test acc {v_acc:0.4f}"

    torch.cuda.empty_cache()

    with tqdm(desc="epoch", total=num_epoch) as pbar_outer:
        for epoch in range(num_epoch):
            torch.cuda.empty_cache()

            train_loss, train_accuracy = fit_epoch(model, train_loader, loss_fn, optimaizer, DEVICE)
            test_loss, test_accuracy = test_epoch(model, test_loader, loss_fn, DEVICE)

            history['train loss'].append(train_loss)
            history['test loss'].append(test_loss)
            history['train accuracy'].append(train_accuracy)
            history['test accuracy'].append(test_accuracy)

            scheduler.step()

            pbar_outer.update(1)
            tqdm.write(log_template.format(ep=epoch, t_loss=train_loss,\
                                            v_loss=test_loss, t_acc=train_accuracy, v_acc=test_accuracy))
            torch.cuda.empty_cache()

    torch.cuda.empty_cache()
    return history

In [46]:
from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights, resnet18

model = mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.IMAGENET1K_V2)

Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-5c1a4163.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-5c1a4163.pth
100%|██████████| 21.1M/21.1M [00:00<00:00, 26.8MB/s]


In [101]:
from torch.utils.data import DataLoader

celebA = CelebADataSet(ptest=85)
train, test = celebA.get_train_and_test()

batch_size = 256
num_workers = 2

train_data = DataLoader(train, batch_size=batch_size, num_workers=num_workers)
test_data = DataLoader(test, batch_size=batch_size, num_workers=num_workers)

In [47]:
N_CLASSES = 500

in_features = model.classifier[-1].in_features
model.classifier[-1] = torch.nn.Linear(in_features, N_CLASSES)

In [61]:
optimizer = torch.optim.AdamW(model.parameters())
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, 0.5)
loss_fn = torch.nn.CrossEntropyLoss()

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(DEVICE)

In [None]:
torch.cuda.empty_cache()

num_epoch = 50
history = train_model(model, train_data, test_data, num_epoch, loss_fn, optimizer, scheduler, DEVICE)

epoch:   0%|          | 0/50 [00:00<?, ?it/s]
train:   0%|          | 0/40 [00:00<?, ?it/s][A
train:   2%|▎         | 1/40 [03:29<2:16:21, 209.79s/it][A

epoch:   0%|          | 0/50 [03:29<?, ?it/s]


train loss: 6.2223 train acc 0.0039



train:   5%|▌         | 2/40 [03:30<55:08, 87.07s/it]   [A

epoch:   0%|          | 0/50 [03:30<?, ?it/s]


train loss: 6.2148 train acc 0.0020


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcacce3fc70>
Traceback (most recent call last):
self._shutdown_workers()  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
if w.is_alive():    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcacce3fc70>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/


train loss: 6.2109 train acc 0.0039



train:  10%|█         | 4/40 [07:07<53:09, 88.59s/it]   [A

epoch:   0%|          | 0/50 [07:07<?, ?it/s]


train loss: 6.2045 train acc 0.0078



train:  12%|█▎        | 5/40 [10:37<1:17:08, 132.23s/it][A

epoch:   0%|          | 0/50 [10:37<?, ?it/s]


train loss: 6.1988 train acc 0.0102



train:  15%|█▌        | 6/40 [10:38<49:40, 87.67s/it]   [A

epoch:   0%|          | 0/50 [10:38<?, ?it/s]


train loss: 6.1922 train acc 0.0098



train:  18%|█▊        | 7/40 [14:16<1:11:45, 130.47s/it][A

epoch:   0%|          | 0/50 [14:16<?, ?it/s]


train loss: 6.1836 train acc 0.0100



train:  20%|██        | 8/40 [14:17<47:38, 89.32s/it]   [A

epoch:   0%|          | 0/50 [14:18<?, ?it/s]


train loss: 6.1788 train acc 0.0098



train:  22%|██▎       | 9/40 [17:48<1:05:44, 127.24s/it][A

epoch:   0%|          | 0/50 [17:48<?, ?it/s]


train loss: 6.1682 train acc 0.0104



train:  25%|██▌       | 10/40 [17:49<44:09, 88.31s/it]  [A

epoch:   0%|          | 0/50 [17:49<?, ?it/s]


train loss: 6.1546 train acc 0.0105



train:  28%|██▊       | 11/40 [21:24<1:01:19, 126.87s/it][A

epoch:   0%|          | 0/50 [21:24<?, ?it/s]


train loss: 6.1384 train acc 0.0124



train:  30%|███       | 12/40 [21:25<41:21, 88.63s/it]   [A

epoch:   0%|          | 0/50 [21:25<?, ?it/s]


train loss: 6.1200 train acc 0.0143



train:  32%|███▎      | 13/40 [24:57<56:44, 126.09s/it][A

epoch:   0%|          | 0/50 [24:57<?, ?it/s]


train loss: 6.0967 train acc 0.0174



train:  35%|███▌      | 14/40 [24:58<38:17, 88.38s/it] [A

epoch:   0%|          | 0/50 [24:58<?, ?it/s]


train loss: 6.0702 train acc 0.0187



train:  38%|███▊      | 15/40 [28:30<52:21, 125.66s/it][A

epoch:   0%|          | 0/50 [28:30<?, ?it/s]


train loss: 6.0403 train acc 0.0198



train:  40%|████      | 16/40 [28:31<35:16, 88.19s/it] [A

epoch:   0%|          | 0/50 [28:32<?, ?it/s]


train loss: 6.0152 train acc 0.0208



train:  42%|████▎     | 17/40 [31:59<47:32, 124.02s/it][A

epoch:   0%|          | 0/50 [31:59<?, ?it/s]


train loss: 5.9835 train acc 0.0230



train:  45%|████▌     | 18/40 [32:00<31:56, 87.10s/it] [A

epoch:   0%|          | 0/50 [32:00<?, ?it/s]


train loss: 5.9462 train acc 0.0247



train:  48%|████▊     | 19/40 [35:33<43:44, 124.96s/it][A

epoch:   0%|          | 0/50 [35:33<?, ?it/s]


train loss: 5.9027 train acc 0.0278



train:  50%|█████     | 20/40 [35:34<29:16, 87.81s/it] [A

epoch:   0%|          | 0/50 [35:34<?, ?it/s]


train loss: 5.8668 train acc 0.0289



train:  52%|█████▎    | 21/40 [39:04<39:24, 124.42s/it][A

epoch:   0%|          | 0/50 [39:04<?, ?it/s]


train loss: 5.8311 train acc 0.0299



train:  55%|█████▌    | 22/40 [39:05<26:13, 87.43s/it] [A

epoch:   0%|          | 0/50 [39:05<?, ?it/s]


train loss: 5.7901 train acc 0.0309



train:  57%|█████▊    | 23/40 [42:38<35:27, 125.14s/it][A

epoch:   0%|          | 0/50 [42:38<?, ?it/s]


train loss: 5.7523 train acc 0.0324



train:  60%|██████    | 24/40 [42:40<23:27, 87.94s/it] [A

epoch:   0%|          | 0/50 [42:40<?, ?it/s]


train loss: 5.7127 train acc 0.0348



train:  62%|██████▎   | 25/40 [46:13<31:23, 125.59s/it][A

epoch:   0%|          | 0/50 [46:13<?, ?it/s]


train loss: 5.6692 train acc 0.0375



train:  65%|██████▌   | 26/40 [46:14<20:35, 88.26s/it] [A

epoch:   0%|          | 0/50 [46:14<?, ?it/s]


train loss: 5.6285 train acc 0.0397



train:  68%|██████▊   | 27/40 [49:45<27:05, 125.05s/it][A

epoch:   0%|          | 0/50 [49:45<?, ?it/s]


train loss: 5.5848 train acc 0.0440



train:  70%|███████   | 28/40 [49:46<17:34, 87.90s/it] [A

epoch:   0%|          | 0/50 [49:46<?, ?it/s]


train loss: 5.5521 train acc 0.0458


In [26]:
model.eval()
val_data = DataLoader(celebA.get_val(), batch_size=batch_size)

val_loss = 0
val_acc = 0

logits = []
labels_ = []

with torch.no_grad():
    for images, labels in val_data:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        outputs = model(images).cpu()
        logits.append(outputs)
        labels_.extend([*labels])

probs = torch.nn.functional.softmax(torch.cat(logits), dim=-1).numpy()

In [27]:
prob_ = torch.argmax(torch.Tensor(probs), 1).numpy()
labels_ = torch.Tensor(labels_).numpy()

val_accuracy = np.sum(labels_ == prob_)/len(labels_)
val_accuracy

0.6923076923076923

In [28]:
path_to_save_model_state = '/content/drive/MyDrive/DLS Face recognition'
torch.save(model.state_dict(), path_to_save_model_state)

RuntimeError: File /content/drive/MyDrive/DLS Face recognition cannot be opened.