In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!git clone https://github.com/hukenovs/hagrid.git
!python hagrid/download.py --save_path dataset --subset --annotations --dataset

In [None]:
!unzip /content/dataset/subsample/subsample.zip -d /content/subsample

In [None]:
!unzip /content/drive/MyDrive/cv_homework/ann_subsample.zip -d /content/ann_subsample

In [None]:
!pip install timm mediapipe

In [134]:
import json
import random
from pathlib import Path

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from albumentations.pytorch import ToTensorV2
from tqdm.notebook import tqdm
from PIL import Image
from torch.utils.data import Dataset, DataLoader

import timm

### Метрика OKS

In [8]:
def oks(y_true, y_pred, visibility):
    # You might want to set these global constant
    # outside the function scope
    KAPPA = np.array([1] * len(y_true))
    # The object scale
    # You might need a dynamic value for the object scale
    SCALE = 1.0

    # Compute the L2/Euclidean Distance
    distances = np.linalg.norm(y_pred - y_true, axis=-1)
    # Compute the exponential part of the equation
    exp_vector = np.exp(-(distances**2) / (2 * (SCALE**2) * (KAPPA**2)))
    # The numerator expression
    numerator = np.dot(exp_vector, visibility.astype(bool).astype(int))
    # The denominator expression
    denominator = np.sum(visibility.astype(bool).astype(int))
    return numerator / denominator

In [13]:
subsample_folder = Path("/content/subsample")
ann_subsample_folder = Path("/content/ann_subsample/ann_subsample")

### Mediapipe

In [49]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

In [91]:
metrics = []
for file_path in ann_subsample_folder.iterdir():
    real_pt = []
    pred_pt = []
    class_meta = json.load(open(file_path))
    for file_uuid, data in class_meta.items():
        if not data['landmarks']:
            continue

        image = cv2.imread(str(subsample_folder.joinpath(file_path.stem, file_uuid + ".jpg")))
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image_rgb)
        if results.multi_hand_landmarks is None:
            continue

        pt = results.multi_hand_landmarks[0].landmark[8]
        for lm in data['landmarks']:
            if not lm:
                continue

            real_pt.append(lm[8])
            pred_pt.append([pt.x, pt.y])

    oks_metric = oks(np.array(real_pt), np.array(pred_pt), np.ones(len(real_pt)))
    metrics.append(oks_metric)
    print(f'Метрика для класса {file_path.stem}: {oks_metric}')
print()
print(f'Общая метрика: {np.mean(metrics)}')

Метрика для класса palm: 0.9544494551275383
Метрика для класса four: 0.9393977387105934
Метрика для класса three: 0.9680356020947815
Метрика для класса one: 0.9200683221371517
Метрика для класса call: 0.9825109065672695
Метрика для класса two_up_inverted: 0.9730838640744391
Метрика для класса like: 0.9680033422622427
Метрика для класса mute: 0.963730826640324
Метрика для класса peace_inverted: 0.9701256161656603
Метрика для класса peace: 0.949461955635857
Метрика для класса three2: 0.9203575709278646
Метрика для класса fist: 0.9610514048774218
Метрика для класса stop_inverted: 0.9582010551722127
Метрика для класса stop: 0.9554767214391036
Метрика для класса rock: 0.9353961633496034
Метрика для класса dislike: 0.9590839131067055
Метрика для класса ok: 0.957944440006645
Метрика для класса two_up: 0.9503957074048116

Общая метрика: 0.9548208114277903


### Mobilenet

In [75]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [None]:
model = timm.create_model('mobilenetv2_050', pretrained=True, num_classes=0)
model.global_pool = nn.Flatten()
model.classifier = nn.Sequential(
    nn.Linear(62720, 3),
    nn.Sigmoid()
)
model.load_state_dict(torch.load("/content/model_2.pth"))
model.eval()
model.to(device)

In [122]:
from torch.utils.data.sampler import SubsetRandomSampler
class KeypointsDataset(Dataset):

    def __init__(self, meta):
        self.meta = meta
        self.preprocessing = A.Compose([
            A.Resize(height=224, width=224),
            A.Normalize(),
            ToTensorV2()
        ])

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Открыли изображение
        for image_path, image_data in self.meta[idx].items():
            ...
        image = Image.open(subsample_folder.joinpath(image_path)).convert('RGB')
        height, width = image.height, image.width
        image = np.asarray(image)
        keypoints = []

        # Ищем ббокс, вырезаем изображение по ббоксу
        for bbox_data, landmark_data in zip(image_data['bboxes'], image_data['landmarks']):
            if bbox_data and landmark_data:

                bbox_data = np.array(bbox_data)
                bbox_data[0:3:2] = bbox_data[0:3:2] * width
                bbox_data[1:4:2] = bbox_data[1:4:2] * height
                bbox_data = np.round(bbox_data, 0).astype(int)

                x_min = bbox_data[0]
                x_max = bbox_data[0] + bbox_data[2]
                y_min = bbox_data[1]
                y_max = bbox_data[1] + bbox_data[3]

                image = image[y_min:y_max,x_min:x_max]

                landmark_data = np.array(landmark_data)
                landmark_data[:, 0] = landmark_data[:, 0] * width
                landmark_data[:, 1] = landmark_data[:, 1] * height
                landmark_data = landmark_data.astype(int)
                x, y = landmark_data[8]
                keypoints.append((x - x_min) / (x_max - x_min))
                keypoints.append((y - y_min) / (y_max - y_min))
                keypoints.append(float(1))
                break

        image = self.preprocessing(image=image)['image']
        return image, torch.Tensor(keypoints)

In [132]:
dataset = KeypointsDataset(meta=class_meta)
dataloader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=0)

In [133]:
mobilenet_metrics = []
for file_path in ann_subsample_folder.iterdir():
    class_meta = []
    ann = json.load(open(file_path))
    for file_uuid, data in ann.items():
        if not data['landmarks'][0]:
            continue
        full_ann_data = {str(Path(file_path.stem, file_uuid).with_suffix('.jpg')): data}
        class_meta.append(full_ann_data)
    random.shuffle(class_meta)
    dataset = KeypointsDataset(meta=class_meta)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=0)

    val_predictions = []
    val_targets = []
    for batch, targets in tqdm(dataloader):

        with torch.no_grad():

            batch = batch.to(device)
            targets = targets.to(device)
            predictions = model(batch)

            predictions = predictions.cpu().numpy().reshape(-1, 3)
            val_predictions.extend(predictions)
            targets = targets.cpu().numpy().reshape(-1, 3)
            val_targets.extend(targets)

    val_targets = np.array(val_targets)
    val_predictions = np.array(val_predictions)

    oks_metric = oks(val_targets[:, :2], val_predictions[:, :2], val_targets[:, 2])
    mobilenet_metrics.append(oks_metric)
    print(f'Метрика для класса {file_path.stem}: {oks_metric}')
print()
print(f'Общая метрика: {np.mean(mobilenet_metrics)}')

  0%|          | 0/4 [00:00<?, ?it/s]

Метрика для класса palm: 0.9959151369005514


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса four: 0.992569898589009


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса three: 0.9941260079100244


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса one: 0.9937674418904013


  0%|          | 0/4 [00:00<?, ?it/s]

Метрика для класса call: 0.9944279637366554


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса two_up_inverted: 0.985613416700645


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса like: 0.9939879440049694


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса mute: 0.9947636830002554


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса peace_inverted: 0.9956222567084502


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса peace: 0.9947047250657464


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса three2: 0.9940172982849022


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса fist: 0.995924222546544


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса stop_inverted: 0.9974172389183658


  0%|          | 0/4 [00:00<?, ?it/s]

Метрика для класса stop: 0.9964042132360083


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса rock: 0.9947141896362198


  0%|          | 0/4 [00:00<?, ?it/s]

Метрика для класса dislike: 0.9942070007352865


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса ok: 0.9882353832516373


  0%|          | 0/3 [00:00<?, ?it/s]

Метрика для класса two_up: 0.9968445430594453

Общая метрика: 0.9940701424541731
