In [None]:
# Download the HaGRID 30k (384p) sample dataset repository from the Hugging Face Hub.
# NOTE(review): the archive inside the repo is presumably stored with Git LFS — confirm git-lfs is installed.
!git clone https://huggingface.co/datasets/cj-mills/hagrid-sample-30k-384p

In [None]:
# Create the YOLO dataset layout: funny_people_dataset/{images,labels}/{train,val}.
# Every `!` line runs in its own subshell, so a `cd` on one line does not carry over
# to the next; full paths are used instead. `-p` creates parents and makes re-runs safe.
!mkdir -p funny_people_dataset/images/train funny_people_dataset/images/val funny_people_dataset/labels/train funny_people_dataset/labels/val

In [None]:
# NOTE(review): a `!cd` runs in a throw-away subshell, so this does not change the
# notebook's working directory for later cells (`%cd` would). The later cells use
# paths relative to the original directory, so this cell has no lasting effect.
!cd hagrid-sample-30k-384p

In [None]:
# `-y` answers the confirmation prompt, which a notebook cell cannot do interactively.
!sudo apt-get install -y unzip
# The archive sits inside the cloned repository, and the notebook's working directory
# is still its parent, so address the zip by its full relative path and extract next to it.
# This yields ./hagrid-sample-30k-384p/hagrid-sample-30k-384p/... as the later cells expect.
# `-q` suppresses the per-file listing; `-o` overwrites without prompting on re-runs.
!unzip -q -o hagrid-sample-30k-384p/hagrid-sample-30k-384p.zip -d hagrid-sample-30k-384p

In [1]:
from ultralytics import YOLO
import pandas as pd
import numpy as np
import os
import torch
import sklearn.model_selection

In [2]:
# Check that PyTorch can see a CUDA device (the training cell below requests device=0).
torch.cuda.is_available()

True

In [3]:
# Gesture class names. The position of a name in this list becomes its numeric class id.
# NOTE(review): presumably this order must match the `names` entry of funny_people.yaml — confirm.
classes = [
    'call', 'no_gesture', 'dislike', 'fist', 'four',
    'like', 'mute', 'ok', 'one', 'palm',
    'peace', 'peace_inverted', 'rock', 'stop', 'stop_inverted',
    'three', 'three2', 'two_up', 'two_up_inverted',
]

In [4]:
# Map every gesture name to its integer class id, i.e. its position in `classes`.
class_to_id = dict(zip(classes, range(len(classes))))
class_to_id

{'call': 0,
 'no_gesture': 1,
 'dislike': 2,
 'fist': 3,
 'four': 4,
 'like': 5,
 'mute': 6,
 'ok': 7,
 'one': 8,
 'palm': 9,
 'peace': 10,
 'peace_inverted': 11,
 'rock': 12,
 'stop': 13,
 'stop_inverted': 14,
 'three': 15,
 'three2': 16,
 'two_up': 17,
 'two_up_inverted': 18}

In [5]:
# Load the annotation table and discard the columns that are not used when writing labels.
data = (
    pd.read_parquet('./hagrid-sample-30k-384p/hagrid-sample-30k-384p/annotations_df.parquet')
    .drop(columns=['leading_hand', 'leading_conf', 'user_id'])
)

In [6]:
# Preview the annotation table (remaining columns: bboxes and labels).
data

Unnamed: 0,bboxes,labels
00005c9c-3548-4a8f-9d0b-2dd4aff37fc9,"[[0.23925175, 0.28595301, 0.25055143, 0.207776...",[call]
0020a3db-82d8-47aa-8642-2715d4744db5,"[[0.5801012999999999, 0.53265105, 0.14562138, ...",[call]
004ac93f-0f7c-49a4-aadc-737e0ad4273c,"[[0.46294793, 0.26419774, 0.13834939000000002,...",[call]
006cac69-d3f0-47f9-aac9-38702d038ef1,"[[0.38799208, 0.44643898, 0.27068787, 0.182778...",[call]
00973fac-440e-4a56-b60c-2a06d5fb155d,"[[0.40980118, 0.38144198, 0.08338464, 0.062297...","[call, no_gesture]"
...,...,...
f4fd00a3-1f1e-4594-be3f-362229e430e1,"[[0.46801204, 0.37731512, 0.20508805, 0.200085...",[two_up_inverted]
f7308b1f-0433-42c3-b388-08e0ab2a84a7,"[[0.30225301, 0.22838633, 0.12823989, 0.180910...",[two_up_inverted]
f9103c94-a36e-44ea-acf4-f8ff36d4a2bf,"[[0.19668888, 0.42850023, 0.1132281, 0.1760887...","[two_up_inverted, no_gesture]"
faf1d2ff-f273-47b2-a76b-30bc296703cd,"[[0.49250894, 0.54393085, 0.05427064, 0.090299...","[two_up_inverted, no_gesture]"


In [7]:
# Root of the extracted image tree that is scanned and split into train/val.
root_path = './hagrid-sample-30k-384p/hagrid-sample-30k-384p/hagrid_30k/'
# Destination root for the moved images; move_img() appends the split name ('train' / 'val').
distination = './funny_people_dataset/images/'

In [8]:
def pd_from_path(directory: str) -> pd.DataFrame:
    """Collect every image file found under ``directory`` into a DataFrame.

    The tree is walked recursively. A file counts as an image when its name ends
    with ``.jpg``, ``.jpeg`` or ``.png``, compared case-insensitively.

    Args:
        directory: Root directory to scan.

    Returns:
        A DataFrame with one row per image and two columns: ``imagepath`` (the path
        to the file, built from ``directory``) and ``filename`` (the bare file name).
        The frame is empty, but still has both columns, when no image is found.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    records = []
    for path, dirs, files in os.walk(directory):
        # Sorting in place fixes the order in which os.walk descends into
        # sub-directories, so the row order does not depend on the filesystem.
        dirs.sort()
        for file in sorted(files):
            if file.lower().endswith(image_extensions):
                records.append((os.path.join(path, file), file))

    return pd.DataFrame(records, columns=['imagepath', 'filename'])

In [None]:
# 95% / 5% train/validation split over all discovered images.
# A fixed random_state makes the shuffle repeatable for the same input row order;
# without it every kernel restart produces a different split.
train_images, val_images = sklearn.model_selection.train_test_split(
    pd_from_path(root_path),
    test_size=0.05,
    train_size=0.95,
    shuffle=True,
    random_state=42,
)

In [17]:
def move_img(df: pd.DataFrame, distination: str, split: str):
    """Move every image listed in ``df`` into ``<distination>/<split>/``.

    Args:
        df: Frame with an ``imagepath`` column (current location of each file) and
            a ``filename`` column (name the file gets in the target directory).
        distination: Root directory of the image dataset; a trailing slash is optional.
        split: Name of the sub-directory to move into, e.g. ``'train'`` or ``'val'``.

    Raises:
        FileNotFoundError: If a source file listed in ``df`` does not exist.
    """
    import shutil  # local import: this function is the only place that needs it

    target_dir = os.path.join(distination, split)
    # Create the target up front so the move cannot fail on a missing directory.
    os.makedirs(target_dir, exist_ok=True)
    for imagepath, filename in zip(df['imagepath'], df['filename']):
        # shutil.move falls back to copy-and-delete when source and target are on
        # different filesystems, where a plain os.rename would raise.
        shutil.move(imagepath, os.path.join(target_dir, filename))

In [18]:
# Relocate each split's images into its own sub-directory of the dataset.
move_img(df=train_images, distination=distination, split='train')
move_img(df=val_images, distination=distination, split='val')

In [24]:
# Inspect the training split (original image path and file name per row).
train_images

Unnamed: 0,imagepath,filename
22679,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,dc8d0763-b8d9-4873-bbdd-cdddc5988c53.jpg
21592,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,3c8d72bc-4bf5-4df3-a786-f62b75f55daa.jpg
11686,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,9980bd86-7d0f-4fee-90c0-2ee7f21408c5.jpg
7879,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,75ac7400-c98f-4612-b5b1-05a4358f6aa7.jpg
17483,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,e1706ad3-7c05-42c3-af39-341d1afa1a5a.jpg
...,...,...
13123,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,6acd35ad-e22e-463e-9a9e-32fa6e8ccbc2.jpg
19648,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,1f2bfd0a-55c7-4ca8-8d04-c0f4f20c1f56.jpg
9845,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,90eb9b58-7a96-468a-959b-780f7fd418b6.jpg
10799,./hagrid-sample-30k-384p/hagrid-sample-30k-384...,1836327a-4cf3-40dc-a1ed-fa1eaa3298bb.jpg


In [36]:
labels_path = './funny_people_dataset/labels'

def get_image_description(stplitted_data: pd.DataFrame, data: pd.DataFrame, labels_root_path: str, split: str, class_ids=None):
    """Write one YOLO-format label file per image of a dataset split.

    For each file name in ``stplitted_data`` the matching annotation row is looked up
    in ``data`` by the file name without its extension. Every box is converted from
    ``(top_left_x, top_left_y, width, height)`` to ``class_id x_center y_center width
    height`` and written as one line of ``<labels_root_path>/<split>/<image_name>.txt``.

    Args:
        stplitted_data: Frame with a ``filename`` column listing the images of the split.
        data: Annotation frame indexed by image name, with ``bboxes`` and ``labels`` columns.
        labels_root_path: Root directory of the label files.
        split: Sub-directory to write into, e.g. ``'train'`` or ``'val'``.
        class_ids: Optional mapping from label name to integer class id. Defaults to
            the notebook-level ``class_to_id`` mapping.

    Raises:
        KeyError: If an image has no row in ``data`` or a label is missing from the mapping.
    """
    if class_ids is None:
        class_ids = class_to_id

    split_dir = os.path.join(labels_root_path, split)
    os.makedirs(split_dir, exist_ok=True)

    for filename in stplitted_data['filename']:
        # splitext strips whichever extension the image has (.jpg, .jpeg, .png, ...)
        # and never touches a '.jpg' that happens to occur inside the name.
        image_name = os.path.splitext(filename)[0]
        image = data.loc[image_name]

        # Build all lines before opening the file, so a failed lookup does not
        # leave an empty or half-written label file behind.
        label_lines = []
        for label, bbox in zip(image['labels'], image['bboxes']):
            top_left_x_point, top_left_y_point, width, height = bbox
            x_center = top_left_x_point + width / 2
            y_center = top_left_y_point + height / 2
            label_lines.append(f'{class_ids[label]} {x_center} {y_center} {width} {height}')

        with open(os.path.join(split_dir, f'{image_name}.txt'), 'w') as bboxes_discription:
            for line in label_lines:
                print(line, file=bboxes_discription)

In [37]:
# Write the YOLO label files for both splits.
get_image_description(stplitted_data=train_images, data=data, labels_root_path=labels_path, split='train')
get_image_description(stplitted_data=val_images, data=data, labels_root_path=labels_path, split='val')

In [10]:
# Build the detector from the 'yolov8m.pt' checkpoint.
# NOTE(review): presumably Ultralytics downloads these pretrained weights if the file is absent — confirm.
model = YOLO('yolov8m.pt')

In [None]:
# Train on the dataset described by funny_people.yaml, using the first CUDA device.
results = model.train(
    data='funny_people.yaml',
    epochs=10,
    imgsz=800,
    workers=1,
    device=0,
)

In [None]:
# CLI variant of the training run (smaller yolov8s model, 100 epochs).
# The original `{dataset.location}` placeholder referred to a `dataset` object that is
# never created in this notebook, so the data path was invalid; use the same
# funny_people.yaml as the Python training cell.
!yolo task=detect mode=train model=yolov8s.pt data=funny_people.yaml epochs=100 imgsz=800