In [2]:
!pip install pandas

Collecting pandas
  Using cached pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Using cached pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.8 MB)
Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Installing collected packages: pytz, pandas
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [pandas]2m1/2[0m [pandas]
[1A[2KSuccessfully installed pandas-2.3.3 pytz-2025.2


In [None]:
from pathlib import Path
import pandas as pd
import os
import numpy as np
import re
import cv2
import pickle
from tqdm import tqdm

import random
import matplotlib.pyplot as plt
import shutil

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedGroupKFold

# Код

In [None]:
def get_frame(path, position):
    """Read a single frame of a video and return it as an RGB image.

    Args:
        path: Path to the video file (``str`` or ``os.PathLike``).
        position: Relative position inside the video; ``0.0`` selects the first
            frame. The computed frame index is clamped to ``[0, total - 1]``,
            so ``1.0`` selects the last frame instead of one past the end.

    Returns:
        The frame produced by ``cv2.VideoCapture.read`` converted from BGR to RGB.

    Raises:
        RuntimeError: If the video cannot be opened or the frame cannot be read.
    """
    # Path objects are converted explicitly; other values are passed through untouched.
    source = str(path) if isinstance(path, os.PathLike) else path
    cap = cv2.VideoCapture(source)
    try:
        if not cap.isOpened():
            raise RuntimeError("Не удалось открыть видео")

        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # int(position * total) equals `total` for position == 1.0, which is one
        # past the last valid index, so clamp into the valid range.
        n_frame = max(0, min(int(position * total), total - 1))
        cap.set(cv2.CAP_PROP_POS_FRAMES, n_frame)

        ok, frame = cap.read()
    finally:
        # Release the capture on every path, including the failure paths above.
        cap.release()

    if not ok:
        raise RuntimeError("Не удалось прочитать кадр")

    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

In [None]:
def get_file_list(path):
    """Return the names (not full paths) of the regular files directly inside ``path``.

    Sub-directories are skipped; the order is whatever ``os.listdir`` returns.
    """
    names = []
    for entry in os.listdir(path):
        if os.path.isfile(os.path.join(path, entry)):
            names.append(entry)
    return names


def get_file_paths(path):
    """Return ``path``-prefixed paths of the regular files directly inside ``path``.

    Sub-directories are skipped; the order is whatever ``os.listdir`` returns.
    """
    candidates = (os.path.join(path, entry) for entry in os.listdir(path))
    return list(filter(os.path.isfile, candidates))


def sort_naturally(arr):
    """Return a new list of strings sorted in natural (human) order.

    Runs of digits are compared as integers and everything else is compared
    case-insensitively, so ``'video_2'`` sorts before ``'video_10'``.

    Args:
        arr: Iterable of strings.

    Returns:
        A new sorted list; the input is not modified.
    """
    def natural_key(s: str):
        # re.split with a capturing group always alternates text, digits, text, ...
        # so odd positions are exactly the r'\d+' matches. Deciding by position
        # (instead of str.isdigit) avoids calling int() on text chunks such as
        # '²', for which isdigit() is true but int() raises ValueError.
        tokens = re.split(r'(\d+)', s)
        return [int(tok) if idx % 2 else tok.lower() for idx, tok in enumerate(tokens)]

    return sorted(arr, key=natural_key)

In [None]:
def plot_random_video_frames_by_subfolder(root_dir, dpi=120, fontsize=10, seed=None):
    """Plot the first, middle and last frame of one random video per subfolder.

    Each subfolder of ``root_dir`` becomes one row: a text cell with the
    subfolder and video name, followed by three frame cells.

    Args:
        root_dir: Directory whose immediate sub-directories are plotted.
        dpi: Resolution passed to ``plt.subplots``.
        fontsize: Font size for the text cell and the frame titles.
        seed: Optional seed for the video choice. ``None`` (default) uses the
            global ``random`` module state, as before.

    Returns:
        ``(fig, axes, items)`` where ``items`` has one dict per row with the
        keys ``subfolder``, ``video``, ``error`` and ``error_msg``.

    Raises:
        ValueError: If ``root_dir`` contains no sub-directories.
    """
    # Only directories are rows; a stray file in root_dir would otherwise crash
    # the per-folder listing. Sorting keeps the row order stable between runs.
    subfolders = sorted(
        name for name in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, name))
    )
    if not subfolders:
        raise ValueError(f"No subfolders to plot in {root_dir!r}")

    chooser = random if seed is None else random.Random(seed)
    titles = ["Первый", "Средний", "Последний"]
    frame_positions = (0.0, 0.5, 0.99)

    rows = len(subfolders)
    cols = 1 + len(titles)  # 1 text column + 3 frame columns

    # Create the figure with a grid of axes.
    fig, axes = plt.subplots(
        nrows=rows,
        ncols=cols,
        figsize=(cols * 3.2, rows * 2.4),
        dpi=dpi,
        squeeze=False
    )
    plt.subplots_adjust(wspace=0.15, hspace=0.25)

    items = []
    for r, subdir in enumerate(subfolders):
        full_sub_dir = os.path.join(root_dir, subdir)
        # Annotation/notes files (*.csv, *.txt) live next to the videos and must
        # not be picked as a "video"; sorting makes a seeded choice reproducible.
        videos = sorted(
            p for p in get_file_paths(full_sub_dir)
            if os.path.splitext(p)[1].lower() not in ('.csv', '.txt')
        )

        frames = [None] * len(titles)
        video_path = None
        error_msg = None
        if not videos:
            # An empty folder keeps its row and reports the problem in the text cell.
            error_msg = "В папке нет видео"
        else:
            # Pick a random video from the subfolder.
            video_path = chooser.choice(videos)
            try:
                frames = [get_frame(video_path, pos) for pos in frame_positions]
            except Exception as e:
                # On failure record the message and leave the frame cells empty
                # so the remaining rows are still rendered.
                frames = [None] * len(titles)
                error_msg = str(e)

        # Left cell: text (subfolder name and video name).
        ax_text = axes[r, 0]
        ax_text.axis('off')
        video_name = os.path.basename(video_path) if video_path else ""
        if error_msg:
            ax_text.text(
                0, 0.5,
                f"{subdir}\n{video_name}\nОшибка: {error_msg}",
                fontsize=fontsize, va='center', ha='left', color='crimson'
            )
        else:
            ax_text.text(
                0, 0.5,
                f"{subdir}\n{video_name}",
                fontsize=fontsize, va='center', ha='left'
            )

        # Right side: the three frames.
        for c, title in enumerate(titles):
            ax = axes[r, c + 1]
            ax.set_xticks([])
            ax.set_yticks([])
            if frames[c] is not None:
                ax.imshow(frames[c])
            ax.set_title(title, fontsize=fontsize)

        items.append({
            "subfolder": subdir,
            "video": video_path,
            "error": error_msg is not None,
            "error_msg": error_msg
        })

    return fig, axes, items

In [None]:
def plot_videos_from_dir(root_dir, frames_amount=10, dpi=240, fontsize=10):
    """Plot a storyboard: one row per video in ``root_dir`` with evenly spaced frames.

    Each row has a text cell with the video file name followed by
    ``frames_amount`` frames sampled at relative positions in ``[0, 0.99)``.

    Args:
        root_dir: Directory containing the video files.
        frames_amount: Number of frames shown per video.
        dpi: Resolution passed to ``plt.subplots``.
        fontsize: Font size for the text cell.

    Returns:
        ``(fig, axes, items)`` where ``items`` has one dict per row with the
        keys ``video``, ``error`` and ``error_msg``.

    Raises:
        ValueError: If ``root_dir`` contains no video files.
    """
    # Annotation/notes files (*.csv, *.txt) are stored next to the videos; without
    # this filter each of them becomes a row that only shows a read error.
    videos = sort_naturally([
        p for p in get_file_paths(root_dir)
        if os.path.splitext(p)[1].lower() not in ('.csv', '.txt')
    ])
    if not videos:
        raise ValueError(f"No video files found in {root_dir!r}")

    rows = len(videos)
    cols = 1 + frames_amount

    # linspace(..., endpoint=False) yields exactly `frames_amount` positions;
    # arange with a float step may produce one extra element due to rounding.
    positions = np.linspace(0, 0.99, frames_amount, endpoint=False)

    # Create the figure with a grid of axes.
    fig, axes = plt.subplots(
        nrows=rows,
        ncols=cols,
        figsize=(cols * 3.2, rows * 2.4),
        dpi=dpi,
        squeeze=False
    )
    plt.subplots_adjust(wspace=0.15, hspace=0.25)

    items = []
    for r, video_path in enumerate(videos):
        try:
            frames = [get_frame(video_path, pos) for pos in positions]
        except Exception as e:
            # On failure record the message and leave the frame cells empty
            # so the remaining rows are still rendered.
            frames = [None] * frames_amount
            error_msg = str(e)
        else:
            error_msg = None

        # Left cell: text (video file name).
        ax_text = axes[r, 0]
        ax_text.axis('off')
        video_name = os.path.basename(video_path)
        if error_msg:
            ax_text.text(
                0, 0.5,
                f"{video_name}\nОшибка: {error_msg}",
                fontsize=fontsize, va='center', ha='left', color='crimson'
            )
        else:
            ax_text.text(
                0, 0.5,
                f"{video_name}",
                fontsize=fontsize, va='center', ha='left'
            )

        for c in range(frames_amount):
            ax = axes[r, c + 1]
            ax.set_xticks([])
            ax.set_yticks([])
            if frames[c] is not None:
                ax.imshow(frames[c])

        items.append({
            "video": video_path,
            "error": error_msg is not None,
            "error_msg": error_msg
        })

    return fig, axes, items

In [None]:
def plot_exercise_examples(exercise_dir, close=False):
    """Render the storyboard for one exercise folder and save it as a PNG.

    The videos are read from ``./full_workout_dataset/full/<exercise_dir>`` and
    the figure is written to ``./plots/<exercise_dir>.png``.

    Args:
        exercise_dir: Name of the exercise sub-directory.
        close: If true, close the figure after saving to free its memory
            (useful when calling this in a loop). Defaults to ``False`` so the
            figure stays open exactly as before.
    """
    fig, _axes, _items = plot_videos_from_dir(f'./full_workout_dataset/full/{exercise_dir}')
    # savefig does not create missing directories.
    os.makedirs('./plots', exist_ok=True)
    # Save the figure explicitly instead of relying on pyplot's "current figure".
    fig.savefig(f'./plots/{exercise_dir}.png')
    if close:
        plt.close(fig)

In [None]:
def count_seconds(video_path):
    """Return the duration of a video in seconds.

    The duration is the frame count divided by the FPS, both as reported by
    ``cv2.VideoCapture``.

    Args:
        video_path: Path to the video file.

    Returns:
        ``frames / fps``, or ``0`` when the reported FPS is 0.
    """
    video = cv2.VideoCapture(video_path)
    try:
        frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = video.get(cv2.CAP_PROP_FPS)
    finally:
        # The capture was previously never released; this function is called once
        # per file when building metadata, so handles accumulated.
        video.release()

    if fps == 0:
        return 0
    return frames / fps

In [None]:
# Suffix that marks a metadata CSV file inside a class directory.
META_EXTENSION = '.meta.csv'


def find_meta_exists(path):
    """Return the name of the first metadata file found in ``path``, or ``None``.

    A metadata file is a regular file whose name ends with ``META_EXTENSION``.
    Entries are examined in ``os.listdir`` order.
    """
    matches = (
        entry for entry in os.listdir(path)
        if entry.endswith(META_EXTENSION) and os.path.isfile(os.path.join(path, entry))
    )
    return next(matches, None)


def create_meta_row(video_path, class_name, seconds):
    """Build one metadata record for a video.

    ``angle`` is initialised to NaN and ``person_id`` to 0; presumably both are
    filled in later by hand.
    """
    columns = ('video', 'class', 'angle', 'person_id', 'duration_sec')
    values = (video_path, class_name, np.nan, 0, seconds)
    return dict(zip(columns, values))


def is_ignored_file(file_path):
    """Tell whether a file is a text/CSV side file rather than a video.

    The check looks at the text after the last dot, case-insensitively, so
    ``notes.TXT`` and ``x.meta.csv`` are ignored. A name without any dot is
    never ignored, even if it is literally ``csv`` or ``txt``.

    Args:
        file_path: File name or path.

    Returns:
        ``True`` if the extension is ``txt`` or ``csv``.
    """
    _, dot, extension = file_path.rpartition('.')
    return bool(dot) and extension.lower() in ('txt', 'csv')


def create_meta_template(path):
    """Build a fresh metadata DataFrame for every video file in ``path``.

    The class name is the last component of ``path``. Files are visited in
    natural order; directories and files rejected by ``is_ignored_file`` are
    skipped. Each row comes from ``create_meta_row`` with the duration measured
    by ``count_seconds``.
    """
    class_name = Path(path).name
    rows = []
    for entry in sort_naturally(os.listdir(path)):
        full_path = os.path.join(path, entry)
        if not os.path.isfile(full_path):
            continue
        if is_ignored_file(entry):
            continue
        rows.append(create_meta_row(entry, class_name, count_seconds(full_path)))
    return pd.DataFrame(rows)


def get_meta(path):
    """Load the metadata table of a class directory, creating it if missing.

    If ``path`` already contains a file ending in ``META_EXTENSION``, that file
    is read. Otherwise a template is generated with ``create_meta_template``,
    saved as ``<directory name><META_EXTENSION>`` inside ``path`` and returned.

    Args:
        path: Class directory.

    Returns:
        The metadata as a ``pandas.DataFrame``.
    """
    existing = find_meta_exists(path)
    if existing is not None:
        # Read the file that was actually found. Rebuilding the name from the
        # directory name fails with FileNotFoundError when the two differ.
        return pd.read_csv(os.path.join(path, existing))

    class_name = Path(path).name
    meta = create_meta_template(path)
    meta.to_csv(os.path.join(path, f'{class_name}{META_EXTENSION}'), index=False)
    return meta


def get_duration_sum(df):
    """Return total ``duration_sec`` per ``person_id``, largest total first."""
    per_person = df.groupby('person_id')['duration_sec']
    totals = per_person.sum()
    return totals.sort_values(ascending=False)


# migrations
def migrate_v2(current_meta, path):
    """Add freshly measured ``duration_sec`` values to an existing metadata table.

    A new template is generated for ``path`` and reduced to its ``video`` and
    ``duration_sec`` columns, then inner-merged with ``current_meta`` on
    ``video``. The result overwrites ``<directory name><META_EXTENSION>`` in
    ``path``.

    Args:
        current_meta: Existing metadata DataFrame with a ``video`` column.
        path: Class directory the metadata belongs to.

    Returns:
        The merged DataFrame that was written to disk.
    """
    meta_v2 = create_meta_template(path)
    meta_v2 = meta_v2.drop(columns=['angle', 'person_id', 'class'])
    # If the migration already ran, current_meta has its own duration_sec and the
    # merge would produce duration_sec_x / duration_sec_y. Dropping the stale
    # column first makes the migration safe to re-run.
    base_meta = current_meta.drop(columns=['duration_sec'], errors='ignore')
    result = pd.merge(base_meta, meta_v2, on="video")

    class_name = Path(path).name
    result.to_csv(os.path.join(path, f'{class_name}{META_EXTENSION}'), index=False)
    return result

In [None]:
def load_all(path='./full_workout_dataset/full', classes=None):
    """Load the metadata tables of all exercise classes.

    Args:
        path: Dataset root that contains one sub-directory per class.
        classes: Class directory names to load. ``None`` (default) loads the
            six classes used throughout this notebook.

    Returns:
        Dict mapping each class name to its metadata DataFrame, in the order
        of ``classes``.
    """
    if classes is None:
        classes = ['pull Up', 'push-up', 'chest fly machine', 'leg raises', 'squat', 'tricep dips']

    return {label: get_meta(os.path.join(path, label)) for label in classes}

# Раскадровки

In [None]:
# Render one randomly chosen video per subfolder of the dataset and save the overview figure.
# NOTE(review): the output name 'exercies.png' looks like a typo of 'exercises.png';
# left unchanged because other files may reference the current name.
fig, axes, items = plot_random_video_frames_by_subfolder('./full_workout_dataset/full')
plt.savefig('./plots/exercies.png')

In [None]:
# Save a storyboard figure (./plots/<name>.png) for each of the listed exercise folders.
for exercise_dir in ['squat', 'tricep dips']:
    print(f'Plotting: {exercise_dir}')
    plot_exercise_examples(exercise_dir)

# Meta

## Pull up

In [32]:
pull_up_meta = get_meta('./full_workout_dataset/full/pull Up')

In [33]:
pull_up_meta

Unnamed: 0,video,class,angle,person_id,duration_sec
0,pull up_1.mp4,pull Up,фас,0,17.550867
1,pull up_2.mp4,pull Up,3_4_спереди,0,14.481133
2,pull up_3.mp4,pull Up,3_4_спереди,0,18.866667
3,pull up_4.mp4,pull Up,сзади,0,19.185833
4,pull up_5.mp4,pull Up,3_4_сзади,1,4.838167
5,pull up_6.mp4,pull Up,3_4_сзади,1,10.276933
6,pull up_7.mp4,pull Up,сзади,1,9.009
7,pull up_8.mp4,pull Up,профиль,1,4.671333
8,pull up_9.mp4,pull Up,3_4_сзади,1,12.178833
9,pull up_10.mp4,pull Up,фас,1,8.1081


In [70]:
get_duration_sum(pull_up_meta)

person_id
1    76.342933
0    70.084500
2    15.306958
8    13.013000
6     8.500000
5     8.100000
7     7.007000
3     5.547208
4     5.505500
Name: duration_sec, dtype: float64

## Push up

In [37]:
push_up_meta = get_meta('./full_workout_dataset/full/push-up')

In [38]:
push_up_meta

Unnamed: 0,video,class,angle,person_id,duration_sec
0,push-up_1.mp4,push-up,профиль,0,5.005000
1,push-up_2.mp4,push-up,профиль,0,5.005000
2,push-up_3.mp4,push-up,профиль,0,5.005000
3,push-up_4.mp4,push-up,профиль,0,5.005000
4,push-up_5.mp4,push-up,профиль,0,5.005000
...,...,...,...,...,...
56,push-up_54.mp4,push-up,фас,8,25.959267
57,push-up_55.mp4,push-up,фас,8,15.849167
58,push-up_55.mp4,push-up,3_4_спереди,8,15.849167
59,push-up_55.mp4,push-up,3_4_спереди,9,15.849167


In [71]:
get_duration_sum(push_up_meta)

person_id
8    157.090267
0     70.070000
3     37.137100
5     33.633600
2     32.365667
9     29.562867
4     24.824800
1     21.821800
7      5.005000
6      5.005000
Name: duration_sec, dtype: float64

## Chest fly machine

In [42]:
chest_meta = get_meta('./full_workout_dataset/full/chest fly machine')

In [43]:
chest_meta

Unnamed: 0,video,class,angle,person_id,duration_sec
0,chest fly machine_1.mp4,chest fly machine,фас,0,5.005
1,chest fly machine_2.mp4,chest fly machine,3_4_спереди,1,3.470133
2,chest fly machine_3.mp4,chest fly machine,фас,1,2.669333
3,chest fly machine_4.mp4,chest fly machine,3_4_спереди,2,4.5045
4,chest fly machine_5.mp4,chest fly machine,3_4_спереди,3,3.236567
5,chest fly machine_6.mp4,chest fly machine,фас,4,4.3043
6,chest fly machine_7.mp4,chest fly machine,3_4_спереди,4,3.236567
7,chest fly machine_8.mp4,chest fly machine,3_4_спереди,5,5.1051
8,chest fly machine_9.mp4,chest fly machine,3_4_спереди,5,5.1051
9,chest fly machine_10.mp4,chest fly machine,3_4_спереди,5,3.4034


In [72]:
get_duration_sum(chest_meta)

person_id
12    31.531500
7     24.391033
5     23.857167
6     20.053367
8     15.115100
9     14.000000
11     8.500000
4      7.540867
1      6.139467
10     6.000000
0      5.005000
2      4.504500
3      3.236567
13     3.000000
14     2.600000
Name: duration_sec, dtype: float64

## Leg raises

In [47]:
leg_raises_meta = get_meta('./full_workout_dataset/full/leg raises')

In [48]:
leg_raises_meta

Unnamed: 0,video,class,angle,person_id,duration_sec
0,leg raises_1.MOV,leg raises,фас,0,13.678333
1,leg raises_2.MOV,leg raises,3_4_спереди,0,14.813333
2,leg raises_3.MOV,leg raises,3_4_спереди,0,12.043333
3,leg raises_4.MOV,leg raises,3_4_спереди,1,7.506667
4,leg raises_5.MOV,leg raises,3_4_спереди,1,15.813333
5,leg raises_6.MOV,leg raises,фас,1,11.71
6,leg raises_7.mp4,leg raises,фас,2,11.8118
7,leg raises_7.mp4,leg raises,сзади,2,11.8118
8,leg raises_8.mp4,leg raises,профиль,3,11.344667
9,leg raises_9.mp4,leg raises,3_4_спереди,3,7.040367


In [73]:
get_duration_sum(leg_raises_meta)

person_id
3    60.126733
0    40.535000
1    35.030000
2    23.623600
4    21.760000
5    14.848167
6     5.296958
Name: duration_sec, dtype: float64

## Squat

In [54]:
squat_meta = get_meta('./full_workout_dataset/full/squat')

In [55]:
squat_meta

Unnamed: 0,video,class,angle,person_id,duration_sec
0,squat_1.MOV,squat,фас,0,19.583333
1,squat_2.MOV,squat,3_4_спереди,0,14.346667
2,squat_3.MOV,squat,3_4_спереди,0,21.318333
3,squat_4.MOV,squat,3_4_спереди,1,20.018333
4,squat_5.MOV,squat,3_4_спереди,1,21.886667
5,squat_6.MOV,squat,фас,1,22.153333
6,squat_7.mp4,squat,3_4_сзади,2,2.32
7,squat_8.mp4,squat,сзади,3,4.212542
8,squat_9.mp4,squat,профиль,4,3.003
9,squat_10.mp4,squat,профиль,4,7.298958


In [74]:
get_duration_sum(squat_meta)

person_id
9     69.602867
1     64.058333
0     55.248333
8     41.975267
10    23.556867
5     18.685333
7     13.840000
3     12.178833
4     10.301958
6      9.634625
2      2.320000
Name: duration_sec, dtype: float64

## Tricep dips

In [58]:
tricep_meta = get_meta('./full_workout_dataset/full/tricep dips')

In [59]:
tricep_meta

Unnamed: 0,video,class,angle,person_id,duration_sec
0,tricep dips_1.MOV,tricep dips,профиль,0,19.816667
1,tricep dips_2.MOV,tricep dips,профиль,0,26.656667
2,tricep dips_3.MOV,tricep dips,сзади,0,16.481667
3,tricep dips_4.MOV,tricep dips,сзади,1,6.371667
4,tricep dips_5.mp4,tricep dips,профиль,2,11.6116
5,tricep dips_6.mp4,tricep dips,профиль,2,11.1111
6,tricep dips_7.mp4,tricep dips,фас,3,10.7107
7,tricep dips_8.mp4,tricep dips,фас,3,5.8058
8,tricep dips_9.mp4,tricep dips,профиль,3,13.013
9,tricep dips_10.mp4,tricep dips,профиль,3,14.9149


In [75]:
get_duration_sum(tricep_meta)

person_id
0    62.955000
3    56.756700
5    53.300000
2    22.722700
4    20.880000
6    16.700000
7     9.909900
1     6.371667
Name: duration_sec, dtype: float64

# Код для фолдов

In [None]:
def concat_dataset(meta_dict):
    """Concatenate per-class metadata into one table with globally unique person ids.

    ``person_id`` values are local to each class. Each table is shifted by an
    offset equal to the largest shifted id of the previous table plus one, so
    ids from different classes never collide.

    The input dict and its DataFrames are left untouched, so the function can
    be called repeatedly on the same ``meta_dict`` with the same result.

    Args:
        meta_dict: Mapping of class name to metadata DataFrame with a
            ``person_id`` column.

    Returns:
        The concatenated DataFrame. Row indices of the individual tables are
        kept as they are.
    """
    shifted_parts = []
    offset = 0
    for meta in meta_dict.values():
        part = meta.copy(deep=True)
        part['person_id'] = part['person_id'] + offset
        shifted_parts.append(part)
        # An empty table has no max(); keep the current offset in that case
        # instead of turning it into NaN.
        if len(part):
            offset = part['person_id'].max() + 1

    return pd.concat(shifted_parts, axis=0)


def labels_to_num(labels):
    """Fit a ``LabelEncoder`` on ``labels`` and return ``(encoder, encoded_labels)``."""
    encoder = LabelEncoder()
    encoded = encoder.fit_transform(labels)
    return encoder, encoded


def create_dataset_for_k_fold(all_df):
    """Turn the combined metadata table into ``(X, y, groups)`` numpy arrays.

    ``X`` holds ``"<class>/<video>"`` strings, ``y`` the ``class`` column and
    ``groups`` the ``person_id`` column.
    """
    relative_paths = all_df['class'] + '/' + all_df['video']
    columns = (relative_paths, all_df['class'], all_df['person_id'])
    samples, targets, person_groups = (np.array(column) for column in columns)
    return samples, targets, person_groups

In [None]:
class Partition:
    """Container for one subset of the dataset (samples, labels and group ids)."""

    def __init__(self, data, labels, groups):
        # data: the samples; labels: their targets; groups: their group ids.
        self.data, self.labels, self.groups = data, labels, groups


class Fold:
    """One train / validation / test split of the dataset."""

    def __init__(self, fold_id, train_part, val_part, test_part):
        # id: identifier of the fold; it is also used as the directory name
        # in create_folders.
        self.id = fold_id
        # The three subsets; each is expected to expose .data and .groups.
        self.train_part, self.val_part, self.test_part = train_part, val_part, test_part

    def show_groups(self):
        """Print the group ids of the train, val and test subsets."""
        sections = (
            ("=== Train: ========", self.train_part),
            ("=== Val: ========", self.val_part),
            ("=== Test: =========", self.test_part),
        )
        print(f"= Fold {self.id}: =======")
        for header, part in sections:
            print(header)
            print(' '.join(map(str, part.groups.tolist())))
        print('\n\n')

    def create_folders(self, source, dest):
        """Copy the files of this fold into ``dest/<id>/{train,val,test}``.

        Each entry of a subset's ``data`` is a path relative to ``source``; the
        same relative path is recreated under the subset directory.
        """
        fold_root = os.path.join(dest, str(self.id))
        subsets = (
            ('train', self.train_part),
            ('val', self.val_part),
            ('test', self.test_part),
        )

        for subset_name, part in subsets:
            subset_dir = Path(os.path.join(fold_root, subset_name))
            subset_dir.mkdir(parents=True, exist_ok=True)

            for file_name in tqdm(part.data):
                target = os.path.join(str(subset_dir), file_name)
                # The relative path may contain a class sub-directory.
                Path(target).parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(os.path.join(source, file_name), target)

In [None]:
def split(X, y, groups, val_splits=5, test_splits=5, random_state=None):
    """Generate nested stratified group folds (test split outside, val split inside).

    The data is first split into ``test_splits`` train+val / test parts. Each
    train+val part is then split into ``val_splits`` train / val parts. Both
    levels use ``StratifiedGroupKFold`` with shuffling, so a group never
    appears in more than one subset of a fold.

    Args:
        X: Samples; must support indexing with an integer index array.
        y: Labels aligned with ``X``; used for stratification.
        groups: Group ids aligned with ``X``.
        val_splits: Number of inner (validation) splits.
        test_splits: Number of outer (test) splits.
        random_state: Optional seed forwarded to both splitters for
            reproducible folds. ``None`` (default) keeps the splits random.

    Yields:
        ``Fold`` objects, ``test_splits * val_splits`` in total. Fold ids run
        from 0 upwards in generation order, so every fold has a unique id.
    """
    test_fold = StratifiedGroupKFold(n_splits=test_splits, shuffle=True, random_state=random_state)
    for i, (train_val_index, test_index) in enumerate(test_fold.split(X, y, groups)):
        train_val_subset = X[train_val_index]
        train_val_y = y[train_val_index]
        train_val_groups = groups[train_val_index]

        test_subset = X[test_index]
        test_y = y[test_index]
        test_groups = groups[test_index]

        val_fold = StratifiedGroupKFold(n_splits=val_splits, shuffle=True, random_state=random_state)
        for j, (train_index, val_index) in enumerate(val_fold.split(train_val_subset, train_val_y, train_val_groups)):
            yield Fold(
                # Using only the inner index `j` repeated the same ids for every
                # outer fold, and Fold.create_folders uses the id as directory
                # name, so different folds were written to the same place.
                i * val_splits + j,
                Partition(train_val_subset[train_index], train_val_y[train_index], train_val_groups[train_index]),
                Partition(train_val_subset[val_index], train_val_y[val_index], train_val_groups[val_index]),
                Partition(test_subset, test_y, test_groups)
            )


def save_split(folds, file_path='./folds.pkl'):
    """Pickle the generated folds to disk.

    Args:
        folds: Object to store (the list of folds in this notebook).
        file_path: Destination file. Defaults to ``./folds.pkl``, the location
            that was previously hard-coded.
    """
    with open(file_path, 'wb') as file:
        pickle.dump(folds, file)


def load_folds(file_path):
    """Load and return the object pickled in ``file_path``.

    Note: unpickling can execute arbitrary code, so only load files that were
    produced by a trusted source.
    """
    with open(file_path, 'rb') as handle:
        folds = pickle.load(handle)
    return folds

# Генерация фолдов

In [65]:
meta_dict = load_all()

In [66]:
all_df = concat_dataset(meta_dict)

In [67]:
all_df

Unnamed: 0,video,class,angle,person_id,duration_sec
0,pull up_1.mp4,pull Up,фас,0,17.550867
1,pull up_2.mp4,pull Up,3_4_спереди,0,14.481133
2,pull up_3.mp4,pull Up,3_4_спереди,0,18.866667
3,pull up_4.mp4,pull Up,сзади,0,19.185833
4,pull up_5.mp4,pull Up,3_4_сзади,1,4.838167
...,...,...,...,...,...
15,tricep dips_16.mp4,tricep dips,3_4_спереди,57,10.800000
16,tricep dips_17.mp4,tricep dips,3_4_спереди,57,22.300000
17,tricep dips_18.mp4,tricep dips,3_4_спереди,57,20.200000
18,tricep dips_19.mp4,tricep dips,сзади,58,16.700000


In [68]:
X, y, groups = create_dataset_for_k_fold(all_df)

In [69]:
X

array(['pull Up/pull up_1.mp4', 'pull Up/pull up_2.mp4',
       'pull Up/pull up_3.mp4', 'pull Up/pull up_4.mp4',
       'pull Up/pull up_5.mp4', 'pull Up/pull up_6.mp4',
       'pull Up/pull up_7.mp4', 'pull Up/pull up_8.mp4',
       'pull Up/pull up_9.mp4', 'pull Up/pull up_10.mp4',
       'pull Up/pull up_11.mp4', 'pull Up/pull up_12.mp4',
       'pull Up/pull up_13.mp4', 'pull Up/pull up_14.mp4',
       'pull Up/pull up_15.mp4', 'pull Up/pull up_16.mp4',
       'pull Up/pull up_17.mp4', 'pull Up/pull up_18.mp4',
       'pull Up/pull up_19.mp4', 'pull Up/pull up_20.mp4',
       'pull Up/pull up_21.mp4', 'pull Up/pull up_22.mp4',
       'pull Up/pull up_23.mp4', 'pull Up/pull up_24.mp4',
       'pull Up/pull up_25.mp4', 'pull Up/pull up_26.mp4',
       'push-up/push-up_1.mp4', 'push-up/push-up_2.mp4',
       'push-up/push-up_3.mp4', 'push-up/push-up_4.mp4',
       'push-up/push-up_5.mp4', 'push-up/push-up_6.mp4',
       'push-up/push-up_7.mp4', 'push-up/push-up_8.mp4',
       'push-u

In [70]:
assert len(X) == len(y) == len(groups)

## Разделение на фолды

In [71]:
folds = list(split(X, y, groups))

In [72]:
for fold in folds:
    fold.show_groups()

0 0 0 0 2 2 3 3 6 7 8 8 8 8 11 11 11 11 11 11 11 13 13 13 13 13 16 17 17 17 18 17 17 17 17 17 18 17 20 20 22 24 24 24 24 24 24 27 28 29 30 31 32 33 35 35 35 38 38 40 40 41 41 41 42 42 42 43 44 44 47 47 44 49 50 50 50 50 50 50 51 51 49 49 52 52 52 53 55 55 55 55 55 56 56 56 56 57 57 57 58
1 1 1 1 1 1 1 1 1 12 12 12 12 12 12 12 12 15 21 23 23 25 25 25 25 25 26 26 26 45 45 59
4 4 5 9 9 9 9 9 9 9 9 9 9 9 9 9 9 10 10 10 10 10 10 14 14 14 14 14 14 14 14 19 34 34 34 36 36 37 37 37 37 37 37 37 37 39 39 46 46 48 48 48 46 54 54



0 0 0 0 1 1 1 1 1 1 1 1 1 2 2 3 3 8 8 8 8 11 11 11 11 11 11 11 12 12 12 12 12 12 12 12 15 17 17 17 18 17 17 17 17 17 18 17 21 22 23 23 24 24 24 24 24 24 25 25 25 25 25 26 26 26 27 28 29 32 33 35 35 35 40 40 41 41 41 42 42 42 43 45 45 47 47 49 50 50 50 50 50 50 49 49 52 52 52 53 55 55 55 55 55 56 56 56 56 57 57 57 58 59
6 7 13 13 13 13 13 16 20 20 30 31 38 38 44 44 44 51 51
4 4 5 9 9 9 9 9 9 9 9 9 9 9 9 9 9 10 10 10 10 10 10 14 14 14 14 14 14 14 14 19 34 34 34 36 36 37 

In [73]:
save_split(folds)

# Создание папки фолда

In [14]:
folds = load_folds('./folds.pkl')

In [24]:
folds[3].show_groups()

0 0 0 0 1 1 1 1 1 1 1 1 1 3 3 6 7 8 8 8 8 12 12 12 12 12 12 12 12 13 13 13 13 13 15 16 20 20 21 23 23 24 24 24 24 24 24 25 25 25 25 25 26 26 26 27 28 29 30 31 32 33 35 35 35 38 38 40 40 41 41 41 42 42 42 43 44 45 45 44 44 49 50 50 50 50 50 50 51 51 49 49 52 52 52 53 57 57 57 58 59
2 2 11 11 11 11 11 11 11 17 17 17 18 17 17 17 17 17 18 17 22 47 47 55 55 55 55 55 56 56 56 56
4 4 5 9 9 9 9 9 9 9 9 9 9 9 9 9 9 10 10 10 10 10 10 14 14 14 14 14 14 14 14 19 34 34 34 36 36 37 37 37 37 37 37 37 37 39 39 46 46 48 48 48 46 54 54





In [34]:
folds[5].val_part.data

array(['pull Up/pull up_14.mp4', 'pull Up/pull up_15.mp4',
       'pull Up/pull up_20.mp4', 'pull Up/pull up_22.mp4',
       'push-up/push-up_50.mp4', 'push-up/push-up_51.mp4',
       'push-up/push-up_52.mp4', 'push-up/push-up_52.mp4',
       'push-up/push-up_53.mp4', 'push-up/push-up_54.mp4',
       'push-up/push-up_54.mp4', 'push-up/push-up_55.mp4',
       'push-up/push-up_55.mp4', 'push-up/push-up_56.mp4',
       'chest fly machine/chest fly machine_8.mp4',
       'chest fly machine/chest fly machine_9.mp4',
       'chest fly machine/chest fly machine_10.mp4',
       'chest fly machine/chest fly machine_11.mp4',
       'chest fly machine/chest fly machine_12.mp4',
       'chest fly machine/chest fly machine_13.mp4',
       'chest fly machine/chest fly machine_24.mp4',
       'tricep dips/tricep dips_7.mp4', 'tricep dips/tricep dips_8.mp4',
       'tricep dips/tricep dips_9.mp4', 'tricep dips/tricep dips_10.mp4',
       'tricep dips/tricep dips_11.mp4', 'tricep dips/tricep dips_19.mp

In [37]:
!pwd

/mnt/c/Notebooks/Workout


In [36]:
# Copy the train/val/test files of the fold at list index 6 from the resized dataset.
# The files land in ./folds/<fold.id>/{train,val,test}: the subdirectory is named after
# Fold.id, which is not necessarily the list index used here.
folds[6].create_folders('./full_workout_dataset/resized', './folds')

100%|█████████████████████████████████████████████████████████████████████████████████| 109/109 [00:08<00:00, 12.75it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 34/34 [00:03<00:00,  9.93it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 45/45 [00:03<00:00, 12.23it/s]


# StratifiedGroupKFold (эксперимент)

In [41]:
meta_dict = load_all()

In [10]:
from sklearn.model_selection import StratifiedGroupKFold

In [40]:
meta_dict

{'pull Up':              video    class        angle  person_id  duration_sec
 0    pull up_1.mp4  pull Up          фас          0     17.550867
 1    pull up_2.mp4  pull Up  3_4_спереди          0     14.481133
 2    pull up_3.mp4  pull Up  3_4_спереди          0     18.866667
 3    pull up_4.mp4  pull Up        сзади          0     19.185833
 4    pull up_5.mp4  pull Up    3_4_сзади          1      4.838167
 5    pull up_6.mp4  pull Up    3_4_сзади          1     10.276933
 6    pull up_7.mp4  pull Up        сзади          1      9.009000
 7    pull up_8.mp4  pull Up      профиль          1      4.671333
 8    pull up_9.mp4  pull Up    3_4_сзади          1     12.178833
 9   pull up_10.mp4  pull Up          фас          1      8.108100
 10  pull up_11.mp4  pull Up          фас          1      7.340667
 11  pull up_12.mp4  pull Up  3_4_спереди          1      6.506500
 12  pull up_13.mp4  pull Up    3_4_сзади          1     13.413400
 13  pull up_14.mp4  pull Up          фас          

In [None]:
all_df = concat_dataset(meta_dict)

with pd.option_context('display.max_rows', None,):
    print(all_df)

In [60]:
# Encode the class names as integers and keep the fitted encoder.
# NOTE(review): this overwrites all_df['class'] in place. Re-running the cell refits the
# encoder on the integer codes, and create_dataset_for_k_fold (which builds
# all_df['class'] + '/' + all_df['video']) presumably fails on an integer column.
# Confirm before re-running this section.
class_le, class_nums = labels_to_num(all_df['class'])
all_df['class'] = class_nums
all_df

Unnamed: 0,video,class,angle,person_id,duration_sec
0,pull up_1.mp4,2,фас,0,17.550867
1,pull up_2.mp4,2,3_4_спереди,0,14.481133
2,pull up_3.mp4,2,3_4_спереди,0,18.866667
3,pull up_4.mp4,2,сзади,0,19.185833
4,pull up_5.mp4,2,3_4_сзади,1,4.838167
...,...,...,...,...,...
15,tricep dips_16.mp4,5,3_4_спереди,57,10.800000
16,tricep dips_17.mp4,5,3_4_спереди,57,22.300000
17,tricep dips_18.mp4,5,3_4_спереди,57,20.200000
18,tricep dips_19.mp4,5,сзади,58,16.700000


In [61]:
X, y, groups = create_dataset_for_k_fold(all_df)

In [94]:
X = np.array(X)

In [108]:
y = np.array(y)

In [76]:
groups = np.array(groups)

In [63]:
list(X)

['pull up_1.mp4',
 'pull up_2.mp4',
 'pull up_3.mp4',
 'pull up_4.mp4',
 'pull up_5.mp4',
 'pull up_6.mp4',
 'pull up_7.mp4',
 'pull up_8.mp4',
 'pull up_9.mp4',
 'pull up_10.mp4',
 'pull up_11.mp4',
 'pull up_12.mp4',
 'pull up_13.mp4',
 'pull up_14.mp4',
 'pull up_15.mp4',
 'pull up_16.mp4',
 'pull up_17.mp4',
 'pull up_18.mp4',
 'pull up_19.mp4',
 'pull up_20.mp4',
 'pull up_21.mp4',
 'pull up_22.mp4',
 'pull up_23.mp4',
 'pull up_24.mp4',
 'pull up_25.mp4',
 'pull up_26.mp4',
 'push-up_1.mp4',
 'push-up_2.mp4',
 'push-up_3.mp4',
 'push-up_4.mp4',
 'push-up_5.mp4',
 'push-up_6.mp4',
 'push-up_7.mp4',
 'push-up_8.mp4',
 'push-up_9.mp4',
 'push-up_10.mp4',
 'push-up_11.mp4',
 'push-up_12.mp4',
 'push-up_13.mp4',
 'push-up_14.mp4',
 'push-up_15.mp4',
 'push-up_16.mp4',
 'push-up_17.mp4',
 'push-up_18.mp4',
 'push-up_19.mp4',
 'push-up_20.mp4',
 'push-up_21.mp4',
 'push-up_22.mp4',
 'push-up_23.mp4',
 'push-up_24.mp4',
 'push-up_25.mp4',
 'push-up_26.mp4',
 'push-up_27.mp4',
 'push-up_2

In [64]:
list(y)

[2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5]

In [65]:
list(groups)

[0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 3,
 3,
 4,
 4,
 5,
 6,
 7,
 8,
 8,
 8,
 8,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 9,
 10,
 10,
 10,
 11,
 11,
 11,
 11,
 11,
 10,
 10,
 10,
 11,
 11,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 12,
 13,
 13,
 13,
 13,
 13,
 14,
 14,
 14,
 14,
 14,
 14,
 14,
 14,
 15,
 16,
 17,
 17,
 17,
 18,
 17,
 17,
 17,
 17,
 17,
 18,
 17,
 19,
 20,
 20,
 21,
 22,
 23,
 23,
 24,
 24,
 24,
 24,
 24,
 24,
 25,
 25,
 25,
 25,
 25,
 26,
 26,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 34,
 34,
 35,
 35,
 35,
 36,
 36,
 37,
 37,
 37,
 37,
 37,
 37,
 37,
 37,
 38,
 38,
 39,
 39,
 40,
 40,
 41,
 41,
 41,
 42,
 42,
 42,
 43,
 44,
 45,
 45,
 44,
 46,
 47,
 47,
 46,
 44,
 48,
 48,
 48,
 49,
 50,
 50,
 50,
 50,
 50,
 50,
 51,
 51,
 49,
 49,
 46,
 52,
 52,
 52,
 53,
 54,
 54,
 55,
 55,
 55,
 55,
 55,
 56,
 56,
 56,
 56,
 57,
 57,
 57,
 58,
 59]

In [69]:
assert len(X) == len(y) == len(groups)