In [260]:
import os
import random
import shutil
import numpy as np
from tqdm import tqdm
from collections import Counter
from pathlib import Path

### Number of split videos per exercise
*Same for all sequence lengths*  
pushup: 217 per class  
squat: 122 per class  
jumping-jack: 138 per class  
leg-raise: 127 per class  
half-burpee: 112 per class  

In [261]:
# Number of split videos available per class for each exercise (the same for
# every sequence length). SAM_NUM is looked up from this table so it cannot
# drift out of sync when EXERCISE is changed; an unknown exercise fails loudly
# with a KeyError instead of silently reusing a stale count.
SPLIT_VIDEOS_PER_CLASS = {
    'pushup': 217,
    'squat': 122,
    'jumping-jack': 138,
    'leg-raise': 127,
    'half-burpee': 112,
}

EXERCISE = 'half-burpee'  # exercise whose "others" folder receives the sampled negatives
TYPE = 'keypoints_seq'    # dataset sub-directory; 'images' switches to the image layout
LEN = 16                  # used in the 'len_{LEN}' directory name for non-image data
SAM_NUM = SPLIT_VIDEOS_PER_CLASS[EXERCISE]  # number of negatives to sample (split video level)

In [262]:
# Collect every file that can serve as a negative ("others") sample for
# EXERCISE: files with an accepted extension whose path does not mention
# EXERCISE, 'other' or 'idle'.
if TYPE != 'images':
    neg_dir = Path(f'/home/u6188041/jom/Dataset/EzFit_dataset/{TYPE}/len_{LEN}')
    accepted_exts = ('.npy',)  # trailing comma makes this a real one-element tuple
    # label = "<split[-4]>_<split[-3]>" of the file path
    exercise_idx, class_idx = -4, -3
else:
    neg_dir = Path(f'/home/u6188041/jom/Dataset/EzFit_dataset/{TYPE}')
    accepted_exts = ('.jpg', '.jpeg', '.png')
    # label = "<split[-3]>_<split[-2]>" of the file path
    exercise_idx, class_idx = -3, -2

# NOTE(review): these keywords are matched as substrings of the full absolute
# path, so a root directory containing one of them would exclude every file.
excluded_keywords = (EXERCISE, 'other', 'idle')

all_negs = []
classes = []
for path, subdirs, files in os.walk(neg_dir):
    # os.walk yields entries in arbitrary (filesystem) order. Sorting the
    # directory list in place and the file list keeps `all_negs` in a stable
    # order, which the order-dependent sampling further down relies on.
    subdirs.sort()
    for file in sorted(files):
        if not file.endswith(accepted_exts):
            continue
        full_path = os.path.join(path, file)
        if any(keyword in full_path for keyword in excluded_keywords):
            continue
        parts = full_path.split(os.path.sep)
        all_negs.append(full_path)
        classes.append(parts[exercise_idx] + '_' + parts[class_idx])

In [263]:
# all_negs

In [264]:
len(all_negs)

21397

In [265]:
# classes

# Split videos level
The `images` data type cannot use this section (use the seq-level section below instead).

In [266]:
def class_no(path):
    """Return the split-video id of a file path.

    The id joins the two directory names directly above the file
    (grandparent and parent) with an underscore, e.g.
    ``.../leg-raise-down/9_2/x.npy`` -> ``'leg-raise-down_9_2'``.
    Raises IndexError when the path has fewer than three components.
    """
    parts = path.split(os.path.sep)
    class_dir, seq_dir = parts[-3], parts[-2]
    return f'{class_dir}_{seq_dir}'

In [267]:
# Tag every negative file with the split-video id that class_no derives from its path.
split_vid = [class_no(neg_path) for neg_path in all_negs]
# split_vid

In [268]:
len(split_vid)

21397

In [269]:
# Distinct split-video ids; np.unique also returns them in sorted order.
unique = [*np.unique(split_vid)]
# unique

In [270]:
# Class of each unique split video: the part of its id before the first underscore.
uniq_split_vid_class = [vid_id.split('_')[0] for vid_id in unique]
# uniq_split_vid_class

In [271]:
# for counting and weighting
Counter(uniq_split_vid_class)

Counter({'jumping-jack-down': 21,
         'jumping-jack-up': 43,
         'leg-raise-down': 127,
         'leg-raise-up': 127,
         'pushup-down': 177,
         'pushup-up': 155,
         'squat-down': 113,
         'squat-up': 113})

In [272]:
# sample idx for uniq_split_vid_class
# then refer them back to all_negs

In [273]:
# Inverse-frequency weights: a split video from a rare class gets a larger
# sampling probability than one from a common class.
class_counts = Counter(uniq_split_vid_class)
total = len(uniq_split_vid_class)
sam_weights_mapping = {cls: total / count for cls, count in class_counts.items()}
sam_weights = np.array([sam_weights_mapping[cls] for cls in uniq_split_vid_class])
sam_weights /= sam_weights.min()  # rescale so the smallest weight becomes 1
sam_weights /= sam_weights.sum()  # normalise into probabilities that sum to 1

In [274]:
# sam_weights

In [275]:
# Lookup tables keyed by split-video id: its class and its sampling probability.
class_mapping = {vid_id: cls for vid_id, cls in zip(unique, uniq_split_vid_class)}
weight_mapping = {vid_id: weight for vid_id, weight in zip(unique, sam_weights)}

In [276]:
# Seed NumPy's global RNG so the draw below is repeatable for a given
# `unique` / `sam_weights`.
np.random.seed(42)
# Draw SAM_NUM distinct split-video ids (replace=False); p=sam_weights gives
# each id its selection probability.
picked = np.random.choice(unique, size=SAM_NUM, replace=False, p=sam_weights)

In [277]:
# random.seed(42)
# picked = random.choices(unique, weights=sam_weights, k=SAM_NUM)

In [278]:
# Look up the class and the sampling probability of every picked split video.
picked_classes = [class_mapping[vid_id] for vid_id in picked]
picked_weights = [weight_mapping[vid_id] for vid_id in picked]  # actually... useless

In [279]:
# check distribution of sample
Counter(picked_classes)

Counter({'leg-raise-down': 17,
         'squat-up': 15,
         'pushup-up': 11,
         'pushup-down': 16,
         'jumping-jack-up': 13,
         'jumping-jack-down': 13,
         'squat-down': 17,
         'leg-raise-up': 10})

In [280]:
# Break the first picked id on its first underscore only:
# "<exercise-class>_<sequence dir>" -> [exercise-class, sequence dir].
first_pick = picked[0]
split = first_pick.split('_', 1)
split

['leg-raise-down', '9_2']

In [281]:
# First element is the exercise class, last element is the sequence directory.
ex_class, seq_no = split[0], split[-1]

In [282]:
# Exercise name = class name without its last hyphen-separated part:
# two parts -> first part only, otherwise the first two parts joined by '-'.
ex_split = ex_class.split('-')
if len(ex_split) == 2:
    ex_name = ex_split[0]
else:
    ex_name = ex_split[0] + '-' + ex_split[1]

In [283]:
ex_name, ex_class, seq_no

('leg-raise', 'leg-raise-down', '9_2')

In [284]:
len(np.unique(picked))

112

## Save to dir

In [285]:
# Copy every picked split-video directory into EXERCISE's "others" folder so
# it can be used as negative data.
if TYPE != 'images':
    save_dir = Path(f'/home/u6188041/jom/Dataset/EzFit_dataset/{TYPE}/len_{LEN}/{EXERCISE}/others')
else:
    save_dir = Path(f'/home/u6188041/jom/Dataset/EzFit_dataset/{TYPE}/{EXERCISE}/others')
for name in tqdm(picked):
    # name is "<exercise-class>_<sequence dir>", e.g. "leg-raise-down_9_2";
    # split on the first underscore only because the sequence dir has its own.
    ex_class, seq_no = name.split('_', 1)
    # The exercise directory is the class name minus its last '-' component
    # ("leg-raise-down" -> "leg-raise", "pushup-up" -> "pushup"). rsplit handles
    # any number of hyphens in the exercise name.
    ex_name = ex_class.rsplit('-', 1)[0]
    # ex_name, ex_class, seq_no = ('squat', 'squat-down', '13_2')
    src = neg_dir / ex_name / ex_class / seq_no
    dst = save_dir / name
    if dst.exists():
        # copytree raises FileExistsError when dst already exists; skipping
        # already-copied directories makes this cell safe to re-run.
        continue
    shutil.copytree(src, dst)

100%|████████████████████████████████████████████████████████████████████████████████| 112/112 [00:00<00:00, 781.14it/s]


---

# Seq level

In [73]:
# Inverse-frequency weight per file: files of rarer classes get larger weights.
class_counts = Counter(classes)
total = len(classes)
sam_weights_mapping = {cls: total / count for cls, count in class_counts.items()}
# sam_weights_mapping
sam_weights = [sam_weights_mapping[file_cls] for file_cls in classes]

In [74]:
# Lookup tables keyed by file path: its class label and its sampling weight.
class_mapping = {neg_path: cls for neg_path, cls in zip(all_negs, classes)}
weight_mapping = {neg_path: weight for neg_path, weight in zip(all_negs, sam_weights)}

In [75]:
# Seed the stdlib RNG so this weighted draw is repeatable across kernel
# restarts (the split-video level seeds its draw with 42 as well).
random.seed(42)
# NOTE(review): random.choices samples WITH replacement, so the same file can
# be picked more than once, unlike the split-video level which draws with
# replace=False. Confirm duplicates are acceptable here.
picked = random.choices(all_negs, weights=sam_weights, k=SAM_NUM)

In [76]:
# Look up the class label and the sampling weight of every picked file.
picked_classes = [class_mapping[neg_path] for neg_path in picked]
picked_weights = [weight_mapping[neg_path] for neg_path in picked]  # actually... useless

In [77]:
# check distribution of sample
Counter(picked_classes)

Counter({'squat_squat-up': 42,
         'pushup_pushup-down': 29,
         'pushup_pushup-up': 25,
         'jumping-jack_jumping-jack-up': 22,
         'leg-raise_leg-raise-up': 41,
         'jumping-jack_jumping-jack-down': 28,
         'squat_squat-down': 26,
         'leg-raise_leg-raise-down': 39})

## Save to .npy files

In [78]:
# Copy each picked file into EXERCISE's "others" folder as others_<idx><ext>.
# Non-image data gets one sub-directory per sample (named by its index);
# images are all written straight into the "others" folder.
is_image_data = TYPE == 'images'
if is_image_data:
    save_dir = Path(f'/home/u6188041/jom/Dataset/EzFit_dataset/{TYPE}/{EXERCISE}/others')
else:
    save_dir = Path(f'/home/u6188041/jom/Dataset/EzFit_dataset/{TYPE}/len_{LEN}/{EXERCISE}/others')
for idx, src_path in enumerate(tqdm(picked)):
    base_dir = save_dir if is_image_data else save_dir / str(idx)
    if not base_dir.exists():
        base_dir.mkdir(parents=True)
    src_file = Path(src_path)
    # .suffix keeps the source extension (e.g. .jpg, .npy) on the copied file
    shutil.copy2(src_file, base_dir / f'others_{idx}{src_file.suffix}')

100%|███████████████████████████████████████████████████████████████████████████████| 252/252 [00:00<00:00, 6262.79it/s]
