In [None]:
!pip install lightning torchvision mediapipe

In [None]:
# Download the float16 `hand_landmarker.task` file from the mediapipe-models
# bucket into the current working directory (`-q` silences wget's output).
# NOTE(review): re-running this cell downloads the file again; consider `-nc`.
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

In [2]:
import os
import glob
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

# Directory holding one `<label>.pt` file per class; iterating the loaded
# object yields that class's samples.
data_dir = './datasets/angle/segments_landmarks'
# Directory that receives the train/val/test tensors written below.
out_dir = './datasets/angle/mp'

# Position in this list is the integer class id used as the training label.
id2label = ['comma', 'dot', 'BackSpace', 'idle', 'space',
            'a', 'b', 'c', 'd', 'e', 'f',
            'g', 'h', 'i', 'j', 'k', 'l',
            'm', 'n', 'o', 'p', 'q', 'r',
            's', 't', 'u', 'v', 'w', 'x',
            'y', 'z']

label2id = {label: i for i, label in enumerate(id2label)}

class_paths = sorted(glob.glob(f"{data_dir}/*.pt"))

# The class label is the file name without directory or extension. os.path is
# used instead of splitting on '/' so this also works with other separators.
classes = [os.path.splitext(os.path.basename(class_path))[0] for class_path in class_paths]
print('classes: ', classes)

labels = []
all_samples = []
for label, class_path in zip(classes, class_paths):
    class_id = label2id[label]
    # NOTE(review): torch.load unpickles its input; only load trusted files.
    samples = torch.load(class_path)
    for sample in samples:
        labels.append(class_id)
        all_samples.append(sample)

# 20% is held out for test, then 25% of the remaining 80% for validation,
# i.e. a 60/20/20 train/val/test split. Fixed seeds keep it reproducible.
fit, test, fit_label, test_label = train_test_split(all_samples, labels, test_size=0.2, random_state=0)
train, val, train_label, val_label = train_test_split(fit, fit_label, test_size=0.25, random_state=0)

# Samples are stacked into one tensor per split (assumes every sample is a
# tensor of the same shape -- TODO confirm). Labels are plain Python ints, so
# they must be converted with torch.tensor; torch.stack only accepts tensors.
test = torch.stack(test)
test_label = torch.tensor(test_label)
train = torch.stack(train)
train_label = torch.tensor(train_label)
val = torch.stack(val)
val_label = torch.tensor(val_label)

# Make sure the destination exists so torch.save does not fail on a fresh checkout.
os.makedirs(out_dir, exist_ok=True)
for split_name, split_tensor in (
    ('test', test),
    ('test_label', test_label),
    ('train', train),
    ('train_label', train_label),
    ('val', val),
    ('val_label', val_label),
):
    torch.save(split_tensor, f"{out_dir}/{split_name}.pt")

classes:  ['BackSpace', 'a', 'b', 'c', 'comma', 'd', 'dot', 'e', 'f', 'g', 'h', 'i']


In [26]:
# Import the project helpers. If they are not importable from the current
# sys.path, add the fallback directory under /kaggle/input and retry.
# Note that this rebinds `id2label`, which an earlier cell also defines.
try:
    from key_utils import KeySegmentDataModule, KeyClf, id2label
    from models.resnet import resnet101
except ImportError:
    # Only a failed import should trigger the fallback; a bare `except:` would
    # also swallow KeyboardInterrupt and unrelated errors raised while importing.
    import sys
    sys.path.append("/kaggle/input/keystroke-util")
    from key_utils import KeySegmentDataModule, KeyClf, id2label
    from models.resnet import resnet101


from lightning.pytorch.callbacks import EarlyStopping
import torchvision
import torchvision.transforms.functional
import lightning as L



# Data module over the frame-segment directory.
# NOTE(review): `transforms` is not defined in any visible cell of this
# notebook; on a fresh kernel this line raises NameError unless it is defined
# elsewhere -- confirm and define it explicitly before this cell.
data_module_kwargs = dict(
    segment_dir='datasets/angle/segments_dir',
    num_workers=0,
    transforms=transforms,
)
dm = KeySegmentDataModule(**data_module_kwargs)
# Weights exposed by the data module; handed to KeyClf by ResnetKeyClf below.
weights = dm.train_weights


class ResnetKeyClf(KeyClf):
    """KeyClf subclass whose `model` attribute is a `resnet101` network.

    The base class is initialised with the notebook-level `weights` (looked up
    when an instance is constructed) and the given learning rate. The network
    has one output class per entry of `id2label`.
    """

    def __init__(self, learning_rate=0.01):
        super(ResnetKeyClf, self).__init__(weights, learning_rate)
        n_classes = len(id2label)
        self.model = resnet101(
            sample_size=224,
            sample_duration=8,
            num_classes=n_classes,
        )


# Stop early when `val_loss` has not improved for 5 validation checks.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# `fast_dev_run=True` limits every loop to a single batch (see the Lightning
# message in this cell's output), so this trainer is a smoke test only.
trainer = L.Trainer(
    max_epochs=100,
    accelerator='cpu',
    devices='auto',
    fast_dev_run=True,
    log_every_n_steps=100,
    callbacks=early_stopping,
)

model = ResnetKeyClf()

# Training is intentionally left disabled here:
# trainer.fit(model, dm)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Users/haily/.pyenv/versions/3.10.4/lib/python3.10/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.


Train:
 Counter({'idle': 2715, 'space': 917, 'e': 497, 'BackSpace': 429, 'i': 328, 'a': 320, 'o': 302, 't': 289, 'r': 250, 'n': 246, 's': 215, 'u': 184, 'l': 183, 'h': 162, 'd': 159, 'c': 155, 'y': 119, 'g': 109, 'm': 109, 'p': 108, 'w': 103, 'b': 91, 'k': 86, 'f': 85, 'dot': 84, 'v': 73, 'comma': 66, 'j': 62, 'z': 58, 'x': 54, 'q': 52})
Val:
 Counter({'idle': 973, 'space': 311, 'e': 162, 'BackSpace': 136, 'i': 112, 'a': 108, 't': 96, 'o': 87, 'n': 75, 'r': 73, 'h': 67, 's': 62, 'u': 57, 'l': 52, 'c': 49, 'd': 49, 'f': 43, 'y': 41, 'm': 39, 'g': 38, 'w': 28, 'p': 26, 'comma': 26, 'b': 26, 'z': 24, 'dot': 23, 'v': 22, 'x': 18, 'k': 16, 'j': 16, 'q': 15})
Test:
 Counter({'idle': 944, 'space': 316, 'BackSpace': 164, 'e': 147, 'i': 112, 't': 103, 'o': 94, 'n': 85, 'r': 84, 'a': 80, 's': 64, 'l': 62, 'c': 57, 'u': 54, 'm': 53, 'd': 46, 'h': 42, 'w': 37, 'f': 36, 'y': 36, 'g': 33, 'p': 31, 'b': 31, 'dot': 28, 'k': 23, 'v': 23, 'q': 21, 'comma': 20, 'x': 20, 'z': 13, 'j': 11})
train_weights: 

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')


In [27]:
# Fetch the first training sample; it unpacks into the frame tensor and its label.
frames, label = dm.train[0]

jpg_files:  ['datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4901.jpg', 'datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4902.jpg', 'datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4903.jpg', 'datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4904.jpg', 'datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4905.jpg', 'datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4906.jpg', 'datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4907.jpg', 'datasets/angle/segments_dir/idle/video_6_idle_f4901_4908/frame_4908.jpg']


In [28]:
print(label)
print(frames.shape)
# Move the first axis of `frames` to the end so the tensor is [T, H, W, C],
# the layout write_video documents, and hand it uint8 data as it expects.
# Rounding and clamping before the cast avoids the truncation and wrap-around
# an implicit float -> uint8 conversion would produce.
# Assumes `frames` holds values in [0, 1] -- TODO confirm against the transforms.
video_frames = (frames.permute(1, 2, 3, 0) * 255).round().clamp(0, 255).to(torch.uint8)
torchvision.io.video.write_video('test.mp4', video_frames, fps=3.0)

3
torch.Size([3, 8, 224, 224])


In [2]:
# (Re)load the TensorBoard notebook extension; `%reload_ext` is safe to run
# even when the extension is already loaded.
%reload_ext tensorboard
# Show TensorBoard inline, reading event files from `lightning_logs/`.
%tensorboard --logdir=lightning_logs/

Reusing TensorBoard on port 6006 (pid 70015), started 0:00:32 ago. (Use '!kill 70015' to kill it.)