# Organize data

In [None]:
!nvidia-smi


Mon Dec  9 22:31:54 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0              49W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Pair videos and frames

In [1]:
import csv

# csv events
def get_events_for_video(video_id):
    events = []
    with open('key_frames.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)
        for row in reader:
            if int(row[0]) == video_id:
                events.append(int(row[1]))
    return events




### label key frames in dataset

In [None]:
import os
import math
from collections import defaultdict
from moviepy.editor import VideoFileClip
from scipy.io import loadmat
import numpy as np

def label_frames(video_dir, num_classes=8):
    frame_labels = defaultdict(list)

    for video_id_dir in os.listdir(video_dir):

        # video_frames_dir = os.path.join(frames_dir, video_id_dir)
        video_id = int(video_id_dir.split(".")[0])

        # video_entry = next((video for video in golfdb if video[0] == video_id), None)
        # if video_entry is None:
        #     print(f"video ID {video_id} not found")
        #     continue

        key_frames = get_events_for_video(video_id)

        video_path = os.path.join(video_dir, f'{video_id}.mp4')
        # print(video_path)
        # print(f"Loading video from path: {video_path}")
        clip = VideoFileClip(video_path)

        # print(f"Video {video_id} FPS: {clip.fps}, Duration: {clip.duration}")


        fps = math.ceil(clip.fps)
        total_frames = int(clip.duration * fps)
        # print(total_frames)

        # print(f"video {video_path} has {total_frames} total frames")

        key_frame_dict = {int(kf): i for i, kf in enumerate(key_frames)}

        for frame_num in range(total_frames):
            if frame_num in key_frame_dict:
                event_index = key_frame_dict[frame_num]
            else:
                event_index = num_classes

            frame_labels[video_id].append((frame_num, event_index))

    return frame_labels

# frames_dir = "../frames_160"
# video_dir = "../videos_160"
# frame_labels = label_frames(video_dir)
# print(len(frame_labels))

### todo: oversample key events

### inits

In [None]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import pickle

# Define transformations to preprocess video frames for MobileNetV2
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# frame_labels = label_frames(video_dir)

# with open('golf_frame_labels.pkl', 'wb') as f:
#     pickle.dump(frame_labels, f)

# get labels for frames
with open('golf_frame_labels.pkl', 'rb') as f:
    frame_labels = pickle.load(f)

In [None]:
!nvidia-smi


Mon Dec  9 22:42:05 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0              55W / 400W |    567MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
print(frame_labels)

defaultdict(<class 'list'>, {1013: [(0, 8), (1, 8), (2, 8), (3, 8), (4, 8), (5, 8), (6, 8), (7, 8), (8, 8), (9, 8), (10, 8), (11, 8), (12, 8), (13, 8), (14, 8), (15, 8), (16, 8), (17, 8), (18, 8), (19, 8), (20, 8), (21, 8), (22, 8), (23, 8), (24, 8), (25, 8), (26, 8), (27, 8), (28, 8), (29, 8), (30, 8), (31, 8), (32, 8), (33, 8), (34, 8), (35, 8), (36, 8), (37, 8), (38, 8), (39, 8), (40, 8), (41, 8), (42, 8), (43, 8), (44, 8), (45, 8), (46, 8), (47, 8), (48, 8), (49, 8), (50, 8), (51, 8), (52, 8), (53, 8), (54, 8), (55, 8), (56, 8), (57, 8), (58, 0), (59, 8), (60, 8), (61, 8), (62, 8), (63, 8), (64, 8), (65, 8), (66, 8), (67, 8), (68, 8), (69, 8), (70, 8), (71, 1), (72, 8), (73, 8), (74, 2), (75, 8), (76, 8), (77, 8), (78, 8), (79, 8), (80, 8), (81, 8), (82, 8), (83, 3), (84, 8), (85, 8), (86, 8), (87, 8), (88, 4), (89, 8), (90, 8), (91, 8), (92, 5), (93, 8), (94, 6), (95, 8), (96, 8), (97, 8), (98, 8), (99, 8), (100, 8), (101, 8), (102, 8), (103, 8), (104, 8), (105, 8), (106, 8), (107

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
import zipfile

zip_file_path = '/content/drive/My Drive/test_vid_frames.zip'  


destination_dir = '/content/'


os.makedirs(destination_dir, exist_ok=True)


with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(destination_dir)

print(f"Unzipped files to {destination_dir}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Unzipped files to /content/


In [None]:

from data_class import GolfSwingDataset, ToTensor, Normalize


In [None]:
dataset = GolfSwingDataset(
    frame_labels=frame_labels,
    vid_frames_dir='vid_frames',
    seq_length=64,
    transform=transforms.Compose([
        ToTensor(),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    train=True
)

data_loader = DataLoader(
    dataset,
    batch_size=6,
    shuffle=True,
    num_workers=2,
    drop_last=False
)


In [None]:
for i, sample in enumerate(data_loader):
    images, labels = sample['images'], sample['labels']
    print(f"Batch {i} - Images shape: {images.shape}, Labels shape: {labels.shape}")
    if i ==3:
        break


Batch 0 - Images shape: torch.Size([6, 64, 3, 160, 160]), Labels shape: torch.Size([6, 64])
Batch 1 - Images shape: torch.Size([6, 64, 3, 160, 160]), Labels shape: torch.Size([6, 64])
Batch 2 - Images shape: torch.Size([6, 64, 3, 160, 160]), Labels shape: torch.Size([6, 64])
Batch 3 - Images shape: torch.Size([6, 64, 3, 160, 160]), Labels shape: torch.Size([6, 64])


In [None]:
import torch
from EventDetector import EventDetector


model = EventDetector(
    width_mult=1.,
    lstm_layers=1,
    lstm_hidden=256,
    bidirectional=True,
    dropout=False
).cuda()


seq_length = 64
C, H, W = 3, 160, 160
max_memory = torch.cuda.get_device_properties(0).total_memory

batch_size = 1
while True:
    try:
        dummy_input = torch.randn(batch_size, seq_length, C, H, W).cuda()
        _ = model(dummy_input)  # Forward pass
        print(f"Batch size {batch_size} fits into memory.")
        batch_size += 1
    except RuntimeError as e:
        print(f"Batch size {batch_size} exceeds GPU memory!")
        break


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 87.6MB/s]


Batch size 1 fits into memory.
Batch size 2 fits into memory.
Batch size 3 fits into memory.
Batch size 4 exceeds GPU memory!


# Model

In [None]:
import torch
torch.cuda.empty_cache()


## Training

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from EventDetector import EventDetector
from data_class import GolfSwingDataset, Normalize, ToTensor
import os


class AverageMeter:
    """Tracks and stores the average and current value."""
    def __init__(self):
        self.reset()

    def reset(self):
        """Resets all values to start fresh."""
        self.val = 0  # Current value
        self.avg = 0  # Average value
        self.sum = 0  # Sum of all values
        self.count = 0  # Number of updates

    def update(self, val, n=1):


        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

if __name__ == '__main__':


    split = 1
    iterations = 100
    it_save = 100
    n_cpu = 2
    seq_length = 64
    bs = 3


    model = EventDetector(
        width_mult=1.,
        lstm_layers=1,
        lstm_hidden=256,
        bidirectional=True,
        dropout=False
    )
    model.to(device)

    # Prepare dataset
    dataset = GolfSwingDataset(
        frame_labels=frame_labels,  #frames and labels dictionary
        vid_frames_dir='vid_frames',
        seq_length=seq_length,
        transform=transforms.Compose([
            ToTensor(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        train=True
    )


    data_loader = DataLoader(
        dataset,
        batch_size=bs,
        shuffle=True,
        num_workers=n_cpu,
        drop_last=True
    )

    # Handle class imbalance: Assign weights to classes
    weights = torch.FloatTensor([1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/35]).to(device)
    criterion = torch.nn.CrossEntropyLoss(weight=weights)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


    losses = AverageMeter()


    if not os.path.exists('models'):
        os.mkdir('models')

    # Training loop
    i = 0
    while i < iterations:
        for sample in data_loader:
            images, labels = sample['images'].to(device), sample['labels'].to(device)


            labels = labels.view(bs * seq_length)


            logits = model(images)


            loss = criterion(logits, labels)


            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


            losses.update(loss.item(), images.size(0))


            print(f"Iteration: {i}\tLoss: {losses.val:.4f} (Average Loss: {losses.avg:.4f})")


            i += 1
            if i % it_save == 0:
                torch.save({
                    'optimizer_state_dict': optimizer.state_dict(),
                    'model_state_dict': model.state_dict()
                }, f'models/swingnet_{i}.pth.tar')


            if i == iterations:
                break


Using device: cuda
Iteration: 0	Loss: 2.2597 (Average Loss: 2.2597)
Iteration: 1	Loss: 1.4089 (Average Loss: 1.8343)
Iteration: 2	Loss: 0.2665 (Average Loss: 1.3117)
Iteration: 3	Loss: 0.7057 (Average Loss: 1.1602)
Iteration: 4	Loss: 0.7660 (Average Loss: 1.0813)
Iteration: 5	Loss: 0.0637 (Average Loss: 0.9117)
Iteration: 6	Loss: 1.1285 (Average Loss: 0.9427)
Iteration: 7	Loss: 1.3421 (Average Loss: 0.9926)
Iteration: 8	Loss: 0.8944 (Average Loss: 0.9817)
Iteration: 9	Loss: 0.6229 (Average Loss: 0.9458)
Iteration: 10	Loss: 1.0336 (Average Loss: 0.9538)
Iteration: 11	Loss: 1.0184 (Average Loss: 0.9592)
Iteration: 12	Loss: 1.0404 (Average Loss: 0.9654)
Iteration: 13	Loss: 0.6180 (Average Loss: 0.9406)
Iteration: 14	Loss: 0.9997 (Average Loss: 0.9446)
Iteration: 15	Loss: 0.4002 (Average Loss: 0.9105)
Iteration: 16	Loss: 0.6298 (Average Loss: 0.8940)
Iteration: 17	Loss: 0.1984 (Average Loss: 0.8554)
Iteration: 18	Loss: 0.5349 (Average Loss: 0.8385)
Iteration: 19	Loss: 0.6714 (Average Loss:

In [None]:
!nvidia-smi


Mon Dec  9 22:43:12 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0              55W / 400W |   9015MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
def correct_preds(probs, labels, tol=-1):
    """
    Gets correct events in full-length sequence using tolerance based on number of frames from address to impact.

    """

    events = np.where(labels < 8)[0]

    if len(events) == 0:
        return np.array([]), np.array([]), np.array([]), tol, np.array([])


    preds = np.zeros(len(events))


    if tol == -1 and len(events) > 1:
        tol = int(max(np.round((events[-1] - events[0]) / len(events)), 1))
    elif tol == -1:
        tol = 1
    # Get the predicted class for each event
    for i in range(len(events)):
        preds[i] = np.argmax(probs[events[i], :])  # Select the class with the highest probability


    deltas = np.abs(events - preds)


    correct = (deltas <= tol).astype(np.uint8)

    return events, preds, deltas, tol, correct


In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.nn.functional as F
import numpy as np
from EventDetector import EventDetector
from data_class import GolfSwingDataset, Normalize, ToTensor



def eval(model, frame_labels, vid_frames_dir, seq_length, n_cpu, disp):

    dataset = GolfSwingDataset(
        frame_labels=frame_labels,
        vid_frames_dir=vid_frames_dir,
        seq_length=seq_length,
        transform=transforms.Compose([
            ToTensor(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        train=False
    )


    data_loader = DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        num_workers=n_cpu,
        drop_last=False
    )

    correct = []

    for i, sample in enumerate(data_loader):
        images, labels = sample['images'], sample['labels']


        batch = 0
        while batch * seq_length < images.shape[1]:
            if (batch + 1) * seq_length > images.shape[1]:
                image_batch = images[:, batch * seq_length:, :, :, :]
            else:
                image_batch = images[:, batch * seq_length:(batch + 1) * seq_length, :, :, :]
            logits = model(image_batch.to(device))
            if batch == 0:
                probs = F.softmax(logits.data, dim=1).cpu().numpy()
            else:
                probs = np.append(probs, F.softmax(logits.data, dim=1).cpu().numpy(), 0)
            batch += 1


        _, _, _, _, c = correct_preds(probs, labels.squeeze())
        if disp:
            print(i, c)
        correct.append(c)


    PCE = np.mean(correct)
    return PCE


if __name__ == '__main__':
 
    seq_length = 32  
    n_cpu = 2 
    vid_frames_dir = 'vid_frames'  
    frame_labels = 'golf_frame_labels.pkl'  
    model_checkpoint = 'models/swingnet_100.pth.tar'  

    # Load the model
    model = EventDetector(
        width_mult=1.,
        lstm_layers=1,
        lstm_hidden=256,
        bidirectional=True,
        dropout=False
    )
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.load_state_dict(torch.load(model_checkpoint, map_location=device)['model_state_dict'])
    model.to(device)
    model.eval()

    # Load frame labels
    import pickle
    with open(frame_labels, 'rb') as f:
        frame_labels = pickle.load(f)

    # Run evaluation
    PCE = eval(model, frame_labels, vid_frames_dir, seq_length, n_cpu, True)
    print(f'Average PCE: {PCE}')




  model.load_state_dict(torch.load(model_checkpoint, map_location=device)['model_state_dict'])



0 []
1 []
2 [0]
3 []
4 []
5 []
6 []
7 []
8 [0]
9 []
10 []
11 []
12 []
13 [1 1 0 0 0]
14 []
15 []
16 []
17 [0]
18 []
19 [0 0 0 0]
20 []
21 []
22 []
23 []
24 []
25 [0]
26 []
27 []
28 []
29 []
30 []
31 []
32 []
33 []
34 []
35 []
36 []
37 []
38 []
39 []
40 []
41 []
42 []
43 []
44 []
45 []
46 []
47 []
48 []
49 []
50 []
51 []
52 []
53 []
54 []
55 []
56 []
57 [1 0 0 0 0]
58 []
59 []
60 []
61 []
62 [1 0 0 0]
63 []
64 []
65 []
66 []
67 []
68 []
69 []
70 [0]
71 []
72 []
73 []
74 []
75 []
76 []
77 [0 0]
78 []
79 []
80 []
81 []
82 []
83 []
84 []
85 []
86 []
87 []
88 []
89 [1 1 0 0]
90 []
91 []
92 []
93 []
94 []
95 [1 0 0]
96 []
97 []
98 []
99 []
100 [0]
101 []
102 []
103 []
104 []
105 []
106 []
107 []
108 [0]
109 [1 0 0]
110 []
111 []
112 []
113 []
114 []
115 []
116 []
117 []
118 []
119 []
120 []
121 [0]
