## Datapoints generation

In [32]:
import pandas as pd
file_path = 'datasets/Left-All/Left-All-Corrected-events.csv'
main_json_path = 'datasets/Left-All/Left-All-mediapipe.json'
# Load CSV
data = pd.read_csv(file_path)


In [33]:
data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Left-All-Corrected.mp4,LowPocket,35,41,Shot_Phase
1,Left-All-Corrected.mp4,HighPocket,41,44,Shot_Phase
2,Left-All-Corrected.mp4,Ball_RELEASE,49,49,Shot_Phase
3,Left-All-Corrected.mp4,Release,44,53,Shot_Phase
4,Left-All-Corrected.mp4,Shot,35,73,Shot_Status
...,...,...,...,...,...
1526,Left-All-Corrected.mp4,HighPocket,36273,36275,Shot_Phase
1527,Left-All-Corrected.mp4,Ball_RELEASE,36279,36279,Shot_Phase
1528,Left-All-Corrected.mp4,Release,36275,36284,Shot_Phase
1529,Left-All-Corrected.mp4,Shot,36269,36304,Shot_Status


In [34]:
# Exclude 'Shot' and 'PostShot' events
filtered_data = data[~data['event_name'].isin(['Shot', 'PostShot','Ball_RELEASE'])]

In [35]:
filtered_data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Left-All-Corrected.mp4,LowPocket,35,41,Shot_Phase
1,Left-All-Corrected.mp4,HighPocket,41,44,Shot_Phase
3,Left-All-Corrected.mp4,Release,44,53,Shot_Phase
6,Left-All-Corrected.mp4,LowPocket,150,155,Shot_Phase
7,Left-All-Corrected.mp4,HighPocket,155,158,Shot_Phase
...,...,...,...,...,...
1520,Left-All-Corrected.mp4,HighPocket,36154,36156,Shot_Phase
1522,Left-All-Corrected.mp4,Release,36156,36167,Shot_Phase
1525,Left-All-Corrected.mp4,LowPocket,36269,36273,Shot_Phase
1526,Left-All-Corrected.mp4,HighPocket,36273,36275,Shot_Phase


In [36]:
import pandas as pd
import json

# Load JSON file with keypoints
with open(main_json_path, 'r') as f:
    keypoints_data = json.load(f)

# Initialize dictionary to store keypoints for each class
keypoints_by_class = {
    'LowPocket': [],
    'HighPocket': [],
    'Release': []
}

# Iterate through the combined DataFrame
for _, row in filtered_data.iterrows():
    start_frame = row['start_frame_num']
    end_frame = row['end_frame_num']
    event_name = row['event_name']
    
    # Collect keypoints for each frame in the range
    for frame in range(start_frame, end_frame + 1):
        if str(frame) in keypoints_data:
            keypoints_by_class[event_name].append({
                'frame': frame,
                'keypoints': keypoints_data[str(frame)]['keypoints']
            })

# Convert the keypoints dictionary to a DataFrame for easier viewing
LowPocket_keypoints_df = pd.DataFrame(keypoints_by_class['LowPocket'])
HighPocket_keypoints_df = pd.DataFrame(keypoints_by_class['HighPocket'])
Release_keypoints_df = pd.DataFrame(keypoints_by_class['Release'])



In [37]:
len(LowPocket_keypoints_df)

2059

In [38]:
len(HighPocket_keypoints_df)

831

In [39]:
len(Release_keypoints_df)

2625

In [40]:
import os
import numpy as np
import random

# Define the FrameGrouper class
class FrameGrouper:
    def __init__(self, segment_length=20):
        self.segment_length = segment_length

    def group_frames(self, data):
        grouped_values = []
        current_segment = []
        start_frame = 0
        end_frame = max(int(key) for key in data.keys())

        for frame in range(start_frame, end_frame + 1):
            if str(frame) in data:
                current_segment.append(data[str(frame)]['keypoints'])

            if len(current_segment) == self.segment_length:
                grouped_values.append(np.array(current_segment))
                current_segment = []

        # Handle padding if the last group is not full
        if len(current_segment) > 0:
            while len(current_segment) < self.segment_length:
                random_index = random.randint(0, len(current_segment) - 1)
                current_segment.insert(random_index + 1, current_segment[random_index])

            grouped_values.append(np.array(current_segment))

        return grouped_values

def save_structured_data(data_point, label, file_path):
    # Define a structured dtype
    dt = np.dtype([('data', np.float64, data_point.shape), ('label', np.int32)])
    structured_array = np.array([(data_point, label)], dtype=dt)
    np.save(file_path, structured_array)




In [41]:
# Initialize FrameGrouper
frame_grouper = FrameGrouper(segment_length=7)
save_dir_base = 'datasets/Left-All/Phase_datapoints'
position_name = 'Left-All'
# Assume keypoints_by_class contains the grouped keypoints for "Shot" and "Background"
for class_name, keypoints in keypoints_by_class.items():
    grouped_keypoints = frame_grouper.group_frames({str(item['frame']): {'keypoints': item['keypoints']} for item in keypoints})
    
    # Define the directory to save the data
    save_dir = os.path.join(save_dir_base, class_name)
    os.makedirs(save_dir, exist_ok=True)
    
    # Save each segment
    for i, segment in enumerate(grouped_keypoints):
        file_path = os.path.join(save_dir, f'{position_name}_{class_name}_segment_{i}.npy')
        if class_name == 'LowPocket':
            label = 0
        elif class_name == 'HighPocket':
            label = 1
        elif class_name == 'Release':
            label = 2
        save_structured_data(segment, label, file_path=file_path)

    print(f'Saved {len(grouped_keypoints)} segments for class {class_name} in {save_dir}')

Saved 295 segments for class LowPocket in datasets/Left-All/Phase_datapoints/LowPocket
Saved 119 segments for class HighPocket in datasets/Left-All/Phase_datapoints/HighPocket
Saved 375 segments for class Release in datasets/Left-All/Phase_datapoints/Release


In [42]:
import numpy as np

# Specify the path to your .npy file
file_path = 'datasets/Right-All/Phase_datapoints/Release/Right-All_Release_segment_5.npy'

# Load the data
data = np.load(file_path, allow_pickle=True)[0]

# Print the data
print(data['label'])

2


In [44]:
data['data']

array([[[1441.,  452.],
        [1430.,  445.],
        [1428.,  445.],
        [1425.,  446.],
        [1432.,  445.],
        [1431.,  446.],
        [1431.,  447.],
        [1412.,  456.],
        [1421.,  455.],
        [1440.,  464.],
        [1442.,  465.],
        [1386.,  518.],
        [1460.,  514.],
        [1459.,  545.],
        [1555.,  539.],
        [1475.,  491.],
        [1542.,  478.],
        [1467.,  470.],
        [1541.,  461.],
        [1472.,  466.],
        [1532.,  460.],
        [1476.,  475.],
        [1529.,  468.],
        [1394.,  671.],
        [1444.,  668.],
        [1440.,  772.],
        [1500.,  774.],
        [1419.,  874.],
        [1513.,  891.],
        [1403.,  899.],
        [1497.,  912.],
        [1454.,  901.],
        [1544.,  929.]],

       [[1441.,  402.],
        [1428.,  400.],
        [1426.,  400.],
        [1424.,  400.],
        [1429.,  401.],
        [1429.,  402.],
        [1428.,  403.],
        [1413.,  412.],
        [1421.

## Datasplit

In [46]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Specify the root directory where the subdirectories for each class are located
data_root = 'comibined_datapoints_phase'

# Define the directories to store training and validation files
train_dir = 'splitted_datapoints_phase/train'
val_dir = 'splitted_datapoints_phase/val'


# Create base directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Define the split ratio for training and validation
train_ratio = 0.8  # 80% for training, 20% for validation

# Loop through each class directory
for class_name in os.listdir(data_root):
    class_dir = os.path.join(data_root, class_name)
    if os.path.isdir(class_dir):
        # List all .npy files in the class directory
        files = [os.path.join(class_dir, f) for f in os.listdir(class_dir) if f.endswith('.npy')]
        
        # Split the files into training and validation sets
        train_files, val_files = train_test_split(files, train_size=train_ratio, random_state=42)
        
        # Ensure class subdirectories are created in train and validation directories
        train_class_dir = os.path.join(train_dir, class_name)
        val_class_dir = os.path.join(val_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)
        
        # Copy files to the respective training and validation class directories
        for f in train_files:
            shutil.copy2(f, train_class_dir)
        for f in val_files:
            shutil.copy2(f, val_class_dir)

# Print confirmation
print("Files have been copied to the respective train and validation directories with class structure preserved.")

Files have been copied to the respective train and validation directories with class structure preserved.


## Dataloader

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

class NPYDataset(Dataset):
    def __init__(self, main_directory):
        super().__init__()
        self.main_directory = main_directory
        self.filenames = self._load_filenames()

    def _load_filenames(self):
        # Traverse subdirectories to find all .npy files
        filenames = []
        for root, _, files in os.walk(self.main_directory):
            for file in files:
                if file.endswith('.npy'):
                    full_path = os.path.join(root, file)
                    filenames.append(full_path)
        return filenames

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        file_path = self.filenames[idx]
        data = np.load(file_path, allow_pickle=True)
        keypoints = np.ascontiguousarray(data['data'], dtype=np.float32)
        label = np.float32(data['label'])
        # Convert to tensors
        keypoints = torch.tensor(keypoints)
        label = torch.tensor(label)
        return keypoints, label


In [5]:


# DataLoader setup
data_directory = 'datasets/Front-All/test_data'
dataset = NPYDataset(data_directory)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# val_dataset = NPYDataset('datasets/Front-All/data_points_splitted/val')
# val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)


In [6]:
len(dataloader)

2040

In [8]:
for keypoints, labels in dataloader:
    print(keypoints.shape, labels)

torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size