## Datapoints generation

In [11]:
import pandas as pd
file_path = 'datasets/Front-All/Front-All-Corrected-events.csv'
main_json_path = 'datasets/Front-All/Front-All-mediapipe_shot_events_with_ball.json'
# Load CSV
data = pd.read_csv(file_path)


In [12]:
data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Front-All-Corrected.mp4,LowPocket,469,486,Shot_Phase
1,Front-All-Corrected.mp4,HighPocket,486,489,Shot_Phase
2,Front-All-Corrected.mp4,Ball_RELEASE,490,490,Shot_Phase
3,Front-All-Corrected.mp4,Release,489,494,Shot_Phase
4,Front-All-Corrected.mp4,Shot,469,519,Shot_Status
...,...,...,...,...,...
1853,Front-All-Corrected.mp4,HighPocket,45787,45790,Shot_Phase
1854,Front-All-Corrected.mp4,Ball_RELEASE,45795,45795,Shot_Phase
1855,Front-All-Corrected.mp4,Release,45790,45799,Shot_Phase
1856,Front-All-Corrected.mp4,Shot,45772,45819,Shot_Status


In [13]:
# Exclude 'Shot' and 'PostShot' events
filtered_data = data[~data['event_name'].isin(['Shot', 'PostShot','Ball_RELEASE'])]

In [14]:
filtered_data.loc[filtered_data['event_name'] == 'HighPocket', 'event_name'] = 'Release'

In [15]:
filtered_data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Front-All-Corrected.mp4,LowPocket,469,486,Shot_Phase
1,Front-All-Corrected.mp4,Release,486,489,Shot_Phase
3,Front-All-Corrected.mp4,Release,489,494,Shot_Phase
6,Front-All-Corrected.mp4,LowPocket,672,688,Shot_Phase
7,Front-All-Corrected.mp4,Release,688,689,Shot_Phase
...,...,...,...,...,...
1847,Front-All-Corrected.mp4,Release,45606,45610,Shot_Phase
1849,Front-All-Corrected.mp4,Release,45610,45618,Shot_Phase
1852,Front-All-Corrected.mp4,LowPocket,45772,45787,Shot_Phase
1853,Front-All-Corrected.mp4,Release,45787,45790,Shot_Phase


In [16]:
import pandas as pd
import json

# Load JSON file with keypoints
with open(main_json_path, 'r') as f:
    keypoints_data = json.load(f)

# Initialize dictionary to store keypoints for each class
keypoints_by_class = {
    'LowPocket': [],
    # 'HighPocket': [],
    'Release': []
}

# Iterate through the combined DataFrame
for _, row in filtered_data.iterrows():
    start_frame = row['start_frame_num']
    end_frame = row['end_frame_num']
    event_name = row['event_name']
    
    # Collect keypoints for each frame in the range
    for frame in range(start_frame, end_frame + 1):
        if str(frame) in keypoints_data:
            one_frame_data = keypoints_data[str(frame)]['keypoints']
            # xy_only = [[x, y] for x, y, z in one_frame_data]
            # xy_only.append(keypoints_data[str(frame)]['nearest_ball_center'])
            # print(len(one_frame_data))
            # break
            keypoints_by_class[event_name].append({
                'frame': frame,
                'keypoints': one_frame_data
                # 'ball_center': keypoints_data[str(frame)]['nearest_ball_center']
            })

# Convert the keypoints dictionary to a DataFrame for easier viewing
LowPocket_keypoints_df = pd.DataFrame(keypoints_by_class['LowPocket'])
# HighPocket_keypoints_df = pd.DataFrame(keypoints_by_class['HighPocket'])
Release_keypoints_df = pd.DataFrame(keypoints_by_class['Release'])



In [17]:
keypoints_by_class

{'LowPocket': [{'frame': 681,
   'keypoints': [[1003, 509, -0.07837270200252533],
    [1008, 504, -0.06857942044734955],
    [1011, 504, -0.06862571090459824],
    [1015, 505, -0.06865260004997253],
    [997, 504, -0.069905586540699],
    [994, 505, -0.06990177184343338],
    [991, 506, -0.06992324441671371],
    [1021, 513, -0.01857926696538925],
    [987, 516, -0.02408653497695923],
    [1012, 520, -0.06041519343852997],
    [997, 520, -0.06204540655016899],
    [1048, 568, 0.01372575480490923],
    [962, 578, -0.017677029594779015],
    [1058, 631, -0.02947746030986309],
    [924, 649, -0.05404224246740341],
    [1028, 617, -0.1253538578748703],
    [966, 629, -0.1198776438832283],
    [1017, 612, -0.1445164978504181],
    [979, 624, -0.13621686398983002],
    [1014, 603, -0.13817352056503296],
    [981, 612, -0.13293839991092682],
    [1017, 605, -0.1251489371061325],
    [980, 614, -0.11938195675611496],
    [1016, 702, 0.022307639941573143],
    [962, 699, -0.02242356166243553],


In [18]:
len(keypoints_data)

11648

In [19]:
len(LowPocket_keypoints_df)

3018

In [20]:
len(Release_keypoints_df)

3210

In [21]:
import os
import numpy as np
import random

# Define the FrameGrouper class
class FrameGrouper:
    def __init__(self, segment_length=20):
        self.segment_length = segment_length

    def group_frames(self, data):
        grouped_values = []
        current_segment = []
        start_frame = 0
        end_frame = max(int(key) for key in data.keys())

        for frame in range(start_frame, end_frame + 1):
            if str(frame) in data:
                current_segment.append(data[str(frame)]['keypoints'])

            if len(current_segment) == self.segment_length:
                grouped_values.append(np.array(current_segment))
                current_segment = []

        # Handle padding if the last group is not full
        if len(current_segment) > 0:
            while len(current_segment) < self.segment_length:
                random_index = random.randint(0, len(current_segment) - 1)
                current_segment.insert(random_index + 1, current_segment[random_index])

            grouped_values.append(np.array(current_segment))

        return grouped_values

def save_structured_data(data_point, label, file_path):
    # Define a structured dtype
    dt = np.dtype([('data', np.float64, data_point.shape), ('label', np.int32)])
    structured_array = np.array([(data_point, label)], dtype=dt)
    np.save(file_path, structured_array)




In [22]:
# Initialize FrameGrouper
frame_grouper = FrameGrouper(segment_length=7)
save_dir_base = '7-segment_datasets/complete_dataset_z'
position_name = 'Front-All'
# Assume keypoints_by_class contains the grouped keypoints for "Shot" and "Background"
for class_name, keypoints in keypoints_by_class.items():
    grouped_keypoints = frame_grouper.group_frames({str(item['frame']): {'keypoints': item['keypoints']} for item in keypoints})
    
    # Define the directory to save the data
    save_dir = os.path.join(save_dir_base, class_name)
    os.makedirs(save_dir, exist_ok=True)
    
    # Save each segment
    for i, segment in enumerate(grouped_keypoints):
        file_path = os.path.join(save_dir, f'{position_name}_{class_name}_segment_{i}.npy')
        if class_name == 'LowPocket':
            label = 0
        elif class_name == 'HighPocket':
            label = 1
        elif class_name == 'Release':
            label = 1
        save_structured_data(segment, label, file_path=file_path)

    print(f'Saved {len(grouped_keypoints)} segments for class {class_name} in {save_dir}')

Saved 432 segments for class LowPocket in 7-segment_datasets/complete_dataset_z/LowPocket
Saved 420 segments for class Release in 7-segment_datasets/complete_dataset_z/Release


In [46]:
import numpy as np

# Specify the path to your .npy file
file_path = '7-segment_datasets/complete_dataset/Release/Front-All_Release_segment_3.npy'

# Load the data
data = np.load(file_path, allow_pickle=True)[0]

# Print the data
print(data['label'])
print(data['data'].shape)

1
(7, 34, 2)


## Datapoints generation Left

In [23]:
import pandas as pd
file_path = 'datasets/Left-All/Left-All-Corrected-events.csv'
main_json_path = 'datasets/Left-All/Left-All-mediapipe_shot_events_with_ball.json'
# Load CSV
data = pd.read_csv(file_path)


In [24]:
data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Left-All-Corrected.mp4,LowPocket,35,41,Shot_Phase
1,Left-All-Corrected.mp4,HighPocket,41,44,Shot_Phase
2,Left-All-Corrected.mp4,Ball_RELEASE,49,49,Shot_Phase
3,Left-All-Corrected.mp4,Release,44,53,Shot_Phase
4,Left-All-Corrected.mp4,Shot,35,73,Shot_Status
...,...,...,...,...,...
1526,Left-All-Corrected.mp4,HighPocket,36273,36275,Shot_Phase
1527,Left-All-Corrected.mp4,Ball_RELEASE,36279,36279,Shot_Phase
1528,Left-All-Corrected.mp4,Release,36275,36284,Shot_Phase
1529,Left-All-Corrected.mp4,Shot,36269,36304,Shot_Status


In [25]:
# Exclude 'Shot' and 'PostShot' events
filtered_data = data[~data['event_name'].isin(['Shot', 'PostShot','Ball_RELEASE'])]

In [26]:
filtered_data.loc[filtered_data['event_name'] == 'HighPocket', 'event_name'] = 'Release'

In [27]:
filtered_data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Left-All-Corrected.mp4,LowPocket,35,41,Shot_Phase
1,Left-All-Corrected.mp4,Release,41,44,Shot_Phase
3,Left-All-Corrected.mp4,Release,44,53,Shot_Phase
6,Left-All-Corrected.mp4,LowPocket,150,155,Shot_Phase
7,Left-All-Corrected.mp4,Release,155,158,Shot_Phase
...,...,...,...,...,...
1520,Left-All-Corrected.mp4,Release,36154,36156,Shot_Phase
1522,Left-All-Corrected.mp4,Release,36156,36167,Shot_Phase
1525,Left-All-Corrected.mp4,LowPocket,36269,36273,Shot_Phase
1526,Left-All-Corrected.mp4,Release,36273,36275,Shot_Phase


In [28]:
import pandas as pd
import json

# Load JSON file with keypoints
with open(main_json_path, 'r') as f:
    keypoints_data = json.load(f)

# Initialize dictionary to store keypoints for each class
keypoints_by_class = {
    'LowPocket': [],
    # 'HighPocket': [],
    'Release': []
}

# Iterate through the combined DataFrame
for _, row in filtered_data.iterrows():
    start_frame = row['start_frame_num']
    end_frame = row['end_frame_num']
    event_name = row['event_name']
    
    # Collect keypoints for each frame in the range
    for frame in range(start_frame, end_frame + 1):
        if str(frame) in keypoints_data:
            one_frame_data = keypoints_data[str(frame)]['keypoints']
            # xy_only = [[x, y] for x, y, z in one_frame_data]
            # xy_only.append(keypoints_data[str(frame)]['nearest_ball_center'])
            # print(len(one_frame_data))
            # break
            keypoints_by_class[event_name].append({
                'frame': frame,
                'keypoints': one_frame_data
                # 'ball_center': keypoints_data[str(frame)]['nearest_ball_center']
            })

# Convert the keypoints dictionary to a DataFrame for easier viewing
LowPocket_keypoints_df = pd.DataFrame(keypoints_by_class['LowPocket'])
# HighPocket_keypoints_df = pd.DataFrame(keypoints_by_class['HighPocket'])
Release_keypoints_df = pd.DataFrame(keypoints_by_class['Release'])



In [29]:
len(keypoints_data)

5716

In [30]:
len(LowPocket_keypoints_df)

1096

In [31]:
len(Release_keypoints_df)

1952

In [32]:
import os
import numpy as np
import random

# Define the FrameGrouper class
class FrameGrouper:
    def __init__(self, segment_length=20):
        self.segment_length = segment_length

    def group_frames(self, data):
        grouped_values = []
        current_segment = []
        start_frame = 0
        end_frame = max(int(key) for key in data.keys())

        for frame in range(start_frame, end_frame + 1):
            if str(frame) in data:
                current_segment.append(data[str(frame)]['keypoints'])

            if len(current_segment) == self.segment_length:
                grouped_values.append(np.array(current_segment))
                current_segment = []

        # Handle padding if the last group is not full
        if len(current_segment) > 0:
            while len(current_segment) < self.segment_length:
                random_index = random.randint(0, len(current_segment) - 1)
                current_segment.insert(random_index + 1, current_segment[random_index])

            grouped_values.append(np.array(current_segment))

        return grouped_values

def save_structured_data(data_point, label, file_path):
    # Define a structured dtype
    dt = np.dtype([('data', np.float64, data_point.shape), ('label', np.int32)])
    structured_array = np.array([(data_point, label)], dtype=dt)
    np.save(file_path, structured_array)




In [33]:
# Initialize FrameGrouper
frame_grouper = FrameGrouper(segment_length=7)
save_dir_base = '7-segment_datasets/complete_dataset_z'
position_name = 'Left-All'
# Assume keypoints_by_class contains the grouped keypoints for "Shot" and "Background"
for class_name, keypoints in keypoints_by_class.items():
    grouped_keypoints = frame_grouper.group_frames({str(item['frame']): {'keypoints': item['keypoints']} for item in keypoints})
    
    # Define the directory to save the data
    save_dir = os.path.join(save_dir_base, class_name)
    os.makedirs(save_dir, exist_ok=True)
    
    # Save each segment
    for i, segment in enumerate(grouped_keypoints):
        file_path = os.path.join(save_dir, f'{position_name}_{class_name}_segment_{i}.npy')
        if class_name == 'LowPocket':
            label = 0
        elif class_name == 'HighPocket':
            label = 1
        elif class_name == 'Release':
            label = 1
        save_structured_data(segment, label, file_path=file_path)

    print(f'Saved {len(grouped_keypoints)} segments for class {class_name} in {save_dir}')

Saved 157 segments for class LowPocket in 7-segment_datasets/complete_dataset_z/LowPocket
Saved 262 segments for class Release in 7-segment_datasets/complete_dataset_z/Release


In [59]:
import numpy as np

# Specify the path to your .npy file
file_path = '7-segment_datasets/complete_dataset/LowPocket/Front-All_LowPocket_segment_6.npy'

# Load the data
data = np.load(file_path, allow_pickle=True)[0]

# Print the data
print(data['label'])
print(data['data'].shape)

0
(7, 34, 2)


## Datapoints generation Left

In [1]:
import pandas as pd
file_path = 'datasets/Right-All/Right-All-Corrected-events.csv'
main_json_path = 'datasets/Right-All/Right-All-mediapipe_shot_events_with_ball.json'
# Load CSV
data = pd.read_csv(file_path)


In [2]:
data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Right-All-Corrected.mp4,LowPocket,507,520,Shot_Phase
1,Right-All-Corrected.mp4,HighPocket,520,523,Shot_Phase
2,Right-All-Corrected.mp4,Ball_RELEASE,526,526,Shot_Phase
3,Right-All-Corrected.mp4,Release,523,532,Shot_Phase
4,Right-All-Corrected.mp4,Shot,507,552,Shot_Status
...,...,...,...,...,...
1874,Right-All-Corrected.mp4,HighPocket,45333,45338,Shot_Phase
1875,Right-All-Corrected.mp4,Ball_RELEASE,45339,45339,Shot_Phase
1876,Right-All-Corrected.mp4,Release,45338,45347,Shot_Phase
1877,Right-All-Corrected.mp4,Shot,45318,45367,Shot_Status


In [3]:
# Exclude 'Shot' and 'PostShot' events
filtered_data = data[~data['event_name'].isin(['Shot', 'PostShot','Ball_RELEASE'])]

In [4]:
filtered_data.loc[filtered_data['event_name'] == 'HighPocket', 'event_name'] = 'Release'

In [5]:
filtered_data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Right-All-Corrected.mp4,LowPocket,507,520,Shot_Phase
1,Right-All-Corrected.mp4,Release,520,523,Shot_Phase
3,Right-All-Corrected.mp4,Release,523,532,Shot_Phase
6,Right-All-Corrected.mp4,LowPocket,719,726,Shot_Phase
7,Right-All-Corrected.mp4,Release,726,728,Shot_Phase
...,...,...,...,...,...
1868,Right-All-Corrected.mp4,Release,45097,45100,Shot_Phase
1870,Right-All-Corrected.mp4,Release,45100,45110,Shot_Phase
1873,Right-All-Corrected.mp4,LowPocket,45318,45333,Shot_Phase
1874,Right-All-Corrected.mp4,Release,45333,45338,Shot_Phase


In [6]:
import pandas as pd
import json

# Load JSON file with keypoints
with open(main_json_path, 'r') as f:
    keypoints_data = json.load(f)

# Initialize dictionary to store keypoints for each class
keypoints_by_class = {
    'LowPocket': [],
    # 'HighPocket': [],
    'Release': []
}

# Iterate through the combined DataFrame
for _, row in filtered_data.iterrows():
    start_frame = row['start_frame_num']
    end_frame = row['end_frame_num']
    event_name = row['event_name']
    
    # Collect keypoints for each frame in the range
    for frame in range(start_frame, end_frame + 1):
        if str(frame) in keypoints_data:
            one_frame_data = keypoints_data[str(frame)]['keypoints']
            # xy_only = [[x, y] for x, y, z in one_frame_data]
            # xy_only.append(keypoints_data[str(frame)]['nearest_ball_center'])
            # print(len(one_frame_data))
            # break
            keypoints_by_class[event_name].append({
                'frame': frame,
                'keypoints': one_frame_data
                # 'ball_center': keypoints_data[str(frame)]['nearest_ball_center']
            })

# Convert the keypoints dictionary to a DataFrame for easier viewing
LowPocket_keypoints_df = pd.DataFrame(keypoints_by_class['LowPocket'])
# HighPocket_keypoints_df = pd.DataFrame(keypoints_by_class['HighPocket'])
Release_keypoints_df = pd.DataFrame(keypoints_by_class['Release'])



In [7]:
len(keypoints_data)

13364

In [8]:
len(LowPocket_keypoints_df)

2885

In [9]:
len(Release_keypoints_df)

4949

In [10]:
import os
import numpy as np
import random

# Define the FrameGrouper class
class FrameGrouper:
    def __init__(self, segment_length=20):
        self.segment_length = segment_length

    def group_frames(self, data):
        grouped_values = []
        current_segment = []
        start_frame = 0
        end_frame = max(int(key) for key in data.keys())

        for frame in range(start_frame, end_frame + 1):
            if str(frame) in data:
                current_segment.append(data[str(frame)]['keypoints'])

            if len(current_segment) == self.segment_length:
                grouped_values.append(np.array(current_segment))
                current_segment = []

        # Handle padding if the last group is not full
        if len(current_segment) > 0:
            while len(current_segment) < self.segment_length:
                random_index = random.randint(0, len(current_segment) - 1)
                current_segment.insert(random_index + 1, current_segment[random_index])

            grouped_values.append(np.array(current_segment))

        return grouped_values

def save_structured_data(data_point, label, file_path):
    # Define a structured dtype
    dt = np.dtype([('data', np.float64, data_point.shape), ('label', np.int32)])
    structured_array = np.array([(data_point, label)], dtype=dt)
    np.save(file_path, structured_array)




In [11]:
# Initialize FrameGrouper
frame_grouper = FrameGrouper(segment_length=7)
save_dir_base = '7-segment_datasets/complete_dataset_z'
position_name = 'Right-All'
# Assume keypoints_by_class contains the grouped keypoints for "Shot" and "Background"
for class_name, keypoints in keypoints_by_class.items():
    grouped_keypoints = frame_grouper.group_frames({str(item['frame']): {'keypoints': item['keypoints']} for item in keypoints})
    
    # Define the directory to save the data
    save_dir = os.path.join(save_dir_base, class_name)
    os.makedirs(save_dir, exist_ok=True)
    
    # Save each segment
    for i, segment in enumerate(grouped_keypoints):
        file_path = os.path.join(save_dir, f'{position_name}_{class_name}_segment_{i}.npy')
        if class_name == 'LowPocket':
            label = 0
        elif class_name == 'HighPocket':
            label = 1
        elif class_name == 'Release':
            label = 1
        save_structured_data(segment, label, file_path=file_path)

    print(f'Saved {len(grouped_keypoints)} segments for class {class_name} in {save_dir}')

Saved 413 segments for class LowPocket in 7-segment_datasets/complete_dataset_z/LowPocket
Saved 665 segments for class Release in 7-segment_datasets/complete_dataset_z/Release


In [None]:
import numpy as np

# Specify the path to your .npy file
file_path = '7-segment_datasets/complete_dataset/LowPocket/Front-All_LowPocket_segment_6.npy'

# Load the data
data = np.load(file_path, allow_pickle=True)[0]

# Print the data
print(data['label'])
print(data['data'].shape)

0
(7, 34, 2)


## Datapoints generation Back

In [1]:
import pandas as pd
file_path = 'datasets/Back-All/Back-All-Corrected-events.csv'
main_json_path = 'datasets/Back-All/Back-All-mediapipe_shot_events_with_ball.json'
# Load CSV
data = pd.read_csv(file_path)


In [2]:
data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Back-All-Corrected.mp4,LowPocket,15,21,Shot_Phase
1,Back-All-Corrected.mp4,HighPocket,21,23,Shot_Phase
2,Back-All-Corrected.mp4,Ball_RELEASE,27,27,Shot_Phase
3,Back-All-Corrected.mp4,Release,23,32,Shot_Phase
4,Back-All-Corrected.mp4,Shot,15,52,Shot_Status
...,...,...,...,...,...
790,Back-All-Corrected.mp4,HighPocket,16785,16787,Shot_Phase
791,Back-All-Corrected.mp4,Ball_RELEASE,16794,16794,Shot_Phase
792,Back-All-Corrected.mp4,Release,16787,16800,Shot_Phase
793,Back-All-Corrected.mp4,Shot,16779,16820,Shot_Status


In [3]:
# Exclude 'Shot' and 'PostShot' events
filtered_data = data[~data['event_name'].isin(['Shot', 'PostShot','Ball_RELEASE'])]

In [4]:
filtered_data.loc[filtered_data['event_name'] == 'HighPocket', 'event_name'] = 'Release'

In [5]:
filtered_data

Unnamed: 0,video_name,event_name,start_frame_num,end_frame_num,lstm_category
0,Back-All-Corrected.mp4,LowPocket,15,21,Shot_Phase
1,Back-All-Corrected.mp4,Release,21,23,Shot_Phase
3,Back-All-Corrected.mp4,Release,23,32,Shot_Phase
6,Back-All-Corrected.mp4,LowPocket,125,131,Shot_Phase
7,Back-All-Corrected.mp4,Release,131,134,Shot_Phase
...,...,...,...,...,...
784,Back-All-Corrected.mp4,Release,16651,16654,Shot_Phase
786,Back-All-Corrected.mp4,Release,16654,16665,Shot_Phase
789,Back-All-Corrected.mp4,LowPocket,16779,16785,Shot_Phase
790,Back-All-Corrected.mp4,Release,16785,16787,Shot_Phase


In [6]:
import pandas as pd
import json

# Load JSON file with keypoints
with open(main_json_path, 'r') as f:
    keypoints_data = json.load(f)

# Initialize dictionary to store keypoints for each class
keypoints_by_class = {
    'LowPocket': [],
    # 'HighPocket': [],
    'Release': []
}

# Iterate through the combined DataFrame
for _, row in filtered_data.iterrows():
    start_frame = row['start_frame_num']
    end_frame = row['end_frame_num']
    event_name = row['event_name']
    
    # Collect keypoints for each frame in the range
    for frame in range(start_frame, end_frame + 1):
        if str(frame) in keypoints_data:
            one_frame_data = keypoints_data[str(frame)]['keypoints']
            xy_only = [[x, y] for x, y, z in one_frame_data]
            xy_only.append(keypoints_data[str(frame)]['nearest_ball_center'])
            # print(len(one_frame_data))
            # break
            keypoints_by_class[event_name].append({
                'frame': frame,
                'keypoints': xy_only
                # 'ball_center': keypoints_data[str(frame)]['nearest_ball_center']
            })

# Convert the keypoints dictionary to a DataFrame for easier viewing
LowPocket_keypoints_df = pd.DataFrame(keypoints_by_class['LowPocket'])
# HighPocket_keypoints_df = pd.DataFrame(keypoints_by_class['HighPocket'])
Release_keypoints_df = pd.DataFrame(keypoints_by_class['Release'])



In [7]:
len(keypoints_data)

4788

In [8]:
len(LowPocket_keypoints_df)

766

In [9]:
len(Release_keypoints_df)

1900

In [10]:
import os
import numpy as np
import random

# Define the FrameGrouper class
class FrameGrouper:
    def __init__(self, segment_length=20):
        self.segment_length = segment_length

    def group_frames(self, data):
        grouped_values = []
        current_segment = []
        start_frame = 0
        end_frame = max(int(key) for key in data.keys())

        for frame in range(start_frame, end_frame + 1):
            if str(frame) in data:
                current_segment.append(data[str(frame)]['keypoints'])

            if len(current_segment) == self.segment_length:
                grouped_values.append(np.array(current_segment))
                current_segment = []

        # Handle padding if the last group is not full
        if len(current_segment) > 0:
            while len(current_segment) < self.segment_length:
                random_index = random.randint(0, len(current_segment) - 1)
                current_segment.insert(random_index + 1, current_segment[random_index])

            grouped_values.append(np.array(current_segment))

        return grouped_values

def save_structured_data(data_point, label, file_path):
    # Define a structured dtype
    dt = np.dtype([('data', np.float64, data_point.shape), ('label', np.int32)])
    structured_array = np.array([(data_point, label)], dtype=dt)
    np.save(file_path, structured_array)




In [11]:
# Initialize FrameGrouper
frame_grouper = FrameGrouper(segment_length=7)
save_dir_base = '7-segment_datasets/complete_dataset'
position_name = 'Back-All'
# Assume keypoints_by_class contains the grouped keypoints for "Shot" and "Background"
for class_name, keypoints in keypoints_by_class.items():
    grouped_keypoints = frame_grouper.group_frames({str(item['frame']): {'keypoints': item['keypoints']} for item in keypoints})
    
    # Define the directory to save the data
    save_dir = os.path.join(save_dir_base, class_name)
    os.makedirs(save_dir, exist_ok=True)
    
    # Save each segment
    for i, segment in enumerate(grouped_keypoints):
        file_path = os.path.join(save_dir, f'{position_name}_{class_name}_segment_{i}.npy')
        if class_name == 'LowPocket':
            label = 0
        elif class_name == 'HighPocket':
            label = 1
        elif class_name == 'Release':
            label = 1
        save_structured_data(segment, label, file_path=file_path)

    print(f'Saved {len(grouped_keypoints)} segments for class {class_name} in {save_dir}')

Saved 110 segments for class LowPocket in 7-segment_datasets/complete_dataset/LowPocket
Saved 255 segments for class Release in 7-segment_datasets/complete_dataset/Release


In [None]:
import numpy as np

# Specify the path to your .npy file
file_path = '7-segment_datasets/complete_dataset/LowPocket/Front-All_LowPocket_segment_6.npy'

# Load the data
data = np.load(file_path, allow_pickle=True)[0]

# Print the data
print(data['label'])
print(data['data'].shape)

0
(7, 34, 2)


## Datasplit

In [12]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Specify the root directory where the subdirectories for each class are located
data_root = '7-segment_datasets/complete_dataset'

# Define the directories to store training and validation files
train_dir = '7-segment_datasets/highRel_splitted_dataset_backall_included/train'
val_dir = '7-segment_datasets/highRel_splitted_dataset_backall_included/val'


# Create base directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Define the split ratio for training and validation
train_ratio = 0.8  # 80% for training, 20% for validation

# Loop through each class directory
for class_name in os.listdir(data_root):
    class_dir = os.path.join(data_root, class_name)
    if os.path.isdir(class_dir):
        # List all .npy files in the class directory
        files = [os.path.join(class_dir, f) for f in os.listdir(class_dir) if f.endswith('.npy')]
        
        # Split the files into training and validation sets
        train_files, val_files = train_test_split(files, train_size=train_ratio, random_state=42)
        
        # Ensure class subdirectories are created in train and validation directories
        train_class_dir = os.path.join(train_dir, class_name)
        val_class_dir = os.path.join(val_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)
        
        # Copy files to the respective training and validation class directories
        for f in train_files:
            shutil.copy2(f, train_class_dir)
        for f in val_files:
            shutil.copy2(f, val_class_dir)

# Print confirmation
print("Files have been copied to the respective train and validation directories with class structure preserved.")

Files have been copied to the respective train and validation directories with class structure preserved.


## Dataloader

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os

class NPYDataset(Dataset):
    def __init__(self, main_directory):
        super().__init__()
        self.main_directory = main_directory
        self.filenames = self._load_filenames()

    def _load_filenames(self):
        # Traverse subdirectories to find all .npy files
        filenames = []
        for root, _, files in os.walk(self.main_directory):
            for file in files:
                if file.endswith('.npy'):
                    full_path = os.path.join(root, file)
                    filenames.append(full_path)
        return filenames

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        file_path = self.filenames[idx]
        data = np.load(file_path, allow_pickle=True)
        keypoints = np.ascontiguousarray(data['data'], dtype=np.float32)
        label = np.float32(data['label'])
        # Convert to tensors
        keypoints = torch.tensor(keypoints)
        label = torch.tensor(label)
        return keypoints, label


In [5]:


# DataLoader setup
data_directory = 'datasets/Front-All/test_data'
dataset = NPYDataset(data_directory)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# val_dataset = NPYDataset('datasets/Front-All/data_points_splitted/val')
# val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)


In [6]:
len(dataloader)

2040

In [8]:
for keypoints, labels in dataloader:
    print(keypoints.shape, labels)

torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[0.]])
torch.Size([1, 1, 20, 33, 2]) tensor([[1.]])
torch.Size