In [1]:
import json
import os
from torch.utils.data import Dataset, random_split, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence

  from .autonotebook import tqdm as notebook_tqdm


## Config

In [2]:
# train params
train_size = 0.8
val_size = 0.1
test_size = 0.1
batch_size = 16

choosen_joints = ["RAnkle_x", "LAnkle_x"]

# file config
files = ["09", "14_cut", "15_cut", "17_cut", "18_cut", "22_cut", "25", "38", "40", "43", "44", "54_cut"]
file_name = "labeled_cycles_"
path = "../cycle_splits"

TODO create cycle plot?!

In [3]:
# class CustomImageDataset(Dataset):
#     def __init__(self, files, path, file_name = "labeled_cycles_", choosen_joints = ["RAnkle_x"], transform=None, target_transform=None):
        
#         self.data, self.labels = self.__load_data(files, file_name, path, choosen_joints)
#         self.transform = transform
#         self.target_transform = target_transform
    
#     # TODO make private?! 
#     def __load_data(self, files, file_name, path, choosen_joints):
        
#         data = []
#         labels = []

#         for file in files:
#             # Open and read the JSON file
#             file_path = os.path.join(path, file_name + file + ".json")
#             with open(file_path, 'r') as file:
#                 data_json = json.load(file)
            
#             for cycle in data_json.values():
#                 # check cycle length
                
#                 data_in_cycle = []  
#                 for joint in choosen_joints:
#                     data_in_cycle.append(cycle[joint])
                    
#                 # Convert to tensor 
#                 data_tensor = torch.tensor(data_in_cycle, dtype=torch.float32)
                
#                 data.append(data_tensor)    
#                 labels.append(cycle["Label"])
                
#             # Pad sequences to the longest found cycle
#             padded_data = pad_sequence(data, batch_first=True, padding_value=self.padding_value)
                
#         return padded_data, labels

#     def __len__(self):
#         return len(self.labels)

#     def __getitem__(self, idx):
#         label = self.labels[idx]
#         item = self.data[idx]
        
#         if self.transform:
#             item = self.transform(item)
            
#         if self.target_transform:
#             label = self.target_transform(label)
            
#         return item, label
        

In [10]:
# half ChatPGT generated
class CustomDataset(Dataset):
    
    def __init__(self, files, path, file_name="labeled_cycles_", choosen_joints=["RAnkle_x"], transform=None, target_transform=None, padding_value=0.0):
        self.choosen_joints = choosen_joints
        self.padding_value = padding_value
        self.data, self.labels = self.__load_data(files, file_name, path)
        self.transform = transform
        self.target_transform = target_transform

    def __load_data(self, files, file_name, path):
        data = []
        labels = []
        longest_cycle = 0  # Track longest cycle length

        for file in files:
            file_path = os.path.join(path, file_name + file + ".json")
            with open(file_path, 'r') as f:
                data_json = json.load(f)

            for cycle in data_json.values():
                # Extract joint data as (num_joints, time_steps)
                cycle_data = [torch.tensor(cycle[joint], dtype=torch.float32) for joint in self.choosen_joints]

                # Stack into a (num_joints, time_steps) tensor
                cycle_tensor = torch.stack(cycle_data)  # Shape: (num_joints, time_steps)
                longest_cycle = max(longest_cycle, cycle_tensor.shape[1])  # Update max length

                data.append(cycle_tensor)
                labels.append(cycle["Label"])

        # Pad all cycles to match the longest cycle length
        padded_data = []
        for cycle in data:
            num_joints, time_steps = cycle.shape

            # Pad the time_steps dimension
            pad_length = longest_cycle - time_steps
            padded_cycle = torch.nn.functional.pad(cycle, (0, pad_length), value=self.padding_value)  # Pad last dim

            padded_data.append(padded_cycle)

        # Stack all padded cycles into a final tensor
        padded_data = torch.stack(padded_data)  # Shape: (num_cycles, num_joints, max_time)

        return padded_data, labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        label = self.labels[idx]
        item = self.data[idx]  # Shape: (num_joints, max_time)

        if self.transform:
            item = self.transform(item)

        if self.target_transform:
            label = self.target_transform(label)

        return item, label


In [15]:
dataset = CustomImageDataset(files, path, file_name, choosen_joints)

padded_data, label = dataset.load_data(files, file_name, path)
print(padded_data[0])



tensor([[-38.6700, -39.1300, -37.3800, -37.7800, -34.5100, -33.1100, -30.2500,
         -27.5400, -24.8100, -23.0900, -20.0700, -16.2500, -15.0800, -13.2000,
         -10.4500, -10.4900,  -8.8900,  -8.7100,  -8.8700,  -9.7600, -10.1800,
         -10.5000, -10.4400, -12.2700, -13.0100, -13.4700, -12.6700, -14.5100,
         -13.3000, -11.6600, -11.7300, -10.7400,  -9.5500,  -9.5100,  -8.7500,
          -9.3500,  -7.0600,  -7.2700,  -8.0300,  -6.6000,  -7.1100,  -7.7400,
          -7.5700,  -8.4300,  -7.9700,  -8.2800,  -7.8400,  -9.6500,  -9.9100,
         -11.7700, -11.9300, -14.4400, -15.4300, -17.0000, -17.3900, -20.5300,
         -21.0100, -22.2500, -25.8000, -29.3200, -32.1300, -34.7400, -41.3100,
         -43.7800, -46.8600, -48.0100, -46.4700,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
           0.0000,   0.0000,   0.0000,   0.0000,   0

In [44]:
# create custom dataset
dataset = CustomImageDataset(files, path, file_name, choosen_joints)

# split data into train, val and test set
generator1 = torch.Generator().manual_seed(42)
train_split = int(dataset.__len__() * 0.8)
val_split = int(dataset.__len__() * 0.1)
test_split = int(dataset.__len__() - train_split - val_split)

print(f"Train size = {train_split}, Val size = {val_split}, Test size =  {test_split}")

train_data, val_data, test_data = random_split(dataset, [train_split, val_split, test_split], generator=generator1)

# create dataloader
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

Train size = 121, Val size = 15, Test size =  16


In [46]:
class Net(nn.Module):

    def __init__(self, input_channels, hidden1, hidden2, output):
        super(Net, self).__init__()
        
        self.linear1 = nn.Linear(input_channels, hidden1)
        self.activation = nn.ReLU()
        self.linear2 = nn.Linear(hidden1, hidden2)
        self.output_layer = nn.Linear(hidden2, output)

    def forward(self, input):
        
        out = self.activation(self.linear1(input))
        out = self.activation(self.linear2(input))
        out = self.output_layer(out)
        
        return out 


net = Net()
print(net)

TypeError: __init__() missing 4 required positional arguments: 'input_channels', 'hidden1', 'hidden2', and 'output'