In [4]:
# Mount Google Drive at /content/drive so the dataset folders referenced by
# the later cells (all under /content/drive/MyDrive/...) are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class UCIDataset(Dataset):
    """In-memory dataset over one split of the UCI HAR feature files.

    Reads ``<data_dir>/<data_type>/X_<data_type>.txt`` (features) and
    ``<data_dir>/<data_type>/y_<data_type>.txt`` (labels). Both files are
    parsed as whitespace-separated text without a header row.

    Args:
        data_dir: Root folder that contains one sub-folder per split.
        data_type: Split name; used both as the sub-folder name and as the
            file-name suffix (default ``'train'``).

    Attributes:
        x_data: ``float32`` tensor with one row per sample.
        y_data: ``long`` tensor with the label values exactly as stored in the
            label file. No re-indexing is applied; the sample batch printed by
            this notebook shows values from 1 to 6, so shift them yourself if
            a loss function expects zero-based class indices.

    Raises:
        ValueError: If the feature and label files have different row counts.
    """

    def __init__(self, data_dir, data_type='train'):
        self.data_dir = data_dir
        self.data_type = data_type
        self.x_data, self.y_data = self.load_data()

    def load_data(self):
        """Read both files of the split and return ``(features, labels)`` tensors."""
        split_dir = os.path.join(self.data_dir, self.data_type)
        feature_path = os.path.join(split_dir, f"X_{self.data_type}.txt")
        label_path = os.path.join(split_dir, f"y_{self.data_type}.txt")

        # sep=r"\s+" is the supported spelling of delim_whitespace=True,
        # which pandas deprecated in version 2.2.
        features = pd.read_csv(feature_path, sep=r"\s+", header=None).values
        labels = pd.read_csv(label_path, sep=r"\s+", header=None).values

        # squeeze() alone turns a one-row label file into a 0-d array, which
        # makes len(dataset) fail; atleast_1d keeps that case one-dimensional.
        labels = np.atleast_1d(labels.squeeze())

        if len(features) != len(labels):
            raise ValueError(
                f"{feature_path} has {len(features)} rows but "
                f"{label_path} has {len(labels)} labels"
            )

        return (
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )

    def __len__(self):
        return len(self.y_data)

    def __getitem__(self, idx):
        return self.x_data[idx], self.y_data[idx]

def get_dataloaders(data_dir, batch_size=32):
    """Build the UCI HAR loaders for the ``'train'`` and ``'test'`` splits.

    Args:
        data_dir: Root folder passed straight through to ``UCIDataset``.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    # Datasets are created in train-then-test order, as a dict keyed by split.
    splits = {name: UCIDataset(data_dir, data_type=name) for name in ('train', 'test')}
    train_loader = DataLoader(splits['train'], batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(splits['test'], batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Folder holding the extracted UCI HAR archive on the mounted Drive.
# Update this path based on your Google Drive location.
data_dir = '/content/drive/MyDrive/dataset/UCI HAR Dataset'

# Initialize data loaders
train_loader, test_loader = get_dataloaders(data_dir)

# Sanity check: show only the first training batch, then stop.
for batch_idx, (data, target) in enumerate(train_loader):
    print(
        f'Batch {batch_idx + 1}',
        f'Data: {data}, {data.shape}',
        f'Target: {target}, {target.shape}',
        sep='\n',
    )
    break


Batch 1
Data: tensor([[ 0.2768, -0.0184, -0.1081,  ..., -0.8397,  0.1915, -0.0444],
        [ 0.2602, -0.0162, -0.1022,  ...,  0.3186, -0.7612, -0.1382],
        [ 0.2673, -0.0403, -0.1369,  ..., -0.8033,  0.2340,  0.0239],
        ...,
        [ 0.1004, -0.0031, -0.0327,  ..., -0.8052,  0.2078, -0.0633],
        [-0.4119, -0.1815,  0.8507,  ...,  0.5115, -0.9487,  0.0103],
        [ 0.1872, -0.0178, -0.1015,  ..., -0.7653,  0.2385,  0.1017]]), torch.Size([32, 561])
Target: tensor([5, 6, 5, 1, 5, 2, 4, 2, 3, 4, 5, 2, 5, 3, 1, 6, 5, 4, 4, 4, 6, 4, 3, 1,
        3, 3, 6, 3, 6, 3, 6, 1]), torch.Size([32])


In [6]:
class MHEALTHDataset(Dataset):
    """In-memory dataset built from every ``.log`` file in one directory.

    Each file is parsed as whitespace-separated text without a header row and
    the files are concatenated row-wise. The last column becomes the label and
    all preceding columns become the feature vector.

    Args:
        data_dir: Directory that directly contains the ``.log`` files.

    Attributes:
        data: ``float32`` tensor with one row per log line.
        labels: ``long`` tensor with the last-column value of each row.

    Raises:
        ValueError: If ``data_dir`` contains no ``.log`` file.
    """

    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.data, self.labels = self.load_data()

    def load_data(self):
        """Read all log files and return ``(features, labels)`` tensors."""
        # os.listdir returns entries in arbitrary order; sorting makes the row
        # order (and therefore any index-based split) deterministic.
        log_paths = sorted(
            os.path.join(self.data_dir, name)
            for name in os.listdir(self.data_dir)
            if name.endswith('.log')
        )
        if not log_paths:
            # pd.concat on an empty list also raises ValueError, but with a
            # message that does not point at the directory.
            raise ValueError(f"No '.log' files found in {self.data_dir!r}")

        # sep=r"\s+" is the supported spelling of delim_whitespace=True,
        # which pandas deprecated in version 2.2.
        frames = [pd.read_csv(path, sep=r"\s+", header=None) for path in log_paths]
        table = pd.concat(frames, ignore_index=True)

        features = table.iloc[:, :-1].values
        targets = table.iloc[:, -1].values
        return (
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(targets, dtype=torch.long),
        )

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

def get_dataloaders(data_dir, batch_size=32, test_split=0.2, seed=None):
    """Load the MHEALTH logs and split them randomly into train/test loaders.

    Args:
        data_dir: Directory passed straight through to ``MHEALTHDataset``.
        batch_size: Number of samples per batch for both loaders.
        test_split: Fraction of rows assigned to the test subset; the test
            size is ``int(test_split * len(dataset))``.
        seed: Optional integer. When given, the train/test partition is drawn
            from a dedicated ``torch.Generator`` seeded with it, so the same
            rows land in each subset on every run. ``None`` (the default)
            keeps the previous behaviour of using the global RNG.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.

    Raises:
        ValueError: If ``test_split`` is outside ``[0, 1]``.
    """
    if not 0.0 <= test_split <= 1.0:
        raise ValueError(f"test_split must be within [0, 1], got {test_split!r}")

    dataset = MHEALTHDataset(data_dir)
    test_size = int(test_split * len(dataset))
    train_size = len(dataset) - test_size

    # Only pass a generator when a seed was requested, so the default call is
    # exactly the original unseeded random_split.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], **split_kwargs
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Folder on the mounted Drive that holds the MHEALTH .log files.
data_dir = '/content/drive/MyDrive/dataset/mhealth+dataset/MHEALTHDATASET'

# Initialize data loaders
train_loader, test_loader = get_dataloaders(data_dir)

# Sanity check: show only the first training batch, then stop.
for batch_idx, (data, target) in enumerate(train_loader):
    print(
        f'Batch {batch_idx + 1}',
        f'Data: {data}, {data.shape}',
        f'Target: {target}, {target.shape}',
        sep='\n',
    )
    break


Batch 1
Data: tensor([[-8.5635e+00, -4.1490e+00, -4.7734e+00,  1.4694e+00,  1.1303e+00,
          2.8866e+00, -5.8617e+00, -5.9726e+00,  3.5622e-01, -8.6492e-01,
         -4.7348e-01,  2.8674e+02,  3.6582e+01,  3.2860e+01, -2.0469e+01,
         -1.8685e+01,  2.5241e+00, -6.6275e-01,  3.0801e-01,  8.6853e-01,
         -2.2994e+01, -1.3665e+00,  4.3926e+02],
        [-5.8605e+00, -9.1394e-01, -1.3349e+00, -1.2182e+00, -1.5950e+00,
         -9.8507e-01, -5.4845e+00, -9.2991e-01,  2.7458e-01, -9.2495e-01,
         -2.6523e-01,  1.4512e+02,  2.5338e+01,  6.0793e+00, -5.2750e+00,
         -4.3472e+00,  4.2826e+00, -8.8431e-01,  4.5175e-02,  7.0043e-01,
          9.0675e+00,  2.7035e+01, -2.7887e+01],
        [-9.7711e+00,  6.7767e-01, -7.7059e-01,  1.4233e-01,  1.0047e-01,
         -1.3219e+00, -9.5642e+00, -1.9602e+00, -5.4731e-01, -7.4296e-01,
         -4.9902e-01, -5.0650e+00,  1.8739e+02, -2.3706e+01, -1.8759e+00,
         -8.8053e+00, -1.5340e+00, -1.2941e-01, -9.8563e-01, -6.2716e-01,


In [7]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class OpportunityDataset(Dataset):
    """In-memory dataset built from every ``.dat`` file in one directory.

    Each file is parsed as whitespace-separated text without a header row and
    the files are concatenated row-wise. The last column becomes the label and
    all preceding columns become the feature vector.

    No cleaning is performed. The sample batch printed by this notebook
    contains ``nan`` feature entries and raw label values such as ``0`` and
    ``406516``, so callers that need finite inputs or contiguous class
    indices must post-process ``data`` / ``labels`` themselves.

    Args:
        data_dir: Directory that directly contains the ``.dat`` files.
        data_type: Stored on the instance but not used when loading; every
            ``.dat`` file is read regardless of its value.

    Attributes:
        data: ``float32`` tensor with one row per file line.
        labels: ``long`` tensor with the last-column value of each row.

    Raises:
        ValueError: If ``data_dir`` contains no ``.dat`` file.
    """

    def __init__(self, data_dir, data_type='train'):
        self.data_dir = data_dir
        self.data_type = data_type
        self.data, self.labels = self.load_data()

    def load_data(self):
        """Read all ``.dat`` files and return ``(features, labels)`` tensors."""
        # os.listdir returns entries in arbitrary order; sorting makes the row
        # order (and therefore any index-based split) deterministic.
        dat_paths = sorted(
            os.path.join(self.data_dir, name)
            for name in os.listdir(self.data_dir)
            if name.endswith('.dat')
        )
        if not dat_paths:
            # pd.concat on an empty list also raises ValueError, but with a
            # message that does not point at the directory.
            raise ValueError(f"No '.dat' files found in {self.data_dir!r}")

        # sep=r"\s+" is the supported spelling of delim_whitespace=True,
        # which pandas deprecated in version 2.2.
        frames = [pd.read_csv(path, sep=r"\s+", header=None) for path in dat_paths]
        table = pd.concat(frames, ignore_index=True)

        features = table.iloc[:, :-1].values
        targets = table.iloc[:, -1].values
        return (
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(targets, dtype=torch.long),
        )

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

def get_dataloaders(data_dir, batch_size=32, test_split=0.2, seed=None):
    """Load the Opportunity files and split them randomly into train/test loaders.

    Args:
        data_dir: Directory passed straight through to ``OpportunityDataset``.
        batch_size: Number of samples per batch for both loaders.
        test_split: Fraction of rows assigned to the test subset; the test
            size is ``int(test_split * len(dataset))``.
        seed: Optional integer. When given, the train/test partition is drawn
            from a dedicated ``torch.Generator`` seeded with it, so the same
            rows land in each subset on every run. ``None`` (the default)
            keeps the previous behaviour of using the global RNG.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.

    Raises:
        ValueError: If ``test_split`` is outside ``[0, 1]``.
    """
    if not 0.0 <= test_split <= 1.0:
        raise ValueError(f"test_split must be within [0, 1], got {test_split!r}")

    dataset = OpportunityDataset(data_dir)
    test_size = int(test_split * len(dataset))
    train_size = len(dataset) - test_size

    # Only pass a generator when a seed was requested, so the default call is
    # exactly the original unseeded random_split.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], **split_kwargs
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader



# Folder on the mounted Drive that holds the Opportunity .dat files.
data_dir = '/content/drive/MyDrive/dataset/opportunity+activity+recognition/OpportunityUCIDataset/dataset'

# Initialize data loaders
train_loader, test_loader = get_dataloaders(data_dir)

# Sanity check: show only the first training batch, then stop.
for batch_idx, (data, target) in enumerate(train_loader):
    print(
        f'Batch {batch_idx + 1}',
        f'Data: {data}, {data.shape}',
        f'Target: {target}, {target.shape}',
        sep='\n',
    )
    break


Batch 1
Data: tensor([[ 1.0412e+06, -7.1000e+01,  1.0060e+03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2726e+05, -8.4000e+01,  9.8900e+02,  ...,  0.0000e+00,
          4.0600e+02,  5.1600e+02],
        [ 1.0697e+06, -8.6500e+02,  3.8900e+02,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.1015e+06,  1.5000e+02,  1.1500e+03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.3796e+05,  8.4900e+02,  1.1710e+03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.4780e+05,         nan,         nan,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]]), torch.Size([32, 249])
Target: tensor([     0, 406516,      0,      0,      0,      0, 408512, 406520, 406516,
        406520,      0, 406516,      0, 404517,      0, 407521,      0, 406519,
             0,      0,      0,      0,      0, 407521, 404519,      0,      0,
             0,      0,      0,      0,      0]), torch.Size([32])


In [14]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd
import numpy as np

class SHREC2021Dataset(Dataset):
    """Dataset of ``;``-separated sequence files paired with gesture annotations.

    Every file in ``sequences_dir`` whose name starts with an integer id
    (the text before the first ``.``) is one sample. The annotation for a
    sample is the entry of ``annotations_file`` carrying the same id.

    Args:
        sequences_dir: Directory containing one file per sequence.
        annotations_file: Text file with one line per sequence, formatted as
            ``id;label;start;end;label;start;end;...``.
        drop_empty_columns: When ``True``, columns that are empty in every
            row of a sequence file are removed before conversion to a tensor.
            The sample batch printed by this notebook ends each row with
            ``nan``, which is what a trailing ``;`` on every line produces.
            Defaults to ``False`` so the returned width stays unchanged.

    Attributes:
        annotations: List of ``{'sequence_id': int, 'gestures': [...]}`` dicts
            in file order.
        sequence_files: Sequence file names, sorted by numeric id.
    """

    def __init__(self, sequences_dir, annotations_file, drop_empty_columns=False):
        self.sequences_dir = sequences_dir
        self.drop_empty_columns = drop_empty_columns
        self.annotations = self.load_annotations(annotations_file)

        # Index annotations by id once, instead of scanning the list for every
        # item. setdefault keeps the first entry when an id occurs twice,
        # matching the first-match behaviour of a linear search.
        self._annotation_by_id = {}
        for entry in self.annotations:
            self._annotation_by_id.setdefault(entry['sequence_id'], entry)

        # os.listdir order is arbitrary and may include stray files; keep only
        # names with a numeric stem and order them by that number.
        self.sequence_files = sorted(
            (name for name in os.listdir(sequences_dir) if self._stem(name).isdecimal()),
            key=lambda name: (int(self._stem(name)), name),
        )

    @staticmethod
    def _stem(file_name):
        """Return the part of ``file_name`` before its first ``.``."""
        return file_name.split('.')[0]

    def load_annotations(self, annotations_file):
        """Parse the annotation file into a list of per-sequence dicts.

        Blank lines are skipped. Each remaining line yields
        ``{'sequence_id': int, 'gestures': [{'label', 'start_frame', 'end_frame'}, ...]}``.
        """
        annotations = []
        with open(annotations_file, 'r') as file:
            for line in file:
                stripped = line.strip()
                if not stripped:
                    # A blank line would otherwise fail on int('').
                    continue
                parts = stripped.split(';')
                sequence_id = int(parts[0])
                # Fields after the id come in (label, start, end) triples; the
                # range bound tolerates one trailing empty field left by a
                # final ';'.
                gestures = [
                    {
                        'label': parts[i],
                        'start_frame': int(parts[i + 1]),
                        'end_frame': int(parts[i + 2]),
                    }
                    for i in range(1, len(parts) - 1, 3)
                ]
                annotations.append({
                    'sequence_id': sequence_id,
                    'gestures': gestures
                })
        return annotations

    def __len__(self):
        return len(self.sequence_files)

    def __getitem__(self, idx):
        """Return ``{'sequence': float32 tensor (frames, columns), 'annotation': dict}``.

        Raises:
            KeyError: If no annotation exists for the sequence id.
        """
        sequence_file = self.sequence_files[idx]
        sequence_id = int(self._stem(sequence_file))

        # A bare next() over the annotation list would raise StopIteration on
        # a miss, which a loop over a DataLoader can mistake for the normal
        # end of iteration. Fail loudly instead.
        try:
            annotation = self._annotation_by_id[sequence_id]
        except KeyError:
            raise KeyError(
                f"No annotation found for sequence id {sequence_id} ({sequence_file})"
            ) from None

        sequence_path = os.path.join(self.sequences_dir, sequence_file)
        sequence_data = pd.read_csv(sequence_path, sep=';', header=None)
        if self.drop_empty_columns:
            sequence_data = sequence_data.dropna(axis=1, how='all')

        return {
            'sequence': torch.tensor(sequence_data.values, dtype=torch.float32),
            'annotation': annotation
        }

def pad_collate_fn(batch):
    """Collate variable-length sequences by zero-padding them to equal length.

    Args:
        batch: List of dicts, each with a 2-D ``'sequence'`` tensor and an
            ``'annotation'`` entry.

    Returns:
        ``(sequences, annotations)``: the sequences stacked into one tensor of
        shape ``(len(batch), longest, columns)`` with zero rows appended to the
        shorter ones, and the annotations as a list in batch order.
    """
    longest = max([sample['sequence'].shape[0] for sample in batch])

    def _pad_to_longest(seq):
        # Append as many all-zero rows as this sequence is short of the longest.
        filler = torch.zeros((longest - seq.shape[0], seq.shape[1]))
        return torch.cat((seq, filler), dim=0)

    stacked = torch.stack([_pad_to_longest(sample['sequence']) for sample in batch])
    metadata = [sample['annotation'] for sample in batch]
    return stacked, metadata

def create_dataloader(sequences_dir, annotations_file, batch_size=2, shuffle=True, num_workers=4):
    """Build a padded-batch ``DataLoader`` over a ``SHREC2021Dataset``.

    Args:
        sequences_dir: Directory with the sequence files.
        annotations_file: Path of the matching annotation file.
        batch_size: Number of sequences per batch.
        shuffle: Whether to reshuffle the sequences every epoch.
        num_workers: Number of loader worker processes. Previously fixed at 4
            inside the function; it is now a parameter with the same default,
            so it can be lowered (for example to 0) on machines with fewer
            cores or while debugging.

    Returns:
        A ``DataLoader`` that collates batches with ``pad_collate_fn``.
    """
    dataset = SHREC2021Dataset(sequences_dir, annotations_file)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        collate_fn=pad_collate_fn,
    )
    return dataloader

# Define paths for sequences and annotations
train_sequences_dir = '/content/drive/MyDrive/dataset/Shrec2021/training_set/training_set/sequences'
train_annotations_file = '/content/drive/MyDrive/dataset/Shrec2021/training_set/training_set/annotations_revised_training.txt'

test_sequences_dir = '/content/drive/MyDrive/dataset/Shrec2021/test_set/test_set/sequences'
test_annotations_file = '/content/drive/MyDrive/dataset/Shrec2021/test_set/test_set/annotations_revised.txt'

# Create dataloaders for training and testing.
# The test loader is built with shuffle=False (create_dataloader defaults to
# shuffle=True) so evaluation visits the sequences in a fixed order, matching
# the other test loaders in this notebook.
train_dataloader = create_dataloader(train_sequences_dir, train_annotations_file)
test_dataloader = create_dataloader(test_sequences_dir, test_annotations_file, shuffle=False)

# Display a batch of data to verify
sample_batch = next(iter(train_dataloader))
print(sample_batch[0].shape)
print(sample_batch)


torch.Size([2, 1938, 141])
(tensor([[[ 0.2361,  2.3443,  0.3476,  ..., -0.3675,  0.2224,     nan],
         [ 0.2338,  2.3449,  0.3550,  ..., -0.2655,  0.3419,     nan],
         [ 0.2334,  2.3712,  0.3280,  ..., -0.2766,  0.4029,     nan],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.3609,  2.4528,  0.4278,  ..., -0.6554,  0.1283,     nan],
         [ 0.3602,  2.4530,  0.4279,  ..., -0.6520,  0.1750,     nan],
         [ 0.3598,  2.4532,  0.4282,  ..., -0.6453,  0.1905,     nan],
         ...,
         [ 0.1546,  2.5405,  0.5314,  ...,  0.1180, -0.8117,     nan],
         [ 0.1553,  2.5403,  0.5313,  ...,  0.1097, -0.8063,     nan],
         [ 0.1561,  2.5397,  0.5321,  ...,  0.1060, -0.7923,     nan]]]), [{'sequence_id': 14, 'gestures': [{'label': 'LEFT', 'start_frame': 227, 'end_frame': 28

In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from PIL import Image

class SHREC2017Dataset(Dataset):
    """Dataset of depth-image sequences with per-sequence text files and two labels.

    ``annotations_file`` is a whitespace-separated table without a header.
    Each row has seven columns, read in this order: gesture id, finger id,
    subject id, essai id, the ``label_14`` value, the ``label_28`` value, and
    the sequence length (number of depth frames).

    A sample lives in
    ``<root_dir>/gesture_<g>/finger_<f>/subject_<s>/essai_<e>/`` and consists
    of ``<i>_depth.png`` for ``i`` in ``range(seq_len)`` plus
    ``general_informations.txt``, ``skeletons_image.txt`` and
    ``skeletons_world.txt``.

    Args:
        root_dir: Root folder of the dataset.
        annotations_file: Path to the annotation table for one split.
    """

    def __init__(self, root_dir, annotations_file):
        self.root_dir = root_dir
        # sep=r"\s+" is the supported spelling of delim_whitespace=True,
        # which pandas deprecated in version 2.2.
        self.annotations = pd.read_csv(annotations_file, sep=r"\s+", header=None)

    def __len__(self):
        return len(self.annotations)

    @staticmethod
    def _read_table(path):
        """Load a whitespace-separated, header-less text file as a 2-D array."""
        return pd.read_csv(path, sep=r"\s+", header=None).values

    def __getitem__(self, idx):
        """Load every file of sample ``idx``.

        Returns:
            Dict with ``float32`` tensors ``'depth_images'`` (frames stacked
            along the first axis), ``'general_info'``, ``'skeletons_image'``
            and ``'skeletons_world'``, plus the raw ``'label_14'`` and
            ``'label_28'`` values from the annotation row.
        """
        gesture_id, finger_id, subject_id, essai_id, label_14, label_28, seq_len = self.annotations.iloc[idx]
        gesture_dir = os.path.join(self.root_dir, f'gesture_{gesture_id}', f'finger_{finger_id}', f'subject_{subject_id}', f'essai_{essai_id}')

        # Load depth images. The context manager releases each file handle as
        # soon as its pixels have been copied into a NumPy array.
        frames = []
        for i in range(seq_len):
            with Image.open(os.path.join(gesture_dir, f'{i}_depth.png')) as img:
                frames.append(np.array(img))
        depth_images = np.stack(frames)

        # Load the per-sequence text files.
        # NOTE(review): the collate function used with this dataset pads these
        # along the first axis together with the depth frames, so they
        # presumably hold one row per frame; that is not checked here.
        general_info = self._read_table(os.path.join(gesture_dir, 'general_informations.txt'))
        skeletons_image = self._read_table(os.path.join(gesture_dir, 'skeletons_image.txt'))
        skeletons_world = self._read_table(os.path.join(gesture_dir, 'skeletons_world.txt'))

        sample = {
            'depth_images': torch.tensor(depth_images, dtype=torch.float32),
            'general_info': torch.tensor(general_info, dtype=torch.float32),
            'skeletons_image': torch.tensor(skeletons_image, dtype=torch.float32),
            'skeletons_world': torch.tensor(skeletons_world, dtype=torch.float32),
            'label_14': label_14,
            'label_28': label_28
        }

        return sample

def _pad_first_dim(tensor, target_length):
    """Append all-zero entries along dim 0 until ``tensor`` has ``target_length`` of them."""
    missing = target_length - tensor.shape[0]
    filler = torch.zeros((missing,) + tuple(tensor.shape[1:]))
    return torch.cat((tensor, filler), dim=0)


def pad_collate_fn(batch):
    """Collate SHREC2017 samples, zero-padding every tensor field along dim 0.

    Each of ``'depth_images'``, ``'general_info'``, ``'skeletons_image'`` and
    ``'skeletons_world'`` is padded to the longest first dimension that field
    has within the batch, then stacked. Previously every field was padded by
    the depth-frame deficit, which only stacks when a field's row count
    tracks the frame count; for such batches the result is unchanged.

    Args:
        batch: List of sample dicts holding the four tensor fields above plus
            ``'label_14'`` and ``'label_28'``.

    Returns:
        Dict with the four stacked tensors (leading batch dimension) and the
        two labels as 1-D tensors, in the same key order as the samples.
    """
    collated = {}
    for key in ('depth_images', 'general_info', 'skeletons_image', 'skeletons_world'):
        tensors = [item[key] for item in batch]
        longest = max([tensor.shape[0] for tensor in tensors])
        collated[key] = torch.stack([_pad_first_dim(tensor, longest) for tensor in tensors])

    collated['label_14'] = torch.tensor([item['label_14'] for item in batch])
    collated['label_28'] = torch.tensor([item['label_28'] for item in batch])
    return collated

def create_dataloader(root_dir, annotations_file, batch_size=4, shuffle=True):
    """Build a padded-batch ``DataLoader`` over a ``SHREC2017Dataset``.

    Args:
        root_dir: Root folder of the dataset.
        annotations_file: Path to the annotation table of the split to load.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples every epoch.

    Returns:
        A ``DataLoader`` that collates batches with ``pad_collate_fn``.
    """
    return DataLoader(
        SHREC2017Dataset(root_dir, annotations_file),
        batch_size=batch_size,
        shuffle=shuffle,
        collate_fn=pad_collate_fn,
    )

# Usage example
# Plain raw strings: the previous rf'' prefix formatted nothing.
root_dir = r'C:\Users\1\Downloads\HandGestureDataset_SHREC2017'
train_annotations_file = r'C:\Users\1\Downloads\HandGestureDataset_SHREC2017\train_gestures.txt'
test_annotations_file = r'C:\Users\1\Downloads\HandGestureDataset_SHREC2017\test_gestures.txt'
train_dataloader = create_dataloader(root_dir, train_annotations_file)
# The test loader is built with shuffle=False (create_dataloader defaults to
# shuffle=True) so evaluation visits the samples in a fixed order, matching
# the other test loaders in this notebook.
test_dataloader = create_dataloader(root_dir, test_annotations_file, shuffle=False)

sample_batch = next(iter(train_dataloader))
print(sample_batch)
print(sample_batch.keys())
print(sample_batch['depth_images'].shape, sample_batch['general_info'].shape, sample_batch['skeletons_image'].shape,  sample_batch['label_14'].shape,)

{'depth_images': tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         ...,

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0