In [1]:
# Mount Google Drive at /content/drive so the dataset folders referenced by the
# cells below are readable. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class UCIDataset(Dataset):
    """In-memory dataset for one split of the UCI HAR feature files.

    Reads ``<data_dir>/<data_type>/X_<data_type>.txt`` (features) and
    ``<data_dir>/<data_type>/y_<data_type>.txt`` (labels). Both files are
    parsed as whitespace-delimited text without a header row and are loaded
    completely at construction time.

    Args:
        data_dir: Root folder that contains the split sub-folders.
        data_type: Name of the split; used both as the sub-folder name and as
            the suffix of the file names (default ``'train'``).

    Attributes:
        x_data: ``float32`` tensor with one row per sample.
        y_data: 1-D ``long`` tensor with one label per sample.

    Raises:
        ValueError: If the feature and label files have different row counts.

    Note:
        Labels are returned exactly as stored in the file. The sample batch
        printed in this notebook contains the value 6, so they appear to be
        1-based -- TODO confirm and shift before feeding a loss that expects
        zero-based class indices.
    """

    def __init__(self, data_dir, data_type='train'):
        self.data_dir = data_dir
        self.data_type = data_type
        self.x_data, self.y_data = self.load_data()

    def load_data(self):
        """Read the feature and label files and return them as tensors.

        Returns:
            tuple: ``(features, labels)`` where ``features`` is a 2-D
            ``float32`` tensor and ``labels`` is a 1-D ``long`` tensor.
        """
        data_path = os.path.join(self.data_dir, self.data_type)
        x_path = os.path.join(data_path, f"X_{self.data_type}.txt")
        y_path = os.path.join(data_path, f"y_{self.data_type}.txt")

        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True flag and parses the files identically.
        x_data = pd.read_csv(x_path, sep=r"\s+", header=None).values
        y_frame = pd.read_csv(y_path, sep=r"\s+", header=None)

        # Take the label column explicitly instead of squeeze(): squeeze()
        # collapses a single-row file to a 0-d array, which then breaks
        # len() and indexing on the resulting tensor.
        y_data = y_frame.iloc[:, 0].values

        if len(x_data) != len(y_data):
            raise ValueError(
                f"Feature/label row count mismatch for split '{self.data_type}': "
                f"{len(x_data)} feature rows vs {len(y_data)} labels"
            )

        return torch.tensor(x_data, dtype=torch.float32), torch.tensor(y_data, dtype=torch.long)

    def __len__(self):
        """Return the number of samples in the split."""
        return len(self.y_data)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.x_data[idx], self.y_data[idx]

def get_dataloaders(data_dir, batch_size=32):
    """Build the train and test loaders for the UCI HAR dataset.

    Args:
        data_dir: Root folder handed to ``UCIDataset`` for both splits.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader shuffles
        its samples; the test loader keeps them in file order.
    """
    # Load both splits up front, train first, before wrapping either one.
    splits = {name: UCIDataset(data_dir, data_type=name) for name in ('train', 'test')}
    return (
        DataLoader(splits['train'], batch_size=batch_size, shuffle=True),
        DataLoader(splits['test'], batch_size=batch_size, shuffle=False),
    )

# Location of the extracted UCI HAR dataset on the mounted Drive.
# NOTE: `data_dir`, `train_loader` and `test_loader` are notebook globals that
# the MHEALTH and Opportunity cells below re-bind.
data_dir = '/content/drive/MyDrive/dataset/UCI HAR Dataset'  # Update this path based on your Google Drive location

# Initialize data loaders
train_loader, test_loader = get_dataloaders(data_dir)

# Sanity check: print the first training batch (values and shapes), then stop.
for batch_idx, (data, target) in enumerate(train_loader):
    print(f'Batch {batch_idx + 1}')
    print(f'Data: {data}, {data.shape}')
    print(f'Target: {target}, {target.shape}')
    break


Batch 1
Data: tensor([[ 0.2806, -0.0123, -0.0966,  ..., -0.8567,  0.1664,  0.0931],
        [ 0.2778, -0.0162, -0.1068,  ...,  0.6245, -0.4389, -0.5700],
        [ 0.2788, -0.0170, -0.1090,  ..., -0.4899, -0.1701, -0.2125],
        ...,
        [ 0.2739, -0.0103, -0.1273,  ..., -0.4287,  0.3211,  0.3444],
        [ 0.2987, -0.0360, -0.1890,  ..., -0.8836,  0.1784,  0.0249],
        [ 0.2321, -0.0294, -0.0689,  ..., -0.0591, -0.2374, -0.5048]]), torch.Size([32, 561])
Target: tensor([4, 6, 4, 6, 3, 4, 1, 1, 6, 6, 3, 1, 3, 6, 4, 3, 1, 2, 3, 1, 5, 2, 4, 4,
        5, 5, 2, 3, 4, 2, 3, 4]), torch.Size([32])


In [3]:
class MHEALTHDataset(Dataset):
    """In-memory dataset built from every ``.log`` file in a directory.

    Each file is parsed as whitespace-delimited text without a header row.
    All files are concatenated row-wise; the last column becomes the label
    and every other column becomes a feature.

    Args:
        data_dir: Folder that directly contains the ``.log`` files.

    Attributes:
        data: ``float32`` tensor with one row per sample.
        labels: 1-D ``long`` tensor with one label per sample.

    Raises:
        ValueError: If ``data_dir`` contains no ``.log`` files.
    """

    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.data, self.labels = self.load_data()

    def load_data(self):
        """Read and concatenate all ``.log`` files.

        Returns:
            tuple: ``(features, labels)`` where ``features`` is a 2-D
            ``float32`` tensor and ``labels`` is a 1-D ``long`` tensor.
        """
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # row order of the concatenated data identical on every run.
        all_files = [
            os.path.join(self.data_dir, f)
            for f in sorted(os.listdir(self.data_dir))
            if f.endswith('.log')
        ]
        if not all_files:
            # pd.concat([]) would also raise ValueError, but with a message
            # that does not point at the real cause.
            raise ValueError(f"No .log files found in {self.data_dir!r}")

        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True flag.
        data_list = [pd.read_csv(file, sep=r"\s+", header=None) for file in all_files]
        data = pd.concat(data_list, ignore_index=True)

        x_data = data.iloc[:, :-1].values
        y_data = data.iloc[:, -1].values
        return torch.tensor(x_data, dtype=torch.float32), torch.tensor(y_data, dtype=torch.long)

    def __len__(self):
        """Return the total number of rows across all loaded files."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

def get_dataloaders(data_dir, batch_size=32, test_split=0.2, seed=None):
    """Load the MHEALTH data and split it randomly into train/test loaders.

    NOTE: this notebook defines ``get_dataloaders`` in several cells; whichever
    definition was executed last is the one in effect.

    Args:
        data_dir: Folder handed to ``MHEALTHDataset``.
        batch_size: Number of samples per batch for both loaders.
        test_split: Fraction of the samples assigned to the test set; must lie
            in ``[0, 1]``. The test size is rounded down.
        seed: Optional integer seed for the random split. When given, the same
            seed always produces the same train/test partition. When ``None``
            (default) the global torch RNG is used, as before. The seed does
            not affect the shuffling order of the training loader.

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader shuffles
        its samples; the test loader does not.

    Raises:
        ValueError: If ``test_split`` is outside ``[0, 1]``.
    """
    if not 0.0 <= test_split <= 1.0:
        raise ValueError(f"test_split must be between 0 and 1, got {test_split!r}")

    dataset = MHEALTHDataset(data_dir)
    test_size = int(test_split * len(dataset))
    train_size = len(dataset) - test_size

    # Only pass a generator when a seed was requested so the default call
    # keeps using torch's global RNG exactly as it did previously.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], **split_kwargs
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Location of the MHEALTH .log files on the mounted Drive.
# NOTE: this re-binds the notebook globals `data_dir`, `train_loader` and
# `test_loader` that the UCI HAR cell above also assigned.
data_dir = '/content/drive/MyDrive/dataset/mhealth+dataset/MHEALTHDATASET'
# Initialize data loaders
train_loader, test_loader = get_dataloaders(data_dir)


# Sanity check: print the first training batch (values and shapes), then stop.
for batch_idx, (data, target) in enumerate(train_loader):
    print(f'Batch {batch_idx + 1}')
    print(f'Data: {data}, {data.shape}')
    print(f'Target: {target}, {target.shape}')
    break


Batch 1
Data: tensor([[-1.0340e+01, -3.2096e+00,  5.2166e-01, -2.1183e+00, -1.5615e+00,
          1.2709e+00, -1.4231e+01,  6.1363e-01,  1.1874e-01, -7.1295e-01,
          3.6542e-01, -1.0224e+02, -9.0694e+00,  2.4241e+00, -3.7451e+00,
         -5.8887e+00,  6.1326e+00, -6.8627e-01, -1.9507e-01,  8.1897e-01,
          5.4173e+00, -1.4823e+01,  2.9498e+01],
        [-9.5213e+00,  3.0666e-01,  2.1707e-01, -5.0654e-01, -6.2794e-02,
         -2.4433e-01, -9.9374e+00,  3.2361e-01, -6.7347e-01, -7.0544e-01,
          2.7701e-01, -5.5039e-01,  3.7248e-01,  2.9588e-01, -8.6809e+00,
          4.9936e+00,  3.6427e-01, -4.6863e-01,  9.2813e-01, -3.0819e-01,
          4.7963e+00, -5.1065e+00,  2.3131e+00],
        [-9.6080e+00, -9.1365e-01, -6.1176e-02, -2.1350e-01, -5.4422e-02,
          1.6701e+00, -9.6113e+00,  2.5489e-01, -3.7662e-01, -8.4803e-01,
         -4.6562e-01,  2.5118e+00,  1.8097e+00,  5.0887e-01, -2.5799e+00,
         -9.1362e+00,  2.3765e+00, -1.3529e-01, -1.0678e+00, -4.3750e-01,


In [4]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class OpportunityDataset(Dataset):
    """In-memory dataset built from every ``.dat`` file in a directory.

    Each file is parsed as whitespace-delimited text without a header row.
    All files are concatenated row-wise; the last column becomes the label
    and every other column becomes a feature.

    Args:
        data_dir: Folder that directly contains the ``.dat`` files.
        data_type: Stored on the instance but not used when loading; every
            ``.dat`` file is read regardless of its value.

    Attributes:
        data: ``float32`` tensor with one row per sample.
        labels: 1-D ``long`` tensor with one label per sample.

    Raises:
        ValueError: If ``data_dir`` contains no ``.dat`` files.

    Note:
        Labels are returned exactly as stored in the last column. The sample
        batch printed in this notebook shows values such as 0 and 404517, so
        they look like raw annotation codes rather than contiguous class
        indices -- TODO confirm and remap before training a classifier.
        NOTE(review): the first feature column in that sample output looks
        like a monotonically large counter (possibly a timestamp) -- verify
        whether it should be used as a feature.
    """

    def __init__(self, data_dir, data_type='train'):
        self.data_dir = data_dir
        self.data_type = data_type
        self.data, self.labels = self.load_data()

    def load_data(self):
        """Read and concatenate all ``.dat`` files.

        Returns:
            tuple: ``(features, labels)`` where ``features`` is a 2-D
            ``float32`` tensor and ``labels`` is a 1-D ``long`` tensor.
        """
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # row order of the concatenated data identical on every run.
        all_files = [
            os.path.join(self.data_dir, f)
            for f in sorted(os.listdir(self.data_dir))
            if f.endswith('.dat')
        ]
        if not all_files:
            # pd.concat([]) would also raise ValueError, but with a message
            # that does not point at the real cause.
            raise ValueError(f"No .dat files found in {self.data_dir!r}")

        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True flag.
        data_list = [pd.read_csv(file, sep=r"\s+", header=None) for file in all_files]
        data = pd.concat(data_list, ignore_index=True)

        x_data = data.iloc[:, :-1].values
        y_data = data.iloc[:, -1].values
        return torch.tensor(x_data, dtype=torch.float32), torch.tensor(y_data, dtype=torch.long)

    def __len__(self):
        """Return the total number of rows across all loaded files."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

def get_dataloaders(data_dir, batch_size=32, test_split=0.2, seed=None):
    """Load the Opportunity data and split it randomly into train/test loaders.

    NOTE: this notebook defines ``get_dataloaders`` in several cells; whichever
    definition was executed last is the one in effect.

    Args:
        data_dir: Folder handed to ``OpportunityDataset``.
        batch_size: Number of samples per batch for both loaders.
        test_split: Fraction of the samples assigned to the test set; must lie
            in ``[0, 1]``. The test size is rounded down.
        seed: Optional integer seed for the random split. When given, the same
            seed always produces the same train/test partition. When ``None``
            (default) the global torch RNG is used, as before. The seed does
            not affect the shuffling order of the training loader.

    Returns:
        tuple: ``(train_loader, test_loader)``. The training loader shuffles
        its samples; the test loader does not.

    Raises:
        ValueError: If ``test_split`` is outside ``[0, 1]``.
    """
    if not 0.0 <= test_split <= 1.0:
        raise ValueError(f"test_split must be between 0 and 1, got {test_split!r}")

    dataset = OpportunityDataset(data_dir)
    test_size = int(test_split * len(dataset))
    train_size = len(dataset) - test_size

    # Only pass a generator when a seed was requested so the default call
    # keeps using torch's global RNG exactly as it did previously.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], **split_kwargs
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader



# Location of the Opportunity .dat files on the mounted Drive.
# NOTE: this re-binds the notebook globals `data_dir`, `train_loader` and
# `test_loader` that the earlier dataset cells also assigned.
data_dir = '/content/drive/MyDrive/dataset/opportunity+activity+recognition/OpportunityUCIDataset/dataset'

# Initialize data loaders
train_loader, test_loader = get_dataloaders(data_dir)

# Sanity check: print the first training batch (values and shapes), then stop.
for batch_idx, (data, target) in enumerate(train_loader):
    print(f'Batch {batch_idx + 1}')
    print(f'Data: {data}, {data.shape}')
    print(f'Target: {target}, {target.shape}')
    break


Batch 1
Data: tensor([[ 2.3480e+05, -6.7500e+02,  1.7720e+03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.9456e+05,  1.2100e+02,  3.8900e+02,  ...,  3.0200e+02,
          4.0600e+02,  5.0200e+02],
        [ 1.5132e+06, -3.0600e+02,  7.5100e+02,  ...,  0.0000e+00,
          4.0500e+02,  5.1700e+02],
        ...,
        [ 8.5459e+05, -4.6000e+01,  1.0530e+03,  ...,  3.1400e+02,
          4.1300e+02,  5.1000e+02],
        [ 1.0013e+06, -5.3800e+02,  6.2400e+02,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0189e+06, -3.9200e+02,  1.0520e+03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]]), torch.Size([32, 249])
Target: tensor([     0,      0, 404517,      0, 406508,      0, 406520, 407521,      0,
             0,      0,      0,      0,      0, 407521,      0,      0,      0,
             0,      0,      0,      0,      0,      0, 407521,      0,      0,
             0, 404520,      0,      0,      0]), torch.Size([32])
