In [1]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [2]:
class DosDataset(Dataset):
    """Row-level dataset built from ``.dat`` files stored in per-class folders.

    ``root_dir`` is expected to contain one sub-directory per class whose name
    ends in ``_<int>`` (for example ``class_0``, ``class_1``); the integer
    suffix is used as the label.  Every ``.dat`` file inside a class folder is
    parsed as whitespace-separated values after skipping its first four lines,
    and each remaining row becomes one ``(float32 tensor, int label)`` sample.
    All samples are loaded eagerly in ``__init__``.

    Args:
        root_dir: Directory that holds the class sub-directories.
        transform: Optional callable applied to the feature tensor each time a
            sample is fetched.

    Raises:
        ValueError: If a non-hidden sub-directory name does not end in
            ``_<int>``.
    """

    def __init__(self, root_dir, transform=None):
        self.transform = transform
        self.samples = []  # [(feature_tensor, class_label), ...]

        # Sub-directories of root_dir are the classes (class_0, class_1, ...).
        for class_name in sorted(os.listdir(root_dir)):
            class_path = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_path):
                continue
            # Hidden folders (e.g. ``.ipynb_checkpoints``) are tooling
            # artefacts, not classes; parsing their names would crash.
            if class_name.startswith("."):
                continue

            suffix = class_name.split("_")[-1]  # e.g. class_0 -> "0"
            try:
                class_label = int(suffix)
            except ValueError:
                raise ValueError(
                    f"Cannot derive a class label from directory {class_name!r}: "
                    "expected a name ending in '_<int>'"
                ) from None

            # Sort file names so the sample order does not depend on the
            # filesystem's directory listing order.
            for fname in sorted(os.listdir(class_path)):
                if not fname.endswith(".dat"):
                    continue
                fpath = os.path.join(class_path, fname)
                # Raw string: "\s" is an invalid escape in a normal literal.
                df = pd.read_csv(fpath, sep=r"\s+", skiprows=4, header=None)
                for row in df.values:
                    feature_tensor = torch.tensor(row, dtype=torch.float32)
                    self.samples.append((feature_tensor, class_label))

    def __len__(self):
        """Return the total number of rows loaded across all files."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for ``idx``, applying ``transform`` if set."""
        x, y = self.samples[idx]
        # Explicit None check: a callable may legitimately be falsy.
        if self.transform is not None:
            x = self.transform(x)
        return x, y


In [3]:
# Dataset root passed to DosDataset below; relative to the notebook's working directory.
# DosDataset treats each sub-directory of this path as one class.
dos_file_path = '../../Datasets/dos64/'

In [None]:


# Eagerly parse every .dat file under dos_file_path into (features, label) samples.
dataset = DosDataset(dos_file_path)
# Sanity check: show the first (feature_tensor, class_label) pair.
print(dataset[0])
# Serve the samples in shuffled mini-batches of 64.
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)


(tensor([   2., -100.,    0.]), 0)


: 

In [None]:

# Peek at the first batch only to confirm the tensor shapes.
for X, y in dataloader:
    print(X.shape)  # (batch, n_features); batch is 64 for a full batch (batch_size=64)
    print(y.shape)  # (batch,): one integer class label per sample
    break