In [1]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class StandardizingDosDataset(Dataset):
    """Row-wise dataset built from whitespace-separated ``.dat`` files.

    Expected layout: ``root_dir/<name>_<int>/*.dat``. The integer after the
    last underscore of each sub-directory name is used as the class label;
    entries that are not directories, or whose suffix is not an integer, are
    skipped. Every row of every ``.dat`` file becomes one sample.

    Per-column mean and (population) standard deviation are computed over all
    loaded rows and applied in ``__getitem__``. Note that these statistics
    cover the whole dataset, so any train/validation split made afterwards
    shares them.

    Args:
        root_dir: Directory that contains the per-class sub-directories.
        skiprows: Number of leading lines to skip in every file.
        max_rows: If not ``None``, keep at most this many leading rows of
            each file.

    Attributes:
        samples: List of ``(row, label)`` tuples, where ``row`` is a NumPy
            array of raw (unstandardized) values.
        mean: ``float32`` tensor with the per-column mean.
        std: ``float32`` tensor with the per-column standard deviation.

    Raises:
        ValueError: If no data rows were found under ``root_dir``.
    """

    def __init__(self, root_dir, skiprows=4, max_rows=None):
        self.samples = []
        self.root_dir = root_dir

        all_data = []

        # Sorted traversal keeps the sample order deterministic across
        # platforms; os.listdir() returns entries in arbitrary order.
        for class_name in sorted(os.listdir(root_dir)):
            class_path = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_path):
                continue

            try:
                class_label = int(class_name.split("_")[-1])
            except ValueError:
                # Directory name does not end in "_<int>": not a class folder.
                continue

            for fname in sorted(os.listdir(class_path)):
                if not fname.endswith(".dat"):
                    continue

                fpath = os.path.join(class_path, fname)
                # Raw string: "\s" is an invalid escape in a normal literal.
                df = pd.read_csv(fpath, sep=r'\s+', skiprows=skiprows, header=None)

                # Explicit None check so that max_rows=0 is not mistaken
                # for "no limit".
                if max_rows is not None:
                    df = df.iloc[:max_rows]

                values = df.values
                all_data.append(values)
                self.samples.extend((row, class_label) for row in values)

        if not self.samples:
            raise ValueError(
                f"No data rows found in '.dat' files under {root_dir!r}"
            )

        all_data = np.vstack(all_data)  # [N, D]
        self.mean = torch.tensor(all_data.mean(axis=0), dtype=torch.float32)
        self.std = torch.tensor(all_data.std(axis=0), dtype=torch.float32)

    def __len__(self):
        """Return the total number of rows (samples) loaded."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(x, label)`` where ``x`` is the standardized row ``idx``."""
        x_np, label = self.samples[idx]
        x = torch.tensor(x_np, dtype=torch.float32)
        # Standardization; the epsilon guards against zero-variance columns.
        x = (x - self.mean) / (self.std + 1e-8)
        return x, label


In [3]:
# Load every sample found under the dataset root directory.
dos_file_path = '../../Datasets/dos64/'
dataset = StandardizingDosDataset(dos_file_path)

# 80/20 train/validation split; the validation part takes the remainder so
# the two sizes always add up to len(dataset).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# A dedicated, seeded generator makes the partition identical on every run;
# without it the split (and the reported accuracy) changes between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Only the training data is shuffled; validation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)


In [4]:
class dosClassifier(nn.Module):
    """Fully connected classifier with a single hidden ReLU layer.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output logits (one per class).
    """

    def __init__(self, input_dim, hidden_dim=32, output_dim=3):
        super(dosClassifier, self).__init__()
        # input -> hidden projection
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        # hidden -> logits projection
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return raw logits for ``x``; no softmax is applied here."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)


In [5]:
# Training hyperparameters, collected in one place so they are easy to tune.
SEED = 42
NUM_EPOCHS = 3
LEARNING_RATE = 1e-3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Seed the global RNG so weight initialisation and batch shuffling are
# repeatable from run to run.
torch.manual_seed(SEED)

# Take the feature count from the data instead of hard-coding it, so the
# model always matches the number of columns in the loaded samples.
input_dim = train_loader.dataset[0][0].shape[-1]

model = dosClassifier(input_dim=input_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# CrossEntropyLoss consumes raw logits and integer class indices.
criterion = nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


cuda
Epoch 1, Loss: 0.3584
Epoch 2, Loss: 0.3530
Epoch 3, Loss: 0.3525


In [6]:
# Validation pass: count how many predicted classes match the labels.
model.eval()
correct, total = 0, 0
with torch.no_grad():  # gradients are not needed for evaluation
    for batch_x, batch_y in val_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        # The predicted class is the index of the largest logit.
        predicted = model(batch_x).argmax(dim=1)
        correct += int(predicted.eq(batch_y).sum())
        total += batch_y.shape[0]

print(f"Accuracy: {correct / total:.2%}")


Accuracy: 89.98%


In [7]:
# Persist only the model's parameters (its state_dict) to "model.pth".
# The dataset's standardization statistics are not part of this file.
torch.save(obj=model.state_dict(), f="model.pth")