In [1]:
import torch
import pandas as pd
import os
from tqdm.auto import tqdm

path = '../data/'
data_list = os.listdir(path)
num_epochs = 10

In [2]:
class Dataset(torch.utils.data.Dataset):
    def __init__(
        self,
        samples
    ):
        self.samples = samples
    def __len__(self):
        return len(self.samples)
    def __getitem__(
        self, 
        idx
    ):
        sample = self.samples[idx]
        return {
            'input': torch.tensor(sample[:-1], dtype=torch.float32),
            'label': torch.tensor(sample[-1], dtype=torch.int64)
        }
        
class MLP(torch.nn.Module):
    def __init__(
        self,
        input_dim=4,
        hidden_dim=32,
        num_labels=3
    ):
        super().__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, num_labels),
        )
    def forward(self, x):
        return self.model(x)

for data in tqdm(data_list):
    print(f'train model for {data}')
    samples = pd.read_csv(path+data).dropna()
    model = MLP()
    dataset = Dataset(samples.astype(int).values)
    print(dataset[0])
    opt = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-3)
    criterion = torch.nn.CrossEntropyLoss()

    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=32,
        shuffle=True
    )
    avg_loss = 0 
    for _ in range(num_epochs):
        for batch in loader:
            opt.zero_grad()
            logits = model(batch['input'])
            loss = criterion(logits, batch['label'])
            loss.backward()
            opt.step()
            avg_loss += loss.item()
        avg_loss = avg_loss / len(loader)
        print(f'avg loss = {avg_loss}')
    torch.save(model.state_dict(), f'./models/mlp_{data}.pt')

  0%|          | 0/5 [00:00<?, ?it/s]

train model for data_lzsa.csv
{'input': tensor([0., 0., 0., 1.]), 'label': tensor(0)}
avg loss = 1.9965421954790752
avg loss = 2.231628394789166
avg loss = 2.2376747490079314
avg loss = 2.0484453698183285
avg loss = 2.023059511518895
avg loss = 1.9244702697357237
avg loss = 1.7846667508377088
avg loss = 1.7325349407079063
avg loss = 1.7379285387973364
avg loss = 1.6233573604421547
train model for data_pca.csv
{'input': tensor([0., 0., 0., 1.]), 'label': tensor(0)}
avg loss = 2.1092145442962646
avg loss = 2.292722054890224
avg loss = 2.2091045817550348
avg loss = 2.2652997011345954
avg loss = 2.075110677230959
avg loss = 2.057477951525025
avg loss = 1.9233730350900793
avg loss = 1.687377656002401
avg loss = 1.7353068369484457
avg loss = 1.684105205789301
train model for data_left_h.csv
{'input': tensor([0., 0., 0., 1.]), 'label': tensor(0)}
avg loss = 2.109067678451538
avg loss = 2.739816983540853
avg loss = 2.8752516375647654
avg loss = 2.8422174806948064
avg loss = 2.7517953831472517
