In [1]:
import os
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.svm import SVC
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

from label_flip_revised.alfa_nn_v3 import get_dual_loss, solveLPNN
from label_flip_revised.simple_nn_model import SimpleModel
from label_flip_revised.torch_utils import evaluate, train_model
from label_flip_revised.utils import create_dir, open_csv, time2str, to_csv

In [2]:
# Project root as a plain string: the parent of the kernel's current working
# directory, so the value depends on where the notebook is launched from.
PATH_ROOT = str(Path().absolute().parent)
print(PATH_ROOT)

/home/lukec/workspace/label_flip_revised_new


In [3]:
# Tunable settings for the whole notebook.
BATCH_SIZE = 256  # Mini-batch size used by both DataLoaders.
HIDDEN_LAYER = 128  # Passed to SimpleModel as hidden_dim.
LR = 0.01  # Learning rate for the SGD optimizer.
MAX_EPOCHS = 300  # Passed to train_model as its final argument (epoch budget).
SEED = 42  # Seed for the global random number generators.

# Seed the global RNGs up front so that Restart & Run All gives repeatable
# results. The shuffled training DataLoader takes its ordering from torch's
# global generator, and model weight initialisation presumably does too.
np.random.seed(SEED)
torch.manual_seed(SEED)

In [4]:
# Training split. open_csv yields three values; only the first two (used below
# as features and labels) are kept.
path_train = os.path.join(
    PATH_ROOT,
    'data',
    'synth',
    'train',
    'Easy_f26_i18_r02_c01_w4_n2000_1_train.csv',
)
X_train, y_train, _ = open_csv(path_train)

In [5]:
# Held-out split, read the same way as the training file: the third value
# returned by open_csv is discarded.
path_test = os.path.join(
    PATH_ROOT,
    'data',
    'synth',
    'test',
    'Easy_f26_i18_r02_c01_w4_n2000_1_test.csv',
)
X_test, y_test, _ = open_csv(path_test)

In [6]:
# Wrap each NumPy split as a (float32 features, int64 labels) tensor dataset.
dataset_train = TensorDataset(
    torch.from_numpy(X_train).to(torch.float32),
    torch.from_numpy(y_train).to(torch.int64),
)
dataset_test = TensorDataset(
    torch.from_numpy(X_test).to(torch.float32),
    torch.from_numpy(y_test).to(torch.int64),
)

# Only the training batches are shuffled; the test loader keeps file order.
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

In [7]:
# Use CUDA when it is available; otherwise fall back to the CPU and say so.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cpu':
    print('Running on CPU!')

In [8]:
# Input width is the number of columns in the training matrix.
n_features = X_train.shape[1]

# Cross-entropy loss over the model's two outputs.
loss_fn = nn.CrossEntropyLoss()

# Build the network on the selected device, then attach SGD with momentum.
model = SimpleModel(n_features, hidden_dim=HIDDEN_LAYER, output_dim=2).to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=LR, momentum=0.9)

In [9]:
# Fit the network first, then score it on each split. evaluate() returns a
# pair; only its first element (treated here as accuracy) is kept.
train_model(model, dataloader_train, optimizer, loss_fn, device, MAX_EPOCHS)

acc_train, _ = evaluate(dataloader_train, model, loss_fn, device)
acc_test, _ = evaluate(dataloader_test, model, loss_fn, device)
print(f'Acc train: {acc_train:.2f} test: {acc_test:.2f}')

Acc train: 0.91 test: 0.91


In [10]:
# Baseline for comparison: a scikit-learn SVC with default hyper-parameters,
# fitted on the same NumPy training split as the network.
# SVC is imported with the rest of the notebook's dependencies in the first cell.
clf = SVC()
clf.fit(X_train, y_train)

# NOTE(review): this rebinds acc_train / acc_test, replacing the network's
# values from the previous cell. Give the SVM results their own names if both
# sets of numbers are needed later.
acc_train = clf.score(X_train, y_train)
acc_test = clf.score(X_test, y_test)

print(f'Acc train: {acc_train:.2f} test: {acc_test:.2f}')

Acc train: 0.99 test: 0.97
