In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import torch 
import torch.nn as nn 
import torch.optim as optim

import copy
import tqdm

import util
import os 

In [2]:
# Checkpoint files, given as relative paths (resolved against the process
# working directory when they are opened).
model_path_lg, model_path_sm_384, model_path_sm_768 = (
  '../model/torch_logistic_regression',
  '../model/torch_softmax_sgd_384',
  '../model/torch_softmax_sgd_768',
)

# Remember the working directory (the data paths are joined onto it later)
# and echo it so the run records where it was executed.
cwd = os.getcwd()
print(cwd)

/home/linhnm/msc_code/big_data_mining/eabdt/python


In [3]:
# Choose the compute device: the first CUDA GPU when one is available,
# otherwise the CPU.
if torch.cuda.is_available():
  device = 'cuda:0'
  print('ok')  # only printed when a GPU was found
else:
  device = 'cpu'

ok


In [4]:
# Dataset identifier. NOTE(review): the visible cells do not read this name;
# the data paths spell out 'mix' directly.
dataset = 'mix'

# Batch ranges as (from, to) pairs. The training/validation ranges are not
# referenced by the visible evaluation cells.
training_batch_from, training_batch_to = 0, 2000
val_batch_from, val_batch_to = 3000, 3600

# Range handed to the loaders for the 384-dimensional test set.
testing_batch_from, testing_batch_to = 0, 400

# Range handed to the loaders for the 768-dimensional test set.
testing_768_batch_from, testing_768_batch_to = 0, 8

In [5]:
# Presumably training-loop settings carried over from the training notebook;
# none of them is read by the visible evaluation cells.
epochs, trigger, early_stopping_round = 10000, 1000, 20

# Shape of the logistic-regression model built below.
input_dim, output_dim = 384, 1

# Not read by the visible cells.
learning_rate = 0.01

In [6]:
class LogisticRegression(nn.Module):
  """A single linear layer followed by a sigmoid.

  Args:
    input_dim: number of input features per sample.
    output_dim: number of outputs per sample.
  """

  def __init__(self, input_dim, output_dim):
    super().__init__()
    self.linear = nn.Linear(input_dim, output_dim)

  def forward(self, x):
    """Return sigmoid(linear(x)); every element lies in [0, 1]."""
    logits = self.linear(x)
    return torch.sigmoid(logits)

In [7]:
class Softmax(nn.Module):
  """Three-layer MLP (input_dim -> 128 -> 64 -> output_dim) with a sigmoid output.

  Despite the class name, no softmax is applied: both hidden layers use ReLU
  and the output of the last linear layer is passed through a sigmoid.

  The intermediate tensors of the most recent forward pass are kept on the
  instance as layer_in_1, act_1, layer_in_2, act_2 and layer_out. All of them
  are None until forward() has been called.

  Args:
    input_dim: number of input features per sample.
    output_dim: number of outputs per sample.
  """

  def __init__(self, input_dim, output_dim):
    super(Softmax, self).__init__()
    # hidden layers
    self.linear_1 = torch.nn.Linear(input_dim, 128, bias=True)
    self.linear_2 = torch.nn.Linear(128, 64, bias=True)
    # output layer
    self.linear_3 = torch.nn.Linear(64, output_dim)
    # Placeholders for the tensors forward() records, declared up front so
    # reading them before the first forward pass yields None rather than
    # raising AttributeError.
    self.layer_in_1 = None
    self.act_1 = None
    self.layer_in_2 = None
    self.act_2 = None
    self.layer_out = None
    # Legacy placeholders kept for backward compatibility; forward() never
    # assigns them.
    self.layer_in = None
    self.act = None

  def forward(self, x):
    """Run the MLP on x and return sigmoid(layer_out)."""
    self.layer_in_1 = self.linear_1(x)
    self.act_1 = torch.relu(self.layer_in_1)
    self.layer_in_2 = self.linear_2(self.act_1)
    self.act_2 = torch.relu(self.layer_in_2)
    self.layer_out = self.linear_3(self.act_2)
    # Reuse the pre-activation computed above instead of running linear_3 a
    # second time on the same input.
    y_pred = torch.sigmoid(self.layer_out)
    return y_pred

In [8]:
# Build the three networks; their trained weights are restored from the
# checkpoint files in the next cell.
model_lg = LogisticRegression(input_dim=input_dim, output_dim=output_dim)
model_sm_384 = Softmax(input_dim=384, output_dim=1)
model_sm_768 = Softmax(input_dim=768, output_dim=1)

# Binary cross-entropy loss; it is handed to util.metrics during evaluation.
criterion = nn.BCELoss()

In [9]:
# Restore the trained weights. map_location='cpu' keeps checkpoints that were
# saved from CUDA tensors loadable on a machine without a GPU; the freshly
# built models are still on the CPU here, and the Softmax models are moved to
# `device` in a later cell.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model_lg.load_state_dict(torch.load(model_path_lg, map_location='cpu'))
model_sm_384.load_state_dict(torch.load(model_path_sm_384, map_location='cpu'))
model_sm_768.load_state_dict(torch.load(model_path_sm_768, map_location='cpu'))

# eval() puts each module in evaluation mode and returns the module itself,
# so printing the result shows the architecture.
print(model_lg.eval())
print(model_sm_384.eval())
print(model_sm_768.eval())

LogisticRegression(
  (linear): Linear(in_features=384, out_features=1, bias=True)
)
Softmax(
  (linear_1): Linear(in_features=384, out_features=128, bias=True)
  (linear_2): Linear(in_features=128, out_features=64, bias=True)
  (linear_3): Linear(in_features=64, out_features=1, bias=True)
)
Softmax(
  (linear_1): Linear(in_features=768, out_features=128, bias=True)
  (linear_2): Linear(in_features=128, out_features=64, bias=True)
  (linear_3): Linear(in_features=64, out_features=1, bias=True)
)


In [17]:
# Move both Softmax models to the selected device. The value of the last
# expression (the module returned by .to) is what the cell displays.
# NOTE(review): model_lg is not moved here; it would need model_lg.to(device)
# before being fed tensors that live on `device`.
model_sm_384.to(device)
model_sm_768.to(device)

Softmax(
  (linear_1): Linear(in_features=768, out_features=128, bias=True)
  (linear_2): Linear(in_features=128, out_features=64, bias=True)
  (linear_3): Linear(in_features=64, out_features=1, bias=True)
)

In [18]:
# Test set for the 384-input model: vectors and the matching labels.
X_test = util.load_vector(
  os.path.join(cwd, '../data/vectorize/384/mix/testing_set'),
  testing_batch_from,
  testing_batch_to,
)
y_test = util.load_label(
  os.path.join(cwd, '../data/raw/mix/testing_set'),
  testing_batch_from,
  testing_batch_to,
)

# Test set for the 768-input model. Labels come from the same raw directory
# but use the 768-specific batch range.
X_768_test = util.load_vector(
  os.path.join(cwd, '../data/vectorize/mix/testing_set'),
  testing_768_batch_from,
  testing_768_batch_to,
)
y_768_test = util.load_label(
  os.path.join(cwd, '../data/raw/mix/testing_set'),
  testing_768_batch_from,
  testing_768_batch_to,
)

# Convert to float32 tensors on `device`; labels are flattened to 1-D.
# Despite the "_gpu_" in the names, these live on the CPU when no GPU was found.
X_gpu_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_gpu_test = torch.tensor(y_test).type(torch.float32).flatten().to(device)

X_768_gpu_test = torch.tensor(X_768_test, dtype=torch.float32).to(device)
y_768_gpu_test = torch.tensor(y_768_test).type(torch.float32).flatten().to(device)

In [19]:
# Inspect the test features. The tensor was already placed on `device` when it
# was built, so display it as is: an unconditional .cuda() raises on machines
# without CUDA, and its return value was never kept anyway.
X_gpu_test

tensor([[-0.0297, -0.0425,  0.0579,  ..., -0.0358, -0.0148, -0.1152],
        [-0.0629,  0.0458,  0.0021,  ..., -0.0511,  0.0432, -0.0235],
        [-0.0997,  0.0008, -0.0115,  ..., -0.0179, -0.0186,  0.0597],
        ...,
        [-0.0708, -0.0321, -0.0142,  ...,  0.0142,  0.0859,  0.0578],
        [-0.0387, -0.0754,  0.0961,  ...,  0.0431, -0.0230,  0.0405],
        [-0.0274, -0.0936, -0.0093,  ...,  0.0515, -0.0042, -0.0122]],
       device='cuda:0')

In [20]:
# Inspect the test labels. The tensor was already placed on `device` when it
# was built, so display it as is: an unconditional .cuda() raises on machines
# without CUDA, and its return value was never kept anyway.
y_gpu_test

tensor([1., 1., 0.,  ..., 0., 1., 0.], device='cuda:0')

In [21]:
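# Disabled: manual accuracy check for the logistic-regression model.
# Before re-enabling, move model_lg to `device` (it is never moved in this
# notebook, while X_gpu_test / y_gpu_test live on `device`).
# correct_test = 0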
# total_test = 0
# outputs_test = torch.squeeze(model_lg(X_gpu_test))
# loss_test = criterion(outputs_test, y_gpu_test)

# total_test += y_gpu_test.size(0)
# correct_test += torch.eq(outputs_test.round(), y_gpu_test).sum()
# accuracy_test = 100 * correct_test/total_test

In [22]:
# print(accuracy_test.item())

In [23]:
# Score each Softmax model on the test tensors of matching input width.
# util.metrics is defined outside this notebook; judging by the target names
# it returns an (accuracy, AUC) pair. TODO confirm against util.
acc_sm_384, auc_sm_384 = util.metrics(
  model_sm_384, criterion, X_gpu_test, y_gpu_test
)
acc_sm_768, auc_sm_768 = util.metrics(
  model_sm_768, criterion, X_768_gpu_test, y_768_gpu_test
)

In [24]:
# One line per model: the two values returned by util.metrics, separated by a
# space (384-input model first, then the 768-input model).
for scores in ((acc_sm_384, auc_sm_384), (acc_sm_768, auc_sm_768)):
  print(*scores)

89.729248046875 0.9619949044125
91.82500457763672 0.9714606467587035
