### 0. Prepare Data in Each Fold

In [118]:
from post_parse_ml import LabelParse, LoadData

k = 5 # k-fold cross validation
fold_n = 2
### Prepare the dataset with [k-fold cross validation]
LabelParse().train_test(fold_n, k) # Formalize label
LoadData().load_data(fold_n) # Formalize [torch geometric] data

--- LOADING 1-TH SPLIT TRAINING DATA ---
--- LOADING 2-TH SPLIT TEST DATA ---
--- LOADING 3-TH SPLIT TRAINING DATA ---
--- LOADING 4-TH SPLIT TRAINING DATA ---
--- LOADING 5-TH SPLIT TRAINING DATA ---
--- COMBINING DATA ... ---
     node_idx  t2ds  pret2ds  no_t2ds
0         896     0        0        1
1         856     0        1        0
2         482     0        0        1
3         799     0        0        1
4        1256     0        0        1
..        ...   ...      ...      ...
276      1194     0        0        1
277       540     0        0        1
278      1375     0        1        0
279       962     0        0        1
280       310     0        0        1

[1124 rows x 4 columns]
[ 774  734  360 ... 1253  840  188]
[[0 0 1]
 [0 1 0]
 [0 0 1]
 ...
 [0 1 0]
 [0 0 1]
 [0 0 1]]
(1124, 3)
(1124, 3451)
(1124, 3451)
[1235  103  527  127  442  661 1036  123   13 1090  686  440  961  819
 1315 1219  737  908  162  497  391 1285 1179  658 1003 1373  233  643
  342   63  246  

### 1. DNN model

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [58]:
train_x = np.load('./data/post_ml_data/train_x.npy')
train_label = np.load('./data/post_ml_data/train_label.npy')
test_x = np.load('./data/post_ml_data/test_x.npy')
test_label = np.load('./data/post_ml_data/test_label.npy')

train_x_tensor = torch.FloatTensor(train_x)
train_label_tensor = torch.LongTensor(train_label)
test_x_tensor = torch.FloatTensor(test_x)
test_label_tensor = torch.LongTensor(test_label)

In [59]:
batch_size = 64

train_dataset = TensorDataset(train_x_tensor, train_label_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(test_x_tensor, test_label_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [60]:
class DNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

input_dim = train_x.shape[1]
hidden_dim = 1024
output_dim = 3

model = DNN(input_dim, hidden_dim, output_dim)

In [62]:
from tqdm import tqdm
# Training Loop with tqdm
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    train_correct = 0
    train_total = 0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch+1}/{epochs}")
    
    for i, (batch_x, batch_label) in pbar:
        optimizer.zero_grad()
        outputs = model(batch_x)
        _, batch_targets = batch_label.max(dim=1)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
        _, predicted = torch.max(outputs, 1)
        train_total += batch_label.size(0)
        train_correct += (predicted == batch_targets).sum().item()
        
        train_accuracy = 100 * train_correct / train_total
        pbar.set_postfix(Loss=loss.item(), Training_Accuracy=train_accuracy)
    avg_loss = total_loss / len(train_loader)
    print(f"\nEpoch [{epoch+1}/{epochs}], Avg Loss: {avg_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%")

    # Evaluate the Model with tqdm
    model.eval()
    correct = 0
    total = 0
    pbar = tqdm(test_loader, desc="Evaluating")

    with torch.no_grad():
        for batch_x, batch_label in pbar:
            outputs = model(batch_x)
            _, predicted = torch.max(outputs, 1)
            _, batch_targets = batch_label.max(dim=1)
            total += batch_label.size(0)
            correct += (predicted == batch_targets).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set: {accuracy:.2f}%')
    print('--------------------------------------------------------------')
    print('\n')
    



Epoch 1/20: 100%|██████████| 18/18 [00:00<00:00, 44.23it/s, Loss=2.6, Training_Accuracy=60]   



Epoch [1/20], Avg Loss: 13.8139, Training Accuracy: 59.96%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 250.80it/s]


Accuracy on test set: 79.00%
--------------------------------------------------------------




Epoch 2/20: 100%|██████████| 18/18 [00:00<00:00, 44.57it/s, Loss=0.867, Training_Accuracy=77.7]



Epoch [2/20], Avg Loss: 1.2304, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 426.61it/s]


Accuracy on test set: 79.00%
--------------------------------------------------------------




Epoch 3/20: 100%|██████████| 18/18 [00:00<00:00, 45.13it/s, Loss=0.891, Training_Accuracy=77.7]



Epoch [3/20], Avg Loss: 0.9847, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 384.87it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 4/20: 100%|██████████| 18/18 [00:00<00:00, 45.08it/s, Loss=0.717, Training_Accuracy=77.6]



Epoch [4/20], Avg Loss: 1.0484, Training Accuracy: 77.58%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 422.05it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 5/20: 100%|██████████| 18/18 [00:00<00:00, 46.80it/s, Loss=0.901, Training_Accuracy=77.7]



Epoch [5/20], Avg Loss: 0.7617, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 297.12it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 6/20: 100%|██████████| 18/18 [00:00<00:00, 45.34it/s, Loss=0.898, Training_Accuracy=77.7]



Epoch [6/20], Avg Loss: 0.7298, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 302.34it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 7/20: 100%|██████████| 18/18 [00:00<00:00, 46.33it/s, Loss=0.738, Training_Accuracy=77.7]



Epoch [7/20], Avg Loss: 0.7032, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 427.16it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 8/20: 100%|██████████| 18/18 [00:00<00:00, 45.09it/s, Loss=0.546, Training_Accuracy=77.7]



Epoch [8/20], Avg Loss: 0.6866, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 434.74it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 9/20: 100%|██████████| 18/18 [00:00<00:00, 47.03it/s, Loss=0.613, Training_Accuracy=77.7]



Epoch [9/20], Avg Loss: 0.6824, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 351.36it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 10/20: 100%|██████████| 18/18 [00:00<00:00, 46.78it/s, Loss=0.995, Training_Accuracy=77.7]



Epoch [10/20], Avg Loss: 0.6896, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 464.19it/s]


Accuracy on test set: 78.65%
--------------------------------------------------------------




Epoch 11/20: 100%|██████████| 18/18 [00:00<00:00, 46.69it/s, Loss=0.669, Training_Accuracy=77.7]



Epoch [11/20], Avg Loss: 0.6810, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 303.38it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 12/20: 100%|██████████| 18/18 [00:00<00:00, 48.05it/s, Loss=0.717, Training_Accuracy=77.7]



Epoch [12/20], Avg Loss: 0.6818, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 221.26it/s]


Accuracy on test set: 78.65%
--------------------------------------------------------------




Epoch 13/20: 100%|██████████| 18/18 [00:00<00:00, 46.81it/s, Loss=0.414, Training_Accuracy=77.7]



Epoch [13/20], Avg Loss: 0.6745, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 442.82it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 14/20: 100%|██████████| 18/18 [00:00<00:00, 45.94it/s, Loss=0.776, Training_Accuracy=77.7]



Epoch [14/20], Avg Loss: 0.6831, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 284.05it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 15/20: 100%|██████████| 18/18 [00:00<00:00, 45.89it/s, Loss=0.557, Training_Accuracy=77.7]



Epoch [15/20], Avg Loss: 0.6778, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 501.24it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 16/20: 100%|██████████| 18/18 [00:00<00:00, 46.52it/s, Loss=0.726, Training_Accuracy=77.7]



Epoch [16/20], Avg Loss: 0.6816, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 264.47it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 17/20: 100%|██████████| 18/18 [00:00<00:00, 46.66it/s, Loss=0.496, Training_Accuracy=77.7]



Epoch [17/20], Avg Loss: 0.6761, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 455.71it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 18/20: 100%|██████████| 18/18 [00:00<00:00, 46.63it/s, Loss=0.677, Training_Accuracy=77.7]



Epoch [18/20], Avg Loss: 0.6804, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 466.15it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 19/20: 100%|██████████| 18/18 [00:00<00:00, 45.31it/s, Loss=0.75, Training_Accuracy=77.7] 



Epoch [19/20], Avg Loss: 0.6822, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<?, ?it/s]


Accuracy on test set: 78.29%
--------------------------------------------------------------




Epoch 20/20: 100%|██████████| 18/18 [00:00<00:00, 46.94it/s, Loss=0.555, Training_Accuracy=77.7]



Epoch [20/20], Avg Loss: 0.6776, Training Accuracy: 77.67%


Evaluating: 100%|██████████| 5/5 [00:00<00:00, 245.83it/s]

Accuracy on test set: 78.29%
--------------------------------------------------------------







### 2. Random Forest

In [81]:
train_x = np.load('./data/post_ml_data/train_x.npy')
train_label = np.load('./data/post_ml_data/train_label.npy')
test_x = np.load('./data/post_ml_data/test_x.npy')
test_label = np.load('./data/post_ml_data/test_label.npy')

In [82]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf = RandomForestClassifier(n_estimators=100, random_state=42)  # using 100 trees
rf.fit(train_x, train_label)

In [83]:
predicted_labels = rf.predict(test_x)
accuracy = accuracy_score(test_label, predicted_labels)
print(f'Accuracy with Random Forest: {accuracy * 100:.2f}%')

Accuracy with Random Forest: 72.95%


### 3. Linear Regression

In [119]:
train_x = np.load('./data/post_ml_data/train_x.npy')
train_label = np.load('./data/post_ml_data/train_label.npy')
test_x = np.load('./data/post_ml_data/test_x.npy')
test_label = np.load('./data/post_ml_data/test_label.npy')

In [120]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [121]:
# Create the model. Increase max_iter if the algorithm doesn't converge.
lr_classifier = LogisticRegression(max_iter=100)

# Fit the model to the training data
train_label_1d = np.argmax(train_label, axis=1)
lr_classifier.fit(train_x, train_label_1d)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [122]:
predicted_probs = lr.predict(test_x)
predicted_labels = np.argmax(predicted_probs, axis=1)
test_label_1d = np.argmax(test_label, axis=1)
accuracy = accuracy_score(test_label_1d, predicted_labels)
print(f'Accuracy with Logistic Regression: {accuracy * 100:.2f}%')

Accuracy with Logistic Regression: 100.00%
