# Model 1
GraphSAGE + BERT embeddings using default parameters and a 3-layer MLP.

In [None]:
!pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install torch-geometric

Looking in links: https://data.pyg.org/whl/torch-+.html
Looking in links: https://data.pyg.org/whl/torch-+.html


In [None]:
from torch_geometric.datasets import UPFD #importing the UPFD Dataset

In [None]:
# Load the GossipCop graphs with BERT node features, one dataset per split.
# The validation split is then appended to the training split, so the rest of
# the notebook works with a train set and a test set only.
_upfd_common = dict(root=".", name="gossipcop", feature="bert")
test_data_gos = UPFD(split="test", **_upfd_common)
train_data_gos = UPFD(split="train", **_upfd_common)
val_data_gos = UPFD(split="val", **_upfd_common)
train_data_gos = train_data_gos + val_data_gos


In [None]:
# Report how many graphs ended up in each split.
n_train, n_test = len(train_data_gos), len(test_data_gos)
print("Gossipcop Dataset")
print("Train Samples: ", n_train)
print("Test Samples: ", n_test)

Gossipcop Dataset
Train Samples:  1638
Test Samples:  3826


In [None]:
# Peek at the connectivity (edge_index) of the first graph in the training set.
train_data_gos[0].edge_index


tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          1, 70, 74],
        [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
         37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
         55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
         73, 74, 75]])

In [None]:
from torch_geometric.loader import DataLoader

# Mini-batch loaders over the graph datasets; only the training loader shuffles.
BATCH_SIZE = 256
train_loader = DataLoader(train_data_gos, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_data_gos, shuffle=False, batch_size=BATCH_SIZE)

In [None]:
import torch
import torch.nn.functional as F
from torch.nn import LeakyReLU, Softmax, Linear, SELU,Dropout
from torch_geometric.nn import SAGEConv, global_max_pool, GATv2Conv, TopKPooling, global_mean_pool
from torch_geometric.transforms import ToUndirected
from torch.nn import LeakyReLU

In [None]:
# GraphSAGE classifier: three SAGEConv layers, a global max-pool readout and a
# three-layer MLP head.
class Net(torch.nn.Module):
    """Graph-level binary classifier.

    Args:
        in_channels: width of the input node features.
        hidden_channels: indexable of at least six widths; entries 0-2 size the
            SAGEConv layers, entries 3-5 size the fully connected layers.
        out_channels: width of the final linear layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        hc = hidden_channels

        # Message-passing stack.
        self.conv1 = SAGEConv(in_channels, hc[0])
        self.conv2 = SAGEConv(hc[0], hc[1])
        self.conv3 = SAGEConv(hc[1], hc[2])

        # Fully connected head.
        self.full1 = Linear(hc[2], hc[3])
        self.full2 = Linear(hc[3], hc[4])
        self.full3 = Linear(hc[4], hc[5])

        # Despite the attribute name this is a plain linear projection; the
        # squashing is applied in forward().
        self.softmax = Linear(hc[5], out_channels)

        # One dropout module after each fully connected layer.
        self.dp1 = Dropout(0.2)
        self.dp2 = Dropout(0.2)
        self.dp3 = Dropout(0.2)

    def forward(self, x, edge_index, batch):
        """Return sigmoid outputs, one row per graph in the batch."""
        h = x
        for conv in (self.conv1, self.conv2, self.conv3):
            h = conv(h, edge_index).relu()

        # Collapse node embeddings into one vector per graph.
        h = global_max_pool(h, batch)

        for dense, drop in ((self.full1, self.dp1),
                            (self.full2, self.dp2),
                            (self.full3, self.dp3)):
            h = drop(dense(h).relu())

        return torch.sigmoid(self.softmax(h))

In [None]:
from torch.autograd import Variable
from sklearn.metrics import accuracy_score, f1_score 

In [None]:
# `device` has to exist before the model is moved onto it; it is also read by
# the train/test helpers below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Input width comes from the dataset; the list holds the widths of the three
# SAGEConv layers followed by the three fully connected layers; one output unit.
model = Net(test_data_gos.num_features,[512,512,512,256,256,256],1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # learning rate stated to follow the paper
lossff = torch.nn.BCELoss()  # the model already emits sigmoid probabilities
print(device)

cpu


In [None]:
# Training helper; it operates on the notebook-level `model`, `optimizer`,
# `lossff`, `device` and `train_loader`.
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    ``epoch`` is not used by the body.
    Returns the average of the per-batch losses, weighted by the number of
    graphs in each batch.
    """
    model.train()
    running = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        probs = model(batch.x, batch.edge_index, batch.batch)

        # Flatten the model output so it lines up with the label vector.
        batch_loss = lossff(probs.reshape(-1), batch.y.float())
        batch_loss.backward()
        optimizer.step()
        running += batch_loss.item() * batch.num_graphs
    return running / len(train_loader.dataset)

@torch.no_grad()
def test(epoch):
    """Evaluate the notebook-level ``model`` on ``test_loader``.

    ``epoch`` is not used by the body.
    Returns ``(loss, accuracy, f1)`` where ``loss`` is the average of the
    per-batch losses weighted by the number of graphs in each batch.
    """
    model.eval()
    loss_sum = 0
    pred_chunks, label_chunks = [], []
    for batch in test_loader:
        batch = batch.to(device)
        flat = torch.reshape(model(batch.x, batch.edge_index, batch.batch), (-1,))
        target = batch.y.float()
        loss_sum += float(lossff(flat, target)) * batch.num_graphs
        pred_chunks.append(flat)
        label_chunks.append(target)
    accuracy, f1 = metrics(pred_chunks, label_chunks)
    return loss_sum / len(test_loader.dataset), accuracy, f1


def metrics(preds, gts):
    """Compute accuracy and F1 from per-batch prediction and label tensors.

    Args:
        preds: list of tensors of predicted probabilities; they are
            concatenated and rounded to 0/1 here.
        gts: list of tensors of ground-truth labels, concatenated here.

    Returns:
        ``(accuracy, f1)`` as computed by scikit-learn.
    """
    # Move each tensor to host memory once instead of once per metric.
    y_pred = torch.round(torch.cat(preds)).cpu().numpy()
    y_true = torch.cat(gts).cpu().numpy()

    # scikit-learn's metric signature is (y_true, y_pred).
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return acc, f1


In [None]:
# Train for 100 epochs, tracking a bias-corrected exponential moving average
# (EMA) of the test loss, and report the epoch at which that EMA is lowest.
wloss = []                     # bias-corrected EMA of the test loss, one entry per epoch
weighted_loss = 0              # raw (uncorrected) EMA accumulator
exp_param = 0.8                # EMA decay factor
best_test_loss = float('inf')  # lowest raw test loss seen so far

for epoch in range(100):
    train_loss = train(epoch)
    test_loss, test_acc, test_f1 = test(epoch)

    # test() already returns a per-graph average, so it enters the EMA as-is;
    # dividing by the dataset size a second time would shrink every value.
    weighted_loss = exp_param * weighted_loss + (1 - exp_param) * test_loss

    # Divide by (1 - decay^t) to undo the bias from starting the EMA at 0.
    wloss.append(weighted_loss / (1 - exp_param ** (epoch + 1)))

    best_test_loss = min(best_test_loss, test_loss)

    print(f'Epoch: {epoch:02d} |  TrainLoss: {train_loss:.5f} | '
          f'TestLoss: {test_loss:.5f} | TestAcc: {test_acc:.5f} | TestF1: {test_f1:.2f}')

# Epoch (0-based) with the lowest smoothed test loss.
best_wloss = min(wloss)
best_epoch = wloss.index(best_wloss)
print(f'Best WLoss: {best_wloss:.5f} | Best Epoch: {best_epoch}')

Epoch: 00 |  TrainLoss: 0.69271 | TestLoss: 0.69131 | TestAcc: 0.82593 | TestF1: 0.84
Epoch: 01 |  TrainLoss: 0.69028 | TestLoss: 0.68640 | TestAcc: 0.87689 | TestF1: 0.88
Epoch: 02 |  TrainLoss: 0.68323 | TestLoss: 0.67375 | TestAcc: 0.88082 | TestF1: 0.89
Epoch: 03 |  TrainLoss: 0.66677 | TestLoss: 0.64455 | TestAcc: 0.89963 | TestF1: 0.91
Epoch: 04 |  TrainLoss: 0.63063 | TestLoss: 0.58553 | TestAcc: 0.90669 | TestF1: 0.91
Epoch: 05 |  TrainLoss: 0.55566 | TestLoss: 0.46683 | TestAcc: 0.93152 | TestF1: 0.93
Epoch: 06 |  TrainLoss: 0.42451 | TestLoss: 0.30236 | TestAcc: 0.94119 | TestF1: 0.94
Epoch: 07 |  TrainLoss: 0.26788 | TestLoss: 0.19082 | TestAcc: 0.94668 | TestF1: 0.95
Epoch: 08 |  TrainLoss: 0.18987 | TestLoss: 0.17248 | TestAcc: 0.94590 | TestF1: 0.95
Epoch: 09 |  TrainLoss: 0.17958 | TestLoss: 0.19811 | TestAcc: 0.94773 | TestF1: 0.95
Epoch: 10 |  TrainLoss: 0.18190 | TestLoss: 0.16731 | TestAcc: 0.95191 | TestF1: 0.95
Epoch: 11 |  TrainLoss: 0.16443 | TestLoss: 0.15976 | 

# Model 2:
GraphSAGE + BERT embeddings with the hyperparameters defined by the paper and a 3-layer MLP

(Embedding size = 128, batch size = 128, L2 regularization = 0.001)

In [None]:
class Net(torch.nn.Module):
    """GraphSAGE classifier with a three-layer MLP head.

    Args:
        in_channels: width of the input node features.
        hidden_channels: indexable of at least six widths; entries 0-2 size the
            SAGEConv layers, entries 3-5 size the fully connected layers.
        out_channels: width of the final linear layer.
        embedding_size, batch_size, l2_reg_weight: stored on the instance
            unchanged; no layer in this class reads them.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, embedding_size, batch_size, l2_reg_weight):
        super().__init__()
        # Hyperparameters kept for whoever builds the optimizer / loaders.
        self.embedding_size = embedding_size
        self.batch_size = batch_size
        self.l2_reg_weight = l2_reg_weight

        hc = hidden_channels

        # Message-passing stack.
        self.conv1 = SAGEConv(in_channels, hc[0])
        self.conv2 = SAGEConv(hc[0], hc[1])
        self.conv3 = SAGEConv(hc[1], hc[2])

        # Fully connected head.
        self.full1 = Linear(hc[2], hc[3])
        self.full2 = Linear(hc[3], hc[4])
        self.full3 = Linear(hc[4], hc[5])

        # Plain linear projection despite the attribute name.
        self.softmax = Linear(hc[5], out_channels)

        # One dropout module after each fully connected layer.
        self.dp1 = Dropout(0.2)
        self.dp2 = Dropout(0.2)
        self.dp3 = Dropout(0.2)

    def forward(self, x, edge_index, batch):
        """Return sigmoid outputs, one row per graph in the batch."""
        h = x
        for conv in (self.conv1, self.conv2, self.conv3):
            h = conv(h, edge_index).relu()

        # Collapse node embeddings into one vector per graph.
        h = global_max_pool(h, batch)

        for dense, drop in ((self.full1, self.dp1),
                            (self.full2, self.dp2),
                            (self.full3, self.dp3)):
            h = drop(dense(h).relu())

        return torch.sigmoid(self.softmax(h))

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Requested hyperparameters: embedding size 128, batch size 128, L2 weight 0.001.
model = Net(test_data_gos.num_features,[512,512,512,256,256,256],1, 128, 128, 0.001).to(device)

# Net only stores these values, so they are applied here: Adam's weight_decay
# is its L2 penalty, and the loaders are rebuilt with the requested batch size
# (they were created earlier with batch_size=256).
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=model.l2_reg_weight)
train_loader = DataLoader(train_data_gos, batch_size=model.batch_size, shuffle=True)
test_loader = DataLoader(test_data_gos, batch_size=model.batch_size, shuffle=False)
# NOTE(review): embedding_size is not consumed by any layer; the hidden widths
# above stay at 512/256. Confirm how the paper's embedding size should map.

lossff = torch.nn.BCELoss()  # binary cross-entropy on the sigmoid outputs
print(device)

cpu


In [None]:
def train(epoch):
    """Run one optimisation pass over ``train_loader`` with the global model.

    ``epoch`` is not used by the body.
    Returns the average of the per-batch losses, weighted by the number of
    graphs in each batch.
    """
    model.train()
    accumulated = 0
    for graphs in train_loader:
        graphs = graphs.to(device)
        optimizer.zero_grad()
        scores = model(graphs.x, graphs.edge_index, graphs.batch)

        # Flatten the model output so it lines up with the label vector.
        step_loss = lossff(scores.reshape(-1), graphs.y.float())
        step_loss.backward()
        optimizer.step()
        accumulated += step_loss.item() * graphs.num_graphs
    return accumulated / len(train_loader.dataset)

@torch.no_grad()
def test(epoch):
    """Evaluate the global ``model`` on ``test_loader``.

    ``epoch`` is not used by the body.
    Returns ``(loss, accuracy, f1)`` where ``loss`` is the average of the
    per-batch losses weighted by the number of graphs in each batch.
    """
    model.eval()
    accumulated = 0
    collected_preds, collected_labels = [], []
    for graphs in test_loader:
        graphs = graphs.to(device)
        scores = torch.reshape(model(graphs.x, graphs.edge_index, graphs.batch), (-1,))
        labels = graphs.y.float()
        accumulated += float(lossff(scores, labels)) * graphs.num_graphs
        collected_preds.append(scores)
        collected_labels.append(labels)
    accuracy, f1 = metrics(collected_preds, collected_labels)
    return accumulated / len(test_loader.dataset), accuracy, f1


def metrics(preds, gts):
    """Compute accuracy and F1 from per-batch prediction and label tensors.

    Args:
        preds: list of tensors of predicted probabilities; they are
            concatenated and rounded to 0/1 here.
        gts: list of tensors of ground-truth labels, concatenated here.

    Returns:
        ``(accuracy, f1)`` as computed by scikit-learn.
    """
    # Move each tensor to host memory once instead of once per metric.
    y_pred = torch.round(torch.cat(preds)).cpu().numpy()
    y_true = torch.cat(gts).cpu().numpy()

    # scikit-learn's metric signature is (y_true, y_pred).
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return acc, f1

In [None]:
# Train for 100 epochs, tracking a bias-corrected exponential moving average
# (EMA) of the test loss, and report the epoch at which that EMA is lowest.
wloss = []                     # bias-corrected EMA of the test loss, one entry per epoch
weighted_loss = 0              # raw (uncorrected) EMA accumulator
exp_param = 0.8                # EMA decay factor
best_test_loss = float('inf')  # lowest raw test loss seen so far

for epoch in range(100):
    train_loss = train(epoch)
    test_loss, test_acc, test_f1 = test(epoch)

    # test() already returns a per-graph average, so it enters the EMA as-is;
    # dividing by the dataset size a second time would shrink every value.
    weighted_loss = exp_param * weighted_loss + (1 - exp_param) * test_loss

    # Divide by (1 - decay^t) to undo the bias from starting the EMA at 0.
    wloss.append(weighted_loss / (1 - exp_param ** (epoch + 1)))

    best_test_loss = min(best_test_loss, test_loss)

    print(f'Epoch: {epoch:02d} |  TrainLoss: {train_loss:.5f} | '
          f'TestLoss: {test_loss:.5f} | TestAcc: {test_acc:.5f} | TestF1: {test_f1:.2f}')

# Epoch (0-based) with the lowest smoothed test loss.
best_wloss = min(wloss)
best_epoch = wloss.index(best_wloss)
print(f'Best WLoss: {best_wloss:.5f} | Best Epoch: {best_epoch}')

Epoch: 00 |  TrainLoss: 0.69249 | TestLoss: 0.69143 | TestAcc: 0.71720 | TestF1: 0.78
Epoch: 01 |  TrainLoss: 0.69042 | TestLoss: 0.68718 | TestAcc: 0.88134 | TestF1: 0.88
Epoch: 02 |  TrainLoss: 0.68520 | TestLoss: 0.67789 | TestAcc: 0.87167 | TestF1: 0.88
Epoch: 03 |  TrainLoss: 0.67251 | TestLoss: 0.65598 | TestAcc: 0.89493 | TestF1: 0.89
Epoch: 04 |  TrainLoss: 0.64574 | TestLoss: 0.60772 | TestAcc: 0.90042 | TestF1: 0.90
Epoch: 05 |  TrainLoss: 0.58485 | TestLoss: 0.50331 | TestAcc: 0.91009 | TestF1: 0.91
Epoch: 06 |  TrainLoss: 0.46447 | TestLoss: 0.35867 | TestAcc: 0.90408 | TestF1: 0.90
Epoch: 07 |  TrainLoss: 0.30784 | TestLoss: 0.22962 | TestAcc: 0.92185 | TestF1: 0.92
Epoch: 08 |  TrainLoss: 0.22559 | TestLoss: 0.18448 | TestAcc: 0.93675 | TestF1: 0.94
Epoch: 09 |  TrainLoss: 0.19111 | TestLoss: 0.17126 | TestAcc: 0.94198 | TestF1: 0.94
Epoch: 10 |  TrainLoss: 0.17946 | TestLoss: 0.16538 | TestAcc: 0.94485 | TestF1: 0.95
Epoch: 11 |  TrainLoss: 0.15980 | TestLoss: 0.16261 | 

# Model 3: 
GraphSage + Bert Embeddings with 2 Layer MLP as defined by the paper. (This is the model implemented in the paper)

In [None]:
import torch
import torch.nn as nn

class Net(torch.nn.Module):
    """GraphSAGE classifier with a two-layer MLP head.

    Args:
        in_channels: width of the input node features.
        hidden_channels: indexable of widths; entries 0-2 size the SAGEConv
            layers and entries 3-4 size the fully connected layers. Any
            further entries are not read.
        out_channels: width of the final linear layer.
        embedding_size, batch_size, l2_reg_weight: stored on the instance
            unchanged; no layer in this class reads them.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, embedding_size, batch_size, l2_reg_weight):
        super().__init__()
        # Hyperparameters kept for whoever builds the optimizer / loaders.
        self.embedding_size = embedding_size
        self.batch_size = batch_size
        self.l2_reg_weight = l2_reg_weight

        hc = hidden_channels

        # Message-passing stack.
        self.conv1 = SAGEConv(in_channels, hc[0])
        self.conv2 = SAGEConv(hc[0], hc[1])
        self.conv3 = SAGEConv(hc[1], hc[2])

        # Fully connected head; `softmax` is a plain linear projection.
        self.full1 = Linear(hc[2], hc[3])
        self.full2 = Linear(hc[3], hc[4])
        self.softmax = Linear(hc[4], out_channels)

        # One dropout module after each fully connected layer.
        self.dp1 = Dropout(0.2)
        self.dp2 = Dropout(0.2)

    def forward(self, x, edge_index, batch):
        """Return sigmoid outputs, one row per graph in the batch."""
        h = x
        for conv in (self.conv1, self.conv2, self.conv3):
            h = conv(h, edge_index).relu()

        # Collapse node embeddings into one vector per graph.
        h = global_max_pool(h, batch)

        for dense, drop in ((self.full1, self.dp1), (self.full2, self.dp2)):
            h = drop(dense(h).relu())

        return torch.sigmoid(self.softmax(h))

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Requested hyperparameters: embedding size 128, batch size 128, L2 weight 0.001.
# Only the first five hidden widths are read by this two-layer-MLP Net.
model = Net(test_data_gos.num_features,[512,512,512,256,256,256],1, 128, 128, 0.001).to(device)

# Net only stores these values, so they are applied here: Adam's weight_decay
# is its L2 penalty, and the loaders are rebuilt with the requested batch size
# (they were created earlier with batch_size=256).
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=model.l2_reg_weight)
train_loader = DataLoader(train_data_gos, batch_size=model.batch_size, shuffle=True)
test_loader = DataLoader(test_data_gos, batch_size=model.batch_size, shuffle=False)
# NOTE(review): embedding_size is not consumed by any layer; the hidden widths
# above stay at 512/256. Confirm how the paper's embedding size should map.

lossff = torch.nn.BCELoss()  # binary cross-entropy on the sigmoid outputs
print(device)


cpu


In [None]:
def train(epoch):
    """Perform one training epoch over ``train_loader`` using the global model.

    ``epoch`` is not used by the body.
    Returns the average of the per-batch losses, weighted by the number of
    graphs in each batch.
    """
    model.train()
    weighted_sum = 0
    for mini_batch in train_loader:
        mini_batch = mini_batch.to(device)
        optimizer.zero_grad()
        output = model(mini_batch.x, mini_batch.edge_index, mini_batch.batch)

        # Flatten the model output so it lines up with the label vector.
        current = lossff(output.reshape(-1), mini_batch.y.float())
        current.backward()
        optimizer.step()
        weighted_sum += current.item() * mini_batch.num_graphs
    return weighted_sum / len(train_loader.dataset)

@torch.no_grad()
def test(epoch):
    """Score the global ``model`` on ``test_loader``.

    ``epoch`` is not used by the body.
    Returns ``(loss, accuracy, f1)`` where ``loss`` is the average of the
    per-batch losses weighted by the number of graphs in each batch.
    """
    model.eval()
    weighted_sum = 0
    outputs, targets = [], []
    for mini_batch in test_loader:
        mini_batch = mini_batch.to(device)
        flat_out = torch.reshape(
            model(mini_batch.x, mini_batch.edge_index, mini_batch.batch), (-1,)
        )
        truth = mini_batch.y.float()
        weighted_sum += float(lossff(flat_out, truth)) * mini_batch.num_graphs
        outputs.append(flat_out)
        targets.append(truth)
    accuracy, f1 = metrics(outputs, targets)
    return weighted_sum / len(test_loader.dataset), accuracy, f1


def metrics(preds, gts):
    """Compute accuracy and F1 from per-batch prediction and label tensors.

    Args:
        preds: list of tensors of predicted probabilities; they are
            concatenated and rounded to 0/1 here.
        gts: list of tensors of ground-truth labels, concatenated here.

    Returns:
        ``(accuracy, f1)`` as computed by scikit-learn.
    """
    # Move each tensor to host memory once instead of once per metric.
    y_pred = torch.round(torch.cat(preds)).cpu().numpy()
    y_true = torch.cat(gts).cpu().numpy()

    # scikit-learn's metric signature is (y_true, y_pred).
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return acc, f1

In [None]:
# Train for 100 epochs, tracking a bias-corrected exponential moving average
# (EMA) of the test loss, and report the epoch at which that EMA is lowest.
wloss = []                     # bias-corrected EMA of the test loss, one entry per epoch
weighted_loss = 0              # raw (uncorrected) EMA accumulator
exp_param = 0.8                # EMA decay factor
best_test_loss = float('inf')  # lowest raw test loss seen so far

for epoch in range(100):
    train_loss = train(epoch)
    test_loss, test_acc, test_f1 = test(epoch)

    # test() already returns a per-graph average, so it enters the EMA as-is;
    # dividing by the dataset size a second time would shrink every value.
    weighted_loss = exp_param * weighted_loss + (1 - exp_param) * test_loss

    # Divide by (1 - decay^t) to undo the bias from starting the EMA at 0.
    wloss.append(weighted_loss / (1 - exp_param ** (epoch + 1)))

    best_test_loss = min(best_test_loss, test_loss)

    print(f'Epoch: {epoch:02d} |  TrainLoss: {train_loss:.5f} | '
          f'TestLoss: {test_loss:.5f} | TestAcc: {test_acc:.5f} | TestF1: {test_f1:.2f}')

# Epoch (0-based) with the lowest smoothed test loss.
best_wloss = min(wloss)
best_epoch = wloss.index(best_wloss)
print(f'Best WLoss: {best_wloss:.5f} | Best Epoch: {best_epoch}')

Epoch: 00 |  TrainLoss: 0.69129 | TestLoss: 0.68761 | TestAcc: 0.80136 | TestF1: 0.77
Epoch: 01 |  TrainLoss: 0.68454 | TestLoss: 0.67688 | TestAcc: 0.69420 | TestF1: 0.57
Epoch: 02 |  TrainLoss: 0.66988 | TestLoss: 0.65339 | TestAcc: 0.77757 | TestF1: 0.72
Epoch: 03 |  TrainLoss: 0.63928 | TestLoss: 0.61159 | TestAcc: 0.81234 | TestF1: 0.78
Epoch: 04 |  TrainLoss: 0.59011 | TestLoss: 0.53668 | TestAcc: 0.88761 | TestF1: 0.89
Epoch: 05 |  TrainLoss: 0.49988 | TestLoss: 0.43406 | TestAcc: 0.88369 | TestF1: 0.89
Epoch: 06 |  TrainLoss: 0.38927 | TestLoss: 0.31651 | TestAcc: 0.89388 | TestF1: 0.90
Epoch: 07 |  TrainLoss: 0.29827 | TestLoss: 0.25255 | TestAcc: 0.90538 | TestF1: 0.91
Epoch: 08 |  TrainLoss: 0.23997 | TestLoss: 0.21457 | TestAcc: 0.92211 | TestF1: 0.92
Epoch: 09 |  TrainLoss: 0.21290 | TestLoss: 0.18328 | TestAcc: 0.93675 | TestF1: 0.94
Epoch: 10 |  TrainLoss: 0.18359 | TestLoss: 0.16632 | TestAcc: 0.94407 | TestF1: 0.94
Epoch: 11 |  TrainLoss: 0.16350 | TestLoss: 0.16125 | 

# Model 4:
GraphSAGE + BERT embeddings with the hyperparameters defined in the paper, replacing one MLP layer with one RNN layer:

In [None]:
import torch.nn as nn

class Net(torch.nn.Module):
    """GraphSAGE classifier whose first dense layer is replaced by an RNN.

    Args:
        in_channels: width of the input node features.
        hidden_channels: indexable of widths; entries 0-2 size the SAGEConv
            layers, entry 3 is the RNN hidden size and entry 4 sizes the
            fully connected layer.
        out_channels: width of the final linear layer.
        embedding_size, batch_size, l2_reg_weight: stored on the instance
            unchanged; no layer in this class reads them.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, embedding_size, batch_size, l2_reg_weight):
        super(Net, self).__init__()
        self.embedding_size = embedding_size
        self.batch_size = batch_size
        self.l2_reg_weight = l2_reg_weight

        self.conv1 = SAGEConv(in_channels, hidden_channels[0])
        self.conv2 = SAGEConv(hidden_channels[0], hidden_channels[1])
        self.conv3 = SAGEConv(hidden_channels[1], hidden_channels[2])

        # The RNN is fed the pooled conv3 output, so its input width has to be
        # hidden_channels[2]; sizing it by embedding_size fails whenever the
        # two values differ.
        self.rnn = nn.RNN(hidden_channels[2], hidden_channels[3], batch_first=True)
        self.full1 = nn.Linear(hidden_channels[3], hidden_channels[4])
        # Plain linear projection despite the attribute name.
        self.softmax = nn.Linear(hidden_channels[4], out_channels)

        # Dropouts
        self.dp1 = nn.Dropout(0.2)

    def forward(self, x, edge_index, batch):
        """Return sigmoid outputs, one row per graph in the batch."""
        h = self.conv1(x, edge_index).relu()
        h = self.conv2(h, edge_index).relu()
        h = self.conv3(h, edge_index).relu()

        # Collapse node embeddings into one vector per graph.
        h = global_max_pool(h, batch)

        # With batch_first=True the RNN reads (batch, seq, feature), so the
        # time axis is dim 1: every graph becomes its own length-1 sequence.
        # Inserting the axis at dim 0 would instead chain all graphs of the
        # mini-batch into a single sequence.
        h = h.unsqueeze(1)
        h = self.dp1(h)

        h, _ = self.rnn(h)
        h = h.squeeze(1)  # drop the length-1 time axis again

        h = self.full1(h).relu()
        h = self.softmax(h)

        return torch.sigmoid(h)

In [None]:
def train(epoch):
    """Fit the global ``model`` for one pass over ``train_loader``.

    ``epoch`` is not used by the body.
    Returns the average of the per-batch losses, weighted by the number of
    graphs in each batch.
    """
    model.train()
    loss_total = 0
    for chunk in train_loader:
        chunk = chunk.to(device)
        optimizer.zero_grad()
        predicted = model(chunk.x, chunk.edge_index, chunk.batch)

        # Flatten the model output so it lines up with the label vector.
        chunk_loss = lossff(predicted.reshape(-1), chunk.y.float())
        chunk_loss.backward()
        optimizer.step()
        loss_total += chunk_loss.item() * chunk.num_graphs
    return loss_total / len(train_loader.dataset)

@torch.no_grad()
def test(epoch):
    """Measure loss, accuracy and F1 of the global ``model`` on ``test_loader``.

    ``epoch`` is not used by the body.
    Returns ``(loss, accuracy, f1)`` where ``loss`` is the average of the
    per-batch losses weighted by the number of graphs in each batch.
    """
    model.eval()
    loss_total = 0
    pred_parts, label_parts = [], []
    for chunk in test_loader:
        chunk = chunk.to(device)
        predicted = torch.reshape(model(chunk.x, chunk.edge_index, chunk.batch), (-1,))
        expected = chunk.y.float()
        loss_total += float(lossff(predicted, expected)) * chunk.num_graphs
        pred_parts.append(predicted)
        label_parts.append(expected)
    accuracy, f1 = metrics(pred_parts, label_parts)
    return loss_total / len(test_loader.dataset), accuracy, f1


def metrics(preds, gts):
    """Compute accuracy and F1 from per-batch prediction and label tensors.

    Args:
        preds: list of tensors of predicted probabilities; they are
            concatenated and rounded to 0/1 here.
        gts: list of tensors of ground-truth labels, concatenated here.

    Returns:
        ``(accuracy, f1)`` as computed by scikit-learn.
    """
    # Move each tensor to host memory once instead of once per metric.
    y_pred = torch.round(torch.cat(preds)).cpu().numpy()
    y_true = torch.cat(gts).cpu().numpy()

    # scikit-learn's metric signature is (y_true, y_pred).
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return acc, f1

In [None]:
# Build a fresh RNN-based model and optimizer for this experiment. Without
# this, `model` and `optimizer` still refer to the previous section's trained
# network and the loop below would simply continue training it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The pooled conv3 output (hidden_channels[2] wide) is what the RNN receives,
# so that width is kept equal to embedding_size (128).
# NOTE(review): layer widths chosen here are an assumption; confirm against the paper.
model = Net(test_data_gos.num_features, [512, 512, 128, 256, 256], 1, 128, 128, 0.001).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
lossff = torch.nn.BCELoss()

# Train for 100 epochs, tracking a bias-corrected exponential moving average
# (EMA) of the test loss, and report the epoch at which that EMA is lowest.
wloss = []                     # bias-corrected EMA of the test loss, one entry per epoch
weighted_loss = 0              # raw (uncorrected) EMA accumulator
exp_param = 0.8                # EMA decay factor
best_test_loss = float('inf')  # lowest raw test loss seen so far

for epoch in range(100):
    train_loss = train(epoch)
    test_loss, test_acc, test_f1 = test(epoch)

    # test() already returns a per-graph average, so it enters the EMA as-is;
    # dividing by the dataset size a second time would shrink every value.
    weighted_loss = exp_param * weighted_loss + (1 - exp_param) * test_loss

    # Divide by (1 - decay^t) to undo the bias from starting the EMA at 0.
    wloss.append(weighted_loss / (1 - exp_param ** (epoch + 1)))

    best_test_loss = min(best_test_loss, test_loss)

    print(f'Epoch: {epoch:02d} |  TrainLoss: {train_loss:.5f} | '
          f'TestLoss: {test_loss:.5f} | TestAcc: {test_acc:.5f} | TestF1: {test_f1:.2f}')

# Epoch (0-based) with the lowest smoothed test loss.
best_wloss = min(wloss)
best_epoch = wloss.index(best_wloss)
print(f'Best WLoss: {best_wloss:.5f} | Best Epoch: {best_epoch}')

Epoch: 00 |  TrainLoss: 0.00033 | TestLoss: 0.20748 | TestAcc: 0.95949 | TestF1: 0.96
Epoch: 01 |  TrainLoss: 0.00034 | TestLoss: 0.20776 | TestAcc: 0.95949 | TestF1: 0.96
Epoch: 02 |  TrainLoss: 0.00035 | TestLoss: 0.20856 | TestAcc: 0.95896 | TestF1: 0.96
Epoch: 03 |  TrainLoss: 0.00031 | TestLoss: 0.21071 | TestAcc: 0.95975 | TestF1: 0.96
Epoch: 04 |  TrainLoss: 0.00034 | TestLoss: 0.21048 | TestAcc: 0.95896 | TestF1: 0.96
Epoch: 05 |  TrainLoss: 0.00042 | TestLoss: 0.21112 | TestAcc: 0.95792 | TestF1: 0.96
Epoch: 06 |  TrainLoss: 0.00034 | TestLoss: 0.21320 | TestAcc: 0.95896 | TestF1: 0.96
Epoch: 07 |  TrainLoss: 0.00027 | TestLoss: 0.21239 | TestAcc: 0.95870 | TestF1: 0.96
Epoch: 08 |  TrainLoss: 0.00027 | TestLoss: 0.21396 | TestAcc: 0.95923 | TestF1: 0.96
Epoch: 09 |  TrainLoss: 0.00029 | TestLoss: 0.21412 | TestAcc: 0.95844 | TestF1: 0.96
Epoch: 10 |  TrainLoss: 0.00031 | TestLoss: 0.21541 | TestAcc: 0.95818 | TestF1: 0.96
Epoch: 11 |  TrainLoss: 0.00025 | TestLoss: 0.21589 | 