<a href="https://colab.research.google.com/github/aSafarpoor/OSN_FAD/blob/main/CLS/FAD%2BNN%2BNode2Vec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [155]:
!pip install node2vec



In [156]:
!pip install torch_geometric



In [172]:
import networkx as nx
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
from node2vec import Node2Vec
import random
SEED = 10

In [190]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATConv
from sklearn.metrics import roc_curve,accuracy_score, roc_auc_score, confusion_matrix



# read data

In [159]:
edges = []
with open('graph.txt', 'r') as file:
    for line in file:
        # Split each line into two numbers and convert them to integers
        pair = list(map(int, line.strip().split()))
        edges.append(pair)


with open('train.txt', 'r') as file:
    lines = file.readlines()
    btrain = list(map(float, lines[0].strip().split()))
    strain = list(map(float, lines[1].strip().split()))

with open('test.txt', 'r') as file:
    lines = file.readlines()
    btest = list(map(float, lines[0].strip().split()))
    stest = list(map(float, lines[1].strip().split()))

In [160]:
print("Length of edges:", len(edges),
      "| Length of btrain:", len(btrain),
      "| Length of btest:", len(btest),
      "| Length of strain:", len(strain),
      "| Length of stest:", len(stest))

Length of edges: 6818501 | Length of btrain: 20000 | Length of btest: 158377 | Length of strain: 10000 | Length of stest: 81263


# data preparation

In [161]:
bigG = nx.Graph()
bigG.add_edges_from(edges)

In [162]:
trainG = bigG.subgraph(btrain+strain).copy()
testG = bigG.subgraph(btest+stest).copy()

In [163]:
print(len(bigG.nodes()),len(trainG.nodes()),len(testG.nodes()))

269640 30000 239640


In [164]:
def reindex_graph_and_dictionaries(G, train, test):
    new_index_mapping = {old_index: new_index for new_index, old_index in enumerate(G.nodes())}
    G_reindexed = nx.relabel_nodes(G, new_index_mapping)

    train = [new_index_mapping[int(node)] for node in train]
    test =  [new_index_mapping[int(node)] for node in test]

    return G_reindexed, train, test

In [165]:
trainG,btrain,strain =reindex_graph_and_dictionaries(trainG, btrain,strain)
testG,btest,stest =reindex_graph_and_dictionaries(testG, btest,stest)

In [176]:
trainDictionaryX = {}
trainDictionaryY = {}
testDictionaryX = {}
testDictionaryY = {}

random.seed(SEED)

c=0
for node in btrain:
    if random.random() < 0.20:
        c+=1
        trainDictionaryX[node] = [0]
    else:
        trainDictionaryX[node] = [0.5]
    trainDictionaryY[node] = 0
print(c)

c=0
for node in strain:
    if random.random() < 0.20:
        c+=1
        trainDictionaryX[node] = [1]
    else:
        trainDictionaryX[node] = [0.5]
    trainDictionaryY[node] = 1
print(c)

c=0
for node in btest:
    if random.random() < 0.1:
        c+=1
        testDictionaryX[node] = [0]
    else:
        testDictionaryX[node] = [0.5]
    testDictionaryY[node] = 0
print(c)

c=0
for node in stest:
    if random.random() < 0.1:
        c+=1
        testDictionaryX[node] = [1]
    else:
        testDictionaryX[node] = [0.5]
    testDictionaryY[node] = 1
print(c)

4038
1986
15631
8053


In [178]:
# print(len(btrain))
# print(len(strain))
# print(len(btest))
# print(len(stest))


#model

In [179]:
# Prepare data (assuming PyTorch Geometric format for graphs)
def prepare_data(graph, dictionaryX, dictionaryY):
    x = torch.tensor([dictionaryX[node] for node in graph.nodes()], dtype=torch.float)
    y = torch.tensor([dictionaryY[node] for node in graph.nodes()], dtype=torch.long)
    # edge_index = torch.tensor(list(graph.edges())).t().contiguous()
    edge_index = torch.tensor(list(graph.edges()), dtype=torch.long).t().contiguous()
    return x, y, edge_index

In [180]:
def pure_sybil_test(test_x, test_y):
    print(len(test_x))
    mask = (test_y == 1)
    x = test_x[mask]
    y = test_y[mask]
    print(len(x))
    return x, y, mask

In [181]:
train_x, train_y, train_edge_index = prepare_data(trainG, trainDictionaryX, trainDictionaryY)
test_x, test_y, test_edge_index = prepare_data(testG, testDictionaryX, testDictionaryY)
test_x2, test_y2, mask2 = pure_sybil_test(test_x, test_y)


239640
81263


In [205]:
class GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout=True):
        super(GCN, self).__init__()
        self.dropout = dropout
        self.convs = torch.nn.ModuleList()
        self.num_classes = output_dim
        for i in range(num_layers):
            if i == 0:  # First layer
                self.convs.append(GCNConv(input_dim, hidden_dim, bias=False))
            elif i == num_layers - 1:  # Last layer
                self.convs.append(GCNConv(hidden_dim, output_dim, bias=False))
            else:  # Middle layers
                self.convs.append(GCNConv(hidden_dim, hidden_dim, bias=False))

    def forward(self, x, edge_index):
        h = x
        i = 0
        for conv in self.convs:
            if self.dropout:
                h = F.dropout(h, p=0.5, training=self.training)
            h = conv(h, edge_index)
            if i < len(self.convs) - 1:
                h = F.tanh(h)
            i += 1

        if self.num_classes == 1:
            return F.sigmoid(h), h
        else:
            return F.softmax(h, dim=1)




class GAT(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, heads=1):
        super(GAT, self).__init__()
        self.conv1 = GATConv(input_dim, hidden_dim, heads=heads)
        self.conv2 = GATConv(hidden_dim * heads, output_dim, heads=1)  # Single head in the final layer

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.elu(x)  # ELU is commonly used in GAT models instead of ReLU
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [204]:
# Initialize model, optimizer, and loss function
input_dim = 1
hidden_dim = 16
output_dim = len(set(trainDictionaryY.values()))  # Number of classes

model = GCN(input_dim, hidden_dim, output_dim, num_layers=3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

model.train()
for epoch in tqdm(range(400)):
    optimizer.zero_grad()
    out = model(train_x, train_edge_index)
    loss = loss_fn(out, train_y)
    loss.backward()
    optimizer.step()
    if (epoch+1)%20 == 0 or epoch==0:
        print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute metrics
    acc = accuracy_score(test_y, pred)
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')  # Adjust depending on binary or multi-class
    cm = confusion_matrix(test_y, pred)
    cm = cm.astype('float') / len(pred)


    print(f'Accuracy: {acc:.3f}')
    print(f'AUC: {auc:.3f}')
    print('Confusion Matrix:')
    print(f"                  Predicted Negative    Predicted Positive")
    print(f"Actual Negative       {cm[0, 0]:.3f}           {cm[0, 1]:.3f}")
    print(f"Actual Positive       {cm[1, 0]:.3f}           {cm[1, 1]:.3f}")

    print("\n--------------------------just sybils-----------------------------\n")

    test_out = model(test_x, test_edge_index)
    test_out = test_out[mask2]
    pred = test_out.argmax(dim=1).cpu()
    acc = accuracy_score(test_y2, pred)
    print(f'Accuracy: {acc:.3f}')


with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute AUC score as usual
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')
    fpr, tpr, thresholds = roc_curve(test_y, test_out[:, 1])
    j_scores = tpr - fpr
    optimal_threshold = thresholds[j_scores.argmax()]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc = accuracy_score(test_y, pred)

    # just sybil
    test_out = test_out[mask2]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc_sybils = accuracy_score(test_y2, pred)
    print("Accuracy with optimal threshold:", acc)
    print("Accuracy with optimal threshold just for sybils:", acc_sybils)

  0%|          | 1/400 [00:00<02:43,  2.44it/s]

Epoch 1, Loss: 0.7187546491622925


  5%|▌         | 21/400 [00:06<01:41,  3.75it/s]

Epoch 20, Loss: 0.6340564489364624


 10%|█         | 41/400 [00:10<00:57,  6.23it/s]

Epoch 40, Loss: 0.6338817477226257


 15%|█▌        | 61/400 [00:13<00:53,  6.35it/s]

Epoch 60, Loss: 0.6338405609130859


 20%|██        | 81/400 [00:16<00:51,  6.24it/s]

Epoch 80, Loss: 0.6350160241127014


 25%|██▌       | 101/400 [00:20<00:55,  5.39it/s]

Epoch 100, Loss: 0.6301419734954834


 30%|███       | 121/400 [00:24<00:44,  6.21it/s]

Epoch 120, Loss: 0.6343177556991577


 35%|███▌      | 141/400 [00:27<00:41,  6.20it/s]

Epoch 140, Loss: 0.6323092579841614


 40%|████      | 160/400 [00:30<00:39,  6.06it/s]

Epoch 160, Loss: 0.6329246759414673


 45%|████▌     | 181/400 [00:34<00:39,  5.60it/s]

Epoch 180, Loss: 0.6335758566856384


 50%|█████     | 201/400 [00:38<00:31,  6.24it/s]

Epoch 200, Loss: 0.631949782371521


 55%|█████▌    | 221/400 [00:41<00:27,  6.43it/s]

Epoch 220, Loss: 0.6313079595565796


 60%|██████    | 240/400 [00:44<00:27,  5.84it/s]

Epoch 240, Loss: 0.6334832906723022


 65%|██████▌   | 261/400 [00:49<00:24,  5.69it/s]

Epoch 260, Loss: 0.6303413510322571


 70%|███████   | 281/400 [00:52<00:19,  6.12it/s]

Epoch 280, Loss: 0.6301495432853699


 75%|███████▌  | 301/400 [00:55<00:15,  6.19it/s]

Epoch 300, Loss: 0.627575159072876


 80%|████████  | 320/400 [00:58<00:15,  5.13it/s]

Epoch 320, Loss: 0.6246885061264038


 85%|████████▌ | 341/400 [01:03<00:09,  6.00it/s]

Epoch 340, Loss: 0.6261425018310547


 90%|█████████ | 361/400 [01:06<00:06,  6.12it/s]

Epoch 360, Loss: 0.6259609460830688


 95%|█████████▌| 381/400 [01:09<00:03,  6.23it/s]

Epoch 380, Loss: 0.6282768845558167


100%|██████████| 400/400 [01:13<00:00,  5.48it/s]

Epoch 400, Loss: 0.621724009513855





Accuracy: 0.661
AUC: 0.393
Confusion Matrix:
                  Predicted Negative    Predicted Positive
Actual Negative       0.661           0.000
Actual Positive       0.339           0.000

--------------------------just sybils-----------------------------

Accuracy: 0.000
Accuracy with optimal threshold: 0.339555166082457
Accuracy with optimal threshold just for sybils: 0.9996308282982415


In [200]:
# Initialize model, optimizer, and loss function
input_dim = 1
hidden_dim = 4
output_dim = len(set(trainDictionaryY.values()))  # Number of classes

model = GCN(input_dim, hidden_dim, output_dim, num_layers=4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

model.train()
for epoch in tqdm(range(900)):
    optimizer.zero_grad()
    out = model(train_x, train_edge_index)
    loss = loss_fn(out, train_y)
    loss.backward()
    optimizer.step()
    if (epoch+1)%20 == 0 or epoch==0:
        print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute metrics
    acc = accuracy_score(test_y, pred)
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')  # Adjust depending on binary or multi-class
    cm = confusion_matrix(test_y, pred)
    cm = cm.astype('float') / len(pred)


    print(f'Accuracy: {acc:.3f}')
    print(f'AUC: {auc:.3f}')
    print('Confusion Matrix:')
    print(f"                  Predicted Negative    Predicted Positive")
    print(f"Actual Negative       {cm[0, 0]:.3f}           {cm[0, 1]:.3f}")
    print(f"Actual Positive       {cm[1, 0]:.3f}           {cm[1, 1]:.3f}")

    print("\n--------------------------just sybils-----------------------------\n")

    test_out = model(test_x, test_edge_index)
    test_out = test_out[mask2]
    pred = test_out.argmax(dim=1).cpu()
    acc = accuracy_score(test_y2, pred)
    print(f'Accuracy: {acc:.3f}')

# model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute AUC score as usual
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')
    fpr, tpr, thresholds = roc_curve(test_y, test_out[:, 1])
    j_scores = tpr - fpr
    optimal_threshold = thresholds[j_scores.argmax()]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc = accuracy_score(test_y, pred)

    # just sybil
    test_out = test_out[mask2]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc_sybils = accuracy_score(test_y2, pred)
    print("Accuracy with optimal threshold:", acc)
    print("Accuracy with optimal threshold just for sybils:", acc_sybils)

  0%|          | 1/900 [00:00<01:57,  7.66it/s]

Epoch 1, Loss: 0.6871949434280396


  2%|▏         | 21/900 [00:02<01:58,  7.42it/s]

Epoch 20, Loss: 0.6314573287963867


  5%|▍         | 41/900 [00:05<01:54,  7.51it/s]

Epoch 40, Loss: 0.6228500008583069


  7%|▋         | 60/900 [00:08<02:06,  6.62it/s]

Epoch 60, Loss: 0.6245721578598022


  9%|▉         | 81/900 [00:12<02:15,  6.05it/s]

Epoch 80, Loss: 0.6214005947113037


 11%|█         | 101/900 [00:15<01:47,  7.44it/s]

Epoch 100, Loss: 0.6169861555099487


 13%|█▎        | 121/900 [00:18<01:45,  7.39it/s]

Epoch 120, Loss: 0.6191636323928833


 16%|█▌        | 141/900 [00:20<01:41,  7.44it/s]

Epoch 140, Loss: 0.6228303909301758


 18%|█▊        | 160/900 [00:24<02:40,  4.61it/s]

Epoch 160, Loss: 0.6204693913459778


 20%|██        | 181/900 [00:27<01:39,  7.22it/s]

Epoch 180, Loss: 0.6202832460403442


 22%|██▏       | 201/900 [00:30<01:34,  7.42it/s]

Epoch 200, Loss: 0.620721161365509


 25%|██▍       | 221/900 [00:33<01:31,  7.44it/s]

Epoch 220, Loss: 0.6184046864509583


 27%|██▋       | 241/900 [00:35<01:30,  7.32it/s]

Epoch 240, Loss: 0.6186428666114807


 29%|██▉       | 260/900 [00:39<02:22,  4.48it/s]

Epoch 260, Loss: 0.6182431578636169


 31%|███       | 281/900 [00:43<01:22,  7.54it/s]

Epoch 280, Loss: 0.6174297332763672


 33%|███▎      | 301/900 [00:45<01:19,  7.51it/s]

Epoch 300, Loss: 0.6148807406425476


 36%|███▌      | 321/900 [00:48<01:18,  7.33it/s]

Epoch 320, Loss: 0.6166278719902039


 38%|███▊      | 340/900 [00:51<01:41,  5.50it/s]

Epoch 340, Loss: 0.6175216436386108


 40%|████      | 361/900 [00:55<01:20,  6.71it/s]

Epoch 360, Loss: 0.6193756461143494


 42%|████▏     | 381/900 [00:58<01:10,  7.38it/s]

Epoch 380, Loss: 0.61679607629776


 45%|████▍     | 401/900 [01:00<01:08,  7.30it/s]

Epoch 400, Loss: 0.6189859509468079


 47%|████▋     | 421/900 [01:03<01:04,  7.47it/s]

Epoch 420, Loss: 0.6173849105834961


 49%|████▉     | 440/900 [01:06<01:39,  4.62it/s]

Epoch 440, Loss: 0.6181730628013611


 51%|█████     | 461/900 [01:10<00:59,  7.33it/s]

Epoch 460, Loss: 0.6133067011833191


 53%|█████▎    | 481/900 [01:13<00:57,  7.34it/s]

Epoch 480, Loss: 0.6160266399383545


 56%|█████▌    | 501/900 [01:16<00:53,  7.39it/s]

Epoch 500, Loss: 0.6191539764404297


 58%|█████▊    | 521/900 [01:18<00:51,  7.43it/s]

Epoch 520, Loss: 0.6186871528625488


 60%|██████    | 541/900 [01:23<01:08,  5.26it/s]

Epoch 540, Loss: 0.619486927986145


 62%|██████▏   | 561/900 [01:25<00:46,  7.32it/s]

Epoch 560, Loss: 0.6150709390640259


 65%|██████▍   | 581/900 [01:28<00:43,  7.33it/s]

Epoch 580, Loss: 0.6174647212028503


 67%|██████▋   | 601/900 [01:31<00:39,  7.51it/s]

Epoch 600, Loss: 0.6207337379455566


 69%|██████▉   | 620/900 [01:34<01:01,  4.54it/s]

Epoch 620, Loss: 0.618650496006012


 71%|███████   | 641/900 [01:38<00:36,  7.18it/s]

Epoch 640, Loss: 0.6127781271934509


 73%|███████▎  | 661/900 [01:41<00:32,  7.43it/s]

Epoch 660, Loss: 0.6161658763885498


 76%|███████▌  | 681/900 [01:43<00:29,  7.33it/s]

Epoch 680, Loss: 0.6125075817108154


 78%|███████▊  | 701/900 [01:46<00:27,  7.15it/s]

Epoch 700, Loss: 0.6184164881706238


 80%|████████  | 720/900 [01:50<00:38,  4.65it/s]

Epoch 720, Loss: 0.6168346405029297


 82%|████████▏ | 741/900 [01:53<00:21,  7.40it/s]

Epoch 740, Loss: 0.6147407293319702


 85%|████████▍ | 761/900 [01:56<00:18,  7.49it/s]

Epoch 760, Loss: 0.6142576336860657


 87%|████████▋ | 781/900 [01:59<00:16,  7.29it/s]

Epoch 780, Loss: 0.6166989207267761


 89%|████████▉ | 800/900 [02:01<00:18,  5.30it/s]

Epoch 800, Loss: 0.6165232062339783


 91%|█████████ | 821/900 [02:06<00:11,  6.85it/s]

Epoch 820, Loss: 0.618055522441864


 93%|█████████▎| 841/900 [02:08<00:08,  7.28it/s]

Epoch 840, Loss: 0.6180877685546875


 96%|█████████▌| 861/900 [02:11<00:05,  7.20it/s]

Epoch 860, Loss: 0.6161966919898987


 98%|█████████▊| 881/900 [02:14<00:02,  7.43it/s]

Epoch 880, Loss: 0.617091715335846


100%|██████████| 900/900 [02:17<00:00,  6.52it/s]

Epoch 900, Loss: 0.619329035282135





Accuracy: 0.661
AUC: 0.402
Confusion Matrix:
                  Predicted Negative    Predicted Positive
Actual Negative       0.661           0.000
Actual Positive       0.339           0.000

--------------------------just sybils-----------------------------

Accuracy: 0.000
Accuracy with optimal threshold: 0.3423844099482557
Accuracy with optimal threshold just for sybils: 0.9957422197063854


In [201]:
# Initialize model, optimizer, and loss function
input_dim = 1
hidden_dim = 16
output_dim = len(set(trainDictionaryY.values()))  # Number of classes

model = GAT(input_dim, hidden_dim, output_dim, heads=4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

model.train()
for epoch in tqdm(range(400)):
    optimizer.zero_grad()
    out = model(train_x, train_edge_index)
    loss = loss_fn(out, train_y)
    loss.backward()
    optimizer.step()
    if (epoch+1)%20 == 0 or epoch==0:
        print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute metrics
    acc = accuracy_score(test_y, pred)
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')  # Adjust depending on binary or multi-class
    cm = confusion_matrix(test_y, pred)
    cm = cm.astype('float') / len(pred)


    print(f'Accuracy: {acc:.3f}')
    print(f'AUC: {auc:.3f}')
    print('Confusion Matrix:')
    print(f"                  Predicted Negative    Predicted Positive")
    print(f"Actual Negative       {cm[0, 0]:.3f}           {cm[0, 1]:.3f}")
    print(f"Actual Positive       {cm[1, 0]:.3f}           {cm[1, 1]:.3f}")


    print("\n--------------------------just sybils-----------------------------\n")

    test_out = model(test_x, test_edge_index)
    test_out = test_out[mask2]
    pred = test_out.argmax(dim=1).cpu()
    acc = accuracy_score(test_y2, pred)
    print(f'Accuracy: {acc:.3f}')

# model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute AUC score as usual
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')
    fpr, tpr, thresholds = roc_curve(test_y, test_out[:, 1])
    j_scores = tpr - fpr
    optimal_threshold = thresholds[j_scores.argmax()]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc = accuracy_score(test_y, pred)

    # just sybil
    test_out = test_out[mask2]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc_sybils = accuracy_score(test_y2, pred)
    print("Accuracy with optimal threshold:", acc)
    print("Accuracy with optimal threshold just for sybils:", acc_sybils)

  0%|          | 1/400 [00:00<03:31,  1.89it/s]

Epoch 1, Loss: 0.7374464869499207


  5%|▌         | 20/400 [00:11<03:53,  1.63it/s]

Epoch 20, Loss: 0.6248778700828552


 10%|█         | 40/400 [00:22<04:01,  1.49it/s]

Epoch 40, Loss: 0.5681548714637756


 15%|█▌        | 60/400 [00:33<02:56,  1.93it/s]

Epoch 60, Loss: 0.4933188855648041


 20%|██        | 80/400 [00:45<02:47,  1.91it/s]

Epoch 80, Loss: 0.4526253938674927


 25%|██▌       | 100/400 [00:57<02:37,  1.91it/s]

Epoch 100, Loss: 0.4361429214477539


 30%|███       | 120/400 [01:09<02:34,  1.81it/s]

Epoch 120, Loss: 0.4259282648563385


 35%|███▌      | 140/400 [01:20<03:01,  1.43it/s]

Epoch 140, Loss: 0.40881094336509705


 40%|████      | 160/400 [01:31<02:25,  1.64it/s]

Epoch 160, Loss: 0.4012042284011841


 45%|████▌     | 180/400 [01:42<01:54,  1.92it/s]

Epoch 180, Loss: 0.40029650926589966


 50%|█████     | 200/400 [01:54<01:46,  1.88it/s]

Epoch 200, Loss: 0.3997192978858948


 55%|█████▌    | 220/400 [02:06<01:37,  1.85it/s]

Epoch 220, Loss: 0.39928925037384033


 60%|██████    | 240/400 [02:18<01:36,  1.66it/s]

Epoch 240, Loss: 0.3989126682281494


 65%|██████▌   | 260/400 [02:29<01:36,  1.44it/s]

Epoch 260, Loss: 0.3985716700553894


 70%|███████   | 280/400 [02:40<01:02,  1.91it/s]

Epoch 280, Loss: 0.39825472235679626


 75%|███████▌  | 300/400 [02:53<00:52,  1.91it/s]

Epoch 300, Loss: 0.3979615569114685


 80%|████████  | 320/400 [03:04<00:41,  1.92it/s]

Epoch 320, Loss: 0.39769700169563293


 85%|████████▌ | 340/400 [03:16<00:32,  1.86it/s]

Epoch 340, Loss: 0.39745011925697327


 90%|█████████ | 360/400 [03:28<00:23,  1.70it/s]

Epoch 360, Loss: 0.39723384380340576


 95%|█████████▌| 380/400 [03:39<00:13,  1.44it/s]

Epoch 380, Loss: 0.397045373916626


100%|██████████| 400/400 [03:50<00:00,  1.74it/s]

Epoch 400, Loss: 0.3968808352947235





Accuracy: 0.859
AUC: 0.902
Confusion Matrix:
                  Predicted Negative    Predicted Positive
Actual Negative       0.611           0.050
Actual Positive       0.091           0.248

--------------------------just sybils-----------------------------

Accuracy: 0.731
Accuracy with optimal threshold: 0.8589091971290269
Accuracy with optimal threshold just for sybils: 0.7313414469069565


In [202]:
# Initialize model, optimizer, and loss function
input_dim = 1
hidden_dim = 4
output_dim = len(set(trainDictionaryY.values()))  # Number of classes

model = GAT(input_dim, hidden_dim, output_dim, heads=4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

model.train()
for epoch in tqdm(range(400)):
    optimizer.zero_grad()
    out = model(train_x, train_edge_index)
    loss = loss_fn(out, train_y)
    loss.backward()
    optimizer.step()
    if (epoch+1)%20 == 0 or epoch==0:
        print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute metrics
    acc = accuracy_score(test_y, pred)
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')  # Adjust depending on binary or multi-class
    cm = confusion_matrix(test_y, pred)
    cm = cm.astype('float') / len(pred)


    print(f'Accuracy: {acc:.3f}')
    print(f'AUC: {auc:.3f}')
    print('Confusion Matrix:')
    print(f"                  Predicted Negative    Predicted Positive")
    print(f"Actual Negative       {cm[0, 0]:.3f}           {cm[0, 1]:.3f}")
    print(f"Actual Positive       {cm[1, 0]:.3f}           {cm[1, 1]:.3f}")


    print("\n--------------------------just sybils-----------------------------\n")

    test_out = model(test_x, test_edge_index)
    test_out = test_out[mask2]
    pred = test_out.argmax(dim=1).cpu()
    acc = accuracy_score(test_y2, pred)
    print(f'Accuracy: {acc:.3f}')

# model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute AUC score as usual
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')
    fpr, tpr, thresholds = roc_curve(test_y, test_out[:, 1])
    j_scores = tpr - fpr
    optimal_threshold = thresholds[j_scores.argmax()]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc = accuracy_score(test_y, pred)

    # just sybil
    test_out = test_out[mask2]
    pred = (test_out[:, 1] >= optimal_threshold)
    acc_sybils = accuracy_score(test_y2, pred)
    print("Accuracy with optimal threshold:", acc)
    print("Accuracy with optimal threshold just for sybils:", acc_sybils)

  0%|          | 1/400 [00:00<01:25,  4.67it/s]

Epoch 1, Loss: 0.676198422908783


  5%|▌         | 20/400 [00:04<01:23,  4.54it/s]

Epoch 20, Loss: 0.6228294968605042


 10%|█         | 40/400 [00:08<01:30,  3.97it/s]

Epoch 40, Loss: 0.5681564807891846


 15%|█▌        | 60/400 [00:14<01:15,  4.51it/s]

Epoch 60, Loss: 0.5039029717445374


 20%|██        | 80/400 [00:19<01:09,  4.58it/s]

Epoch 80, Loss: 0.45633450150489807


 25%|██▌       | 100/400 [00:23<01:31,  3.27it/s]

Epoch 100, Loss: 0.43924203515052795


 30%|███       | 120/400 [00:29<01:01,  4.53it/s]

Epoch 120, Loss: 0.4339584708213806


 35%|███▌      | 140/400 [00:33<00:57,  4.52it/s]

Epoch 140, Loss: 0.428366094827652


 40%|████      | 160/400 [00:39<01:21,  2.95it/s]

Epoch 160, Loss: 0.4185997247695923


 45%|████▌     | 180/400 [00:44<00:48,  4.55it/s]

Epoch 180, Loss: 0.4045342206954956


 50%|█████     | 200/400 [00:48<00:43,  4.60it/s]

Epoch 200, Loss: 0.40143516659736633


 55%|█████▌    | 220/400 [00:54<01:02,  2.89it/s]

Epoch 220, Loss: 0.4009459912776947


 60%|██████    | 240/400 [00:59<00:35,  4.55it/s]

Epoch 240, Loss: 0.4006095826625824


 65%|██████▌   | 260/400 [01:03<00:30,  4.57it/s]

Epoch 260, Loss: 0.40040311217308044


 70%|███████   | 280/400 [01:09<00:38,  3.11it/s]

Epoch 280, Loss: 0.4002630114555359


 75%|███████▌  | 300/400 [01:13<00:21,  4.55it/s]

Epoch 300, Loss: 0.4001624286174774


 80%|████████  | 320/400 [01:18<00:17,  4.56it/s]

Epoch 320, Loss: 0.40007513761520386


 85%|████████▌ | 340/400 [01:24<00:15,  3.88it/s]

Epoch 340, Loss: 0.3999920189380646


 90%|█████████ | 360/400 [01:28<00:08,  4.62it/s]

Epoch 360, Loss: 0.39991021156311035


 95%|█████████▌| 380/400 [01:33<00:04,  4.53it/s]

Epoch 380, Loss: 0.3998268246650696


100%|██████████| 400/400 [01:38<00:00,  4.04it/s]

Epoch 400, Loss: 0.3997393548488617





Accuracy: 0.802
AUC: 0.902
Confusion Matrix:
                  Predicted Negative    Predicted Positive
Actual Negative       0.532           0.129
Actual Positive       0.069           0.270

--------------------------just sybils-----------------------------

Accuracy: 0.796
Accuracy with optimal threshold: 0.8590302119846436
Accuracy with optimal threshold just for sybils: 0.7281542645484415
