<a href="https://colab.research.google.com/github/aSafarpoor/OSN_FAD/blob/main/CLS/FAD%2BNN%2BNode2Vec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!pip install node2vec

Collecting node2vec
  Downloading node2vec-0.5.0-py3-none-any.whl.metadata (849 bytes)
Downloading node2vec-0.5.0-py3-none-any.whl (7.2 kB)
Installing collected packages: node2vec
Successfully installed node2vec-0.5.0


In [10]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [13]:
import networkx as nx
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt
from node2vec import Node2Vec
import random
SEED = 1

In [22]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

# read data

In [1]:
edges = []
with open('graph.txt', 'r') as file:
    for line in file:
        # Split each line into two numbers and convert them to integers
        pair = list(map(int, line.strip().split()))
        edges.append(pair)


with open('train.txt', 'r') as file:
    lines = file.readlines()
    btrain = list(map(float, lines[0].strip().split()))
    strain = list(map(float, lines[1].strip().split()))

with open('test.txt', 'r') as file:
    lines = file.readlines()
    btest = list(map(float, lines[0].strip().split()))
    stest = list(map(float, lines[1].strip().split()))

In [2]:
print("Length of edges:", len(edges),
      "| Length of btrain:", len(btrain),
      "| Length of btest:", len(btest),
      "| Length of strain:", len(strain),
      "| Length of stest:", len(stest))

Length of edges: 6818501 | Length of btrain: 20000 | Length of btest: 158377 | Length of strain: 10000 | Length of stest: 81263


# data preparation

In [53]:
bigG = nx.Graph()
bigG.add_edges_from(edges)

In [72]:
trainG = bigG.subgraph(btrain+strain).copy()
testG = bigG.subgraph(btest+stest).copy()

In [73]:
print(len(bigG.nodes()),len(trainG.nodes()),len(testG.nodes()))

269640 30000 239640


In [74]:
def reindex_graph_and_dictionaries(G, train, test):
    new_index_mapping = {old_index: new_index for new_index, old_index in enumerate(G.nodes())}
    G_reindexed = nx.relabel_nodes(G, new_index_mapping)

    train = [new_index_mapping[int(node)] for node in train]
    test =  [new_index_mapping[int(node)] for node in test]

    return G_reindexed, train, test

In [75]:
trainG,btrain,strain =reindex_graph_and_dictionaries(trainG, btrain,strain)
testG,btest,stest =reindex_graph_and_dictionaries(testG, btest,stest)

In [91]:
trainDictionaryX = {}
trainDictionaryY = {}
testDictionaryX = {}
testDictionaryY = {}

random.seed(SEED)

for node in btrain:
    if random.random() < 0.8:
        trainDictionaryX[node] = [0]
    else:
        trainDictionaryX[node] = [0.5]
    trainDictionaryY[node] = 0

for node in strain:
    if random.random() < 0.8:
        trainDictionaryX[node] = [1]
    else:
        trainDictionaryX[node] = [0.5]
    trainDictionaryY[node] = 1

for node in btest:
    if random.random() < 0.8:
        testDictionaryX[node] = [0]
    else:
        testDictionaryX[node] = [0.5]
    testDictionaryY[node] = 0

for node in stest:
    if random.random() < 0.8:
        testDictionaryX[node] = [1]
    else:
        testDictionaryX[node] = [0.5]
    testDictionaryY[node] = 1

#model

In [92]:
class GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [93]:
# Prepare data (assuming PyTorch Geometric format for graphs)
def prepare_data(graph, dictionaryX, dictionaryY):
    x = torch.tensor([dictionaryX[node] for node in graph.nodes()], dtype=torch.float)
    y = torch.tensor([dictionaryY[node] for node in graph.nodes()], dtype=torch.long)
    # edge_index = torch.tensor(list(graph.edges())).t().contiguous()
    edge_index = torch.tensor(list(graph.edges()), dtype=torch.long).t().contiguous()
    return x, y, edge_index

In [94]:
train_x, train_y, train_edge_index = prepare_data(trainG, trainDictionaryX, trainDictionaryY)
test_x, test_y, test_edge_index = prepare_data(testG, testDictionaryX, testDictionaryY)

In [95]:
# Initialize model, optimizer, and loss function
input_dim = 1
hidden_dim = 16
output_dim = len(set(trainDictionaryY.values()))  # Number of classes

model = GCN(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

model.train()
for epoch in tqdm(range(300)):
    optimizer.zero_grad()
    out = model(train_x, train_edge_index)
    loss = loss_fn(out, train_y)
    loss.backward()
    optimizer.step()
    if (epoch+1)%20 == 0 or epoch==0:
        print(f'Epoch {epoch + 1}, Loss: {loss.item()}')


  1%|          | 2/300 [00:00<00:27, 10.65it/s]

Epoch 1, Loss: 0.7179412245750427


  7%|▋         | 22/300 [00:02<00:27, 10.18it/s]

Epoch 20, Loss: 0.5448640584945679


 14%|█▍        | 42/300 [00:04<00:25, 10.05it/s]

Epoch 40, Loss: 0.42602235078811646


 21%|██        | 62/300 [00:06<00:23, 10.09it/s]

Epoch 60, Loss: 0.35117241740226746


 27%|██▋       | 81/300 [00:08<00:30,  7.13it/s]

Epoch 80, Loss: 0.31953153014183044


 33%|███▎      | 100/300 [00:11<00:25,  7.73it/s]

Epoch 100, Loss: 0.3089323937892914


 41%|████      | 122/300 [00:13<00:17,  9.93it/s]

Epoch 120, Loss: 0.3060009777545929


 47%|████▋     | 142/300 [00:15<00:15,  9.93it/s]

Epoch 140, Loss: 0.30529454350471497


 54%|█████▎    | 161/300 [00:17<00:13,  9.95it/s]

Epoch 160, Loss: 0.3051179349422455


 60%|██████    | 181/300 [00:19<00:12,  9.86it/s]

Epoch 180, Loss: 0.30504146218299866


 67%|██████▋   | 201/300 [00:21<00:12,  7.69it/s]

Epoch 200, Loss: 0.3049876093864441


 74%|███████▎  | 221/300 [00:24<00:11,  6.66it/s]

Epoch 220, Loss: 0.3049410581588745


 80%|████████  | 241/300 [00:26<00:05, 10.21it/s]

Epoch 240, Loss: 0.30489930510520935


 87%|████████▋ | 261/300 [00:28<00:03, 10.08it/s]

Epoch 260, Loss: 0.30486178398132324


 94%|█████████▎| 281/300 [00:30<00:01,  9.95it/s]

Epoch 280, Loss: 0.3048284947872162


100%|██████████| 300/300 [00:32<00:00,  9.18it/s]

Epoch 300, Loss: 0.3047991394996643





In [96]:
model.eval()
with torch.no_grad():
    test_out = model(test_x, test_edge_index)
    pred = test_out.argmax(dim=1).cpu()

    # Compute metrics
    acc = accuracy_score(test_y, pred)
    auc = roc_auc_score(test_y, test_out[:, 1], multi_class='ovo')  # Adjust depending on binary or multi-class
    cm = confusion_matrix(test_y, pred)
    cm = cm.astype('float') / len(pred)


    print(f'Accuracy: {acc:.3f}')
    print(f'AUC: {auc:.3f}')
    print('Confusion Matrix:')
    print(f"                  Predicted Negative    Predicted Positive")
    print(f"Actual Negative       {cm[0, 0]:.3f}           {cm[0, 1]:.3f}")
    print(f"Actual Positive       {cm[1, 0]:.3f}           {cm[1, 1]:.3f}")

Accuracy: 0.865
AUC: 0.924
Confusion Matrix:
                  Predicted Negative    Predicted Positive
Actual Negative       0.640           0.021
Actual Positive       0.114           0.225
