In [19]:
pip install torch torch-geometric pandas scikit-learn




In [27]:
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from torch_geometric.nn import SAGEConv

In [28]:
entities = pd.read_excel("entity.csv.xlsx")
relations = pd.read_excel("relation.csv.xlsx")




In [29]:
print(entities.head())
print(relations.head())


  entity_id  account_age_days  kyc_verified  avg_balance  country 
0        C1              1200             1        54000         1
1        C2                50             0         1200         1
2        C3               800             1        23000         2
3        P1                 0             1            0         1
4        P2                 0             1            0         1
  source_id target_id      relation
0        P1        C1   director_of
1        P1        C2   director_of
2        P2        C3   director_of
3        C1        A1  uses_account
4        C2        A1  uses_account


In [30]:
encoder = LabelEncoder()
entities["node_id"] = encoder.fit_transform(entities["entity_id"])

id_map = dict(zip(entities["entity_id"], entities["node_id"]))
print(id_map)

{'C1': 1, 'C2': 2, 'C3': 3, 'P1': 4, 'P2': 5, 'A1': 0}


In [31]:
edge_index = torch.tensor(
    [
        [id_map[src] for src in relations["source_id"]],
        [id_map[tgt] for tgt in relations["target_id"]],
    ],
    dtype=torch.long
)

print(edge_index)


tensor([[4, 4, 5, 1, 2, 1],
        [1, 2, 3, 0, 0, 2]])


In [36]:
x = torch.tensor(
    entities[
        ["account_age_days", "kyc_verified", "avg_balance", "country "]
    ].values,
    dtype=torch.float
)

print(x.shape)  # (num_nodes, num_features)

torch.Size([6, 4])


In [37]:
y = torch.zeros(len(entities), dtype=torch.long)

# Mark suspicious companies
y[id_map["C1"]] = 1
y[id_map["C2"]] = 1


In [38]:
class FraudGNN(torch.nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, 16)
        self.conv2 = SAGEConv(16, 2)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x



In [39]:
model = FraudGNN(x.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

for epoch in range(200):
    model.train()
    optimizer.zero_grad()

    out = model(x, edge_index)
    loss = loss_fn(out, y)

    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")


Epoch 0 | Loss: 1960.9224
Epoch 20 | Loss: 0.1884
Epoch 40 | Loss: 0.0547
Epoch 60 | Loss: 0.0135
Epoch 80 | Loss: 0.0058
Epoch 100 | Loss: 0.0035
Epoch 120 | Loss: 0.0024
Epoch 140 | Loss: 0.0018
Epoch 160 | Loss: 0.0014
Epoch 180 | Loss: 0.0011


In [40]:
model.eval()
with torch.no_grad():
    logits = model(x, edge_index)
    probs = torch.softmax(logits, dim=1)

entities["fraud_probability"] = probs[:, 1].numpy()
print(entities[["entity_id", "fraud_probability"]])


  entity_id  fraud_probability
0        C1           0.000000
1        C2           1.000000
2        C3           1.000000
3        P1           0.000322
4        P2           0.002661
5        A1           0.002661


In [None]:
# C1 â†’ high probability (ðŸš¨)
# C2 â†’ high probability (ðŸš¨)
# C3 â†’ low probability
# P1, P2, A1 â†’ neutral
