In [1]:
!pip install torch torch-geometric 

Defaulting to user installation because normal site-packages is not writeable


In [17]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [18]:
df = pd.read_csv('project.csv')

In [19]:
df.shape

(1048575, 11)

In [20]:
df.isnull()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
1048570,False,False,False,False,False,False,False,False,False,False,False
1048571,False,False,False,False,False,False,False,False,False,False,False
1048572,False,False,False,False,False,False,False,False,False,False,False
1048573,False,False,False,False,False,False,False,False,False,False,False


In [21]:
df.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [22]:
df['type'] = df['type'].astype('category').cat.codes

In [23]:
features = ['step', 'type', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
X = df[features].values
y = df['isFraud'].values

In [24]:
edge_index = torch.randint(0, X.shape[0], (2, 50000))

In [25]:
X = torch.tensor(X, dtype=torch.float)
y = torch.tensor(y, dtype=torch.long)

In [26]:
data = Data(x=X, edge_index=edge_index, y=y)

In [27]:
class GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [28]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(input_dim=X.shape[1], hidden_dim=32, output_dim=2).to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [31]:
model.train()
for epoch in range(10):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out, data.y)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')


Epoch 1, Loss: 551.9313
Epoch 2, Loss: 539.2540
Epoch 3, Loss: 526.3215
Epoch 4, Loss: 513.1244
Epoch 5, Loss: 499.6556
Epoch 6, Loss: 485.9106
Epoch 7, Loss: 471.8776
Epoch 8, Loss: 457.5443
Epoch 9, Loss: 442.9160
Epoch 10, Loss: 427.9935


In [32]:
model.eval()
_, pred = model(data).max(dim=1)
report = classification_report(data.y.cpu(), pred.cpu(), digits=4)
print(report)

              precision    recall  f1-score   support

           0     0.9994    0.9716    0.9853   1047433
           1     0.0182    0.4825    0.0350      1142

    accuracy                         0.9711   1048575
   macro avg     0.5088    0.7270    0.5102   1048575
weighted avg     0.9984    0.9711    0.9843   1048575

