# Graph Neural Network Training for Fraud Detection

In [1]:
import sys
sys.path.append('../src')

import torch
import numpy as np
import pandas as pd
import pickle
from torch_geometric.data import DataLoader
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

from gnn_model import FraudGNN, GraphDataProcessor, GNNTrainer

In [2]:
# Load data
# All persisted artifacts live in one directory; keep the location in a single place.
MODELS_DIR = '../models'

# Feature matrices and label vectors saved as .npy files, loaded in a fixed order.
X_train, X_test, y_train, y_test = (
    np.load(f'{MODELS_DIR}/{name}.npy')
    for name in ('X_train', 'X_test', 'y_train', 'y_test')
)

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts that were produced by this project.
with open(f'{MODELS_DIR}/feature_names.pkl', 'rb') as f:
    feature_names = pickle.load(f)

# Fail early, with a readable message, if the artifacts do not belong together;
# otherwise the mismatch only surfaces later as an opaque pandas error.
assert X_train.shape[1] == X_test.shape[1] == len(feature_names), (
    f"Feature count mismatch: X_train has {X_train.shape[1]} columns, "
    f"X_test has {X_test.shape[1]}, feature_names has {len(feature_names)}"
)
assert len(X_train) == len(y_train), "X_train and y_train have different lengths"
assert len(X_test) == len(y_test), "X_test and y_test have different lengths"

print(f"Training data: {X_train.shape}, Features: {len(feature_names)}")

Training data: (227845, 32), Features: 32


In [3]:
# Create DataFrames with synthetic Time/Amount for graph creation
N_TRAIN_ROWS = 2000  # Limit for efficiency
N_TEST_ROWS = 500
GRAPH_SEED = 42  # fixes the synthetic Amount values so reruns build the same frames


def make_graph_frame(X, y, columns, n_rows, rng):
    """Build the DataFrame handed to the graph processor from the first rows of a split.

    Args:
        X: 2-D feature array; only the first ``n_rows`` rows are used.
        y: label array aligned with ``X``; stored in the ``Class`` column.
        columns: column names for the feature matrix.
        n_rows: maximum number of leading rows to keep.
        rng: ``numpy.random.Generator`` used to draw the synthetic amounts.

    Returns:
        DataFrame with the feature columns plus ``Class``, ``Time`` (0..n-1)
        and ``Amount`` (exponential draws with scale 50).
    """
    frame = pd.DataFrame(X[:n_rows], columns=columns)
    frame['Class'] = y[:n_rows]
    # NOTE(review): if `columns` already contains 'Time' or 'Amount', the two
    # assignments below overwrite those real features -- confirm this is intended.
    frame['Time'] = np.arange(len(frame))
    frame['Amount'] = rng.exponential(50, len(frame))
    return frame


# One seeded generator shared by both splits: train amounts are drawn first, then test.
rng = np.random.default_rng(GRAPH_SEED)
train_df = make_graph_frame(X_train, y_train, feature_names, N_TRAIN_ROWS, rng)
test_df = make_graph_frame(X_test, y_test, feature_names, N_TEST_ROWS, rng)

In [4]:
# Create graphs
# A single processor instance converts the train frame first, then the test frame.
processor = GraphDataProcessor()
train_graph, test_graph = (
    processor.create_transaction_graph(frame, feature_names)
    for frame in (train_df, test_df)
)

# Report the size of each graph as a quick sanity check.
print(f"Train graph: {train_graph.num_nodes} nodes, {train_graph.num_edges} edges")
print(f"Test graph: {test_graph.num_nodes} nodes, {test_graph.num_edges} edges")

Train graph: 2000 nodes, 66860 edges
Test graph: 500 nodes, 16600 edges


In [5]:
# Create data loaders
# Each loader wraps exactly one graph, so every pass over a loader yields one batch.
train_loader = DataLoader([train_graph], batch_size=1)
# NOTE(review): the validation loader is built from test_graph, so any metric
# computed on it is measured on the test split rather than a separate hold-out.
val_loader = DataLoader([test_graph], batch_size=1)

# Seed torch's global RNG before the model is constructed so that runs are repeatable.
# (Assumes FraudGNN draws its initial weights from torch's global generator -- confirm.)
MODEL_SEED = 42
torch.manual_seed(MODEL_SEED)

# Initialize model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FraudGNN(input_dim=len(feature_names))
trainer = GNNTrainer(model, device)

print(f"Using device: {device}")

Using device: cpu




In [6]:
# Train model
# Calls trainer.train with both loaders for 30 epochs and prints the value it
# returns, formatted to four decimals, as the best validation AUC.
# NOTE(review): the saved run of this cell stopped inside trainer.train with
#   ValueError: Using a target size (torch.Size([2000])) that is different to
#   the input size (torch.Size([]))
# i.e. the loss received a scalar prediction against 2000 labels. Presumably the
# model (or a squeeze in the trainer) collapses the output to one value per graph
# while the labels are per node -- confirm in gnn_model before re-running.
print("Training GNN...")
best_auc = trainer.train(train_loader, val_loader, epochs=30)
print(f"Best validation AUC: {best_auc:.4f}")

Training GNN...


ValueError: Using a target size (torch.Size([2000])) that is different to the input size (torch.Size([])) is deprecated. Please ensure they have the same size.

In [None]:
# Evaluate
# trainer.evaluate returns three values: the AUC, the predicted scores and the labels.
evaluation = trainer.evaluate(val_loader)
test_auc, test_predictions, test_labels = evaluation

# Scores strictly above 0.5 become class 1, everything else class 0.
above_threshold = test_predictions > 0.5
test_pred_binary = above_threshold.astype(int)

print(f"Test AUC: {test_auc:.4f}")
report = classification_report(test_labels, test_pred_binary)
print(report)

# Save predictions
# Scores and labels are written next to the other model artifacts.
np.save('../models/gnn_test_predictions.npy', test_predictions)
np.save('../models/gnn_test_labels.npy', test_labels)

print("GNN training completed!")