# Anti fraud mechanism

Training the neural network

In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Loading the CSV dataset
rawData = pd.read_csv('../data/transactional-sample.csv', dtype={
    'transaction_id': int,
    'merchant_id': int,
    'user_id': int,
    'card_number': str,
    'transaction_date': str,
    'transaction_amount': float,
    'device_id': str,
    'has_cbk': str
    })
data = rawData.copy()

# Building the neural network model
class FraudDetectionModel(nn.Module):
    def __init__(self, input_dim):
        super(FraudDetectionModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

# Organizing and filtering useful data
data['has_cbk'] = data['has_cbk'].map({'TRUE': 1, 'FALSE': 0})
target = data['has_cbk'].astype(int)

data['transaction_date'] = pd.to_datetime(data['transaction_date'])

data['transaction_day'] = data['transaction_date'].dt.day
data['transaction_hour'] = data['transaction_date'].dt.hour
data['transaction_week_day'] = data['transaction_date'].dt.dayofweek

features = data.drop(columns=['transaction_id', 'merchant_id', 'user_id', 'device_id', 'card_number', 'transaction_date', 'has_cbk'])

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Converting to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Initialize the model
input_dim = X_train.shape[1]
model = FraudDetectionModel(input_dim)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 200
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

# Step 5: Model Evaluation
with torch.no_grad():
    outputs = model(X_test_tensor)
    predictions = (outputs >= 0.5).float()
    accuracy = accuracy_score(y_test_tensor.numpy(), predictions.numpy())
    precision = precision_score(y_test_tensor.numpy(), predictions.numpy())
    recall = recall_score(y_test_tensor.numpy(), predictions.numpy())
    f1 = f1_score(y_test_tensor.numpy(), predictions.numpy())
    roc_auc = roc_auc_score(y_test_tensor.numpy(), outputs.numpy())

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("ROC AUC Score:", roc_auc)


Accuracy: 0.8609375
Precision: 0.5714285714285714
Recall: 0.044444444444444446
F1 Score: 0.08247422680412372
ROC AUC Score: 0.6698787878787879


Defining functions

In [10]:
def verifySuspiciousEntity(fieldName, entity):
  fraud_data = rawData[rawData['has_cbk'] == 'TRUE']
  cbk_amount = fraud_data.groupby(fieldName)['has_cbk'].count().reset_index()
  no_fraud_data = rawData[rawData['has_cbk'] == 'FALSE']
  transaction_data = rawData.copy()
  transaction_amount = transaction_data.groupby(fieldName)['transaction_id'].count().reset_index()
  cbk_amount = pd.merge(cbk_amount, transaction_amount, on=fieldName, how='left')
  cbk_amount.columns = [fieldName, 'cbk_amount', 'transaction_amount']
  cbk_amount['cbk_rate'] = cbk_amount['cbk_amount'] / cbk_amount['transaction_amount']
  userInTable = cbk_amount.loc[cbk_amount[fieldName] == entity]
  if userInTable.empty:
    return ''
  entityFraudRate = userInTable['cbk_rate'].values[0]

  if entityFraudRate > 0.2:
    return f"Potential risk found: {fieldName} contains {round(entityFraudRate*100, 2)}% of chargeback rate."
  return ''

###############################################################################

def preprocess_data(example_dt):
  example_dt['transaction_date'] = pd.to_datetime(example_dt['transaction_date'])

  example_dt['transaction_day'] = example_dt['transaction_date'].dt.day
  example_dt['transaction_hour'] = example_dt['transaction_date'].dt.hour
  example_dt['transaction_week_day'] = data['transaction_date'].dt.dayofweek

  example_dt = example_dt.drop(columns=['transaction_date', 'merchant_id', 'user_id', 'card_number', 'device_id'])
  return example_dt

###############################################################################

def verifyPotentialFraudRisk(transaction):
  specific_case_features = preprocess_data(transaction)
  specific_case_tensor = torch.tensor(specific_case_features.values, dtype=torch.float32)

  with torch.no_grad():
    output = model(specific_case_tensor)
    probability = output.item()
  print("Probability of fraud based on datetime and amount: " + str(round((probability*100), 2)) + "%. ")

  userRisk = verifySuspiciousEntity('user_id', transaction['user_id'].values[0])
  merchantRisk = verifySuspiciousEntity('merchant_id', transaction['merchant_id'].values[0])
  deviceRisk = verifySuspiciousEntity('device_id', transaction['device_id'].values[0])
  cardRisk = verifySuspiciousEntity('card_number', transaction['card_number'].values[0])

  if (userRisk != '' or merchantRisk != '' or deviceRisk != '' or cardRisk != ''):
    print(userRisk, merchantRisk, deviceRisk, cardRisk)
    print('Payment rejected.')
  else:
    print("Payment " + ("accepted." if probability < 0.5 else "rejected."))


Testing the mechanism

In [44]:
specific_case = pd.DataFrame({
    'merchant_id': 29744,
    'user_id': 3584,
    'card_number': '434505******9116',
    'device_id': 285475,
    'transaction_date': ['2019-12-07T23:16:32.812632'],
    'transaction_amount': [136]
    })

verifyPotentialFraudRisk(specific_case)

Probability of fraud based on datetime and amount: 3.8%. 
Potential risk found: user_id contains 66.67% chargeback rate.   
Payment rejected.
