In [1]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Dataset class for grouped sequences of transactions by `cc_num`
class CreditCardFraudDataset(Dataset):
    def __init__(self, file_path, seq_len):
        self.data = pd.read_csv(file_path)

        # Convert date to timestamp
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time']).apply(lambda x: x.timestamp())

        # Encode category column
        self.label_encoder = LabelEncoder()
        self.data['category'] = self.label_encoder.fit_transform(self.data['category'])

        # Normalize features
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'is_fraud', 'trans_date_trans_time']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, -2]  # Fraud label of the current transaction
                time_intervals = np.diff([s[-1] for s in seq], prepend=seq[0][-1])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-1] for s in seq])  # Remove timestamp from features
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals as feature
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# TH-LSTM Model (with time-aware gate `T_t`)
class TH_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, memory_size):
        super(TH_LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.memory_size = memory_size

        # LSTM weights
        self.Wsh = nn.Linear(hidden_dim, hidden_dim)
        self.Wsx = nn.Linear(input_dim, hidden_dim)
        self.Wst = nn.Linear(1, hidden_dim)
        self.bs = nn.Parameter(torch.zeros(hidden_dim))

        self.Wfh = nn.Linear(hidden_dim, hidden_dim)
        self.Wfx = nn.Linear(input_dim, hidden_dim)
        self.Wfs = nn.Linear(hidden_dim, hidden_dim)
        self.bf = nn.Parameter(torch.zeros(hidden_dim))

        self.Wih = nn.Linear(hidden_dim, hidden_dim)
        self.Wix = nn.Linear(input_dim, hidden_dim)
        self.Wis = nn.Linear(hidden_dim, hidden_dim)
        self.bi = nn.Parameter(torch.zeros(hidden_dim))

        self.Wuh = nn.Linear(hidden_dim, hidden_dim)
        self.Wux = nn.Linear(input_dim, hidden_dim)
        self.Wus = nn.Linear(hidden_dim, hidden_dim)
        self.bu = nn.Parameter(torch.zeros(hidden_dim))

        self.WTh = nn.Linear(hidden_dim, hidden_dim)
        self.WTx = nn.Linear(input_dim, hidden_dim)
        self.WTs = nn.Linear(hidden_dim, hidden_dim)
        self.bT = nn.Parameter(torch.zeros(hidden_dim))

        self.Woh = nn.Linear(hidden_dim, hidden_dim)
        self.Wox = nn.Linear(input_dim, hidden_dim)
        self.Wos = nn.Linear(hidden_dim, hidden_dim)
        self.bo = nn.Parameter(torch.zeros(hidden_dim))

        # Attention module
        self.Waq = nn.Linear(hidden_dim * 2, hidden_dim)
        self.Wah = nn.Linear(hidden_dim, hidden_dim)
        self.ba = nn.Parameter(torch.zeros(hidden_dim))
        self.vt = nn.Parameter(torch.randn(hidden_dim, 1))

        # Output layer
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, X_seq):
        batch_size = X_seq.size(0)
        seq_len = X_seq.size(1)
        h_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        c_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        historical_states = torch.zeros(batch_size, self.memory_size, self.hidden_dim).to(X_seq.device)

        for t in range(seq_len):
            x_t = X_seq[:, t, :-1]
            delta_t = X_seq[:, t, -1].view(-1, 1)

            s_t = torch.tanh(self.Wsh(h_prev) + self.Wsx(x_t) + self.Wst(delta_t) + self.bs)
            f_t = torch.sigmoid(self.Wfh(h_prev) + self.Wfx(x_t) + self.Wfs(s_t) + self.bf)
            i_t = torch.sigmoid(self.Wih(h_prev) + self.Wix(x_t) + self.Wis(s_t) + self.bi)
            T_t = torch.sigmoid(self.WTh(h_prev) + self.WTx(x_t) + self.WTs(s_t) + self.bT)
            zeta_t = torch.tanh(self.Wuh(h_prev) + self.Wux(x_t) + self.Wus(s_t) + self.bu)
            c_t = f_t * c_prev + i_t * zeta_t + T_t * s_t
            o_t = torch.sigmoid(self.Woh(h_prev) + self.Wox(x_t) + self.Wos(s_t) + self.bo)
            h_t = o_t * torch.tanh(c_t)

            historical_states = torch.cat((historical_states[:, 1:], h_t.unsqueeze(1)), dim=1)
            h_prev, c_prev = h_t, c_t

        q_t = torch.cat((h_t, c_t), dim=1)
        o_t_i = torch.tanh(self.Waq(q_t).unsqueeze(1) + self.Wah(historical_states))
        alpha_t_i = torch.exp(torch.matmul(o_t_i, self.vt)).squeeze(-1)
        alpha_t_i = alpha_t_i / torch.sum(alpha_t_i, dim=1, keepdim=True)
        e_t = torch.sum(alpha_t_i.unsqueeze(-1) * historical_states, dim=1)

        y_pred = torch.sigmoid(self.classifier(e_t))
        return y_pred


# Training and testing
if __name__ == "__main__":
    batch_size = 32
    input_dim = 3  # category, amt, delta_t
    hidden_dim = 64
    memory_size = 10
    seq_len = 5
    epochs = 1

    train_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTrain.csv", seq_len=seq_len)
    test_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTest.csv", seq_len=seq_len)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = TH_LSTM(input_dim, hidden_dim, memory_size).to('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        y_true_epoch = []
        y_pred_epoch = []

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            y_pred_binary = (y_pred >= 0.5).float()
            y_true_epoch.extend(y_batch.cpu().numpy())
            y_pred_epoch.extend(y_pred_binary.cpu().numpy())

        epoch_accuracy = accuracy_score(y_true_epoch, y_pred_epoch)
        print(f"Epoch [{epoch + 1}/{epochs}] Loss: {total_loss / len(train_loader):.4f} | Accuracy: {epoch_accuracy:.4f}")

    # Testing phase with thresholds
    model.eval()
    y_true_test = []
    y_pred_test_prob = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            y_pred = model(X_batch)
            y_true_test.extend(y_batch.cpu().numpy())
            y_pred_test_prob.extend(y_pred.cpu().numpy())

    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.9]
    for threshold in thresholds:
        y_pred_test_binary = (np.array(y_pred_test_prob) >= threshold).astype(int)
        accuracy = accuracy_score(y_true_test, y_pred_test_binary)
        precision = precision_score(y_true_test, y_pred_test_binary, zero_division=0)
        recall = recall_score(y_true_test, y_pred_test_binary)
        f1 = f1_score(y_true_test, y_pred_test_binary)
        auc = roc_auc_score(y_true_test, y_pred_test_binary)

        print(f"Threshold: {threshold:.2f} | Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")

Epoch [1/1] Loss: 0.0022 | Accuracy: 0.9995
Threshold: 0.10 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.20 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.30 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.40 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.50 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.70 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.90 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000


In [2]:
train_cards = set(train_dataset.data['cc_num'])
test_cards = set(test_dataset.data['cc_num'])
overlap = train_cards.intersection(test_cards)
print(f"Số lượng thẻ tín dụng xuất hiện trong cả tập train và test: {len(overlap)}")

Số lượng thẻ tín dụng xuất hiện trong cả tập train và test: 908


In [3]:
# Loại bỏ các thẻ tín dụng trùng lặp
test_dataset.data = test_dataset.data[~test_dataset.data['cc_num'].isin(train_cards)]
print(f"Số lượng giao dịch còn lại trong tập kiểm tra: {len(test_dataset.data)}")

Số lượng giao dịch còn lại trong tập kiểm tra: 163


In [4]:
# Lấy danh sách tất cả các thẻ tín dụng từ tập train
train_cc_nums = train_dataset.data['cc_num'].unique()  # Các thẻ tín dụng trong tập train ban đầu
np.random.shuffle(train_cc_nums)  # Xáo trộn để đảm bảo ngẫu nhiên

# Chia lại tập train thành hai tập mới
new_train_cc_nums = train_cc_nums[:int(0.8 * len(train_cc_nums))]
new_test_cc_nums = train_cc_nums[int(0.8 * len(train_cc_nums)):]

# Tạo tập train và test mới
new_train_data = train_dataset.data[train_dataset.data['cc_num'].isin(new_train_cc_nums)]
new_test_data = train_dataset.data[train_dataset.data['cc_num'].isin(new_test_cc_nums)]

print(f"Số lượng giao dịch trong tập train mới: {len(new_train_data)}")
print(f"Số lượng giao dịch trong tập test mới: {len(new_test_data)}")
print(f"Số thẻ tín dụng trong tập train mới: {len(new_train_cc_nums)}")
print(f"Số thẻ tín dụng trong tập test mới: {len(new_test_cc_nums)}")

Số lượng giao dịch trong tập train mới: 1034175
Số lượng giao dịch trong tập test mới: 262500
Số thẻ tín dụng trong tập train mới: 786
Số thẻ tín dụng trong tập test mới: 197


Still have Optimi but little

In [1]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import random

# Dataset class for grouped sequences of transactions by `cc_num`
class CreditCardFraudDataset(Dataset):
    def __init__(self, data, seq_len):
        self.data = data.copy()

        # Convert date to timestamp
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time']).apply(lambda x: x.timestamp())

        # Encode category column
        self.label_encoder = LabelEncoder()
        self.data['category'] = self.label_encoder.fit_transform(self.data['category'])

        # Normalize features
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'is_fraud', 'trans_date_trans_time']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, -2]  # Fraud label of the current transaction
                time_intervals = np.diff([s[-1] for s in seq], prepend=seq[0][-1])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-1] for s in seq])  # Remove timestamp from features
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals as feature
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# TH-LSTM Model (with time-aware gate `T_t`)
class TH_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, memory_size):
        super(TH_LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.memory_size = memory_size

        # LSTM weights
        self.Wsh = nn.Linear(hidden_dim, hidden_dim)
        self.Wsx = nn.Linear(input_dim, hidden_dim)
        self.Wst = nn.Linear(1, hidden_dim)
        self.bs = nn.Parameter(torch.zeros(hidden_dim))

        self.Wfh = nn.Linear(hidden_dim, hidden_dim)
        self.Wfx = nn.Linear(input_dim, hidden_dim)
        self.Wfs = nn.Linear(hidden_dim, hidden_dim)
        self.bf = nn.Parameter(torch.zeros(hidden_dim))

        self.Wih = nn.Linear(hidden_dim, hidden_dim)
        self.Wix = nn.Linear(input_dim, hidden_dim)
        self.Wis = nn.Linear(hidden_dim, hidden_dim)
        self.bi = nn.Parameter(torch.zeros(hidden_dim))

        self.Wuh = nn.Linear(hidden_dim, hidden_dim)
        self.Wux = nn.Linear(input_dim, hidden_dim)
        self.Wus = nn.Linear(hidden_dim, hidden_dim)
        self.bu = nn.Parameter(torch.zeros(hidden_dim))

        self.WTh = nn.Linear(hidden_dim, hidden_dim)
        self.WTx = nn.Linear(input_dim, hidden_dim)
        self.WTs = nn.Linear(hidden_dim, hidden_dim)
        self.bT = nn.Parameter(torch.zeros(hidden_dim))

        self.Woh = nn.Linear(hidden_dim, hidden_dim)
        self.Wox = nn.Linear(input_dim, hidden_dim)
        self.Wos = nn.Linear(hidden_dim, hidden_dim)
        self.bo = nn.Parameter(torch.zeros(hidden_dim))

        # Attention module
        self.Waq = nn.Linear(hidden_dim * 2, hidden_dim)
        self.Wah = nn.Linear(hidden_dim, hidden_dim)
        self.ba = nn.Parameter(torch.zeros(hidden_dim))
        self.vt = nn.Parameter(torch.randn(hidden_dim, 1))

        # Output layer
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, X_seq):
        batch_size = X_seq.size(0)
        seq_len = X_seq.size(1)
        h_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        c_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        historical_states = torch.zeros(batch_size, self.memory_size, self.hidden_dim).to(X_seq.device)

        for t in range(seq_len):
            x_t = X_seq[:, t, :-1]
            delta_t = X_seq[:, t, -1].view(-1, 1)

            s_t = torch.tanh(self.Wsh(h_prev) + self.Wsx(x_t) + self.Wst(delta_t) + self.bs)
            f_t = torch.sigmoid(self.Wfh(h_prev) + self.Wfx(x_t) + self.Wfs(s_t) + self.bf)
            i_t = torch.sigmoid(self.Wih(h_prev) + self.Wix(x_t) + self.Wis(s_t) + self.bi)
            T_t = torch.sigmoid(self.WTh(h_prev) + self.WTx(x_t) + self.WTs(s_t) + self.bT)
            zeta_t = torch.tanh(self.Wuh(h_prev) + self.Wux(x_t) + self.Wus(s_t) + self.bu)
            c_t = f_t * c_prev + i_t * zeta_t + T_t * s_t
            o_t = torch.sigmoid(self.Woh(h_prev) + self.Wox(x_t) + self.Wos(s_t) + self.bo)
            h_t = o_t * torch.tanh(c_t)

            historical_states = torch.cat((historical_states[:, 1:], h_t.unsqueeze(1)), dim=1)
            h_prev, c_prev = h_t, c_t

        q_t = torch.cat((h_t, c_t), dim=1)
        o_t_i = torch.tanh(self.Waq(q_t).unsqueeze(1) + self.Wah(historical_states))
        alpha_t_i = torch.exp(torch.matmul(o_t_i, self.vt)).squeeze(-1)
        alpha_t_i = alpha_t_i / torch.sum(alpha_t_i, dim=1, keepdim=True)
        e_t = torch.sum(alpha_t_i.unsqueeze(-1) * historical_states, dim=1)

        y_pred = torch.sigmoid(self.classifier(e_t))
        return y_pred


# Training and testing
if __name__ == "__main__":
    batch_size = 32
    input_dim = 3  # category, amt, delta_t
    hidden_dim = 32
    memory_size = 10
    seq_len = 5
    epochs = 3

    # Load training dataset
    train_data = pd.read_csv("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTrain.csv")

    # Split the training data into 80% for train and 20% for validation
    all_cc_nums = train_data['cc_num'].unique()
    random.shuffle(all_cc_nums)

    train_cc_nums = all_cc_nums[:int(0.8 * len(all_cc_nums))]
    val_cc_nums = all_cc_nums[int(0.8 * len(all_cc_nums)):]

    new_train_data = train_data[train_data['cc_num'].isin(train_cc_nums)]
    new_val_data = train_data[train_data['cc_num'].isin(val_cc_nums)]

    print(f"New train size: {len(new_train_data)} transactions")
    print(f"New validation size: {len(new_val_data)} transactions")

    # Create datasets and loaders
    train_dataset = CreditCardFraudDataset(new_train_data, seq_len=seq_len)
    val_dataset = CreditCardFraudDataset(new_val_data, seq_len=seq_len)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Define model
    model = TH_LSTM(input_dim, hidden_dim, memory_size).to('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()

# Training loop with accuracy and validation metrics at different thresholds
for epoch in range(epochs):
    model.train()
    total_loss = 0
    train_preds = []
    train_labels = []

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Collect predictions and labels for training accuracy
        train_preds.extend(y_pred.detach().cpu().numpy())
        train_labels.extend(y_batch.detach().cpu().numpy())

    train_preds = np.array(train_preds).flatten()
    train_labels = np.array(train_labels).flatten()
    train_pred_binary = (train_preds >= 0.5).astype(int)
    train_accuracy = accuracy_score(train_labels, train_pred_binary)

    print(f"Epoch [{epoch + 1}/{epochs}] Loss: {total_loss / len(train_loader):.4f} | Train Accuracy: {train_accuracy:.4f}")

    # Evaluate on validation set
    model.eval()
    val_preds = []
    val_labels = []

    with torch.no_grad():
        for X_val, y_val in val_loader:
            X_val, y_val = X_val.to('cuda'), y_val.view(-1, 1).to('cuda')
            y_val_pred = model(X_val)
            val_preds.extend(y_val_pred.cpu().numpy())
            val_labels.extend(y_val.cpu().numpy())

    val_preds = np.array(val_preds).flatten()
    val_labels = np.array(val_labels).flatten()

    # Calculate metrics for each threshold
    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
    print("Validation Metrics:")
    for threshold in thresholds:
        y_pred_thresholded = (val_preds >= threshold).astype(int)
        accuracy = accuracy_score(val_labels, y_pred_thresholded)
        precision = precision_score(val_labels, y_pred_thresholded, zero_division=0)
        recall = recall_score(val_labels, y_pred_thresholded)
        f1 = f1_score(val_labels, y_pred_thresholded)
        auc = roc_auc_score(val_labels, val_preds)
        
        print(f"Threshold: {threshold:.2f} | Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")

New train size: 1032454 transactions
New validation size: 264221 transactions
Epoch [1/3] Loss: 0.0022 | Train Accuracy: 0.9995
Validation Metrics:
Threshold: 0.10 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.20 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.30 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.40 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Threshold: 0.50 | Accuracy: 1.0000 | Precision: 1.0000 | Recall: 1.0000 | F1: 1.0000 | AUC: 1.0000
Epoch [2/3] Loss: 0.0005 | Train Accuracy: 0.9999
Validation Metrics:
Threshold: 0.10 | Accuracy: 0.9999 | Precision: 0.9912 | Recall: 1.0000 | F1: 0.9956 | AUC: 1.0000
Threshold: 0.20 | Accuracy: 1.0000 | Precision: 0.9931 | Recall: 1.0000 | F1: 0.9965 | AUC: 1.0000
Threshold: 0.30 | Accuracy: 1.0000 | Precision: 0.9956 | Recall: 1.0000 | F1: 0.9978 | AU

Perfect with 80:20

In [6]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import random

# Dataset class for grouped sequences of transactions by `cc_num`
class CreditCardFraudDataset(Dataset):
    def __init__(self, data, seq_len):
        self.data = data.copy()

        # Convert date to timestamp
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time']).apply(lambda x: x.timestamp())

        # Encode category column
        self.label_encoder = LabelEncoder()
        self.data['category'] = self.label_encoder.fit_transform(self.data['category'])

        # Normalize features
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'is_fraud', 'trans_date_trans_time']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, -2]  # Fraud label of the current transaction
                time_intervals = np.diff([s[-1] for s in seq], prepend=seq[0][-1])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-1] for s in seq])  # Remove timestamp from features
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals as feature
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# TH-LSTM Model (with Dropout and Regularization)
class TH_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, memory_size):
        super(TH_LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.memory_size = memory_size
        self.dropout = nn.Dropout(p=0.3)  # Dropout: ngẫu nhiên bỏ qua 30% nơ-ron

        # LSTM weights
        self.Wsh = nn.Linear(hidden_dim, hidden_dim)
        self.Wsx = nn.Linear(input_dim, hidden_dim)
        self.Wst = nn.Linear(1, hidden_dim)
        self.bs = nn.Parameter(torch.zeros(hidden_dim))

        self.Wfh = nn.Linear(hidden_dim, hidden_dim)
        self.Wfx = nn.Linear(input_dim, hidden_dim)
        self.Wfs = nn.Linear(hidden_dim, hidden_dim)
        self.bf = nn.Parameter(torch.zeros(hidden_dim))

        self.Wih = nn.Linear(hidden_dim, hidden_dim)
        self.Wix = nn.Linear(input_dim, hidden_dim)
        self.Wis = nn.Linear(hidden_dim, hidden_dim)
        self.bi = nn.Parameter(torch.zeros(hidden_dim))

        self.Wuh = nn.Linear(hidden_dim, hidden_dim)
        self.Wux = nn.Linear(input_dim, hidden_dim)
        self.Wus = nn.Linear(hidden_dim, hidden_dim)
        self.bu = nn.Parameter(torch.zeros(hidden_dim))

        self.WTh = nn.Linear(hidden_dim, hidden_dim)
        self.WTx = nn.Linear(input_dim, hidden_dim)
        self.WTs = nn.Linear(hidden_dim, hidden_dim)
        self.bT = nn.Parameter(torch.zeros(hidden_dim))

        self.Woh = nn.Linear(hidden_dim, hidden_dim)
        self.Wox = nn.Linear(input_dim, hidden_dim)
        self.Wos = nn.Linear(hidden_dim, hidden_dim)
        self.bo = nn.Parameter(torch.zeros(hidden_dim))

        # Attention module
        self.Waq = nn.Linear(hidden_dim * 2, hidden_dim)
        self.Wah = nn.Linear(hidden_dim, hidden_dim)
        self.ba = nn.Parameter(torch.zeros(hidden_dim))
        self.vt = nn.Parameter(torch.randn(hidden_dim, 1))

        # Output layer
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, X_seq):
        batch_size = X_seq.size(0)
        seq_len = X_seq.size(1)
        h_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        c_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        historical_states = torch.zeros(batch_size, self.memory_size, self.hidden_dim).to(X_seq.device)

        for t in range(seq_len):
            x_t = X_seq[:, t, :-1]
            delta_t = X_seq[:, t, -1].view(-1, 1)

            s_t = torch.tanh(self.Wsh(h_prev) + self.Wsx(x_t) + self.Wst(delta_t) + self.bs)
            f_t = torch.sigmoid(self.Wfh(h_prev) + self.Wfx(x_t) + self.Wfs(s_t) + self.bf)
            i_t = torch.sigmoid(self.Wih(h_prev) + self.Wix(x_t) + self.Wis(s_t) + self.bi)
            T_t = torch.sigmoid(self.WTh(h_prev) + self.WTx(x_t) + self.WTs(s_t) + self.bT)
            zeta_t = torch.tanh(self.Wuh(h_prev) + self.Wux(x_t) + self.Wus(s_t) + self.bu)
            c_t = f_t * c_prev + i_t * zeta_t + T_t * s_t
            o_t = torch.sigmoid(self.Woh(h_prev) + self.Wox(x_t) + self.Wos(s_t) + self.bo)
            h_t = o_t * torch.tanh(c_t)

            # Add dropout at the hidden layer
            h_t = self.dropout(h_t)  # Dropout at the output layer

            historical_states = torch.cat((historical_states[:, 1:], h_t.unsqueeze(1)), dim=1)
            h_prev, c_prev = h_t, c_t

        q_t = torch.cat((h_t, c_t), dim=1)
        o_t_i = torch.tanh(self.Waq(q_t).unsqueeze(1) + self.Wah(historical_states))
        alpha_t_i = torch.exp(torch.matmul(o_t_i, self.vt)).squeeze(-1)
        alpha_t_i = alpha_t_i / torch.sum(alpha_t_i, dim=1, keepdim=True)
        e_t = torch.sum(alpha_t_i.unsqueeze(-1) * historical_states, dim=1)

        y_pred = torch.sigmoid(self.classifier(e_t))
        return y_pred


# Training loop with L2 Regularization (weight_decay)
if __name__ == "__main__":
    batch_size = 32
    input_dim = 3  # category, amt, delta_t
    hidden_dim = 8  # Đã giảm kích thước hidden_dim từ 64 xuống 32
    memory_size = 10
    seq_len = 5
    epochs = 1

    # Load dataset
    train_data = pd.read_csv("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTrain.csv")

    # Split the training data (80% train, 20% validation)
    all_cc_nums = train_data['cc_num'].unique()
    random.shuffle(all_cc_nums)
    train_cc_nums = all_cc_nums[:int(0.8 * len(all_cc_nums))]
    val_cc_nums = all_cc_nums[int(0.8 * len(all_cc_nums)):]

    new_train_data = train_data[train_data['cc_num'].isin(train_cc_nums)]
    new_val_data = train_data[train_data['cc_num'].isin(val_cc_nums)]

    train_dataset = CreditCardFraudDataset(new_train_data, seq_len=seq_len)
    val_dataset = CreditCardFraudDataset(new_val_data, seq_len=seq_len)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Define model, optimizer, and loss function with weight decay (L2 Regularization)
    model = TH_LSTM(input_dim, hidden_dim, memory_size).to('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    criterion = nn.BCELoss()

    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{epochs}] Loss: {total_loss / len(train_loader):.4f}")

    print("Training complete.")

Epoch [1/1] Loss: 0.0050
Training complete.


In [3]:
# Function to evaluate the model
def evaluate_model(model, data_loader, criterion):
    model.eval()
    y_true = []
    y_pred_prob = []
    total_loss = 0

    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            total_loss += loss.item()

            y_true.extend(y_batch.cpu().numpy())  # True labels
            y_pred_prob.extend(y_pred.cpu().numpy())  # Predicted probabilities

    # Convert probabilities to binary predictions with threshold 0.5
    y_pred_binary = (np.array(y_pred_prob) >= 0.5).astype(int)

    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred_binary)
    precision = precision_score(y_true, y_pred_binary, zero_division=0)
    recall = recall_score(y_true, y_pred_binary)
    f1 = f1_score(y_true, y_pred_binary)
    auc = roc_auc_score(y_true, y_pred_prob)

    avg_loss = total_loss / len(data_loader)
    
    return avg_loss, accuracy, precision, recall, f1, auc

# After training, evaluate the model on the validation set
val_loss, val_accuracy, val_precision, val_recall, val_f1, val_auc = evaluate_model(model, val_loader, criterion)

# Print validation metrics
print("\nValidation Results:")
print(f"Validation Loss: {val_loss:.4f}")
print(f"Accuracy: {val_accuracy:.4f}")
print(f"Precision: {val_precision:.4f}")
print(f"Recall: {val_recall:.4f}")
print(f"F1-Score: {val_f1:.4f}")
print(f"AUC: {val_auc:.4f}")


Validation Results:
Validation Loss: 0.0003
Accuracy: 1.0000
Precision: 1.0000
Recall: 0.9986
F1-Score: 0.9993
AUC: 1.0000


In [4]:
similar_rows = pd.merge(new_train_data, new_val_data, on=['category', 'amt', 'trans_date_trans_time'])
print(f"Số lượng giao dịch tương tự giữa train và validation: {len(similar_rows)}")

Số lượng giao dịch tương tự giữa train và validation: 0


In [5]:
fraud_ratio_train = new_train_data['is_fraud'].mean()
fraud_ratio_val = new_val_data['is_fraud'].mean()
print(f"Tỷ lệ giao dịch gian lận trong tập train: {fraud_ratio_train:.4f}")
print(f"Tỷ lệ giao dịch gian lận trong tập validation: {fraud_ratio_val:.4f}")

Tỷ lệ giao dịch gian lận trong tập train: 0.0059
Tỷ lệ giao dịch gian lận trong tập validation: 0.0054


In [7]:
fraud_train_data = new_train_data[new_train_data['is_fraud'] == 1]
print(fraud_train_data['category'].value_counts())  # Kiểm tra loại giao dịch gian lận phổ biến
print(fraud_train_data['amt'].describe())  # Thống kê về số tiền giao dịch gian lận

category
grocery_pos       1411
shopping_net      1387
misc_net           734
shopping_pos       672
gas_transport      502
misc_pos           195
kids_pets          193
entertainment      188
personal_care      173
home               161
food_dining        125
grocery_net        108
health_fitness     103
travel              95
Name: count, dtype: int64
count    6047.000000
mean      530.939836
std       390.658590
min         1.060000
25%       246.090000
50%       389.570000
75%       901.190000
max      1376.040000
Name: amt, dtype: float64


In [10]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import random
from math import sqrt

# Dataset class for grouped sequences of transactions by `cc_num`
class CreditCardFraudDataset(Dataset):
    def __init__(self, data, seq_len):
        self.data = data.copy()

        # Convert date to timestamp and extract time-based features
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time'])
        self.data['hour_of_day'] = self.data['trans_date_trans_time'].dt.hour
        self.data['day_of_week'] = self.data['trans_date_trans_time'].dt.dayofweek
        self.data['trans_date_trans_time'] = self.data['trans_date_trans_time'].apply(lambda x: x.timestamp())

        # Encode category and merchant columns
        self.category_encoder = LabelEncoder()
        self.data['category'] = self.category_encoder.fit_transform(self.data['category'])
        self.merchant_encoder = LabelEncoder()
        self.data['merchant'] = self.merchant_encoder.fit_transform(self.data['merchant'])

        # Normalize amt
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Approximate distance between user location and merchant location (faster than geodesic)
        self.data['distance_to_merchant'] = self.data.apply(
            lambda row: sqrt((row['lat'] - row['merch_lat'])**2 + (row['long'] - row['merch_long'])**2), axis=1
        )

        # Fill NaNs for missing values
        self.data.fillna(0, inplace=True)

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'is_fraud', 'trans_date_trans_time', 'hour_of_day', 'day_of_week', 'distance_to_merchant']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, 2]  # Fraud label of the current transaction
                time_intervals = np.diff([s[3] for s in seq], prepend=seq[0][3])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-1] for s in seq])  # Remove timestamp from features
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals as feature
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# Define the TH-LSTM model
class TH_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, memory_size):
        super(TH_LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.memory_size = memory_size
        self.dropout_hidden = nn.Dropout(p=0.4)
        self.dropout_attention = nn.Dropout(p=0.3)
        self.dropout_classifier = nn.Dropout(p=0.4)

        # LSTM weights
        self.Wsh = nn.Linear(hidden_dim, hidden_dim)
        self.Wsx = nn.Linear(input_dim, hidden_dim)
        self.Wst = nn.Linear(1, hidden_dim)
        self.bs = nn.Parameter(torch.zeros(hidden_dim))

        self.Wfh = nn.Linear(hidden_dim, hidden_dim)
        self.Wfx = nn.Linear(input_dim, hidden_dim)
        self.Wfs = nn.Linear(hidden_dim, hidden_dim)
        self.bf = nn.Parameter(torch.zeros(hidden_dim))

        self.Wih = nn.Linear(hidden_dim, hidden_dim)
        self.Wix = nn.Linear(input_dim, hidden_dim)
        self.Wis = nn.Linear(hidden_dim, hidden_dim)
        self.bi = nn.Parameter(torch.zeros(hidden_dim))

        self.Wuh = nn.Linear(hidden_dim, hidden_dim)
        self.Wux = nn.Linear(input_dim, hidden_dim)
        self.Wus = nn.Linear(hidden_dim, hidden_dim)
        self.bu = nn.Parameter(torch.zeros(hidden_dim))

        self.WTh = nn.Linear(hidden_dim, hidden_dim)
        self.WTx = nn.Linear(input_dim, hidden_dim)
        self.WTs = nn.Linear(hidden_dim, hidden_dim)
        self.bT = nn.Parameter(torch.zeros(hidden_dim))

        self.Woh = nn.Linear(hidden_dim, hidden_dim)
        self.Wox = nn.Linear(input_dim, hidden_dim)
        self.Wos = nn.Linear(hidden_dim, hidden_dim)
        self.bo = nn.Parameter(torch.zeros(hidden_dim))

        # Attention module
        self.Waq = nn.Linear(hidden_dim * 2, hidden_dim)
        self.Wah = nn.Linear(hidden_dim, hidden_dim)
        self.ba = nn.Parameter(torch.zeros(hidden_dim))
        self.vt = nn.Parameter(torch.randn(hidden_dim, 1))

        # Output layer
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, X_seq):
        batch_size = X_seq.size(0)
        seq_len = X_seq.size(1)
        h_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        c_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        historical_states = torch.zeros(batch_size, self.memory_size, self.hidden_dim).to(X_seq.device)

        for t in range(seq_len):
            x_t = X_seq[:, t, :-1]
            delta_t = X_seq[:, t, -1].view(-1, 1)

            s_t = torch.tanh(self.Wsh(h_prev) + self.Wsx(x_t) + self.Wst(delta_t) + self.bs)
            f_t = torch.sigmoid(self.Wfh(h_prev) + self.Wfx(x_t) + self.Wfs(s_t) + self.bf)
            i_t = torch.sigmoid(self.Wih(h_prev) + self.Wix(x_t) + self.Wis(s_t) + self.bi)
            T_t = torch.sigmoid(self.WTh(h_prev) + self.WTx(x_t) + self.WTs(s_t) + self.bT)
            zeta_t = torch.tanh(self.Wuh(h_prev) + self.Wux(x_t) + self.Wus(s_t) + self.bu)
            c_t = f_t * c_prev + i_t * zeta_t + T_t * s_t
            o_t = torch.sigmoid(self.Woh(h_prev) + self.Wox(x_t) + self.Wos(s_t) + self.bo)
            h_t = o_t * torch.tanh(c_t)

            h_t = self.dropout_hidden(h_t)  # Dropout at the output layer

            historical_states = torch.cat((historical_states[:, 1:], h_t.unsqueeze(1)), dim=1)
            h_prev, c_prev = h_t, c_t

        q_t = torch.cat((h_t, c_t), dim=1)
        o_t_i = torch.tanh(self.Waq(q_t).unsqueeze(1) + self.Wah(historical_states))
        alpha_t_i = self.dropout_attention(torch.exp(torch.matmul(o_t_i, self.vt)).squeeze(-1))
        alpha_t_i = alpha_t_i / torch.sum(alpha_t_i, dim=1, keepdim=True)
        e_t = torch.sum(alpha_t_i.unsqueeze(-1) * historical_states, dim=1)

        y_pred = torch.sigmoid(self.dropout_classifier(self.classifier(e_t)))
        return y_pred

In [17]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import random
from math import sqrt

class CreditCardFraudDataset(Dataset):
    def __init__(self, data, seq_len):
        self.data = data.copy()

        # Convert date to timestamp and extract time-based features
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time'])
        self.data['hour_of_day'] = self.data['trans_date_trans_time'].dt.hour
        self.data['day_of_week'] = self.data['trans_date_trans_time'].dt.dayofweek
        self.data['trans_date_trans_time'] = self.data['trans_date_trans_time'].apply(lambda x: x.timestamp())

        # Encode category and merchant columns
        self.category_encoder = LabelEncoder()
        self.data['category'] = self.category_encoder.fit_transform(self.data['category'])
        self.merchant_encoder = LabelEncoder()
        self.data['merchant'] = self.merchant_encoder.fit_transform(self.data['merchant'])

        # Normalize amt
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Approximate distance between user location and merchant location
        self.data['distance_to_merchant'] = self.data.apply(
            lambda row: sqrt((row['lat'] - row['merch_lat'])**2 + (row['long'] - row['merch_long'])**2), axis=1
        )

        # Fill NaNs for missing values
        self.data.fillna(0, inplace=True)

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'hour_of_day', 'day_of_week', 'distance_to_merchant', 'is_fraud', 'trans_date_trans_time']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, 5]  # Fraud label of the current transaction
                time_intervals = np.diff([s[-1] for s in seq], prepend=seq[0][-1])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-2] for s in seq])  # Exclude fraud label and timestamp
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]
        print(f"x_seq shape: {x_seq.shape}")  # Debug print for shape
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# Define the TH-LSTM model
class TH_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, memory_size):
        super(TH_LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.memory_size = memory_size
        self.dropout_hidden = nn.Dropout(p=0.4)
        self.dropout_attention = nn.Dropout(p=0.3)
        self.dropout_classifier = nn.Dropout(p=0.4)

        # LSTM weights
        self.Wsh = nn.Linear(hidden_dim, hidden_dim)
        self.Wsx = nn.Linear(input_dim, hidden_dim)
        self.Wst = nn.Linear(1, hidden_dim)
        self.bs = nn.Parameter(torch.zeros(hidden_dim))

        self.Wfh = nn.Linear(hidden_dim, hidden_dim)
        self.Wfx = nn.Linear(input_dim, hidden_dim)
        self.Wfs = nn.Linear(hidden_dim, hidden_dim)
        self.bf = nn.Parameter(torch.zeros(hidden_dim))

        self.Wih = nn.Linear(hidden_dim, hidden_dim)
        self.Wix = nn.Linear(input_dim, hidden_dim)
        self.Wis = nn.Linear(hidden_dim, hidden_dim)
        self.bi = nn.Parameter(torch.zeros(hidden_dim))

        self.Wuh = nn.Linear(hidden_dim, hidden_dim)
        self.Wux = nn.Linear(input_dim, hidden_dim)
        self.Wus = nn.Linear(hidden_dim, hidden_dim)
        self.bu = nn.Parameter(torch.zeros(hidden_dim))

        self.WTh = nn.Linear(hidden_dim, hidden_dim)
        self.WTx = nn.Linear(input_dim, hidden_dim)
        self.WTs = nn.Linear(hidden_dim, hidden_dim)
        self.bT = nn.Parameter(torch.zeros(hidden_dim))

        self.Woh = nn.Linear(hidden_dim, hidden_dim)
        self.Wox = nn.Linear(input_dim, hidden_dim)
        self.Wos = nn.Linear(hidden_dim, hidden_dim)
        self.bo = nn.Parameter(torch.zeros(hidden_dim))

        # Attention module
        self.Waq = nn.Linear(hidden_dim * 2, hidden_dim)
        self.Wah = nn.Linear(hidden_dim, hidden_dim)
        self.ba = nn.Parameter(torch.zeros(hidden_dim))
        self.vt = nn.Parameter(torch.randn(hidden_dim, 1))

        # Output layer
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, X_seq):
        batch_size = X_seq.size(0)
        seq_len = X_seq.size(1)
        h_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        c_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        historical_states = torch.zeros(batch_size, self.memory_size, self.hidden_dim).to(X_seq.device)

        for t in range(seq_len):
            x_t = X_seq[:, t, :-1]
            delta_t = X_seq[:, t, -1].view(-1, 1)

            s_t = torch.tanh(self.Wsh(h_prev) + self.Wsx(x_t) + self.Wst(delta_t) + self.bs)
            f_t = torch.sigmoid(self.Wfh(h_prev) + self.Wfx(x_t) + self.Wfs(s_t) + self.bf)
            i_t = torch.sigmoid(self.Wih(h_prev) + self.Wix(x_t) + self.Wis(s_t) + self.bi)
            T_t = torch.sigmoid(self.WTh(h_prev) + self.WTx(x_t) + self.WTs(s_t) + self.bT)
            zeta_t = torch.tanh(self.Wuh(h_prev) + self.Wux(x_t) + self.Wus(s_t) + self.bu)
            c_t = f_t * c_prev + i_t * zeta_t + T_t * s_t
            o_t = torch.sigmoid(self.Woh(h_prev) + self.Wox(x_t) + self.Wos(s_t) + self.bo)
            h_t = o_t * torch.tanh(c_t)

            h_t = self.dropout_hidden(h_t)  # Dropout at the output layer

            historical_states = torch.cat((historical_states[:, 1:], h_t.unsqueeze(1)), dim=1)
            h_prev, c_prev = h_t, c_t

        q_t = torch.cat((h_t, c_t), dim=1)
        o_t_i = torch.tanh(self.Waq(q_t).unsqueeze(1) + self.Wah(historical_states))
        alpha_t_i = self.dropout_attention(torch.exp(torch.matmul(o_t_i, self.vt)).squeeze(-1))
        alpha_t_i = alpha_t_i / torch.sum(alpha_t_i, dim=1, keepdim=True)
        e_t = torch.sum(alpha_t_i.unsqueeze(-1) * historical_states, dim=1)

        y_pred = torch.sigmoid(self.dropout_classifier(self.classifier(e_t)))
        return y_pred


# Function to evaluate the model
def evaluate_model(model, data_loader, criterion):
    model.eval()
    val_loss = 0
    y_true = []
    y_pred_prob = []

    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            val_loss += loss.item()

            y_true.extend(y_batch.cpu().numpy())
            y_pred_prob.extend(y_pred.cpu().numpy())

    val_loss /= len(data_loader)
    return val_loss, y_true, y_pred_prob


# Training loop
if __name__ == "__main__":
    batch_size = 32
    input_dim = 8  # Update for added features
    hidden_dim = 32
    memory_size = 10
    seq_len = 5
    epochs = 10
    patience = 2
    best_val_loss = float('inf')
    epochs_no_improve = 0

    # Get the correct input dimension
    first_seq, _ = train_dataset[0]  # Get the first sequence from the dataset
    input_dim = first_seq.shape[1]  # Number of features per timestep
    print(f"Updated input_dim: {input_dim}")

# Define the model with the correct input dimension

    model = TH_LSTM(input_dim, hidden_dim, memory_size).to('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
            optimizer.step()
            total_loss += loss.item()

            y_pred_binary = (y_pred >= 0.5).float()
            correct += (y_pred_binary == y_batch).sum().item()
            total += y_batch.size(0)

        train_accuracy = correct / total
        val_loss, y_true, y_pred_prob = evaluate_model(model, val_loader, criterion)
        y_pred_val_binary = (np.array(y_pred_prob) >= 0.5).astype(int)
        val_accuracy = accuracy_score(y_true, y_pred_val_binary)

        print(f"Epoch [{epoch + 1}/{epochs}] | Training Loss: {total_loss / len(train_loader):.4f} | Training Accuracy: {train_accuracy:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_accuracy:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    print("Training complete.")

Updated input_dim: 4


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x3 and 4x32)

Using Op

In [28]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Dataset class for grouped sequences of transactions by `cc_num`
class CreditCardFraudDataset(Dataset):
    def __init__(self, file_path, seq_len):
        self.data = pd.read_csv(file_path)

        # Convert date to timestamp
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time']).apply(lambda x: x.timestamp())

        # Encode category column
        self.label_encoder = LabelEncoder()
        self.data['category'] = self.label_encoder.fit_transform(self.data['category'])

        # Normalize features
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'is_fraud', 'trans_date_trans_time']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, -2]  # Fraud label of the current transaction
                time_intervals = np.diff([s[-1] for s in seq], prepend=seq[0][-1])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-1] for s in seq])  # Remove timestamp from features
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals as feature
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]  # Return only `x_seq` and `y_label`
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# Simple Linear Model for Fraud Detection
class SimpleLinearModel(nn.Module):
    def __init__(self, input_dim):
        super(SimpleLinearModel, self).__init__()
        # A single linear layer for binary classification
        self.classifier = nn.Linear(input_dim, 1)

    def forward(self, X_seq):
        # Average over the sequence to create a single vector representation
        X_avg = X_seq.mean(dim=1)  # Shape: [batch_size, input_dim]
        y_pred = torch.sigmoid(self.classifier(X_avg))  # Sigmoid to get probabilities
        return y_pred


# Training and testing
if __name__ == "__main__":
    batch_size = 32
    input_dim = 4  # category, amt, delta_t
    seq_len = 20

    # Load datasets
    train_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTrain.csv", seq_len=seq_len)
    test_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTest.csv", seq_len=seq_len)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Instantiate model, optimizer, and loss function

    model = SimpleLinearModel(input_dim).to('cuda' if torch.cuda.is_available() else 'cpu')

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()

    # Training
    epochs = 3
    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{epochs}] Loss: {total_loss / len(train_loader):.4f}")

    # Testing
    model.eval()
    y_true_test = []
    y_pred_test_prob = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            y_pred = model(X_batch)
            y_true_test.extend(y_batch.cpu().numpy())
            y_pred_test_prob.extend(y_pred.cpu().numpy())

    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
    for threshold in thresholds:
        y_pred_test_binary = (np.array(y_pred_test_prob) >= threshold).astype(int)
        y_true_test = np.array(y_true_test) if isinstance(y_true_test, list) else y_true_test
        print(y_true_test.shape)  # Print first 10 true labels
        print(y_pred_test_binary.shape)  # Print first 10 predicted labels
        accuracy = accuracy_score(y_true_test, y_pred_test_binary)
        precision = precision_score(y_true_test, y_pred_test_binary)
        recall = recall_score(y_true_test, y_pred_test_binary)
        f1 = f1_score(y_true_test, y_pred_test_binary)
        auc = roc_auc_score(y_true_test, y_pred_test_binary)

        print(f"Threshold: {threshold:.2f} | Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")


Epoch [1/3] Loss: 14.9789
Epoch [2/3] Loss: 1.7341
Epoch [3/3] Loss: 10.7996
(555719, 1)
(555719, 1)
Threshold: 0.10 | Accuracy: 0.9958 | Precision: 0.0981 | Recall: 0.0121 | F1: 0.0216 | AUC: 0.5058
(555719, 1)
(555719, 1)
Threshold: 0.20 | Accuracy: 0.9962 | Precision: 1.0000 | Recall: 0.0112 | F1: 0.0221 | AUC: 0.5056
(555719, 1)
(555719, 1)
Threshold: 0.30 | Accuracy: 0.9962 | Precision: 1.0000 | Recall: 0.0107 | F1: 0.0212 | AUC: 0.5054
(555719, 1)
(555719, 1)
Threshold: 0.40 | Accuracy: 0.9962 | Precision: 1.0000 | Recall: 0.0103 | F1: 0.0203 | AUC: 0.5051
(555719, 1)
(555719, 1)
Threshold: 0.50 | Accuracy: 0.9962 | Precision: 1.0000 | Recall: 0.0103 | F1: 0.0203 | AUC: 0.5051


Using:
Dropout layers for regularization (reduce overfitting).
L2 regularization (weight decay) in the optimizer.
Weighted loss function for handling class imbalance.
Added a validation split to monitor performance during training.

In [30]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Dataset class for grouped sequences of transactions by `cc_num`
class CreditCardFraudDataset(Dataset):
    def __init__(self, file_path, seq_len):
        self.data = pd.read_csv(file_path)

        # Convert date to timestamp
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time']).apply(lambda x: x.timestamp())

        # Encode category column
        self.label_encoder = LabelEncoder()
        self.data['category'] = self.label_encoder.fit_transform(self.data['category'])

        # Normalize features
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'is_fraud', 'trans_date_trans_time']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, -2]  # Fraud label of the current transaction
                time_intervals = np.diff([s[-1] for s in seq], prepend=seq[0][-1])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-1] for s in seq])  # Remove timestamp from features
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals as feature
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]  # Return only `x_seq` and `y_label` (no separate `time_intervals`)
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# TH-LSTM Model (with time-aware gate `T_t`)
class TH_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, memory_size):
        super(TH_LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.memory_size = memory_size

        # LSTM weights
        self.Wsh = nn.Linear(hidden_dim, hidden_dim)
        self.Wsx = nn.Linear(input_dim, hidden_dim)
        self.Wst = nn.Linear(1, hidden_dim)  # For time intervals
        self.bs = nn.Parameter(torch.zeros(hidden_dim))
        self.dropout = nn.Dropout(0.5)  # Add dropout to gates


        self.Wfh = nn.Linear(hidden_dim, hidden_dim)
        self.Wfx = nn.Linear(input_dim, hidden_dim)
        self.Wfs = nn.Linear(hidden_dim, hidden_dim)
        self.bf = nn.Parameter(torch.zeros(hidden_dim))

        self.Wih = nn.Linear(hidden_dim, hidden_dim)
        self.Wix = nn.Linear(input_dim, hidden_dim)
        self.Wis = nn.Linear(hidden_dim, hidden_dim)
        self.bi = nn.Parameter(torch.zeros(hidden_dim))

        self.Wuh = nn.Linear(hidden_dim, hidden_dim)
        self.Wux = nn.Linear(input_dim, hidden_dim)
        self.Wus = nn.Linear(hidden_dim, hidden_dim)
        self.bu = nn.Parameter(torch.zeros(hidden_dim))

        self.WTh = nn.Linear(hidden_dim, hidden_dim)
        self.WTx = nn.Linear(input_dim, hidden_dim)
        self.WTs = nn.Linear(hidden_dim, hidden_dim)
        self.bT = nn.Parameter(torch.zeros(hidden_dim))

        self.Woh = nn.Linear(hidden_dim, hidden_dim)
        self.Wox = nn.Linear(input_dim, hidden_dim)
        self.Wos = nn.Linear(hidden_dim, hidden_dim)
        self.bo = nn.Parameter(torch.zeros(hidden_dim))

        # Attention module
        self.Waq = nn.Linear(hidden_dim * 2, hidden_dim)
        self.Wah = nn.Linear(hidden_dim, hidden_dim)
        self.ba = nn.Parameter(torch.zeros(hidden_dim))
        self.vt = nn.Parameter(torch.randn(hidden_dim, 1))

        # Output layer
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, X_seq):
        batch_size = X_seq.size(0)
        seq_len = X_seq.size(1)
        h_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        c_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        historical_states = torch.zeros(batch_size, self.memory_size, self.hidden_dim).to(X_seq.device)

        for t in range(seq_len):
            x_t = X_seq[:, t, :-1]  # Features excluding delta_t
            delta_t = X_seq[:, t, -1].view(-1, 1)  # Time interval (delta_t)

            # Time-aware state
            s_t = torch.tanh(self.Wsh(h_prev) + self.Wsx(x_t) + self.Wst(delta_t) + self.bs)
            s_t = self.dropout(s_t)

            # Gates
            f_t = torch.sigmoid(self.Wfh(h_prev) + self.Wfx(x_t) + self.Wfs(s_t) + self.bf)
            i_t = torch.sigmoid(self.Wih(h_prev) + self.Wix(x_t) + self.Wis(s_t) + self.bi)
            T_t = torch.sigmoid(self.WTh(h_prev) + self.WTx(x_t) + self.WTs(s_t) + self.bT)  # Time-aware gate

            # Candidate cell state
            zeta_t = torch.tanh(self.Wuh(h_prev) + self.Wux(x_t) + self.Wus(s_t) + self.bu)

            # New cell state
            c_t = f_t * c_prev + i_t * zeta_t + T_t * s_t

            # Hidden state
            o_t = torch.sigmoid(self.Woh(h_prev) + self.Wox(x_t) + self.Wos(s_t) + self.bo)
            h_t = o_t * torch.tanh(c_t)

            # Save historical hidden states for attention
            historical_states = torch.cat((historical_states[:, 1:], h_t.unsqueeze(1)), dim=1)
            h_prev, c_prev = h_t, c_t

        # Attention mechanism
        q_t = torch.cat((h_t, c_t), dim=1)
        o_t_i = torch.tanh(self.Waq(q_t).unsqueeze(1) + self.Wah(historical_states))
        alpha_t_i = torch.exp(torch.matmul(o_t_i, self.vt)).squeeze(-1)
        alpha_t_i = alpha_t_i / torch.sum(alpha_t_i, dim=1, keepdim=True)
        e_t = torch.sum(alpha_t_i.unsqueeze(-1) * historical_states, dim=1)

        # Final classification
        y_pred = torch.sigmoid(self.classifier(e_t))
        return y_pred


# Training and testing
if __name__ == "__main__":
    batch_size = 32
    input_dim = 3  # category, amt, delta_t
    hidden_dim = 64
    memory_size = 10
    seq_len = 5

    train_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTrain.csv", seq_len=seq_len)
    test_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTest.csv", seq_len=seq_len)

    train_size = int(0.8 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_data, val_data = random_split(train_dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = TH_LSTM(input_dim, hidden_dim, memory_size).to('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Add L2 regularization
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([10.0]).to('cuda'))  # Weighted loss for imbalance

    epochs = 3
    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for X_batch, y_batch in train_loader:  # Unpack 2 values
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{epochs}] Loss: {total_loss / len(train_loader):.4f}")

    # Testing
    model.eval()
    y_true_test = []
    y_pred_test_prob = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:  # Unpack 2 values
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            y_pred = model(X_batch)
            y_true_test.extend(y_batch.cpu().numpy())
            y_pred_test_prob.extend(y_pred.cpu().numpy())

    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
    for threshold in thresholds:
        y_pred_test_binary = (np.array(y_pred_test_prob) >= threshold).astype(int)
        accuracy = accuracy_score(y_true_test, y_pred_test_binary)
        precision = precision_score(y_true_test, y_pred_test_binary)
        recall = recall_score(y_true_test, y_pred_test_binary)
        f1 = f1_score(y_true_test, y_pred_test_binary)
        auc = roc_auc_score(y_true_test, y_pred_test_binary)

        print(f"Threshold: {threshold:.2f} | Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")

Epoch [1/3] Loss: 0.7294
Epoch [2/3] Loss: 0.7293
Epoch [3/3] Loss: 0.7293
Threshold: 0.10 | Accuracy: 0.9958 | Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000 | AUC: 0.4998
Threshold: 0.20 | Accuracy: 0.9960 | Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000 | AUC: 0.4999


  _warn_prf(average, modifier, msg_start, len(result))


Threshold: 0.30 | Accuracy: 0.9961 | Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000 | AUC: 0.5000


  _warn_prf(average, modifier, msg_start, len(result))


Threshold: 0.40 | Accuracy: 0.9961 | Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000 | AUC: 0.5000


  _warn_prf(average, modifier, msg_start, len(result))


Threshold: 0.50 | Accuracy: 0.9961 | Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000 | AUC: 0.5000


In [4]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Dataset class for grouped sequences of transactions by `cc_num`
class CreditCardFraudDataset(Dataset):
    def __init__(self, file_path, seq_len):
        self.data = pd.read_csv(file_path)

        # Convert date to timestamp
        self.data['trans_date_trans_time'] = pd.to_datetime(self.data['trans_date_trans_time']).apply(lambda x: x.timestamp())

        # Encode category column
        self.label_encoder = LabelEncoder()
        self.data['category'] = self.label_encoder.fit_transform(self.data['category'])

        # Normalize features
        scaler = MinMaxScaler()
        self.data[['amt']] = scaler.fit_transform(self.data[['amt']])

        # Group transactions by `cc_num` and create sequences
        self.seq_len = seq_len
        self.sequences = []
        grouped = self.data.groupby('cc_num')
        for _, group in grouped:
            group = group[['category', 'amt', 'is_fraud', 'trans_date_trans_time']].values
            for i in range(len(group)):
                if i < self.seq_len - 1:
                    padding = [group[0]] * (self.seq_len - i - 1)
                    seq = padding + group[:i + 1].tolist()
                else:
                    seq = group[i - self.seq_len + 1:i + 1].tolist()

                label = group[i, -2]  # Fraud label of the current transaction
                time_intervals = np.diff([s[-1] for s in seq], prepend=seq[0][-1])  # Time differences
                time_intervals = time_intervals.reshape(-1, 1)  # Reshape for compatibility
                seq_features = np.array([s[:-1] for s in seq])  # Remove timestamp from features
                seq_features = np.concatenate((seq_features, time_intervals), axis=1)  # Add time intervals as feature
                self.sequences.append((seq_features, label))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x_seq, y_label = self.sequences[idx]
        return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_label, dtype=torch.float32)


# TH-LSTM Model (with time-aware gate `T_t`)
class TH_LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, memory_size):
        super(TH_LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.memory_size = memory_size
        self.dropout = nn.Dropout(0.5)

        # LSTM weights
        self.Wsh = nn.Linear(hidden_dim, hidden_dim)
        self.Wsx = nn.Linear(input_dim, hidden_dim)
        self.Wst = nn.Linear(1, hidden_dim)  # For time intervals
        self.bs = nn.Parameter(torch.zeros(hidden_dim))

        # Attention module
        self.Waq = nn.Linear(hidden_dim * 2, hidden_dim)
        self.Wah = nn.Linear(hidden_dim, hidden_dim)
        self.ba = nn.Parameter(torch.zeros(hidden_dim))
        self.vt = nn.Parameter(torch.randn(hidden_dim, 1))

        # Output layer
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, X_seq):
        batch_size = X_seq.size(0)
        seq_len = X_seq.size(1)
        h_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        c_prev = torch.zeros(batch_size, self.hidden_dim).to(X_seq.device)
        historical_states = torch.zeros(batch_size, self.memory_size, self.hidden_dim).to(X_seq.device)

        for t in range(seq_len):
            x_t = X_seq[:, t, :-1]  # Features excluding delta_t
            delta_t = X_seq[:, t, -1].view(-1, 1)  # Time interval (delta_t)
            s_t = torch.tanh(self.Wsh(h_prev) + self.Wsx(x_t) + self.Wst(delta_t) + self.bs)
            s_t = self.dropout(s_t)

            # Save historical hidden states for attention
            historical_states = torch.cat((historical_states[:, 1:], s_t.unsqueeze(1)), dim=1)
            h_prev, c_prev = s_t, s_t

        # Attention mechanism
        q_t = torch.cat((h_prev, c_prev), dim=1)
        o_t_i = torch.tanh(self.Waq(q_t).unsqueeze(1) + self.Wah(historical_states))
        alpha_t_i = torch.exp(torch.matmul(o_t_i, self.vt)).squeeze(-1)
        alpha_t_i = alpha_t_i / torch.sum(alpha_t_i, dim=1, keepdim=True)
        e_t = torch.sum(alpha_t_i.unsqueeze(-1) * historical_states, dim=1)

        # Final classification
        y_pred = self.classifier(e_t)
        return y_pred


# Training, validation, and testing
if __name__ == "__main__":
    batch_size = 32
    input_dim = 3  # category, amt, delta_t
    hidden_dim = 64
    memory_size = 10
    seq_len = 5

    train_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTrain.csv", seq_len=seq_len)
    test_dataset = CreditCardFraudDataset("/home/ducanh/Credit Card Transactions Fraud Detection/fraudTest.csv", seq_len=seq_len)

    train_size = int(0.8 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_data, val_data = random_split(train_dataset, [train_size, val_size])

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = TH_LSTM(input_dim, hidden_dim, memory_size).to('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([10.0]).to('cuda'))

    best_val_loss = float('inf')
    patience = 5  # Early stopping patience
    epochs_no_improve = 0

    epochs = 3
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
                y_pred = model(X_batch)
                val_loss += criterion(y_pred, y_batch).item()

        val_loss /= len(val_loader)
        print(f"Epoch [{epoch + 1}/{epochs}] Train Loss: {total_loss / len(train_loader):.4f} | Val Loss: {val_loss:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered!")
                break

    # **Testing and Evaluation Phase**
    model.eval()
    y_true_test = []
    y_pred_test_prob = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to('cuda'), y_batch.view(-1, 1).to('cuda')
            y_pred = model(X_batch)
            y_true_test.extend(y_batch.cpu().numpy())
            y_pred_test_prob.extend(y_pred.cpu().numpy())

    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
    for threshold in thresholds:
        y_pred_test_binary = (np.array(y_pred_test_prob) >= threshold).astype(int)
        accuracy = accuracy_score(y_true_test, y_pred_test_binary)
        precision = precision_score(y_true_test, y_pred_test_binary)
        recall = recall_score(y_true_test, y_pred_test_binary)
        f1 = f1_score(y_true_test, y_pred_test_binary)
        auc = roc_auc_score(y_true_test, y_pred_test_binary)

        print(f"Threshold: {threshold:.2f} | Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f} | AUC: {auc:.4f}")

Epoch [1/3] Train Loss: 0.1272 | Val Loss: 0.1188
Epoch [2/3] Train Loss: 0.1127 | Val Loss: 0.1188
Epoch [3/3] Train Loss: 0.1127 | Val Loss: 0.1219
Threshold: 0.10 | Accuracy: 0.9972 | Precision: 0.6317 | Recall: 0.6373 | F1: 0.6345 | AUC: 0.8179
Threshold: 0.20 | Accuracy: 0.9972 | Precision: 0.6317 | Recall: 0.6373 | F1: 0.6345 | AUC: 0.8179
Threshold: 0.30 | Accuracy: 0.9972 | Precision: 0.6317 | Recall: 0.6373 | F1: 0.6345 | AUC: 0.8179
Threshold: 0.40 | Accuracy: 0.9972 | Precision: 0.6317 | Recall: 0.6373 | F1: 0.6345 | AUC: 0.8179
Threshold: 0.50 | Accuracy: 0.9972 | Precision: 0.6317 | Recall: 0.6373 | F1: 0.6345 | AUC: 0.8179
