In [1]:
# NOTE(review): `--upgrade` installs whatever torch/onnx releases are newest, with no version pins.
# The ONNX CheckerError recorded in this notebook's last cell output (Dropout / `ratio`) may be a
# version-compatibility problem between these packages and CrypTen's exporter -- TODO confirm and
# pin versions known to work together.
%pip install --upgrade torch onnx

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
import crypten
from crypten import mpc
import crypten.communicator as comm


from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score

from collections import defaultdict
import os

# os.environ["CUDA_VISIBLE_DEVICES"]=""
# Debugging aid: CUDA_LAUNCH_BLOCKING=1 requests synchronous CUDA kernel launches so that an
# error is reported at the call that caused it (only relevant if a GPU is actually used).
os.environ["CUDA_LAUNCH_BLOCKING"]="1"

In [3]:
# Read the raw transactions and hold out 20% of the rows for testing (fixed seed).
df_nn = pd.read_csv("card_transdata.csv")
train_transactions, test_transactions = train_test_split(
    df_nn, test_size=0.2, random_state=42
)

# Separate the feature columns from the "fraud" label column for each split.
train_features = train_transactions.drop(columns=["fraud"])
test_features = test_transactions.drop(columns=["fraud"])

# Wrap the underlying NumPy arrays as tensors; labels are reshaped into column vectors.
X_train_transactions = torch.from_numpy(train_features.values)
y_train_transactions = torch.from_numpy(train_transactions.fraud.values).view(-1, 1)
X_test_transactions = torch.from_numpy(test_features.values)
y_test_transactions = torch.from_numpy(test_transactions.fraud.values).view(-1, 1)

# Report the resulting tensor shapes.
print(f"X_train_transactions has shape: {X_train_transactions.shape}")
print(f"y_train_transactions has shape: {y_train_transactions.shape}")
print(f"X_test_transactions has shape: {X_test_transactions.shape}")
print(f"y_test_transactions has shape: {y_test_transactions.shape}")


X_train_transactions has shape: torch.Size([800000, 7])
y_train_transactions has shape: torch.Size([800000, 1])
X_test_transactions has shape: torch.Size([200000, 7])
y_test_transactions has shape: torch.Size([200000, 1])


In [4]:
class FraudDetectionModel(nn.Module):
    """Two-layer fully connected network that outputs a sigmoid score per sample.

    Architecture: Linear -> ReLU -> Dropout(p=0.5) -> Linear -> Sigmoid.
    Both linear layers hold float64 parameters, and ``forward`` casts its input
    to that dtype.

    Args:
        input_size: Number of features per sample.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output units.
        lr: Accepted for backward compatibility; the model itself does not use it.
    """

    def __init__(self, input_size, hidden_size, num_classes, lr=0.001):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size, dtype=torch.float64)
        self.relu = nn.ReLU()
        # Probability of zeroing a hidden activation while in training mode.
        self.dropout_prob = 0.5
        self.fc2 = nn.Linear(hidden_size, num_classes, dtype=torch.float64)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid scores for ``x``; the last dimension must be ``input_size``."""
        # Match the parameter dtype so float32 inputs do not raise a dtype mismatch.
        x = x.to(self.fc1.weight.dtype)

        x = self.fc1(x)
        x = self.relu(x)
        # Use nn.functional explicitly: no `F` alias is imported in this notebook, so the
        # previous `F.dropout` call raised NameError on a fresh kernel.
        x = nn.functional.dropout(x, p=self.dropout_prob, training=self.training)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x


In [5]:
# Initialise CrypTen before any other crypten.* call in the cells below.
crypten.init()
# Restrict PyTorch to a single intra-op CPU thread.
torch.set_num_threads(1)

In [6]:
# Shard file locations: one feature file ("X") and one label file ("y") per bank.
# Keys look like "X_samples_bank1"; each value is the matching "data/<key>.pth" path.
filenames = {
    f"{kind}_samples_bank{bank}": f"data/{kind}_samples_bank{bank}.pth"
    for kind in ("X", "y")
    for bank in range(1, 5)
}

# Sanity check: preview the rows that become bank 1's shard (the first 200000 training rows).
print(X_train_transactions[:200000])

def save_all_data():
    """Split the training tensors into four equal row shards and save one pair per bank.

    Bank ``k`` (1-4) receives rows ``[(k - 1) * 200000, k * 200000)`` of
    ``X_train_transactions`` and ``y_train_transactions``. Each shard is written
    with ``crypten.save_from_party`` to the path registered in ``filenames``.
    All feature shards are written first, then all label shards.
    """
    num_banks = 4
    shard_size = 200000

    # Create the target directories up front so a fresh checkout (no "data/" folder yet)
    # does not fail on the first save.
    for path in filenames.values():
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    for prefix, tensor in (("X", X_train_transactions), ("y", y_train_transactions)):
        for bank in range(1, num_banks + 1):
            rows = slice((bank - 1) * shard_size, bank * shard_size)
            crypten.save_from_party(tensor[rows], filenames[f"{prefix}_samples_bank{bank}"])


save_all_data()


tensor([[36.2172,  0.2478,  0.1814,  ...,  1.0000,  0.0000,  1.0000],
        [20.6738,  0.5731,  0.6636,  ...,  1.0000,  0.0000,  1.0000],
        [ 4.7852,  2.3890,  3.5380,  ...,  1.0000,  0.0000,  1.0000],
        ...,
        [62.9908,  0.4695,  0.9350,  ...,  0.0000,  0.0000,  0.0000],
        [ 4.1692,  4.3749,  0.9618,  ...,  0.0000,  0.0000,  1.0000],
        [30.5910, 12.0598,  0.7788,  ...,  1.0000,  0.0000,  0.0000]],
       dtype=torch.float64)


In [7]:
input_size = 7  # Number of input features
hidden_size = 64  # Number of hidden units
num_classes = 1  # Binary classification
# NOTE: encrypted_training() uses its own num_epochs (2), not this module-level value.
num_epochs = 1500

# Plaintext PyTorch model; encrypted_training() converts it with crypten.nn.from_pytorch.
torch_model = FraudDetectionModel(input_size, hidden_size, num_classes)


In [8]:
def encrypted_training(learning_rate=0.001, num_epochs=2, batch_size=10, log_every=1):
    """Train an encrypted copy of ``torch_model`` on the four banks' combined shards.

    The per-bank feature and label shards are loaded with
    ``crypten.load_from_party``, stacked row-wise, and fed in mini-batches to a
    CrypTen model converted from ``torch_model``. Parameters are updated with
    ``model.update_parameters`` after each batch.

    Args:
        learning_rate: Step size passed to ``model.update_parameters``.
        num_epochs: Number of passes over the combined data.
        batch_size: Rows per mini-batch; a trailing partial batch is dropped.
        log_every: Decrypt and print the loss every ``log_every`` batches
            (must be >= 1; the default of 1 prints every batch).

    NOTE(review): the recorded run of this cell failed inside
    ``crypten.nn.from_pytorch`` with an ONNX CheckerError on the Dropout node.
    That looks like a torch/onnx/CrypTen version incompatibility and is not
    addressed by this function -- TODO confirm.
    """
    bank_ids = range(1, 5)

    # Load data: feature shards first, then label shards.
    x_shards = [crypten.load_from_party(filenames[f"X_samples_bank{bank}"]) for bank in bank_ids]
    y_shards = [crypten.load_from_party(filenames[f"y_samples_bank{bank}"]) for bank in bank_ids]

    # Each bank holds a disjoint set of ROWS with the same columns, so the shards are
    # stacked along dim 0. (Concatenating along dim 1 would widen the feature axis to
    # 4x the model's input size and give every row four labels.)
    x_combined_enc = crypten.cat(x_shards, dim=0)
    y_combined_enc = crypten.cat(y_shards, dim=0)

    # The dummy input is only used to trace the model, so one mini-batch of the right
    # width is enough. Batches stay 2-D (rows, features) to match this traced shape.
    dummy_input = torch.randn(batch_size, x_combined_enc.size(1))
    model = crypten.nn.from_pytorch(torch_model, dummy_input)
    model.encrypt()
    # Set train mode
    model.train()

    loss = crypten.nn.BCELoss()
    num_batches = x_combined_enc.size(0) // batch_size

    for epoch in range(num_epochs):
        crypten.print(f"Epoch {epoch} in progress:")

        for batch in range(num_batches):
            # define the start and end of the training mini-batch
            start, end = batch * batch_size, (batch + 1) * batch_size

            # slice the encrypted training examples / labels
            x_train = x_combined_enc[start:end]
            y_train = y_combined_enc[start:end]

            # perform forward pass:
            output = model(x_train)
            loss_value = loss(output, y_train)

            # set gradients to "zero"
            model.zero_grad()

            # perform backward pass:
            loss_value.backward()

            # update parameters
            model.update_parameters(learning_rate)

            # Decrypting the loss requires communication between parties, so only do
            # it on the batches that are actually reported.
            if (batch + 1) % log_every == 0:
                batch_loss = loss_value.get_plain_text()
                crypten.print(f"\tBatch {(batch + 1)} of {num_batches} Loss {batch_loss.item():.4f}")

encrypted_training()

CheckerError: Unrecognized attribute: ratio for operator Dropout

==> Context: Bad node spec for node. Name: /dropout/Dropout OpType: Dropout