# Model Evaluation

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import json
import joblib
from xgboost import XGBClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import random_split
import math
import numpy as np


In [5]:
# Read the enriched transactions once; 'post_ts' is parsed as a datetime because
# the cells below compare it against pd.Timestamp values.
transactions_df = pd.read_csv(
    "./data/transactions_enrich_df.csv",
    parse_dates=['post_ts'],
)

## Scikit-Learn - RandomForest

In [9]:
##------------------------------------------
##             Scikit-Learn
##------------------------------------------
# Fits a RandomForest classifier on a time-ordered train/test split, then
# persists the fitted model and a JSON metadata file describing its inputs.

# Keep only the transactions posted after 2023-03-01
data = transactions_df[transactions_df['post_ts'] > pd.Timestamp("2023-03-01")]

# Every column available in the dataset
columns = list(transactions_df.columns)

# Columns excluded from the model inputs
entries_to_remove = ['transaction_id',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                     ]

# 'post_ts' is deliberately still in this list: it drives the time-based split
# below and is dropped right before fitting.
features = [col for col in columns if col not in entries_to_remove]

X = data[features]
target = 'fraud'
y = data[target]

# Upper bound (inclusive) of the training window
end_training = pd.Timestamp('2023-05-31')

# Time-based split: rows up to end_training train the model, later rows test it
X_train = X[X['post_ts'] <= end_training]
y_train = y.loc[X_train.index]
X_test = X[X['post_ts'] > end_training]
y_test = y.loc[X_test.index]

# Drop the column post_ts; it is not a model input
X_train = X_train.drop(columns=['post_ts'])
X_test = X_test.drop(columns=['post_ts'])

# Exact columns, in order, that the model is fitted on. `features` still holds
# 'post_ts', so writing it to the metadata would advertise an input the model
# never saw.
model_features = list(X_train.columns)

# Save feature names and target name to a JSON file
metadata = {
    'features': model_features,
    'target': target,
    'model_name': 'RandomForestClassifier Model Example',
    'model_version': 1.0,
    'model_type':  'Scikit-Learn'
}

with open('./models/scikit-learn-random-forest-model/scikit-learn-random-forest-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)


# Initialize the Random Forest Classifier (fixed seed for reproducible trees)
clf = RandomForestClassifier(n_estimators=10, random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test)

# Evaluate the model
# NOTE(review): accuracy alone can look high if the fraud class is rare —
# consider also reporting precision/recall; confirm against the class balance.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')

# Save the trained model (last expression, so the cell displays the written path)
joblib.dump(clf, './models/scikit-learn-random-forest-model/scikit-learn-random-forest-model.pkl')

Accuracy: 0.9723759463122748


['./models/scikit-learn-random-forest-model/scikit-learn-random-forest-model.pkl']

In [10]:

# Restore the persisted RandomForest model together with its metadata
model = joblib.load('./models/scikit-learn-random-forest-model/scikit-learn-random-forest-model.pkl')
with open('./models/scikit-learn-random-forest-model/scikit-learn-random-forest-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name = metadata['model_name']
model_version = metadata['model_version']

# Define the new record -- Fraud (single-row frame, one column per model input)
new_record_fraud = pd.DataFrame([{
    'amt': 141.45,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 3,
    'customer_id_avg_amount_1day_window': 88.08,
    'customer_id_nb_tx_7day_window': 20.0,
    'customer_id_avg_amount_7day_window': 64.4855,
    'customer_id_nb_tx_30day_window': 82.0,
    'customer_id_avg_amount_30day_window': 59.64829268292683,
}])

# Define the new record -- No Fraud
new_record_no_fraud = pd.DataFrame([{
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}])

# Predicted class label for each single-row record
prediction_fraud, prediction_no_fraud = (
    model.predict(record)[0]
    for record in (new_record_fraud, new_record_no_fraud)
)

print (f"Fraud:{prediction_fraud} - No Fraud:{prediction_no_fraud}")


Fraud:0 - No Fraud:0


## Feature Importance

In [11]:
# Reload the persisted RandomForest and read its per-feature importances
model = joblib.load('./models/scikit-learn-random-forest-model/scikit-learn-random-forest-model.pkl')
importances = model.feature_importances_

# Rebuild the list of model input columns from the dataset schema
columns = list(transactions_df.columns)

# Same exclusions as the training cell, plus 'post_ts', which was dropped before fitting
entries_to_remove = [
    'transaction_id',
    'customer_id', 'bin', 'entry_mode',
    'terminal_id', 'fraud',
    'fraud_scenario',
    'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
    'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
    'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window',
    'post_ts',
]

features = list(filter(lambda col: col not in entries_to_remove, columns))

# Pair each feature with its importance (by position) and rank from highest to lowest
feature_importance_df = pd.DataFrame(
    {'feature': features, 'importance': importances}
).sort_values(by='importance', ascending=False)

feature_importance_df


Unnamed: 0,feature,importance
0,amt,0.24731
8,customer_id_avg_amount_30day_window,0.203065
6,customer_id_avg_amount_7day_window,0.166842
4,customer_id_avg_amount_1day_window,0.154899
7,customer_id_nb_tx_30day_window,0.094059
5,customer_id_nb_tx_7day_window,0.063439
3,customer_id_nb_tx_1day_window,0.050415
1,during_weekend,0.014414
2,during_night,0.005558


In [None]:
# Display the current `features` list (whatever the most recently executed cell assigned to it)
features

## XGBoost

In [12]:

# Fits an XGBoost classifier on the same time-ordered split used for the
# RandomForest, then persists the model and a JSON metadata file.

# Keep only the transactions posted after 2023-03-01
data = transactions_df[transactions_df['post_ts'] > pd.Timestamp("2023-03-01")]

# Every column available in the dataset
columns = list(transactions_df.columns)

# Columns excluded from the model inputs
entries_to_remove = ['transaction_id',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                    ]

# 'post_ts' is deliberately still in this list: it drives the time-based split
# below and is dropped right before fitting.
features = [col for col in columns if col not in entries_to_remove]

X = data[features]
target = 'fraud'
y = data[target]

# Upper bound (inclusive) of the training window
end_training = pd.Timestamp('2023-05-31')

# Time-based split: rows up to end_training train the model, later rows test it
X_train = X[X['post_ts'] <= end_training]
y_train = y.loc[X_train.index]
X_test = X[X['post_ts'] > end_training]
y_test = y.loc[X_test.index]

# Drop the 'post_ts' column; it is not a model input
X_train = X_train.drop(columns=['post_ts'])
X_test = X_test.drop(columns=['post_ts'])

# Exact columns, in order, that the model is fitted on. `features` still holds
# 'post_ts', so writing it to the metadata would advertise an input the model
# never saw.
model_features = list(X_train.columns)

# Save feature names and target name to a JSON file
metadata = {
    'features': model_features,
    'target': target,
    'model_name': 'XGBoost Model Example',
    'model_version': 1.0,
    'model_type': 'XGBoost'
}

with open('./models/xgboost-model/xgboost-model-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)

# Initialize the XGBoost Classifier (fixed seed for reproducible boosting)
# NOTE(review): `use_label_encoder` may be ignored or warned about by newer
# XGBoost releases — confirm against the installed version.
clf = XGBClassifier(n_estimators=10, random_state=42, use_label_encoder=False, eval_metric='logloss')

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
predictions = clf.predict(X_test)

# Evaluate the model
# NOTE(review): accuracy alone can look high if the fraud class is rare —
# consider also reporting precision/recall; confirm against the class balance.
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')

# Save the trained model (last expression, so the cell displays the written path)
joblib.dump(clf, './models/xgboost-model/xgboost-model.pkl')



Accuracy: 0.9719147740421171


['./models/xgboost-model/xgboost-model.pkl']

In [14]:

# Restore the persisted XGBoost model together with its metadata
model = joblib.load('./models/xgboost-model/xgboost-model.pkl')
with open('./models/xgboost-model/xgboost-model-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name = metadata['model_name']
model_version = metadata['model_version']

# Define the new record -- Fraud (single-row frame, one column per model input)
new_record_fraud = pd.DataFrame([{
    'amt': 141.45,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 3,
    'customer_id_avg_amount_1day_window': 88.08,
    'customer_id_nb_tx_7day_window': 20.0,
    'customer_id_avg_amount_7day_window': 64.4855,
    'customer_id_nb_tx_30day_window': 82.0,
    'customer_id_avg_amount_30day_window': 59.64829268292683,
}])

# Define the new record -- No Fraud
new_record_no_fraud = pd.DataFrame([{
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}])

# Predicted class label for each single-row record
prediction_fraud, prediction_no_fraud = (
    model.predict(record)[0]
    for record in (new_record_fraud, new_record_no_fraud)
)

print (f"Fraud:{prediction_fraud} - No Fraud:{prediction_no_fraud}")


Fraud:0 - No Fraud:0


## PyTorch

In [15]:
##------------------------------------------
##             PyTorch
##------------------------------------------
# Prepares the time-ordered train/test tensors for the feed-forward network and
# persists the metadata and the fitted scaler needed at inference time.

# Keep only the transactions posted after 2023-03-01
data = transactions_df[transactions_df['post_ts'] > pd.Timestamp("2023-03-01")]

# Every column available in the dataset
columns = list(transactions_df.columns)

# Columns excluded from the model inputs
entries_to_remove = ['transaction_id',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                    ]

# 'post_ts' is still in this list here: X needs it for the time-based split
features = [col for col in columns if col not in entries_to_remove]

X = data[features]

target = 'fraud'
y = data[target]

# Model inputs are the selected features minus 'post_ts'. This must filter
# `features`, not `columns`: filtering `columns` would put the identifiers, the
# target and every excluded column back into the metadata.
features = [col for col in features if col != 'post_ts']


# Save feature names and output format to a JSON file
metadata = {
    'features': features,
    'target': target,
    'model_type':  'PyTorch',
    'model_name': 'PyTorch Model Example',
    'model_version': 1.0,
    'scaler_file': './models/pytorch-ffn-model/pytorch-ffn-scaler.pkl'
}

with open('./models/pytorch-ffn-model/pytorch-ffn-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)


# Upper bound (inclusive) of the training window
end_training = pd.Timestamp('2023-05-31')

# Time-based split: rows up to end_training train the model, later rows test it
X_train = X[X['post_ts'] <= end_training]
y_train = y.loc[X_train.index].tolist()

X_test = X[X['post_ts'] > end_training]
y_test = y.loc[X_test.index].tolist()

# Drop the 'post_ts' column; it is not a model input
X_train = X_train.drop(columns=['post_ts'])
X_test = X_test.drop(columns=['post_ts'])


# Standardize features; the scaler is fitted on the training rows only and
# merely applied to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Save the scaler so inference can apply the identical transformation
joblib.dump(scaler, metadata['scaler_file'])

# Convert the scaled arrays and the labels to float32 tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)


# # Create a TensorDataset and DataLoader
# dataset = TensorDataset(X_train, y_tensor)
# train_loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Define a simple neural network model
class FeedforwardNN(nn.Module):
    """Binary classifier: Linear(input_size, 64) -> ReLU -> Linear(64, 32) -> ReLU -> Linear(32, 1) -> Sigmoid.

    The output has one column holding a value in (0, 1) per input row.
    """

    def __init__(self, input_size: int):
        super().__init__()
        # Layers are created in the same order as they are applied in forward()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run `x` through the three linear layers and squash the result with a sigmoid."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

# Trains the feed-forward network with mini-batch Adam, evaluates it on the
# held-out rows and exports it as TorchScript.

# Seed PyTorch so weight initialisation (and therefore the run) is reproducible
torch.manual_seed(42)

# Initialize the model, loss function, and optimizer
ffn = FeedforwardNN(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(ffn.parameters(), lr=0.001)

# Training schedule
epochs = 200
batch_size = 512

print(f"size:{len(X_train)}, batch_size: {batch_size}")

for epoch in range(epochs):
    # Batches are consecutive slices of X_train (no shuffling)
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        labels = y_train[i:i+batch_size]

        optimizer.zero_grad()
        outputs = ffn(inputs)
        # squeeze(1) removes only the output column, so a final batch holding a
        # single row stays 1-D and still matches `labels`
        loss = criterion(outputs.squeeze(1), labels)
        loss.backward()
        optimizer.step()
    # Reports the loss of the last mini-batch of the epoch, not an epoch average
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Switch to evaluation mode before predicting and exporting
ffn.eval()

# Make predictions on the test set: probabilities flattened to 1-D, then
# thresholded at 0.5 so they have the same shape as y_test
with torch.no_grad():
    probabilities = ffn(X_test).squeeze(1).numpy()
predictions = (probabilities > 0.5).astype(float)

# Evaluate the model
accuracy = accuracy_score(y_test.numpy(), predictions)
print(f'Accuracy: {accuracy}')

# Export to TorchScript
model_scripted = torch.jit.script(ffn)
# Save the trained model
model_scripted.save('./models/pytorch-ffn-model/pytorch-ffn-model.pt')


size:902513, batch_size: 512
Epoch [1/200], Loss: 0.3709
Epoch [2/200], Loss: 0.3635
Epoch [3/200], Loss: 0.3611
Epoch [4/200], Loss: 0.3583
Epoch [5/200], Loss: 0.3557
Epoch [6/200], Loss: 0.3541
Epoch [7/200], Loss: 0.3537
Epoch [8/200], Loss: 0.3535
Epoch [9/200], Loss: 0.3532
Epoch [10/200], Loss: 0.3538
Epoch [11/200], Loss: 0.3533
Epoch [12/200], Loss: 0.3533
Epoch [13/200], Loss: 0.3532
Epoch [14/200], Loss: 0.3527
Epoch [15/200], Loss: 0.3523
Epoch [16/200], Loss: 0.3523
Epoch [17/200], Loss: 0.3518
Epoch [18/200], Loss: 0.3513
Epoch [19/200], Loss: 0.3508
Epoch [20/200], Loss: 0.3501
Epoch [21/200], Loss: 0.3496
Epoch [22/200], Loss: 0.3489
Epoch [23/200], Loss: 0.3489
Epoch [24/200], Loss: 0.3490
Epoch [25/200], Loss: 0.3487
Epoch [26/200], Loss: 0.3485
Epoch [27/200], Loss: 0.3488
Epoch [28/200], Loss: 0.3485
Epoch [29/200], Loss: 0.3483
Epoch [30/200], Loss: 0.3482
Epoch [31/200], Loss: 0.3482
Epoch [32/200], Loss: 0.3491
Epoch [33/200], Loss: 0.3484
Epoch [34/200], Loss: 0

In [16]:
# Restore the TorchScript model, its metadata and the scaler fitted at training time
model = torch.jit.load('./models/pytorch-ffn-model/pytorch-ffn-model.pt')

with open('./models/pytorch-ffn-model/pytorch-ffn-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name = metadata['model_name']
model_version = metadata['model_version']
scaler_file = metadata['scaler_file']

scaler = joblib.load(scaler_file)

# Define the new record -- Fraud (single-row frame, one column per model input)
new_record_fraud = pd.DataFrame([{
    'amt': 141.45,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 3,
    'customer_id_avg_amount_1day_window': 88.08,
    'customer_id_nb_tx_7day_window': 20.0,
    'customer_id_avg_amount_7day_window': 64.4855,
    'customer_id_nb_tx_30day_window': 82.0,
    'customer_id_avg_amount_30day_window': 59.64829268292683,
}])

# Define the new record -- No Fraud
new_record_no_fraud = pd.DataFrame([{
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}])

# Apply the training-time scaling, then convert to float32 tensors
new_record_fraud = torch.tensor(scaler.transform(new_record_fraud), dtype=torch.float32)
new_record_no_fraud = torch.tensor(scaler.transform(new_record_no_fraud), dtype=torch.float32)

# Set the model to evaluation mode (important for models with dropout or batch normalization)
model.eval()
# Score both records without tracking gradients; the raw sigmoid output is
# reported as-is (no 0.5 thresholding)
with torch.no_grad():
    predictions_fraud = model(new_record_fraud)[0, 0].item()
    predictions_no_fraud = model(new_record_no_fraud)[0, 0].item()

print(f"Fraud:{predictions_fraud}, No Fraud: {predictions_no_fraud}")

Fraud:0.7296394109725952, No Fraud: 0.04654536023736


## Autoencoders

In [17]:
# Builds the autoencoder training tensor from non-fraud transactions only.

# Both masks are computed on transactions_df itself, so the selection does not
# depend on a `data` variable left behind by an earlier cell.
is_recent = transactions_df['post_ts'] > pd.Timestamp("2023-03-01")
is_not_fraud = transactions_df['fraud'] == 0
data = transactions_df[is_recent & is_not_fraud]


# Every column available in the dataset
columns = list(transactions_df.columns)

# Columns excluded from the model inputs ('post_ts' included: no time-based split here)
entries_to_remove = ['transaction_id', 'post_ts',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                    ]

# Remove the entries
features = [col for col in columns if col not in entries_to_remove]

X = data[features]

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# Number of input features fed to the autoencoder
print(f"Size: {X_tensor.shape[1]}")

# Define the Autoencoder
class Autoencoder(nn.Module):
    """Symmetric autoencoder: input_size -> 6 -> 4 (code) -> 6 -> input_size.

    The final layer is linear (no activation), so outputs are unbounded.
    """

    def __init__(self, input_size: int):
        super().__init__()
        hidden_size, code_size = 6, 4
        # Compress the input down to the 4-dimensional code
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, code_size),
        )
        # Expand the code back to the original feature count
        self.decoder = nn.Sequential(
            nn.Linear(code_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the reconstruction of `x` (same shape as `x`)."""
        return self.decoder(self.encoder(x))

# Build the autoencoder with an MSE reconstruction loss and an Adam optimizer
autenc = Autoencoder(X_tensor.shape[1])
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(autenc.parameters(), lr=0.001)

# Training schedule
epochs = 50
batch_size = 512
print(f"batch_size: {batch_size}")

for epoch in range(epochs):
    # Consecutive chunks of batch_size rows (the last chunk may be smaller)
    for inputs in torch.split(X_tensor, batch_size):
        optimizer.zero_grad()
        loss = criterion(autenc(inputs), inputs)
        loss.backward()
        optimizer.step()
    # Loss of the last mini-batch of the epoch
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Per-row reconstruction error: mean absolute difference across the features
with torch.no_grad():
    reconstructed = autenc(X_tensor)
    reconstruction_error = (X_tensor - reconstructed).abs().mean(dim=1).numpy()

# Threshold = one standard deviation above the mean training error
threshold = reconstruction_error.mean() + 1 * reconstruction_error.std()
print(threshold)

# Export the trained model as TorchScript
autenc.eval()
model_scripted = torch.jit.script(autenc)
model_scripted.save('./models/autoencoder-model/autoencoder-model.pt')

# Persist what inference needs: feature list, threshold and the scaler location
metadata = dict(
    features=features,
    model_type='PyTorch Autoencoder',
    model_name='Autoencoder for Anomaly Detection',
    model_version=1.0,
    threshold=float(threshold),
    scaler_file='./models/autoencoder-model/autoencoder-scaler.pkl',
)
with open('./models/autoencoder-model/autoencoder-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)

# Last expression, so the cell displays the path the scaler was written to
joblib.dump(scaler, metadata['scaler_file'])


Size: 9
batch_size: 512
Epoch [1/50], Loss: 0.2165
Epoch [2/50], Loss: 0.1258
Epoch [3/50], Loss: 0.1238
Epoch [4/50], Loss: 0.1267
Epoch [5/50], Loss: 0.1225
Epoch [6/50], Loss: 0.1200
Epoch [7/50], Loss: 0.1186
Epoch [8/50], Loss: 0.1172
Epoch [9/50], Loss: 0.1170
Epoch [10/50], Loss: 0.1167
Epoch [11/50], Loss: 0.1163
Epoch [12/50], Loss: 0.1161
Epoch [13/50], Loss: 0.1159
Epoch [14/50], Loss: 0.1159
Epoch [15/50], Loss: 0.1157
Epoch [16/50], Loss: 0.1157
Epoch [17/50], Loss: 0.1156
Epoch [18/50], Loss: 0.1156
Epoch [19/50], Loss: 0.1156
Epoch [20/50], Loss: 0.1156
Epoch [21/50], Loss: 0.1156
Epoch [22/50], Loss: 0.1156
Epoch [23/50], Loss: 0.1155
Epoch [24/50], Loss: 0.1155
Epoch [25/50], Loss: 0.1154
Epoch [26/50], Loss: 0.1154
Epoch [27/50], Loss: 0.1155
Epoch [28/50], Loss: 0.1158
Epoch [29/50], Loss: 0.1177
Epoch [30/50], Loss: 0.1207
Epoch [31/50], Loss: 0.1257
Epoch [32/50], Loss: 0.1273
Epoch [33/50], Loss: 0.1284
Epoch [34/50], Loss: 0.1277
Epoch [35/50], Loss: 0.1272
Epoch

['./models/autoencoder-model/autoencoder-scaler.pkl']

In [18]:
# Restore the TorchScript autoencoder, its metadata (including the error
# threshold) and the scaler fitted at training time
model = torch.jit.load('./models/autoencoder-model/autoencoder-model.pt')

with open('./models/autoencoder-model/autoencoder-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name = metadata['model_name']
model_version = metadata['model_version']
scaler_file = metadata['scaler_file']
threshold = metadata['threshold']

scaler = joblib.load(scaler_file)

# Define the new record -- Fraud (single-row frame, one column per model input)
new_record_fraud = pd.DataFrame([{
    'amt': 99.4,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 4,
    'customer_id_avg_amount_1day_window': 430.22,
    'customer_id_nb_tx_7day_window': 6,
    'customer_id_avg_amount_7day_window': 602.15,
    'customer_id_nb_tx_30day_window': 6,
    'customer_id_avg_amount_30day_window': 602.15,
}])

# Define the new record -- No Fraud
new_record_no_fraud = pd.DataFrame([{
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}])

# Apply the training-time scaling, then convert to float32 tensors
new_record_fraud = torch.tensor(scaler.transform(new_record_fraud), dtype=torch.float32)
new_record_no_fraud = torch.tensor(scaler.transform(new_record_no_fraud), dtype=torch.float32)

# Set the model to evaluation mode (important for models with dropout or batch normalization)
model.eval()
# Reconstruction error = mean absolute difference between the record and its
# reconstruction, computed without tracking gradients
with torch.no_grad():
    reconstructed_fraud = model(new_record_fraud)
    reconstruction_fraud_error = (
        (reconstructed_fraud - new_record_fraud).abs().mean(dim=1).numpy().mean()
    )

    reconstructed_no_fraud = model(new_record_no_fraud)
    reconstruction_no_fraud_error = (
        (reconstructed_no_fraud - new_record_no_fraud).abs().mean(dim=1).numpy().mean()
    )

print(f"Fraud:{reconstruction_fraud_error}, No Fraud: {reconstruction_no_fraud_error}, threshold: {threshold}")


Fraud:5.178807735443115, No Fraud: 0.22883540391921997, threshold: 0.31190311908721924
