# Model Evaluation

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import json
import joblib
from xgboost import XGBClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.dataset import random_split
import math
import numpy as np


In [2]:
# Read the enriched transactions CSV; 'post_ts' is parsed as a datetime so the
# cells below can filter and split on it.
transactions_df = pd.read_csv(
    "./data/transactions_enrich_df.csv",
    parse_dates=["post_ts"],
)

KeyboardInterrupt: 

## Scikit-Learn - RandomForest

In [None]:
##------------------------------------------
##             Scikit-Learn   
##------------------------------------------

# Train a RandomForest classifier on a time-based split of the transactions,
# then persist the fitted model and a JSON metadata file describing its inputs.

# Keep only transactions posted after 2023-03-01
data = transactions_df[transactions_df['post_ts'] > pd.Timestamp("2023-03-01")]

columns = list(transactions_df.columns)

# Columns that must not be used as model inputs
entries_to_remove = ['transaction_id',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                     ]

# 'post_ts' is still part of this selection because the train/test split
# below is driven by it; it is dropped before fitting.
selected_columns = [col for col in columns if col not in entries_to_remove]

X = data[selected_columns]
target = 'fraud'
y = data[target]

# Upper bound (inclusive) of the training period
end_training = pd.Timestamp('2023-05-31')

# Time-based split: train on rows up to end_training, test on later rows
X_train = X[X['post_ts'] <= end_training]
y_train = y.loc[X_train.index]
X_test = X[X['post_ts'] > end_training]
y_test = y.loc[X_test.index]

# Drop the split column so the model is never fitted on it
X_train = X_train.drop(columns=['post_ts'])
X_test = X_test.drop(columns=['post_ts'])

# Record exactly the columns the model is fitted on. Previously the saved
# list still contained 'post_ts', which the model never sees, so the metadata
# did not match the model's real input schema.
features = list(X_train.columns)

# Save feature names and target name to a JSON file
metadata = {
    'features': features,
    'target': target,
    'model_name': 'RandomForestClassifier Model Example',
    'model_version': 1.0,
    'model_type':  'Scikit-Learn'
}

with open('./scikit-learn-random-forest-model/scikit-learn-random-forest-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)


# Initialize the Random Forest Classifier (fixed seed for repeatable fits)
clf = RandomForestClassifier(n_estimators=10, random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the held-out (later) period
predictions = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')

# Save the trained model
joblib.dump(clf, './scikit-learn-random-forest-model/scikit-learn-random-forest-model.pkl')

In [None]:

# Reload the persisted RandomForest model and its metadata, then score two
# hand-written example transactions.
model = joblib.load('./scikit-learn-random-forest-model/scikit-learn-random-forest-model.pkl')

with open('./scikit-learn-random-forest-model/scikit-learn-random-forest-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name, model_version = metadata['model_name'], metadata['model_version']

# Example record the notebook labels as fraud.
# The terminal_id_* window features are intentionally not supplied.
fraud_values = {
    'amt': 141.45,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 3,
    'customer_id_avg_amount_1day_window': 88.08,
    'customer_id_nb_tx_7day_window': 20.0,
    'customer_id_avg_amount_7day_window': 64.4855,
    'customer_id_nb_tx_30day_window': 82.0,
    'customer_id_avg_amount_30day_window': 59.64829268292683,
}
new_record_fraud = pd.DataFrame([fraud_values])

# Example record the notebook labels as non-fraud (same columns, same order)
no_fraud_values = {
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}
new_record_no_fraud = pd.DataFrame([no_fraud_values])

# Predicted class for each single-row frame
prediction_fraud, prediction_no_fraud = (
    model.predict(record)[0] for record in (new_record_fraud, new_record_no_fraud)
)

print (f"Fraud:{prediction_fraud} - No Fraud:{prediction_no_fraud}")


## Feature Importance

In [None]:
# Rank the RandomForest inputs by the importances stored on the fitted model.
model = joblib.load('./scikit-learn-random-forest-model/scikit-learn-random-forest-model.pkl')
importances = model.feature_importances_

# Take the feature names from the fitted model itself rather than rebuilding
# them from a hand-copied exclusion list: the names are then guaranteed to be
# in the same order as `importances`, and this cell no longer depends on
# `transactions_df` being loaded.
# NOTE: `feature_names_in_` exists because the model was fitted on a DataFrame.
features = list(model.feature_names_in_)

# One row per feature, most important first
feature_importance_df = (
    pd.DataFrame({'feature': features, 'importance': importances})
    .sort_values(by='importance', ascending=False)
)

feature_importance_df


In [None]:
# Display the current `features` list. Its content depends on which cell
# assigned it last (presumably the feature-importance cell above when the
# notebook is run top to bottom).
features

## XGBoost

In [None]:

# Train an XGBoost classifier on a time-based split of the transactions, then
# persist the fitted model and a JSON metadata file describing its inputs.

# Keep only transactions posted after 2023-03-01
data = transactions_df[transactions_df['post_ts'] > pd.Timestamp("2023-03-01")]

columns = list(transactions_df.columns)

# Columns that must not be used as model inputs
entries_to_remove = ['transaction_id',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                     ]

# 'post_ts' is still part of this selection because the train/test split
# below is driven by it; it is dropped before fitting.
selected_columns = [col for col in columns if col not in entries_to_remove]

X = data[selected_columns]
target = 'fraud'
y = data[target]

# Upper bound (inclusive) of the training period
end_training = pd.Timestamp('2023-05-31')

# Time-based split: train on rows up to end_training, test on later rows
X_train = X[X['post_ts'] <= end_training]
y_train = y.loc[X_train.index]
X_test = X[X['post_ts'] > end_training]
y_test = y.loc[X_test.index]

# Drop the split column so the model is never fitted on it
X_train = X_train.drop(columns=['post_ts'])
X_test = X_test.drop(columns=['post_ts'])

# Record exactly the columns the model is fitted on. Previously the saved
# list still contained 'post_ts', which the model never sees, so the metadata
# did not match the model's real input schema.
features = list(X_train.columns)

# Save feature names and target name to a JSON file
metadata = {
    'features': features,
    'target': target,
    'model_name': 'XGBoost Model Example',
    'model_version': 1.0,
    'model_type': 'XGBoost'
}

with open('./xgboost-model/xgboost-model-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)

# Initialize the XGBoost Classifier (fixed seed for repeatable fits)
clf = XGBClassifier(n_estimators=10, random_state=42, use_label_encoder=False, eval_metric='logloss')

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the held-out (later) period
predictions = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy}')

# Save the trained model
joblib.dump(clf, './xgboost-model/xgboost-model.pkl')



In [None]:

# Reload the persisted XGBoost model and its metadata, then score two
# hand-written example transactions.
model = joblib.load('./xgboost-model/xgboost-model.pkl')

with open('./xgboost-model/xgboost-model-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name, model_version = metadata['model_name'], metadata['model_version']

# Example record the notebook labels as fraud.
# The terminal_id_* window features are intentionally not supplied.
fraud_values = {
    'amt': 141.45,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 3,
    'customer_id_avg_amount_1day_window': 88.08,
    'customer_id_nb_tx_7day_window': 20.0,
    'customer_id_avg_amount_7day_window': 64.4855,
    'customer_id_nb_tx_30day_window': 82.0,
    'customer_id_avg_amount_30day_window': 59.64829268292683,
}
new_record_fraud = pd.DataFrame([fraud_values])

# Example record the notebook labels as non-fraud (same columns, same order)
no_fraud_values = {
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}
new_record_no_fraud = pd.DataFrame([no_fraud_values])

# Predicted class for each single-row frame
prediction_fraud, prediction_no_fraud = (
    model.predict(record)[0] for record in (new_record_fraud, new_record_no_fraud)
)

print (f"Fraud:{prediction_fraud} - No Fraud:{prediction_no_fraud}")


## PyTorch

In [None]:
##------------------------------------------
##             PyTorch 
##------------------------------------------

# Prepare scaled train/test tensors for the feed-forward network and persist
# the metadata and fitted scaler that inference needs.

# Keep only transactions posted after 2023-03-01
data = transactions_df[transactions_df['post_ts'] > pd.Timestamp("2023-03-01")]

columns = list(transactions_df.columns)

# Columns that must not be used as model inputs
entries_to_remove = ['transaction_id',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                     ]

# 'post_ts' is still part of this selection because the split below needs it
features = [col for col in columns if col not in entries_to_remove]

X = data[features]

target = 'fraud'
y = data[target]

# Model inputs are the selected columns minus the split column.
# This was previously derived from `columns` (every raw column), which put
# the IDs and the 'fraud' target itself into the saved feature list.
features = [col for col in features if col != 'post_ts']


# Save feature names and output format to a JSON file
metadata = {
    'features': features,
    'target': target,
    'model_type':  'PyTorch',
    'model_name': 'PyTorch Model Example',
    'model_version': 1.0,
    'scaler_file': './pytorch-ffn-model/pytorch-ffn-scaler.pkl'
}

with open('./pytorch-ffn-model/pytorch-ffn-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)


# Upper bound (inclusive) of the training period
end_training = pd.Timestamp('2023-05-31')

# Time-based split: train on rows up to end_training, test on later rows
X_train = X[X['post_ts'] <= end_training]
y_train = y.loc[X_train.index].tolist()

X_test = X[X['post_ts'] > end_training]
y_test = y.loc[X_test.index].tolist()

# Drop the 'post_ts' column so it is neither scaled nor fed to the network
X_train = X_train.drop(columns=['post_ts'])
X_test = X_test.drop(columns=['post_ts'])


# Standardize features; the scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Save the scaler so inference can apply the identical transformation
joblib.dump(scaler, metadata['scaler_file'])

# Convert the scaled arrays and the label lists to float32 tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)


# # Create a TensorDataset and DataLoader
# dataset = TensorDataset(X_train, y_tensor)
# train_loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Define a simple neural network model
class FeedforwardNN(nn.Module):
    """Three-layer fully connected network ending in a sigmoid.

    Layer widths are ``input_size -> 64 -> 32 -> 1`` with a ReLU after each of
    the first two linear layers.

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        # Attribute names are part of the saved state_dict / TorchScript layout.
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output, one value per input row (last dim is 1)."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

# Train the feed-forward network with mini-batch Adam, evaluate it on the
# held-out tensors and export it as TorchScript.

# Initialize the model, loss function, and optimizer
ffn = FeedforwardNN(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(ffn.parameters(), lr=0.001)

# Training hyper-parameters
epochs = 200
batch_size = 512

print(f"size:{len(X_train)}, batch_size: {batch_size}")

ffn.train()
for epoch in range(epochs):
    # Batches are taken in row order (no shuffling)
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i+batch_size]
        labels = y_train[i:i+batch_size]

        optimizer.zero_grad()
        outputs = ffn(inputs)
        # squeeze(1) drops only the output dimension. A bare squeeze() would
        # also drop the batch dimension when the last batch has one row,
        # leaving a 0-d tensor that BCELoss rejects against the 1-d labels.
        loss = criterion(outputs.squeeze(1), labels)
        loss.backward()
        optimizer.step()
    # The reported value is the loss of the last mini-batch of the epoch
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Switch to inference mode before evaluating and exporting
ffn.eval()

# Make predictions on the test set (1-D array of 0.0 / 1.0, matching y_test)
with torch.no_grad():
    predictions = ffn(X_test).squeeze(1).numpy()
    predictions = (predictions > 0.5).astype(float)

# Evaluate the model
accuracy = accuracy_score(y_test.numpy(), predictions)
print(f'Accuracy: {accuracy}')

# Export to TorchScript
model_scripted = torch.jit.script(ffn)
# Save the trained model
model_scripted.save('./pytorch-ffn-model/pytorch-ffn-model.pt')


In [None]:
# Reload the TorchScript classifier, its metadata and its scaler, then score
# two hand-written example transactions.
model = torch.jit.load('./pytorch-ffn-model/pytorch-ffn-model.pt')

with open('./pytorch-ffn-model/pytorch-ffn-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name = metadata['model_name']
model_version = metadata['model_version']
scaler_file = metadata['scaler_file']

scaler = joblib.load(scaler_file)

# Example record the notebook labels as fraud.
# The terminal_id_* window features are intentionally not supplied.
fraud_values = {
    'amt': 141.45,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 3,
    'customer_id_avg_amount_1day_window': 88.08,
    'customer_id_nb_tx_7day_window': 20.0,
    'customer_id_avg_amount_7day_window': 64.4855,
    'customer_id_nb_tx_30day_window': 82.0,
    'customer_id_avg_amount_30day_window': 59.64829268292683,
}
new_record_fraud = pd.DataFrame([fraud_values])

# Example record the notebook labels as non-fraud (same columns, same order)
no_fraud_values = {
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}
new_record_no_fraud = pd.DataFrame([no_fraud_values])

# Apply the training-time scaling, then convert to float32 tensors
new_record_fraud = torch.tensor(scaler.transform(new_record_fraud), dtype=torch.float32)
new_record_no_fraud = torch.tensor(scaler.transform(new_record_no_fraud), dtype=torch.float32)

# Evaluation mode, and no gradient tracking while scoring
model.eval()
with torch.no_grad():
    # Each call returns a (1, 1) tensor; pull out the single sigmoid output
    predictions_fraud = model(new_record_fraud)[0, 0].item()
    predictions_no_fraud = model(new_record_no_fraud)[0, 0].item()


print(f"Fraud:{predictions_fraud}, No Fraud: {predictions_no_fraud}")

## Autoencoders

In [None]:
# Prepare the autoencoder training tensor from the non-fraud transactions only.

# Both conditions are evaluated on `transactions_df` itself. The fraud filter
# previously read `data['fraud']`, i.e. whatever `data` an earlier cell had
# left behind, which fails on a fresh kernel and mixes differently indexed
# masks.
data = transactions_df[(transactions_df['post_ts'] > pd.Timestamp("2023-03-01")) &
                       (transactions_df['fraud'] == 0)]


columns = list(transactions_df.columns)

# Columns that must not be used as model inputs ('post_ts' included: no
# time-based split is performed in this cell)
entries_to_remove = ['transaction_id', 'post_ts',
                     'customer_id', 'bin', 'entry_mode',
                     'terminal_id', 'fraud',
                     'fraud_scenario',
                     'terminal_id_nb_tx_1day_window', 'terminal_id_risk_1day_window',
                     'terminal_id_nb_tx_7day_window', 'terminal_id_risk_7day_window',
                     'terminal_id_nb_tx_30day_window', 'terminal_id_risk_30day_window'
                     ]

# Remaining columns are the autoencoder inputs
features = [col for col in columns if col not in entries_to_remove]

X = data[features]

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to a float32 PyTorch tensor
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# Number of input features
print(f"Size: {X_tensor.shape[1]}")

# Define the Autoencoder
class Autoencoder(nn.Module):
    """Small symmetric autoencoder: ``input_size -> 6 -> 4 -> 6 -> input_size``.

    A ReLU follows the first linear layer of the encoder and of the decoder;
    the bottleneck and the final output are left linear.

    Args:
        input_size: Number of features per sample (also the output width).
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_size, latent_size = 6, 4

        # Positions inside each Sequential define the state_dict keys
        # (encoder.0, encoder.2, decoder.0, decoder.2).
        encoder_layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, latent_size),
        ]
        decoder_layers = [
            nn.Linear(latent_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the 4-wide bottleneck and decode it back."""
        return self.decoder(self.encoder(x))

# Fit the autoencoder on the prepared tensor, derive an anomaly threshold from
# its reconstruction errors, and persist model, metadata and scaler.

# Model, reconstruction loss and optimizer
autenc = Autoencoder(X_tensor.shape[1])
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(autenc.parameters(), lr=0.001)

# Training hyper-parameters
epochs = 50
batch_size = 512
print(f"batch_size: {batch_size}")

n_samples = len(X_tensor)
for epoch in range(epochs):
    # Sequential mini-batches; the input doubles as the target
    for start in range(0, n_samples, batch_size):
        inputs = X_tensor[start:start + batch_size]
        optimizer.zero_grad()
        outputs = autenc(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()
    # Loss of the last mini-batch of the epoch
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Per-row mean absolute reconstruction error on the training tensor
with torch.no_grad():
    reconstructed = autenc(X_tensor)
    reconstruction_error = (X_tensor - reconstructed).abs().mean(dim=1).numpy()

# Threshold = mean error plus one standard deviation
threshold = reconstruction_error.mean() + reconstruction_error.std()
print(threshold)

# Export the trained network as TorchScript
autenc.eval()
model_scripted = torch.jit.script(autenc)
model_scripted.save('./autoencoder-model/autoencoder-model.pt')

# Metadata needed at inference time, including the threshold and scaler path
metadata = {
    'features': features,
    'model_type': 'PyTorch Autoencoder',
    'model_name': 'Autoencoder for Anomaly Detection',
    'model_version': 1.0,
    'threshold': float(threshold),
    'scaler_file': './autoencoder-model/autoencoder-scaler.pkl'
}
with open('./autoencoder-model/autoencoder-metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file)

# Persist the fitted scaler next to the model
joblib.dump(scaler, metadata['scaler_file'])


In [None]:
# Reload the TorchScript autoencoder, its metadata and its scaler, then compare
# the reconstruction error of two hand-written records with the saved threshold.
model = torch.jit.load('./autoencoder-model/autoencoder-model.pt')

with open('./autoencoder-model/autoencoder-metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

model_name = metadata['model_name']
model_version = metadata['model_version']
scaler_file = metadata['scaler_file']
threshold = metadata['threshold']

scaler = joblib.load(scaler_file)

# Example record the notebook labels as fraud.
# The terminal_id_* window features are intentionally not supplied.
fraud_values = {
    'amt': 99.4,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 4,
    'customer_id_avg_amount_1day_window': 430.22,
    'customer_id_nb_tx_7day_window': 6,
    'customer_id_avg_amount_7day_window': 602.15,
    'customer_id_nb_tx_30day_window': 6,
    'customer_id_avg_amount_30day_window': 602.15,
}
new_record_fraud = pd.DataFrame([fraud_values])

# Example record the notebook labels as non-fraud (same columns, same order)
no_fraud_values = {
    'amt': 72.33,
    'during_weekend': 0,
    'during_night': 0,
    'customer_id_nb_tx_1day_window': 5,
    'customer_id_avg_amount_1day_window': 49.09,
    'customer_id_nb_tx_7day_window': 21.0,
    'customer_id_avg_amount_7day_window': 49.249,
    'customer_id_nb_tx_30day_window': 62.0,
    'customer_id_avg_amount_30day_window': 50.29,
}
new_record_no_fraud = pd.DataFrame([no_fraud_values])

# Apply the training-time scaling, then convert to float32 tensors
new_record_fraud = torch.tensor(scaler.transform(new_record_fraud), dtype=torch.float32)
new_record_no_fraud = torch.tensor(scaler.transform(new_record_no_fraud), dtype=torch.float32)

# Evaluation mode, and no gradient tracking while scoring
model.eval()
with torch.no_grad():
    # Mean absolute reconstruction error of each single-row input
    reconstructed_fraud = model(new_record_fraud)
    reconstruction_fraud_error = np.mean(
        (reconstructed_fraud - new_record_fraud).abs().mean(dim=1).numpy()
    )

    reconstructed_no_fraud = model(new_record_no_fraud)
    reconstruction_no_fraud_error = np.mean(
        (reconstructed_no_fraud - new_record_no_fraud).abs().mean(dim=1).numpy()
    )


print(f"Fraud:{reconstruction_fraud_error}, No Fraud: {reconstruction_no_fraud_error}, threshold: {threshold}")
