## Grabs the top 5 names from the fraudTest.csv file.

In [9]:
import pandas as pd

# Load the CSV file. Adjust the filename/path as needed.
df = pd.read_csv('../fraudTest.csv', index_col=0)

# Group by the first and last name, and count transactions for each group.
transaction_counts = df.groupby(['first', 'last']).size().reset_index(name='transaction_count')

# Sort the result in descending order by the transaction count.
top_names = transaction_counts.sort_values('transaction_count', ascending=False)

print(top_names.head(5))


        first        last  transaction_count
788     Scott      Martin               1965
385   Jeffrey       Smith               1526
314      Gina      Grimes               1474
655  Michelle     Gregory               1466
132    Carrie  Washington               1462


In [10]:
from tabulate import tabulate

# Convert the top 5 names to a list of tuples for easier iteration
top_5_names = top_names.head(5)[['first', 'last']].values.tolist()

transactions_by_purchaser = {}

for first, last in top_5_names:
    # Filter the original DataFrame for each purchaser and exclude fraud transactions
    transactions = df[(df['first'] == first) & (df['last'] == last) & (df['is_fraud'] == 0)]
    transactions_by_purchaser[(first, last)] = transactions

    # Print transactions in a tabular format
    print(f"Transactions for {first} {last} (No Fraud Only):")
    if not transactions.empty:
        print(tabulate(transactions, headers='keys', tablefmt='pretty', showindex=False))
    else:
        print("No transactions found.")


Transactions for Scott Martin (No Fraud Only):
+-----------------------+------------------+---------------------------------------------+----------------+---------+-------+--------+--------+----------------------------+------------+-------+-------+---------+-----------+----------+---------------------------+------------+----------------------------------+------------+--------------------+---------------------+----------+
| trans_date_trans_time |      cc_num      |                  merchant                   |    category    |   amt   | first |  last  | gender |           street           |    city    | state |  zip  |   lat   |   long    | city_pop |            job            |    dob     |            trans_num             | unix_time  |     merch_lat      |     merch_long      | is_fraud |
+-----------------------+------------------+---------------------------------------------+----------------+---------+-------+--------+--------+----------------------------+------------+-------+----

In [18]:
# Get transactions for Scott Martin (No Fraud Only)
transactions_scott_martin = transactions_by_purchaser[('Scott', 'Martin')]

if not transactions_scott_martin.empty:
    # Identify the most frequently used credit card number for Scott Martin
    common_card = transactions_scott_martin['cc_num'].value_counts().idxmax()
    # Filter transactions to only those using that common credit card number
    filtered_transactions = transactions_scott_martin[
        transactions_scott_martin['cc_num'] == common_card
    ]
else:
    filtered_transactions = transactions_scott_martin

print("Transactions for Scott Martin (No Fraud Only, Same Credit Card Only):")
if not filtered_transactions.empty:
    print(tabulate(filtered_transactions, headers='keys', tablefmt='pretty', showindex=False))
else:
    print("No transactions found.")


Transactions for Scott Martin (No Fraud Only, Same Credit Card Only):
+-----------------------+------------------+---------------------------------------------+----------------+---------+-------+--------+--------+----------------------------+------------+-------+-------+---------+----------+----------+----------------------+------------+----------------------------------+------------+--------------------+--------------------+----------+
| trans_date_trans_time |      cc_num      |                  merchant                   |    category    |   amt   | first |  last  | gender |           street           |    city    | state |  zip  |   lat   |   long   | city_pop |         job          |    dob     |            trans_num             | unix_time  |     merch_lat      |     merch_long     | is_fraud |
+-----------------------+------------------+---------------------------------------------+----------------+---------+-------+--------+--------+----------------------------+------------+---

In [19]:
# Get transactions for Jeffrey Smith (No Fraud Only)
transactions_jeffrey_smith = transactions_by_purchaser[('Jeffrey', 'Smith')]

if not transactions_jeffrey_smith.empty:
    # Identify the most frequently used credit card number for Jeffrey Smith
    common_card = transactions_jeffrey_smith['cc_num'].value_counts().idxmax()
    # Filter transactions to only those using that common credit card number
    filtered_transactions = transactions_jeffrey_smith[
        transactions_jeffrey_smith['cc_num'] == common_card
    ]
else:
    filtered_transactions = transactions_jeffrey_smith

print("Transactions for Jeffrey Smith (No Fraud Only, Same Credit Card Only):")
if not filtered_transactions.empty:
    print(tabulate(filtered_transactions, headers='keys', tablefmt='pretty', showindex=False))
else:
    print("No transactions found.")


Transactions for Jeffrey Smith (No Fraud Only, Same Credit Card Only):
+-----------------------+------------------+-----------------------------------------+----------------+--------+---------+-------+--------+-------------------------+---------+-------+-------+---------+-----------+----------+-------------------------+------------+----------------------------------+------------+--------------------+---------------------+----------+
| trans_date_trans_time |      cc_num      |                merchant                 |    category    |  amt   |  first  | last  | gender |         street          |  city   | state |  zip  |   lat   |   long    | city_pop |           job           |    dob     |            trans_num             | unix_time  |     merch_lat      |     merch_long      | is_fraud |
+-----------------------+------------------+-----------------------------------------+----------------+--------+---------+-------+--------+-------------------------+---------+-------+-------+------

In [20]:
# Get transactions for Gina Grimes (No Fraud Only)
transactions_gina_grimes = transactions_by_purchaser[('Gina', 'Grimes')]

if not transactions_gina_grimes.empty:
    # Identify the most frequently used credit card number for Gina Grimes
    common_card = transactions_gina_grimes['cc_num'].value_counts().idxmax()
    # Filter transactions to only those using that common credit card number
    filtered_transactions = transactions_gina_grimes[
        transactions_gina_grimes['cc_num'] == common_card
    ]
else:
    filtered_transactions = transactions_gina_grimes

print("Transactions for Gina Grimes (No Fraud Only, Same Credit Card Only):")
if not filtered_transactions.empty:
    print(tabulate(filtered_transactions, headers='keys', tablefmt='pretty', showindex=False))
else:
    print("No transactions found.")


Transactions for Gina Grimes (No Fraud Only, Same Credit Card Only):
+-----------------------+------------------+---------------------------------------------+----------------+---------+-------+--------+--------+-----------------+--------------+-------+-------+---------+----------+----------+----------------+------------+----------------------------------+------------+--------------------+--------------------+----------+
| trans_date_trans_time |      cc_num      |                  merchant                   |    category    |   amt   | first |  last  | gender |     street      |     city     | state |  zip  |   lat   |   long   | city_pop |      job       |    dob     |            trans_num             | unix_time  |     merch_lat      |     merch_long     | is_fraud |
+-----------------------+------------------+---------------------------------------------+----------------+---------+-------+--------+--------+-----------------+--------------+-------+-------+---------+----------+------

In [21]:
# Get transactions for Michelle Gregory (No Fraud Only)
transactions_michelle_gregory = transactions_by_purchaser[('Michelle', 'Gregory')]

if not transactions_michelle_gregory.empty:
    # Identify the most frequently used credit card number for Michelle Gregory
    common_card = transactions_michelle_gregory['cc_num'].value_counts().idxmax()
    # Filter transactions to only those using that common credit card number
    filtered_transactions = transactions_michelle_gregory[
        transactions_michelle_gregory['cc_num'] == common_card
    ]
else:
    filtered_transactions = transactions_michelle_gregory

print("Transactions for Michelle Gregory (No Fraud Only, Same Credit Card Only):")
if not filtered_transactions.empty:
    print(tabulate(filtered_transactions, headers='keys', tablefmt='pretty', showindex=False))
else:
    print("No transactions found.")


Transactions for Michelle Gregory (No Fraud Only, Same Credit Card Only):
+-----------------------+------------------+---------------------------------------------+----------------+--------+----------+---------+--------+--------------------+---------------+-------+-------+---------+---------+----------+------------------------+------------+----------------------------------+------------+--------------------+--------------------+----------+
| trans_date_trans_time |      cc_num      |                  merchant                   |    category    |  amt   |  first   |  last   | gender |       street       |     city      | state |  zip  |   lat   |  long   | city_pop |          job           |    dob     |            trans_num             | unix_time  |     merch_lat      |     merch_long     | is_fraud |
+-----------------------+------------------+---------------------------------------------+----------------+--------+----------+---------+--------+--------------------+---------------+---

## Simulating Transactions

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model

def haversine(lat1, lon1, lat2, lon2):
    """
    Compute the great circle distance between two points on the earth (in kilometers)
    using the haversine formula.
    """
    # Convert degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Earth radius in kilometers
    return c * r

def load_and_preprocess_data(csv_file):
    # Load the CSV file
    df = pd.read_csv(csv_file)
    
    # Convert transaction date/time to datetime objects
    df["trans_date_trans_time"] = pd.to_datetime(df["trans_date_trans_time"])
    
    # Sort transactions by credit card number and transaction time to preserve sequence order
    df = df.sort_values(by=["cc_num", "unix_time"])
    
    # Compute the distance between the user's location and merchant's location
    df["distance"] = haversine(df["lat"], df["long"], df["merch_lat"], df["merch_long"])
    
    # Compute the time difference (in seconds) between consecutive transactions for each user
    df["time_diff"] = df.groupby("cc_num")["unix_time"].diff().fillna(0)
    
    return df

def scale_features(df, features):
    # Note: Ideally, use the same scaler (or its saved parameters) as used during training.
    scaler = MinMaxScaler()
    df[features] = scaler.fit_transform(df[features])
    return df, scaler

def create_sequences(df, features, target, sequence_length=10):
    X, y = [], []
    # Group by user (using cc_num as unique identifier)
    for user, group in df.groupby("cc_num"):
        group = group.sort_values("unix_time")
        transactions = group[features].values
        fraud_flags = group[target].values
        # Create sequences: each sequence of transactions is used to predict the next transaction's fraud flag.
        for i in range(len(group) - sequence_length):
            X.append(transactions[i:i+sequence_length])
            y.append(fraud_flags[i+sequence_length])
    return np.array(X), np.array(y)

def main():
    # Path to your test dataset
    csv_file = "../fraudTrain.csv"
    
    # Load and preprocess the data
    df = load_and_preprocess_data(csv_file)
    
    # Select features and the target variable
    features = ["amt", "distance", "time_diff"]
    target = "is_fraud"
    
    # Scale the features. For consistent results, the same scaler used during training should be applied.
    df, scaler = scale_features(df, features)
    
    # Define the sequence length (should match the one used during training)
    SEQUENCE_LENGTH = 10
    X, y = create_sequences(df, features, target, sequence_length=SEQUENCE_LENGTH)
    
    print("Test sequence data shape:", X.shape)
    print("Test target data shape:", y.shape)
    
    # Load the pre-trained model (ensure that fraud_detection_model.h5 exists)
    model = load_model("../fraud_detection_model.h5")
    model.summary()
    
    # Evaluate the model on the test set
    loss, accuracy = model.evaluate(X, y, batch_size=32)
    print("Test Loss: {:.4f}, Test Accuracy: {:.4f}".format(loss, accuracy))
    
if __name__ == "__main__":
    main()




Test sequence data shape: (1286910, 10, 3)
Test target data shape: (1286910,)


[1m40216/40216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 953us/step - accuracy: 0.9970 - loss: 0.0135
Test Loss: 0.0133, Test Accuracy: 0.9970


In [88]:
import numpy as np
import pandas as pd
import io
import pickle
import warnings
import logging
from math import radians, cos, sin, asin, sqrt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Silence warnings
warnings.filterwarnings('ignore')
logging.getLogger('tensorflow').setLevel(logging.ERROR)
logging.getLogger('absl').setLevel(logging.ERROR)


def predict_fraud_for_new_transaction(user_transactions, new_transaction, scaler, max_seq_len, model):
    """
    Predict fraud for a new transaction using the user's historical transactions.
    In this modified version, each transaction is treated independently, so
    user_transactions is expected to be an empty list.
    """
    seq = user_transactions + [new_transaction]
    seq = np.array(seq)
    seq = scaler.transform(seq)
    seq_padded = pad_sequences([seq], maxlen=max_seq_len, dtype='float32', padding='pre')
    fraud_prob = model.predict(seq_padded, verbose=0)  # verbose=0 to silence prediction output
    return fraud_prob[0][0]


# ------------------------------
# Step 1: Load the Existing Model and Scaler
# ------------------------------

# Replace these file paths with the actual paths to your files
MODEL_PATH = '../fraud_detection_model.h5'
SCALER_PATH = '../scaler.pkl'

# Load the pre-trained Keras model
model = load_model(MODEL_PATH, compile=False)  # compile=False to avoid compilation warnings

# Load the fitted scaler (e.g., StandardScaler) from disk
with open(SCALER_PATH, 'rb') as f:
    scaler = pickle.load(f)

# Define the maximum sequence length as used in your training process.
max_seq_len = 10

# ------------------------------
# Step 2: Define the Haversine Function for Distance Calculation
# ------------------------------

def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great-circle distance between two points on Earth.
    """
    # Convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # Haversine formula calculation
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r

# ------------------------------
# Step 3: Load and Process Test Data
# ------------------------------

# Load the test data
df = pd.read_csv("test.csv")

# Drop the is_fraud column before processing if it exists
if 'is_fraud' in df.columns:
    df = df.drop('is_fraud', axis=1)

# Group transactions by user (first name + last name)
user_groups = df.groupby(['first', 'last'])

# Process each user's transactions
for (first, last), user_transactions in user_groups:
    print(f"\nProcessing transactions for {first} {last}")
    
    # Sort transactions by time
    user_transactions = user_transactions.sort_values('unix_time')
    
    # Process each transaction for this user independently
    for idx, transaction in user_transactions.iterrows():
        # Extract features
        amt = float(transaction['amt'])
        
        # Calculate distance between user's location and merchant's location
        user_lat = float(transaction['lat'])
        user_lon = float(transaction['long'])
        merch_lat = float(transaction['merch_lat'])
        merch_lon = float(transaction['merch_long'])
        distance = haversine(user_lat, user_lon, merch_lat, merch_lon)
        
        # For independent evaluation, set time_diff to 0
        time_diff = 0
        
        # Create feature list for this transaction
        transaction_features = [amt, distance, time_diff]
        
        # Evaluate transaction independently (no historical context)
        history = []  # Empty history for independent evaluation
        
        fraud_probability = predict_fraud_for_new_transaction(
            history,
            transaction_features,
            scaler,
            max_seq_len,
            model
        )
      
        print(f"Transaction {transaction['trans_num']}: Fraud Probability = {fraud_probability:.4f}")
      



Processing transactions for Bill Zhang
Transaction c3939b412c44c4eece77f4a527479629: Fraud Probability = 0.4636
Transaction 3d11340fab65815a86edd2c5386dd664: Fraud Probability = 0.5749
Transaction 9494a3f97b837dccdccbfca029b48d0f: Fraud Probability = 0.8581

Processing transactions for Lia Sindhunirmala
Transaction 902360c2990e949a8f7fbc2bd28322a0: Fraud Probability = 0.4791
Transaction d8103c37a1dbb77b12f083a597476478: Fraud Probability = 0.5883
Transaction f5e607cb00ed9926334429a02c7ec212: Fraud Probability = 0.8232

Processing transactions for Lisa Juan
Transaction 765ed5fc5af9dc18191ef41c4b5670f1: Fraud Probability = 0.4645
Transaction 9ab1ae384dceeb899cd4f859f87ca9ab: Fraud Probability = 0.7642
Transaction 4c3feb6a6795326731c7e8ceaa5a0671: Fraud Probability = 0.8311

Processing transactions for Warren Yun
Transaction 45e3842baf79e19f647b64c2174200e6: Fraud Probability = 0.5697
Transaction e3727fc6901a29a76e573b9c52e96c5b: Fraud Probability = 0.8665
Transaction 7cd3d0445bbee5a45f6

In [142]:
import pandas as pd
# Example of filtering transactions for a specific user with fraud ratio
def get_user_sample(df, first_name, last_name, num_non_fraud=4, num_fraud=1):
    # Get all transactions for this user
    user_transactions = df[
        (df['first'] == first_name) & 
        (df['last'] == last_name)
    ]
    
    # Split into fraud and non-fraud
    fraud_trans = user_transactions[user_transactions['is_fraud'] == 1]
    non_fraud_trans = user_transactions[user_transactions['is_fraud'] == 0]
    
    # Sample required number of each
    fraud_sample = fraud_trans.sample(n=min(num_fraud, len(fraud_trans)))
    non_fraud_sample = non_fraud_trans.sample(n=min(num_non_fraud, len(non_fraud_trans)))
    
    # Combine and sort by time
    sample = pd.concat([fraud_sample, non_fraud_sample])
    sample = sample.sort_values('unix_time')
    
    return sample

# Example usage:
df = pd.read_csv("../fraudTest.csv")
sample_transactions = get_user_sample(df, 'Monica', 'Cohen', 5, 0)
print("\nSample transactions:")
print(sample_transactions[['trans_num', 'first', 'last', 'amt', 'is_fraud']])
# Print full raw CSV entries for the sampled transactions
print("\nRaw CSV entries:")
for _, row in sample_transactions.iterrows():
    # Get column order from base.csv
    ordered_cols = ['trans_date_trans_time', 'cc_num', 'merchant', 'category', 'amt', 
                   'first', 'last', 'gender', 'street', 'city', 'state', 'zip', 'lat', 
                   'long', 'city_pop', 'job', 'dob', 'trans_num', 'unix_time', 'merch_lat', 
                   'merch_long', 'is_fraud']
    values = [str(row[col]) for col in ordered_cols]
    print(','.join(values))



Sample transactions:
                               trans_num   first   last    amt  is_fraud
78078   96ae1e7e974942d007b03c04802bc908  Monica  Cohen   3.81         0
226586  9243cd9fe6b30905679de0173a735633  Monica  Cohen  88.98         0
288454  dbd4028bda2163eb294b389ef9ef717b  Monica  Cohen   9.33         0
345628  a5d9751193b3e609c3890b6b4a90db56  Monica  Cohen   1.44         0
484456  2b658af05e6758d9609f279b69ae4140  Monica  Cohen   3.92         0

Raw CSV entries:
2020-07-18 20:06:21,4512828414983801773,fraud_Connelly-Carter,home,3.81,Monica,Cohen,F,864 Reynolds Plains,Uledi,PA,15484,39.8936,-79.7856,328,Tree surgeon,1983-07-25,96ae1e7e974942d007b03c04802bc908,1374177981,39.390605,-80.186326,0
2020-09-10 09:00:12,4512828414983801773,fraud_Kuvalis Ltd,gas_transport,88.98,Monica,Cohen,F,864 Reynolds Plains,Uledi,PA,15484,39.8936,-79.7856,328,Tree surgeon,1983-07-25,9243cd9fe6b30905679de0173a735633,1378803612,40.053138,-80.11427900000001,0
2020-10-06 22:27:24,4512828414983801773,

In [183]:
import numpy as np
import pandas as pd
import io
import pickle
import warnings
import logging
from math import radians, cos, sin, asin, sqrt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Silence warnings
warnings.filterwarnings('ignore')
logging.getLogger('tensorflow').setLevel(logging.ERROR)
logging.getLogger('absl').setLevel(logging.ERROR)


def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great-circle distance between two points on Earth.
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Earth's radius in kilometers
    return c * r


def predict_fraud_for_new_transaction(user_transactions, new_transaction, scaler, max_seq_len, model):
    """
    Predict fraud for a new transaction using the user's historical transactions.
    
    Parameters:
      - user_transactions: List of past transactions (each as a list of features).
      - new_transaction: The new transaction's feature list: [amt, distance, time_diff].
      - scaler: The pre-fitted scaler.
      - max_seq_len: Maximum sequence length for padding.
      - model: The trained NN model.
      
    Returns:
      - fraud_prob: Predicted fraud probability.
    """
    seq = user_transactions + [new_transaction]
    seq = np.array(seq)
    seq = scaler.transform(seq)
    seq_padded = pad_sequences([seq], maxlen=max_seq_len, dtype='float32', padding='pre')
    fraud_prob = model.predict(seq_padded, verbose=0)
    return fraud_prob[0][0]


def predict_for_transaction(first, last, tx_index, base_file, separate_file, scaler, max_seq_len, model):
    """
    Given a first and last name and a transaction index (from the separate file),
    this function:
      1. Reads the base CSV file (base_file) to build a history of transactions
         for the specified user.
      2. Reads the separate CSV file (separate_file) and filters (if needed) to 
         find new transactions for the specified user.
      3. Selects the transaction at the given index (0-based) from the separate file.
      4. Computes features for both history and the new transaction and feeds them 
         into the neural network.
    
    Parameters:
      - first, last: The user's first and last names.
      - tx_index: Index (0-based) of the transaction in the separate file.
      - base_file: Path to the base CSV file with historical transactions.
      - separate_file: Path to the separate CSV file with new transactions.
      - scaler, max_seq_len, model: Pre-loaded objects from training.
    
    Returns:
      - fraud_prob: Predicted fraud probability for the new transaction.
    """
    # ------------------------------
    # Build History from base_file
    # ------------------------------
    base_df = pd.read_csv(base_file)
    # Filter the base file for the given user
    base_history_df = base_df[(base_df['first'] == first) & (base_df['last'] == last)]
    base_history_df = base_history_df.sort_values('unix_time')
    
    history = []
    prev_time = None
    for idx, row in base_history_df.iterrows():
        amt = float(row['amt'])
        user_lat = float(row['lat'])
        user_lon = float(row['long'])
        merch_lat = float(row['merch_lat'])
        merch_lon = float(row['merch_long'])
        distance = haversine(user_lat, user_lon, merch_lat, merch_lon)
        if prev_time is None:
            time_diff = 0
        else:
            time_diff = float(row['unix_time']) - prev_time
        prev_time = float(row['unix_time'])
        history.append([amt])
    
    # ------------------------------
    # Read New Transaction from separate_file
    # ------------------------------
    separate_df = pd.read_csv(separate_file)
    # Optionally, filter the separate file as well by user if it contains multiple users
    separate_user_df = separate_df[(separate_df['first'] == first) & (separate_df['last'] == last)]
    separate_user_df = separate_user_df.reset_index(drop=True)
    
    if tx_index < 0 or tx_index >= len(separate_user_df):
        raise IndexError("Transaction index out of range in the separate file for this user.")
    
    # Select the new transaction row from the separate file
    tx = separate_user_df.iloc[tx_index]
    new_amt = float(tx['amt'])
    user_lat = float(tx['lat'])
    user_lon = float(tx['long'])
    merch_lat = float(tx['merch_lat'])
    merch_lon = float(tx['merch_long'])
    new_distance = haversine(user_lat, user_lon, merch_lat, merch_lon)
    
    # Compute time difference for the new transaction relative to the last history transaction (if any)
    new_time_diff = 0
        
    new_transaction = [new_amt]
    
    # ------------------------------
    # Make the Prediction
    # ------------------------------
    print(history)
    print(new_transaction)
    fraud_prob = predict_fraud_for_new_transaction(history, new_transaction, scaler, max_seq_len, model)
    print(f"Prediction for {first} {last} on transaction index {tx_index}: Fraud Probability = {fraud_prob:.4f}")
    return fraud_prob


# ------------------------------
# Example Usage
# ------------------------------

# File paths for the CSV files (adjust paths as necessary)
base_file = './sample_data/base.csv'
separate_file = './sample_data/no_fraud.csv'

# Load the existing model and scaler
MODEL_PATH = '../fraud_detection_model.h5'
SCALER_PATH = '../scaler.pkl'
model = load_model(MODEL_PATH, compile=False)
with open(SCALER_PATH, 'rb') as f:
    scaler = pickle.load(f)

# Set max sequence length (should be same as used in training)
max_seq_len = 10

predict_for_transaction("Scott", "Martin", 4, base_file, separate_file, scaler, max_seq_len, model)


[[15.78], [6.32], [1.08], [62.52], [17.05]]
[19.03]
Prediction for Scott Martin on transaction index 4: Fraud Probability = 0.2212


np.float32(0.22118893)