In [None]:
# Ensure compatible versions
!pip install xgboost>=1.6.0 --quiet numpy==1.24.4 --quiet

In [None]:
!pip install xgboost==1.7.6 --quiet


In [1]:
import xgboost
import numpy as np

print("XGBoost version:", xgboost.__version__)
print("NumPy version:", np.__version__)

  from pandas.core.computation.check import NUMEXPR_INSTALLED


XGBoost version: 1.7.6
NumPy version: 1.24.4


In [4]:
import boto3
import tarfile
import pandas as pd
import numpy as np
import time
import xgboost as xgb
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split

In [5]:
# --- 1. Download test data from S3
s3 = boto3.client('s3')
BUCKET = 'fraud-model-artifacts'
CSV_KEY = 'creditcard.csv'
MODEL_KEY = 'xgb_model_final.tar.gz'

In [6]:
# Load test CSV
obj = s3.get_object(Bucket=BUCKET, Key=CSV_KEY)
df = pd.read_csv(obj['Body'])

In [7]:
# --- 2. Prepare train-test split and add noise to test set
df = df.drop(columns=['Time'])  
X = df.drop(columns=['Class'])
y = df['Class']


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Inject noise into 20% of test features
noise_mask = np.random.rand(*X_test.shape) < 0.2
noise = np.random.normal(0, 0.1, size=X_test.shape)
X_test_noisy = X_test + (noise * noise_mask)

dmatrix = xgb.DMatrix(X_test_noisy)

In [9]:
# --- 3. Download and load XGBoost model from S3
s3.download_file(BUCKET, MODEL_KEY, 'xgb_model_final.tar.gz')
with tarfile.open('xgb_model_final.tar.gz', 'r:gz') as tar:
    tar.extractall()  # extracts model.bin

booster = xgb.Booster()
booster.load_model('model.bin')


In [10]:
# --- 4. Make predictions and time it
start = time.time()
y_pred_prob = booster.predict(dmatrix)
total_time = time.time() - start

y_pred = (y_pred_prob >= 0.5).astype(int)


In [14]:
len(y_pred)

56962

In [13]:
# --- 5. Report metrics
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print(f"Avg Inference Latency per Tx: {total_time / len(X):.6f} sec")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Precision: 0.9791666666666666
Recall: 0.9591836734693877
F1 Score: 0.9690721649484536
Avg Inference Latency per Tx: 0.000000 sec

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.98      0.96      0.97        98

    accuracy                           1.00     56962
   macro avg       0.99      0.98      0.98     56962
weighted avg       1.00      1.00      1.00     56962

