In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.impute import SimpleImputer
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# --- Load data ---------------------------------------------------------------
# Transaction table, read from the current working directory.
data = pd.read_csv('financial_anomaly_data.csv')

# Drop the timestamp / identifier / categorical columns. Every column that
# remains is fed to the mean imputer below, so it has to be numeric.
X = data.drop(columns=['Timestamp', 'TransactionID', 'AccountID', 'Merchant', 'TransactionType', 'Location'])

# --- Impute missing values ---------------------------------------------------
# Replace NaNs with the column mean and restore the column labels that
# scikit-learn strips when it returns a bare ndarray.
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# --- Fit a reconstruction model ----------------------------------------------
# The regressor is trained to reproduce its own input (features == target) and
# is scored on the same rows it was fitted on; the per-row reconstruction error
# is then used as the anomaly score.
# NOTE(review): with no hold-out data and an identity target, the error mostly
# reflects how coarsely the trees approximate each value -- confirm this is the
# intended scoring scheme.
model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
model.fit(X_imputed, X_imputed)

predictions = model.predict(X_imputed)

# --- Per-row reconstruction error --------------------------------------------
# Keep X_imputed as a DataFrame (no rebinding to a flattened array) so this
# section can be re-run on its own. Predictions are reshaped to the feature
# matrix so the arithmetic lines up for any number of feature columns, and the
# absolute error is averaged across columns to give one score per row. With a
# single feature column this equals the plain element-wise absolute error.
actual_values = X_imputed.to_numpy()
reconstructed = np.asarray(predictions).reshape(actual_values.shape)
errors = np.abs(reconstructed - actual_values).mean(axis=1)

# --- Threshold ---------------------------------------------------------------
# A row is flagged when its error exceeds mean + k * std of all errors.
THRESHOLD_STD_FACTOR = 0.1
mean_errors = errors.mean()
threshold = mean_errors + THRESHOLD_STD_FACTOR * errors.std()

# Compute the boolean mask once and reuse it for selection and for scoring.
is_anomaly = errors > threshold
anomalies = data.loc[is_anomaly]

# --- Evaluation (placeholder labels) -----------------------------------------
# WARNING: `ground_truth` is a random draw (5% positives), not real annotations.
# Precision / recall / F1 computed against it do not measure detection quality;
# precision will simply hover around the 5% positive rate of the random labels.
# TODO: replace with real labels if the dataset provides them.
np.random.seed(42)
ground_truth = np.random.choice([0, 1], size=len(data), p=[0.95, 0.05])

precision = precision_score(ground_truth, is_anomaly)
recall = recall_score(ground_truth, is_anomaly)
f1 = f1_score(ground_truth, is_anomaly)

# --- Report ------------------------------------------------------------------
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

print("Number of Detected Anomalies:", len(anomalies))

print("Detected Anomalies:")
print(anomalies)


Precision: 0.0528169014084507
Recall: 0.004176721737516243
F1-score: 0.007741269568209187
Number of Detected Anomalies: 852
Detected Anomalies:
               Timestamp TransactionID AccountID    Amount   Merchant  \
1577    02-01-2023 10:17        TXN540     ACC15  99819.52  MerchantA   
1766    02-01-2023 13:26        TXN443      ACC2  99758.28  MerchantA   
1957    02-01-2023 16:37       TXN1220      ACC3  99713.22  MerchantC   
2192    02-01-2023 20:32        TXN697     ACC11  99949.64  MerchantF   
2505    03-01-2023 01:45       TXN1666     ACC11  99963.57  MerchantF   
...                  ...           ...       ...       ...        ...   
216244  31-05-2023 12:04       TXN1820     ACC15  99893.89  MerchantF   
216291  31-05-2023 12:51       TXN1681      ACC4  99895.79  MerchantF   
216502  31-05-2023 16:22       TXN1506     ACC12  99721.87  MerchantF   
216595  31-05-2023 17:55        TXN132     ACC14  99852.06  MerchantF   
216837  31-05-2023 21:57        TXN872      ACC1  998