In [None]:
#libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from scipy.stats import zscore
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
df = pd.read_csv('

In [None]:
# Quick look at the first five rows to sanity-check the loaded frame.
df.head(n=5)

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Parse the date/time columns and impute missing transaction amounts.
df['Date'] = pd.to_datetime(df['Date'])
# errors='coerce' turns unparseable time strings into missing values instead of
# raising; only the time-of-day component is kept.
df['Time'] = pd.to_datetime(df['Time'], errors='coerce').dt.time
# Assign the filled Series back rather than calling fillna(inplace=True) on the
# df['Amount'] selection: the in-place form is chained assignment, which emits a
# FutureWarning in pandas 2.x and leaves `df` unchanged under Copy-on-Write.
df['Amount'] = df['Amount'].fillna(df['Amount'].mean())

In [None]:
# Summary statistics: print the plain-text table produced by DataFrame.describe()
# for `df` (count, mean, std, min, quartiles, max for the described columns).
print(df.describe())

In [None]:
# Distribution of transaction amounts: 50-bin histogram with a KDE overlay.
plt.figure(figsize=(10, 6))
sns.histplot(df['Amount'], bins=50, kde=True)
# Set the title and both axis labels on the current axes in a single call.
plt.gca().set(
    title='Transaction Amount Distribution',
    xlabel='Amount',
    ylabel='Frequency',
)
plt.show()

In [None]:
# Pairwise Pearson correlations between the numeric columns of `df`,
# rendered as an annotated heatmap.
corr_matrix = df.corr(method='pearson', numeric_only=True)
sns.heatmap(
    data=corr_matrix,
    annot=True,
    cmap='coolwarm',
)
plt.show()

In [None]:
# One-hot encode the two categorical columns; all other columns of `df`
# are carried over into `df_encoded` unchanged.
df_encoded = pd.get_dummies(
    data=df,
    columns=['Merchant', 'Transaction Type'],
)

In [None]:
# Standardise 'Amount' (zero mean, unit variance) in the encoded frame.
scaler = StandardScaler()
# Learn the mean/std first, then apply the transform as a separate step.
scaler.fit(df_encoded[['Amount']])
df_encoded['Amount'] = scaler.transform(df_encoded[['Amount']])

In [None]:
# Convert DataFrame to matrix: snapshot of `df_encoded` as a NumPy array.
# NOTE(review): taken before the anomaly columns are added in later cells, so it
# will not contain them; `transaction_matrix` is not referenced again in the
# visible cells — confirm it is still needed.
transaction_matrix = df_encoded.to_numpy()

In [None]:
# Z-score method: flag a transaction when its 'Amount' lies more than
# `threshold` standard deviations from the mean.
threshold = 3
df_encoded['Z-Score'] = zscore(df_encoded['Amount'])
# 1 = anomaly, 0 = normal.
df_encoded['Is Anomaly (Z-Score)'] = df_encoded['Z-Score'].abs().gt(threshold).astype(int)

In [None]:
# Isolation Forest
# Fit only on genuine input features. Deliberately excluded:
#   - 'Transaction ID', 'Date', 'Time': identifier / raw date-time columns;
#   - 'Z-Score', 'Is Anomaly (Z-Score)': derived from 'Amount' by the Z-score cell;
#   - 'Is_Fraud': the ground-truth label the evaluation cells compare against —
#     leaving it in the feature matrix is target leakage;
#   - 'Is Anomaly (Isolation Forest)': this cell's own output, so re-running the
#     cell does not feed the previous predictions back in as a feature.
non_feature_cols = [
    'Transaction ID', 'Date', 'Time',
    'Z-Score', 'Is Anomaly (Z-Score)',
    'Is_Fraud',
    'Is Anomaly (Isolation Forest)',
]
# errors='ignore' because the output column does not exist on the first run.
iso_features = df_encoded.drop(columns=non_feature_cols, errors='ignore')

# A fixed random_state makes the flagged set reproducible across kernel restarts.
iso_forest = IsolationForest(contamination=0.02, random_state=42)
# fit_predict returns -1 for outliers and 1 for inliers; store 1 = anomaly, 0 = normal.
df_encoded['Is Anomaly (Isolation Forest)'] = (
    iso_forest.fit_predict(iso_features) == -1
).astype(int)

In [None]:
# A transaction counts as anomalous when either detector flagged it
# (row-wise maximum of the two 0/1 flag columns).
df['Is Anomaly'] = df_encoded[
    ['Is Anomaly (Z-Score)', 'Is Anomaly (Isolation Forest)']
].max(axis='columns')


In [None]:
# Precision / recall / F1 of the combined anomaly flag against the fraud label.
report = classification_report(
    y_true=df['Is_Fraud'],
    y_pred=df['Is Anomaly'],
    target_names=['Non-Fraud', 'Fraud'],
)
print(report)

In [None]:
# Confusion matrix of the fraud label (rows) versus the combined anomaly flag (columns).
cm = confusion_matrix(y_true=df['Is_Fraud'], y_pred=df['Is Anomaly'])
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['Non-Fraud', 'Fraud'],
)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix')
plt.show()