Load data


In [None]:
import pandas as pd

# Read the raw transaction records from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer="synthetic_fraud_dataset.csv")

# Preview the first five rows as this cell's displayed output.
df.head(n=5)


Unnamed: 0,Transaction_ID,User_ID,Transaction_Amount,Transaction_Type,Date,Account_Balance,Device_Type,Location,Merchant_Category,Previous_Fraudulent_Activity,Daily_Transaction_Count,Card_Type,Card_Age,Fraud_Label
0,TXN_33553,USER_1834,39.79,POS,14 August 2023,93213.17,Laptop,Sydney,Travel,0,7,Amex,65,0
1,TXN_9427,USER_7875,1.19,Bank Transfer,7 June 2023,75725.25,Mobile,New York,Clothing,0,13,Mastercard,186,1
2,TXN_199,USER_2734,28.96,Online,20 June 2023,1588.96,Tablet,Mumbai,Restaurants,0,14,Visa,226,1
3,TXN_12447,USER_2617,254.32,ATM Withdrawal,7 December 2023,76807.2,Tablet,New York,Clothing,0,8,Visa,76,1
4,TXN_39489,USER_2014,31.28,POS,11 November 2023,92354.66,Mobile,Mumbai,Electronics,1,14,Mastercard,140,1


Clean data

In [None]:
# Drop rows with any missing value. The explicit .copy() makes the result an
# independent frame, so the column assignments below cannot trigger pandas'
# SettingWithCopyWarning on versions that flag filtered frames.
df = df.dropna().copy()

# Parse the textual dates (e.g. "14 August 2023") into datetime values.
df['Date'] = pd.to_datetime(df['Date'])

# NOTE(review): the dates previewed by df.head() carry no time of day; if that
# holds for every row, Hour is 0 everywhere and adds no information. The
# column is kept so the exported dataset keeps its existing schema.
df['Hour'] = df['Date'].dt.hour

CONVERT TEXT → NUMBERS

In [None]:
from sklearn.preprocessing import LabelEncoder

# Categorical text columns to convert into integer codes.
cols = [
    'Transaction_Type',
    'Device_Type',
    'Location',
    'Merchant_Category',
]

# One fitted encoder per column, keyed by column name. Reusing a single
# LabelEncoder refits it on every iteration, so only the last column's
# category -> code mapping would survive; keeping them all makes it possible
# to decode the integers (inverse_transform) or encode new data consistently.
encoders = {}

for col in cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le


DEFINE INPUT & OUTPUT

In [None]:
# Input (features): the columns the models are trained on.
feature_columns = [
    'Transaction_Amount',
    'Account_Balance',
    'Device_Type',
    'Location',
    'Merchant_Category',
    'Daily_Transaction_Count',
    'Previous_Fraudulent_Activity',
]

X = df[feature_columns]


In [None]:
# Output (target): the label column the models learn to predict.
y = df.loc[:, 'Fraud_Label']


SPLIT DATA (TRAIN vs TEST)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. stratify=y keeps the proportion of
# each Fraud_Label value the same in the train and test parts, so the
# minority class is not over- or under-represented in either by chance.
# random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


TRAIN SUPERVISED MODELS

1. Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The features mix very different magnitudes (account balances in the tens of
# thousands next to small counts and 0/1 flags), so they are standardized
# before the linear model. class_weight='balanced' reweights the classes
# inversely to their frequency so the minority class is not ignored.
# Without these two steps the model predicted the majority class for every
# test row (0.00 recall on class 1 in the recorded evaluation report).
# The pipeline exposes the same fit/predict/predict_proba interface as the
# bare estimator, so later cells can keep using `lr` unchanged.
lr = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000, class_weight='balanced'),
)
lr.fit(X_train, y_train)

print("Logistic Regression trained successfully")


Logistic Regression trained successfully


2. Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Build and fit the tree in one expression (fit returns the estimator itself).
# The fixed seed keeps the result repeatable between runs.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

print("Decision Tree trained successfully")


Decision Tree trained successfully


3. Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Forest settings: 100 trees, a fixed seed, and n_jobs=-1 to use all cores.
rf_settings = {
    "n_estimators": 100,
    "random_state": 42,
    "n_jobs": -1,
}

rf = RandomForestClassifier(**rf_settings)
rf.fit(X_train, y_train)

print("Random Forest trained successfully")


Random Forest trained successfully


4. XGBoost

In [None]:
!pip install xgboost



In [None]:
from xgboost import XGBClassifier

# Gradient-boosting settings gathered in one place, then unpacked into the
# classifier constructor.
xgb_settings = dict(
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    eval_metric='logloss',
    random_state=42,
)

xgb = XGBClassifier(**xgb_settings)
xgb.fit(X_train, y_train)

print("XGBoost trained successfully")


XGBoost trained successfully


EVALUATE SUPERVISED MODELS

In [None]:
from sklearn.metrics import classification_report

# Display name -> fitted classifier, in reporting order.
fitted_models = {
    "LOGISTIC REGRESSION": lr,
    "DECISION TREE": dt,
    "RANDOM FOREST": rf,
    "XGBOOST": xgb,
}

for position, (label, model) in enumerate(fitted_models.items()):
    # Blank line between consecutive reports, but not before the first one.
    separator = "\n" if position else ""
    print(f"{separator}{label} RESULTS")
    # zero_division=0 reports 0 for a metric that is undefined (e.g. precision
    # for a class the model never predicts) instead of emitting an
    # UndefinedMetricWarning; the numbers match the default behaviour.
    print(classification_report(y_test, model.predict(X_test), zero_division=0))


LOGISTIC REGRESSION RESULTS
              precision    recall  f1-score   support

           0       0.68      1.00      0.81      6765
           1       0.00      0.00      0.00      3235

    accuracy                           0.68     10000
   macro avg       0.34      0.50      0.40     10000
weighted avg       0.46      0.68      0.55     10000


DECISION TREE RESULTS
              precision    recall  f1-score   support

           0       0.68      0.67      0.67      6765
           1       0.33      0.33      0.33      3235

    accuracy                           0.56     10000
   macro avg       0.50      0.50      0.50     10000
weighted avg       0.56      0.56      0.56     10000


RANDOM FOREST RESULTS


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.68      0.89      0.77      6765
           1       0.32      0.11      0.16      3235

    accuracy                           0.64     10000
   macro avg       0.50      0.50      0.46     10000
weighted avg       0.56      0.64      0.57     10000


XGBOOST RESULTS
              precision    recall  f1-score   support

           0       0.68      1.00      0.81      6765
           1       0.32      0.00      0.00      3235

    accuracy                           0.68     10000
   macro avg       0.50      0.50      0.41     10000
weighted avg       0.56      0.68      0.55     10000



TRAIN UNSUPERVISED MODELS

1. Isolation Forest

In [None]:
from sklearn.ensemble import IsolationForest

# Unsupervised detector: fitted on the training features only, no labels.
# contamination=0.05 is the assumed share of outliers in the data.
iso = IsolationForest(contamination=0.05, random_state=42).fit(X_train)

print("Isolation Forest trained successfully")


Isolation Forest trained successfully


2. One-Class SVM

In [None]:
from sklearn.svm import OneClassSVM

# One-class SVM settings: RBF kernel, nu=0.05, gamma chosen by the 'scale' rule.
ocsvm_settings = {
    "kernel": 'rbf',
    "nu": 0.05,
    "gamma": 'scale',
}

# Fitted on the training features only; no labels are involved.
ocsvm = OneClassSVM(**ocsvm_settings)
ocsvm.fit(X_train)

print("One-Class SVM trained successfully")


One-Class SVM trained successfully


SELECT FINAL MODEL

In [None]:
# Select Decision Tree as the final model
# NOTE(review): in the evaluation reports recorded in this notebook the tree
# has the lowest accuracy (0.56) but the highest fraud-class recall (0.33) --
# presumably the reason for this choice; confirm and document the criterion.
final_model = dt
print("Decision Tree selected as the final model for deployment")


Decision Tree selected as the final model for deployment


SAVE FINAL MODEL

In [None]:
import joblib

# Persist the chosen estimator to disk so it can be reloaded with joblib.load.
joblib.dump(value=final_model, filename="fraud_model.pkl")

print("Final model saved as fraud_model.pkl")


Final model saved as fraud_model.pkl


GENERATE FRAUD PREDICTIONS

In [None]:
# Score every row of the feature matrix with the selected model.
# Note: X holds all rows, including the ones the model was trained on, so the
# values for those rows are not an out-of-sample estimate.
fraud_labels = final_model.predict(X)
class_probabilities = final_model.predict_proba(X)

# Hard 0/1 prediction per transaction.
df['Fraud_Prediction'] = fraud_labels

# Risk score: the second column of predict_proba.
df['Fraud_Risk_Score'] = class_probabilities[:, 1]

# Preview the two new columns.
df[['Fraud_Prediction', 'Fraud_Risk_Score']].head()


Unnamed: 0,Fraud_Prediction,Fraud_Risk_Score
0,0,0.0
1,1,1.0
2,1,1.0
3,1,1.0
4,0,0.0


SAVE FINAL DATASET FOR POWER BI

In [None]:
# Write the scored dataset to CSV without the index column.
df.to_csv(path_or_buf="fraud_predictions.csv", index=False)

print("fraud_predictions.csv created! Ready for Power BI")


fraud_predictions.csv created! Ready for Power BI
