### Build, Train, and Save Isolation Forest Model

In [1]:
# Import necessary libraries
import os

import joblib
import pandas as pd
import plotly.express as px
from sklearn.ensemble import IsolationForest

In [2]:
# Read the fake transactions CSV into a DataFrame.
# NOTE(review): the preview of this frame lists an 'Unnamed: 0' header. Confirm
# that it is just the row index and not a stray column stored in the CSV,
# because every column of df is later passed to the model as a feature.
dataset_path = '../data/fake_dataset.csv'
df = pd.read_csv(dataset_path)

In [3]:
# Preview the first rows of the raw dataset to check the columns that were loaded
df.head()

Unnamed: 0,Amount,Merchant,Location,TimeOfDay,TransactionType
0,3746.026648,B,Local,Afternoon,Purchase
1,9507.19235,B,International,Afternoon,Withdrawal
2,7320.207424,C,Local,Morning,Purchase
3,5986.986183,A,Local,Evening,Withdrawal
4,1561.030386,A,International,Morning,Purchase


In [4]:
# Preprocess the data
# 1. Impute missing 'Amount' values with the column mean. The filled column is
#    assigned back to df instead of calling fillna(inplace=True) on the
#    df['Amount'] selection: that chained in-place form raises a FutureWarning
#    on recent pandas and does not update df under Copy-on-Write (the default
#    from pandas 3.0), which would silently leave the NaNs in place.
# 2. One-hot encode the categorical columns. drop_first=True omits the first
#    category of each column (e.g. Merchant 'A'); that category is represented
#    by all remaining dummies of the column being False.
# NOTE: this cell rebinds df to the encoded frame, so it cannot be re-run on its
# own -- re-run from the data-loading cell instead.
df['Amount'] = df['Amount'].fillna(df['Amount'].mean())
df = pd.get_dummies(df, columns=['Merchant', 'Location', 'TimeOfDay', 'TransactionType'], drop_first=True)

In [5]:
# Inspect the frame after imputation and one-hot encoding
df

Unnamed: 0,Amount,Merchant_B,Merchant_C,Location_Local,TimeOfDay_Evening,TimeOfDay_Morning,TransactionType_Withdrawal
0,3746.026648,True,False,True,False,False,False
1,9507.192350,True,False,False,False,False,True
2,7320.207424,False,True,True,False,True,False
3,5986.986183,False,False,True,True,False,True
4,1561.030386,False,False,False,False,True,False
...,...,...,...,...,...,...,...
99995,7923.256002,False,True,True,True,False,False
99996,7792.749699,False,False,True,False,True,True
99997,6744.859620,False,False,False,True,False,False
99998,4994.972999,False,False,False,True,False,True


In [6]:
# Fit an Isolation Forest on the preprocessed frame (all columns of df are used).
# contamination is the proportion of samples the model treats as outliers;
# random_state pins the seed so repeated runs build the same forest.
forest_params = {'contamination': 0.2, 'random_state': 42}
model = IsolationForest(**forest_params)
model.fit(df)

In [7]:
# Define features
# Capture the column labels of the frame the model was fitted on (a pandas Index).
# Presumably the deployed code uses this list to align incoming data with the
# training columns -- verify against the consumer.
features = df.columns

In [8]:
# Save the model and features for deployment
# Create the output directory first: joblib.dump does not create missing parent
# directories, so the cell would fail on a checkout that has no '../models'.
# joblib files are pickle-based -- only load them back from trusted locations.
os.makedirs('../models', exist_ok=True)
joblib.dump(model, '../models/isolation_forest_model.joblib')
joblib.dump(features, '../models/model_features.joblib')

['../models/model_features.joblib']