# In [None]:
# Model Training Notebook: preprocess raw transactions and train an
# IsolationForest anomaly detector.

from pathlib import Path

import pandas as pd
from joblib import dump
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Training pipeline: load the raw transactions, derive numeric features,
# write the processed table, fit an IsolationForest on the features, and
# persist the model together with the fitted preprocessors.
#
# All paths are relative to the directory the notebook is run from.
raw_path = '../data/raw/transactions_raw.csv'
processed_path = '../data/processed/transactions_processed.csv'
model_path = '../models/isolation_forest_model.pkl'
preprocessors_path = '../models/preprocessors.pkl'

# Load data
df = pd.read_csv(raw_path)

# Encode categorical columns.
# The fitted encoders are kept in variables instead of being discarded: the
# category -> integer mapping they learn is part of what the model was trained
# on, so new data must be encoded with these same objects before scoring.
location_encoder = LabelEncoder()
device_type_encoder = LabelEncoder()
df['location_code'] = location_encoder.fit_transform(df['location'])
df['device_type_code'] = device_type_encoder.fit_transform(df['device_type'])

# Scale amount. The scaler is kept for the same reason as the encoders: the
# min/max it learned from the training data must be reused at scoring time.
amount_scaler = MinMaxScaler()
df['amount_scaled'] = amount_scaler.fit_transform(df[['amount']])

# Save processed data
processed = df[['timestamp', 'user_id', 'amount_scaled', 'location_code', 'device_type_code']]
# Create the output directory first so a fresh checkout does not fail here.
Path(processed_path).parent.mkdir(parents=True, exist_ok=True)
processed.to_csv(processed_path, index=False)

# Train model
# Only the derived numeric features are used; 'timestamp' and 'user_id' are
# written to the processed file but are not model inputs.
feature_columns = ['amount_scaled', 'location_code', 'device_type_code']
X = processed[feature_columns]
model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
model.fit(X)

# Save model
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
dump(model, model_path)

# Save the fitted preprocessors next to the model so inference code can apply
# exactly the same transformations, in the same feature order, as training.
Path(preprocessors_path).parent.mkdir(parents=True, exist_ok=True)
dump(
    {
        'location_encoder': location_encoder,
        'device_type_encoder': device_type_encoder,
        'amount_scaler': amount_scaler,
        'feature_columns': feature_columns,
    },
    preprocessors_path,
)
print("Model trained and saved.")
