In [None]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split

# Load the preprocessed datasets (paths are relative to the notebook's
# working directory).
fraud_data = pd.read_csv('../data/processed_fraud_data.csv')
creditcard_data = pd.read_csv('../data/processed_creditcard_data.csv')

# Feature and target separation.
# The dropped fraud columns are excluded from the feature matrix; 'class'
# is the prediction target.
X_fraud = fraud_data.drop(
    columns=['user_id', 'signup_time', 'purchase_time', 'ip_address', 'class']
)
y_fraud = fraud_data['class']

X_credit = creditcard_data.drop(columns=['Time', 'Class'])
y_credit = creditcard_data['Class']

# Train-test split (80/20, fixed seed for reproducibility).
# NOTE(review): fraud labels are presumably heavily imbalanced -- consider
# passing stratify=y_* so both splits keep the same class ratio; confirm
# against the data before changing, since it alters the split.
X_train_fraud, X_test_fraud, y_train_fraud, y_test_fraud = train_test_split(
    X_fraud, y_fraud, test_size=0.2, random_state=42
)
X_train_credit, X_test_credit, y_train_credit, y_test_credit = train_test_split(
    X_credit, y_credit, test_size=0.2, random_state=42
)

# Model training and evaluation.
# The context manager guarantees the MLflow run is closed even if training
# or evaluation raises, so a failed cell does not leave a dangling active run.
with mlflow.start_run():
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train_fraud, y_train_fraud)

    # Hard labels feed the classification report (precision/recall/F1).
    y_pred_fraud = model.predict(X_test_fraud)
    print(classification_report(y_test_fraud, y_pred_fraud))

    # ROC AUC must be computed from continuous scores, not thresholded
    # labels; otherwise the curve collapses to a single operating point.
    # Column 1 of predict_proba is the probability of the greater class
    # label (assumes a binary target -- TODO confirm).
    y_score_fraud = model.predict_proba(X_test_fraud)[:, 1]
    roc_auc = roc_auc_score(y_test_fraud, y_score_fraud)
    print("ROC AUC Score:", roc_auc)

    # Log metrics and model
    mlflow.log_metric("roc_auc", roc_auc)
    mlflow.sklearn.log_model(model, "fraud_detection_model")