# MOBILE MONEY FRAUD DETECTION SYSTEM
Tanzania Context: Mixx (Yas), Airtel Money, M-Pesa

In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve
import joblib

In [ ]:
# Load the dataset from a CSV file; the relative path is resolved against the
# current working directory. The trailing expression previews the first five
# rows when the cell is run in a notebook.
df = pd.read_csv(filepath_or_buffer='mobile_money_fraud.csv')
df.head(n=5)

In [ ]:
# Separate the predictors from the target: every column except 'Fraud' is a
# feature, and 'Fraud' is the label to predict.
X = df.drop(columns='Fraud')
y = df['Fraud']

# Hold out 20% of the rows for evaluation with a fixed seed for repeatability.
# stratify=y keeps the proportion of each 'Fraud' class the same in the train
# and test partitions; without it a rare class can be under-represented (or
# nearly absent) in the test set, making the reported metrics unreliable.
# NOTE(review): stratification requires at least two rows per class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [ ]:
# Logistic regression: fit on the training partition (fit() returns the
# estimator itself, so construction and training are chained). The iteration
# cap is raised to 1000 to give the solver more room to converge.
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out partition and report overall accuracy together with the
# per-class precision / recall / F1 table.
lr_pred = lr.predict(X_test)
lr_acc = accuracy_score(y_true=y_test, y_pred=lr_pred)
print('Logistic Regression Accuracy:', lr_acc)
print(classification_report(y_test, lr_pred))

In [ ]:
# Decision tree limited to depth 5. random_state is pinned to the same seed as
# the train/test split: scikit-learn permutes the candidate features at every
# split, so without a fixed seed ties between equally good splits can resolve
# differently from run to run and the fitted tree (and its metrics) would not
# be reproducible.
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
dt.fit(X_train, y_train)

# Evaluate on the held-out partition: overall accuracy plus the per-class
# precision / recall / F1 table.
dt_pred = dt.predict(X_test)
dt_acc = accuracy_score(y_test, dt_pred)
print('Decision Tree Accuracy:', dt_acc)
print(classification_report(y_test, dt_pred))

In [ ]:
# Choose which fitted model to keep.
#
# The comparison uses macro-averaged F1 rather than plain accuracy. On
# imbalanced labels a model that almost always predicts the majority class can
# score a high accuracy while missing most of the minority class; macro F1
# weights every class equally, so poor minority-class performance lowers it.
# NOTE(review): assumes the 'Fraud' classes are imbalanced, as is typical for
# fraud data -- confirm against the dataset.
#
# The scores are read from classification_report(output_dict=True), which is
# already imported, applied to the test-set predictions of each model.
lr_macro_f1 = classification_report(
    y_test, lr_pred, output_dict=True
)['macro avg']['f1-score']
dt_macro_f1 = classification_report(
    y_test, dt_pred, output_dict=True
)['macro avg']['f1-score']

# Strict '>' keeps the original tie-break: logistic regression wins on a tie.
if dt_macro_f1 > lr_macro_f1:
    best_model = dt
    print('Best Model: Decision Tree')
else:
    best_model = lr
    print('Best Model: Logistic Regression')

In [ ]:
# Serialize the selected estimator to 'model.pkl' in the working directory so
# it can be reloaded later with joblib.load.
joblib.dump(value=best_model, filename='model.pkl')

In [ ]:
# Predicted probability for the second class in each estimator's classes_
# (column index 1) -- presumably the fraud label; verify against the data.
lr_probs = lr.predict_proba(X_test)[:, 1]
dt_probs = dt.predict_proba(X_test)[:, 1]

# ROC points for each model; the threshold arrays are not needed.
lr_fpr, lr_tpr, _ = roc_curve(y_test, lr_probs)
dt_fpr, dt_tpr, _ = roc_curve(y_test, dt_probs)

# Draw both curves on the same axes, in the same order as before.
roc_curves = (
    (lr_fpr, lr_tpr, 'Logistic Regression'),
    (dt_fpr, dt_tpr, 'Decision Tree'),
)
for curve_fpr, curve_tpr, curve_label in roc_curves:
    plt.plot(curve_fpr, curve_tpr, label=curve_label)

plt.title('ROC Curve Comparison')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()