In [1]:
# Install dependencies first if not already installed:
# pip install python-docx

import os
import subprocess
import sys

from docx import Document
from docx.shared import Pt, RGBColor

# Start from an empty document built on python-docx's default template.
doc = Document()

# Top-of-document heading; level 0 is the highest heading level available.
doc.add_heading(
    'Mobile Money Fraud Detection Notebook - Explanation',
    level=0,
)

# Content for the generated document: one (title, code, explanation) tuple per
# section, listed in the order the sections should appear. The rendering loop
# further down unpacks each tuple in exactly this field order.
#   title       - heading text for the section
#   code        - source snippet stored as a plain string; this script only
#                 writes it into the document and never executes it
#   explanation - short prose; '\n' separates the individual bullet lines
sections = [
    ('Importing Libraries', 
     '''import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, roc_auc_score

import joblib''',
     'Explanation:\n- pandas and numpy are used for reading data and numerical operations.\n- matplotlib.pyplot and seaborn are used for visualizations.\n- sklearn provides machine learning tools.\n- joblib is used to save trained models.'),
    
    ('Reading the Data', 
     'df = pd.read_csv(\'mobile_money_fraud.csv\')\ndf.head()',
     'Explanation:\n- Reads dataset from CSV.\n- df.head() shows first 5 rows.'),
    
    ('Checking for Missing Values', 
     'df.isnull().sum()',
     'Explanation:\n- Checks columns for missing values.'),
    
    ('Handling Missing Values', 
     'df.fillna(df.mean(), inplace=True)',
     'Explanation:\n- Fills missing values with column mean.'),
    
    ('Separating Features and Target', 
     'X = df.drop(\'Fraud\', axis=1)\ny = df[\'Fraud\']',
     'Explanation:\n- X contains features, y is the target.'),
    
    ('Splitting Data', 
     'X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)',
     'Explanation:\n- 80% training, 20% testing.'),
    
    # NOTE(review): the snippet below classifies by thresholding the output of
    # LinearRegression at 0.5. Confirm the described notebook really uses
    # LinearRegression here rather than a classifier (e.g. LogisticRegression).
    ('Linear Regression Model', 
     'lr = LinearRegression()\nlr.fit(X_train, y_train)\nlr_pred = lr.predict(X_test)\nlr_pred_class = [1 if x > 0.5 else 0 for x in lr_pred]\nlr_acc = accuracy_score(y_test, lr_pred_class)',
     'Explanation:\n- Linear Regression trained.\n- Convert predictions to 0 or 1.\n- Calculate accuracy.'),
    
    ('Linear Regression Report', 
     'print("Linear Regression Accuracy:", lr_acc)\nprint(classification_report(y_test, lr_pred_class))',
     'Explanation:\n- Shows accuracy and classification report.'),
    
    ('Decision Tree Model', 
     'dt = DecisionTreeClassifier(max_depth=5)\ndt.fit(X_train, y_train)\ndt_pred = dt.predict(X_test)\ndt_acc = accuracy_score(y_test, dt_pred)',
     'Explanation:\n- Decision Tree trained with max depth 5.\n- Accuracy computed.'),
    
    ('Decision Tree Report', 
     'print("Decision Tree Accuracy:", dt_acc)\nprint(classification_report(y_test, dt_pred))',
     'Explanation:\n- Shows precision, recall, f1-score, accuracy.'),
    
    ('Selecting the Best Model', 
     'if dt_acc > lr_acc:\n    best_model = dt\n    print("Best Model: Decision Tree")\nelse:\n    best_model = lr\n    print("Best Model: Linear Regression")',
     'Explanation:\n- Compares models and chooses the best one.'),
    
    # NOTE(review): the confusion-matrix and ROC snippets below always use the
    # decision tree (dt / dt_pred), regardless of which model the
    # 'Selecting the Best Model' snippet picks as best_model.
    ('Confusion Matrix', 
     'cm = confusion_matrix(y_test, dt_pred)\nsns.heatmap(cm, annot=True, fmt=\'d\', cmap=\'Blues\')\nplt.title("Confusion Matrix - Decision Tree")\nplt.xlabel("Predicted")\nplt.ylabel("Actual")\nplt.show()',
     'Explanation:\n- Visualizes true positives, negatives, false positives, negatives.'),
    
    ('ROC Curve and AUC', 
     'dt_probs = dt.predict_proba(X_test)[:,1]\nfpr, tpr, _ = roc_curve(y_test, dt_probs)\nplt.plot(fpr, tpr)\nplt.xlabel("False Positive Rate")\nplt.ylabel("True Positive Rate")\nplt.title("ROC Curve - Decision Tree")\nplt.show()\nprint("AUC Score:", roc_auc_score(y_test, dt_probs))',
     'Explanation:\n- Plots ROC curve.\n- Calculates AUC score.'),
    
    ('Saving the Best Model', 
     'joblib.dump(best_model, "model.pkl")',
     'Explanation:\n- Saves best model for future use.'),
]

# Add sections to document: for each (title, code, explanation) tuple emit a
# level-1 heading, the explanation paragraph, then the code snippet.
for title, code, explanation in sections:
    doc.add_heading(title, level=1)
    doc.add_paragraph(explanation)

    # Put the snippet in an explicit run and format that run only.
    # Assigning to `paragraph.style.font` would edit the paragraph *style*
    # shared by other paragraphs (including the explanation paragraphs added
    # above), turning them monospace as well. Creating the run explicitly also
    # guarantees a run exists even when the snippet is an empty string.
    code_paragraph = doc.add_paragraph()
    run = code_paragraph.add_run(code)
    run.font.name = 'Courier New'
    run.font.size = Pt(10)
    run.font.color.rgb = RGBColor(0, 0, 139)  # Dark blue

# Save document (written relative to the current working directory)
filename = 'Mobile_Money_Fraud_Explanation.docx'
doc.save(filename)


def _open_with_default_app(path):
    """Ask the operating system to open ``path`` with its default application.

    Uses ``os.startfile`` where it exists (Windows only), the ``open`` command
    on macOS, and ``xdg-open`` elsewhere.

    Returns:
        True if the launch request was issued without error, False if no
        opener is available or the opener reported a failure.
    """
    try:
        if hasattr(os, 'startfile'):  # os.startfile is only defined on Windows
            os.startfile(path)
        elif sys.platform == 'darwin':
            subprocess.run(['open', path], check=True)
        else:
            subprocess.run(['xdg-open', path], check=True)
    except (OSError, subprocess.CalledProcessError):
        # Missing opener executable (FileNotFoundError is an OSError), no
        # associated application, or a non-zero exit status from the opener.
        return False
    return True


# Opening the file is a convenience only: the document is already saved, so a
# failure to launch a viewer must not make the cell fail.
if _open_with_default_app(filename):
    print(f"Document '{filename}' created and opened in Word successfully!")
else:
    print(f"Document '{filename}' created at {os.path.abspath(filename)}; "
          "it could not be opened automatically, please open it manually.")


Document 'Mobile_Money_Fraud_Explanation.docx' created and opened in Word successfully!
