In [8]:
import os
import json
import pandas as pd
from docx import Document
from docx.shared import Inches

# ----------------------
# Config
# ----------------------
base_path = "results"
output_docx = "results/Bank_Churn_Full_Report.docx"

doc = Document()
doc.add_heading("Bank Customer Churn Prediction - Full Pipeline Report", 0)

# ----------------------
# Section 1: Problem Formulation
# ----------------------
doc.add_heading("Problem Formulation", level=1)
doc.add_paragraph(
    "The objective of this project is to predict whether a bank customer will churn "
    "(leave the bank) based on demographic, financial, and transactional attributes. "
    "This involves building a machine learning pipeline covering data ingestion, EDA, "
    "feature engineering, model training, and evaluation."
)

# ----------------------
# Section 2: EDA Summary
# ----------------------
eda_report = os.path.join(base_path, "eda/eda_report.pdf")
summary_csv = os.path.join(base_path, "eda/summary_statistics.csv")

doc.add_heading("Exploratory Data Analysis", level=1)
if os.path.exists(eda_report):
    doc.add_paragraph("A full PDF EDA report was generated and can be found at:")
    doc.add_paragraph(eda_report)

if os.path.exists(summary_csv):
    df_summary = pd.read_csv(summary_csv)
    doc.add_heading("Summary Statistics", level=2)
    table = doc.add_table(rows=1, cols=len(df_summary.columns))
    hdr_cells = table.rows[0].cells
    for i, col in enumerate(df_summary.columns):
        hdr_cells[i].text = col

    for _, row in df_summary.iterrows():
        row_cells = table.add_row().cells
        for i, col in enumerate(df_summary.columns):
            row_cells[i].text = str(row[col])

# ----------------------
# Section 3: Feature Store
# ----------------------
doc.add_heading("Feature Store", level=1)
feature_doc = os.path.join(base_path, "feature_store/feature_store_doc.txt")
if os.path.exists(feature_doc):
    with open(feature_doc, "r") as f:
        doc.add_paragraph(f.read())

# ----------------------
# Section 4: Data Versioning
# ----------------------
doc.add_heading("Data Versioning", level=1)
raw_data = os.path.join(base_path, "data/raw/churn.csv")
transformed_data = os.path.join(base_path, "data/transformed/churn_transformed.csv")

if os.path.exists(raw_data):
    doc.add_paragraph(f"Raw dataset stored at: {raw_data}")
if os.path.exists(transformed_data):
    doc.add_paragraph(f"Transformed dataset stored at: {transformed_data}")

# ----------------------
# Section 5: Models and Results
# ----------------------
doc.add_heading("Model Training and Evaluation", level=1)

results_file = os.path.join(base_path, "models/model_results.txt")
if os.path.exists(results_file):
    with open(results_file, "r") as f:
        results_text = f.read()
    doc.add_paragraph("Results Summary:")
    doc.add_paragraph(results_text)

# Handle model versions JSON
model_versions = os.path.join(base_path, "models/model_versions.json")
best_model = None
best_score = -1

if os.path.exists(model_versions):
    doc.add_heading("Model Versions", level=2)
    with open(model_versions, "r") as f:
        versions = json.load(f)
    
    if isinstance(versions, dict):
        for key, val in versions.items():
            doc.add_paragraph(f"{key}: {val}")
            # Pick best model if Accuracy/F1 available
            if isinstance(val, dict):
                score = val.get("Accuracy") or val.get("F1") or 0
                if score > best_score:
                    best_score, best_model = score, key
    elif isinstance(versions, list):
        for i, entry in enumerate(versions, start=1):
            doc.add_heading(f"Model {i}", level=3)
            if isinstance(entry, dict):
                for k, v in entry.items():
                    doc.add_paragraph(f"{k}: {v}")
                score = entry.get("Accuracy") or entry.get("F1") or 0
                if score > best_score:
                    best_score, best_model = score, f"Model {i}"
            else:
                doc.add_paragraph(str(entry))
    else:
        doc.add_paragraph(str(versions))

    if best_model:
        doc.add_heading("Best Model Summary", level=2)
        doc.add_paragraph(
            f"The best performing model is **{best_model}** "
            f"with a score of {best_score:.4f}."
        )

# ----------------------
# Section 6: Pipeline Orchestration
# ----------------------
doc.add_heading("Pipeline Orchestration", level=1)
orchestor_dot = os.path.join(base_path, "visualization/orchestor.dot")
if os.path.exists(orchestor_dot):
    doc.add_paragraph(f"Pipeline graph stored as DOT file at: {orchestor_dot}")

# ----------------------
# Save Document
# ----------------------
doc.save(output_docx)
print(f"✅ Word report generated at: {output_docx}")


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/Bank_Churn_Full_Report.docx'

In [4]:
!pip install python-docx

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
Installing collected packages: python-docx
Successfully installed python-docx-1.2.0
