# Configuration & Environment — Report

Visualizes CSV outputs generated by the **Configuration_Environment** block.

Included CSVs:
- `Configuration_Classes.csv`
- `Configuration_Files.csv`
- `Feature_Flags.csv`
- `Injected_Properties.csv`

> Notes
> - The dataset may contain literal `N/A` values for some properties. They are parsed as missing when the CSV is loaded and shown as an explicit `N/A` category in the charts, so missing keys stay visible.
> - Charts are limited to the most useful summaries for this block.


In [None]:
# Setup: imports, paths, helpers
# - CSVs live under ../reports/csv-reports/<CATEGORY>/<file>.csv relative to this notebook folder.
# - Minimal console output; only show information if a CSV is missing/empty.
# - Bar charts use an explicit default color so it's easy to tweak later.

import os
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px

# Notebook category and the folders its CSV reports are read from
CATEGORY = "Configuration_Environment"
CSV_BASE = Path("../reports/csv-reports").resolve()
CONF_DIR = CSV_BASE.joinpath(CATEGORY)

# One explicit color shared by every bar chart in this notebook
DEFAULT_BAR_COLOR = ["#1f77b4"]

# NA policy: these literals are parsed as missing values when reading CSVs
NA_LITS = [
    "", " ",
    "NA", "N/A", "n/a", "NaN",
    "NULL", "Null", "null",
    "None", "none",
    "-", "--",
]

def read_csv_safe(path: Path) -> pd.DataFrame:
    """Load ``path`` as a DataFrame, or return an empty one on any problem.

    A missing file prints an ``[info]`` line and a failed read prints a
    ``[warn]`` line; neither raises. The literals in ``NA_LITS`` are parsed
    as missing values in addition to the pandas defaults, and column names
    are converted to strings with surrounding whitespace removed.
    """
    csv_path = Path(path)
    if csv_path.exists():
        try:
            frame = pd.read_csv(csv_path, na_values=NA_LITS, keep_default_na=True)
            frame.columns = [str(col).strip() for col in frame.columns]
        except Exception as e:
            # Best-effort loader: report the failure and carry on with no data.
            print(f"[warn] Failed to read {csv_path}: {e}")
            return pd.DataFrame()
        return frame

    print(f"[info] Missing CSV: {csv_path}")
    return pd.DataFrame()

def show_head(df: pd.DataFrame, n: int = 8):
    """Show the first ``n`` rows of ``df`` for ad-hoc inspection.

    An empty frame prints a one-line ``[info]`` notice instead of a table.
    The rows are rendered with the notebook's ``display`` when that name is
    available; otherwise they are written with ``print`` so the helper also
    works when the code runs outside IPython/Jupyter. Returns ``None``.
    """
    if df.empty:
        print("[info] DataFrame is empty.")
        return

    try:
        render = display  # provided implicitly by the IPython/Jupyter session
    except NameError:
        render = print  # plain Python run: fall back to text output
    render(df.head(n))

def labelize_na(s, label="N/A"):
    """Return a string copy of ``s`` with missing entries shown as ``label``.

    Missing values are replaced first, the result is cast to ``str``, and any
    remaining literal ``"nan"`` / ``"NaN"`` strings are mapped to ``label``
    too. The input Series is not modified.
    """
    visible = s.where(s.notna(), label).astype(str)
    return visible.replace({"nan": label, "NaN": label})

def pick_col(df, names=None, kind=None):
    """Choose one column of ``df`` by dtype kind or by preferred names.

    ``kind`` is checked first and, when it is 'numeric' or 'text', ``names``
    is not consulted:
      - 'numeric': the first numeric column, or None if there is none.
      - 'text': the first object-dtype column; failing that the first column
        of any dtype; None when the frame has no columns.
    Otherwise ``names`` is scanned in order and the first case-insensitive
    match against the frame's column names is returned (None if no match).
    """
    lowered = {col.lower(): col for col in df.columns}

    if kind == "numeric":
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        return numeric_cols[0] if len(numeric_cols) else None

    if kind == "text":
        object_cols = [col for col in df.columns if df[col].dtype == "object"]
        if object_cols:
            return object_cols[0]
        return df.columns[0] if len(df.columns) else None

    # Preference list: first name that resolves to a real column wins.
    hits = (lowered.get(wanted.lower()) for wanted in names or [])
    return next((hit for hit in hits if hit), None)

def ext_from_name(x: str) -> str:
    """Return the lower-cased extension of a filename or path, else 'unknown'.

    Only the last path component is inspected, so a dot inside a directory
    name (``conf.d/Makefile``) is not mistaken for an extension. Forward
    slashes and backslashes are both treated as separators. A leading-dot
    name such as ``.env`` yields ``env``; a name with no dot, or one that
    ends in a bare dot, yields 'unknown'.
    """
    # Normalize separators, then keep just the final component.
    base = str(x).replace("\\", "/").rsplit("/", 1)[-1]
    _, dot, ext = base.rpartition(".")
    if dot and ext:
        return ext.lower()
    return "unknown"


## 1) Configuration classes — by annotation type

In [None]:
# Configuration_Classes charts
# Charts produced by this cell:
#  - 1A) Configuration classes by annotation type (pie)

# The annotation column is looked up as: annotationType / annotation / type
path = CONF_DIR / "Configuration_Classes.csv"
df_cfg = read_csv_safe(path)
show_head(df_cfg, 10)

if df_cfg.empty:
    print("[info] Configuration_Classes.csv missing or empty.")
else:
    c_ann = pick_col(df_cfg, ["annotationType","annotation","type"], kind=None)
    if not c_ann:
        print("[info] No annotation column found — skipping pie.")
    else:
        # One row per annotation label (missing shown as N/A) with its frequency
        counts = (
            labelize_na(df_cfg[c_ann])
            .value_counts()
            .rename_axis("annotationType")
            .reset_index(name="count")
        )
        fig = px.pie(
            counts,
            names="annotationType",
            values="count",
            hole=0.35,
            title="Configuration classes by annotation type",
        )
        fig.update_layout(width=760, height=460)
        fig.update_traces(textposition="outside")
        fig.show()


## 2) Configuration files — by extension

In [None]:
# Charts for Configuration_Files
# Where charts are generated:
#  - 2A) Configuration files by extension (bar, explicit color)

path = CONF_DIR / "Configuration_Files.csv"
df_files = read_csv_safe(path)
show_head(df_files, 10)

if not df_files.empty:
    # Find a name-like column with the shared pick_col helper (case-insensitive,
    # in priority order); if none matches, fall back to the first text column.
    name_col = pick_col(
        df_files,
        ["configurationFile.name","name","fileName","filename","path","configurationFile"],
    ) or pick_col(df_files, kind="text")

    if name_col:
        # Missing names become the "N/A" label, which has no extension and so
        # lands in the 'unknown' bucket.
        names = labelize_na(df_files[name_col])
        df_files["ext"] = names.map(ext_from_name)
        ext_counts = df_files["ext"].value_counts().reset_index()
        ext_counts.columns = ["extension", "count"]
        fig = px.bar(ext_counts, x="extension", y="count", text_auto=True,
                     title="Configuration files by extension",
                     color_discrete_sequence=DEFAULT_BAR_COLOR)
        fig.update_layout(width=820, height=440, xaxis_title="extension", yaxis_title="count")
        fig.show()
    else:
        print("[info] Could not detect file-name column — skipping chart.")
else:
    print("[info] Configuration_Files.csv missing or empty.")


## 3) Feature flags — sources (Hardcoded vs @Value)

In [None]:
# Feature_Flags charts
# Charts produced by this cell:
#  - 3A) Feature-flag sources (pie)

path = CONF_DIR / "Feature_Flags.csv"
df_flags = read_csv_safe(path)
show_head(df_flags, 10)

if df_flags.empty:
    print("[info] Feature_Flags.csv missing or empty.")
else:
    c_src = pick_col(df_flags, ["source","origin"], kind=None)
    if not c_src:
        print("[info] No 'source' column — skipping pie.")
    else:
        # Frequency of each source label, with missing values shown as N/A
        counts = (
            labelize_na(df_flags[c_src])
            .value_counts()
            .rename_axis("source")
            .reset_index(name="count")
        )
        fig = px.pie(
            counts,
            names="source",
            values="count",
            hole=0.35,
            title="Feature-flag sources",
        )
        fig.update_layout(width=720, height=420)
        fig.update_traces(textposition="outside")
        fig.show()


## 4) Injected properties — field types (Top 25)

In [None]:
# Injected_Properties charts
# Charts produced by this cell:
#  - 4A) Injected field types (Top 25) (bar, explicit color)

path = CONF_DIR / "Injected_Properties.csv"
df_inj = read_csv_safe(path)
show_head(df_inj, 10)

if df_inj.empty:
    print("[info] Injected_Properties.csv missing or empty.")
else:
    c_type = pick_col(df_inj, ["fieldType","type","signature"], kind=None)
    if not c_type:
        print("[info] No field type column — skipping bar.")
    else:
        # value_counts sorts by frequency, so the first 25 rows are the top 25
        counts = (
            labelize_na(df_inj[c_type])
            .value_counts()
            .rename_axis("fieldType")
            .reset_index(name="count")
        )
        fig = px.bar(
            counts.iloc[:25],
            x="fieldType",
            y="count",
            text_auto=True,
            title="Injected field types (Top 25)",
            color_discrete_sequence=DEFAULT_BAR_COLOR,
        )
        fig.update_layout(
            width=1100,
            height=500,
            xaxis_tickangle=45,
            xaxis_title="fieldType",
            yaxis_title="count",
        )
        fig.show()
