
# Security — Report (CSV → Plotly)

This notebook reads pre-generated CSV files for the **Security** block and renders clear, interactive visuals.
Input folder:
`reports/custom-queries-csv/Security/`

CSV files expected:
- `Security_Configurations.csv`
- `Spring_Security.csv`
- `Unsecured_Endpoints.csv`

Conventions:
- If a CSV is missing or has no rows/required columns, the notebook **prints an info message** (no placeholder charts).
- Table previews are kept small (at most 5 rows; the top-endpoints list in section 3 shows up to 10) to keep the flow focused on visuals.


In [None]:

# Setup and helper utilities
import os, glob
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display

# Enable the pandas option 'future.no_silent_downcasting' for the whole notebook session.
# NOTE(review): presumably only newer pandas releases recognise this key — confirm the
# minimum pandas version this notebook is expected to run on.
pd.set_option('future.no_silent_downcasting', True)

def resolve_reports_dir():
    """Resolve the base 'reports' directory.

    Lookup order:
      1. The ``REPORTS_DIRECTORY`` environment variable, if it is set and the
         path exists.
      2. A ``reports`` entry in the current working directory or in one of its
         (at most seven) nearest ancestors.
      3. ``<cwd>/reports`` as a last resort, even if it does not exist.

    Returns:
        str: Absolute path of the resolved directory.
    """
    env = os.environ.get("REPORTS_DIRECTORY")
    if env:
        p = Path(env).expanduser().resolve()
        if p.exists():
            return str(p)

    cwd = Path.cwd()
    # Slice the ancestor list instead of indexing it: indexing `cwd.parents[i]`
    # raises IndexError when the working directory is fewer than seven levels
    # deep (e.g. "/" or "/work"), which would skip the fallback below.
    for base in [cwd, *list(cwd.parents)[:7]]:
        cand = base / "reports"
        if cand.exists():
            return str(cand.resolve())
    return str((cwd / "reports").resolve())

# Base reports directory, resolved once when this cell runs.
REPORTS_DIR = resolve_reports_dir()
# Folder holding the Security CSV files read by the cells below.
SEC_DIR = os.path.join(REPORTS_DIR, "custom-queries-csv", "Security")
print("Reports:", SEC_DIR)

# Extra literals treated as missing values; passed as `na_values` to pd.read_csv in read_csv_safe().
NA_LITS = ["", " ", "NA", "N/A", "n/a", "NaN", "NULL", "Null", "null", "None", "none", "-", "--"]

def read_csv_safe(p):
    """Load the CSV at *p*, falling back to an empty DataFrame on any problem.

    A missing file prints an ``[info]`` line and a failed read prints a
    ``[warn]`` line; neither raises. On success the column names are
    whitespace-stripped and rows that are entirely NA are dropped. The
    literals in ``NA_LITS`` count as NA in addition to the pandas defaults.
    """
    if os.path.exists(p):
        try:
            frame = pd.read_csv(p, na_values=NA_LITS, keep_default_na=True)
            frame.columns = [str(name).strip() for name in frame.columns]
            return frame.dropna(how="all")
        except Exception as e:
            # Best-effort read: report the problem and let callers see "no data".
            print(f"[warn] Failed to read {p}: {e}")
    else:
        print(f"[info] Missing CSV: {p}")
    return pd.DataFrame()

def find_col(df, *cands, default=None, contains=None):
    """Look up a column name in *df*, ignoring case.

    Candidates in *cands* are tried in order for an exact (case-insensitive)
    match. If none matches and *contains* is given, the first column whose
    lower-cased name includes that substring is returned. When nothing
    matches, or *df* is None/empty, *default* is returned.
    """
    if df is None or df.empty:
        return default

    by_lower = {name.lower(): name for name in df.columns}

    # Lazy generator: candidates after the first hit are never touched.
    exact_hits = (by_lower[c.lower()] for c in cands if c and c.lower() in by_lower)
    not_found = object()
    hit = next(exact_hits, not_found)
    if hit is not not_found:
        return hit

    if contains:
        needle = contains.lower()
        for lowered, original in by_lower.items():
            if needle in lowered:
                return original
    return default

# Number of rows shown by the `.head(...)` table previews in the report cells.
MAX_ROWS_PREVIEW = 5
# NOTE(review): not referenced by any cell visible here — presumably a cap for bar charts; confirm or remove.
MAX_BARS = 30


## 1) Security Configurations

In [None]:

# Section 1: load Security_Configurations.csv and render four charts
# (deprecated-adapter donut, parent-class sunburst, annotation treemap,
# and an optional donut of configuration methods).
path = os.path.join(SEC_DIR, "Security_Configurations.csv")
df_sc = read_csv_safe(path)

# Resolve the actual column names (exact match first, then substring fallback).
c_cls   = find_col(df_sc, "securityConfigClass", contains="config")
c_ext   = find_col(df_sc, "extendsClass", contains="extend", default="extendsClass")
c_annc  = find_col(df_sc, "annotationsCount", contains="annot", default="annotationsCount")
c_anns  = find_col(df_sc, "annotations", contains="annot", default="annotations")
c_cfgc  = find_col(df_sc, "configMethodsCount", contains="config", default="configMethodsCount")
c_cfgs  = find_col(df_sc, "configMethods", contains="config", default="configMethods")
c_depr  = find_col(df_sc, "usesDeprecatedAdapter", contains="deprecated", default="usesDeprecatedAdapter")

required = [c_cls, c_ext, c_annc, c_cfgc, c_depr]
# find_col() returns its `default` name even when the CSV lacks that column, so a
# plain `is None` test cannot detect missing columns: verify membership too.
# The substring fallbacks (e.g. "config") can also resolve two roles to the same
# column, in which case the numeric coercion below would overwrite the class
# names; treat such a collision as "required columns missing".
cols_ok = (
    all(col is not None and col in df_sc.columns for col in required)
    and len(set(required)) == len(required)
)
if df_sc.empty or not cols_ok:
    print("[info] No data for Security_Configurations (missing CSV or required columns).")
else:
    # Tiny preview
    display(df_sc[[c_cls, c_ext, c_annc, c_cfgc, c_depr]].head(MAX_ROWS_PREVIEW))

    # Ensure numeric/bool types (unparseable counts become 0)
    df_sc[c_annc] = pd.to_numeric(df_sc[c_annc], errors="coerce").fillna(0)
    df_sc[c_cfgc] = pd.to_numeric(df_sc[c_cfgc], errors="coerce").fillna(0)
    if df_sc[c_depr].dtype != bool:
        # Accept 'true'/'false'/'1'/'0' as well; anything else counts as False
        df_sc[c_depr] = df_sc[c_depr].astype(str).str.lower().isin(["true","1","yes","y"])

    # A) Deprecated adapter usage (donut)
    donut = df_sc[c_depr].value_counts().rename_axis("usesDeprecatedAdapter").reset_index(name="count")
    donut["usesDeprecatedAdapter"] = donut["usesDeprecatedAdapter"].map({True:"Yes", False:"No"})
    fig = px.pie(donut, values="count", names="usesDeprecatedAdapter",
                 title="Uses deprecated adapter (WebSecurityConfigurerAdapter/SecurityConfigurerAdapter)?",
                 hole=0.5)
    fig.update_layout(height=480, width=640)
    fig.show()

    # B) Sunburst: extendsClass → securityConfigClass (size by configMethodsCount)
    sb = df_sc.copy()
    # handle empty extendsClass
    sb[c_ext] = sb[c_ext].fillna("").replace({"": "(no parent)"})
    fig = px.sunburst(sb, path=[c_ext, c_cls], values=c_cfgc,
                      title="Security configs by parent class (size = config method count)")
    fig.update_layout(height=650, width=900)
    fig.show()

    # C) Treemap by annotation count per class (60 largest)
    fig = px.treemap(df_sc.sort_values(c_annc, ascending=False).head(60),
                     path=[c_cls], values=c_annc,
                     title="Security configurations — annotation density (Treemap)")
    fig.update_layout(height=650, width=900)
    fig.show()

    # D) Optional: exploded view of config methods (if present).
    # Skip when the fallback resolved to one of the required columns, since that
    # column does not hold a semicolon-joined method list.
    if c_cfgs and c_cfgs in df_sc.columns and c_cfgs not in required:
        # Split semicolon-joined methods
        exploded = (df_sc[[c_cls, c_cfgs]]
                    .assign(methods=df_sc[c_cfgs].fillna("").astype(str).str.split(";"))
                    .explode("methods"))
        exploded["methods"] = exploded["methods"].str.strip()
        exploded = exploded[exploded["methods"] != ""]
        if not exploded.empty:
            agg = exploded["methods"].value_counts().rename_axis("method").reset_index(name="count")
            fig = px.pie(agg, values="count", names="method",
                         title="Configuration methods used (across all config classes)", hole=0.45)
            fig.update_layout(height=520, width=720)
            fig.show()
        else:
            print("[info] Config methods column present but contains no values.")


## 2) Spring Security — Annotated Methods

In [None]:

# Section 2: load Spring_Security.csv and chart security annotations
# globally (donut), per declaring class (sunburst) and per class volume (treemap).
path = os.path.join(SEC_DIR, "Spring_Security.csv")
df_ss = read_csv_safe(path)

c_decl = find_col(df_ss, "declaringClass", contains="declaring")
c_meth = find_col(df_ss, "methodName", contains="method")
c_ann  = find_col(df_ss, "annotationName", contains="annot")

required = [c_decl, c_meth, c_ann]
if not df_ss.empty and all(col is not None for col in required):
    # Small preview of the raw rows
    preview_cols = [c_decl, c_meth, c_ann]
    display(df_ss[preview_cols].head(MAX_ROWS_PREVIEW))

    # A) Annotations popularity (donut)
    ann_counts = df_ss[c_ann].value_counts()
    by_ann = ann_counts.rename_axis("annotation").reset_index(name="count")
    fig = px.pie(
        by_ann,
        values="count",
        names="annotation",
        title="Security annotations used (global)",
        hole=0.45,
    )
    fig.update_layout(height=480, width=640)
    fig.show()

    # B) Sunburst DeclaringClass → Annotation (size = count)
    pair_sizes = df_ss.groupby([c_decl, c_ann]).size()
    sb = pair_sizes.reset_index(name="count")
    fig = px.sunburst(
        sb,
        path=[c_decl, c_ann],
        values="count",
        title="Annotated methods by class and annotation",
    )
    fig.update_layout(height=650, width=900)
    fig.show()

    # C) Treemap of classes by number of annotated methods (top 60)
    class_counts = df_ss[c_decl].value_counts()
    by_class = class_counts.rename_axis("class").reset_index(name="count")
    fig = px.treemap(
        by_class.head(60),
        path=["class"],
        values="count",
        title="Top classes by number of security-annotated methods (Treemap)",
    )
    fig.update_layout(height=650, width=900)
    fig.show()
else:
    print("[info] No data for Spring_Security (missing CSV or required columns).")


## 3) Potentially Unsecured Endpoints

In [None]:

# Section 3: load Unsecured_Endpoints.csv and chart the endpoints by HTTP
# method (donut), controller + method (sunburst) and controller (treemap),
# followed by a short frequency table of endpoint paths.
path = os.path.join(SEC_DIR, "Unsecured_Endpoints.csv")
df_ue = read_csv_safe(path)

c_ctrl = find_col(df_ue, "Controller", contains="controller")
c_meth = find_col(df_ue, "Method", contains="method")
c_http = find_col(df_ue, "HttpMethod", contains="http")
c_ep   = find_col(df_ue, "CompleteEndpoint", contains="endpoint")
# Resolved for completeness only: it always falls back to its default name and
# none of the visuals below read it, so it is not part of the required check.
c_stat = find_col(df_ue, "SecurityStatus", contains="security", default="SecurityStatus")

required = [c_ctrl, c_meth, c_http, c_ep]
if df_ue.empty or any(col is None for col in required):
    print("[info] No data for Unsecured_Endpoints (missing CSV or required columns).")
else:
    display(df_ue[[c_ctrl, c_http, c_ep]].head(MAX_ROWS_PREVIEW))

    # A) Distribution by HTTP method (donut)
    by_http = df_ue[c_http].value_counts().rename_axis("httpMethod").reset_index(name="count")
    fig = px.pie(by_http, values="count", names="httpMethod",
                 title="Potentially unsecured endpoints by HTTP method", hole=0.45)
    fig.update_layout(height=480, width=640)
    fig.show()

    # B) Sunburst Controller → HTTP Method (size = count)
    sun = df_ue.groupby([c_ctrl, c_http]).size().reset_index(name="count")
    fig = px.sunburst(sun, path=[c_ctrl, c_http], values="count",
                      title="Potentially unsecured endpoints by controller and method")
    fig.update_layout(height=650, width=900)
    fig.show()

    # C) Treemap by Controller (top 60)
    by_ctrl = df_ue[c_ctrl].value_counts().rename_axis("controller").reset_index(name="count")
    fig = px.treemap(by_ctrl.head(60), path=["controller"], values="count",
                     title="Controllers with most potentially unsecured endpoints (Treemap)")
    fig.update_layout(height=650, width=900)
    fig.show()

    # D) Compact list of top endpoints (by simple frequency of path).
    # The endpoint column is guaranteed here by the required-columns check
    # above, so no separate "column not available" branch is needed.
    by_ep = df_ue[c_ep].value_counts().rename_axis("endpoint").reset_index(name="count")
    display(by_ep.head(10))
