# Configuration & Environment — Report

Visualizes CSV outputs generated by the **Configuration_Environment** block.

**Source folder:** `reports/custom-queries-csv/Configuration_Environment/`

Included CSVs:
- `Configuration_Classes.csv`
- `Configuration_Files.csv`
- `Feature_Flags.csv`
- `Injected_Properties.csv`

> Notes
> - The dataset contains real `N/A` values for some properties. They are parsed as missing on load and shown under an explicit `N/A` label in the charts, so missing keys stay visible.
> - Charts are limited to the 4 most useful summaries for this block.


In [1]:
# Setup: paths, imports, helpers
import os
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px

def resolve_reports_dir():
    """Locate the ``reports`` directory used by this notebook.

    Resolution order:
      1. The ``REPORTS_DIRECTORY`` environment variable, when it is set and
         points to an existing path.
      2. A ``reports`` entry in the current working directory or in one of
         its nearest five ancestors.
      3. The relative path ``"reports"`` as a last resort.

    Returns:
        str: The resolved absolute path for cases 1 and 2, or the literal
        ``"reports"`` when nothing was found.
    """
    env = os.environ.get("REPORTS_DIRECTORY")
    if env:
        p = Path(env).expanduser().resolve()
        if p.exists():
            return str(p)
    # Fallback: the working directory itself plus up to 5 parents.
    cwd = Path.cwd()
    # Slice the ancestor chain instead of indexing ``cwd.parents`` directly:
    # a shallow working directory has fewer than five ancestors, and
    # indexing past the end raised IndexError before the default was reached.
    for base in [cwd, *cwd.parents][:6]:
        cand = base / "reports"
        if cand.exists():
            return str(cand.resolve())
    return "reports"

# Resolve the data locations once; the report cells read their CSVs from CONF_DIR.
REPORTS_DIR = resolve_reports_dir()
CONF_DIR = os.path.join(
    REPORTS_DIR,
    "custom-queries-csv",
    "Configuration_Environment",
)
print(f"REPORTS_DIR: {REPORTS_DIR}")
print(f"CONF_DIR: {CONF_DIR}")

# NA policy: extra literals handed to pandas as ``na_values`` when loading CSVs.
NA_LITS = [
    "", " ",
    "NA", "N/A", "n/a", "NaN",
    "NULL", "Null", "null",
    "None", "none",
    "-", "--",
]

def read_csv_safe(p):
    """Load the CSV at *p*, returning an empty DataFrame instead of failing.

    A missing path is reported with an ``[info]`` line; any error raised
    while reading is reported with a ``[warn]`` line. In both cases an empty
    ``DataFrame`` is returned so callers can simply test ``df.empty``.
    On success the column labels are converted to stripped strings, and the
    ``NA_LITS`` literals (on top of the pandas defaults) are read as missing.
    """
    if os.path.exists(p):
        try:
            frame = pd.read_csv(p, keep_default_na=True, na_values=NA_LITS)
            frame.columns = [str(col).strip() for col in frame.columns]
        except Exception as err:
            # Deliberate best-effort: a broken CSV must not abort the report.
            print(f"[warn] Failed to read {p}: {err}")
        else:
            return frame
    else:
        print(f"[info] Missing CSV: {p}")
    return pd.DataFrame()

def labelize_na(s, label="N/A"):
    """Return a string version of *s* with missing entries shown as *label*.

    Values flagged by ``isna`` are replaced first; after the cast to ``str``
    any remaining literal ``"nan"`` / ``"NaN"`` text is mapped to *label* as
    well. The input series itself is left unchanged.
    """
    present = s.notna()
    as_text = s.where(present, label).astype(str)
    return as_text.replace(dict.fromkeys(("nan", "NaN"), label))

def pick_col(df, names=None, kind=None):
    """Choose a column of *df* either by dtype kind or by candidate names.

    * ``kind == "numeric"``: the first numeric column, or ``None``.
    * ``kind == "text"``: the first column whose dtype equals ``"object"``;
      failing that the first column of any dtype, or ``None`` for a frame
      without columns.
    * otherwise: the first entry of *names* that matches a column
      case-insensitively (the original column label is returned), or ``None``.

    When *kind* is ``"numeric"`` or ``"text"`` the *names* argument is ignored.
    """
    # Lowercased label -> original label; a later duplicate overrides an earlier one.
    lookup = {}
    for col in df.columns:
        lookup[col.lower()] = col

    if kind == "numeric":
        numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
        return next(iter(numeric_cols), None)

    if kind == "text":
        object_cols = [col for col in df.columns if df[col].dtype == "object"]
        if object_cols:
            return object_cols[0]
        if len(df.columns):
            return df.columns[0]
        return None

    hits = (lookup.get(candidate.lower()) for candidate in (names or []))
    return next((hit for hit in hits if hit), None)

def ext_from_name(x: str) -> str:
    """Return the lowercase file extension of *x*, or ``"unknown"``.

    Only the final path component is inspected, so a dot inside a directory
    name (``conf.d/README``) is not mistaken for an extension. Both ``/`` and
    ``\\`` are treated as separators. Surrounding whitespace is ignored, and
    a name that ends with a bare dot has no extension.

    Dot-files keep the previous behaviour: ``.env`` yields ``"env"``.

    Args:
        x: File name or path; non-string values are converted with ``str``.

    Returns:
        The text after the last dot of the final path component, lowercased,
        or ``"unknown"`` when there is none.
    """
    s = str(x).strip()
    # Normalise separators, then keep only the last path component.
    base = s.replace("\\", "/").rsplit("/", 1)[-1]
    _, dot, ext = base.rpartition(".")
    if not dot or not ext:
        return "unknown"
    return ext.lower()


REPORTS_DIR: /Users/jonathan.nervaez/Documents/AppModPractice/E2E-decomposition/reports
CONF_DIR: /Users/jonathan.nervaez/Documents/AppModPractice/E2E-decomposition/reports/custom-queries-csv/Configuration_Environment


## 1) Configuration classes — by annotation type

In [2]:
# Load the configuration-class listing and preview its first rows.
path = os.path.join(CONF_DIR, "Configuration_Classes.csv")
df_cfg = read_csv_safe(path)
display(df_cfg.head(10))

if df_cfg.empty:
    print("[info] Configuration_Classes.csv missing or empty.")
else:
    # Case-insensitive lookup of the annotation column under its known aliases.
    c_ann = pick_col(df_cfg, names=["annotationType", "annotation", "type"])
    if not c_ann:
        print("[info] No annotation column found — skipping pie.")
    else:
        # Missing annotations are counted under the "N/A" label.
        labelled = labelize_na(df_cfg[c_ann])
        counts = labelled.value_counts().reset_index()
        counts.columns = ["annotationType", "count"]
        fig = px.pie(
            counts,
            names="annotationType",
            values="count",
            hole=0.35,
            title="Configuration classes by annotation type",
        )
        fig.update_layout(width=760, height=460)
        fig.update_traces(textposition="outside")
        fig.show()


Unnamed: 0,configClass,propertyPrefix,annotationType,Source Cypher File: Custom_Queries/Configuration_Environment/Configuration_Classes.cypher
0,com.salesmanager.core.business.configuration.A...,,ConfigurationProperties,
1,com.salesmanager.core.business.configuration.A...,,ConfigurationProperties,
2,com.salesmanager.core.business.configuration.A...,,Configuration,
3,com.salesmanager.core.business.configuration.A...,,Configuration,
4,com.salesmanager.core.business.configuration.C...,,Configuration,
5,com.salesmanager.core.business.configuration.C...,,Configuration,
6,com.salesmanager.core.business.configuration.D...,,Configuration,
7,com.salesmanager.core.business.configuration.D...,,Configuration,
8,com.salesmanager.core.business.configuration.M...,,Configuration,
9,com.salesmanager.core.business.configuration.M...,,Configuration,


## 2) Configuration files — by extension

In [3]:
# Load the configuration-file listing and preview its first rows.
path = os.path.join(CONF_DIR, "Configuration_Files.csv")
df_files = read_csv_safe(path)
display(df_files.head(10))

if not df_files.empty:
    # Find a name-like column through the shared case-insensitive helper
    # (the same lookup the other cells use) instead of a hand-rolled scan;
    # if none of the aliases is present, fall back to the first text column.
    name_col = pick_col(
        df_files,
        ["configurationFile.name", "name", "fileName", "filename", "path", "configurationFile"],
    )
    if name_col is None:
        name_col = pick_col(df_files, kind="text")

    if name_col:
        # Missing names become "N/A", which has no dot and lands in "unknown".
        names = labelize_na(df_files[name_col])
        df_files["ext"] = names.map(ext_from_name)
        ext_counts = df_files["ext"].value_counts().reset_index()
        ext_counts.columns = ["extension", "count"]
        fig = px.bar(ext_counts, x="extension", y="count", text_auto=True,
                     title="Configuration files by extension")
        fig.update_layout(width=820, height=440, xaxis_title="extension", yaxis_title="count")
        fig.show()
    else:
        print("[info] Could not detect file-name column — skipping chart.")
else:
    print("[info] Configuration_Files.csv missing or empty.")


Unnamed: 0,configurationFile,Source Cypher File: Custom_Queries/Configuration_Environment/Configuration_Files.cypher


[info] Configuration_Files.csv missing or empty.


## 3) Feature flags — sources (Hardcoded vs @Value)

In [4]:
# Load the feature-flag listing and preview its first rows.
path = os.path.join(CONF_DIR, "Feature_Flags.csv")
df_flags = read_csv_safe(path)
display(df_flags.head(10))

if df_flags.empty:
    print("[info] Feature_Flags.csv missing or empty.")
else:
    # Case-insensitive lookup of the column that records where a flag comes from.
    c_src = pick_col(df_flags, names=["source", "origin"])
    if not c_src:
        print("[info] No 'source' column — skipping pie.")
    else:
        # Missing sources are counted under the "N/A" label.
        labelled = labelize_na(df_flags[c_src])
        counts = labelled.value_counts().reset_index()
        counts.columns = ["source", "count"]
        fig = px.pie(
            counts,
            names="source",
            values="count",
            hole=0.35,
            title="Feature-flag sources",
        )
        fig.update_layout(width=720, height=420)
        fig.update_traces(textposition="outside")
        fig.show()


Unnamed: 0,fieldName,declaringClass,source,Source Cypher File: Custom_Queries/Configuration_Environment/Feature_Flags.cypher
0,$assertionsDisabled,org.hibernate.Query$1,Hardcoded,
1,$assertionsDisabled,org.hibernate.boot.internal.MetadataImpl,Hardcoded,
2,$assertionsDisabled,org.hibernate.boot.internal.SessionFactoryOpti...,Hardcoded,
3,$assertionsDisabled,org.hibernate.bytecode.enhance.internal.bytebu...,Hardcoded,
4,$assertionsDisabled,org.hibernate.bytecode.enhance.spi.interceptor...,Hardcoded,
5,$assertionsDisabled,org.hibernate.bytecode.enhance.spi.interceptor...,Hardcoded,
6,$assertionsDisabled,org.hibernate.cache.internal.StrategyCreatorRe...,Hardcoded,
7,$assertionsDisabled,org.hibernate.cache.spi.AbstractRegionFactory,Hardcoded,
8,$assertionsDisabled,org.hibernate.cfg.InheritanceState,Hardcoded,
9,$assertionsDisabled,org.hibernate.cfg.PropertyContainer,Hardcoded,


## 4) Injected properties — field types (Top 25)

In [5]:
# Load the injected-property listing and preview its first rows.
path = os.path.join(CONF_DIR, "Injected_Properties.csv")
df_inj = read_csv_safe(path)
display(df_inj.head(10))

if df_inj.empty:
    print("[info] Injected_Properties.csv missing or empty.")
else:
    # Case-insensitive lookup of the field-type column under its known aliases.
    c_type = pick_col(df_inj, names=["fieldType", "type", "signature"])
    if not c_type:
        print("[info] No field type column — skipping bar.")
    else:
        # Missing types are counted under the "N/A" label.
        labelled = labelize_na(df_inj[c_type])
        counts = labelled.value_counts().reset_index()
        counts.columns = ["fieldType", "count"]
        # value_counts sorts by frequency, so the first 25 rows are the top 25.
        top_types = counts.head(25)
        fig = px.bar(
            top_types,
            x="fieldType",
            y="count",
            text_auto=True,
            title="Injected field types (Top 25)",
        )
        fig.update_layout(
            width=1100,
            height=500,
            xaxis_tickangle=45,
            xaxis_title="fieldType",
            yaxis_title="count",
        )
        fig.show()


Unnamed: 0,fieldName,propertyKey,fieldType,Source Cypher File: Custom_Queries/Configuration_Environment/Injected_Properties.cypher
0,tokenHeader,,java.lang.String tokenHeader,
1,displayShipping,,java.lang.String displayShipping,
2,expiration,,java.lang.Long expiration,
3,secret,,java.lang.String secret,
4,method,,java.lang.String method,
5,staticPath,,java.lang.String staticPath,
6,contentUrl,,java.lang.String contentUrl,
7,secretKey,,java.lang.String secretKey,
8,initDefaultData,,boolean initDefaultData,
9,noIndex,,boolean noIndex,
