
# Technology Stack — Report (CSV → Plotly)

Input folder:
`reports/custom-queries-csv/Technology_Stack/`

Expected CSVs:
- `Build_System.csv`
- `Java_Version.csv`

Conventions:
- Small previews only (≤5 rows).
- If a CSV is missing or lacks the required columns, we print a simple `[info]` message — no placeholder charts.


In [None]:

# Setup & helpers
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display

# Opt in to pandas' "future.no_silent_downcasting" behaviour for the whole notebook.
# NOTE(review): this option only exists in some pandas versions — confirm the
# minimum pandas version this notebook is expected to run on.
pd.set_option('future.no_silent_downcasting', True)

def resolve_reports_dir():
    """Locate the ``reports`` directory and return its absolute path as a string.

    Resolution order:
      1. The ``REPORTS_DIRECTORY`` environment variable, if set and the path exists.
      2. The first existing ``reports`` folder found in the current working
         directory or in one of its (at most seven) nearest ancestors.
      3. Fallback: ``<cwd>/reports`` (returned even if it does not exist).
    """
    env = os.environ.get("REPORTS_DIRECTORY")
    if env:
        p = Path(env).expanduser().resolve()
        if p.exists():
            return str(p)

    cwd = Path.cwd()
    # Slice the candidate list instead of indexing ``cwd.parents`` directly:
    # a shallow working directory (e.g. "/" or "/tmp/x") has fewer than seven
    # ancestors and indexing past the end raises IndexError.
    for base in [cwd, *cwd.parents][:8]:
        cand = base / "reports"
        if cand.exists():
            return str(cand.resolve())
    return str((cwd / "reports").resolve())

# Root of the generated reports, resolved once for the whole notebook.
REPORTS_DIR = resolve_reports_dir()
# Folder that holds the Technology_Stack CSVs read by the cells below.
TS_DIR = os.path.join(REPORTS_DIR, "custom-queries-csv", "Technology_Stack")
# Echo the resolved folder so a wrong location is visible in the notebook output.
print("Reports:", TS_DIR)

# Extra cell values that the CSV reader should treat as missing data.
NA_LITS = [
    "", " ",                     # blank cells
    "NA", "N/A", "n/a", "NaN",   # "not available" spellings
    "NULL", "Null", "null",      # null spellings
    "None", "none",              # stringified None
    "-", "--",                   # dash placeholders
]

def read_csv_safe(p):
    """Read the CSV at ``p`` without ever raising.

    Returns a DataFrame with whitespace-stripped column names and all-empty
    rows removed. If the file does not exist or cannot be parsed, a message
    is printed and an empty DataFrame is returned instead.
    """
    if os.path.exists(p):
        try:
            frame = pd.read_csv(p, keep_default_na=True, na_values=NA_LITS)
            # Header cells may carry stray spaces; normalise them to plain strings.
            frame.columns = [str(name).strip() for name in frame.columns]
            cleaned = frame.dropna(how="all")
        except Exception as err:
            # Best effort by design: report the problem and fall through to the empty frame.
            print(f"[warn] Failed to read {p}: {err}")
        else:
            return cleaned
    else:
        print(f"[info] Missing CSV: {p}")
    return pd.DataFrame()

def find_col(df, *cands, default=None, contains=None):
    """Find a column of ``df`` by name, ignoring case.

    Args:
        df: DataFrame to search; ``None`` or an empty frame yields ``default``.
        *cands: Candidate column names, tried in order (exact, case-insensitive).
        default: Value returned when nothing matches.
        contains: Optional substring; used as a fallback when no candidate
            matches, returning the first column whose lowercased name
            contains it.

    Returns:
        The original column label of the first match, or ``default``.
    """
    if df is None or df.empty:
        return default
    # Labels are normalised via str() so non-string headers (e.g. integer
    # column labels) do not crash the lookup.
    low = {str(c).lower(): c for c in df.columns}
    for c in cands:
        if c and str(c).lower() in low:
            return low[str(c).lower()]
    if contains:
        needle = str(contains).lower()
        for k, orig in low.items():
            if needle in k:
                return orig
    return default

# Number of rows shown by the DataFrame previews in the report cells.
MAX_ROWS_PREVIEW = 5


## 1) Build System

In [None]:

# Load the Build_System query result (an empty frame means "nothing to show").
path = os.path.join(TS_DIR, "Build_System.csv")
df_bs = read_csv_safe(path)

# Resolve each column by its expected name, falling back to a substring match.
c_sys = find_col(df_bs, "BuildSystem", contains="build")
c_name = find_col(df_bs, "ProjectName", contains="project")
c_ver = find_col(df_bs, "ProjectVersion", contains="version")
c_pkg = find_col(df_bs, "Packaging", contains="packag")

required = [c_sys, c_name, c_ver, c_pkg]
if not df_bs.empty and all(col is not None for col in required):
    # Short preview of the relevant columns only.
    display(df_bs[required].head(MAX_ROWS_PREVIEW))

    # Occurrences per build system, as a two-column frame for plotting.
    dist = df_bs[c_sys].value_counts()
    dist = dist.rename_axis("buildSystem").reset_index(name="count")

    # Donut chart of the shares.
    fig = px.pie(
        dist,
        names="buildSystem",
        values="count",
        hole=0.45,
        title="Build systems detected",
    )
    fig.update_layout(width=620, height=460)
    fig.show()

    # Bar chart of the absolute counts, labels drawn above the bars.
    fig = px.bar(
        dist,
        x="buildSystem",
        y="count",
        text="count",
        title="Build systems (counts)",
    )
    fig.update_traces(cliponaxis=False, textposition="outside")
    fig.update_layout(width=700, height=480)
    fig.show()
else:
    print("[info] No data for Build_System (missing CSV or required columns).")


## 2) Java Version (Pie + Bar + Indicator)

In [None]:

# Load the Java_Version query result (an empty frame means "nothing to show").
path = os.path.join(TS_DIR, "Java_Version.csv")
df_jv = read_csv_safe(path)

c_ver = find_col(df_jv, "JavaVersionFromBytecode", contains="javavers")
if df_jv.empty or c_ver is None:
    print("[info] No data for Java_Version (missing CSV or required column).")
else:
    # Minimal preview
    display(df_jv[[c_ver]].head(MAX_ROWS_PREVIEW))

    # Count artifacts per version; missing values are grouped under "Unknown".
    dist = (df_jv[c_ver].astype(str)
            .replace({"nan":"Unknown", "None":"Unknown", "":"Unknown"})
            .value_counts()
            .rename_axis("javaVersion").reset_index(name="count"))

    # Sort versions numerically if possible
    def sort_key(v):
        """Order numeric versions ascending; non-numeric labels (e.g. "Unknown") go last."""
        try:
            return (0, float(v))
        except (TypeError, ValueError):
            # Only conversion failures are expected here; anything else should surface.
            return (1, float("inf"))
    dist = dist.sort_values(by="javaVersion", key=lambda s: s.map(sort_key))

    # Pie (donut)
    fig = px.pie(dist, values="count", names="javaVersion",
                 title="Java versions detected (from bytecode)", hole=0.45)
    fig.update_layout(height=460, width=620)
    fig.show()

    # Bar counts
    fig = px.bar(dist, x="javaVersion", y="count", text="count",
                 title="Java version distribution (counts)")
    fig.update_traces(textposition="outside", cliponaxis=False)
    fig.update_layout(height=480, width=700)
    fig.show()

    # Optional single indicator: modal (most common) version.
    # ``dist`` is ordered by version at this point, so the first row is the
    # lowest version, not the most frequent one; pick the row with the
    # highest count explicitly.
    modal_row = dist.loc[dist["count"].idxmax()] if len(dist) > 0 else None
    if modal_row is not None:
        fig = go.Figure(go.Indicator(
            mode="number",
            value=float(modal_row["count"]),
            title={"text": f"Most common Java version: {modal_row['javaVersion']}"}
        ))
        fig.update_layout(height=220, width=360)
        fig.show()
