# Technology Stack — Report

This notebook visualizes the CSV outputs generated by the **Technology_Stack** block.

## What this notebook shows
- **Build systems**: Distribution (Donut) and counts (Bar).
- **Java versions**: Distribution (Donut), counts (Bar), and a small indicator for the most common version.

> If a CSV is missing, empty, or lacks a required column, the cell prints an info message and skips its charts.


In [None]:
# Setup & helpers
# - CSVs are read from ../reports/csv-reports/<CATEGORY>/<file>.csv, resolved against the current working directory.
# - Minimal console output; only show information if a CSV is missing/empty.
# - Bar charts use an explicit default color so it's easy to tweak later.
# - Titles are standardized without block prefixes.

import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display

# Opt in to the pandas option 'future.no_silent_downcasting'.
# NOTE(review): presumably only recognised by recent pandas releases —
# confirm the minimum pandas version this notebook is expected to run on.
pd.set_option('future.no_silent_downcasting', True)

# Sub-folder of the CSV report tree that this notebook reads from.
CATEGORY = "Technology_Stack"
# The relative path is resolved against the process's current working
# directory at the time this cell runs, and stored as an absolute path.
CSV_BASE = Path("../reports/csv-reports").resolve()
# Directory containing the CSV files loaded by the cells below.
TS_DIR = CSV_BASE / CATEGORY

# Explicit default color for all bar charts in this notebook
# (single-element list passed as color_discrete_sequence to px.bar).
DEFAULT_BAR_COLOR = ["#1f77b4"]

# CSV IO helpers
# Extra literals that read_csv_safe passes to pd.read_csv as na_values, i.e.
# cell contents that are parsed as missing values (on top of the pandas
# defaults, since read_csv_safe also sets keep_default_na=True).
NA_LITS = ["", " ", "NA", "N/A", "n/a", "NaN", "NULL", "Null", "null", "None", "none", "-", "--"]

def read_csv_safe(p: Path) -> pd.DataFrame:
    """Load the CSV at ``p``, falling back to an empty DataFrame.

    A missing file produces an ``[info]`` message and any failure while
    reading produces a ``[warn]`` message; in both cases an empty DataFrame
    is returned instead of raising. On success the column labels are
    converted to stripped strings and rows whose cells are all missing are
    dropped.
    """
    p = Path(p)
    if not p.exists():
        print(f"[info] Missing CSV: {p}")
        return pd.DataFrame()

    try:
        frame = pd.read_csv(p, keep_default_na=True, na_values=NA_LITS)
        # Header cells may carry stray whitespace; normalise them once here
        # so that later column lookups can compare plain names.
        frame.columns = [str(label).strip() for label in frame.columns]
        cleaned = frame.dropna(how="all")
    except Exception as e:
        # Best-effort loader: report the problem and let the caller's
        # "empty DataFrame" branch handle it.
        print(f"[warn] Failed to read {p}: {e}")
        cleaned = pd.DataFrame()
    return cleaned

def find_col(df, *cands, default=None, contains=None):
    """Look up a column of ``df`` case-insensitively.

    The candidates in ``cands`` are tried in order as exact (case-insensitive)
    names; falsy candidates are skipped. If none matches and ``contains`` is
    given, the first column whose lower-cased name contains that substring is
    returned. ``default`` is returned when ``df`` is ``None`` or empty, or
    when nothing matches.
    """
    if df is None or df.empty:
        return default

    # Lower-cased name -> original spelling, in column order.
    by_lower = {name.lower(): name for name in df.columns}

    exact_hits = (
        by_lower[cand.lower()]
        for cand in cands
        if cand and cand.lower() in by_lower
    )
    hit = next(exact_hits, None)
    if hit is not None:
        return hit

    if not contains:
        return default

    needle = contains.lower()
    partial_hits = (orig for key, orig in by_lower.items() if needle in key)
    return next(partial_hits, default)

# Row cap for the DataFrame previews rendered via display(... .head(MAX_ROWS_PREVIEW)).
MAX_ROWS_PREVIEW = 5


## 1) Build systems

In [None]:
# Charts generated here:
#  - 1A) Build systems detected (Donut)
#  - 1B) Build systems (counts) (Bar, explicit color)

path = TS_DIR / "Build_System.csv"
df_bs = read_csv_safe(path)

# Resolve the four columns this cell works with: exact header first,
# otherwise the first header containing the given fragment.
c_sys = find_col(df_bs, "BuildSystem", contains="build")
c_name = find_col(df_bs, "ProjectName", contains="project")
c_ver = find_col(df_bs, "ProjectVersion", contains="version")
c_pkg = find_col(df_bs, "Packaging", contains="packag")

required = [c_sys, c_name, c_ver, c_pkg]
if not df_bs.empty and all(col is not None for col in required):
    # Short preview of the resolved columns
    display(df_bs[required].head(MAX_ROWS_PREVIEW))

    # Rows per build system; stringified missing values become "Unknown".
    bs_labels = df_bs[c_sys].astype(str).replace({"nan": "Unknown", "": "Unknown"})
    bs_counts = bs_labels.value_counts()
    dist = bs_counts.rename_axis("buildSystem").reset_index(name="count")

    # 1A) Donut: share of each build system
    fig = px.pie(
        dist,
        names="buildSystem",
        values="count",
        hole=0.45,
        title="Build systems detected",
    )
    fig.update_layout(width=620, height=460)
    fig.show()

    # 1B) Bar: absolute counts, labelled above each bar
    fig = px.bar(
        dist,
        x="buildSystem",
        y="count",
        text="count",
        color_discrete_sequence=DEFAULT_BAR_COLOR,
        title="Build systems (counts)",
    )
    fig.update_traces(cliponaxis=False, textposition="outside")
    fig.update_layout(width=720, height=480, xaxis_title="build system", yaxis_title="count")
    fig.show()
else:
    print("[info] No data for Build_System (missing CSV or required columns).")


## 2) Java version

In [None]:
# Charts generated here:
#  - 2A) Java versions detected (Donut)
#  - 2B) Java version distribution (counts) (Bar, explicit color)
#  - 2C) Indicator for the most common version

path = TS_DIR / "Java_Version.csv"
df_jv = read_csv_safe(path)

# Version column: exact header first, otherwise any header containing "javavers".
c_ver = find_col(df_jv, "JavaVersionFromBytecode", contains="javavers")
if df_jv.empty or c_ver is None:
    print("[info] No data for Java_Version (missing CSV or required column).")
else:
    # Minimal preview
    display(df_jv[[c_ver]].head(MAX_ROWS_PREVIEW))

    # One row per distinct version label with its row count; stringified
    # missing values are folded into a single "Unknown" bucket.
    dist = (df_jv[c_ver].astype(str)
            .replace({"nan":"Unknown", "None":"Unknown", "": "Unknown"})
            .value_counts()
            .rename_axis("javaVersion").reset_index(name="count"))

    # Numeric sort for versions; labels that do not parse as a number
    # (e.g. "Unknown") are placed last.
    def sort_key(v):
        try:
            return (0, float(v))
        except (TypeError, ValueError):
            # Only conversion failures mean "not a number"; anything else
            # should surface instead of being swallowed.
            return (1, float("inf"))
    dist = dist.sort_values(by="javaVersion", key=lambda s: s.map(sort_key))

    # 2A) Pie (donut)
    fig = px.pie(dist, values="count", names="javaVersion",
                 title="Java versions detected (from bytecode)", hole=0.45)
    fig.update_layout(height=460, width=620)
    fig.show()

    # 2B) Bar counts
    fig = px.bar(dist, x="javaVersion", y="count", text="count",
                 title="Java version distribution (counts)",
                 color_discrete_sequence=DEFAULT_BAR_COLOR)
    fig.update_traces(textposition="outside", cliponaxis=False)
    fig.update_layout(height=480, width=720, xaxis_title="java version", yaxis_title="count")
    fig.show()

    # 2C) Single indicator: modal (most common) version
    # dist is ordered by version at this point, so its first row is the
    # lowest version rather than the most frequent one. Select the row with
    # the highest count instead (ties resolve to the first such row in the
    # current, version-sorted order).
    modal_row = dist.loc[dist["count"].idxmax()] if len(dist) > 0 else None
    if modal_row is not None:
        # The indicator's number is the row count of the modal version; the
        # version label itself is shown in the title.
        fig = go.Figure(go.Indicator(
            mode="number",
            value=float(modal_row["count"]),
            title={"text": f"Most common Java version: {modal_row['javaVersion']}"}
        ))
        fig.update_layout(height=220, width=360)
        fig.show()
