# Dependencies — Report

This notebook visualizes the CSV outputs generated by the **Dependencies** block.

## What this notebook shows
- **Circular dependencies** between packages (Top pairs).
- **External dependencies** overview (group → artifact).
- **Lines of code**: Top classes by LoC (Bar) and share (Donut) for the same Top set.
- **Modules & artifacts**: In/Out degree per artifact (Scatter) and Top outgoing.
- **Package dependencies**: Grouped bars of total dependencies & distinct dependent types (Top origins), plus a heatmap of the Top origin → destination package pairs.
- **Package dependencies — classes**: Top class-to-class dependency pairs by weight (Bar).

> If a CSV is missing, empty, or lacks the expected columns, the cell prints an info message and skips its charts.


In [None]:
# Setup: imports, paths, helpers
# - CSVs are read from ../reports/csv-reports/<CATEGORY>/<file>.csv, resolved against the current working directory (normally this notebook's folder).
# - Minimal console output; only show information if a CSV is missing/empty.
# - Bar charts use an explicit default color so it's easy to tweak later.
# - Titles are standardized without block prefixes.

import os
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Opt in to pandas' "no silent downcasting" future behaviour for the whole notebook.
# NOTE(review): this option only exists in recent pandas releases (presumably 2.2+) —
# confirm the minimum pandas version this notebook is expected to run on.
pd.set_option('future.no_silent_downcasting', True)

# Report category; also used as the sub-folder name below the CSV root.
CATEGORY = "Dependencies"

# Root folder of all CSV reports, made absolute against the current working directory.
CSV_BASE = Path("..", "reports", "csv-reports").resolve()

# Folder that holds the CSV inputs for this notebook.
DEPS_DIR = CSV_BASE.joinpath(CATEGORY)

# Explicit default color for all bar charts in this notebook
DEFAULT_BAR_COLOR = ["#1f77b4"]

# CSV IO helpers
# Cell values that are treated as missing (in addition to pandas' defaults) when reading CSVs.
NA_LITS = [
    "", " ",
    "NA", "N/A", "n/a", "NaN",
    "NULL", "Null", "null",
    "None", "none",
    "-", "--",
]

def read_csv_safe(p: Path) -> pd.DataFrame:
    """Load the CSV at *p* into a DataFrame without ever raising on read errors.

    A missing file, or any exception while reading/cleaning it, yields an empty
    DataFrame and a single console line ("[info] ..." for a missing file,
    "[warn] ..." for a failed read). On success, column names are converted to
    stripped strings and rows that are entirely NA are dropped.
    """
    p = Path(p)
    if p.exists():
        try:
            # NA_LITS extends pandas' built-in NA markers rather than replacing them.
            frame = pd.read_csv(p, na_values=NA_LITS, keep_default_na=True)
            frame.columns = [str(name).strip() for name in frame.columns]
            return frame.dropna(how="all")
        except Exception as e:
            # Best-effort by design: report the problem and let the caller skip the chart.
            print(f"[warn] Failed to read {p}: {e}")
            return pd.DataFrame()

    print(f"[info] Missing CSV: {p}")
    return pd.DataFrame()

def labelize_na(s, label="N/A"):
    """Return a string-typed copy of *s* in which missing values read as *label*.

    NA entries are replaced by *label* before the cast to ``str``; afterwards any
    element that is literally "nan" or "NaN" is mapped to *label* as well.
    The input is not modified.
    """
    filled = s.where(s.notna(), label)
    as_text = filled.astype(str)
    return as_text.replace(dict.fromkeys(("nan", "NaN"), label))

def find_col(df, *cands, default=None, contains=None):
    """Find a column of *df* by name, ignoring case.

    Lookup order:
      1. each name in *cands*, in the order given, compared case-insensitively;
      2. if *contains* is given, the first column whose lower-cased name
         contains that substring (also case-insensitive);
      3. otherwise *default*.

    Args:
        df: object exposing ``.columns`` (typically a DataFrame).
        *cands: exact column names to try; falsy entries are skipped.
        default: value returned when nothing matches.
        contains: optional substring used as a fallback match.

    Returns:
        The original column label as stored in ``df.columns``, or *default*.
    """
    # str() keeps the lookup working when some labels are not strings
    # (e.g. integer headers). If two labels differ only by case, the later wins.
    low = {str(c).lower(): c for c in df.columns}
    for c in cands:
        if c and c.lower() in low:
            return low[c.lower()]
    if contains:
        needle = contains.lower()
        for k, orig in low.items():
            if needle in k:
                return orig
    return default

# Upper bound on the number of bars drawn in long bar charts.
# NOTE(review): not referenced by any cell visible in this notebook; presumably the chart
# helpers imported from charts.dependencies_charts apply their own limit — confirm before
# relying on or removing it.
MAX_BARS = 25  # cap for long bar charts

# Import shared chart functions from interface
import sys
sys.path.append(str(Path("../interface").resolve()))
from charts.dependencies_charts import (
    create_circular_pairs_bar,
    create_circular_heatmap,
    create_external_treemap,
    create_top_groups_bar,
    create_top_loc_bar,
    create_loc_share_donut,
    create_artifact_degree_scatter,
    create_top_outgoing_bar,
    create_package_deps_grouped_bar,
    create_package_pairs_heatmap,
    create_class_pairs_bar
)


## 1) Circular dependencies (package ↔ package)

In [None]:
# Charts for Circular_Dependencies
# Where charts are generated:
#  - 1A) Top circular package pairs by total dependencies (Bar, explicit color)
#  - 1B) Circular dependencies heatmap (Top pairs)

path = DEPS_DIR / "Circular_Dependencies.csv"
df_circ = read_csv_safe(path)

if df_circ.empty:
    print("[info] Circular_Dependencies.csv missing or empty.")
else:
    # Resolve the four required columns: exact name first, substring as fallback.
    c_p1 = find_col(df_circ, "package1", contains="package1")
    c_p2 = find_col(df_circ, "package2", contains="package2")
    c_fwd = find_col(df_circ, "totalDepsP1toP2", contains="p1top2")
    c_bwd = find_col(df_circ, "totalDepsP2toP1", contains="p2top1")

    if not (c_p1 and c_p2 and c_fwd and c_bwd):
        print("[info] Circular_Dependencies.csv lacks expected columns — skipping charts.")
    else:
        # 1A) bar chart of circular package pairs
        fig = create_circular_pairs_bar(df_circ, c_p1, c_p2, c_fwd, c_bwd)
        if not fig:
            print("[info] No data for circular pairs bar chart")
        else:
            fig.show()

        # 1B) heatmap over the same columns
        fig2 = create_circular_heatmap(df_circ, c_p1, c_p2, c_fwd, c_bwd)
        if not fig2:
            print("[info] No data for circular heatmap")
        else:
            fig2.show()


## 2) External dependencies (group → artifact)

In [None]:
# Charts for External_Dependencies
# Where charts are generated:
#  - 2A) External dependencies treemap (group → artifact)
#  - 2B) Top groups by number of artifacts used (Bar, explicit color)

path = DEPS_DIR / "External_Dependencies.csv"
df_ext = read_csv_safe(path)

if df_ext.empty:
    print("[info] External_Dependencies.csv missing or empty.")
else:
    # Resolve the group and artifact-name columns: exact names first, substring as fallback.
    c_group = find_col(df_ext, "group", "artifact.group", contains="group")
    c_name = find_col(df_ext, "name", "artifact.name", contains="name")

    if not (c_group and c_name):
        print("[info] External_Dependencies.csv lacks expected columns — skipping charts.")
    else:
        # 2A) treemap
        fig = create_external_treemap(df_ext, c_group, c_name)
        if not fig:
            print("[info] No data for external treemap")
        else:
            fig.show()

        # 2B) top groups bar
        fig2 = create_top_groups_bar(df_ext, c_group, c_name)
        if not fig2:
            print("[info] No data for top groups bar")
        else:
            fig2.show()


## 3) Lines of code (per class)

In [None]:
# Charts for Lines_Of_Code
# Where charts are generated:
#  - 3A) Top classes by lines of code (Bar, explicit color)
#  - 3B) LoC share for the same Top set (Donut)

path = DEPS_DIR / "Lines_Of_Code.csv"
df_loc = read_csv_safe(path)

if df_loc.empty:
    print("[info] Lines_Of_Code.csv missing or empty.")
else:
    # Resolve the class-path and LoC columns: exact name first, substring as fallback.
    c_cls = find_col(df_loc, "CompleteClassPath", contains="class")
    c_loc = find_col(df_loc, "LoC", contains="loc")

    if not (c_cls and c_loc):
        print("[info] Lines_Of_Code.csv lacks expected columns — skipping charts.")
    else:
        # 3A) top classes bar
        fig = create_top_loc_bar(df_loc, c_cls, c_loc)
        if not fig:
            print("[info] No data for top LoC bar chart")
        else:
            fig.show()

        # 3B) share donut
        fig2 = create_loc_share_donut(df_loc, c_cls, c_loc)
        if not fig2:
            print("[info] No data for LoC share donut")
        else:
            fig2.show()


## 4) Modules & artifacts (in/out degree per artifact)

In [None]:
# Charts for Modules_And_Artifacts
# Where charts are generated:
#  - 4A) Artifact degree: outgoing vs incoming (Scatter, size = total)
#  - 4B) Top artifacts by number of outgoing dependencies (Bar, explicit color)

path = DEPS_DIR / "Modules_And_Artifacts.csv"
df_mod = read_csv_safe(path)

if df_mod.empty:
    print("[info] Modules_And_Artifacts.csv missing or empty.")
else:
    # Resolve the two artifact-name columns: exact name first, substring as fallback.
    c_a1 = find_col(df_mod, "Artifact_1_Name", contains="_1_name")
    c_a2 = find_col(df_mod, "Artifact_2_Name", contains="_2_name")

    if not (c_a1 and c_a2):
        print("[info] Modules_And_Artifacts.csv lacks expected columns — skipping charts.")
    else:
        # 4A) degree scatter
        fig = create_artifact_degree_scatter(df_mod, c_a1, c_a2)
        if not fig:
            print("[info] No data for artifact degree scatter")
        else:
            fig.show()

        # 4B) top outgoing bar
        fig2 = create_top_outgoing_bar(df_mod, c_a1, c_a2)
        if not fig2:
            print("[info] No data for top outgoing bar")
        else:
            fig2.show()


## 5) Package dependencies (origin → destination)

In [None]:
# Charts for Package_Dependencies
# Where charts are generated:
#  - 5A) Top origin packages: total deps vs distinct dependent types (Grouped bars, explicit color)
#  - 5B) Top origin → destination package pairs by total dependencies (Heatmap)

path = DEPS_DIR / "Package_Dependencies.csv"
df_pkg = read_csv_safe(path)

if df_pkg.empty:
    print("[info] Package_Dependencies.csv missing or empty.")
else:
    # Resolve the four required columns: exact name first, substring as fallback.
    c_org = find_col(df_pkg, "originPackage", contains="origin")
    c_dst = find_col(df_pkg, "destinationPackage", contains="destination")
    c_types = find_col(df_pkg, "typesThatDepend", contains="types")
    c_total = find_col(df_pkg, "totalDependencies", contains="total")

    if not (c_org and c_dst and c_types and c_total):
        print("[info] Package_Dependencies.csv lacks expected columns — skipping charts.")
    else:
        # 5A) grouped bars (uses all four columns)
        fig = create_package_deps_grouped_bar(df_pkg, c_org, c_dst, c_types, c_total)
        if not fig:
            print("[info] No data for package deps grouped bar")
        else:
            fig.show()

        # 5B) pairs heatmap (does not take the types column)
        fig2 = create_package_pairs_heatmap(df_pkg, c_org, c_dst, c_total)
        if not fig2:
            print("[info] No data for package pairs heatmap")
        else:
            fig2.show()


## 6) Package dependencies — classes (top pairs by weight)

In [None]:
# Charts for Package_Dependencies_Classes
# Where chart is generated:
#  - 6A) Top class-to-class dependencies by weight (Bar, explicit color)

path = DEPS_DIR / "Package_Dependencies_Classes.csv"
df_cls = read_csv_safe(path)

if df_cls.empty:
    print("[info] Package_Dependencies_Classes.csv missing or empty.")
else:
    # Resolve both class columns and the weight column: exact name first, substring as fallback.
    c_c1 = find_col(df_cls, "Class_1_fqn", contains="_1_fqn")
    c_w = find_col(df_cls, "dependencyWeight", contains="weight")
    c_c2 = find_col(df_cls, "Class_2_fqn", contains="_2_fqn")

    if not (c_c1 and c_w and c_c2):
        print("[info] Package_Dependencies_Classes.csv lacks expected columns — skipping charts.")
    else:
        # 6A) class pairs bar
        fig = create_class_pairs_bar(df_cls, c_c1, c_w, c_c2)
        if not fig:
            print("[info] No data for class pairs bar")
        else:
            fig.show()
