# High-Level Architecture — Report

This notebook visualizes the CSV outputs generated by the **High_Level_Architecture** block.

## What this notebook shows
- **Architectural layer violations**: Controller → Repository (bypassing Service) overview.
- **Cyclomatic complexity**: Top methods, distribution, and a quick violin view.
- **Deepest inheritance**: Classes with the highest inheritance depth.
- **Excessive dependencies**: Classes with unusually high dependency counts.
- **General count overview**: Quick totals (packages, classes, artifacts, etc.).
- **God classes**: Classes with a very high number of methods.
- **Highest number of methods**: Top classes by method count.
- **Inheritance between classes**: Lightweight relationship view (sampled Sankey).
- **Package structure**: Package hierarchy treemap and icicle.

> If a CSV is missing or empty (or a required column cannot be found), the affected cell prints an info message or shows a "No data" placeholder chart instead of the real charts.


In [None]:
# Setup & helpers
# - CSVs are read from ../reports/csv-reports/<CATEGORY>/<file>.csv, resolved against the current working directory (normally this notebook's folder).
# - Minimal console output; only show information if a CSV is missing/empty.

import os
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display
import sys

# Import shared chart functions
sys.path.append(str(Path("../interface").resolve()))
from charts.high_level_architecture_charts import *

# Enable pandas' opt-in "future.no_silent_downcasting" option for the whole session.
# NOTE(review): presumably set so the shared chart helpers do not trigger pandas'
# downcasting FutureWarning — confirm it is still required with the pinned pandas version.
pd.set_option('future.no_silent_downcasting', True)

# Report category; also the name of the sub-folder that holds its CSV files.
CATEGORY = "High_Level_Architecture"
# Absolute root of all CSV reports, resolved against the current working directory.
CSV_BASE = Path("..", "reports", "csv-reports").resolve()
# Folder containing the CSVs visualized by this notebook.
HLA_DIR = CSV_BASE.joinpath(CATEGORY)

# CSV IO helpers
# Extra literals parsed as missing values (passed to pandas in addition to its defaults).
NA_LITS = [
    "", " ",
    "NA", "N/A", "n/a", "NaN",
    "NULL", "Null", "null",
    "None", "none",
    "-", "--",
]

def read_csv_safe(p: Path) -> pd.DataFrame:
    """Load the CSV at *p*, never raising on a missing or unreadable file.

    Returns the parsed frame with whitespace-stripped column names and fully
    empty rows removed. When the path does not exist, or reading fails for any
    reason, a short message is printed and an empty DataFrame is returned.
    """
    csv_path = Path(p)

    if csv_path.exists():
        try:
            frame = pd.read_csv(csv_path, na_values=NA_LITS, keep_default_na=True)
            # Normalize headers so later column lookups are not tripped by padding.
            frame.columns = [str(name).strip() for name in frame.columns]
            return frame.dropna(how="all")
        except Exception as err:
            # Best-effort reader: report the problem and let the cell show "no data".
            print(f"[warn] Failed to read {csv_path}: {err}")
            return pd.DataFrame()

    print(f"[info] Missing CSV: {csv_path}")
    return pd.DataFrame()

def find_col(df, *cands, default=None, contains=None):
    """Resolve a column label of *df* by candidate names or by substring.

    Lookup order:
      1. each candidate in ``cands``, in order: an exact (case-sensitive)
         label match wins, otherwise a case-insensitive match;
      2. the first column whose lower-cased label contains ``contains``
         (case-insensitive);
      3. ``default``.

    Args:
        df: DataFrame to inspect; ``None`` or an empty frame yields ``default``.
        *cands: Candidate column names; falsy candidates are ignored.
        default: Value returned when nothing matches.
        contains: Optional substring used as a fallback match.

    Returns:
        The matching column label as stored in ``df.columns``, or ``default``.
    """
    if df is None or df.empty:
        return default

    labels = set(df.columns)
    # Labels are coerced to str so non-string labels (e.g. integer headers)
    # cannot break the case-insensitive lookup.
    by_lower = {str(col).lower(): col for col in df.columns}

    for cand in cands:
        if not cand:
            continue
        # Prefer the exact label so columns differing only by case stay distinguishable.
        if cand in labels:
            return cand
        key = str(cand).lower()
        if key in by_lower:
            return by_lower[key]

    if contains:
        needle = str(contains).lower()  # hoisted: invariant across the loop
        for key, col in by_lower.items():
            if needle in key:
                return col

    return default

def show_empty(msg: str):
    """Render a blank placeholder figure titled *msg* with a centered "No data" note."""
    no_data_note = dict(text="No data", x=0.5, y=0.5, showarrow=False)
    placeholder = go.Figure()
    placeholder.update_layout(title=msg, annotations=[no_data_note])
    placeholder.show()

# NOTE(review): MAX_SHOW is not referenced by any cell visible in this notebook;
# presumably an intended cap on the number of rows/items shown — confirm it is still needed.
MAX_SHOW = 25

## 1) Architectural layer violations

In [None]:
# Section 1: Architectural layer violations
# Chart builders come from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "Architectural_Layer_Violation.csv"
df_lv = read_csv_safe(path)

# Locate the controller / repository columns (case-insensitive, substring fallback).
c_controller = find_col(df_lv, "Controller", contains="controller")
c_repository = find_col(df_lv, "Repository", contains="repositor")

if not df_lv.empty and (c_controller and c_repository):
    # Quick preview of the raw rows before charting.
    display(df_lv[[c_controller, c_repository]].head(5))

    # 1A) Donut: controllers with the most violations
    fig = create_controllers_violations_chart(df_lv, c_controller)
    if fig:
        fig.show()

    # 1B) Treemap: repositories most often bypassed
    fig = create_repositories_bypassed_chart(df_lv, c_repository)
    if fig:
        fig.show()

    # 1C) Sankey: Controller → Repository links
    fig = create_layer_violation_sankey(df_lv, c_controller, c_repository)
    if fig:
        fig.show()
else:
    print("[info] No data found for Architectural Layer Violations (missing CSV or required columns).")

## 2) Cyclomatic complexity (> 10)

In [None]:
# Section 2: Cyclomatic complexity
# Using shared functions from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "Cyclomatic_Complexity.csv"
df_cc = read_csv_safe(path)

# Resolve the columns case-insensitively. No `default=` name is supplied on
# purpose: a fallback string is truthy even when the column is absent, which
# would bypass the guard below and raise KeyError in the preview.
c_class  = find_col(df_cc, "Class", contains="class")
c_method = find_col(df_cc, "Method", contains="method")
c_cc     = find_col(df_cc, "cyclomaticComplexity", contains="complex")

if df_cc.empty or not (c_class and c_method and c_cc):
    # Missing/empty CSV or an unresolved column: show the placeholder instead.
    show_empty("Cyclomatic Complexity")
else:
    # Quick preview of the raw rows before charting.
    display(df_cc[[c_class, c_method, c_cc]].head(5))

    # 2A) Violin chart
    fig = create_complexity_violin_chart(df_cc, c_cc)
    if fig:
        fig.show()

    # 2B) Scatter plot
    fig = create_complexity_scatter_chart(df_cc, c_class, c_method, c_cc)
    if fig:
        fig.show()

## 3) Deepest inheritance

In [None]:
# Section 3: Deepest inheritance
# Using shared functions from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "Deepest_Inheritance.csv"
df_di = read_csv_safe(path)

# Resolve the columns case-insensitively. No `default=` name is supplied on
# purpose: a fallback string is truthy even when the column is absent, which
# would bypass the guard below and raise KeyError in the preview.
c_class = find_col(df_di, "class.fqn", "Class", contains="class")
c_depth = find_col(df_di, "Depth", contains="depth")

if df_di.empty or not (c_class and c_depth):
    # Missing/empty CSV or an unresolved column: show the placeholder instead.
    show_empty("Deepest Inheritance")
else:
    # Quick preview of the raw rows before charting.
    display(df_di[[c_class, c_depth]].head(5))

    # 3A) Bar chart
    fig = create_deepest_inheritance_bar(df_di, c_class, c_depth)
    if fig:
        fig.show()

    # 3B) Histogram
    fig = create_inheritance_distribution_histogram(df_di, c_depth)
    if fig:
        fig.show()

## 4) Excessive dependencies (> 15)

In [None]:
# Section 4: Excessive dependencies
# Using shared functions from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "Excessive_Dependencies.csv"
df_ed = read_csv_safe(path)

# Resolve the columns case-insensitively. No `default=` name is supplied on
# purpose: a fallback string is truthy even when the column is absent, which
# would bypass the guard below and hand a non-existent column to the chart helper.
c_fqn = find_col(df_ed, "classFqn", contains="fqn")
c_dep = find_col(df_ed, "dependencies", contains="depend")

if df_ed.empty or not (c_fqn and c_dep):
    # Missing/empty CSV or an unresolved column: show the placeholder instead.
    show_empty("Excessive Dependencies")
else:
    fig = create_excessive_dependencies_treemap(df_ed, c_fqn, c_dep)
    if fig:
        fig.show()

## 5) General count overview

In [None]:
# Section 5: General count overview
# Using shared functions from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "General_Count_Overview.csv"
df_go = read_csv_safe(path)

# Resolve the columns case-insensitively. No `default=` name is supplied on
# purpose: a fallback string is truthy even when the column is absent, which
# would bypass the guard below and hand a non-existent column to the chart helper.
c_info  = find_col(df_go, "Info", contains="info")
c_count = find_col(df_go, "Count", contains="count")

if df_go.empty or not (c_info and c_count):
    # Missing/empty CSV or an unresolved column: show the placeholder instead.
    show_empty("General Count Overview")
else:
    fig = create_general_count_donut(df_go, c_info, c_count)
    if fig:
        fig.show()

## 6) God classes (> 20 methods)

In [None]:
# Section 6: God classes
# Using shared functions from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "God_Classes.csv"
df_gc = read_csv_safe(path)

# Resolve the columns case-insensitively. No `default=` name is supplied on
# purpose: a fallback string is truthy even when the column is absent, which
# would bypass the guard below and hand a non-existent column to the chart helpers.
c_fqn = find_col(df_gc, "fqn_god_class", contains="fqn")
c_cnt = find_col(df_gc, "methodCount", contains="method")

if df_gc.empty or not (c_fqn and c_cnt):
    # Missing/empty CSV or an unresolved column: show the placeholder instead.
    show_empty("God Classes")
else:
    # 6A) Treemap
    fig = create_god_classes_treemap(df_gc, c_fqn, c_cnt)
    if fig:
        fig.show()

    # 6B) Histogram
    fig = create_god_classes_histogram(df_gc, c_cnt)
    if fig:
        fig.show()

## 7) Highest number of methods per class (> 15)

In [None]:
# Section 7: Highest number of methods per class
# Using shared functions from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "Highest_Number_Methods_Class.csv"
df_hm = read_csv_safe(path)

# Resolve the columns case-insensitively. No `default=` name is supplied on
# purpose: a fallback string is truthy even when the column is absent, which
# would bypass the guard below and hand a non-existent column to the chart helpers.
c_class = find_col(df_hm, "class.fqn", "Class", contains="class")
c_cnt   = find_col(df_hm, "methodCount", contains="method")

if df_hm.empty or not (c_class and c_cnt):
    # Missing/empty CSV or an unresolved column: show the placeholder instead.
    show_empty("Highest Number of Methods per Class")
else:
    # 7A) Polar chart
    fig = create_methods_polar_chart(df_hm, c_class, c_cnt)
    if fig:
        fig.show()

    # 7B) Violin
    fig = create_methods_violin_chart(df_hm, c_cnt)
    if fig:
        fig.show()

## 8) Inheritance between classes

In [None]:
# Section 8: Inheritance between classes
# Chart builder comes from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "Inheritance_Between_Classes.csv"
df_ibc = read_csv_safe(path)

# Locate the two class columns of each inheritance pair.
c_c1 = find_col(df_ibc, "class_1_fqn", contains="class_1")
c_c2 = find_col(df_ibc, "class_2_fqn", contains="class_2")

if not df_ibc.empty and (c_c1 and c_c2):
    fig = create_inheritance_sankey(df_ibc, c_c1, c_c2)
    if not fig:
        # The builder produced nothing usable: fall back to the placeholder.
        show_empty("Inheritance Between Classes")
    else:
        fig.show()
else:
    show_empty("Inheritance Between Classes")

## 9) Package structure (Treemap + Icicle)

In [None]:
# Section 9: Package structure
# Chart builders come from interface/charts/high_level_architecture_charts.py

path = HLA_DIR / "Package_Structure.csv"
df_pkg = read_csv_safe(path)

# Locate the package-name column; None when it cannot be found.
c_pkg = find_col(df_pkg, "packageFqn", contains="package")

if not df_pkg.empty and c_pkg:
    # Package names as plain strings, with missing values dropped.
    fqns = df_pkg[c_pkg].dropna().astype(str).tolist()

    # 9A) Treemap
    fig = create_package_treemap(fqns)
    if not fig:
        show_empty("Package Structure")
    else:
        fig.show()

    # 9B) Icicle
    fig = create_package_icicle(fqns)
    if not fig:
        show_empty("Package Structure")
    else:
        fig.show()
else:
    show_empty("Package Structure")