# Fan In / Fan Out — Report

This notebook visualizes the CSV outputs generated by the **Fan_In_Fan_Out** block.

## What this notebook shows
- **Top Fan-In** classes (Bar).
- **Top Fan-Out** classes (Bar).
- **Fan-In vs Fan-Out** scatter (each dot = class).
- **Distributions** for both metrics (Histograms).
- **Ratio** view (Fan-In / (Fan-Out + 1)) to spot potential hotspots (Bar).

> If a CSV is missing or cannot be read, the loader prints a short message and returns no rows; when no data is available, the chart cells are skipped.


In [None]:
# Setup: imports, paths, helpers
# - CSVs are read from ../reports/csv-reports/<CATEGORY>/<file>.csv, resolved against the notebook's working directory.
# - Minimal console output; only show information if a CSV is missing/empty.
# - Bar charts use an explicit default color so it's easy to tweak later.
# - Titles are standardized without block prefixes.

import os
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Opt in to pandas' "no silent downcasting" future behavior.
# The option does not exist on every pandas release; asking for an unknown
# option raises OptionError (a KeyError subclass), which must not abort the
# notebook setup, so it is deliberately ignored here.
try:
    pd.set_option('future.no_silent_downcasting', True)
except KeyError:
    pass

# Name of the analysis block; also the sub-folder that holds its CSV reports.
CATEGORY = "Fan_In_Fan_Out"
# Report root, made absolute against the current working directory.
CSV_BASE = Path("../reports/csv-reports").resolve()
# Folder containing Fan_In.csv / Fan_Out.csv.
FIO_DIR = CSV_BASE / CATEGORY

# Explicit default color for all bar charts in this notebook
DEFAULT_BAR_COLOR = ["#1f77b4"]

# CSV IO helper
# Extra cell values parsed as missing, on top of pandas' default NA markers.
NA_LITS = ["", " ", "NA", "N/A", "n/a", "NaN", "NULL", "Null", "null", "None", "none", "-", "--"]

def read_csv_safe(p: Path) -> pd.DataFrame:
    """Load the CSV at *p*, falling back to an empty DataFrame.

    Column names are converted to stripped strings and rows in which every
    cell is missing are removed. A missing file yields an ``[info]`` line and
    any read failure yields a ``[warn]`` line; in both cases an empty
    DataFrame is returned instead of raising.
    """
    csv_path = Path(p)
    if csv_path.exists():
        try:
            frame = pd.read_csv(csv_path, na_values=NA_LITS, keep_default_na=True)
            # Normalize headers so later lookups are not tripped up by padding.
            frame.columns = [str(name).strip() for name in frame.columns]
            return frame.dropna(how="all")
        except Exception as err:
            # Best-effort loader: report the problem and fall through.
            print(f"[warn] Failed to read {csv_path}: {err}")
    else:
        print(f"[info] Missing CSV: {csv_path}")
    return pd.DataFrame()

def find_col(df, *cands, default=None, contains=None):
    """Find a column by exact candidates or by substring (contains).

    Matching is case-insensitive. Exact candidates are tried first, in the
    order given; if none matches and *contains* is set, the first column (in
    frame order) whose lower-cased label contains that substring is used.

    Args:
        df: DataFrame whose columns are searched.
        *cands: Candidate column names; falsy entries are ignored.
        default: Value returned when nothing matches.
        contains: Optional substring used as a fallback match.

    Returns:
        The original column label that matched, or *default*.
    """
    # str() keeps the lookup working for non-string labels (e.g. integer
    # headers), which would otherwise fail on .lower().
    by_lower = {str(col).lower(): col for col in df.columns}
    for cand in cands:
        if cand and str(cand).lower() in by_lower:
            return by_lower[str(cand).lower()]
    if contains:
        needle = str(contains).lower()
        for lowered, original in by_lower.items():
            if needle in lowered:
                return original
    return default


## 1) Load & unify metrics

In [None]:
# Load Fan_In.csv and Fan_Out.csv, then merge on class/type
# No charts here — just data preparation for the following sections.

# Read CSVs
path_in = FIO_DIR / "Fan_In.csv"
df_in = read_csv_safe(path_in)

path_out = FIO_DIR / "Fan_Out.csv"
df_out = read_csv_safe(path_out)

# Locate columns (exact name first, substring as fallback).
# An empty frame has nothing to look up, so its column handles stay None.
c_type_in = find_col(df_in, "type", contains="type", default=None) if not df_in.empty else None
c_fanin = find_col(df_in, "fanIn", contains="fanin", default=None) if not df_in.empty else None
c_type_out = find_col(df_out, "type", contains="type", default=None) if not df_out.empty else None
c_fanout = find_col(df_out, "fanOut", contains="fanout", default=None) if not df_out.empty else None

# Merge into a single dataframe keyed by class/type
if c_type_in or c_type_out:
    # A side contributes rows only when both its key and its metric column
    # were found; otherwise an empty placeholder with the expected column
    # names keeps the merge below uniform.
    a = df_in[[c_type_in, c_fanin]].copy() if (c_type_in and c_fanin) else pd.DataFrame(columns=["type", "fanIn"])
    a.columns = ["type", "fanIn"]
    b = df_out[[c_type_out, c_fanout]].copy() if (c_type_out and c_fanout) else pd.DataFrame(columns=["type", "fanOut"])
    b.columns = ["type", "fanOut"]

    # Rows without a class name cannot be labelled on any chart, and missing
    # keys on both sides would be paired with each other by the merge, so
    # drop them up front instead of turning the key into the number 0.
    a = a.dropna(subset=["type"])
    b = b.dropna(subset=["type"])

    merged = pd.merge(a, b, on="type", how="outer")
    # Only the metric columns are filled: a class absent from one CSV counts
    # as 0 for that metric, and unparsable values are coerced to 0 as well.
    merged["fanIn"] = pd.to_numeric(merged["fanIn"], errors="coerce").fillna(0).astype(int)
    merged["fanOut"] = pd.to_numeric(merged["fanOut"], errors="coerce").fillna(0).astype(int)
else:
    # Neither CSV exposed a class/type column: keep the expected schema so
    # the chart cells can simply test `merged.empty`.
    merged = pd.DataFrame(columns=["type", "fanIn", "fanOut"])

display(merged.head(10))


## 2) Top Fan-In classes

In [None]:
# Chart generated here:
#  - 2A) Top classes by Fan-In (Bar, explicit color)

MAX_BARS = 25  # upper bound on the number of bars drawn

if not merged.empty:
    # Rank classes by fan-in, highest first, and keep the leading MAX_BARS rows.
    top_in = merged.sort_values("fanIn", ascending=False).head(MAX_BARS)
    fig = px.bar(
        top_in,
        x="type",
        y="fanIn",
        text="fanIn",
        title="Top classes by Fan-In",
        color_discrete_sequence=DEFAULT_BAR_COLOR,
    )
    # Value labels go above the bars and are not clipped at the axes.
    fig.update_traces(textposition="outside", cliponaxis=False)
    fig.update_layout(
        xaxis_tickangle=-35,
        width=1200,
        height=550,
        xaxis_title="class",
        yaxis_title="fan-in",
    )
    fig.show()
else:
    print("[info] No data available for Fan-In / Fan-Out.")


## 3) Top Fan-Out classes

In [None]:
# Chart generated here:
#  - 3A) Top classes by Fan-Out (Bar, explicit color)

MAX_BARS = 25  # upper bound on the number of bars drawn

if not merged.empty:
    # Rank classes by fan-out, highest first, and keep the leading MAX_BARS rows.
    top_out = merged.sort_values("fanOut", ascending=False).head(MAX_BARS)
    fig = (
        px.bar(
            top_out,
            x="type",
            y="fanOut",
            text="fanOut",
            title="Top classes by Fan-Out",
            color_discrete_sequence=DEFAULT_BAR_COLOR,
        )
        # Value labels go above the bars and are not clipped at the axes.
        .update_traces(textposition="outside", cliponaxis=False)
        .update_layout(
            xaxis_tickangle=-35,
            width=1200,
            height=550,
            xaxis_title="class",
            yaxis_title="fan-out",
        )
    )
    fig.show()


## 4) Fan-In vs Fan-Out — scatter

In [None]:
# Chart generated here:
#  - 4A) Fan-In vs Fan-Out (Scatter, size = fanIn + fanOut)

if not merged.empty:
    # Combined coupling per class; stored on `merged` and used as marker size.
    merged["total"] = merged["fanIn"].add(merged["fanOut"])
    fig = px.scatter(
        merged,
        x="fanOut",
        y="fanIn",
        size="total",
        hover_name="type",
        title="Fan-In vs Fan-Out (size = fanIn + fanOut)",
    ).update_layout(
        width=950,
        height=700,
        xaxis_title="fan-out",
        yaxis_title="fan-in",
    )
    fig.show()


## 5) Distributions — histograms

In [None]:
# Charts generated here:
#  - 5A) Distribution of Fan-In (Histogram)
#  - 5B) Distribution of Fan-Out (Histogram)

if not merged.empty:
    # 5A) one bar per fan-in bucket, counting classes
    fig1 = px.histogram(
        merged, x="fanIn", nbins=30, title="Distribution of Fan-In"
    ).update_layout(width=800, height=450, xaxis_title="fan-in", yaxis_title="count")
    fig1.show()

    # 5B) same view for fan-out
    fig2 = px.histogram(
        merged, x="fanOut", nbins=30, title="Distribution of Fan-Out"
    ).update_layout(width=800, height=450, xaxis_title="fan-out", yaxis_title="count")
    fig2.show()


## 6) Ratio view — Fan-In / (Fan-Out + 1)

In [None]:
# Chart generated here:
#  - 6A) Top classes by Fan-In to Fan-Out ratio (Bar, explicit color)

MAX_BARS = 25  # upper bound on the number of bars drawn

if not merged.empty:
    # The +1.0 in the denominator avoids division by zero for classes with no
    # outgoing dependencies; the result is stored on `merged`.
    merged["ratio_in_out"] = merged["fanIn"].div(merged["fanOut"].add(1.0))
    top_ratio = merged.sort_values("ratio_in_out", ascending=False).head(MAX_BARS)
    fig = px.bar(
        top_ratio,
        x="type",
        y="ratio_in_out",
        text="ratio_in_out",
        title="Top classes by Fan-In to Fan-Out ratio",
        color_discrete_sequence=DEFAULT_BAR_COLOR,
    )
    # Labels are shown with two decimals, above the bars, unclipped.
    fig.update_traces(
        texttemplate='%{text:.2f}',
        textposition="outside",
        cliponaxis=False,
    )
    fig.update_layout(
        xaxis_tickangle=-35,
        width=1200,
        height=550,
        xaxis_title="class",
        yaxis_title="ratio (fan-in / (fan-out + 1))",
    )
    fig.show()
