
# Fan In / Fan Out — Report

Interactive visuals for the **Fan_In_Fan_Out** block using CSV files located at:
`reports/custom-queries-csv/Fan_In_Fan_Out/`

**What's Included**
- **Top Fan-In** classes (bar).
- **Top Fan-Out** classes (bar).
- **Fan-In vs Fan-Out** scatter (each dot = class).
- **Distribution** histograms for both metrics.
- **Ratio** view (Fan-In / (Fan-Out + 1)) to spot potential hotspots; the `+ 1` keeps the ratio defined for classes whose Fan-Out is 0.

> The notebook is resilient: if a CSV is missing or cannot be read, a message is printed and the remaining cells still run (charts are skipped when there is no data).


In [1]:

# Setup: imports, path resolver, helpers
import os, glob
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Opt in to pandas' future "no silent downcasting" behaviour for the whole notebook.
# NOTE(review): this option key only exists in recent pandas releases (2.2+, TODO confirm
# against the pinned version); on older releases this call raises instead of being ignored.
pd.set_option('future.no_silent_downcasting', True)

# ---- Path resolution ----
def resolve_reports_dir():
    """Locate the ``reports`` directory that holds the CSV exports.

    Resolution order:
      1. The ``REPORTS_DIRECTORY`` environment variable, when it points to an
         existing directory.
      2. The first ``reports`` directory found in the current working
         directory or in one of its seven nearest ancestors.
      3. ``<cwd>/reports`` as a fallback, whether or not it exists.

    Returns:
        str: Absolute path of the selected directory.
    """
    # 1) Honor the environment variable if it names a real directory
    env = os.environ.get("REPORTS_DIRECTORY")
    if env:
        p = Path(env).expanduser().resolve()
        if p.is_dir():
            return str(p)

    # 2) Walk upwards to find a 'reports' folder.
    # The candidate list is sliced rather than indexed (``cwd.parents[i - 1]``):
    # a shallow working directory has fewer than seven ancestors, and indexing
    # past the end raises IndexError before the fallback below can run.
    cwd = Path.cwd()
    for base in [cwd, *cwd.parents][:8]:
        cand = base / "reports"
        if cand.is_dir():
            return str(cand.resolve())

    # 3) Fallback relative to CWD
    return str((cwd / "reports").resolve())

# Resolve the data locations once; the CSV paths used later are built from FIO_DIR.
REPORTS_DIR = resolve_reports_dir()
FIO_DIR = os.path.join(REPORTS_DIR, "custom-queries-csv", "Fan_In_Fan_Out")

# Echo what was resolved and what is on disk, so a wrong location is visible immediately.
print(f"REPORTS_DIR = {REPORTS_DIR}")
print(f"FIO_DIR     = {FIO_DIR}")
print(f"FIO_DIR exists? -> {os.path.exists(FIO_DIR)}")
print("Files under FIO_DIR:")
for p in sorted(glob.glob(os.path.join(FIO_DIR, "*"))):
    print(f" - {p}")

# ---- CSV IO helper ----
# Cell values that are read as missing, in addition to pandas' built-in NA markers.
NA_LITS = [
    "", " ", "NA", "N/A", "n/a", "NaN",
    "NULL", "Null", "null", "None", "none",
    "-", "--",
]

def read_csv_safe(p):
    """Read a CSV without ever raising; return an empty DataFrame on failure.

    A missing path prints an ``[info]`` line and a read error prints a
    ``[warn]`` line. On success the column names are stripped of surrounding
    whitespace and rows in which every value is missing are removed.
    """
    if os.path.exists(p):
        try:
            frame = pd.read_csv(p, na_values=NA_LITS, keep_default_na=True)
            frame.columns = [str(name).strip() for name in frame.columns]
            return frame.dropna(how="all")
        except Exception as e:
            print(f"[warn] Failed to read {p}: {e}")
    else:
        print(f"[info] Missing CSV: {p}")
    return pd.DataFrame()

def find_col(df, *cands, default=None, contains=None):
    """Find a column by exact candidates or by substring (contains).

    Matching is case-insensitive. Exact candidates are tried first, in the
    order given; if none match and ``contains`` is provided, the first column
    whose lower-cased label contains it is returned.

    Args:
        df: DataFrame whose columns are searched.
        *cands: Candidate column names; falsy entries are skipped.
        default: Value returned when nothing matches.
        contains: Optional substring to look for when no candidate matches.

    Returns:
        The original column label of the match, or ``default``.
    """
    # str() keeps the lookup working for non-string labels (e.g. integer
    # column labels), which have no .lower() method.
    low = {str(c).lower(): c for c in df.columns}
    for c in cands:
        if not c:
            continue
        key = str(c).lower()
        if key in low:
            return low[key]
    if contains:
        needle = str(contains).lower()
        for k, orig in low.items():
            if needle in k:
                return orig
    return default

# Maximum number of rows (bars) kept for each "top N" bar chart in this notebook.
MAX_BARS = 25


REPORTS_DIR = /Users/jonathan.nervaez/Documents/AppModPractice/E2E-decomposition/reports
FIO_DIR     = /Users/jonathan.nervaez/Documents/AppModPractice/E2E-decomposition/reports/custom-queries-csv/Fan_In_Fan_Out
FIO_DIR exists? -> True
Files under FIO_DIR:
 - /Users/jonathan.nervaez/Documents/AppModPractice/E2E-decomposition/reports/custom-queries-csv/Fan_In_Fan_Out/Fan_In.csv
 - /Users/jonathan.nervaez/Documents/AppModPractice/E2E-decomposition/reports/custom-queries-csv/Fan_In_Fan_Out/Fan_Out.csv


## 1) Load & unify metrics

In [2]:

# Read both CSVs; read_csv_safe returns an empty frame when a file is missing or unreadable.
path_in = os.path.join(FIO_DIR, "Fan_In.csv")
df_in = read_csv_safe(path_in)
display(df_in.head(10))

path_out = os.path.join(FIO_DIR, "Fan_Out.csv")
df_out = read_csv_safe(path_out)
display(df_out.head(10))

# Locate the key and metric columns (case-insensitive); None when the frame is empty
# or the column cannot be found.
c_type_in  = find_col(df_in,  "type", contains="type", default=None) if not df_in.empty else None
c_fanin    = find_col(df_in,  "fanIn", contains="fanin", default=None) if not df_in.empty else None
c_type_out = find_col(df_out, "type", contains="type", default=None) if not df_out.empty else None
c_fanout   = find_col(df_out, "fanOut", contains="fanout", default=None) if not df_out.empty else None

# Merge into a single dataframe keyed by class/type
if c_type_in or c_type_out:
    # Each side is reduced to (type, metric) with canonical column names; a side whose
    # columns could not be located contributes an empty frame with the same schema.
    if c_type_in and c_fanin:
        a = df_in[[c_type_in, c_fanin]].copy()
        a.columns = ["type", "fanIn"]
    else:
        a = pd.DataFrame(columns=["type", "fanIn"])

    if c_type_out and c_fanout:
        b = df_out[[c_type_out, c_fanout]].copy()
        b.columns = ["type", "fanOut"]
    else:
        b = pd.DataFrame(columns=["type", "fanOut"])

    # Outer join keeps classes that appear in only one of the two files.
    # NOTE(review): if the same `type` value occurs more than once in both files, the
    # merge yields one row per pairing — confirm `type` is unique within each CSV.
    merged = pd.merge(a, b, on="type", how="outer")

    # Only the metric columns are zero-filled. A frame-wide fillna(0) would also turn a
    # missing `type` label into the integer 0 and would run fillna on object columns.
    merged["fanIn"]  = pd.to_numeric(merged["fanIn"], errors="coerce").fillna(0).astype(int)
    merged["fanOut"] = pd.to_numeric(merged["fanOut"], errors="coerce").fillna(0).astype(int)
else:
    merged = pd.DataFrame(columns=["type","fanIn","fanOut"])

print("Merged shape:", merged.shape)
display(merged.head(10))


Unnamed: 0,type,fanIn,Source Cypher File: Custom_Queries/Fan_In_Fan_Out/Fan_In.cypher
0,com.salesmanager.core.model.merchant.MerchantS...,403,
1,com.salesmanager.core.model.reference.language...,299,
2,com.salesmanager.core.business.exception.Servi...,207,
3,com.salesmanager.core.model.merchant.MerchantS...,202,
4,com.salesmanager.core.model.reference.language...,194,
5,com.salesmanager.core.business.exception.Servi...,145,
6,com.salesmanager.core.model.merchant.MerchantS...,141,
7,com.salesmanager.core.model.catalog.product.Pr...,114,
8,com.salesmanager.core.model.generic.SalesManag...,91,
9,com.salesmanager.core.model.customer.Customer,76,


Unnamed: 0,type,fanOut,Source Cypher File: Custom_Queries/Fan_In_Fan_Out/Fan_Out.cypher
0,com.salesmanager.shop.store.controller.order.f...,207,
1,com.salesmanager.shop.store.controller.custome...,157,
2,com.salesmanager.core.business.modules.integra...,130,
3,com.salesmanager.shop.store.facade.user.UserFa...,129,
4,com.salesmanager.shop.populator.catalog.Readab...,125,
5,com.salesmanager.shop.mapper.catalog.product.R...,116,
6,com.salesmanager.shop.store.api.v1.order.OrderApi,116,
7,com.salesmanager.core.business.services.order....,115,
8,com.salesmanager.shop.store.api.v1.product.Pro...,112,
9,com.salesmanager.core.business.services.shippi...,110,


Merged shape: (1868, 3)


Unnamed: 0,type,fanIn,fanOut
0,com.salesmanager.core.business.configuration.A...,2,14
1,com.salesmanager.core.business.configuration.C...,1,20
2,com.salesmanager.core.business.configuration.C...,1,20
3,com.salesmanager.core.business.configuration.D...,0,27
4,com.salesmanager.core.business.configuration.D...,3,41
5,com.salesmanager.core.business.configuration.M...,0,15
6,com.salesmanager.core.business.configuration.P...,0,13
7,com.salesmanager.core.business.configuration.d...,0,10
8,com.salesmanager.core.business.configuration.e...,0,13
9,com.salesmanager.core.business.configuration.e...,2,8


## 2) Top Fan-In classes

In [3]:

# Bar chart of the MAX_BARS classes with the highest fan-in; prints a notice when there is no data.
if not merged.empty:
    top_in = merged.sort_values(by="fanIn", ascending=False).head(MAX_BARS)
    fig = px.bar(
        top_in,
        x="type",
        y="fanIn",
        text="fanIn",
        title="Top classes by Fan-In (how many depend on them)",
    )
    # Value labels sit above the bars and are allowed to overflow the plot area.
    fig.update_traces(textposition="outside", cliponaxis=False)
    fig.update_layout(xaxis_tickangle=-35, width=1200, height=550)
    fig.show()
else:
    print("[info] No data available for Fan-In / Fan-Out.")


## 3) Top Fan-Out classes

In [4]:

# Bar chart of the MAX_BARS classes with the highest fan-out; skipped when there is no data.
if not merged.empty:
    top_out = merged.sort_values(by="fanOut", ascending=False).head(MAX_BARS)
    fig = px.bar(
        top_out,
        x="type",
        y="fanOut",
        text="fanOut",
        title="Top classes by Fan-Out (how many they depend on)",
    )
    # Value labels sit above the bars and are allowed to overflow the plot area.
    fig.update_traces(textposition="outside", cliponaxis=False)
    fig.update_layout(xaxis_tickangle=-35, width=1200, height=550)
    fig.show()


## 4) Fan-In vs Fan-Out — scatter

In [5]:

# Scatter of fan-out (x) against fan-in (y), one marker per class; skipped when there is no data.
if not merged.empty:
    # Marker size is driven by the combined coupling, stored as a new `total` column on `merged`.
    merged["total"] = merged["fanIn"].add(merged["fanOut"])
    fig = px.scatter(
        merged,
        x="fanOut",
        y="fanIn",
        size="total",
        hover_name="type",
        title="Fan-In vs Fan-Out (size = fanIn + fanOut)",
    )
    fig.update_layout(width=950, height=700)
    fig.show()


## 5) Distributions — histograms

In [6]:

# One 30-bin histogram per metric; skipped when there is no data.
if not merged.empty:
    fig1 = px.histogram(
        merged, x="fanIn", nbins=30, title="Distribution of Fan-In"
    ).update_layout(width=800, height=450)
    fig1.show()

    fig2 = px.histogram(
        merged, x="fanOut", nbins=30, title="Distribution of Fan-Out"
    ).update_layout(width=800, height=450)
    fig2.show()


## 6) Ratio view — (Fan-In / (Fan-Out + 1))

In [7]:

# Bar chart of the MAX_BARS classes with the highest fan-in to fan-out ratio; skipped when there is no data.
if not merged.empty:
    # The denominator is fanOut + 1.0, so a class with fanOut == 0 still gets a finite ratio.
    merged["ratio_in_out"] = merged["fanIn"].div(merged["fanOut"].add(1.0))

    top_ratio = merged.sort_values(by="ratio_in_out", ascending=False).head(MAX_BARS)
    fig = px.bar(
        top_ratio,
        x="type",
        y="ratio_in_out",
        text="ratio_in_out",
        title="Top classes by Fan-In to Fan-Out ratio",
    )
    # Labels are rendered with two decimals, above the bars.
    fig.update_traces(texttemplate='%{text:.2f}', textposition="outside", cliponaxis=False)
    fig.update_layout(xaxis_tickangle=-35, width=1200, height=550)
    fig.show()
