# Análisis de redes Pasco - Ancash por corte de periodo de gobierno regional

In [5]:
from pathlib import Path
import pandas as pd, numpy as np, networkx as nx, json, re

# ============================================================
# PARÁMETROS
# ============================================================
# Run configuration. Every value below is read by the module-level code or by
# the helper functions further down in this file.
RUTA_ARCHIVO = "contratos.xlsx"      # Excel workbook holding the contract data
# Sheet selector handed to pd.read_excel(sheet_name=...).
HOJA_EXCEL = 0
REGIONES_OBJ = ["ANCASH", "PASCO"]   # regions kept in the analysis (also shown in page titles)
# Year bounds applied to the ANIO column with Series.between().
ANIO_MIN, ANIO_MAX = 2005, 2024
# When True each company row carries MONTO / N_EMP_CONTRATO; when False every
# company row carries the full contract amount.
PRORRATEAR_MONTO_CONSORCIO = False
SALIDA = Path("docs")                # output folder for the generated HTML (GitHub Pages)


# ============================================================
# PERIODOS DE GOBIERNO (ajusta según tu tesis)
# ============================================================
PERIODOS = [
    (pd.Timestamp("2003-01-01"), pd.Timestamp("2006-12-31"), "2003-2006"),
    (pd.Timestamp("2007-01-01"), pd.Timestamp("2010-12-31"), "2007-2010"),
    (pd.Timestamp("2011-01-01"), pd.Timestamp("2014-12-31"), "2011-2014"),
    (pd.Timestamp("2015-01-01"), pd.Timestamp("2018-12-31"), "2015-2018"),
    (pd.Timestamp("2019-01-01"), pd.Timestamp("2022-12-31"), "2019-2022"),
    (pd.Timestamp("2023-01-01"), pd.Timestamp("2026-12-31"), "2023-2026"),
]

def asignar_periodo(fecha):
    """Return the label of the government period that contains ``fecha``.

    Parameters
    ----------
    fecha : timestamp-like or missing value
        Contract date. Anything ``pd.isna`` reports as missing yields ``None``.

    Returns
    -------
    str or None
        The third element of the matching ``PERIODOS`` entry, or ``None`` when
        the date is missing or lies outside every configured period.
    """
    if pd.isna(fecha):
        return None
    # Period ends are stored as midnight of Dec 31. Comparing the raw timestamp
    # would leave any date carrying a time-of-day on that last day (for example
    # 2006-12-31 15:00) after `fin` and before the next `ini`, so it would match
    # no period at all. Only the calendar day matters, so drop the time first.
    dia = pd.Timestamp(fecha).normalize()
    for ini, fin, nombre in PERIODOS:
        if ini <= dia <= fin:
            return nombre
    return None


def slugify_periodo(p):
    """Return ``p`` as text with spaces turned into "_" and "/" into "-".

    The result is used as part of the generated HTML file names.
    """
    tabla = str.maketrans({" ": "_", "/": "-"})
    return str(p).translate(tabla)


# ============================================================
# FUNCIONES DE LIMPIEZA
# ============================================================
def _norm_region(s):
    if pd.isna(s):
        return s
    s2 = str(s).strip().upper()
    for a, b in zip("ÁÉÍÓÚ", "AEIOU"):
        s2 = s2.replace(a, b)
    return s2

def _norm_emp(x):
    if pd.isna(x):
        return np.nan
    s = str(x).strip()
    s = s.replace("RUC", "").replace(":", "").replace(" ", "")
    return s if s else np.nan

def _clean_monto(x):
    if pd.isna(x):
        return 0.0
    s = str(x).strip().replace("S/","").replace("s/","").replace("soles","").replace("SOLES","").strip()
    if s.count(".") > 1:
        parts = s.split(".")
        s = "".join(parts[:-1]) + "." + parts[-1]
    if "," in s and "." not in s:
        s = s.replace(",", ".")
    if s.count(",") > 0 and s.count(".") == 1:
        s = s.replace(",", "")
    s = s.replace(" ", "")
    try:
        return float(s)
    except Exception:
        t = "".join(re.findall(r"[0-9.,]", s))
        if t.count(".") > 1:
            parts = t.split(".")
            t = "".join(parts[:-1]) + "." + parts[-1]
        t = t.replace(",", "")
        try:
            return float(t)
        except Exception:
            return 0.0


# ============================================================
# CARGA Y LIMPIEZA DEL DATASET
# ============================================================
# Load the workbook sheet selected by the parameters above.
df = pd.read_excel(RUTA_ARCHIVO, sheet_name=HOJA_EXCEL)

# Parse dates as day-first; values that cannot be parsed become NaT
# (errors="coerce") and are filtered out below because they get no PERIODO.
df["FECHA"] = pd.to_datetime(df["FECHA"], dayfirst=True, errors="coerce")
df["ANIO"]  = df["FECHA"].dt.year
df["REGION_NORM"] = df["REGION"].apply(_norm_region)
df["PERIODO"] = df["FECHA"].apply(asignar_periodo)

# Target regions go through the same normalisation as the data so the
# isin() comparison below is like-for-like.
REGIONES_NORM = [_norm_region(r) for r in REGIONES_OBJ]

# Company columns are detected by name: any column whose upper-cased name
# starts with "EMPRESA".
emp_cols = [c for c in df.columns if str(c).upper().startswith("EMPRESA")]
if not emp_cols:
    raise ValueError("No hay columnas EMPRESA_n en el archivo.")

for c in emp_cols:
    df[c] = df[c].apply(_norm_emp)

df["MONTO"] = df["MONTO"].apply(_clean_monto)

# Keep only rows in the target regions, inside the year window and with an
# assigned period; the index is rebuilt so it runs 0..n-1.
df = df[
    df["REGION_NORM"].isin(REGIONES_NORM)
    & df["ANIO"].between(ANIO_MIN, ANIO_MAX)
    & df["PERIODO"].notna()
].reset_index(drop=True)

if df.empty:
    raise ValueError("No hay registros para las regiones/años/periodos especificados.")

# Synthetic contract key: row position + region + date. The row position makes
# it unique per row of the filtered frame.
df["CONTRATO_ID"] = df.index.astype(str) + "_" + df["REGION_NORM"] + "_" + df["FECHA"].astype(str)
# Number of non-empty company cells per row; 0 is replaced by 1
# (presumably so the optional pro-rating division never divides by zero).
df["N_EMP_CONTRATO"] = df[emp_cols].notna().sum(axis=1).astype(int).replace(0, 1)


# ============================================================
# FORMATO LARGO (contrato-empresa)
# ============================================================
# One tuple per (contract, company) pair, in row order and then column order.
rows = []
for _, fila in df.iterrows():
    cabecera = (int(fila["ANIO"]), fila["PERIODO"], fila["REGION_NORM"])
    monto_fila = float(fila["MONTO"])
    n_emp_fila = int(fila["N_EMP_CONTRATO"])
    id_contrato = fila["CONTRATO_ID"]

    # With pro-rating on, every company gets an equal share of the amount;
    # otherwise every company carries the full contract amount.
    if PRORRATEAR_MONTO_CONSORCIO:
        monto_emp = monto_fila / n_emp_fila
    else:
        monto_emp = monto_fila

    rows.extend(
        (*cabecera, empresa, monto_emp, id_contrato)
        for empresa in (fila[col] for col in emp_cols)
        if pd.notna(empresa) and str(empresa).strip()
    )

long = pd.DataFrame(
    rows,
    columns=["ANIO", "PERIODO", "REGION", "EMPRESA", "MONTO_CONTRATO", "CONTRATO_ID"],
)

# Region-company edge list per period: summed amount and distinct contracts.
edges = long.groupby(["PERIODO", "REGION", "EMPRESA"], as_index=False).agg(
    monto_total=("MONTO_CONTRATO", "sum"),
    n_contratos=("CONTRATO_ID", "nunique"),
)

PERIODOS_UNICOS = edges["PERIODO"].unique().tolist()
PERIODOS_UNICOS.sort()
print("Periodos encontrados:", PERIODOS_UNICOS)


# ============================================================
# GRAFO BIPARTITO REGIÓN–EMPRESA POR PERIODO
# ============================================================
def build_graph_for_period(periodo):
    """Build the region-company bipartite network for one period.

    Reads the module-level ``edges`` (aggregated region-company pairs) and
    ``long`` (one row per contract-company) frames.

    Region nodes report betweenness/closeness measured on the bipartite graph.
    Company nodes report betweenness/closeness measured on the co-contracting
    graph (companies that share a contract); a company that never shared a
    contract gets 0.0 for both. Degree figures always come from the bipartite
    graph.

    Betweenness is computed on a ``distance`` edge attribute equal to
    ``1 / weight``. NetworkX treats the attribute passed as ``weight`` as a
    path length, so passing the contract count directly would make the most
    frequent partners the most distant ones. Closeness stays unweighted
    (hop count), as before.

    Parameters
    ----------
    periodo : str
        Period label as stored in the ``PERIODO`` column.

    Returns
    -------
    tuple[list[dict], list[dict], dict]
        ``(nodes, edges_list, stats)``: vis-network node dicts, vis-network
        edge dicts, and a summary with keys ``periodo``, ``n_regiones``,
        ``n_empresas``, ``n_contratos`` and ``density``. When the period has no
        rows both lists are empty and ``density`` is NaN.
    """
    from itertools import combinations

    dfa = edges[edges["PERIODO"] == periodo].copy()
    if dfa.empty:
        return [], [], {
            "periodo": periodo,
            "n_regiones": 0,
            "n_empresas": 0,
            "n_contratos": 0,
            "density": float("nan"),
        }

    # ---------- 1) Region-company bipartite graph ----------
    G = nx.Graph()

    for reg in dfa["REGION"].unique():
        G.add_node(reg, tipo="region", label=reg.title())

    for emp in dfa["EMPRESA"].unique():
        G.add_node(emp, tipo="empresa", label=str(emp))

    for _, r in dfa.iterrows():
        reg = r["REGION"]
        emp = r["EMPRESA"]
        w   = int(r["n_contratos"])
        mt  = float(r["monto_total"])

        if G.has_edge(reg, emp):
            G[reg][emp]["weight"]      += w
            G[reg][emp]["monto_total"] += mt
        else:
            G.add_edge(reg, emp, weight=w, monto_total=mt)

    # More contracts = stronger tie = shorter path. Every edge comes from at
    # least one contract, so weight >= 1 and the division is safe.
    for _, _, attrs in G.edges(data=True):
        attrs["distance"] = 1.0 / attrs["weight"]

    # Bipartite metrics
    deg_bip  = dict(G.degree())
    degw_bip = dict(G.degree(weight="weight"))
    bet_bip  = nx.betweenness_centrality(G, weight="distance")
    clo_bip  = nx.closeness_centrality(G)

    # ---------- 2) Co-contracting graph between companies ----------
    long_p = long[long["PERIODO"] == periodo]
    H = nx.Graph()

    for _, g in long_p.groupby("CONTRATO_ID"):
        emps = list(g["EMPRESA"].dropna().unique())
        # combinations() yields nothing for fewer than two companies, so
        # single-company contracts add no edge.
        for e1, e2 in combinations(emps, 2):
            if H.has_edge(e1, e2):
                H[e1][e2]["weight"] += 1
            else:
                H.add_edge(e1, e2, weight=1)

    if H.number_of_nodes() > 0:
        for _, _, attrs in H.edges(data=True):
            attrs["distance"] = 1.0 / attrs["weight"]
        bet_co = nx.betweenness_centrality(H, weight="distance")
        clo_co = nx.closeness_centrality(H)
    else:
        bet_co, clo_co = {}, {}

    # ---------- 3) Global metrics ----------
    total_contr = int(dfa["n_contratos"].sum())
    n_empresas  = int(dfa["EMPRESA"].nunique())
    n_regiones  = int(dfa["REGION"].nunique())
    dens        = float(nx.density(G)) if G.number_of_nodes() > 1 else float("nan")

    stats = {
        "periodo": periodo,
        "n_regiones": n_regiones,
        "n_empresas": n_empresas,
        "n_contratos": total_contr,
        "density": dens,
    }

    # ---------- 4) Nodes for the HTML page ----------
    nodes = []

    for n in G.nodes():
        tipo  = G.nodes[n]["tipo"]
        label = G.nodes[n]["label"]

        if tipo == "region":
            bet_v = bet_bip.get(n, 0.0)
            clo_v = clo_bip.get(n, 0.0)
            size  = 40
            color = "#1f77b4"
        else:
            bet_v = bet_co.get(n, 0.0)
            clo_v = clo_co.get(n, 0.0)
            size  = 6 + 2 * degw_bip.get(n, 1)
            color = "#FFD12A"

        tipo_txt = "Región" if tipo == "region" else "Empresa"

        title = (
            f"{tipo_txt}: {label}<br>"
            f"Degree (bipartito): {deg_bip.get(n,0)}<br>"
            f"Degree ponderado (# contratos): {degw_bip.get(n,0)}<br>"
            f"Betweenness: {bet_v:.3f} | Closeness: {clo_v:.3f}"
        )

        nodes.append({
            "id": n,
            "label": label,
            "shape": "circle" if tipo == "region" else "dot",
            "size": size,
            "color": color,
            "title": title,
            "group": tipo,
        })

    # ---------- 5) Edges for the HTML page ----------
    edges_list = []
    for u, v, data in G.edges(data=True):
        w  = int(data.get("weight", 1))
        mt = float(data.get("monto_total", 0.0))
        width = 1 + min(4, 0.4 * w)  # caps the drawn thickness at 5
        title = f"# contratos: {w} | Monto total: S/ {mt:,.2f}"
        edges_list.append({
            "from": u,
            "to": v,
            "value": w,
            "width": width,
            "title": title,
        })

    return nodes, edges_list, stats


# ============================================================
# HTML CON VIS-NETWORK (bipartito y empresas usan esto)
# ============================================================
def _write_visnetwork_html(nodes, edges_list, outfile_path, title_text):
    options = {
        "nodes": {
            "borderWidth": 1,
            "shape": "dot",
            "font": {"size": 12}
        },
        "edges": {
            "smooth": {"type": "dynamic"},
            "selectionWidth": 0
        },
        "interaction": {
            "hover": True,
            "tooltipDelay": 100,
            "dragNodes": True
        },
        "physics": {
            "enabled": True,
            "stabilization": {"iterations": 800}
        }
    }

    html = f"""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="utf-8"/>
<title>{title_text}</title>
<style>
body{{margin:0;font-family:Arial, sans-serif;background:#f7f7f7;}}
#title{{padding:10px;border-bottom:1px solid #ddd;background:#fff}}
#network{{width:100vw;height:calc(100vh - 48px);background:#f7f7f7;}}
</style>
<script src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
</head>
<body>
<div id="title">{title_text}</div>
<div id="network"></div>
<script>
const nodes = new vis.DataSet({json.dumps(nodes, ensure_ascii=False)});
const edges = new vis.DataSet({json.dumps(edges_list, ensure_ascii=False)});
const container = document.getElementById('network');
const data = {{ nodes, edges }};
const options = {json.dumps(options)};
const network = new vis.Network(container, data, options);

// dejar que se estabilice y luego apagar física
network.once('stabilizationIterationsDone', function () {{
  network.setOptions({{ physics: false }});
}});
</script>
</body>
</html>
"""
    Path(outfile_path).write_text(html, encoding="utf-8")


# ============================================================
# GUARDAR HTML BIPARTITO POR PERIODO
# ============================================================
def save_period_html(periodo, folder=SALIDA):
    """Render the bipartite network of ``periodo`` to ``folder``.

    Returns ``None`` when the period has no rows in ``edges``; otherwise
    returns ``(file_name, stats)``, where ``file_name`` is relative to
    ``folder`` and ``stats`` is the summary from ``build_graph_for_period``.
    """
    if edges[edges["PERIODO"] == periodo].empty:
        return None

    nodos, aristas, resumen = build_graph_for_period(periodo)

    regiones_txt = ", ".join(REGIONES_OBJ)
    titulo = f"Red de contratación — {regiones_txt} — {periodo}"
    archivo = "red_" + slugify_periodo(periodo) + ".html"

    folder.mkdir(parents=True, exist_ok=True)
    _write_visnetwork_html(nodos, aristas, str(folder / archivo), titulo)
    return archivo, resumen


# ============================================================
# PROYECCIÓN SOLO EMPRESAS (CONSORCIOS) POR PERIODO
# ============================================================
def build_empresa_projection(periodo):
    """Build the one-mode company network (consortia) for one period.

    - Nodes: companies that shared at least one contract with another company.
    - Edges: pairs of companies that appeared together in a contract.
    - ``weight``: number of contracts the pair shared.

    Betweenness is computed on a ``distance`` edge attribute equal to
    ``1 / weight``. NetworkX treats the attribute passed as ``weight`` as a
    path length, so passing the shared-contract count directly would make the
    most frequent partners the most distant ones. Closeness stays unweighted
    (hop count), as before.

    Parameters
    ----------
    periodo : str
        Period label as stored in the ``PERIODO`` column of ``long``.

    Returns
    -------
    tuple[list[dict], list[dict], dict]
        ``(nodes, edges_list, stats)``: vis-network node dicts, vis-network
        edge dicts, and a summary with keys ``periodo``, ``n_empresas``,
        ``n_aristas``, ``n_consortios`` and ``density``. When the period has no
        consortium both lists are empty and ``density`` is NaN.
    """
    from itertools import combinations

    long_p = long[long["PERIODO"] == periodo]

    H = nx.Graph()
    n_consortios = 0

    for _, g in long_p.groupby("CONTRATO_ID"):
        emps = list(g["EMPRESA"].dropna().unique())
        if len(emps) < 2:
            continue  # single-company contract, not a consortium
        n_consortios += 1
        for e1, e2 in combinations(emps, 2):
            if H.has_edge(e1, e2):
                H[e1][e2]["weight"] += 1
            else:
                H.add_edge(e1, e2, weight=1)

    if H.number_of_nodes() == 0:
        return [], [], {
            "periodo": periodo,
            "n_empresas": 0,
            "n_aristas": 0,
            "n_consortios": 0,
            "density": float("nan"),
        }

    # More shared contracts = stronger tie = shorter path. Edges start at
    # weight 1 and only grow, so the division is safe.
    for _, _, attrs in H.edges(data=True):
        attrs["distance"] = 1.0 / attrs["weight"]

    # Metrics
    deg   = dict(H.degree())
    deg_w = dict(H.degree(weight="weight"))
    bet   = nx.betweenness_centrality(H, weight="distance")
    clo   = nx.closeness_centrality(H)

    n_empresas = H.number_of_nodes()
    n_aristas  = H.number_of_edges()
    dens       = float(nx.density(H))

    stats = {
        "periodo": periodo,
        "n_empresas": n_empresas,
        "n_aristas": n_aristas,
        "n_consortios": n_consortios,
        "density": dens,
    }

    # Nodes for the HTML page
    nodes = []
    for n in H.nodes():
        size = 6 + 2 * deg_w.get(n, 1)
        title = (
            f"Empresa: {n}<br>"
            f"Degree (co-contratación): {deg.get(n,0)}<br>"
            f"Degree ponderado (# contratos en consorcio): {deg_w.get(n,0)}<br>"
            f"Betweenness: {bet.get(n,0):.3f} | Closeness: {clo.get(n,0):.3f}"
        )
        nodes.append({
            "id": n,
            "label": str(n),
            "shape": "dot",
            "size": size,
            "color": "#FFD12A",
            "title": title,
            "group": "empresa",
        })

    # Edges for the HTML page
    edges_list = []
    for u, v, data in H.edges(data=True):
        w = int(data.get("weight", 1))
        width = 1 + min(4, 0.5 * w)  # caps the drawn thickness at 5
        title = f"# contratos compartidos (consorcios): {w}"
        edges_list.append({
            "from": u,
            "to": v,
            "value": w,
            "width": width,
            "title": title,
        })

    return nodes, edges_list, stats


def save_period_html_empresas(periodo, folder=SALIDA):
    """Render the company co-contracting network of ``periodo`` to ``folder``.

    Returns ``None`` when the projection has no nodes; otherwise returns
    ``(file_name, stats)``, where ``file_name`` is relative to ``folder`` and
    ``stats`` is the summary from ``build_empresa_projection``.
    """
    nodos, aristas, resumen = build_empresa_projection(periodo)
    if not nodos:
        return None

    archivo = "empresas_consorcios_" + slugify_periodo(periodo) + ".html"
    titulo = f"Proyección sobre empresas — consorcios — {periodo}"

    folder.mkdir(parents=True, exist_ok=True)
    _write_visnetwork_html(nodos, aristas, str(folder / archivo), titulo)
    return archivo, resumen


# ============================================================
# GENERAR TODOS LOS HTMLs + INDEX BIPARTITO
# ============================================================
# One entry per period that produced a page; this list feeds the index viewer.
items = []

for p in PERIODOS_UNICOS:
    res = save_period_html(p)
    if not res:
        continue  # nothing was written for this period

    fname, st = res
    # NaN is not valid JSON, so a missing density is stored as None (-> null).
    dens = None if pd.isna(st["density"]) else float(st["density"])
    items.append(dict(
        p=str(st["periodo"]),
        f=fname,
        density=dens,
        n_regiones=int(st["n_regiones"]),
        n_empresas=int(st["n_empresas"]),
        n_contratos=int(st["n_contratos"]),
    ))

print("HTMLs bipartitos generados:", [it["f"] for it in items])

# Make sure the output folder exists even if no per-period page was written.
SALIDA.mkdir(parents=True, exist_ok=True)
index_path = SALIDA / "index.html"
# Per-period metadata serialised for embedding in the page's script.
items_js = json.dumps(items, ensure_ascii=False)

# Static viewer page: a period selector, previous/next buttons, a KPI strip and
# an iframe that loads the per-period file. It is a plain (non f-) string, so
# the CSS/JS braces are literal; the __ITEMS__ marker is filled in below.
index_code = """<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="utf-8"/>
<title>Redes de contratación por periodo</title>
<style>
body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Arial,sans-serif;margin:0;padding:20px;background:#f5f5f5;}
.container{max-width:1200px;margin:0 auto;}
h2{margin-top:0;margin-bottom:10px;}
iframe{width:100%;height:80vh;border:1px solid #ddd;border-radius:8px;background:#fff;}
.bar{display:flex;gap:10px;align-items:center;flex-wrap:wrap;margin-bottom:15px;}
select,button{padding:6px 10px;border-radius:6px;border:1px solid #ccc;background:#fff;cursor:pointer;}
button:hover{background:#f0f0f0;}
.kpi{background:#fff;border:1px solid #ddd;padding:6px 12px;border-radius:6px;}
</style>
</head>
<body>
<div class="container">
  <h2>Redes de contratación — Periodos de gobierno (bipartito regiones–empresas)</h2>
  <div class="bar">
    <button onclick="prev()">⟨ Anterior</button>
    <label for="period">Periodo:</label>
    <select id="period" onchange="go(this.value)"></select>
    <button onclick="next()">Siguiente ⟩</button>
    <span class="kpi" id="kpi"></span>
  </div>
  <iframe id="view" src="" loading="lazy"></iframe>
</div>
<script>
const items = __ITEMS__;
let idx = 0;

function fillSelect(){
  const sel = document.getElementById("period");
  sel.innerHTML = "";
  items.forEach((it,i)=>{
    const o = document.createElement("option");
    o.value = i;
    o.textContent = it.p;
    sel.appendChild(o);
  });
}

function updateKPI(){
  const it = items[idx];
  const dens = (it.density !== null && !isNaN(it.density)) ? it.density.toFixed(4) : "—";
  document.getElementById("kpi").innerHTML =
    "<b>Densidad:</b> "+dens+
    " | <b>Regiones:</b> "+it.n_regiones+
    " | <b>Empresas:</b> "+it.n_empresas+
    " | <b>Contratos:</b> "+it.n_contratos;
}

function load(){
  if(!items.length) return;
  const it = items[idx];
  document.getElementById("view").src = it.f;
  document.getElementById("period").selectedIndex = idx;
  updateKPI();
}

function go(i){ idx = parseInt(i); load(); }
function prev(){ idx = (idx - 1 + items.length) % items.length; load(); }
function next(){ idx = (idx + 1) % items.length; load(); }

fillSelect();
load();
</script>
</body>
</html>
"""

# Inject the serialised metadata and write the page as UTF-8.
index_code = index_code.replace("__ITEMS__", items_js)
index_path.write_text(index_code, encoding="utf-8")

print("✅ Index bipartito generado en:", index_path.resolve())


# ============================================================
# GENERAR HTMLs + INDEX PARA PROYECCIÓN DE EMPRESAS
# ============================================================
# One entry per period that has at least one consortium; feeds the company index.
items_emp = []

for p in PERIODOS_UNICOS:
    res = save_period_html_empresas(p)
    if not res:
        continue  # no consortium in this period, so no page was written

    fname, st = res
    # NaN is not valid JSON, so a missing density is stored as None (-> null).
    dens = None if pd.isna(st["density"]) else float(st["density"])
    items_emp.append(dict(
        p=str(st["periodo"]),
        f=fname,
        density=dens,
        n_empresas=int(st["n_empresas"]),
        n_aristas=int(st["n_aristas"]),
        n_consortios=int(st["n_consortios"]),
    ))

print("HTMLs proyección empresas generados:", [it["f"] for it in items_emp])

index_emp_path = SALIDA / "index_empresas.html"
# Per-period metadata serialised for embedding in the page's script.
items_emp_js = json.dumps(items_emp, ensure_ascii=False)

# Static viewer page for the company projection: same layout as the bipartite
# index, with its own element ids and KPI fields. It is a plain (non f-) string,
# so the CSS/JS braces are literal; the __ITEMS_EMP__ marker is filled in below.
index_emp_code = """<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="utf-8"/>
<title>Proyección sobre empresas — consorcios</title>
<style>
body{font-family:system-ui,-apple-system,Segoe UI,Roboto,Arial,sans-serif;margin:0;padding:20px;background:#f5f5f5;}
.container{max-width:1200px;margin:0 auto;}
h2{margin-top:0;margin-bottom:10px;}
iframe{width:100%;height:80vh;border:1px solid #ddd;border-radius:8px;background:#fff;}
.bar{display:flex;gap:10px;align-items:center;flex-wrap:wrap;margin-bottom:15px;}
select,button{padding:6px 10px;border-radius:6px;border:1px solid #ccc;background:#fff;cursor:pointer;}
button:hover{background:#f0f0f0;}
.kpi{background:#fff;border:1px solid #ddd;padding:6px 12px;border-radius:8px;}
</style>
</head>
<body>
<div class="container">
  <h2>Proyección sobre empresas — Consorcios por periodo</h2>
  <div class="bar">
    <button onclick="prev()">⟨ Anterior</button>
    <label for="period_emp">Periodo:</label>
    <select id="period_emp" onchange="go(this.value)"></select>
    <button onclick="next()">Siguiente ⟩</button>
    <span class="kpi" id="kpi_emp"></span>
  </div>
  <iframe id="view_emp" src="" loading="lazy"></iframe>
</div>
<script>
const items = __ITEMS_EMP__;
let idx = 0;

function fillSelect(){
  const sel = document.getElementById("period_emp");
  sel.innerHTML = "";
  items.forEach((it,i)=>{
    const o = document.createElement("option");
    o.value = i;
    o.textContent = it.p;
    sel.appendChild(o);
  });
}

function updateKPI(){
  const it = items[idx];
  const dens = (it.density !== null && !isNaN(it.density)) ? it.density.toFixed(4) : "—";
  document.getElementById("kpi_emp").innerHTML =
    "<b>Densidad:</b> "+dens+
    " | <b>Empresas:</b> "+it.n_empresas+
    " | <b>Aristas (pares de empresas):</b> "+it.n_aristas+
    " | <b>Contratos en consorcio:</b> "+it.n_consortios;
}

function load(){
  if(!items.length) return;
  const it = items[idx];
  document.getElementById("view_emp").src = it.f;
  document.getElementById("period_emp").selectedIndex = idx;
  updateKPI();
}

function go(i){ idx = parseInt(i); load(); }
function prev(){ idx = (idx - 1 + items.length) % items.length; load(); }
function next(){ idx = (idx + 1) % items.length; load(); }

fillSelect();
load();
</script>
</body>
</html>
"""

# Inject the serialised metadata and write the page as UTF-8.
index_emp_code = index_emp_code.replace("__ITEMS_EMP__", items_emp_js)
index_emp_path.write_text(index_emp_code, encoding="utf-8")

print("✅ Index empresas generado en:", index_emp_path.resolve())


Periodos encontrados: ['2003-2006', '2007-2010', '2011-2014', '2015-2018', '2019-2022', '2023-2026']
HTMLs bipartitos generados: ['red_2003-2006.html', 'red_2007-2010.html', 'red_2011-2014.html', 'red_2015-2018.html', 'red_2019-2022.html', 'red_2023-2026.html']
✅ Index bipartito generado en: C:\Users\aaro\OneDrive - APOYO COMUNICACION\Documentos\GitHub\Tesis_Redes\Redes_Pasco_Ancash_Periodos\docs\index.html
HTMLs proyección empresas generados: ['empresas_consorcios_2003-2006.html', 'empresas_consorcios_2007-2010.html', 'empresas_consorcios_2011-2014.html', 'empresas_consorcios_2015-2018.html', 'empresas_consorcios_2019-2022.html', 'empresas_consorcios_2023-2026.html']
✅ Index empresas generado en: C:\Users\aaro\OneDrive - APOYO COMUNICACION\Documentos\GitHub\Tesis_Redes\Redes_Pasco_Ancash_Periodos\docs\index_empresas.html
