In [15]:
import os, glob, io, re
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter
import numpy as np
from datetime import date
import unicodedata


# Ruta base (ajústala si cambia)
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G")
DEFAULT_IGNORE = ["SubNetwork,", "instance(s)", "NodeId"]

# Lista de 31 encabezados, en el orden en que los quieres (eNBId queda en W si mantienes este orden)
HEADERS = [
     "RNC","administrativeState","cId","iubLinkRef",
    "localCellId","lac","maximumTransmissionPower","maxPwrMax",
    "mocnCellProfileRef","operationalState","primaryCpichPower","primaryScramblingCode",
    "rac","sac","tCell","uarfcnDl",
    "uarfcnUl","uraList","UtranCellId",
    "NodeB","NodeBUnique","LAT","LON","AT&T_Site_Name",
    "Node_B_ID", "Gestor"
]




In [16]:
def _filtered_lines(path, ignorelines):
    """Genera las líneas útiles (no vacías y sin patrones a ignorar)."""
    with open(path, "r", encoding="utf-8") as f:
        for ln in f:
            if not ln.strip():
                continue
            if any(pat in ln for pat in (ignorelines or [])):
                continue
            yield ln

def read_pattern_to_df(filenamepattern: str, ignorelines=None, sep="\t", header=None):
    """
    Lee todos los TXT que matchean '<pattern>_*.txt', limpiaLineas,
    y devuelve un único DataFrame con una columna extra 'Gestor' (sufijo numérico del archivo).
    No escribe nada a disco.
    """
    ignorelines = DEFAULT_IGNORE if ignorelines is None else ignorelines
    searchpattern = str(BASE_DIR / f"{filenamepattern}_*.txt")
    files = glob.glob(searchpattern)

    dfs = []
    for path in files:
        # Extrae NN de *_NN.txt
        m = re.search(r"_(\d+)\.txt$", os.path.basename(path), flags=re.IGNORECASE)
        gestor = m.group(1) if m else ""

        # Filtra/limpia antes de parsear
        buf = io.StringIO("".join(_filtered_lines(path, ignorelines)))
        if buf.tell() == 0:
            # StringIO vacío → intenta leer igualmente (por seguridad)
            buf.seek(0)

        # Importante: sin encabezados en origen
        df = pd.read_csv(buf, sep=sep, header=header, engine="python")
        if df.empty:
            continue

        # Añade Gestor como última columna
        df["Gestor"] = gestor
        dfs.append(df)

    if not dfs:
        return pd.DataFrame()

    return pd.concat(dfs, ignore_index=True)

In [17]:

# EUtranCell
eu_df   = read_pattern_to_df("UtranCell")


In [18]:
# Copia para no tocar el original
eu_df_mod = eu_df.copy()

print("Shape original:", eu_df_mod.shape)

# Eliminar columna B (índice 1 en pandas, 0-based)
if eu_df_mod.shape[1] >= 2:
    eu_df_mod.drop(eu_df_mod.columns[1], axis=1, inplace=True)
else:
    print("Aviso: no hay suficiente número de columnas para eliminar la columna B.")

print("Shape nuevo:", eu_df_mod.shape)

# Vista de verificación (muestra solo unas filas)
pd.set_option("display.max_columns", None)  # opcional
try:
    display(eu_df_mod.head(10))
except NameError:
    # Por si no estás en notebook
    print(eu_df_mod.head(10).to_string(index=False))

Shape original: (24706, 22)
Shape nuevo: (24706, 21)


Unnamed: 0,0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,Gestor
0,BCSRNC212,EMUBCSPAZ0420_1_H,UNLOCKED,65055,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",65055,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,350,48,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",0,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",,14
1,BCSRNC212,MUBCSCAB0405_1_D,UNLOCKED,23334,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",23334,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",DISABLED,350,297,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",0,637,237,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",MUBCSCAB0405_1_D,14
2,BCSRNC212,EMUBCSCAB0382_3_H,UNLOCKED,65009,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",65009,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,300,400,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",2,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",,14
3,BCSRNC212,UBCSCAB04085,UNLOCKED,4085,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",4085,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,300,304,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",2,637,237,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",UBCSCAB04085,14
4,BCSRNC212,EMUBCSPAZ0431_1_H,UNLOCKED,65072,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",65072,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,350,312,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",0,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",,14
5,BCSRNC212,EMUBCSCAB0378_1_H,UNLOCKED,65001,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",65001,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,330,320,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",0,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",,14
6,BCSRNC212,EMUBCSPAZ0415_3_H,UNLOCKED,65051,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",65051,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,350,16,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",2,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",,14
7,BCSRNC212,MUBCSCAB0387_4_H,UNLOCKED,23925,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",23925,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,320,111,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",8,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",MUBCSCAB0387_4_H,14
8,BCSRNC212,EMUBCSCAB0408_1_H,UNLOCKED,65046,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",65046,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,300,296,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",0,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",,14
9,BCSRNC212,EMUBCSCAB0401_3_H,UNLOCKED,65033,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",65033,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",460,30,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",ENABLED,300,121,"SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...","SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,M...",2,687,287,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=BCSRNC212,...",,14


In [34]:

# ── Helpers rápidos ──────────────────────────────────────────
def pad_to_n_cols_fast(df: pd.DataFrame, n: int) -> pd.DataFrame:
    """Rellena columnas faltantes en un solo paso usando concat (soporta nombres no-string)."""
    if df.shape[1] >= n:
        return df
    add = pd.DataFrame(pd.NA, index=df.index, columns=range(df.shape[1], n))
    return pd.concat([df, add], axis=1)

def move_col_fast(df: pd.DataFrame, src_idx: int, dst_idx: int) -> pd.DataFrame:
    """Mueve columna por índice reordenando de una sola vez (sin drop/insert)."""
    idx = list(range(df.shape[1]))
    col = idx.pop(src_idx)
    if dst_idx > src_idx:
        dst_idx -= 1
    idx.insert(dst_idx, col)
    return df.iloc[:, idx]

# ── 1) Base SIN headers (integrado desde TXT) ─────────────────
# Integra y limpia: agrega al final la columna 'Gestor' cruda por sufijo *_NN.txt
eu_df = read_pattern_to_df("UtranCell")

# Si aún no eliminaste la columna B, hazlo aquí (índice 1, 0-based)
eu_df_mod = eu_df.drop(eu_df.columns[1], axis=1) if eu_df.shape[1] >= 2 else eu_df

# Este es tu "Modified_workfile.xlsx" en memoria
df_base = eu_df_mod
print("Start shape df_base:", df_base.shape)

# ── 2) Asegurar mínimo 20 columnas para poder mover B→S (S = 19) ─
df_base = pad_to_n_cols_fast(df_base, 20)

# ── 3) Mover B (1) → S (19) sin copias grandes ─────────────────
df_base = move_col_fast(df_base, src_idx=1, dst_idx=19)
print("Tras mover B→S:", df_base.shape)

# ── 4) Capturar y quitar última como 'Gestor' (venía del sufijo) ─
gestor_series = df_base.iloc[:, -1]
df_base = df_base.iloc[:, :-1]

# ── 5) Mapear por POSICIÓN a HEADERS (ahora HEADERS SÍ incluye 'Gestor') ─
#     Primero mapeamos SIN 'Gestor', luego reinsertamos 'Gestor' y reordenamos final.
expected_wo_gestor = len(HEADERS) - 1
df_base = pad_to_n_cols_fast(df_base, expected_wo_gestor)

# Si sobran columnas, recorta en un solo iloc
if df_base.shape[1] > expected_wo_gestor:
    df_base = df_base.iloc[:, :expected_wo_gestor]

# Nombra por posición (todos menos 'Gestor')
df_base.columns = HEADERS[:-1]

# Reinsertar 'Gestor' y reordenar exactamente a HEADERS
df_base["Gestor"] = gestor_series.values
df_base = df_base[HEADERS]

print("Final shape mapeado:", df_base.shape)

# ── 6) Vista de verificación (sin Excel) ─────────────────────
pd.set_option("display.max_columns", None)
try:
    display(df_base.head(12))
except NameError:
    print(df_base.head(12).to_string(index=False))


  nodeb_final = nodeb_final.combine_first(s)


Unnamed: 0,NodeB,NodeBUnique,LAT,LON,AT&T_Site_Name
1,,,,,
3,,,,,
7,,,,,
13,,,,,
20,,,,,
21,,,,,
23,,,,,
30,,,,,
31,,,,,
33,,,,,


PermissionError: [Errno 13] Permission denied: 'C:\\Users\\SCaracoza\\Documents\\AT&T\\LST Cell Ran\\Ericsson\\3G\\All_Ericsson_3G_202510.xlsx'

In [21]:

# Columnas con cadenas estilo "k1=v1,k2=v2,..."
cols_mo = [
    "iubLinkRef",        # ... ,IubLink=Iub_0420     -> Iub_0420
    "lac",               # ... ,LocationArea=41416   -> 41416
    "mocnCellProfileRef",# ... ,MocnCellProfile=...  -> Telefonica
    "rac",               # ... ,RoutingArea=146      -> 146
    "sac",               # ... ,ServiceArea=65055    -> 65055
    "uraList",           # [ ...,Ura=146]            -> 146
]

def extract_last_token_after_equal(s: pd.Series) -> pd.Series:
    """
    Devuelve el segmento después del ÚLTIMO '='.
    Limpia espacios, corchetes (), [], y coma/; final si existe.
    Cuando no hay '=', devuelve NA.
    """
    s_str = s.astype(str)
    out = (
        s_str
        .str.rsplit('=', n=1).str[-1]
        .str.strip()
        .str.strip('[]()')
        .str.replace(r'[,\;]$', '', regex=True)
    )
    has_equal = s_str.str.contains('=', regex=False)
    return out.where(has_equal, other=pd.NA)

# 1) Normalizar columnas (si existen en df_base)
for c in cols_mo:
    if c in df_base.columns:
        df_base[c] = extract_last_token_after_equal(df_base[c])

# 2) Convertir a numérico donde corresponde
cols_numericas = ["lac", "rac", "sac", "uraList"]
for c in cols_numericas:
    if c in df_base.columns:
        df_base[c] = pd.to_numeric(df_base[c], errors="coerce")

# 3) Vista de verificación (sin Excel)
cols_preview = [c for c in cols_mo if c in df_base.columns]
pd.set_option("display.max_columns", None)
print("Extracción OK → vista previa de columnas normalizadas:")
try:
    display(df_base[cols_preview].head(12))
except NameError:
    print(df_base[cols_preview].head(12).to_string(index=False))


Extracción OK → vista previa de columnas normalizadas:


Unnamed: 0,iubLinkRef,lac,mocnCellProfileRef,rac,sac,uraList
0,Iub_0420,41416,Telefonica,146,65055,146.0
1,Iub_405,41418,Telefonica,148,23334,148.0
2,Iub_0382,41418,Telefonica,148,65009,148.0
3,Iub_0408,41418,ATT,148,4085,148.0
4,Iub_0431,41416,Telefonica,146,65072,146.0
5,Iub_0378,41418,Telefonica,148,65001,148.0
6,Iub_0415,41416,Telefonica,146,65051,146.0
7,Iub_0387,41418,Telefonica,148,23925,148.0
8,Iub_0408,41418,ATT,148,65046,148.0
9,Iub_0401,41418,Telefonica,148,65033,148.0


In [39]:


# ---------- Helpers ----------
def _is_blank(s: pd.Series) -> pd.Series:
    return s.isna() | s.astype(str).str.strip().eq("")

def _is_numlike(s: pd.Series) -> pd.Series:
    # detecta strings que parecen números (ej. "33", "420.0")
    return s.astype(str).str.fullmatch(r"\s*\d+(?:\.\d+)?\s*")


def coalesce_to_single_column(df, variants, target):
    present = [c for c in variants if c in df.columns]
    if not present:
        return df
    cols = [target] + [c for c in present if c != target] if target in present else present
    df[target] = df[cols].bfill(axis=1).iloc[:, 0]
    to_drop = [c for c in present if c != target]
    df.drop(columns=to_drop, inplace=True, errors="ignore")
    return df

def prev_month_yyyymm(today=None):
    if today is None:
        today = date.today()
    y = today.year
    m = today.month - 1
    if m == 0:
        y -= 1
        m = 12
    return f"{y}{m:02d}"

# ---------- Config ----------
# prev_yymm = prev_month_yyyymm()
prev_yymm = "202508"  # como en tu ejemplo

ae_path  = BASE_DIR / f"All_Ericsson_3G_{prev_yymm}.xlsx"
ept_glob = str(BASE_DIR / "EPT_ATT_UMTS_LTE_*.xlsx")

# ========== ETAPA 0: punto de partida (en memoria) ==========
merged = df_base.copy()  # No volver a usar df_base después de aquí

# ========== ETAPA 1: AE por UtranCellId + fallback EPT ==========
# --- All_Ericsson por UtranCellId ---
if ae_path.exists():
    ae_cols = ["UtranCellId", "NodeB", "LAT", "LON", "AT&T_Site_Name"]
    ae_df = pd.read_excel(ae_path, usecols=ae_cols)
    if "UtranCellId" not in ae_df.columns:
        raise KeyError(f"{ae_path} no tiene 'UtranCellId'.")

    ae_df["UtranCellId"] = ae_df["UtranCellId"].astype(str).str.strip()
    ae_df = ae_df.drop_duplicates(subset=["UtranCellId"], keep="first")

    # LAT/LON a numérico
    for c in ["LAT", "LON"]:
        if c in ae_df.columns:
            ae_df[c] = pd.to_numeric(ae_df[c], errors="coerce")

    # Normaliza llave y merge (creará NodeB_ae si ya existe NodeB en merged)
    merged["UtranCellId"] = merged["UtranCellId"].astype(str).str.strip()
    merged = merged.merge(ae_df, on="UtranCellId", how="left", suffixes=("", "_ae"))

    # Coalesce SOLO LAT/LON/Site desde AE (NO tocar NodeB aquí)
    for col in ["LAT", "LON", "AT&T_Site_Name"]:
        aux = f"{col}_ae"
        if aux in merged.columns:
            mask = _is_blank(merged[col]) if col in merged.columns else pd.Series(True, index=merged.index)
            merged[col] = merged[col].where(~mask, merged[aux])
            merged.drop(columns=[aux], inplace=True, errors="ignore")
else:
    print(f"⚠️ No se encontró {ae_path}. Se salta AE en etapa 1.")

# --- Fallback EPT por UtranCellId (completar faltantes y obtener NodeB_ept) ---
faltan1 = (
    _is_blank(merged.get("NodeB", pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("LAT",   pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("LON",   pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("AT&T_Site_Name", pd.Series(False, index=merged.index)))
)

if faltan1.any():
    ept_matches = glob.glob(ept_glob)
    if ept_matches:
        ept_file = ept_matches[0]
        ept_sheets = ["EPT_3G_LTE_OUTDOOR", "PLAN_OUTDOOR", "EPT_3G_LTE_INDOOR", "PLAN_INDOOR", "Eventos_Especiales"]

        frames = []
        for sh in ept_sheets:
            try:
                tmp = pd.read_excel(ept_file, sheet_name=sh, engine="openpyxl")
                frames.append(tmp)
            except Exception:
                pass

        if frames:
            ept_df = pd.concat(frames, ignore_index=True)

            # Unificar nombres a los destinos esperados para lookup por UtranCellId
            ept_df = coalesce_to_single_column(ept_df, ["ATT_CELL_ID_Name", "UtranCellId"], "UtranCellId")
            ept_df = coalesce_to_single_column(ept_df, ["AT&T_Node_Name", "NodeB"], "NodeB")
            ept_df = coalesce_to_single_column(ept_df, ["Latitud", "LAT"], "LAT")
            ept_df = coalesce_to_single_column(ept_df, ["Longitud", "LON"], "LON")
            ept_df = coalesce_to_single_column(ept_df, ["AT&T_Site_Name"], "AT&T_Site_Name")

            if "UtranCellId" not in ept_df.columns:
                raise KeyError("EPT no tiene 'UtranCellId'/'ATT_CELL_ID_Name' para fallback (etapa 1).")

            ept_df["UtranCellId"] = ept_df["UtranCellId"].astype(str).str.strip()
            for c in ["LAT", "LON"]:
                if c in ept_df.columns:
                    ept_df[c] = (
                        ept_df[c].astype(str).str.strip("[]").str.replace(",", "", regex=False)
                    )
                    ept_df[c] = pd.to_numeric(ept_df[c], errors="coerce")

            ept_df = ept_df.drop_duplicates(subset=["UtranCellId"], keep="first")

            # Build lookup (mantener NodeB_ept para construir NodeB final)
            cols_keep = [c for c in ["UtranCellId", "NodeB", "LAT", "LON", "AT&T_Site_Name"] if c in ept_df.columns]
            ept_lookup = ept_df[cols_keep].rename(columns={
                "NodeB": "NodeB_ept",
                "LAT": "LAT_ept",
                "LON": "LON_ept",
                "AT&T_Site_Name": "AT&T_Site_Name_ept",
            })

            merged = merged.merge(ept_lookup, on="UtranCellId", how="left")

            # Completar SOLO faltantes de LAT/LON/Site desde EPT
            need_lat  = _is_blank(merged["LAT"]) if "LAT" in merged.columns else pd.Series(False, index=merged.index)
            need_lon  = _is_blank(merged["LON"]) if "LON" in merged.columns else pd.Series(False, index=merged.index)
            need_site = _is_blank(merged["AT&T_Site_Name"]) if "AT&T_Site_Name" in merged.columns else pd.Series(False, index=merged.index)

            if "LAT_ept" in merged.columns:            merged.loc[need_lat,  "LAT"] = merged.loc[need_lat,  "LAT_ept"]
            if "LON_ept" in merged.columns:            merged.loc[need_lon,  "LON"] = merged.loc[need_lon,  "LON_ept"]
            if "AT&T_Site_Name_ept" in merged.columns: merged.loc[need_site, "AT&T_Site_Name"] = merged.loc[need_site, "AT&T_Site_Name_ept"]

            # Limpia auxiliares (NodeB_ept se conserva para NodeB final)
            merged.drop(columns=[c for c in ["LAT_ept","LON_ept","AT&T_Site_Name_ept"] if c in merged.columns],
                        inplace=True, errors="ignore")
        else:
            print("⚠️ EPT sin hojas legibles; no se aplicó fallback en etapa 1.")
    else:
        print("⚠️ No se encontró archivo EPT para etapa 1.")
else:
    print("AE cubrió 100% en etapa 1; no se necesita EPT.")

# --- Construcción de NodeB SOLO desde AE/EPT (manteniendo posición) ---
cands = []
if "NodeB_ae" in merged.columns:  cands.append(merged["NodeB_ae"])
if "NodeB_ept" in merged.columns: cands.append(merged["NodeB_ept"])

if cands:
    nodeb_final = cands[0]
    for s in cands[1:]:
        nodeb_final = nodeb_final.combine_first(s)   # toma el primero no-nulo
else:
    nodeb_final = pd.Series(pd.NA, index=merged.index)

nodeb_final = nodeb_final.fillna("")

# Asignar sobre 'NodeB' manteniendo su posición (o insertar en índice de HEADERS)
if "NodeB" in merged.columns:
    merged.loc[:, "NodeB"] = nodeb_final
else:
    idx_nodeb = HEADERS.index("NodeB") if "NodeB" in HEADERS else len(merged.columns)
    merged.insert(idx_nodeb, "NodeB", nodeb_final)

# Limpia auxiliares de NodeB
merged.drop(columns=[c for c in ["NodeB_ae","NodeB_ept"] if c in merged.columns],
            inplace=True, errors="ignore")

# ======== Vista de verificación (sin guardar) ========
cols_preview = [c for c in ["UtranCellId","NodeB","LAT","LON","AT&T_Site_Name"] if c in merged.columns]
print("ETAPA 1 OK → preview:")
try:
    display(merged.loc[:, cols_preview].head(12))
except NameError:
    print(merged.loc[:, cols_preview].head(12).to_string(index=False))

# `merged` queda listo para siguientes etapas en memoria.


ETAPA 1 OK → preview:


Unnamed: 0,UtranCellId,NodeB,LAT,LON,AT&T_Site_Name
0,EMUBCSPAZ0420_1_H,UBCSPAZ0420,24.395315,-110.690603,BCSPAZ0420
1,MUBCSCAB0405_1_D,,,,
2,EMUBCSCAB0382_3_H,UBCSCAB0382,22.929556,-109.864103,BCSCAB0382
3,UBCSCAB04085,,,,
4,EMUBCSPAZ0431_1_H,UBCSPAZ0431,24.26109,-110.33373,BCSPAZ0431
5,EMUBCSCAB0378_1_H,UBCSCAB0378,23.151139,-109.7065,BCSCAB0378
6,EMUBCSPAZ0415_3_H,UBCSPAZ0415,24.091031,-110.368263,BCSPAZ0415
7,MUBCSCAB0387_4_H,,,,
8,EMUBCSCAB0408_1_H,UBCSCAB0408,23.288741,-109.762422,BCSCAB0408
9,EMUBCSCAB0401_3_H,UBCSCAB0401,22.912046,-109.92511,BCSCAB0401


In [40]:
# ========== ETAPA 2: AE por (RNC+NodeB) → Node_B_ID + fallback EPT por (RNC+AT&T_Node_Name) ==========
# Continuamos usando `merged` que viene de la etapa 1 (NO volver a df_base)

# --- AE por (RNC, NodeB) ---
if ae_path.exists():
    ae_cols2 = ["RNC", "NodeB", "Node_B_ID"]
    ae_df2 = pd.read_excel(ae_path, usecols=[c for c in ae_cols2 if c])

    missing = [c for c in ["RNC", "NodeB"] if c not in ae_df2.columns]
    if missing:
        print(f"⚠️ AE sin columnas {missing} para etapa 2. Se salta AE etapa 2.")
    else:
        ae_df2["RNC"]   = ae_df2["RNC"].astype(str).str.strip()
        ae_df2["NodeB"] = ae_df2["NodeB"].astype(str).str.strip()
        ae_df2 = ae_df2.drop_duplicates(subset=["RNC", "NodeB"], keep="first")
        ae_df2 = ae_df2.rename(columns={"Node_B_ID": "Node_B_ID_ae"})

        merged["RNC"]   = merged["RNC"].astype(str).str.strip()
        merged["NodeB"] = merged["NodeB"].astype(str).str.strip()

        merged = merged.merge(ae_df2, on=["RNC", "NodeB"], how="left")

        # Completar SOLO Node_B_ID desde AE
        if "Node_B_ID_ae" in merged.columns:
            if "Node_B_ID" in merged.columns:
                mask = _is_blank(merged["Node_B_ID"])
                merged.loc[mask, "Node_B_ID"] = merged.loc[mask, "Node_B_ID_ae"]
            else:
                merged["Node_B_ID"] = merged["Node_B_ID_ae"]
            merged.drop(columns=["Node_B_ID_ae"], inplace=True, errors="ignore")
else:
    print(f"⚠️ No se encontró {ae_path} para etapa 2.")

# --- Fallback EPT por (RNC, AT&T_Node_Name) ---
faltan2 = _is_blank(merged.get("Node_B_ID", pd.Series(False, index=merged.index)))
if faltan2.any():
    ept_matches = glob.glob(ept_glob)
    if ept_matches:
        ept_file = ept_matches[0]
        ept_sheets = ["EPT_3G_LTE_OUTDOOR", "PLAN_OUTDOOR", "EPT_3G_LTE_INDOOR", "PLAN_INDOOR", "Eventos_Especiales"]

        frames = []
        for sh in ept_sheets:
            try:
                tmp = pd.read_excel(ept_file, sheet_name=sh, engine="openpyxl")
                frames.append(tmp)
            except Exception:
                pass

        if frames:
            ept_df2 = pd.concat(frames, ignore_index=True)

            # Normalizar nombres de columnas que varían
            ept_df2 = coalesce_to_single_column(ept_df2, ["RNC"], "RNC")
            ept_df2 = coalesce_to_single_column(ept_df2, ["AT&T_Node_Name", "NodeB", "Node_B_Name"], "AT&T_Node_Name")
            ept_df2 = coalesce_to_single_column(ept_df2, ["Node_B_ID", "NodeB_ID", "NodeB Id", "Node_B Id"], "Node_B_ID")

            missing = [c for c in ["RNC", "AT&T_Node_Name", "Node_B_ID"] if c not in ept_df2.columns]
            if missing:
                raise KeyError(f"EPT carece de columnas para etapa 2: {missing}")

            for c in ["RNC", "AT&T_Node_Name"]:
                ept_df2[c] = ept_df2[c].astype(str).str.strip()

            # Lookup único por (RNC, AT&T_Node_Name)
            ept_lookup2 = (
                ept_df2[["RNC", "AT&T_Node_Name", "Node_B_ID"]]
                .dropna(subset=["RNC", "AT&T_Node_Name"])
                .drop_duplicates(subset=["RNC", "AT&T_Node_Name"], keep="first")
                .rename(columns={"Node_B_ID": "Node_B_ID_ept"})
            )

            merged["RNC"]   = merged["RNC"].astype(str).str.strip()
            merged["NodeB"] = merged["NodeB"].astype(str).str.strip()

            merged = merged.merge(
                ept_lookup2,
                left_on=["RNC", "NodeB"],
                right_on=["RNC", "AT&T_Node_Name"],
                how="left"
            )

            # Completar SOLO faltantes
            need_id = _is_blank(merged["Node_B_ID"]) if "Node_B_ID" in merged.columns else pd.Series(False, index=merged.index)
            if "Node_B_ID_ept" in merged.columns:
                merged.loc[need_id, "Node_B_ID"] = merged.loc[need_id, "Node_B_ID_ept"]

            # Limpieza
            merged.drop(columns=[c for c in ["Node_B_ID_ept", "AT&T_Node_Name"] if c in merged.columns],
                        inplace=True, errors="ignore")
        else:
            print("⚠️ EPT sin hojas legibles; no se aplicó fallback en etapa 2.")
    else:
        print("⚠️ No se encontró archivo EPT para etapa 2.")
else:
    print("AE cubrió 100% Node_B_ID en etapa 2; no se necesita EPT.")

# ========== Conformar columnas a HEADERS (sin extras) ==========
final_cols = HEADERS[:]  # HEADERS ya incluye "Node_B_ID" y (si lo agregaste) "Gestor"
for c in final_cols:
    if c not in merged.columns:
        merged[c] = pd.NA
merged = merged[final_cols]

# ========== Vista previa (monitoreo) ==========
cols_preview = [c for c in ["RNC", "NodeB", "Node_B_ID"] if c in merged.columns]
print("ETAPA 2 OK → preview de claves:")
try:
    display(merged.loc[:, cols_preview].head(12))
except NameError:
    print(merged.loc[:, cols_preview].head(12).to_string(index=False))

# (Opcional) si quieres guardar aquí:
# final_path = BASE_DIR / f"All_Ericsson_3G_{date.today().year}{date.today().month:02d}.xlsx"
# merged.to_excel(final_path, index=False)
# print("✅ Guardado FINAL →", final_path)


ETAPA 2 OK → preview de claves:


Unnamed: 0,RNC,NodeB,Node_B_ID
0,BCSRNC212,UBCSPAZ0420,420.0
1,BCSRNC212,,
2,BCSRNC212,UBCSCAB0382,382.0
3,BCSRNC212,,
4,BCSRNC212,UBCSPAZ0431,431.0
5,BCSRNC212,UBCSCAB0378,378.0
6,BCSRNC212,UBCSPAZ0415,415.0
7,BCSRNC212,,
8,BCSRNC212,UBCSCAB0408,408.0
9,BCSRNC212,UBCSCAB0401,401.0


In [48]:
# === 0) Config y fecha actual ===
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G")
today   = date.today()
yyyymm  = f"{today.year}{today.month:02d}"

final_excel = BASE_DIR / f"All_Ericsson_3G_{yyyymm}.xlsx"
tmp_excel   = BASE_DIR / f"~tmp_All_Ericsson_3G_{yyyymm}.xlsx"

# Usa tu DataFrame final en memoria
df_out = merged.copy()  # o df_sorted si ya lo traes ordenado

# === 1) Asegurar columnas y orden base ===
HEADERS_FINAL = HEADERS[:] if "NodeBUnique" in HEADERS else HEADERS + ["NodeBUnique"]

for col in HEADERS_FINAL:
    if col not in df_out.columns:
        df_out[col] = pd.NA

df_out = df_out[HEADERS_FINAL]

# === 2) Guardar sin formato, sin 'nan' ===
df_out.to_excel(final_excel, index=False, na_rep="")

print(f"✅ Archivo final sin formato guardado → {final_excel}")

✅ Archivo final sin formato guardado → C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\All_Ericsson_3G_202510.xlsx


In [49]:

def _is_numlike(s: pd.Series) -> pd.Series:
    return s.astype(str).str.fullmatch(r"\s*\d+(?:\.\d+)?\s*")

# === Config y rutas ===
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G")
today   = date.today()
yyyymm  = f"{today.year}{today.month:02d}"

final_excel = BASE_DIR / f"All_Ericsson_3G_{yyyymm}.xlsx"
tmp_excel   = BASE_DIR / f"~tmp_All_Ericsson_3G_{yyyymm}.xlsx"

# === 1) Leer el archivo final existente ===
df = pd.read_excel(final_excel, engine="openpyxl")

if "NodeB" not in df.columns:
    raise KeyError("El archivo no contiene la columna 'NodeB'.")

# === 2) Limpiar NodeB y ordenar (A→Z, estable) ===
df["NodeB"] = (
    df["NodeB"]
    .astype("string")
    .str.strip()
    .where(~_is_numlike(df["NodeB"].astype("string")), "")  # si parece numérico → vacío
)

df = df.sort_values(
    by="NodeB",
    key=lambda s: s.astype("string").str.strip().str.casefold(),
    kind="mergesort",
    na_position="last",
).reset_index(drop=True)

# === 3) Calcular NodeBUnique (solo primera aparición) ===
key = df["NodeB"].astype("string").str.strip().str.casefold()
first_hit = ~key.duplicated(keep="first") & key.ne("")
df["NodeBUnique"] = df["NodeB"].where(first_hit, "")

# === 4) Asegurar columnas/orden para exportar ===
HEADERS_FINAL = HEADERS[:] if "NodeBUnique" in HEADERS else HEADERS + ["NodeBUnique"]
for col in HEADERS_FINAL:
    if col not in df.columns:
        df[col] = pd.NA
df = df[HEADERS_FINAL]

# === 5) Escribir temporal sin 'nan' ===
df.to_excel(tmp_excel, index=False, na_rep="")

# === 6) Estilos SOBRE el archivo ya ordenado ===
wb = load_workbook(tmp_excel)
ws = wb.active

# Congelar fila de encabezado y aplicar autofiltro
ws.freeze_panes = "A2"
ws.auto_filter.ref = ws.dimensions

# Encabezados rotados y fondo gris
header_fill = PatternFill(fill_type="solid", start_color="FFBFBFBF", end_color="FFBFBFBF")
ws.row_dimensions[1].height = 90
for col_idx, header in enumerate(HEADERS_FINAL, start=1):
    c = ws.cell(row=1, column=col_idx)
    c.value = header
    c.font = Font(name="Aptos Narrow", size=9, bold=True)
    c.alignment = Alignment(textRotation=90, horizontal="center", vertical="bottom", wrap_text=True)
    c.fill = header_fill

# Anchos recomendados (ajusta a gusto)
width_overrides = {
    "NodeB": 15,
    "AT&T_Site_Name": 15,
    "RNC": 14,
    "NodeBUnique": 15,
    "UtranCellId": 20,
    "LAT": 15,
    "LON": 15,
}
for col_name, width in width_overrides.items():
    if col_name in HEADERS_FINAL:
        col_letter = get_column_letter(HEADERS_FINAL.index(col_name) + 1)
        ws.column_dimensions[col_letter].width = width

# Alineación vertical del cuerpo
for row in ws.iter_rows(min_row=2, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
    for cell in row:
        cell.alignment = Alignment(vertical="center")

# === 7) Guardar definitivo (sobrescribe el mismo nombre) y limpiar tmp ===
wb.save(final_excel)
wb.close()  # evita WinError 32 en Windows

try:
    tmp_excel.unlink()
except Exception as e:
    print("(Aviso) No se pudo borrar temporal:", e)

print(f"✅ Ordenado, NodeBUnique calculado y estilo aplicado → {final_excel}")


(Aviso) No se pudo borrar temporal: [WinError 32] El proceso no tiene acceso al archivo porque está siendo utilizado por otro proceso: 'C:\\Users\\SCaracoza\\Documents\\AT&T\\LST Cell Ran\\Ericsson\\3G\\~tmp_All_Ericsson_3G_202510.xlsx'
✅ Ordenado, NodeBUnique calculado y estilo aplicado → C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\All_Ericsson_3G_202510.xlsx
