In [31]:
import os, glob, io, re
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Font
import numpy as np
from datetime import date
import time

import unicodedata


# Base folder that holds the input TXT/XLSX files and receives the generated workbook
# (adjust it if the location changes).
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\5G")
# Substrings that mark a TXT line as noise: any line containing one of them is skipped
# by _filtered_lines before parsing.
DEFAULT_IGNORE = ["SubNetwork,", "instance(s)", "NodeId"]
# Final column order of the output (39 headers). "Gestor" comes first, followed by the
# attribute columns up to "trsPeriodicity"; the last five (TAC, Name Unique, LAT, LON,
# AT&T_Site_Name) are filled in by later cells.
HEADERS = [
     "Gestor","NodeId","NRCellDUId","administrativeState","availabilityStatus",
    "bandList","cellBarred","cellLocalId","cellRange",
    "cellReservedForOperator","cellState","configuredEpsTAC","csiRsShiftingPrimary",
    "csiRsShiftingSecondary","dftSOfdmPuschStartRsrpThresh","maxUeSpeed","nCI",
    "nRCellDUId","nRPCI","nRSectorCarrierRef",
    "nRTAC","operationalState","pLMNIdList",
    "pMax", "pointAArfcnDlFdd", "pointAArfcnUlFdd", "pointAFrequencyDlFdd", "rachRootSequence", "serviceState", "ssbFrequency", "ssbFrequencyAutoSelected", "tddSpecialSlotPattern", "tddUlDlPattern", "trsPeriodicity", "TAC", "Name Unique","LAT","LON","AT&T_Site_Name",

]




In [32]:
def _filtered_lines(path, ignorelines):
    """Genera las l√≠neas √∫tiles (no vac√≠as y sin patrones a ignorar)."""
    with open(path, "r", encoding="utf-8") as f:
        for ln in f:
            if not ln.strip():
                continue
            if any(pat in ln for pat in (ignorelines or [])):
                continue
            yield ln

def read_pattern_to_df(filenamepattern: str, ignorelines=None, sep="\t", header=None):
    """Load every ``<filenamepattern>_*.txt`` file under BASE_DIR into one DataFrame.

    Each file is first cleaned with ``_filtered_lines`` (blank lines and lines
    containing any ``ignorelines`` substring are removed) and then parsed with
    ``pandas.read_csv``. A trailing ``"Gestor"`` column is added holding the
    numeric suffix of the file name (``*_NN.txt`` -> ``"NN"``, or ``""`` when the
    name has no such suffix). Nothing is written to disk.

    Parameters
    ----------
    filenamepattern : str
        File-name prefix; files matching ``<prefix>_*.txt`` are read.
    ignorelines : list[str] | None
        Substrings that mark a line as noise. ``None`` uses ``DEFAULT_IGNORE``.
    sep : str
        Field separator passed to ``read_csv`` (tab by default).
    header : int | None
        ``header`` argument passed to ``read_csv``; ``None`` means the source
        files carry no header row.

    Returns
    -------
    pandas.DataFrame
        All files concatenated with a fresh index, or an empty DataFrame when
        no file matched or every file was empty after filtering.
    """
    ignorelines = DEFAULT_IGNORE if ignorelines is None else ignorelines
    searchpattern = str(BASE_DIR / f"{filenamepattern}_*.txt")
    # glob returns matches in arbitrary order; sort so the row order of the
    # concatenated result is reproducible from run to run.
    files = sorted(glob.glob(searchpattern))

    dfs = []
    for path in files:
        # Extract NN from *_NN.txt
        m = re.search(r"_(\d+)\.txt$", os.path.basename(path), flags=re.IGNORECASE)
        gestor = m.group(1) if m else ""

        # Filter/clean before parsing
        text = "".join(_filtered_lines(path, ignorelines))
        if not text:
            # Nothing survived the filter. read_csv raises EmptyDataError on an
            # empty buffer, so skip the file instead of aborting the whole load.
            continue

        # Source files have no header row by default
        df = pd.read_csv(io.StringIO(text), sep=sep, header=header, engine="python")
        if df.empty:
            continue

        # Append Gestor as the last column
        df["Gestor"] = gestor
        dfs.append(df)

    if not dfs:
        return pd.DataFrame()

    return pd.concat(dfs, ignore_index=True)

In [33]:

# --- Usage (everything stays in memory) ---
# Load every "5G_*.txt" file found under BASE_DIR into a single DataFrame.
nd_df   = read_pattern_to_df("5G")



In [34]:
# Works on nd_df, the DataFrame loaded in the previous cell by read_pattern_to_df("5G").
# NOTE(review): the name eu_df_mod looks like a leftover from an LTE (EUtranCellFDD)
# notebook; it is kept because the next cell reads it.

# Show the original shape
print("Shape original:", nd_df.shape)

# 1) Drop the second column (spreadsheet column B; positional index 1, pandas is 0-based)
eu_df_mod = nd_df.drop(nd_df.columns[1], axis=1)

# 2) Check the new shape
print("Shape nuevo:", eu_df_mod.shape)

# 3) Preview the first 10 rows
pd.set_option("display.max_columns", None)  # optional: show every column
display(eu_df_mod.head(10))


Shape original: (6588, 35)
Shape nuevo: (6588, 34)


Unnamed: 0,0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,Gestor
0,NLEAPO0540,EMRNLEAPO0540_1_T,UNLOCKED,,[7],NOT_BARRED,1,15000,NOT_RESERVED,ACTIVE,4004.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128508000.0,EMRNLEAPO0540_1_T,453,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,563,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
1,NLEAPO0540,EMRNLEAPO0540_2_T,UNLOCKED,,[7],NOT_BARRED,2,15000,NOT_RESERVED,ACTIVE,4004.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128508000.0,EMRNLEAPO0540_2_T,454,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,571,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
2,NLEAPO0540,EMRNLEAPO0540_3_T,UNLOCKED,,[7],NOT_BARRED,3,15000,NOT_RESERVED,ACTIVE,4004.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128508000.0,EMRNLEAPO0540_3_T,455,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,579,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
3,NLEAPO1403,EMRNLEAPO1403_1_T,UNLOCKED,,[7],NOT_BARRED,1,15000,NOT_RESERVED,ACTIVE,4110.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128689000.0,EMRNLEAPO1403_1_T,404,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,629,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
4,NLEAPO1403,EMRNLEAPO1403_2_T,UNLOCKED,,[7],NOT_BARRED,2,15000,NOT_RESERVED,ACTIVE,4110.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128689000.0,EMRNLEAPO1403_2_T,403,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,114,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
5,NLEAPO1403,EMRNLEAPO1403_3_T,UNLOCKED,,[7],NOT_BARRED,3,15000,NOT_RESERVED,ACTIVE,4110.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128689000.0,EMRNLEAPO1403_3_T,402,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,199,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
6,NLEAPO1402,EMRNLEAPO1402_1_T,UNLOCKED,,[7],NOT_BARRED,14,15000,NOT_RESERVED,ACTIVE,4050.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3131163000.0,EMRNLEAPO1402_1_T,338,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,634,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
7,NLEAPO1402,EMRNLEAPO1402_2_T,UNLOCKED,,[7],NOT_BARRED,15,15000,NOT_RESERVED,ACTIVE,4050.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3131163000.0,EMRNLEAPO1402_2_T,19,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,446,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
8,NLEAPO1402,EMRNLEAPO1402_3_T,UNLOCKED,,[7],NOT_BARRED,16,15000,NOT_RESERVED,ACTIVE,4050.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3131163000.0,EMRNLEAPO1402_3_T,27,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,374,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1
9,NLESNG0962,EMRNLESNG0962_1_T,UNLOCKED,,[7],NOT_BARRED,14,15000,NOT_RESERVED,ACTIVE,4031.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128033000.0,EMRNLESNG0962_1_T,3,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,211,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,1


In [35]:
# ================== HEADERS handling (Option C) ==================
# "Gestor" goes FIRST in the final column order:

# Header list without "Gestor", used to name the columns that come from the TXT
HEADERS_NO_GESTOR = [c for c in HEADERS if c != "Gestor"]

# ================== Build the named DataFrame ==================
# Start from a copy of the rearranged frame, whose columns are still unnamed
# (positional labels plus the trailing "Gestor" column added at load time).
df_base = eu_df_mod.copy()

print("Shape inicial df_base:", df_base.shape)

# 1) The attribute block read from the TXT ends at BASE_LAST (inclusive)
BASE_LAST = "trsPeriodicity"
if BASE_LAST not in HEADERS_NO_GESTOR:
    raise ValueError(f"'{BASE_LAST}' no est√° en HEADERS_NO_GESTOR")
base_len = HEADERS_NO_GESTOR.index(BASE_LAST) + 1   # number of attribute columns (Gestor excluded)
n_cols = df_base.shape[1]

# 2) If there is a column right after the attribute block, treat it as 'Gestor'
# NOTE(review): the column is chosen by position (base_len), not by its "Gestor" label.
# If the export gains or loses attributes, a different column is taken here without
# any error - confirm the TXT layout whenever HEADERS changes.
gestor_series = pd.Series(pd.NA, index=df_base.index)
if n_cols >= base_len + 1:
    gestor_series = df_base.iloc[:, base_len]
    df_base.drop(df_base.columns[base_len], axis=1, inplace=True)
    n_cols -= 1

# 3) Name the columns WITHOUT 'Gestor' yet. When the frame is narrower than the header
#    list, pad it with placeholder columns filled with pd.NA; after renaming these are
#    the headers that follow BASE_LAST (TAC ... AT&T_Site_Name).
expected_without_gestor = len(HEADERS_NO_GESTOR)
if n_cols < expected_without_gestor:
    for i in range(expected_without_gestor - n_cols):
        df_base[f"__tmp_empty_{i}"] = pd.NA
    n_cols = expected_without_gestor

# Trim to the expected width and assign the names (all headers except 'Gestor')
df_base = df_base.iloc[:, :expected_without_gestor]
df_base.columns = HEADERS_NO_GESTOR  # names every column except 'Gestor'

# 4) Add 'Gestor' and reorder with HEADERS, which places it first
df_base["Gestor"] = gestor_series
df_base = df_base.reindex(columns=HEADERS)  # HEADERS already lists 'Gestor' first

# === Verification preview ===
print("Shape final df_base:", df_base.shape)
pd.set_option("display.max_columns", None)
display(df_base.head(12))
# display(df_base.head(5).T)  # vertical alternative


Shape inicial df_base: (6588, 34)
Shape final df_base: (6588, 39)


Unnamed: 0,Gestor,NodeId,NRCellDUId,administrativeState,availabilityStatus,bandList,cellBarred,cellLocalId,cellRange,cellReservedForOperator,cellState,configuredEpsTAC,csiRsShiftingPrimary,csiRsShiftingSecondary,dftSOfdmPuschStartRsrpThresh,maxUeSpeed,nCI,nRCellDUId,nRPCI,nRSectorCarrierRef,nRTAC,operationalState,pLMNIdList,pMax,pointAArfcnDlFdd,pointAArfcnUlFdd,pointAFrequencyDlFdd,rachRootSequence,serviceState,ssbFrequency,ssbFrequencyAutoSelected,tddSpecialSlotPattern,tddUlDlPattern,trsPeriodicity,TAC,Name Unique,LAT,LON,AT&T_Site_Name
0,1,NLEAPO0540,EMRNLEAPO0540_1_T,UNLOCKED,,[7],NOT_BARRED,1,15000,NOT_RESERVED,ACTIVE,4004.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128508000.0,EMRNLEAPO0540_1_T,453,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,563,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
1,1,NLEAPO0540,EMRNLEAPO0540_2_T,UNLOCKED,,[7],NOT_BARRED,2,15000,NOT_RESERVED,ACTIVE,4004.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128508000.0,EMRNLEAPO0540_2_T,454,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,571,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
2,1,NLEAPO0540,EMRNLEAPO0540_3_T,UNLOCKED,,[7],NOT_BARRED,3,15000,NOT_RESERVED,ACTIVE,4004.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128508000.0,EMRNLEAPO0540_3_T,455,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,579,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
3,1,NLEAPO1403,EMRNLEAPO1403_1_T,UNLOCKED,,[7],NOT_BARRED,1,15000,NOT_RESERVED,ACTIVE,4110.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128689000.0,EMRNLEAPO1403_1_T,404,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,629,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
4,1,NLEAPO1403,EMRNLEAPO1403_2_T,UNLOCKED,,[7],NOT_BARRED,2,15000,NOT_RESERVED,ACTIVE,4110.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128689000.0,EMRNLEAPO1403_2_T,403,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,114,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
5,1,NLEAPO1403,EMRNLEAPO1403_3_T,UNLOCKED,,[7],NOT_BARRED,3,15000,NOT_RESERVED,ACTIVE,4110.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128689000.0,EMRNLEAPO1403_3_T,402,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,199,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
6,1,NLEAPO1402,EMRNLEAPO1402_1_T,UNLOCKED,,[7],NOT_BARRED,14,15000,NOT_RESERVED,ACTIVE,4050.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3131163000.0,EMRNLEAPO1402_1_T,338,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,634,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
7,1,NLEAPO1402,EMRNLEAPO1402_2_T,UNLOCKED,,[7],NOT_BARRED,15,15000,NOT_RESERVED,ACTIVE,4050.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3131163000.0,EMRNLEAPO1402_2_T,19,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,446,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
8,1,NLEAPO1402,EMRNLEAPO1402_3_T,UNLOCKED,,[7],NOT_BARRED,16,15000,NOT_RESERVED,ACTIVE,4050.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3131163000.0,EMRNLEAPO1402_3_T,27,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,374,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,
9,1,NLESNG0962,EMRNLESNG0962_1_T,UNLOCKED,,[7],NOT_BARRED,14,15000,NOT_RESERVED,ACTIVE,4031.0,DEACTIVATED,DEACTIVATED,-104,UP_TO_100KMPH,3128033000.0,EMRNLESNG0962_1_T,3,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=RADIONODE,...",0,ENABLED,"[{mcc=334, mnc=050}, {mcc=334, mnc=03}]",23,530092.0,506092.0,2650460.0,211,IN_SERVICE,532000,,TDD_SPECIAL_SLOT_PATTERN_00,TDD_ULDL_PATTERN_00,40,,,,,


In [36]:

# --- utilidades ---
def _is_blank(s: pd.Series) -> pd.Series:
    return s.isna() | s.astype(str).str.strip().eq("")

def coalesce_to_single_column(df: pd.DataFrame, variants, target):
    """Merge several alternative columns into ``target``, modifying ``df`` in place.

    Of the names in ``variants`` that exist in ``df``, the first non-null value
    per row is taken (``target`` itself has priority when it is one of them)
    and stored in ``df[target]``; the other variant columns are then dropped.
    When none of the variants exists, ``df`` is returned untouched.
    """
    found = [name for name in variants if name in df.columns]
    if len(found) == 0:
        return df

    others = [name for name in found if name != target]
    if target in found:
        ordered = [target] + others
    else:
        ordered = found

    # Row-wise backfill pulls the first non-null value into the leftmost column.
    df[target] = df[ordered].bfill(axis=1).iloc[:, 0]
    df.drop(columns=others, inplace=True, errors="ignore")
    return df



# =====================================================================
# 2) Sort by NodeId and flag the first row of each node in "Name Unique"
# =====================================================================
# Normalise NodeId to stripped text. Missing values become "" explicitly:
# astype(str) alone turns NaN into the literal text "nan", which would then be
# treated as a real node name (a fillna("") placed after astype(str) never fires).
_node_raw = df_base["NodeId"]
df_base["NodeId"] = _node_raw.astype(str).str.strip().mask(_node_raw.isna(), "")

# kind="stable" keeps the incoming row order inside each NodeId; the default
# sorting algorithm gives no such guarantee, so the order could vary between runs.
df_out = (
    df_base
    .sort_values(by="NodeId", ascending=True, kind="stable")
    .reset_index(drop=True)
)

# A row starts a new node when its NodeId differs from the previous row's.
_name = df_out["NodeId"].astype(str).str.strip()
is_new = _name.ne(_name.shift())
# Only the first row of each non-blank node carries the name; the rest stay "".
df_out["Name Unique"] = np.where(is_new & _name.ne(""), df_out["NodeId"], "")

display(df_out.loc[:, ["NodeId", "Name Unique"]].head(12))

# =====================================================================
# 3) LAT/LON/AT&T_Site_Name from All_Ericsson_5G_{YYYYMM} (previous month)
#     + 3b) EPT fallback, optimised with a Parquet cache
# =====================================================================
# NOTE(review): this repeats the BASE_DIR assignment from the first cell with the same
# path literal; keeping a single definition would avoid the copies drifting apart.
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\5G")

# ---------- Helpers ----------
def _is_blank_series(s: pd.Series) -> pd.Series:
    """True si es NaN o cadena vac√≠a/espacios."""
    if s.dtype == "O" or pd.api.types.is_string_dtype(s):
        return s.isna() | (s.astype(str).str.strip() == "")
    else:
        return s.isna()

def coalesce_block(df: pd.DataFrame, candidates, target) -> pd.DataFrame:
    """Fill ``df[target]`` with the first non-null value, scanning ``candidates``
    from left to right.

    Candidates that are not columns of ``df`` are ignored; when none is
    present, ``target`` is set to ``pd.NA`` for every row. ``df`` is modified
    in place and also returned.
    """
    available = [name for name in candidates if name in df.columns]
    if available:
        # Row-wise backfill moves the first non-null value into the leftmost column.
        df[target] = df[available].bfill(axis=1).iloc[:, 0]
    else:
        df[target] = pd.NA
    return df

# ---------- 3) All_Ericsson (previous month) ----------
# Build the YYYYMM tag of the month before today; January wraps to December of the
# previous year (month - 1 evaluates to 0, so `or 12` kicks in).
today = date.today()
prev_year  = today.year if today.month > 1 else today.year - 1
prev_month = today.month - 1 or 12
yyyymm = f"{prev_year}{prev_month:02d}"

ae_path = BASE_DIR / f"All_Ericsson_5G_{yyyymm}.xlsx"

# Read only the lookup columns, all as text. read_excel raises if the workbook
# or any of the listed columns is missing.
ae = pd.read_excel(
    ae_path,
    usecols=["NodeId", "LAT", "LON", "AT&T_Site_Name"],
    dtype={"NodeId": "string", "LAT": "string", "LON": "string", "AT&T_Site_Name": "string"},
    engine="openpyxl"
)
ae["NodeId"] = ae["NodeId"].astype("string").str.strip()
# One row per NodeId (the first occurrence wins) so it can serve as a lookup index.
ae = ae.drop_duplicates(subset=["NodeId"], keep="first").set_index("NodeId")

df_out["NodeId"] = df_out["NodeId"].astype("string").str.strip()

for c in ["LAT", "LON", "AT&T_Site_Name"]:
    if c not in df_out.columns:
        df_out[c] = pd.NA

    # Rows whose value is still missing/blank for this column
    need = _is_blank_series(df_out[c])

    # Instead of reindex + masked loc, map from the lookup index:
    if c in ae.columns:
        mapped = df_out["NodeId"].map(ae[c])   # Series aligned to df_out's index
        # Assign only the missing rows; being a Series it aligns by index (no .values needed)
        df_out.loc[need, c] = mapped[need]
    else:
        # If the column did not come in ae for some reason, do not try to assign
        pass

# True for rows still lacking at least one of LAT / LON / AT&T_Site_Name
faltan = (
    _is_blank_series(df_out["LAT"]) |
    _is_blank_series(df_out["LON"]) |
    _is_blank_series(df_out["AT&T_Site_Name"])
)


if not faltan.any():
    print("All_Ericsson cubri√≥ 100% (LAT/LON/AT&T_Site_Name).")
    display(df_out.loc[:, ["NodeId","LAT","LON","AT&T_Site_Name"]].head(10))
else:
    print(f"Quedan {int(faltan.sum())} filas con faltantes. Se aplica fallback EPT‚Ä¶")

    # =================================================================
    # 3b) EPT fallback: read the workbook once, coalesce the column
    #     variants, map onto df_out, and cache the lookup as Parquet
    # =================================================================
    ept_matches = glob.glob(str(BASE_DIR / "EPT_ATT_UMTS_LTE_*.xlsx"))
    if not ept_matches:
        print("No se encontr√≥ archivo EPT_ATT_UMTS_LTE_*.xlsx; se omite fallback.")
    else:
        # glob order is arbitrary; when several EPT workbooks exist, use the
        # most recently modified one so the choice is deterministic.
        ept_file   = max(ept_matches, key=os.path.getmtime)
        ept_sheets = ["EPT_3G_LTE_OUTDOOR","PLAN_OUTDOOR","EPT_3G_LTE_INDOOR","PLAN_INDOOR","Eventos_Especiales"]
        need_cols  = {"AT&T_Node_Name","LAT","LON","Latitud","Longitud","AT&T_Site_Name"}
        cache_cols = ["Cell Name","LAT","LON","AT&T_Site_Name"]
        cache_path = BASE_DIR / "EPT_cache.parquet"

        # ----- Try the cache first -----
        # The cache is used only when it is at least as new as the workbook,
        # readable, non-empty and has the expected columns. It already holds
        # the normalised lookup, so it must NOT go through the coalesce step
        # again: that step derives "Cell Name" from "AT&T_Node_Name", which the
        # cache does not contain, and would blank out every key.
        ept_df = None
        if os.path.exists(cache_path):
            try:
                if os.path.getmtime(cache_path) >= os.path.getmtime(ept_file):
                    cached = pd.read_parquet(cache_path)
                    if not cached.empty and set(cache_cols) <= set(cached.columns):
                        # Make sure the text columns are string dtype
                        # (the Parquet engine may return other dtypes).
                        for c in ["Cell Name","AT&T_Site_Name"]:
                            cached[c] = cached[c].astype("string")
                        ept_df = cached
            except Exception as e:
                ept_df = None
                print(f"(Aviso) No se pudo leer el cache EPT: {e}. Se reconstruye desde el Excel.")

        rebuild = ept_df is None

        if rebuild:
            # ----- Read the workbook (opened once, closed on exit) -----
            try:
                frames = []
                with pd.ExcelFile(ept_file, engine="openpyxl") as xf:
                    for sh in ept_sheets:
                        if sh in xf.sheet_names:
                            frames.append(
                                pd.read_excel(
                                    xf,
                                    sheet_name=sh,
                                    # Only the columns of interest:
                                    usecols=lambda c: (c in need_cols),
                                    dtype="string",
                                )
                            )
                if frames:
                    ept_df = pd.concat(frames, ignore_index=True)
                else:
                    ept_df = pd.DataFrame(columns=list(need_cols), dtype="string")
            except Exception as e:
                print(f"(Aviso) No se pudieron leer hojas del EPT: {e}. Se omite fallback.")
                ept_df = pd.DataFrame(columns=list(need_cols), dtype="string")

            if not ept_df.empty:
                # --- Coalesce the variants into the target columns ---
                ept_df = coalesce_block(ept_df, ["AT&T_Node_Name"], "Cell Name")
                ept_df = coalesce_block(ept_df, ["LAT","Latitud"], "LAT")
                ept_df = coalesce_block(ept_df, ["LON","Longitud"], "LON")
                ept_df = coalesce_block(ept_df, ["AT&T_Site_Name"], "AT&T_Site_Name")

                # Clean-up and unique keys
                ept_df["Cell Name"] = ept_df["Cell Name"].astype("string").str.strip()
                ept_df = ept_df.dropna(subset=["Cell Name"]).drop_duplicates(subset=["Cell Name"], keep="first")

                # Convert LAT/LON to numbers (last, on the coalesced columns)
                for c in ["LAT","LON"]:
                    ept_df[c] = pd.to_numeric(
                        ept_df[c].astype("string").str.replace(",", "", regex=False).str.strip("[]"),
                        errors="coerce"
                    )

                ept_df["AT&T_Site_Name"] = ept_df["AT&T_Site_Name"].astype("string").str.strip()

                # Keep only what the fallback needs
                ept_df = ept_df[cache_cols]

            # Cache only useful data. An empty result (including a failed read)
            # is never cached, otherwise a transient error such as a locked
            # workbook would disable the fallback until the workbook changes.
            if not ept_df.empty:
                try:
                    ept_df.to_parquet(cache_path, index=False)
                except Exception as e:
                    # Caching is best-effort; the in-memory frame is still used.
                    print(f"(Aviso) No se pudo guardar el cache EPT: {e}")

        # ----- Apply the fallback to df_out -----
        if not ept_df.empty:
            # Make sure df_out has the join key
            created_temp_key = False
            if "Cell Name" not in df_out.columns:
                if "NodeId" in df_out.columns:
                    df_out["Cell Name"] = df_out["NodeId"].astype("string").str.strip()
                    created_temp_key = True
                else:
                    raise KeyError("Falta 'Cell Name' o 'NodeId' en base para hacer fallback EPT.")

            df_out["Cell Name"] = df_out["Cell Name"].astype("string").str.strip()

            # Indexed Series for fast mapping
            ept_idx = ept_df.set_index("Cell Name")
            ept_lat  = ept_idx["LAT"] if "LAT" in ept_idx.columns else pd.Series(dtype="float64")
            ept_lon  = ept_idx["LON"] if "LON" in ept_idx.columns else pd.Series(dtype="float64")
            ept_site = ept_idx["AT&T_Site_Name"] if "AT&T_Site_Name" in ept_idx.columns else pd.Series(dtype="string")

            # Only rows that are still blank are overwritten
            need_lat  = _is_blank_series(df_out["LAT"])
            need_lon  = _is_blank_series(df_out["LON"])
            need_site = _is_blank_series(df_out["AT&T_Site_Name"])

            if not ept_lat.empty:
                df_out.loc[need_lat,  "LAT"] = df_out.loc[need_lat,  "Cell Name"].map(ept_lat)
            if not ept_lon.empty:
                df_out.loc[need_lon,  "LON"] = df_out.loc[need_lon,  "Cell Name"].map(ept_lon)
            if not ept_site.empty:
                df_out.loc[need_site, "AT&T_Site_Name"] = df_out.loc[need_site, "Cell Name"].map(ept_site)

            # Remove the join key if it was added only for this step
            if created_temp_key:
                df_out.drop(columns=["Cell Name"], inplace=True)

            print("Fallback EPT aplicado.")
            display(df_out.loc[:, ["NodeId","LAT","LON","AT&T_Site_Name"]].head(10))
        else:
            print("No se pudieron obtener datos √∫tiles del EPT; se omite fallback.")


# =====================================================================
# 4) Compute TAC = IF(configuredEpsTAC is NaN or "", use nRTAC; otherwise use configuredEpsTAC)
# =====================================================================

col_conf = "configuredEpsTAC"
col_nrt  = "nRTAC"

# Normalise both columns to text and strip surrounding spaces
df_out[col_conf] = df_out[col_conf].astype(str).str.strip()
df_out[col_nrt]  = df_out[col_nrt].astype(str).str.strip()

# Turn empty strings and the textual forms of missing values back into pd.NA
df_out[col_conf] = df_out[col_conf].replace(["", "nan", "NaN", "None"], pd.NA)
df_out[col_nrt]  = df_out[col_nrt].replace(["", "nan", "NaN", "None"], pd.NA)

# --- Remove unnecessary decimals from configuredEpsTAC ---
# A value such as '41001.0' is converted to an integer and back to the text '41001'.
# Rows that are not numeric are dropped by dropna() and come back as pd.NA through the
# reindex on df_out's index.
df_out[col_conf] = (
    pd.to_numeric(df_out[col_conf], errors="coerce")
    .dropna()
    .astype("Int64")
    .astype(str)
).reindex(df_out.index, fill_value=pd.NA)

# IF() logic: when configuredEpsTAC is missing use nRTAC, otherwise use configuredEpsTAC
df_out["TAC"] = np.where(df_out[col_conf].isna(), df_out[col_nrt], df_out[col_conf])

try:
    # Convert TAC to nullable integers so it carries no decimals.
    # NOTE(review): errors="coerce" turns any non-numeric TAC into <NA> rather than
    # leaving it untouched, and a failure of the cast is silently ignored by the
    # except below - confirm that is acceptable.
    df_out["TAC"] = pd.to_numeric(df_out["TAC"], errors="coerce").astype("Int64")
except Exception:
    pass

print("‚úÖ Columna 'TAC' calculada correctamente (sin decimales innecesarios).")
display(df_out.loc[:, [col_conf, col_nrt, "TAC"]].head(10))


# =====================================================================
# 7) Final result in memory (nothing saved yet). Control preview.
# =====================================================================
print("Resultado final en memoria ‚Üí shape:", df_out.shape)
# Show only the control columns that actually exist in df_out
cols_check = ["NodeId","LAT","LON","AT&T_Site_Name","TAC"]
display(df_out.loc[:, [c for c in cols_check if c in df_out.columns]].head(20))


Unnamed: 0,NodeId,Name Unique
0,ADIFIZC6380,ADIFIZC6380
1,ADIFIZC6380,
2,ADIFIZC6380,
3,ADIFIZC6380,
4,ADIFIZC6381,ADIFIZC6381
5,ADIFIZC6381,
6,ADIFIZC6381,
7,ADIFIZC6381,
8,ADIFIZC6382,ADIFIZC6382
9,ADIFIZC6382,


Quedan 8 filas con faltantes. Se aplica fallback EPT‚Ä¶
Fallback EPT aplicado.


Unnamed: 0,NodeId,LAT,LON,AT&T_Site_Name
0,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380
1,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380
2,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380
3,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380
4,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381
5,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381
6,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381
7,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381
8,ADIFIZC6382,19.40513,-99.0956611,DIFIZC6382
9,ADIFIZC6382,19.40513,-99.0956611,DIFIZC6382


‚úÖ Columna 'TAC' calculada correctamente (sin decimales innecesarios).


Unnamed: 0,configuredEpsTAC,nRTAC,TAC
0,9652,0,9652
1,9652,0,9652
2,9652,0,9652
3,9652,0,9652
4,9652,0,9652
5,9652,0,9652
6,9652,0,9652
7,9652,0,9652
8,9652,0,9652
9,9652,0,9652


Resultado final en memoria ‚Üí shape: (6588, 39)


Unnamed: 0,NodeId,LAT,LON,AT&T_Site_Name,TAC
0,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380,9652
1,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380,9652
2,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380,9652
3,ADIFIZC6380,19.40513,-99.0956611,DIFIZC6380,9652
4,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381,9652
5,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381,9652
6,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381,9652
7,ADIFIZC6381,19.40513,-99.0956611,DIFIZC6381,9652
8,ADIFIZC6382,19.40513,-99.0956611,DIFIZC6382,9652
9,ADIFIZC6382,19.40513,-99.0956611,DIFIZC6382,9652


In [37]:
# Requires df_out and HEADERS from the previous cells.
# NOTE(review): BASE_DIR is assigned again here with the same path literal as before.
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\5G")
# The output file is tagged with the CURRENT month; today/yyyymm are rebound here,
# replacing the previous-month values computed in the enrichment cell.
today   = date.today()
yyyymm  = f"{today.year}{today.month:02d}"
final_excel = BASE_DIR / f"All_Ericsson_5G_{yyyymm}.xlsx"

# 1) Make sure every header exists, enforce the column order, and save directly
for col in HEADERS:
    if col not in df_out.columns:
        df_out[col] = pd.NA
df_out = df_out[HEADERS]
df_out.to_excel(final_excel, index=False, na_rep="")

# 2) Reopen the SAME file and apply the formatting
wb = load_workbook(final_excel)
ws = wb.active

# Keep the header row visible while scrolling
ws.freeze_panes = "A2"
# Rewrite each header cell and style it: rotated 90 degrees, centred, wrapped
for col_idx, header in enumerate(HEADERS, start=1):
    cell = ws.cell(row=1, column=col_idx)
    cell.value = header
    cell.font = Font(name="Aptos Narrow", size=11)
    cell.alignment = Alignment(textRotation=90, horizontal="center", vertical="bottom", wrap_text=True)

wb.save(final_excel)
wb.close()

print(f"‚úÖ Archivo final guardado y formateado ‚Üí {final_excel}")

‚úÖ Archivo final guardado y formateado ‚Üí C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\5G\All_Ericsson_5G_202510.xlsx
