In [1]:
import os, glob, io, re
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Font
import numpy as np
from datetime import date
import time

import unicodedata


# Ruta base
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\4G")
DEFAULT_IGNORE = ["SubNetwork,", "instance(s)", "NodeId"]
# Lista de 31 encabezados, en el orden en que los quieres (eNBId queda en W si mantienes este orden)
HEADERS = [
     "Gestor","eNodeB Name","CellName","activePlmnList_mcc","additionalPlmnList_mcc",
    "administrativeState","cellBarred","cellId","cellSubscriptionCapacity",
    "channelSelectionSetSize","dlChannelBandwidth","earfcndl","earfcnul",
    "freqBand","noOfPucchCqiUsers","noOfPucchSrUsers","operationalState",
    "physicalLayerCellIdGroup","physicalLayerSubCellId","sectorCarrierRef",
    "tac","timeOfLastModification","ulChannelBandwidth",
    "eNBId","eNodeB Name Unique","LAT","LON","PCI","AT&T_Site_Name",
    "MOCN Activo por Celda","Al menos una celda de MOCN encendida","MME TEF"
]

HEADERS_NO_GESTOR = [c for c in HEADERS if c != "Gestor"]



In [2]:
def _filtered_lines(path, ignorelines):
    """Genera las líneas útiles (no vacías y sin patrones a ignorar)."""
    with open(path, "r", encoding="utf-8") as f:
        for ln in f:
            if not ln.strip():
                continue
            if any(pat in ln for pat in (ignorelines or [])):
                continue
            yield ln

def read_pattern_to_df(filenamepattern: str, ignorelines=None, sep="\t", header=None):
    """
    Lee todos los TXT que matchean '<pattern>_*.txt', limpia líneas,
    y devuelve un único DataFrame con una columna extra 'Gestor' (sufijo numérico del archivo).
    No escribe nada a disco.
    """
    ignorelines = DEFAULT_IGNORE if ignorelines is None else ignorelines
    searchpattern = str(BASE_DIR / f"{filenamepattern}_*.txt")
    files = glob.glob(searchpattern)

    dfs = []
    for path in files:
        # Extrae NN de *_NN.txt
        m = re.search(r"_(\d+)\.txt$", os.path.basename(path), flags=re.IGNORECASE)
        gestor = f"ENM-{m.group(1)}" if m else ""

        # Filtra/limpia antes de parsear
        buf = io.StringIO("".join(_filtered_lines(path, ignorelines)))
        if buf.tell() == 0:
            # StringIO vacío → intenta leer igualmente (por seguridad)
            buf.seek(0)

        # Importante: sin encabezados en origen
        df = pd.read_csv(buf, sep=sep, header=header, engine="python")
        if df.empty:
            continue

        # Añade Gestor como última columna
        df["Gestor"] = gestor
        dfs.append(df)

    if not dfs:
        return pd.DataFrame()

    return pd.concat(dfs, ignore_index=True)

In [3]:

# --- Uso (todo queda en memoria) ---
eu_df   = read_pattern_to_df("EUtranCellFDD")
nb_df   = read_pattern_to_df("ENodeBFunction")
nd_df   = read_pattern_to_df("nodeid")
mme_df  = read_pattern_to_df("MME")

# Ojo: en tu código original había un mix en NbIot
# Aquí simplemente:
nbiot_df = read_pattern_to_df("NbIotCell")




In [4]:
# Suponiendo que ya tienes en memoria tu DataFrame:
# por ejemplo, eu_df = read_pattern_to_df("EUtranCellFDD")

# Mostrar forma original
print("Shape original:", eu_df.shape)

# 1) Eliminar columna B
eu_df_mod = eu_df.drop(eu_df.columns[1], axis=1)

# 2) Verificar nueva forma
print("Shape nuevo:", eu_df_mod.shape)

# 3) Mostrar una vista previa (primeras 10 filas)
pd.set_option("display.max_columns", None)  # opcional, para ver todas las columnas
display(eu_df_mod.head(10))


Shape original: (55769, 24)
Shape nuevo: (55769, 23)


Unnamed: 0,0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,Gestor
0,BCNENS0039,EMLBCNENS0039_3_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,93,34000,2,20000,3350,21350,7,640,810,ENABLED,13,2,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,ENM-1
1,BCNENS0039,EMLBCNENS0039_2_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,5,34000,2,20000,3150,21150,7,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,ENM-1
2,BCNENS0039,EMLBCNENS0039_2_S,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,11,17000,2,10000,650,18650,2,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,10000,ENM-1
3,BCNENS0039,EMLBCNENS0039_2_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,92,34000,2,20000,3350,21350,7,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,ENM-1
4,BCNENS0039,EMLBCNENS0039_1_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,4,34000,2,20000,3150,21150,7,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,ENM-1
5,BCNENS0039,EMLBCNENS0039_3_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,3,34000,2,20000,2250,20250,4,640,810,ENABLED,13,2,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-28 01:28:41,20000,ENM-1
6,BCNENS0039,EMLBCNENS0039_2_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,2,34000,2,20000,2250,20250,4,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-29 00:02:20,20000,ENM-1
7,BCNENS0039,EMLBCNENS0039_1_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,1,34000,2,20000,2250,20250,4,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-26 20:35:14,20000,ENM-1
8,BCNENS0039,EMLBCNENS0039_1_S,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,10,17000,2,10000,650,18650,2,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,10000,ENM-1
9,BCNENS0039,EMLBCNENS0039_1_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,91,34000,2,20000,3350,21350,7,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,ENM-1


In [5]:
df_base = eu_df_mod.copy()
print("Shape inicial df_base:", df_base.shape)

# Tomar Gestor del sufijo (última col) y quitarla para renombrar por posición
gestor_suffix = df_base.iloc[:, -1].astype("string").str.strip()
gestor_suffix = gestor_suffix.mask(gestor_suffix.eq(""), pd.NA)
df_base = df_base.iloc[:, :-1]

# ¿Hasta dónde llega el bloque base?
BASE_LAST = "ulChannelBandwidth"
if BASE_LAST not in HEADERS_NO_GESTOR:
    raise ValueError(f"'{BASE_LAST}' no está en HEADERS_NO_GESTOR")

base_len = HEADERS_NO_GESTOR.index(BASE_LAST) + 1
n_cols   = df_base.shape[1]

# Posible Gestor inline: columna inmediatamente después del bloque base
gestor_inline = pd.Series(pd.NA, index=df_base.index, dtype="string")
if n_cols >= base_len + 1:
    gestor_inline = df_base.iloc[:, base_len].astype("string").str.strip()
    gestor_inline = gestor_inline.mask(gestor_inline.eq(""), pd.NA)
    df_base.drop(df_base.columns[base_len], axis=1, inplace=True)
    n_cols -= 1

# Coalesce de Gestor
gestor_final = gestor_inline.fillna(gestor_suffix)

# Asegurar ancho exacto SIN Gestor (rellena/corta)
expected_wo_gestor = len(HEADERS_NO_GESTOR)
if df_base.shape[1] < expected_wo_gestor:
    add = pd.DataFrame(pd.NA, index=df_base.index, columns=range(df_base.shape[1], expected_wo_gestor))
    df_base = pd.concat([df_base, add], axis=1)
elif df_base.shape[1] > expected_wo_gestor:
    df_base = df_base.iloc[:, :expected_wo_gestor]

# Nombrar por posición (todos menos Gestor)
df_base.columns = HEADERS_NO_GESTOR

# Insertar Gestor al INICIO via reindex exacto
df_base["Gestor"] = gestor_final
df_base = df_base.reindex(columns=HEADERS)

print("Shape final df_base:", df_base.shape)
display(df_base.head(12))


Shape inicial df_base: (55769, 23)
Shape final df_base: (55769, 32)


Unnamed: 0,Gestor,eNodeB Name,CellName,activePlmnList_mcc,additionalPlmnList_mcc,administrativeState,cellBarred,cellId,cellSubscriptionCapacity,channelSelectionSetSize,dlChannelBandwidth,earfcndl,earfcnul,freqBand,noOfPucchCqiUsers,noOfPucchSrUsers,operationalState,physicalLayerCellIdGroup,physicalLayerSubCellId,sectorCarrierRef,tac,timeOfLastModification,ulChannelBandwidth,eNBId,eNodeB Name Unique,LAT,LON,PCI,AT&T_Site_Name,MOCN Activo por Celda,Al menos una celda de MOCN encendida,MME TEF
0,ENM-1,BCNENS0039,EMLBCNENS0039_3_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,93,34000,2,20000,3350,21350,7,640,810,ENABLED,13,2,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
1,ENM-1,BCNENS0039,EMLBCNENS0039_2_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,5,34000,2,20000,3150,21150,7,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
2,ENM-1,BCNENS0039,EMLBCNENS0039_2_S,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,11,17000,2,10000,650,18650,2,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,10000,,,,,,,,,
3,ENM-1,BCNENS0039,EMLBCNENS0039_2_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,92,34000,2,20000,3350,21350,7,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
4,ENM-1,BCNENS0039,EMLBCNENS0039_1_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,4,34000,2,20000,3150,21150,7,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
5,ENM-1,BCNENS0039,EMLBCNENS0039_3_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,3,34000,2,20000,2250,20250,4,640,810,ENABLED,13,2,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-28 01:28:41,20000,,,,,,,,,
6,ENM-1,BCNENS0039,EMLBCNENS0039_2_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,2,34000,2,20000,2250,20250,4,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-29 00:02:20,20000,,,,,,,,,
7,ENM-1,BCNENS0039,EMLBCNENS0039_1_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,1,34000,2,20000,2250,20250,4,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-26 20:35:14,20000,,,,,,,,,
8,ENM-1,BCNENS0039,EMLBCNENS0039_1_S,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,10,17000,2,10000,650,18650,2,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,10000,,,,,,,,,
9,ENM-1,BCNENS0039,EMLBCNENS0039_1_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,91,34000,2,20000,3350,21350,7,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,


In [6]:
# === 1) Asegurar columnas y orden de df_base contra HEADERS ===
# (df_base viene de tu paso anterior ya con 'Gestor' acoplado)
df_base = df_base.loc[:, [c for c in df_base.columns if c in HEADERS]]
for c in HEADERS:
    if c not in df_base.columns:
        df_base[c] = pd.NA
df_base = df_base[HEADERS]

print("df_base aligned →", df_base.shape)
# Vista rápida:
try:
    display(df_base.head(8))
except NameError:
    print(df_base.head(8).to_string(index=False))


# === 2) Cargar NbIot en memoria (sin headers), NO Excel ===
# Asumiendo que ya integras y limpias TXT con tu nueva función en memoria:
# nbiot_df = read_pattern_to_df("NbIotCell")
# Ese DF trae la última columna 'Gestor' añadida por el sufijo NN del archivo

df_nbiot = nbiot_df.copy()
if df_nbiot.shape[1] < 15:
    raise ValueError("NbIotCell debe tener al menos 15 columnas (incluyendo 'Gestor' como última).")
df_nbiot = df_nbiot.iloc[:, :15]

print("df_nbiot raw →", df_nbiot.shape)
display(df_nbiot.head(5))

# Mapeo a 4G HEADERS (Gestor al inicio)
df_nb = pd.DataFrame(pd.NA, index=df_nbiot.index, columns=HEADERS)
# Gestor (última del TXT de NbIot)
df_nb["Gestor"]                      = df_nbiot.iloc[:, 14]  # O → Gestor
# Mapeos base (como tenías, ajustando a HEADERS actuales)
df_nb["eNodeB Name"]                 = df_nbiot.iloc[:, 0]   # A -> eNodeB Name
df_nb["CellName"]                    = df_nbiot.iloc[:, 2]   # C -> CellName
df_nb["activePlmnList_mcc"]          = df_nbiot.iloc[:, 3]   # D -> activePlmnList_mcc
df_nb["additionalPlmnList_mcc"]      = df_nbiot.iloc[:,11]   # L -> additionalPlmnList_mcc
df_nb["administrativeState"]         = df_nbiot.iloc[:, 4]   # E -> administrativeState
df_nb["cellBarred"]                  = df_nbiot.iloc[:, 5]   # F -> cellBarred
df_nb["cellId"]                      = df_nbiot.iloc[:, 6]   # G -> cellId
df_nb["earfcndl"]                    = df_nbiot.iloc[:, 7]   # H -> earfcndl
df_nb["earfcnul"]                    = df_nbiot.iloc[:, 8]   # I -> earfcnul
df_nb["operationalState"]            = df_nbiot.iloc[:, 9]   # J -> operationalState
df_nb["physicalLayerCellIdGroup"]    = df_nbiot.iloc[:,10]   # K -> physicalLayerCellIdGroup
df_nb["sectorCarrierRef"]            = df_nbiot.iloc[:,12]   # M -> sectorCarrierRef
df_nb["tac"]                         = df_nbiot.iloc[:,13]   # N -> tac

print("df_nb mapped →", df_nb.shape)
display(df_nb.head(8))

# ================== Unir base + NbIot ==================
df_out = pd.concat([df_base, df_nb], ignore_index=True)

print("df_out final →", df_out.shape)
cols_check = [
    "Gestor",
    "eNodeB Name","CellName","activePlmnList_mcc","additionalPlmnList_mcc",
    "administrativeState","cellBarred","cellId","earfcndl","earfcnul",
    "operationalState","physicalLayerCellIdGroup","sectorCarrierRef","tac"
]
display(df_out.loc[:, cols_check].head(12))


df_base aligned → (55769, 32)


Unnamed: 0,Gestor,eNodeB Name,CellName,activePlmnList_mcc,additionalPlmnList_mcc,administrativeState,cellBarred,cellId,cellSubscriptionCapacity,channelSelectionSetSize,dlChannelBandwidth,earfcndl,earfcnul,freqBand,noOfPucchCqiUsers,noOfPucchSrUsers,operationalState,physicalLayerCellIdGroup,physicalLayerSubCellId,sectorCarrierRef,tac,timeOfLastModification,ulChannelBandwidth,eNBId,eNodeB Name Unique,LAT,LON,PCI,AT&T_Site_Name,MOCN Activo por Celda,Al menos una celda de MOCN encendida,MME TEF
0,ENM-1,BCNENS0039,EMLBCNENS0039_3_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,93,34000,2,20000,3350,21350,7,640,810,ENABLED,13,2,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
1,ENM-1,BCNENS0039,EMLBCNENS0039_2_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,5,34000,2,20000,3150,21150,7,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
2,ENM-1,BCNENS0039,EMLBCNENS0039_2_S,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,11,17000,2,10000,650,18650,2,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,10000,,,,,,,,,
3,ENM-1,BCNENS0039,EMLBCNENS0039_2_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,92,34000,2,20000,3350,21350,7,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
4,ENM-1,BCNENS0039,EMLBCNENS0039_1_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,4,34000,2,20000,3150,21150,7,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,,20000,,,,,,,,,
5,ENM-1,BCNENS0039,EMLBCNENS0039_3_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,3,34000,2,20000,2250,20250,4,640,810,ENABLED,13,2,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-28 01:28:41,20000,,,,,,,,,
6,ENM-1,BCNENS0039,EMLBCNENS0039_2_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,2,34000,2,20000,2250,20250,4,640,810,ENABLED,13,1,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-29 00:02:20,20000,,,,,,,,,
7,ENM-1,BCNENS0039,EMLBCNENS0039_1_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,1,34000,2,20000,2250,20250,4,640,810,ENABLED,13,0,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220,2020-05-26 20:35:14,20000,,,,,,,,,


df_nbiot raw → (8436, 15)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,Gestor
0,BCNENS0033,1,EMNBCNENS0033_3_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,52,2240,-1,ENABLED,83,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21221,ENM-1
1,BCNENS0033,1,EMNBCNENS0033_1_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,50,2240,-1,ENABLED,81,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21221,ENM-1
2,BCNENS0033,1,EMNBCNENS0033_2_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,51,2240,-1,ENABLED,82,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21221,ENM-1
3,BCNENS0031,1,EMNBCNENS0031_2_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,51,2240,-1,ENABLED,28,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21220,ENM-1
4,BCNENS0031,1,EMNBCNENS0031_3_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,52,2240,-1,ENABLED,29,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21220,ENM-1


df_nb mapped → (8436, 32)


Unnamed: 0,Gestor,eNodeB Name,CellName,activePlmnList_mcc,additionalPlmnList_mcc,administrativeState,cellBarred,cellId,cellSubscriptionCapacity,channelSelectionSetSize,dlChannelBandwidth,earfcndl,earfcnul,freqBand,noOfPucchCqiUsers,noOfPucchSrUsers,operationalState,physicalLayerCellIdGroup,physicalLayerSubCellId,sectorCarrierRef,tac,timeOfLastModification,ulChannelBandwidth,eNBId,eNodeB Name Unique,LAT,LON,PCI,AT&T_Site_Name,MOCN Activo por Celda,Al menos una celda de MOCN encendida,MME TEF
0,ENM-1,BCNENS0033,EMNBCNENS0033_3_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,52,,,,2240,-1,,,,ENABLED,83,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21221,,,,,,,,,,,
1,ENM-1,BCNENS0033,EMNBCNENS0033_1_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,50,,,,2240,-1,,,,ENABLED,81,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21221,,,,,,,,,,,
2,ENM-1,BCNENS0033,EMNBCNENS0033_2_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,51,,,,2240,-1,,,,ENABLED,82,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21221,,,,,,,,,,,
3,ENM-1,BCNENS0031,EMNBCNENS0031_2_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,51,,,,2240,-1,,,,ENABLED,28,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21220,,,,,,,,,,,
4,ENM-1,BCNENS0031,EMNBCNENS0031_3_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,52,,,,2240,-1,,,,ENABLED,29,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21220,,,,,,,,,,,
5,ENM-1,BCNENS0031,EMNBCNENS0031_1_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,50,,,,2240,-1,,,,ENABLED,27,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21220,,,,,,,,,,,
6,ENM-1,BCNENS0037,EMNBCNENS0037_1_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,50,,,,2240,-1,,,,ENABLED,33,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21220,,,,,,,,,,,
7,ENM-1,BCNENS0037,EMNBCNENS0037_2_L,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...",UNLOCKED,NOT_BARRED,51,,,,2240,-1,,,,ENABLED,34,,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",21220,,,,,,,,,,,


df_out final → (64205, 32)


Unnamed: 0,Gestor,eNodeB Name,CellName,activePlmnList_mcc,additionalPlmnList_mcc,administrativeState,cellBarred,cellId,earfcndl,earfcnul,operationalState,physicalLayerCellIdGroup,sectorCarrierRef,tac
0,ENM-1,BCNENS0039,EMLBCNENS0039_3_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,93,3350,21350,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
1,ENM-1,BCNENS0039,EMLBCNENS0039_2_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,5,3150,21150,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
2,ENM-1,BCNENS0039,EMLBCNENS0039_2_S,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,11,650,18650,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
3,ENM-1,BCNENS0039,EMLBCNENS0039_2_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,92,3350,21350,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
4,ENM-1,BCNENS0039,EMLBCNENS0039_1_T,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,4,3150,21150,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
5,ENM-1,BCNENS0039,EMLBCNENS0039_3_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,3,2250,20250,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
6,ENM-1,BCNENS0039,EMLBCNENS0039_2_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,2,2250,20250,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
7,ENM-1,BCNENS0039,EMLBCNENS0039_1_J,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,1,2250,20250,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
8,ENM-1,BCNENS0039,EMLBCNENS0039_1_S,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,10,650,18650,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220
9,ENM-1,BCNENS0039,EMLBCNENS0039_1_M,"[{mncLength=3, mcc=334, mnc=50}, {mncLength=3,...","[{mncLength=3, mcc=334, mnc=90}, {mncLength=2,...",UNLOCKED,NOT_BARRED,91,3350,21350,ENABLED,13,"[SubNetwork=ONRM_ROOT_MO,SubNetwork=TIJUANA,Me...",1220


In [7]:

# --- utilidades ---
def _is_blank(s: pd.Series) -> pd.Series:
    return s.isna() | s.astype(str).str.strip().eq("")

def coalesce_to_single_column(df: pd.DataFrame, variants, target):
    present = [c for c in variants if c in df.columns]
    if not present:
        return df
    s = pd.Series(pd.NA, index=df.index, dtype="object")
    for c in present:
        cand = df[c].astype(str).str.strip()
        s = s.where(~(s.isna() | (s.astype(str).str.strip().eq(""))), cand.where(~cand.eq(""), pd.NA))
    df[target] = s
    df.drop(columns=[c for c in present if c != target], inplace=True, errors="ignore")
    return df


# =====================================================================
# 1) eNBId desde ENodeBFunction (todo en memoria)
# =====================================================================
# nb_df proviene de read_pattern_to_df("ENodeBFunction")
df_nodeb = nb_df.iloc[:, :3].copy()  # usecols [0,1,2]
df_nodeb.columns = ["NodeId", "ENodeBFunctionId", "eNBIdnew"]

# normaliza tipos/espacios
df_out["eNodeB Name"] = df_out["eNodeB Name"].astype(str).str.strip()
df_nodeb["NodeId"]    = df_nodeb["NodeId"].astype(str).str.strip()

# dedup catálogo
df_nodeb = df_nodeb.drop_duplicates(subset=["NodeId"], keep="first")

# merge para traer eNBId
tmp = df_out.merge(df_nodeb[["NodeId", "eNBIdnew"]],
                   left_on="eNodeB Name", right_on="NodeId", how="left")
df_out["eNBId"] = tmp["eNBIdnew"]

# =====================================================================
# 2) Ordenar por eNodeB Name y marcar "eNodeB Name Unique"
# =====================================================================
df_out["eNodeB Name"] = df_out["eNodeB Name"].astype(str).str.strip()
df_out = df_out.sort_values(by="eNodeB Name", ascending=True).reset_index(drop=True)

_name = df_out["eNodeB Name"].astype(str).fillna("").str.strip()
is_new = _name.ne(_name.shift())
df_out["eNodeB Name Unique"] = np.where(is_new & _name.ne(""), df_out["eNodeB Name"], "")

print("Tras eNBId + orden + unique →", df_out.shape)
display(df_out.loc[:, ["eNodeB Name","eNBId","eNodeB Name Unique"]].head(12))

# =====================================================================
# 3) LAT/LON/AT&T_Site_Name desde EPT (primario) + All_Ericsson_4G (fallback)
# =====================================================================
def find_latest_ae_fast(base_dir: Path) -> Path | None:
    files = list(base_dir.glob("All_Ericsson_4G_*.xlsx"))
    if not files:
        return None
    # max con key=mtime regresa el archivo modificado más recientemente
    return max(files, key=lambda p: p.stat().st_mtime)

merged = df_out.copy()

# ---------------------------------------------------------------------
# 3A) EPT como fuente primaria
# ---------------------------------------------------------------------
faltan_ept = (
    _is_blank(merged.get("LAT",             pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("LON",             pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("AT&T_Site_Name",  pd.Series(False, index=merged.index)))
)

if faltan_ept.any():
    ept_matches = glob.glob(str(BASE_DIR / "EPT_ATT_UMTS_LTE_*.xlsx"))
    if ept_matches:
        ept_file   = ept_matches[0]
        ept_sheets = [
            "EPT_3G_LTE_OUTDOOR","PLAN_OUTDOOR",
            "EPT_3G_LTE_INDOOR","PLAN_INDOOR",
            "Eventos_Especiales", "R&R Ericsson", "Overlay Ericsson"
        ]

        frames = []
        for sh in ept_sheets:
            try:
                tmp = pd.read_excel(ept_file, sheet_name=sh, engine="openpyxl")
                frames.append(tmp)
            except Exception:
                pass

        if frames:
            ept_df = pd.concat(frames, ignore_index=True)

            # --- Normalización robusta para llaves y textos ---
            def _norm(s: pd.Series) -> pd.Series:
                s = s.astype(str)
                s = (
                    s.str.replace("\xa0", " ", regex=False)
                     .str.replace(r"[\u200B-\u200F\u202A-\u202E]", "", regex=True)
                     .str.replace(r"[\r\n]", " ", regex=True)
                     .str.replace(r"\s+", " ", regex=True)
                     .str.strip()
                )
                s = s.mask(
                    s.eq("") | s.str.lower().isin({"nan","none","null","<na>","n/a","-"}),
                    pd.NA
                )
                return s

            # --- Normaliza llaves del EPT ---
            for c in ["AT&T_Node_Name", "Node_B_U2000", "CellName"]:
                if c in ept_df.columns:
                    ept_df[c] = _norm(ept_df[c])

            # --- Normaliza campos numéricos y de texto objetivo ---
            for c in ["Latitud","Longitud"]:
                if c in ept_df.columns:
                    ept_df[c] = (
                        ept_df[c].astype(str)
                                 .str.strip("[]")
                                 .str.replace(",", "", regex=False)
                    )
                    ept_df[c] = pd.to_numeric(ept_df[c], errors="coerce")

            if "AT&T_Site_Name" in ept_df.columns:
                ept_df["AT&T_Site_Name"] = _norm(ept_df["AT&T_Site_Name"])

            # --- Construye lookups A y B (llave: "Cell Name" lógico) ---
            # Lookup A: por AT&T_Node_Name
            lookup_a_cols = []
            if "AT&T_Node_Name" in ept_df.columns:
                lookup_a_cols.append("AT&T_Node_Name")
            if "Latitud" in ept_df.columns:
                lookup_a_cols.append("Latitud")
            if "Longitud" in ept_df.columns:
                lookup_a_cols.append("Longitud")
            if "AT&T_Site_Name" in ept_df.columns:
                lookup_a_cols.append("AT&T_Site_Name")

            lookup_a = (
                ept_df[lookup_a_cols]
                .rename(columns={
                    "AT&T_Node_Name": "Cell Name",
                    "Latitud": "LAT",
                    "Longitud": "LON",
                })
                .dropna(subset=["Cell Name"])
                .drop_duplicates(subset=["Cell Name"], keep="first")
                if lookup_a_cols else pd.DataFrame(columns=["Cell Name","LAT","LON","AT&T_Site_Name"])
            )

            # Lookup B: por Node_B_U2000
            lookup_b_cols = []
            if "Node_B_U2000" in ept_df.columns:
                lookup_b_cols.append("Node_B_U2000")
            if "Latitud" in ept_df.columns:
                lookup_b_cols.append("Latitud")
            if "Longitud" in ept_df.columns:
                lookup_b_cols.append("Longitud")
            if "AT&T_Site_Name" in ept_df.columns:
                lookup_b_cols.append("AT&T_Site_Name")

            lookup_b = (
                ept_df[lookup_b_cols]
                .rename(columns={
                    "Node_B_U2000": "Cell Name",
                    "Latitud": "LAT",
                    "Longitud": "LON",
                })
                .dropna(subset=["Cell Name"])
                .drop_duplicates(subset=["Cell Name"], keep="first")
                if lookup_b_cols else pd.DataFrame(columns=["Cell Name","LAT","LON","AT&T_Site_Name"])
            )

            # --- Garantiza la llave "Cell Name" en la base 'merged' (para A/B) ---
            if "Cell Name" not in merged.columns:
                if "eNodeB Name" in merged.columns:
                    merged["Cell Name"] = _norm(merged["eNodeB Name"])
                else:
                    raise KeyError("Falta 'Cell Name'/'eNodeB Name' en base para join con EPT (A/B).")

            # --- Primer merge por AT&T_Node_Name (preferencia A) ---
            if not lookup_a.empty:
                merged = merged.merge(
                    lookup_a,
                    on="Cell Name",
                    how="left",
                    suffixes=("", "_A")
                )

            # --- Segundo merge por Node_B_U2000 (preferencia B como respaldo) ---
            if not lookup_b.empty:
                merged = merged.merge(
                    lookup_b,
                    on="Cell Name",
                    how="left",
                    suffixes=("", "_B")
                )

            # --- Lookup C: por CellName (tu DF) vs CellName (EPT) ---
            lookup_c = None
            if "CellName" in ept_df.columns:
                lookup_c_cols = ["CellName"]
                if "Latitud" in ept_df.columns:
                    lookup_c_cols.append("Latitud")
                if "Longitud" in ept_df.columns:
                    lookup_c_cols.append("Longitud")
                if "AT&T_Site_Name" in ept_df.columns:
                    lookup_c_cols.append("AT&T_Site_Name")

                lookup_c = (
                    ept_df[lookup_c_cols]
                    .rename(columns={"Latitud": "LAT", "Longitud": "LON"})
                    .dropna(subset=["CellName"])
                    .drop_duplicates(subset=["CellName"], keep="first")
                )

            # Asegura 'CellName' en la base para esta tercera llave (si ya existe, solo normaliza)
            if lookup_c is not None:
                if "CellName" in merged.columns:
                    merged["CellName"] = _norm(merged["CellName"])
                else:
                    # Si tu DF no trae 'CellName', este paso C se omite sin error.
                    lookup_c = None

            # --- Tercer merge por CellName (preferencia C como último respaldo) ---
            if lookup_c is not None:
                merged = merged.merge(
                    lookup_c,
                    on="CellName",
                    how="left",
                    suffixes=("", "_C")
                )

            # --- Coalesce campo a campo con prioridad A -> B -> C ---
            for col in ["LAT","LON","AT&T_Site_Name"]:
                colA = f"{col}_A"
                colB = f"{col}_B"
                colC = f"{col}_C"
                if col not in merged.columns:
                    merged[col] = pd.NA
                if colA in merged.columns:
                    merged[col] = merged[col].combine_first(merged[colA])
                if colB in merged.columns:
                    merged[col] = merged[col].combine_first(merged[colB])
                if colC in merged.columns:
                    merged[col] = merged[col].combine_first(merged[colC])

            # Limpieza de columnas temporales _A/_B/_C
            merged.drop(
                columns=[c for c in [
                    "LAT_A","LON_A","AT&T_Site_Name_A",
                    "LAT_B","LON_B","AT&T_Site_Name_B",
                    "LAT_C","LON_C","AT&T_Site_Name_C"
                ] if c in merged.columns],
                inplace=True,
                errors="ignore"
            )

            print("EPT aplicado como fuente primaria (A: AT&T_Node_Name, B: Node_B_U2000, C: CellName).")
        else:
            print("No se pudieron leer hojas del EPT; se omite EPT como primario.")
    else:
        print("No se encontró archivo EPT_ATT_UMTS_LTE_*.xlsx; se omite EPT como primario.")
else:
    print("df_out ya traía 100% de LAT/LON/Site; no se necesita EPT como primario.")


# ---------------------------------------------------------------------
# 3B) AE como fallback (solo donde siga faltando)
# ---------------------------------------------------------------------
faltan_ae = (
    _is_blank(merged.get("LAT",             pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("LON",             pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("AT&T_Site_Name",  pd.Series(False, index=merged.index)))
)

if faltan_ae.any():
    ae_path = find_latest_ae_fast(BASE_DIR)
    if ae_path and ae_path.exists():
        ae_df = pd.read_excel(
            ae_path,
            usecols=["eNodeB Name", "LAT", "LON", "AT&T_Site_Name"]
        )
        ae_df["eNodeB Name"] = ae_df["eNodeB Name"].astype(str).str.strip()
        ae_df = ae_df.drop_duplicates(subset=["eNodeB Name"], keep="first")

        # LAT/LON a numérico
        for c in ["LAT", "LON"]:
            if c in ae_df.columns:
                ae_df[c] = pd.to_numeric(ae_df[c], errors="coerce")

        # Normaliza llave y merge
        if "eNodeB Name" not in merged.columns:
            raise KeyError("Falta 'eNodeB Name' en la base para merge con AE.")

        merged["eNodeB Name"] = merged["eNodeB Name"].astype(str).str.strip()
        merged = merged.merge(
            ae_df,
            on="eNodeB Name",
            how="left",
            suffixes=("", "_ae")
        )

        # Coalesce SOLO LAT/LON/Site desde AE (solo donde sigan vacíos)
        for col in ["LAT", "LON", "AT&T_Site_Name"]:
            aux = f"{col}_ae"
            if aux in merged.columns:
                m = _is_blank(merged[col]) if col in merged.columns \
                    else pd.Series(True, index=merged.index)
                merged.loc[m, col] = merged.loc[m, aux]
                merged.drop(columns=[aux], inplace=True, errors="ignore")

        print("AE aplicado como fallback (solo valores aún faltantes).")
    else:
        print("No se encontró All_Ericsson_4G_*.xlsx; no se pudo aplicar AE como fallback.")
else:
    print("EPT (y/o df_out) cubrió 100%; no es necesario AE como fallback.")

df_out = merged

try:
    display(df_out.loc[:, ["eNodeB Name","Cell Name","CellName","LAT","LON","AT&T_Site_Name"]].head(10))
except Exception:
    pass


# =====================================================================
# 4) Calcular PCI = IF(R blank, Q, Q*3 + R)
# =====================================================================
q_col, r_col = "physicalLayerCellIdGroup", "physicalLayerSubCellId"

df_out[q_col] = pd.to_numeric(df_out[q_col], errors="coerce")
df_out[r_col] = pd.to_numeric(df_out[r_col].astype(str).str.strip().replace({"": None}), errors="coerce")

df_out["PCI"] = pd.Series(
    np.where(df_out[r_col].isna(), df_out[q_col], df_out[q_col]*3 + df_out[r_col]),
    index=df_out.index,
    dtype="Int64"
)

print("PCI calculado.")
display(df_out.loc[:, ["eNodeB Name", q_col, r_col, "PCI"]].head(10))

# =====================================================================
# 5) Marcar "MOCN Activo por Celda" y "Al menos una celda de MOCN encendida"
# =====================================================================
pattern = "[{mncLength=3, mcc=334, mnc=90}, {mncLength=2, mcc=334, mnc=3}, {mncLength=2, mcc=1, mnc=1}, {mncLength=2, mcc=1, mnc=1}, {mncLength=2, mcc=1, mnc=1}]"

df_out["MOCN Activo por Celda"] = np.where(
    df_out["additionalPlmnList_mcc"].astype(str).str.strip() == pattern, "Si", "No"
)

truthy = {"si", "sí", "yes", "true", "1"}
enb = df_out["eNodeB Name"].astype(str).str.strip()
mocn = df_out["MOCN Activo por Celda"].astype(str).str.strip().str.lower()
enbs_con_mocn = set(enb[mocn.isin(truthy)])

df_out["Al menos una celda de MOCN encendida"] = np.where(enb.isin(enbs_con_mocn), "Si", "No")

print("Marcadores MOCN listos.")
display(df_out.loc[:, ["eNodeB Name","MOCN Activo por Celda","Al menos una celda de MOCN encendida"]].head(12))

# =====================================================================
# 6) "MME TEF" desde MME (en memoria)
# =====================================================================
# mme_df proviene de read_pattern_to_df("MME")
df_MME = mme_df.iloc[:, :3].copy()
df_MME.columns = ["NodeId", "eNodeBFunction", "TermPointToMmeId"]
df_MME["NodeId"] = df_MME["NodeId"].astype(str).str.strip()

# Filtra TermPointToMmeId de longitud 7
df_MME_7 = df_MME[df_MME["TermPointToMmeId"].astype(str).str.len() == 7].copy()

# COUNTIF por NodeId
mme_counts = df_MME_7["NodeId"].value_counts()

# Map por eNodeB Name
df_out["eNodeB Name"] = df_out["eNodeB Name"].astype(str).str.strip()
df_out["MME TEF"] = df_out["eNodeB Name"].map(mme_counts).fillna(0).astype("Int64")

print("MME TEF mapeado.")
display(df_out.loc[:, ["eNodeB Name","MME TEF"]].head(12))

# =====================================================================
# 7) Resultado final en memoria (sin guardar). Vista de control.
# =====================================================================
print("Resultado final en memoria → shape:", df_out.shape)
cols_check = ["eNodeB Name","eNBId","LAT","LON","AT&T_Site_Name","PCI",
              "MOCN Activo por Celda","Al menos una celda de MOCN encendida","MME TEF"]
display(df_out.loc[:, [c for c in cols_check if c in df_out.columns]].head(20))


Tras eNBId + orden + unique → (64205, 32)


Unnamed: 0,eNodeB Name,eNBId,eNodeB Name Unique
0,ADIFIZC6380,90433,ADIFIZC6380
1,ADIFIZC6380,90433,
2,ADIFIZC6380,90433,
3,ADIFIZC6380,90433,
4,ADIFIZC6380,90433,
5,ADIFIZC6380,90433,
6,ADIFIZC6380,90433,
7,ADIFIZC6380,90433,
8,ADIFIZC6380,90433,
9,ADIFIZC6380,90433,


  merged[col] = merged[col].combine_first(merged[colA])
  merged[col] = merged[col].combine_first(merged[colA])


EPT aplicado como fuente primaria (A: AT&T_Node_Name, B: Node_B_U2000, C: CellName).
AE aplicado como fallback (solo valores aún faltantes).


Unnamed: 0,eNodeB Name,Cell Name,CellName,LAT,LON,AT&T_Site_Name
0,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_12_T,19.40513,-99.095661,DIFIZC6380
1,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_10_J,19.40513,-99.095661,DIFIZC6380
2,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_9_T,19.40513,-99.095661,DIFIZC6380
3,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_12_M,19.40513,-99.095661,DIFIZC6380
4,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_11_T,19.40513,-99.095661,DIFIZC6380
5,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_12_J,19.40513,-99.095661,DIFIZC6380
6,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_10_M,19.40513,-99.095661,DIFIZC6380
7,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_10_T,19.40513,-99.095661,DIFIZC6380
8,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_11_J,19.40513,-99.095661,DIFIZC6380
9,ADIFIZC6380,ADIFIZC6380,EiLDIFIZC6380_9_J,19.40513,-99.095661,DIFIZC6380


PCI calculado.


Unnamed: 0,eNodeB Name,physicalLayerCellIdGroup,physicalLayerSubCellId,PCI
0,ADIFIZC6380,91,2.0,275
1,ADIFIZC6380,91,0.0,273
2,ADIFIZC6380,89,2.0,269
3,ADIFIZC6380,91,2.0,275
4,ADIFIZC6380,91,1.0,274
5,ADIFIZC6380,91,2.0,275
6,ADIFIZC6380,91,0.0,273
7,ADIFIZC6380,91,0.0,273
8,ADIFIZC6380,91,1.0,274
9,ADIFIZC6380,89,2.0,269


Marcadores MOCN listos.


Unnamed: 0,eNodeB Name,MOCN Activo por Celda,Al menos una celda de MOCN encendida
0,ADIFIZC6380,Si,Si
1,ADIFIZC6380,Si,Si
2,ADIFIZC6380,Si,Si
3,ADIFIZC6380,Si,Si
4,ADIFIZC6380,Si,Si
5,ADIFIZC6380,Si,Si
6,ADIFIZC6380,Si,Si
7,ADIFIZC6380,Si,Si
8,ADIFIZC6380,Si,Si
9,ADIFIZC6380,Si,Si


MME TEF mapeado.


Unnamed: 0,eNodeB Name,MME TEF
0,ADIFIZC6380,2
1,ADIFIZC6380,2
2,ADIFIZC6380,2
3,ADIFIZC6380,2
4,ADIFIZC6380,2
5,ADIFIZC6380,2
6,ADIFIZC6380,2
7,ADIFIZC6380,2
8,ADIFIZC6380,2
9,ADIFIZC6380,2


Resultado final en memoria → shape: (64205, 33)


Unnamed: 0,eNodeB Name,eNBId,LAT,LON,AT&T_Site_Name,PCI,MOCN Activo por Celda,Al menos una celda de MOCN encendida,MME TEF
0,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,275,Si,Si,2
1,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,273,Si,Si,2
2,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,269,Si,Si,2
3,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,275,Si,Si,2
4,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,274,Si,Si,2
5,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,275,Si,Si,2
6,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,273,Si,Si,2
7,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,273,Si,Si,2
8,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,274,Si,Si,2
9,ADIFIZC6380,90433,19.40513,-99.095661,DIFIZC6380,269,Si,Si,2


In [8]:
today = date.today()
yyyymm = f"{today.year}{today.month:02d}"

final_excel = BASE_DIR / f"All_Ericsson_4G_{yyyymm}.xlsx"
tmp_excel   = BASE_DIR / f"~tmp_All_Ericsson_4G_{yyyymm}.xlsx"

# === 2) Garantizar columnas y orden ===
for col in HEADERS:
    if col not in df_out.columns:
        df_out[col] = pd.NA

df_out = df_out[HEADERS]

# === 3) Escribir temporal ===
df_out.to_excel(tmp_excel, index=False)

# === 4) Aplicar formato a encabezados ===
wb = load_workbook(tmp_excel)
ws = wb.active

# Congelar fila de encabezado
ws.freeze_panes = "A2"

# Estilo vertical (como antes)
for col_idx, header in enumerate(HEADERS, start=1):
    cell = ws.cell(row=1, column=col_idx)
    cell.value = header
    cell.font = Font(name="Aptos Narrow", size=11)
    cell.alignment = Alignment(
        textRotation=90, horizontal="center", vertical="bottom", wrap_text=True
    )

# Guardar versión final
wb.save(final_excel)
wb.close()

# Pequeña pausa para liberar el handle en Windows
time.sleep(0.5)
# === 5) Limpiar temporal ===
try:
    tmp_excel.unlink()
except Exception as e:
    print("No se pudo borrar temporal:", e)

print(f"✅ Archivo final guardado -> {final_excel}")

✅ Archivo final guardado -> C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\4G\All_Ericsson_4G_202601.xlsx
