In [64]:
import os, glob
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter
import numpy as np
from datetime import date
import unicodedata


# Ruta base (ajústala si cambia)
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G")

# Lista de 31 encabezados, en el orden en que los quieres (eNBId queda en W si mantienes este orden)
HEADERS = [
     "RNC","administrativeState","cId","iubLinkRef",
    "localCellId","lac","maximumTransmissionPower","maxPwrMax",
    "mocnCellProfileRef","operationalState","primaryCpichPower","primaryScramblingCode",
    "rac","sac","tCell","uarfcnDl",
    "uarfcnUl","uraList","UtranCellId",
    "NodeB","NodeBUnique","LAT","LON","AT&T_Site_Name",
    "Node_B_ID", "Gestor"
]




In [66]:

import re

def appendfiles(filenamepattern: str) -> str:
    """
    Integra todos los TXT que matchean pattern + '_*.txt' en un solo archivo,
    agregando como ÚLTIMA COLUMNA el 'Gestor' derivado del sufijo numérico del archivo.
    Devuelve el nombre del archivo integrado (sin ruta).
    """
    searchpattern = str(BASE_DIR / f"{filenamepattern}_*.txt")
    filestoread = glob.glob(searchpattern)

    outputfile_name = f"Integrated_{filenamepattern}_files.txt"
    output_path = BASE_DIR / outputfile_name

    print("Buscando:", searchpattern)
    print("Archivos:", filestoread)

    with open(output_path, "w", encoding="utf-8") as outputfile:
        for name in filestoread:
            # Extrae número después del guion bajo: *_NN.txt
            m = re.search(r"_(\d+)\.txt$", os.path.basename(name), flags=re.IGNORECASE)
            gestor = m.group(1) if m else ""

            with open(name, "r", encoding="utf-8") as f:
                for line in f:
                    # evita líneas vacías puras
                    if not line.strip():
                        continue
                    # agrega el Gestor como última columna
                    line = line.rstrip("\n")
                    outputfile.write(f"{line}\t{gestor}\n")
            print("Agregado:", name)

    print("Integrado =>", outputfile_name)
    return outputfile_name



def cleanfile(filename: str, ignorelines=None) -> str:
    """
    Elimina líneas que contengan cualquiera de los patrones indicados.
    Devuelve el nombre del archivo limpio (sin ruta).
    """
    if ignorelines is None:
        ignorelines = ["SubNetwork,", "instance(s)", "NodeId"]

    inputfile = BASE_DIR / filename
    cleanfile_name = f"Clean_{filename}"
    cleanfile_path = BASE_DIR / cleanfile_name

    with open(inputfile, 'r', encoding="utf-8") as f_in:
        lines = f_in.readlines()

    kept = []
    for line in lines:
        if any(p in line for p in ignorelines):
            continue
        kept.append(line)

    with open(cleanfile_path, 'w', encoding="utf-8") as f_out:
        f_out.writelines(kept)

    print(f"Limpieza OK -> {cleanfile_name} ({len(kept)} líneas)")
    return cleanfile_name


def convert_to_excel(cleanfile_name: str) -> str:
    """
    Lee TXT tab-delimited sin encabezados y guarda a Excel.
    Devuelve el nombre del archivo Excel (sin ruta).
    """
    cleanfile_path = BASE_DIR / cleanfile_name
    out_xlsx = f"Converted_{cleanfile_name}.xlsx"
    out_path = BASE_DIR / out_xlsx

    df = pd.read_csv(cleanfile_path, delimiter='\t', header=None)
    df.to_excel(out_path, index=False, header=None)
    print(f"Convertido a Excel -> {out_xlsx}  (shape={df.shape})")
    return out_xlsx



In [67]:

# EUtranCellFDD
eu_txt = appendfiles('UtranCell')
eu_clean = cleanfile(eu_txt)
eu_xlsx = convert_to_excel(eu_clean)

# nodeid
nd_txt = appendfiles('nodeid')
nd_clean = cleanfile(nd_txt)
nd_xlsx = convert_to_excel(nd_clean)




Buscando: C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\UtranCell_*.txt
Archivos: ['C:\\Users\\SCaracoza\\Documents\\AT&T\\LST Cell Ran\\Ericsson\\3G\\UtranCell_14.txt', 'C:\\Users\\SCaracoza\\Documents\\AT&T\\LST Cell Ran\\Ericsson\\3G\\UtranCell_9.txt']
Agregado: C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\UtranCell_14.txt
Agregado: C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\UtranCell_9.txt
Integrado => Integrated_UtranCell_files.txt
Limpieza OK -> Clean_Integrated_UtranCell_files.txt (24706 líneas)
Convertido a Excel -> Converted_Clean_Integrated_UtranCell_files.txt.xlsx  (shape=(24706, 22))
Buscando: C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\nodeid_*.txt
Archivos: ['C:\\Users\\SCaracoza\\Documents\\AT&T\\LST Cell Ran\\Ericsson\\3G\\nodeid_14.txt', 'C:\\Users\\SCaracoza\\Documents\\AT&T\\LST Cell Ran\\Ericsson\\3G\\nodeid_9.txt']
Agregado: C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\nodeid_14.txt
Agregado: C

In [70]:
# Archivo base desde la conversión de EUtranCellFDD
wb = load_workbook(BASE_DIR / eu_xlsx)  # ej. Converted_Clean_Integrated_UtranCell_files.txt.xlsx
ws = wb.active

# Elimina la columna B (índice 2). 'amount=1' borra solo una columna.
# openpyxl recorrerá C→B, D→C, etc.
if ws.max_column >= 2:
    ws.delete_cols(idx=2, amount=1)

wb.save(BASE_DIR / "Modified_workfile.xlsx")
print("Reacomodo OK -> Modified_workfile.xlsx (columna B eliminada)")


Reacomodo OK -> Modified_workfile.xlsx (columna B eliminada)


In [74]:
# Leemos el archivo reacomodado SIN headers
df_base = pd.read_excel(BASE_DIR / "Modified_workfile.xlsx", header=None)

# --- mover B -> S (índices base 0: B=1, S=19) ---
while df_base.shape[1] < 20:
    df_base[df_base.shape[1]] = pd.NA

src_idx = 1  # B
dst_idx = 19  # S
colB = df_base.iloc[:, src_idx].copy()
df_base.drop(df_base.columns[src_idx], axis=1, inplace=True)
if dst_idx > src_idx:
    dst_idx -= 1
df_base.insert(dst_idx, colB.name, colB)
# --- fin mover B -> S ---

# === 1) Captura y ELIMINA la última columna cruda (Gestor del TXT) ===
gestor_series = df_base.iloc[:, -1].copy()
df_base = df_base.iloc[:, :-1]  # quita la última columna para que no ocupe otra cabecera

# === 2) Mapea el resto por POSICIÓN usando HEADERS sin 'Gestor' ===
headers_wo_gestor = [h for h in HEADERS if h != "Gestor"]
expected_wo_gestor = len(headers_wo_gestor)

# Completa o recorta al tamaño esperado (sin Gestor)
if df_base.shape[1] < expected_wo_gestor:
    for _ in range(expected_wo_gestor - df_base.shape[1]):
        df_base[df_base.shape[1]] = pd.NA
elif df_base.shape[1] > expected_wo_gestor:
    extra_cols = df_base.shape[1] - expected_wo_gestor
    print(f"⚠️ Se detectaron {extra_cols} columnas extra (sin Gestor). Serán descartadas.")
    df_base = df_base.iloc[:, :expected_wo_gestor]

# Asigna nombres por posición para estas columnas
df_base.columns = headers_wo_gestor

# === 3) Inserta de nuevo 'Gestor' con la serie capturada ===
df_base["Gestor"] = gestor_series.values

# === 4) Reordena exactamente según HEADERS completos (columna Z para Gestor) ===
df_base = df_base[HEADERS]

# === 5) Guardar ===
out_path = BASE_DIR / "Modified_with_headers.xlsx"
df_base.to_excel(out_path, index=False)
print("✅ Archivo final generado:", out_path)


✅ Archivo final generado: C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\Modified_with_headers.xlsx


In [75]:
# Columnas con cadenas estilo "k1=v1,k2=v2,..."
cols_mo = [
    "iubLinkRef",       # ejemplo: ... ,IubLink=Iub_0420  -> Iub_0420
    "lac",              # ... ,LocationArea=41416         -> 41416
    "mocnCellProfileRef",# ... ,MocnCellProfile=Telefonica -> Telefonica
    "rac",              # ... ,RoutingArea=146            -> 146
    "sac",              # ... ,ServiceArea=65055          -> 65055
    "uraList",          # [ ...,Ura=146]                  -> 146
]

def extract_last_token_after_equal(s: pd.Series) -> pd.Series:
    """
    Toma una Serie de strings y devuelve el segmento que está después del último '='.
    Limpiaespacios, corchetes y comas/paréntesis finales si los hubiera.
    """
    out = (
        s.astype(str)
         # divide solo una vez desde la derecha: ['prefix', 'ultimo_valor']
         .str.rsplit('=', n=1).str[-1]
         .str.strip()                  # quita espacios
         .str.strip('[]()')            # quita corchetes/paréntesis de extremos
         .str.replace(r'[,\;]$', '', regex=True)  # quita coma/; final si existe
    )
    # Cuando no había '=', rsplit devuelve todo; si quieres NA en esos casos:
    has_equal = s.astype(str).str.contains('=', regex=False)
    out = out.where(has_equal, other=pd.NA)
    return out

# Creamos columnas normalizadas (puedes sobreescribir las originales si prefieres)
for c in cols_mo:
    df_base[c] = extract_last_token_after_equal(df_base[c])

# Convierte a numérico donde corresponde (las que deben ser números):
cols_numericas = ["lac", "rac", "sac", "uraList"]
for c in cols_numericas:
    if c in df_base.columns:
        df_base[c] = pd.to_numeric(df_base[c], errors="coerce")

# Guarda resultado (opcional)
df_base.to_excel(BASE_DIR / "Modified_with_headers_extracted.xlsx", index=False)

print("Extracción OK -> valores finales asignados en", cols_mo)

Extracción OK -> valores finales asignados en ['iubLinkRef', 'lac', 'mocnCellProfileRef', 'rac', 'sac', 'uraList']


In [76]:
# ---------- Helpers ----------
def _is_blank(s: pd.Series) -> pd.Series:
    return s.isna() | s.astype(str).str.strip().eq("")

def coalesce_to_single_column(df, variants, target):
    present = [c for c in variants if c in df.columns]
    if not present:
        return df
    cols = [target] + [c for c in present if c != target] if target in present else present
    df[target] = df[cols].bfill(axis=1).iloc[:, 0]
    to_drop = [c for c in present if c != target]
    df.drop(columns=to_drop, inplace=True, errors="ignore")
    return df

def prev_month_yyyymm(today=None):
    if today is None:
        today = date.today()
    y = today.year
    m = today.month - 1
    if m == 0:
        y -= 1
        m = 12
    return f"{y}{m:02d}"

# ---------- Config ----------
#prev_yymm = prev_month_yyyymm()
prev_yymm = f"202508"

ae_path   = BASE_DIR / f"All_Ericsson_3G_{prev_yymm}.xlsx"
ept_glob  = str(BASE_DIR / "EPT_ATT_UMTS_LTE_*.xlsx")

# ========== ETAPA 0: punto de partida ==========
merged = df_base.copy()  #NOn volver a usar df_base después de aquí

# ========== ETAPA 1: AE por UtranCellId (NodeB, LAT, LON, AT&T_Site_Name) + fallback EPT por UtranCellId ==========
# --- All_Ericsson por UtranCellId ---
if ae_path.exists():
    ae_cols = ["UtranCellId", "NodeB", "LAT", "LON", "AT&T_Site_Name"]
    ae_df = pd.read_excel(ae_path, usecols=ae_cols)
    if "UtranCellId" not in ae_df.columns:
        raise KeyError(f"{ae_path} no tiene 'UtranCellId'.")

    ae_df["UtranCellId"] = ae_df["UtranCellId"].astype(str).str.strip()
    ae_df = ae_df.drop_duplicates(subset=["UtranCellId"], keep="first")

    # LAT/LON a numérico si existen
    for c in ["LAT", "LON"]:
        if c in ae_df.columns:
            ae_df[c] = pd.to_numeric(ae_df[c], errors="coerce")

    # Normaliza llave en base y merge
    merged["UtranCellId"] = merged["UtranCellId"].astype(str).str.strip()
    merged = merged.merge(ae_df, on="UtranCellId", how="left", suffixes=("", "_ae"))

    # Coalesce SOLO LAT/LON/Site desde AE (NO tocar NodeB aquí)
    for col in ["LAT", "LON", "AT&T_Site_Name"]:
        aux = f"{col}_ae"
        if aux in merged.columns:
            mask = _is_blank(merged[col]) if col in merged.columns else pd.Series(True, index=merged.index)
            merged[col] = merged[col].where(~mask, merged[aux])
            merged.drop(columns=[aux], inplace=True, errors="ignore")
else:
    print(f"⚠️ No se encontró {ae_path}. Se salta AE en etapa 1.")

# --- Fallback EPT por UtranCellId (completar solo faltantes de LAT/LON/Site y obtener candidato NodeB_ept) ---
faltan1 = (
    _is_blank(merged.get("NodeB", pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("LAT",   pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("LON",   pd.Series(False, index=merged.index))) |
    _is_blank(merged.get("AT&T_Site_Name", pd.Series(False, index=merged.index)))
)

if faltan1.any():
    ept_matches = glob.glob(ept_glob)
    if ept_matches:
        ept_file = ept_matches[0]
        ept_sheets = ["EPT_3G_LTE_OUTDOOR", "PLAN_OUTDOOR", "EPT_3G_LTE_INDOOR", "PLAN_INDOOR", "Eventos_Especiales"]

        frames = []
        for sh in ept_sheets:
            try:
                tmp = pd.read_excel(ept_file, sheet_name=sh, engine="openpyxl")
                frames.append(tmp)
            except Exception:
                pass

        if frames:
            ept_df = pd.concat(frames, ignore_index=True)

            # Unificar nombres a los destinos esperados para lookup por UtranCellId
            ept_df = coalesce_to_single_column(ept_df, ["ATT_CELL_ID_Name", "UtranCellId"], "UtranCellId")
            ept_df = coalesce_to_single_column(ept_df, ["AT&T_Node_Name", "NodeB"], "NodeB")
            ept_df = coalesce_to_single_column(ept_df, ["Latitud", "LAT"], "LAT")
            ept_df = coalesce_to_single_column(ept_df, ["Longitud", "LON"], "LON")
            ept_df = coalesce_to_single_column(ept_df, ["AT&T_Site_Name"], "AT&T_Site_Name")

            if "UtranCellId" not in ept_df.columns:
                raise KeyError("EPT no tiene 'UtranCellId'/'ATT_CELL_ID_Name' para fallback (etapa 1).")

            ept_df["UtranCellId"] = ept_df["UtranCellId"].astype(str).str.strip()
            for c in ["LAT", "LON"]:
                if c in ept_df.columns:
                    ept_df[c] = ept_df[c].astype(str).str.strip("[]").str.replace(",", "", regex=False)
                    ept_df[c] = pd.to_numeric(ept_df[c], errors="coerce")

            ept_df = ept_df.drop_duplicates(subset=["UtranCellId"], keep="first")

            # Build lookup (mantener NodeB_ept para construir NodeB final)
            cols_keep = [c for c in ["UtranCellId", "NodeB", "LAT", "LON", "AT&T_Site_Name"] if c in ept_df.columns]
            ept_lookup = ept_df[cols_keep].rename(columns={
                "NodeB": "NodeB_ept",
                "LAT": "LAT_ept",
                "LON": "LON_ept",
                "AT&T_Site_Name": "AT&T_Site_Name_ept",
            })

            merged = merged.merge(ept_lookup, on="UtranCellId", how="left")

            # Completar SOLO faltantes de LAT/LON/Site desde EPT
            need_lat  = _is_blank(merged["LAT"])   if "LAT"   in merged.columns else pd.Series(False, index=merged.index)
            need_lon  = _is_blank(merged["LON"])   if "LON"   in merged.columns else pd.Series(False, index=merged.index)
            need_site = _is_blank(merged["AT&T_Site_Name"]) if "AT&T_Site_Name" in merged.columns else pd.Series(False, index=merged.index)

            if "LAT_ept" in merged.columns:               merged.loc[need_lat,  "LAT"] = merged.loc[need_lat,  "LAT_ept"]
            if "LON_ept" in merged.columns:               merged.loc[need_lon,  "LON"] = merged.loc[need_lon,  "LON_ept"]
            if "AT&T_Site_Name_ept" in merged.columns:    merged.loc[need_site, "AT&T_Site_Name"] = merged.loc[need_site, "AT&T_Site_Name_ept"]

            # Limpia auxiliares de LAT/LON/Site (conserva NodeB_ept para armar NodeB final)
            merged.drop(columns=[c for c in ["LAT_ept","LON_ept","AT&T_Site_Name_ept"] if c in merged.columns],
                        inplace=True, errors="ignore")
        else:
            print("⚠️ EPT sin hojas legibles; no se aplicó fallback en etapa 1.")
    else:
        print("⚠️ No se encontró archivo EPT para etapa 1.")
else:
    print("AE cubrió 100% en etapa 1; no se necesita EPT.")

# --- Construcción de NodeB SOLO desde AE/EPT (sin mover su posición y sin FutureWarning) ---
cands = []
if "NodeB_ae" in merged.columns:  cands.append(merged["NodeB_ae"])
if "NodeB_ept" in merged.columns: cands.append(merged["NodeB_ept"])

if cands:
    nodeb_final = cands[0]
    for s in cands[1:]:
        nodeb_final = nodeb_final.combine_first(s)   # evita FutureWarning
else:
    nodeb_final = pd.Series(pd.NA, index=merged.index)

nodeb_final = nodeb_final.fillna("")   # <- deja vacío en lugar de NaN


# Asignar sobre 'NodeB' manteniendo su posición; si no existe, insertarla en la posición de HEADERS
if "NodeB" in merged.columns:
    merged.loc[:, "NodeB"] = nodeb_final
else:
    idx_nodeb = HEADERS.index("NodeB") if "NodeB" in HEADERS else len(merged.columns)
    merged.insert(idx_nodeb, "NodeB", nodeb_final)

# Limpia auxiliares de NodeB
merged.drop(columns=[c for c in ["NodeB_ae","NodeB_ept"] if c in merged.columns],
            inplace=True, errors="ignore")
# (Opcional) Guardar intermedio por trazabilidad
intermediate_path = BASE_DIR / "Datos_Modified_etapa1.xlsx"
try:
    merged.to_excel(intermediate_path, index=False)
    print("Guardado intermedio (etapa 1) →", intermediate_path)
except Exception as e:
    print("No se guardó intermedio etapa 1:", e)




Guardado intermedio (etapa 1) → C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\Datos_Modified_etapa1.xlsx


In [77]:
# ========== ETAPA 2: AE por (RNC+NodeB) → Node_B_ID + fallback EPT por (RNC+AT&T_Node_Name) ==========
# Importante: seguir sobre 'merged' (NO volver a df_base)

# AE por RNC+NodeB
if ae_path.exists():
    ae_cols2 = ["RNC", "NodeB", "Node_B_ID"]
    ae_df2 = pd.read_excel(ae_path, usecols=[c for c in ae_cols2 if c])
    missing = [c for c in ["RNC","NodeB"] if c not in ae_df2.columns]
    if missing:
        print(f"⚠️ AE sin columnas {missing} para etapa 2. Se salta AE etapa 2.")
    else:
        ae_df2["RNC"]   = ae_df2["RNC"].astype(str).str.strip()
        ae_df2["NodeB"] = ae_df2["NodeB"].astype(str).str.strip()
        ae_df2 = ae_df2.drop_duplicates(subset=["RNC","NodeB"], keep="first")
        ae_df2 = ae_df2.rename(columns={"Node_B_ID": "Node_B_ID_ae"})

        merged["RNC"]   = merged["RNC"].astype(str).str.strip()
        merged["NodeB"] = merged["NodeB"].astype(str).str.strip()

        merged = merged.merge(ae_df2, on=["RNC","NodeB"], how="left")
        # Completar SOLO Node_B_ID desde AE
        if "Node_B_ID_ae" in merged.columns:
            if "Node_B_ID" in merged.columns:
                mask = _is_blank(merged["Node_B_ID"])
                merged.loc[mask, "Node_B_ID"] = merged.loc[mask, "Node_B_ID_ae"]
            else:
                merged["Node_B_ID"] = merged["Node_B_ID_ae"]
            merged.drop(columns=["Node_B_ID_ae"], inplace=True, errors="ignore")
else:
    print(f"⚠️ No se encontró {ae_path} para etapa 2.")

# Fallback EPT por (RNC + AT&T_Node_Name) → Node_B_ID
faltan2 = _is_blank(merged.get("Node_B_ID", pd.Series(False, index=merged.index)))
if faltan2.any():
    ept_matches = glob.glob(ept_glob)
    if ept_matches:
        ept_file = ept_matches[0]
        ept_sheets = ["EPT_3G_LTE_OUTDOOR", "PLAN_OUTDOOR", "EPT_3G_LTE_INDOOR", "PLAN_INDOOR", "Eventos_Especiales"]
        frames = []
        for sh in ept_sheets:
            try:
                tmp = pd.read_excel(ept_file, sheet_name=sh, engine="openpyxl")
                frames.append(tmp)
            except Exception:
                pass
        if frames:
            ept_df2 = pd.concat(frames, ignore_index=True)

            ept_df2 = coalesce_to_single_column(ept_df2, ["RNC"], "RNC")
            ept_df2 = coalesce_to_single_column(ept_df2, ["AT&T_Node_Name", "NodeB", "Node_B_Name"], "AT&T_Node_Name")
            ept_df2 = coalesce_to_single_column(ept_df2, ["Node_B_ID", "NodeB_ID", "NodeB Id", "Node_B Id"], "Node_B_ID")

            missing = [c for c in ["RNC","AT&T_Node_Name","Node_B_ID"] if c not in ept_df2.columns]
            if missing:
                raise KeyError(f"EPT carece de columnas para etapa 2: {missing}")

            for c in ["RNC","AT&T_Node_Name"]:
                ept_df2[c] = ept_df2[c].astype(str).str.strip()

            ept_lookup2 = (
                ept_df2[["RNC","AT&T_Node_Name","Node_B_ID"]]
                .dropna(subset=["RNC","AT&T_Node_Name"])
                .drop_duplicates(subset=["RNC","AT&T_Node_Name"], keep="first")
                .rename(columns={"Node_B_ID":"Node_B_ID_ept"})
            )

            merged["RNC"]   = merged["RNC"].astype(str).str.strip()
            merged["NodeB"] = merged["NodeB"].astype(str).str.strip()

            merged = merged.merge(
                ept_lookup2,
                left_on=["RNC","NodeB"],
                right_on=["RNC","AT&T_Node_Name"],
                how="left"
            )

            need_id = _is_blank(merged["Node_B_ID"]) if "Node_B_ID" in merged.columns else pd.Series(False, index=merged.index)
            if "Node_B_ID_ept" in merged.columns:
                merged.loc[need_id, "Node_B_ID"] = merged.loc[need_id, "Node_B_ID_ept"]

            merged.drop(columns=[c for c in ["Node_B_ID_ept","AT&T_Node_Name"] if c in merged.columns],
                        inplace=True, errors="ignore")
        else:
            print("⚠️ EPT sin hojas leíbles; no se aplicó fallback en etapa 2.")
    else:
        print("⚠️ No se encontró archivo EPT para etapa 2.")
else:
    print("AE cubrió 100% Node_B_ID en etapa 2; no se necesita EPT.")

# ========== ETAPA FINAL: conformar columnas y guardar ==========
# Asegura tener exactamente los HEADERS definidos (sin extras)
final_cols = HEADERS[:]  # HEADERS ya incluye "Node_B_ID"
for c in final_cols:
    if c not in merged.columns:
        merged[c] = pd.NA
merged = merged[final_cols]

final_path = BASE_DIR / "Datos_Modified.xlsx"
merged.to_excel(final_path, index=False)
print("✅ Guardado FINAL →", final_path)

✅ Guardado FINAL → C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\Datos_Modified.xlsx


In [78]:
from pathlib import Path
import pandas as pd

# === Configura tu carpeta base (una CARPETA, no un archivo) ===
BASE_DIR = Path(r"C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G")

# Si te pasaron un archivo completo en BASE_DIR, normalízalo:
if BASE_DIR.suffix.lower() == ".xlsx":
    src = BASE_DIR
    BASE_DIR = src.parent
else:
    # Caso normal: BASE_DIR es carpeta; el archivo esperado es Datos_Modified.xlsx
    src = BASE_DIR / "Datos_Modified.xlsx"

dst = BASE_DIR / "Datos_Modified_order.xlsx"

# Validaciones útiles
if not BASE_DIR.exists():
    raise FileNotFoundError(f"La carpeta base no existe: {BASE_DIR}")

if src.is_dir():
    raise IsADirectoryError(
        f"'{src}' es una carpeta. Debes apuntar a un archivo .xlsx, por ejemplo: {src / 'Datos_Modified.xlsx'}"
    )

if not src.exists():
    similares = list(BASE_DIR.glob("*Datos_Modified*.xlsx"))
    sugerencia = f"\nSugeridos: {similares}" if similares else ""
    raise FileNotFoundError(f"No encontré el archivo de entrada: {src}{sugerencia}")

# === Leer
try:
    df = pd.read_excel(src, engine="openpyxl")
except PermissionError as e:
    raise PermissionError(
        f"No pude leer '{src}'. ¿Está abierto en Excel? Ciérralo e intenta de nuevo."
    ) from e

if "NodeB" not in df.columns:
    raise KeyError("El archivo no contiene la columna 'NodeB'.")

# === Ordenar por NodeB (A→Z, case-insensitive, ignorando espacios)
df_sorted = df.sort_values(
    by="NodeB",
    key=lambda s: s.astype("string").str.strip().str.casefold(),
    kind="mergesort",
    na_position="last",
)

# === Calcular NodeBUnique: solo la primera aparición de cada NodeB
key = df_sorted["NodeB"].astype("string").str.strip().str.casefold()
first_hit = ~key.duplicated(keep="first") & key.fillna("").ne("")
df_sorted["NodeBUnique"] = df_sorted["NodeB"].where(first_hit, "")

# (Si prefieres NaN en lugar de cadena vacía, usa pd.NA en la línea anterior)

# === Guardar
try:
    df_sorted.to_excel(dst, index=False, na_rep="")
except PermissionError as e:
    raise PermissionError(
        f"No pude escribir '{dst}'. Si existe, asegúrate de que NO esté abierto en Excel."
    ) from e

print("✅ Archivo ordenado con NodeBUnique guardado en:", dst)


✅ Archivo ordenado con NodeBUnique guardado en: C:\Users\SCaracoza\Documents\AT&T\LST Cell Ran\Ericsson\3G\Datos_Modified_order.xlsx


In [79]:


final_excel = BASE_DIR / "Datos_Modified_order.xlsx"
tmp_excel   = BASE_DIR / "~tmp_Datos_Modified_order.xlsx"

# === 1) Releer, asegurar columnas y orden ===
df_out = pd.read_excel(final_excel)

for col in HEADERS:
    if col not in df_out.columns:
        df_out[col] = pd.NA

df_out = df_out[HEADERS]

# Escribe temporal sin 'nan'
df_out.to_excel(tmp_excel, index=False, na_rep="")

# === 2) Estilos y ajustes selectivos ===
wb = load_workbook(tmp_excel)
ws = wb.active

# Congelar encabezado y aplicar autofiltro
ws.freeze_panes = "A2"
ws.auto_filter.ref = ws.dimensions

# Estilo de encabezados
header_fill = PatternFill(fill_type="solid", start_color="FFBFBFBF", end_color="FFBFBFBF")
ws.row_dimensions[1].height = 90  # más alto para leer el header rotado

for col_idx, header in enumerate(HEADERS, start=1):
    c = ws.cell(row=1, column=col_idx)
    c.value = header
    c.font = Font(name="Aptos Narrow", size=9, bold=True)
    c.alignment = Alignment(textRotation=90, horizontal="center", vertical="bottom", wrap_text=True)
    c.fill = header_fill

# ======== SOLO AJUSTAR ALGUNAS COLUMNAS ========
# Opción A: anchos FIJOS por columna (recomendado si ya sabes los tamaños)
width_overrides = {
    "NodeB": 15,
    "AT&T_Site_Name": 15,
    "RNC": 14,
    "NodeBUnique": 15,
    "UtranCellId":20,
    "LAT": 15,
    "LON":15
}

for col_name, width in width_overrides.items():
    if col_name in HEADERS:
        col_letter = get_column_letter(HEADERS.index(col_name) + 1)
        ws.column_dimensions[col_letter].width = width

# --- Opción B (opcional): auto-fit SOLO para algunas columnas ---
#   Si prefieres autoajustar *solo* algunas (y no tocar el resto), usa esta lista:
AUTO_FIT = []  # por ejemplo: ["AT&T_Site_Name", "NodeB"]

for col_name in AUTO_FIT:
    if col_name in HEADERS:
        col_idx = HEADERS.index(col_name) + 1
        col_letter = get_column_letter(col_idx)
        # calcula ancho por contenido + header (percentil 95), con límites
        lens = df_out[col_name].astype(str).replace("nan", "").str.len()
        p95 = int(lens.quantile(0.95)) if len(lens) else 0
        header_len = len(str(col_name))
        width = min(max(8, max(p95, header_len) + 2), 50)
        ws.column_dimensions[col_letter].width = width

# (Opcional) shrink-to-fit en cuerpo para que el texto largo se vea mejor sin cambiar ancho:
for row in ws.iter_rows(min_row=2, max_row=ws.max_row, min_col=1, max_col=ws.max_column):
    for cell in row:
        cell.alignment = Alignment(vertical="center")

# === 3) Guardar y limpiar tmp ===
wb.save(final_excel)

try:
    tmp_excel.unlink()
except Exception as e:
    print("No se pudo borrar temporal:", e)

print("Ajuste final OK → headers rotados y grises; anchos aplicados solo a columnas seleccionadas.")


No se pudo borrar temporal: [WinError 32] El proceso no tiene acceso al archivo porque está siendo utilizado por otro proceso: 'C:\\Users\\SCaracoza\\Documents\\AT&T\\LST Cell Ran\\Ericsson\\3G\\~tmp_Datos_Modified_order.xlsx'
Ajuste final OK → headers rotados y grises; anchos aplicados solo a columnas seleccionadas.
