In [None]:
# =====================================================================
# Proceso de Automatización NOKIA 5G
# =====================================================================

import os, glob, io, re
from pathlib import Path
import pandas as pd
from IPython.lib.deepreload import load_next
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Font, Border, PatternFill
from openpyxl.utils import get_column_letter
import numpy as np
from datetime import date
import unicodedata
from charset_normalizer import from_path


# Base directory that holds the input CSV/XLSX files (adjust it if it changes)
#BASE_DIR = Path(r"C:\Users\EAlor\OneDrive - ACS Solutions\Documents\AT&T\LST Cell Ran\Nokia New\Nokia Noviembre")
BASE_DIR = Path(r"C:\Users\EAlor\OneDrive - ACS Solutions\Documents\AT&T\LST Cell Ran\Nokia New\XML_Output\Diciembre")


# Columns read from each source CSV, in the required order
HEADER_NRCELL = [
    "VERSION",
    "DISTNAME",
    "MOID",
    "angle",
    "name",
    "actDl256Qam",
    "administrativeState",
    "availabilityStatus",
    "cellBarred",
    "cellName",
    "freqBandIndicatorNR",
    "lcrId",
    "nrCellIdentity",
    "nrCellType",
    "operationalState",
    "pMax",
    "physCellId",
    "arfcnSsbPbch",
]

HEADER_MRBTS = [
    "FILENAME",
    "DATETIME",
    "VERSION",
    "DISTNAME",
    "MOID",
    "name",
    "altitude",
    "btsName",
    "latitude",
    "longitude",
    "blockingState",
]

HEADER_NRCELL_FDD = [
    "DISTNAME",
    "chBwDl",
    "chBwUl",
    "nrarfcnDl",
    "nrarfcnUl",
]

HEADER_NRDSSLTE = [
    "DISTNAME",
    "enbPlmn_mcc_mnc_mncLength",
    "ltePhyCellId",
    "ssbPosition",
]

HEADER_NRPLMNSET_NSA = [
    "DISTNAME",
    "configuredEpsTac",
    "nrPlmnDNList",
]

# Header row of the final report: the two site columns, every NRCELL column,
# the non-key columns of the three child tables, and the coordinates.
HEADERS = (
    ["AT&T_Site_Name", "Site ID"]
    + HEADER_NRCELL
    + HEADER_NRCELL_FDD[1:]
    + HEADER_NRDSSLTE[1:]
    + HEADER_NRPLMNSET_NSA[1:]
    + ["LAT", "LON"]
)


In [None]:
def read_csv_files(filename: str, header, encoder):
    """Load selected columns of a CSV file that lives under ``BASE_DIR``.

    Args:
        filename: File name, joined onto ``BASE_DIR``.
        header: Column names to read; also the column order of the result.
        encoder: Text encoding passed to ``pandas.read_csv``.

    Returns:
        A DataFrame holding only the ``header`` columns, in that order, with
        every column read as ``str``.
    """
    csv_path = str(BASE_DIR / filename)
    # usecols limits what is parsed; the trailing selection fixes the order.
    frame = pd.read_csv(csv_path, usecols=header, encoding=encoder, dtype=str)
    return frame[header]


In [None]:

# Load every source table. MRBTS is read as latin-1, the others as utf-8.
nrcell_df = read_csv_files("NRCELL.csv", HEADER_NRCELL, "utf-8")
mrbts_df = read_csv_files("MRBTS.csv", HEADER_MRBTS, "latin-1")
nrcell_fdd_df = read_csv_files("NRCELL_FDD.csv", HEADER_NRCELL_FDD, "utf-8")
nrdsslte_df = read_csv_files("NRDSSLTE.csv", HEADER_NRDSSLTE, "utf-8")
nrplmnset_df = read_csv_files("NRPLMNSET_NSA.csv", HEADER_NRPLMNSET_NSA, "utf-8")

# Report the (rows, columns) shape of each table as loaded
print(
    "Shape NRCELL original:", nrcell_df.shape,
    "\nShape MRBTS original:", mrbts_df.shape,
    "\nShape NRCELL_FDD original:", nrcell_fdd_df.shape,
    "\nShape NRDSSLTE original:", nrdsslte_df.shape,
    "\nShape NRPLMNSET_NSA original:", nrplmnset_df.shape,
)

# Preview the first rows of each table
try:
    display(
        nrcell_df.head(5),
        mrbts_df.head(5),
        nrcell_fdd_df.head(5),
        nrdsslte_df.head(5),
        nrplmnset_df.head(5),
    )
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(
        nrcell_df.head(5).to_string(index=False),
        mrbts_df.head(5).to_string(index=False),
        nrcell_fdd_df.head(5).to_string(index=False),
        nrdsslte_df.head(5).to_string(index=False),
        nrplmnset_df.head(5).to_string(index=False),
        sep="\n",
    )



In [None]:
# =====================================================================
# 1) Insert the AT&T_Site_Name and Site ID columns
# =====================================================================

# Work on a copy so the frame loaded from NRCELL.csv is left untouched
nrcell_df_mod = nrcell_df.copy()


print("Shape original:", nrcell_df_mod.shape)

# --- Build the 'AT&T_Site_Name' column from mrbts_df ---
# 1. Lookup table: MRBTS DISTNAME -> MRBTS name
mapa = dict(zip(mrbts_df['DISTNAME'], mrbts_df['name']))

# 2. Lookup key: the first two '/'-separated segments of the cell DISTNAME
substr_distname = nrcell_df_mod['DISTNAME'].str.split('/', n=2).str[:2].str.join('/')

# 3. MRBTS name for every cell (NaN when the key is not present in mapa)
tmp_name = substr_distname.map(mapa)

# 4. Conditional logic:
#    - if the name contains '-', keep its second '-'-separated part, stripped
#    - otherwise keep the name unchanged
#    - missing names stay NaN
nrcell_df_mod.insert(
    0,
    'AT&T_Site_Name',
    tmp_name.apply(
        lambda x: x.split('-', 2)[1].strip() if isinstance(x, str) and '-' in x else x
    )
)

# 5. Fill any remaining NaN with the unmodified MRBTS name.
#    The result is assigned back to the frame: calling fillna(inplace=True) on a
#    column selection is deprecated in pandas and, with Copy-on-Write enabled,
#    does not update the parent DataFrame.
nrcell_df_mod['AT&T_Site_Name'] = nrcell_df_mod['AT&T_Site_Name'].fillna(tmp_name)

# 6. Count and report how many rows are still without a site name
nan_count = nrcell_df_mod['AT&T_Site_Name'].isna().sum()
if nan_count > 0:
    print(f"⚠️ {nan_count} registros sin 'AT&T_Site_Name' encontrados.")
else:
    print("✅ Todos los registros tienen 'AT&T_Site_Name'.")

# Insert 'Site ID': the text after the third '-' of DISTNAME, up to the next '/'
nrcell_df_mod.insert(1, 'Site ID', nrcell_df_mod['DISTNAME'].str.split('-', n=3).str[3].str.split('/',n=2).str[0] )

print("Shape nuevo:", nrcell_df_mod.shape)

# Verification preview (first rows only)
pd.set_option("display.max_columns", None)  # optional: show every column
try:
    display(nrcell_df_mod.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(nrcell_df_mod.head(5).to_string(index=False))


In [None]:
# =====================================================================
# 2) NRCELL_FDD information
# =====================================================================

nrcell_df_merged = nrcell_df_mod.copy()

print("Shape original:", nrcell_df_merged.shape)

columnas_a_insertar = ["chBwDl", "chBwUl", "nrarfcnDl", "nrarfcnUl"]

# Normalise the join key on the NRCELL side (string, trimmed, upper-case)
nrcell_df_merged["DISTNAME"] = nrcell_df_merged["DISTNAME"].astype(str).str.strip().str.upper()

# Build the NRCELL_FDD join key on a derived frame: drop the last '/'-segment so
# it matches the NRCELL DISTNAME, then normalise it the same way.
# nrcell_fdd_df itself is NOT overwritten; otherwise every re-run of this cell
# would strip one more segment and the join would silently stop matching.
nrcell_fdd_lookup = nrcell_fdd_df.assign(
    DISTNAME=(
        nrcell_fdd_df["DISTNAME"]
        .str.rsplit('/', n=1).str[0]
        .astype(str).str.strip().str.upper()
    )
)

# Number of distinct keys present on both sides
coincidencias = set(nrcell_df_merged["DISTNAME"]) & set(nrcell_fdd_lookup["DISTNAME"])
print(len(coincidencias))

filas_antes = len(nrcell_df_merged)
nrcell_df_merged = nrcell_df_merged.merge(
    nrcell_fdd_lookup[["DISTNAME"] + columnas_a_insertar],
    on = "DISTNAME",
    how = "left")

# A left join only grows when a key is repeated on the right-hand side
if len(nrcell_df_merged) != filas_antes:
    print(f"⚠️ DISTNAME repetido en NRCELL_FDD: el merge pasó de {filas_antes} a {len(nrcell_df_merged)} filas.")

print("Shape nuevo:", nrcell_df_merged.shape)

# Verification preview (first rows only)
pd.set_option("display.max_columns", None)  # optional: show every column
try:
    display(nrcell_df_merged.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(nrcell_df_merged.head(5).to_string(index=False))


In [None]:
# =====================================================================
# 3) NRDSSLTE information
# =====================================================================

nrcell_df_mergelte = nrcell_df_merged.copy()

print("Shape original:", nrcell_df_mergelte.shape)

columnas_a_insertar = ["enbPlmn_mcc_mnc_mncLength", "ltePhyCellId", "ssbPosition"]

# Normalise the join key on the NRCELL side (string, trimmed, upper-case)
nrcell_df_mergelte["DISTNAME"] = nrcell_df_mergelte["DISTNAME"].astype(str).str.strip().str.upper()

# Build the NRDSSLTE join key on a derived frame: drop the last '/'-segment so
# it matches the NRCELL DISTNAME, then normalise it the same way.
# nrdsslte_df itself is NOT overwritten; otherwise every re-run of this cell
# would strip one more segment and the join would silently stop matching.
nrdsslte_lookup = nrdsslte_df.assign(
    DISTNAME=(
        nrdsslte_df["DISTNAME"]
        .str.rsplit('/', n=1).str[0]
        .astype(str).str.strip().str.upper()
    )
)

# Number of distinct keys present on both sides
coincidencias = set(nrcell_df_mergelte["DISTNAME"]) & set(nrdsslte_lookup["DISTNAME"])
print(len(coincidencias))

filas_antes = len(nrcell_df_mergelte)
nrcell_df_mergelte = nrcell_df_mergelte.merge(
    nrdsslte_lookup[["DISTNAME"] + columnas_a_insertar],
    on = "DISTNAME",
    how = "left")

# A left join only grows when a key is repeated on the right-hand side
if len(nrcell_df_mergelte) != filas_antes:
    print(f"⚠️ DISTNAME repetido en NRDSSLTE: el merge pasó de {filas_antes} a {len(nrcell_df_mergelte)} filas.")

print("Shape nuevo:", nrcell_df_mergelte.shape)

# Verification preview (first rows only)
pd.set_option("display.max_columns", None)  # optional: show every column
try:
    display(nrcell_df_mergelte.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(nrcell_df_mergelte.head(5).to_string(index=False))


In [None]:
# =====================================================================
# 4) NRPLMNSET_NSA information
# =====================================================================

nrcell_df_mergelmnset = nrcell_df_mergelte.copy()

print("Shape original:", nrcell_df_mergelmnset.shape)

columnas_a_insertar = ["configuredEpsTac", "nrPlmnDNList"]

# Normalise the join key on the NRCELL side (string, trimmed, upper-case)
nrcell_df_mergelmnset["DISTNAME"] = nrcell_df_mergelmnset["DISTNAME"].astype(str).str.strip().str.upper()

# Build the NRPLMNSET_NSA join key on a derived frame: drop the last '/'-segment
# so it matches the NRCELL DISTNAME, then normalise it the same way.
# nrplmnset_df itself is NOT overwritten; otherwise every re-run of this cell
# would strip one more segment and the join would silently stop matching.
nrplmnset_lookup = nrplmnset_df.assign(
    DISTNAME=(
        nrplmnset_df["DISTNAME"]
        .str.rsplit('/', n=1).str[0]
        .astype(str).str.strip().str.upper()
    )
)

# Number of distinct keys present on both sides
coincidencias = set(nrcell_df_mergelmnset["DISTNAME"]) & set(nrplmnset_lookup["DISTNAME"])
print(len(coincidencias))

filas_antes = len(nrcell_df_mergelmnset)
nrcell_df_mergelmnset = nrcell_df_mergelmnset.merge(
    nrplmnset_lookup[["DISTNAME"] + columnas_a_insertar],
    on = "DISTNAME",
    how = "left")

# A left join only grows when a key is repeated on the right-hand side
if len(nrcell_df_mergelmnset) != filas_antes:
    print(f"⚠️ DISTNAME repetido en NRPLMNSET_NSA: el merge pasó de {filas_antes} a {len(nrcell_df_mergelmnset)} filas.")

print("Shape nuevo:", nrcell_df_mergelmnset.shape)

# Verification preview (first rows only)
pd.set_option("display.max_columns", None)  # optional: show every column
try:
    display(nrcell_df_mergelmnset.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(nrcell_df_mergelmnset.head(5).to_string(index=False))



In [None]:
# =====================================================================
# 5) LAT/LON from All_Nokia_5G_{YYYYMM} (previous month)
# =====================================================================
nrcell_df_mergelatlon = nrcell_df_mergelmnset.copy()

# Previous calendar month as YYYYMM (January rolls back to December of last year)
today = date.today()
prev_year  = today.year if today.month > 1 else today.year - 1
prev_month = today.month - 1 or 12
yyyymm = f"{prev_year}{prev_month:02d}"
print(yyyymm)

# NOTE(review): the reference file name is pinned by hand; `yyyymm` is only
# printed and does not select the file. Update the name below for each run.
# an_path = BASE_DIR / f"All_Nokia_5G_{yyyymm}.xlsx"
an_path = BASE_DIR / "All_Nokia_5G_20260121.xlsx"

# Reference coordinates, one row per site name.
# Rows without a site name are dropped BEFORE the astype(str) conversion:
# otherwise they become the literal string "nan" and would hand their LAT/LON
# to every cell whose own site name is missing.
an_df = (
    pd.read_excel(an_path, usecols=["AT&T_Site_Name", "LAT", "LON"])
    .dropna(subset=["AT&T_Site_Name"])
    .assign(**{"AT&T_Site_Name": lambda ref: ref["AT&T_Site_Name"].astype(str).str.strip()})
    .drop_duplicates(subset=["AT&T_Site_Name"], keep="first")
)
try:
    display(an_df.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(an_df.head(5).to_string(index=False))

# Same key normalisation on the cell side. A missing site name becomes the
# string "nan" here, which can no longer match any reference row.
nrcell_df_mergelatlon["AT&T_Site_Name"] = nrcell_df_mergelatlon["AT&T_Site_Name"].astype(str).str.strip()

# Left join: cells without a reference row keep NaN in LAT/LON and are handled
# by the EPT fallback in the next section.
merged_df = nrcell_df_mergelatlon.merge(
    an_df,
    on="AT&T_Site_Name",
    how="left",
    suffixes=("", "_an")
)

print("Shape nuevo:", merged_df.shape)

# Verification preview (first rows only)
pd.set_option("display.max_columns", None)  # optional: show every column
try:
    display(merged_df.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(merged_df.head(5).to_string(index=False))


In [None]:
# =====================================================================
# 6) LAT/LON from EPT (most recent file)
# =====================================================================

if merged_df[["LAT", "LON"]].isna().any(axis=1).any():
    # Run the fallback only when at least one LAT or LON is still missing

    nan_count = merged_df[["LAT", "LON"]].isna().sum().sum()
    print("Se necesita buscar LAT/LON restantes en EPT")

    ruta_ept = BASE_DIR

    # File name prefix
    prefijo_ept = "EPT_ATT_UMTS_LTE_"

    # Every workbook whose name starts with the prefix
    archivo = glob.glob(os.path.join(ruta_ept, f"{prefijo_ept}*.xlsx"))

    if archivo:
        # glob returns matches in arbitrary order, so pick the most recently
        # modified workbook explicitly instead of whichever comes first.
        archivo_encontrado = max(archivo, key=os.path.getmtime)
        nombre_archivo = os.path.basename(archivo_encontrado)

        # Sheets that are always wanted
        hojas_fijas = [
            "EPT_3G_LTE_OUTDOOR",
            "PLAN_OUTDOOR",
            "EPT_3G_LTE_INDOOR",
            "PLAN_INDOOR",
            "Eventos_Especiales"
        ]

        # Open the workbook once and close it when done; every sheet is read
        # through the same handle instead of re-opening the file per sheet.
        with pd.ExcelFile(archivo_encontrado, engine="openpyxl") as libro_ept:
            todas_las_hojas = libro_ept.sheet_names
            # Vendor sheets: any sheet whose name contains "nokia"
            hojas_vendor = [h for h in todas_las_hojas if "nokia" in h.lower()]
            # Fixed + vendor sheets, de-duplicated, order preserved
            hojas_deseadas = list(dict.fromkeys(hojas_fijas + hojas_vendor))
            # Only read sheets that exist; a missing one would abort the read
            hojas = [h for h in hojas_deseadas if h in todas_las_hojas]
            hojas_ausentes = [h for h in hojas_deseadas if h not in todas_las_hojas]
            if hojas_ausentes:
                print(f"⚠️ Hojas no encontradas en {nombre_archivo}: {hojas_ausentes}")
            print(hojas)

            # Read every sheet and tag each row with its sheet and source file
            dfs = [
                pd.read_excel(libro_ept, sheet_name=hoja, usecols=["AT&T_Site_Name", "Latitud", "Longitud"])
                .assign(Hoja=hoja, Origen=nombre_archivo)
                for hoja in hojas
            ]

        if dfs:
            # One row per site:
            #  - rows without a site name or without any coordinate are dropped
            #    first, so they cannot shadow a usable row of the same site
            #  - the key is trimmed like the cell-side key so both sides match
            df_EPT_inicial = (
                pd.concat(dfs, ignore_index=True)
                .dropna(subset=["AT&T_Site_Name"])
                .dropna(subset=["Latitud", "Longitud"], how="all")
                .assign(**{"AT&T_Site_Name": lambda ept: ept["AT&T_Site_Name"].astype(str).str.strip()})
                .drop_duplicates(subset=["AT&T_Site_Name"])
            )

            # Join with the main frame and fill only the missing coordinates.
            # Provenance columns left by a previous run of this cell are removed
            # first so a re-run does not create *_extra duplicates.
            merged_df = (
                merged_df
                .drop(columns=["Hoja", "Origen"], errors="ignore")
                .merge(df_EPT_inicial, on="AT&T_Site_Name", how="left", suffixes=("", "_extra"))
            )
            merged_df["LAT"] = merged_df["LAT"].fillna(merged_df["Latitud"])
            merged_df["LON"] = merged_df["LON"].fillna(merged_df["Longitud"])
            merged_df = merged_df.drop(columns=["Latitud", "Longitud"])
        else:
            print(f"⚠️ Ninguna de las hojas esperadas existe en {nombre_archivo}")

        if merged_df[["LAT", "LON"]].isna().any(axis=1).any():
            nan_count = merged_df[["LAT", "LON"]].isna().sum().sum()
            print(nan_count, " LAT/LON no encontrados. Se producirá archivo excel con estos faltantes.")
    else:
        print("⚠️ No se encontró archivo EPT")
else:
    print("LAT y LON encontrados en su totalidad en archivo anterior. No se necesita EPT.")

# Verification preview (first rows only), shown once whichever branch ran
pd.set_option("display.max_columns", None)  # optional: show every column
try:
    display(merged_df.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(merged_df.head(5).to_string(index=False))



In [None]:
# =====================================================================
# 7) Final Excel file
# =====================================================================
# Remove the auxiliary provenance columns when they exist
merged_df = merged_df.drop(columns=[c for c in ["Hoja", "Origen"] if c in merged_df.columns])

# === 0) Configuration and current date ===

today   = date.today()
# Despite its name this stamp is YYYYMMDD (year, month and day)
yyyymm  = f"{today.year}{today.month:02d}{today.day:02d}"

final_excel = BASE_DIR / f"All_Nokia_5G_{yyyymm}.xlsx"
tmp_excel   = BASE_DIR / f"~tmp_All_Nokia_5G_{yyyymm}.xlsx"  # not used in this cell

# Final in-memory DataFrame to export
df_out = merged_df.copy()

pd.set_option("display.max_columns", None)  # optional: show every column
try:
    display(df_out.head(5))
except NameError:
    # `display` only exists inside a notebook; fall back to plain text
    print(df_out.head(5).to_string(index=False))

# The header row is written from the DataFrame's own column names. Report any
# drift from HEADERS instead of relabelling columns by position, which would
# silently put the wrong title over a column.
if list(df_out.columns) != HEADERS:
    print("⚠️ Las columnas del DataFrame no coinciden con HEADERS; se conserva el encabezado del DataFrame.")

# === 1) Save the raw data; missing values are written as empty cells ===
df_out.to_excel(final_excel, index=False, na_rep="")

# === 2) Re-open the SAME file and apply the formatting ===
wb = load_workbook(final_excel)
try:
    ws = wb.active

    # Keep the header row visible while scrolling
    ws.freeze_panes = "A2"
    for cell in next(ws.iter_rows(min_row=1, max_row=1)):
        cell.font = Font(name="Aptos Narrow", size=11)
        cell.alignment = Alignment(textRotation=90, horizontal="center", vertical="center", wrap_text=True)
        cell.border = Border()

    # Auto-filter over the whole range that holds data
    ws.auto_filter.ref = ws.dimensions

    wb.save(final_excel)
finally:
    # Release the workbook even when formatting or saving fails
    wb.close()

print(f"✅ Archivo final con formato guardado → {final_excel}")