In [None]:
import pandas as pd
# Notebook-wide pandas display configuration: show every row and column,
# never truncate cell contents, and print floats with two decimals.
_DISPLAY_OPTIONS = {
    'display.max_rows': None,
    'display.max_columns': None,
    'display.max_colwidth': None,
    'display.max_info_columns': 10000,
    'display.width': 1000,
    'display.float_format': '{:.2f}'.format,
}
for _option_name, _option_value in _DISPLAY_OPTIONS.items():
    pd.set_option(_option_name, _option_value)


In [None]:
def get_dataframe_summary(df):
    """
    Build a per-column summary of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to describe.

    Returns
    -------
    pandas.DataFrame
        Indexed by the column labels of ``df``, with one column per metric:
          - Data Type: dtype of the column
          - Non Null Count: number of non-missing values
          - Null Count: number of missing values
          - Null Percentage: missing values as a percentage of the row
            count, rounded to 2 decimals (0.0 when ``df`` has no rows)
          - Unique Values: number of distinct non-missing values

    Notes
    -----
    The function only computes and returns the summary. It does not touch
    the global pandas display options; configure those once in the
    notebook's configuration cell.
    """
    row_count = len(df)
    # Computed once and reused for both the count and the percentage.
    null_counts = df.isna().sum()

    if row_count:
        null_percentage = (null_counts / row_count * 100).round(2)
    else:
        # With no rows the ratio would be 0/0 (NaN); nothing is missing,
        # so report 0.0 for every column instead.
        null_percentage = null_counts.astype(float)

    return pd.DataFrame({
        'Data Type': df.dtypes,
        'Non Null Count': df.count(),
        'Null Count': null_counts,
        'Null Percentage': null_percentage,
        'Unique Values': df.nunique(),
    })

In [None]:
from pathlib import Path
import sys

# 1) Start from the directory the notebook kernel is running in.
notebook_dir = Path.cwd()

# 2) Climb up until we reach the folder that contains the "app/modules"
#    package tree (the root that the "app.modules.…" imports resolve against).
#    Searching for this marker replaces a hard-coded parents[8], which raised
#    a bare IndexError or picked the wrong folder as soon as the notebook was
#    moved or the kernel was started from a different directory.
project_root = next(
    (
        candidate
        for candidate in (notebook_dir, *notebook_dir.parents)
        if (candidate / "app" / "modules").is_dir()
    ),
    None,
)
if project_root is None:
    raise RuntimeError(
        f"Could not find a parent of {notebook_dir} that contains 'app/modules'; "
        "start the kernel from inside the project tree."
    )

# 3) Put it at the front of sys.path, but only once, so that re-running this
#    cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# 4) From here on, imports of "app.…" resolve against project_root.




In [None]:
from app.modules.sga.minpub.report_validator.service.objetivos.calculations import extract_indisponibilidad_anexos
from app.modules.sga.minpub.report_validator.service.objetivos.preprocessing import preprocess_corte_excel
from app.modules.sga.minpub.report_validator.service.objetivos.preprocessing import preprocess_335
from app.modules.sga.minpub.report_validator.service.objetivos.preprocessing import preprocess_380
from app.modules.sga.minpub.report_validator.service.objetivos.preprocessing import preprocess_df_cid_cuismp_sharepoint
from app.modules.sga.minpub.report_validator.service.objetivos.preprocessing import preprocess_df_word_datos_anexos_indis
from app.modules.sga.minpub.report_validator.service.objetivos.mergers.merge_sga_335_corte_excel_sharepoint_cuismp_sga380 import merge_sga_335_corte_excel_sharepoint_cuismp_sga380
from app.modules.sga.minpub.report_validator.service.objetivos.mergers.merge_word_datos_anexos_disponibilidad_dfs_merged_sga import merge_word_datos_anexos_disponibilidad_df_merged_sga
from app.modules.sga.minpub.report_validator.service.objetivos.objetivo_3.o1_anexos_sustentos_paradas_reloj_validator import validate_anexos_indisponibilidad_word
from app.modules.sga.minpub.report_validator.service.objetivos.objetivo_3.o1_anexos_sustentos_paradas_reloj_validator import build_failure_messages_validate_anexos_indisponibilidad_word

In [None]:
# Project base directory: nine levels above the notebook's working directory.
BASE_DIR = Path.cwd()
for _level in range(9):
    BASE_DIR = BASE_DIR.parent

# All input files live under <BASE_DIR>/media/minpub/validator_report/extract/.
_EXTRACT_ROOT = BASE_DIR.joinpath("media", "minpub", "validator_report", "extract")

# Excel inputs.
SAVE_DIR_EXTRACT_EXCEL = _EXTRACT_ROOT / "excel" / "CORTE 2.xlsx"
SAVE_DIR_EXTRACT_SGA_335 = (
    _EXTRACT_ROOT / "sga_335" / "sga_reporte_30-03-2025_06-04-2025_20250410_173936.xlsx"
)
CID_CUISMP_PATH = _EXTRACT_ROOT / "sharepoint_cid_cuismp" / "MINPU - CID-CUISMP - AB.xlsx"
DIR_PARADAS_RELOJ = (
    _EXTRACT_ROOT / "pausa_cliente" / "sga_reporte_30-03-2025_06-04-2025_20250428_151430.xlsx"
)

# Word inputs.
DIR_WORD_DATOS = _EXTRACT_ROOT / "word_datos" / "COMPONENTE 2-DATOS - CORTE 2.docx"
DIR_WORD_TELEFONIA = _EXTRACT_ROOT / "word_telefonia" / "COMPONENTE 4 - TELEFONOS - CORTE 2.docx"



In [None]:
# "Corte" workbook: read with openpyxl, skipping the last two rows of the sheet.
df_corte_excel = pd.read_excel(SAVE_DIR_EXTRACT_EXCEL, engine="openpyxl", skipfooter=2)

# The remaining workbooks are read with pandas' default settings.
df_sga_dinamico_335, df_sga_dinamico_380, df_cid_cuismp_sharepoint = [
    pd.read_excel(workbook_path)
    for workbook_path in (SAVE_DIR_EXTRACT_SGA_335, DIR_PARADAS_RELOJ, CID_CUISMP_PATH)
]

# Annex tables extracted from the two Word reports by the project helper.
df_word_datos_anexos_indis, df_word_telefonia_anexos_indis = [
    extract_indisponibilidad_anexos(docx_path)
    for docx_path in (DIR_WORD_DATOS, DIR_WORD_TELEFONIA)
]



In [None]:
# Display the frame returned by extract_indisponibilidad_anexos for the
# "datos" Word document, before any preprocessing.
df_word_datos_anexos_indis

In [None]:

# Run the project's preprocessing step on the extracted "datos" annex frame,
# then show a per-column summary (dtype, null counts, unique values) of the result.
df_process_word = preprocess_df_word_datos_anexos_indis(df_word_datos_anexos_indis)
summary = get_dataframe_summary(df_process_word)
summary

In [None]:

# Pass each raw input frame through its dedicated project preprocessing
# function. The resulting frames are the inputs of the SGA merge cell.
df_preprocss_excel = preprocess_corte_excel(df_corte_excel)
df_preprocss_sharepoint_cuismp = preprocess_df_cid_cuismp_sharepoint(df_cid_cuismp_sharepoint)
df_preprocss_sga335 = preprocess_335(df_sga_dinamico_335)
df_preprocss_sga380 = preprocess_380(df_sga_dinamico_380)



In [None]:
# Peek at the first row of the preprocessed Word annex frame.
df_process_word.head(1)

In [None]:
# Combine the four preprocessed sources (corte Excel, SGA 335, SharePoint
# CID-CUISMP, SGA 380) with the project merger.
# NOTE(review): 'both' is passed through unchanged; presumably it selects rows
# matched on both sides of the merge. Confirm against the merger's signature.
df_matched_corte_sga335_Sharepoint_cuismp_sga380 = merge_sga_335_corte_excel_sharepoint_cuismp_sga380(
    df_preprocss_excel,
    df_preprocss_sga335,
    df_preprocss_sharepoint_cuismp,
    df_preprocss_sga380,
    'both',
)

# Spot-check one incident in the merged frame. Only the last expression of a
# cell is rendered, so the former intermediate `.head(1)` call (whose result
# was discarded) has been dropped.
df_matched_corte_sga335_Sharepoint_cuismp_sga380[
    df_matched_corte_sga335_Sharepoint_cuismp_sga380['nro_incidencia'] == '21798497'
]

In [None]:
# Merge the preprocessed Word annex frame with the already-merged SGA frame
# using the project merger, then peek at the first resulting row.
# NOTE(review): the meaning of 'both' is defined by the merger — confirm there.
merge_word_dfs_merged_anexos_dato = merge_word_datos_anexos_disponibilidad_df_merged_sga(
    df_process_word,
    df_matched_corte_sga335_Sharepoint_cuismp_sga380,
      'both' )
merge_word_dfs_merged_anexos_dato.head(1)

In [None]:
# Per-column summary of the preprocessed Word annex frame.
# NOTE(review): this takes the same input as the earlier `summary` variable, so
# the two should be identical unless df_process_word was changed in between.
summa_word = get_dataframe_summary(df_process_word)
summa_word

In [None]:
# Per-column summary of the preprocessed corte Excel frame (computed but not displayed).
sum_excel = get_dataframe_summary(df_preprocss_excel)
# sum_excel  # uncomment to display the summary

In [None]:
# Select the rows of the preprocessed corte Excel frame for incident 21798497
# (nro_incidencia is compared as a string); the selection is not displayed.
row = df_preprocss_excel[df_preprocss_excel['nro_incidencia'] == '21798497']
# row  # uncomment to display the selected rows

In [None]:
# Run the project validator for the Word annex unavailability data on the merged frame.
# NOTE(review): 'COMPONENTE II' is presumably the component label the validator
# uses — confirm against the validator's signature.
df_vali = validate_anexos_indisponibilidad_word(merge_word_dfs_merged_anexos_dato, 'COMPONENTE II')
# Optional inspection of the validation result (uncomment as needed):
# df_vali.head(5)
# df_vali[['nro_incidencia','indisponibilidad_extract','expected_indisponibilidad']]

In [None]:
# Build the failure messages from the validation result and display them.
df_mess = build_failure_messages_validate_anexos_indisponibilidad_word(df_vali)
df_mess