In [1]:
import pandas as pd
# Notebook-wide pandas display settings: no truncation of rows, columns or
# cell text, a wide console width, and floats rendered with two decimals.
display_options = {
    'display.max_rows': None,
    'display.max_columns': None,
    'display.max_colwidth': None,
    'display.max_info_columns': 10000,
    'display.width': 1000,
    'display.float_format': '{:.2f}'.format,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)


In [2]:
from pathlib import Path
import sys

# Make the project's top-level ``app`` package importable from this notebook.

# 1) Start from the kernel's working directory (normally the notebook's folder).
notebook_dir = Path.cwd()

# 2) Walk upwards until a folder that contains "app/modules" is found.
#    Searching for the marker instead of hard-coding a depth (previously
#    parents[8]) keeps the notebook working if it is moved or if the kernel
#    is started from a different folder inside the repository.
project_root = next(
    (
        candidate
        for candidate in (notebook_dir, *notebook_dir.parents)
        if (candidate / "app" / "modules").is_dir()
    ),
    None,
)
if project_root is None:
    raise RuntimeError(
        f"Could not find a folder containing 'app/modules' at or above {notebook_dir}"
    )

# 3) Put the root first on sys.path exactly once, so re-running this cell
#    does not keep prepending duplicate entries.
project_root_entry = str(project_root)
sys.path[:] = [project_root_entry] + [
    entry for entry in sys.path if entry != project_root_entry
]

# 4) Now imports of "app.…" will succeed


In [3]:
# EXTRACT IMPORTS
from app.modules.sga.minpub.report_validator.service.objetivos.etl.extract.cuadro_averias import (
    extract_averias_table
) 
from app.modules.sga.minpub.report_validator.service.objetivos.etl.extract.informe_tecnico import (
    extract_tecnico_reports_without_hours_last_dates
)
from app.modules.sga.minpub.report_validator.service.objetivos.etl.extract.anexo_indisponibilidad import (
    extract_indisponibilidad_anexos
)

In [4]:
# TRANSFORM IMPORTS
from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.averias import ( 
    preprocess_df_word_averias
)
from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.informe_tecnico import ( 
    preprocess_df_word_informe_tecnico
)
from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.anexos import ( 
    preprocess_df_word_anexos_indisponibilidad
)
from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.sga_335 import ( 
    preprocess_335
)
from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.sga_380 import ( 
    preprocess_380
)
from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.cuismp_sharepoint import ( 
    preprocess_df_cid_cuismp_sharepoint
)
from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.corte_excel import ( 
    preprocess_corte_excel
)

In [5]:
# MERGE IMPORTS
from app.modules.sga.minpub.report_validator.service.objetivos.etl.merge.excel_sga.excel_sga import ( 
merge_sga_335_corte_excel_sharepoint_cuismp_sga380
)


In [6]:
from app.modules.sga.minpub.report_validator.service.objetivos.validators.objetivo_1.o8_medidas_correctivas import validation_medidas_correctivas
from app.modules.sga.minpub.report_validator.service.objetivos.validators.objetivo_1.o8_medidas_correctivas import build_failure_messages_medidas_correctivas

In [7]:
# Base directory: nine levels above the kernel's working directory.
BASE_DIR = Path.cwd()
for _level in range(9):
    BASE_DIR = BASE_DIR.parent

# Every input file sits under <BASE_DIR>/media/minpub/validator_report/extract/.
extract_root = BASE_DIR.joinpath("media", "minpub", "validator_report", "extract")

SAVE_DIR_EXTRACT_EXCEL = extract_root / "excel" / "CORTE 2 - 02.06 AL 08.06.xlsx"
SAVE_DIR_EXTRACT_SGA_335 = (
    extract_root
    / "sga_335"
    / "sga_reporte_31-05-2025_09-06-2025_20250611_094354_20250616_143347.xlsx"
)
CID_CUISMP_PATH = extract_root / "sharepoint_cid_cuismp" / "MINPU - CID-CUISMP - AB (25).xlsx"
DIR_PARADAS_RELOJ = (
    extract_root
    / "pausa_cliente"
    / "sga_reporte_31-05-2025_09-06-2025_20250611_094926_20250616_143347.xlsx"
)
DIR_WORD_DATOS = extract_root / "word_datos" / "COMPONENTE 2-DATOS (12).docx"
DIR_WORD_TELEFONIA = extract_root / "word_telefonia" / "COMPONENTE 4 - TELEFONOS (9).docx"

In [8]:
# Load the four raw inputs used by the validation.
# Earlier drafts read the CORTE workbook and the SGA 335 report directly with
# pd.read_excel (engine="openpyxl"); the project's extract helpers are used instead.
from app.modules.sga.minpub.report_validator.service.objetivos.etl.extract.corte_excel import extract_corte_excel
from app.modules.sga.minpub.report_validator.service.objetivos.etl.extract.sga_335 import extract_sga_335


# SGA 335 report and CORTE workbook: read through the extract helpers.
df_sga_dinamico_335 = extract_sga_335(SAVE_DIR_EXTRACT_SGA_335) 
df_corte_excel = extract_corte_excel(SAVE_DIR_EXTRACT_EXCEL, skipfooter=0)
# SGA 380 report (DIR_PARADAS_RELOJ) and the SharePoint CID-CUISMP workbook:
# read as-is with pandas defaults.
df_sga_dinamico_380 = pd.read_excel(DIR_PARADAS_RELOJ)
df_cid_cuismp_sharepoint = pd.read_excel(CID_CUISMP_PATH)

In [9]:

# Apply the project's preprocess_* transform to each loaded frame.
# NOTE(review): every line overwrites its own input variable, so re-running
# this cell feeds already-preprocessed frames back into the transforms.
# Re-run the loading cell first if this cell has to be executed again.
df_corte_excel = preprocess_corte_excel(df_corte_excel)
df_cid_cuismp_sharepoint = preprocess_df_cid_cuismp_sharepoint(df_cid_cuismp_sharepoint)
df_sga_dinamico_335 = preprocess_335(df_sga_dinamico_335)
df_sga_dinamico_380 = preprocess_380(df_sga_dinamico_380)

In [10]:
#df_corte_excel

In [None]:
# Inspect incident 21830861 in the CORTE frame.
incident_mask = df_corte_excel['nro_incidencia'].eq('21830861')
row = df_corte_excel.loc[incident_mask]
row

In [12]:
# Combine the four preprocessed frames into the single frame that the
# validators below consume.
df_matched_corte_sga335_Sharepoint_cuismp_sga380 = merge_sga_335_corte_excel_sharepoint_cuismp_sga380(
    df_corte_excel,
    df_sga_dinamico_335,
    df_cid_cuismp_sharepoint,
    df_sga_dinamico_380,
    'both',  # NOTE(review): presumably selects which matched rows to keep — confirm in the merge helper
)



In [None]:
# Inspect incident 21830861 in the merged frame.
incident_mask = df_matched_corte_sga335_Sharepoint_cuismp_sga380['nro_incidencia'].eq('21830861')
row = df_matched_corte_sga335_Sharepoint_cuismp_sga380.loc[incident_mask]
row

In [14]:
# # Filter the row by incident number
# filtro = df_matched_corte_sga335_Sharepoint_cuismp_sga380['nro_incidencia'] == "21830861"
# fila = df_matched_corte_sga335_Sharepoint_cuismp_sga380.loc[filtro, 'MEDIDAS CORRECTIVAS Y/O PREVENTIVAS TOMADAS']


# print(repr(fila))

In [15]:

# def get_dataframe_summary(df):
#     """
#     Returns a summary DataFrame for the given DataFrame.
    
#     The summary includes:
#       - Data Type
#       - Non Null Count
#       - Null Count
#       - Null Percentage
#       - Unique Values count
#     """
#     pd.set_option('display.max_rows', None)
#     pd.set_option('display.max_columns', None)
#     pd.set_option('display.width', 1000)
    
#     summary_df = pd.DataFrame({
#         'Data Type': df.dtypes,
#         'Non Null Count': df.count(),
#         'Null Count': df.isna().sum(),
#         'Null Percentage': (df.isna().sum() / len(df) * 100).round(2),
#         'Unique Values': [df[col].nunique() for col in df.columns],
#     })
    
#     return summary_df

In [None]:


from app.modules.sga.minpub.report_validator.service.objetivos.etl.transform.corte_excel import get_dataframe_summary


# Summarise the merged frame with the project's helper and display the result.
# NOTE(review): presumably one row per column (dtype, null counts, unique
# values), as in the commented-out draft of this helper earlier in the
# notebook — confirm against the imported implementation.
df_show = get_dataframe_summary(df_matched_corte_sga335_Sharepoint_cuismp_sga380)
df_show

In [None]:
from app.modules.sga.minpub.report_validator.service.objetivos.utils.calculations import has_multiple_A_traves_mayus


# Add a flag column holding the negated result of has_multiple_A_traves_mayus
# applied to each row's corrective/preventive measures text.
medidas_text = df_matched_corte_sga335_Sharepoint_cuismp_sga380['MEDIDAS CORRECTIVAS Y/O PREVENTIVAS TOMADAS']
df_matched_corte_sga335_Sharepoint_cuismp_sga380['no_repeticion_A_traves_ok'] = ~medidas_text.apply(
    has_multiple_A_traves_mayus
)

# Show only the rows where the flag is False, i.e. the rows that fail the check.
failed_mask = df_matched_corte_sga335_Sharepoint_cuismp_sga380['no_repeticion_A_traves_ok'].eq(False)
filtro = df_matched_corte_sga335_Sharepoint_cuismp_sga380.loc[failed_mask]
filtro

In [None]:
# Run the "medidas correctivas" validation over the merged frame.
df_vali = validation_medidas_correctivas(df_matched_corte_sga335_Sharepoint_cuismp_sga380)

# Jupyter renders only a cell's last expression by default, so a bare
# `df_vali` in the middle of the cell displayed nothing and was removed.
# Inspect the full result in its own cell if needed.
# Here we look at the validation outcome for incident 21830861 only.
row = df_vali[df_vali['nro_incidencia'] == '21830861']
row


In [None]:
# Build the failure messages from the validation result and display them.
# NOTE(review): the structure of the returned object is defined in
# o8_medidas_correctivas — confirm there which columns it adds.
df_mess = build_failure_messages_medidas_correctivas(df_vali)
df_mess