In [1]:
import os
import pandas as pd

# Directory scanned for "logs_<feature selection>" experiment folders.
base_path   = '/home/jupyter-jphuser2'
# Destination CSV files: the full summary and the Pareto-front subset.
output_path = 'training_times_summary.csv'
pareto_path = 'pareto_front_models.csv'

# Prefix that marks an experiment folder; the remainder of the folder name
# is reported as the feature-selection label.
logs_prefix = "logs_"

# Column layout of the summary table. Declared up front so the frame keeps
# these columns (and the sort below keeps working) even when no run is found.
summary_columns = [
    "Feature_Selection",
    "Optimizer",
    "Loss_Function",
    "Activation_Function",
    "Total_Train_Time",
    "Last_Train_Loss",
    "Last_Val_Loss",
]

records = []

for folder in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder)
    # Only directories are experiment folders: a plain file that happens to be
    # named "logs_*" would make the os.listdir call below raise
    # NotADirectoryError outside of the try block.
    if not folder.startswith(logs_prefix) or not os.path.isdir(folder_path):
        continue
    # Strip the leading prefix only (str.replace would also remove any later
    # occurrence of "logs_" inside the name).
    feature_selection = folder[len(logs_prefix):]

    for config in os.listdir(folder_path):
        metrics_path = os.path.join(folder_path, config, "version_0", "metrics.csv")
        if not os.path.exists(metrics_path):
            continue

        try:
            df = pd.read_csv(metrics_path)

            # Last non-null value logged for each metric; pd.NA when the
            # column is absent from the metrics file.
            last_train_time = df['train_time'].dropna().iloc[-1]   if 'train_time' in df else pd.NA
            last_train_loss = df['train_loss'].dropna().iloc[-1]  if 'train_loss' in df else pd.NA
            last_val_loss   = df['val_loss'].dropna().iloc[-1]    if 'val_loss' in df else pd.NA

            # The run folder name is split as "<optimizer>_<loss>_<activation>";
            # the activation part keeps any further underscores.
            parts = config.split("_")
            optimizer  = parts[0]
            loss       = parts[1]
            activation = "_".join(parts[2:])

            records.append({
                "Feature_Selection":    feature_selection,
                "Optimizer":            optimizer,
                "Loss_Function":        loss,
                "Activation_Function":  activation,
                "Total_Train_Time":     last_train_time,
                "Last_Train_Loss":      last_train_loss,
                "Last_Val_Loss":        last_val_loss
            })

        except Exception as e:
            # Best effort: report the unreadable/malformed run and keep going.
            print(f"Erro ao processar {metrics_path}: {e}")

# Build the summary DataFrame (explicit columns keep it well-formed when empty).
results_df = pd.DataFrame(records, columns=summary_columns)

# Sort alphabetically by Feature_Selection.
results_df = results_df.sort_values('Feature_Selection')

# Cost-benefit metric: final validation loss divided by total training time.
# NOTE(review): this ratio gets smaller as training time grows, so it does not
# reward fast training — confirm this is the intended interpretation.
results_df['Cost_Benefit'] = results_df['Last_Val_Loss'] / results_df['Total_Train_Time']

# Identify the Pareto front (lower training time and lower validation loss are better).
def is_pareto(df):
    """Flag the rows of ``df`` that lie on the time/validation-loss Pareto front.

    A row is dominated when some other row is at least as good on both
    ``Total_Train_Time`` and ``Last_Val_Loss`` and strictly better on at
    least one of them. Rows that are not dominated are on the front.

    Rows whose time or loss is missing or non-numeric cannot be compared,
    so they are never flagged as Pareto-optimal (and, because comparisons
    against NaN are False, they never dominate another row either).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Total_Train_Time`` and ``Last_Val_Loss``.

    Returns
    -------
    list of bool
        One flag per row of ``df``, in row order.
    """
    # Coerce to plain float arrays so pd.NA / None / strings become NaN instead
    # of producing ambiguous object-dtype comparisons.
    nan = float('nan')
    times = pd.to_numeric(df['Total_Train_Time'], errors='coerce').to_numpy(dtype=float, na_value=nan)
    losses = pd.to_numeric(df['Last_Val_Loss'], errors='coerce').to_numpy(dtype=float, na_value=nan)

    pareto_mask = []
    for t_i, l_i in zip(times, losses):
        # An incomplete row would otherwise look "undominated", since every
        # comparison against NaN evaluates to False.
        if pd.isna(t_i) or pd.isna(l_i):
            pareto_mask.append(False)
            continue
        # Is there another row that is better-or-equal on both criteria and
        # strictly better on at least one?
        dominated = (
            (times <= t_i) &
            (losses <= l_i) &
            ((times < t_i) | (losses < l_i))
        )
        pareto_mask.append(not dominated.any())
    return pareto_mask

# Flag every model that lies on the Pareto front, then keep only those rows.
pareto_flags = is_pareto(results_df)
results_df['Pareto'] = pareto_flags
pareto_df = results_df[results_df['Pareto']]

# Write the full table first and the Pareto subset second, both without the index.
for frame, destination in ((results_df, output_path), (pareto_df, pareto_path)):
    frame.to_csv(destination, index=False)

print(f"Todos os modelos salvos em: {output_path}")
print(f"Modelos na fronteira de Pareto salvos em: {pareto_path}")


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7fa28ba92450>>
Traceback (most recent call last):
  File "/opt/tljh/user/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 

KeyboardInterrupt



In [3]:
# Installs the `ace-tools` package from the configured package indexes via a shell escape.
# NOTE(review): no visible cell imports anything from this package — confirm the install is still needed.
# If it is, consider `%pip install` with a pinned version so it targets the kernel's own environment.
!pip install ace-tools

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [6]:
import pandas as pd

# Read the MAPE summary table from disk.
file_path = "mape_summary.csv"
df = pd.read_csv(file_path)

# Rank all rows by ascending MAPE, then keep the first five rows of each loss function.
ranked_by_mape = df.sort_values(by="MAPE(%)")
top5_per_loss = ranked_by_mape.groupby("Loss_Function", group_keys=False).head(5)

# Lift the column display limit so no column is truncated in the output.
pd.set_option('display.max_columns', None)

# Display the resulting DataFrame.
top5_per_loss


Unnamed: 0,Feature_Selection,Optimizer,Loss_Function,Activation_Function,MAPE(%)
386,XGBoost_50pct_03,Adam,SmoothL1Loss,PReLU,8.262638
376,XGBoost_50pct_03,AdamW,SmoothL1Loss,PReLU,8.289172
381,XGBoost_50pct_03,AdamW,MAE,PReLU,8.303679
226,XGBoost_30pct_03,AdamW,SmoothL1Loss,PReLU,8.359385
373,XGBoost_50pct_03,Adam,MAE,PReLU,8.360605
223,XGBoost_30pct_03,Adam,MAE,PReLU,8.367517
231,XGBoost_30pct_03,AdamW,MAE,PReLU,8.38938
7,DecisionTree_30pct_03,Adam,MAE,PReLU,8.410168
236,XGBoost_30pct_03,Adam,SmoothL1Loss,PReLU,8.427913
10,DecisionTree_30pct_03,AdamW,SmoothL1Loss,PReLU,8.494961


In [1]:
import pandas as pd

# Read the MAPE summary table from disk.
file_path = "mape_summary.csv"
df = pd.read_csv(file_path)

# Keep only the configurations whose Feature_Selection label contains "50pct"
# (rows with a missing label are excluded).
has_50pct_label = df["Feature_Selection"].str.contains("50pct", na=False)
df_50pct = df[has_50pct_label]

# Rank the filtered rows by ascending MAPE, then keep the first five rows of each loss function.
ranked_50pct = df_50pct.sort_values(by="MAPE(%)")
top5_50pct_per_loss = ranked_50pct.groupby("Loss_Function", group_keys=False).head(5)

# Lift the column display limit so no column is truncated in the output.
pd.set_option('display.max_columns', None)

# Display the resulting DataFrame.
top5_50pct_per_loss


Unnamed: 0,Feature_Selection,Optimizer,Loss_Function,Activation_Function,MAPE(%)
386,XGBoost_50pct_03,Adam,SmoothL1Loss,PReLU,8.262638
376,XGBoost_50pct_03,AdamW,SmoothL1Loss,PReLU,8.289172
381,XGBoost_50pct_03,AdamW,MAE,PReLU,8.303679
373,XGBoost_50pct_03,Adam,MAE,PReLU,8.360605
247,GradientBoosting_50pct_03,Adam,MAE,PReLU,8.411491
130,RandomForest_50pct_03,AdamW,SmoothL1Loss,PReLU,8.497087
250,GradientBoosting_50pct_03,AdamW,SmoothL1Loss,PReLU,8.509797
255,GradientBoosting_50pct_03,AdamW,MAE,PReLU,8.524006
183,DecisionTree_50pct_03,AdamW,MAE,PReLU,8.580697
178,DecisionTree_50pct_03,AdamW,SmoothL1Loss,PReLU,8.592368
