In [137]:
import sys
import os

# The project root is taken to be the parent of the directory the kernel was
# started in (the current working directory).
notebook_dir = os.getcwd()
project_path = os.path.abspath(os.path.join(notebook_dir, os.pardir))

# Register the project root on the import path exactly once, so re-running
# this cell does not append duplicate entries.
if project_path not in sys.path:
    sys.path.append(project_path)


In [138]:
import pandas as pd

# Read the per-run inference results; the path is relative to the current
# working directory.
file_path = "../results/experiment_results_time_llm_inference.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows.
df.head(n=5)


Unnamed: 0,seed,model,dim,seq,context,pred,patch,epochs,patient_id,log_datetime,rmse,mae,mape
0,247659,GPT2,768,6,6,9,6,0,596,2025-02-27_14-24-47,25.065336,17.461512,0.127615
1,247659,GPT2,768,6,6,9,6,0,584,2025-02-27_14-25-18,33.209133,21.477936,0.133182
2,247659,GPT2,768,6,6,9,6,0,552,2025-02-27_14-25-48,24.636288,17.030685,0.129215
3,247659,GPT2,768,6,6,9,6,0,575,2025-02-27_14-26-17,29.433447,20.44198,0.146677
4,247659,GPT2,768,6,6,9,6,0,544,2025-02-27_14-26-47,27.177835,19.272171,0.125249


In [139]:
def summarize_by_column(df,columns):
    """Aggregate the metric columns over every column not listed in ``columns``.

    The frame is grouped by all of its columns except the three metric columns
    (``rmse``, ``mae``, ``mape``) and the names given in ``columns``. For each
    group the mean and standard deviation of every metric is computed.

    Args:
        df: DataFrame holding the metric columns ``rmse``, ``mae`` and ``mape``
            plus the columns named in ``columns``. It is not modified.
        columns: Iterable of column names to aggregate over, i.e. to leave out
            of the grouping key (for example ``["seed", "log_datetime"]``).

    Returns:
        A new DataFrame with one row per group: the grouping columns under
        their original names, followed by ``<metric>_mean`` and
        ``<metric>_std`` for each metric. ``<metric>_std`` is pandas' default
        (sample) standard deviation, so it is NaN for groups with one row.

    Raises:
        ValueError: If a metric column or a name in ``columns`` is not a
            column of ``df``.

    Side effects:
        Prints the list of grouping columns.
    """
    metrics = ['rmse', 'mae', 'mape']
    excluded = [*metrics, *columns]

    # Fail early with a message that names the offending columns instead of
    # the opaque "list.remove(x): x not in list".
    missing = [name for name in excluded if name not in df.columns]
    if missing:
        raise ValueError(f"Columns not found in DataFrame: {missing}")

    # Everything that is neither a metric nor explicitly excluded is part of
    # the grouping key (this includes e.g. 'patient_id').
    config_columns = [name for name in df.columns if name not in excluded]
    print(config_columns)

    # Group by the config columns and compute mean / std for each metric.
    summary_df = df.groupby(config_columns)[metrics].agg(['mean', 'std']).reset_index()

    # Flatten the two-level header: ('rmse', 'mean') -> 'rmse_mean'. Grouping
    # columns carry an empty second level, which is dropped here rather than
    # stripped with rstrip('_'), so names that end in '_' stay intact.
    summary_df.columns = [
        '_'.join(str(part) for part in col if part != '')
        for col in summary_df.columns
    ]
    return summary_df


In [140]:
# Aggregate the metrics over seeds (and their per-run timestamps), leaving one
# row per remaining configuration / patient combination.
# NOTE(review): despite the "train" in its name, this frame summarises the
# *inference* results loaded into `df`; the name is kept because the cells
# below read the summary under it.
time_llm_train_summary_df = summarize_by_column(df, columns=["seed", "log_datetime"])

# Write the summary to the current working directory, without the index column.
time_llm_train_summary_df.to_csv("summary_results_time_llm_inference.csv", index=False)

# Preview the first five summary rows.
time_llm_train_summary_df.head(n=5)


['model', 'dim', 'seq', 'context', 'pred', 'patch', 'epochs', 'patient_id']


Unnamed: 0,model,dim,seq,context,pred,patch,epochs,patient_id,rmse_mean,rmse_std,mae_mean,mae_std,mape_mean,mape_std
0,BERT,768,6,6,6,6,0,540,28.804449,0.21965,20.70697,0.068201,0.145381,0.001457
1,BERT,768,6,6,6,6,0,544,22.888493,0.269176,16.176063,0.040098,0.105544,0.000704
2,BERT,768,6,6,6,6,0,552,20.977481,0.184533,14.424541,0.063539,0.109376,0.000929
3,BERT,768,6,6,6,6,0,559,24.784531,0.204223,16.966347,0.047806,0.11134,0.000895
4,BERT,768,6,6,6,6,0,563,20.780558,0.175174,14.942282,0.065584,0.094216,0.000616


In [141]:

# Make sure 'seq' and 'pred' are integer-typed before comparing against 6 / 9.
for int_col in ('seq', 'pred'):
    time_llm_train_summary_df[int_col] = time_llm_train_summary_df[int_col].astype(int)

# Split the summary into the (seq=6, pred=6) and (seq=6, pred=9) configurations.
seq_is_6 = time_llm_train_summary_df['seq'] == 6
df_66 = time_llm_train_summary_df[seq_is_6 & (time_llm_train_summary_df['pred'] == 6)]
df_69 = time_llm_train_summary_df[seq_is_6 & (time_llm_train_summary_df['pred'] == 9)]

# Define function to generate LaTeX table with updated format
def generate_latex_table(df, title, label):
    """Render per-patient MAE / RMSE summaries as a LaTeX ``table`` environment.

    The table has one row per patient and, for every model found in ``df``, an
    "MAE" and an "RMSE" column formatted as ``mean $\\pm$ std`` with two
    decimals. Models and patients appear in order of first occurrence.

    Args:
        df: DataFrame with the columns ``model``, ``patient_id``, ``mae_mean``,
            ``mae_std``, ``rmse_mean`` and ``rmse_std``. If several rows share
            the same (patient, model) pair, only the first one is used.
        title: Caption text, inserted verbatim (LaTeX special characters are
            not escaped; model names are inserted verbatim as well).
        label: Label name, inserted verbatim into ``\\label{...}``.

    Returns:
        The LaTeX source as a string. The generated markup uses ``\\toprule``,
        ``\\cmidrule``, ``\\midrule``, ``\\bottomrule`` (booktabs) and
        ``\\resizebox`` (graphicx). Cells are rendered as follows:

        * a (patient, model) pair that has no row gives ``--`` in both cells;
        * a NaN mean gives ``--``;
        * a NaN std (e.g. a group that had a single run) gives the mean alone.

        An empty ``df`` produces a valid one-column table with only the
        "Patient ID" header.
    """
    def _format_cell(mean, std):
        # Never let a literal "nan" leak into the rendered table.
        if pd.isna(mean):
            return "--"
        if pd.isna(std):
            return f"{mean:.2f}"
        return f"{mean:.2f} $\\pm$ {std:.2f}"

    models = df['model'].unique()
    n_models = len(models)

    # One centred patient column plus a "cc" pair per model, separated by
    # vertical rules: c|cc|cc ...  (just "c" when there are no models).
    column_spec = "|".join(["c"] + ["cc"] * n_models)

    lines = [
        "",  # the table source starts with a blank line
        "\\begin{table}[h]",
        "    \\centering",
        f"    \\caption{{{title}}}",
        "    \\resizebox{\\columnwidth}{!}{%",
        f"    \\begin{{tabular}}{{{column_spec}}}",
        "        \\toprule",
    ]

    # Header row: each model name spans its two metric columns; every span
    # except the last carries a trailing vertical rule.
    header_cells = ["{\\textbf{Patient ID}}"]
    for position, model in enumerate(models):
        rule = '|' if position < n_models - 1 else ''
        header_cells.append(f"\\multicolumn{{2}}{{c{rule}}}{{\\textbf{{{model}}}}}")
    lines.append("        " + " & ".join(header_cells) + " \\\\")

    # Partial rules and the MAE / RMSE sub-header only make sense when there
    # is at least one model; emitting them for an empty frame would reference
    # columns that do not exist.
    if n_models > 0:
        cmidrules = [f"\\cmidrule(lr){{{2 + i * 2}-{3 + i * 2}}}" for i in range(n_models)]
        lines.append("        " + " ".join(cmidrules))
        lines.append("         & " + " & ".join(["MAE & RMSE"] * n_models) + " \\\\")
    lines.append("         \\midrule")

    # One row per patient.
    for patient in df['patient_id'].unique():
        row = [str(patient)]
        patient_rows = df[df['patient_id'] == patient]
        for model in models:
            model_data = patient_rows[patient_rows['model'] == model]
            if model_data.empty:
                # Placeholder when this patient has no result for the model.
                row.extend(["--", "--"])
                continue
            row.append(_format_cell(model_data['mae_mean'].values[0],
                                    model_data['mae_std'].values[0]))
            row.append(_format_cell(model_data['rmse_mean'].values[0],
                                    model_data['rmse_std'].values[0]))
        lines.append("         " + " & ".join(row) + " \\\\")

    # Bottom rule and closing of tabular / resizebox / table.
    lines.extend([
        "         \\bottomrule",
        "    \\end{tabular}%",
        "    }",
        f"    \\label{{{label}}}",
        "\\end{table}",
    ])

    return "\n".join(lines) + "\n"

# Render one LaTeX table per forecast configuration.
latex_66 = generate_latex_table(
    df_66,
    "Zero-shot Performance for Time-LLM Models (30-minute Forecast)",
    "tab:timellm_zero_shot_30min",
)
latex_69 = generate_latex_table(
    df_69,
    "Zero-shot Performance for Time-LLM Models (45-minute Forecast)",
    "tab:timellm_zero_shot_45min",
)

# Optional: write the tables to .tex files (disabled).
# with open("table_30min.tex", "w") as f:
#     f.write(latex_66)
#
# with open("table_45min.tex", "w") as f:
#     f.write(latex_69)

# Print both tables in a single call, with blank lines between them.
print(latex_66, "\n\n", latex_69, sep="\n")



\begin{table}[h]
    \centering
    \caption{Zero-shot Performance for Time-LLM Models (30-minute Forecast)}
    \resizebox{\columnwidth}{!}{%
    \begin{tabular}{c|cc|cc}
        \toprule
        {\textbf{Patient ID}} & \multicolumn{2}{c|}{\textbf{BERT}} & \multicolumn{2}{c}{\textbf{GPT2}} \\
        \cmidrule(lr){2-3} \cmidrule(lr){4-5}
         & MAE & RMSE & MAE & RMSE \\
         \midrule
         540 & 20.71 $\pm$ 0.07 & 28.80 $\pm$ 0.22 & 20.59 $\pm$ 0.04 & 28.38 $\pm$ 0.13 \\
         544 & 16.18 $\pm$ 0.04 & 22.89 $\pm$ 0.27 & 16.07 $\pm$ 0.02 & 22.48 $\pm$ 0.25 \\
         552 & 14.42 $\pm$ 0.06 & 20.98 $\pm$ 0.18 & 14.31 $\pm$ 0.03 & 20.63 $\pm$ 0.12 \\
         559 & 16.97 $\pm$ 0.05 & 24.78 $\pm$ 0.20 & 16.84 $\pm$ 0.04 & 24.38 $\pm$ 0.13 \\
         563 & 14.94 $\pm$ 0.07 & 20.78 $\pm$ 0.18 & 14.81 $\pm$ 0.04 & 20.43 $\pm$ 0.11 \\
         567 & 20.31 $\pm$ 0.08 & 30.61 $\pm$ 0.29 & 20.13 $\pm$ 0.03 & 29.98 $\pm$ 0.15 \\
         570 & 13.85 $\pm$ 0.05 & 19.52 $\pm$ 0.15

In [142]:
# Read the per-run training results; the path is relative to the current
# working directory.
file_path = "../results/experiment_results_time_llm_training.csv"
time_llm_train_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows.
time_llm_train_df.head(n=5)


Unnamed: 0,seed,model,dim,seq,context,pred,patch,epochs,patient_id,log_datetime,rmse,mae,mape
0,427368,GPT2,768,6,6,6,6,20,567,2025-02-27_15-09-58,20.258608,11.532954,0.079846
1,427368,GPT2,768,6,6,6,6,20,591,2025-02-27_15-18-46,17.423256,11.32707,0.086261
2,427368,GPT2,768,6,6,6,6,20,552,2025-02-27_15-27-24,13.374063,8.705806,0.066171
3,427368,GPT2,768,6,6,6,6,20,584,2025-02-27_15-34-58,20.786112,11.697576,0.074243
4,427368,GPT2,768,6,6,6,6,20,570,2025-02-27_15-44-36,13.17941,8.576118,0.042822


In [143]:
# Aggregate the training metrics over seeds (and their per-run timestamps),
# leaving one row per remaining configuration / patient combination.
time_llm_train_summary_df = summarize_by_column(time_llm_train_df, columns=["seed", "log_datetime"])

# Write the summary to the current working directory, without the index column.
time_llm_train_summary_df.to_csv("summary_results_time_llm_training.csv", index=False)

# Preview the first five summary rows.
time_llm_train_summary_df.head(n=5)


['model', 'dim', 'seq', 'context', 'pred', 'patch', 'epochs', 'patient_id']


Unnamed: 0,model,dim,seq,context,pred,patch,epochs,patient_id,rmse_mean,rmse_std,mae_mean,mae_std,mape_mean,mape_std
0,BERT,768,6,6,6,6,20,540,17.307049,0.46894,11.098783,0.354971,0.078612,0.002583
1,BERT,768,6,6,6,6,20,544,14.434252,0.170235,9.18786,0.193501,0.060315,0.001294
2,BERT,768,6,6,6,6,20,552,13.11149,0.430155,8.450665,0.330488,0.064163,0.002594
3,BERT,768,6,6,6,6,20,559,16.677227,0.380274,9.382021,0.171149,0.062522,0.001326
4,BERT,768,6,6,6,6,20,563,15.339079,0.283863,9.523784,0.18765,0.060622,0.001422


In [144]:

# Make sure 'seq' and 'pred' are integer-typed before comparing against 6 / 9.
for int_col in ('seq', 'pred'):
    time_llm_train_summary_df[int_col] = time_llm_train_summary_df[int_col].astype(int)

# Split the summary into the (seq=6, pred=6) and (seq=6, pred=9) configurations.
seq_is_6 = time_llm_train_summary_df['seq'] == 6
df_66 = time_llm_train_summary_df[seq_is_6 & (time_llm_train_summary_df['pred'] == 6)]
df_69 = time_llm_train_summary_df[seq_is_6 & (time_llm_train_summary_df['pred'] == 9)]


# Render one LaTeX table per forecast configuration.
latex_66 = generate_latex_table(
    df_66,
    "Few-shot Performance for Time-LLM Models (30-minute Forecast)",
    "tab:timellm_few_shot_30min",
)
latex_69 = generate_latex_table(
    df_69,
    "Few-shot Performance for Time-LLM Models (45-minute Forecast)",
    "tab:timellm_few_shot_45min",
)


# Print both tables in a single call, with blank lines between them.
print(latex_66, "\n\n", latex_69, sep="\n")



\begin{table}[h]
    \centering
    \caption{Few-shot Performance for Time-LLM Models (30-minute Forecast)}
    \resizebox{\columnwidth}{!}{%
    \begin{tabular}{c|cc|cc}
        \toprule
        {\textbf{Patient ID}} & \multicolumn{2}{c|}{\textbf{BERT}} & \multicolumn{2}{c}{\textbf{GPT2}} \\
        \cmidrule(lr){2-3} \cmidrule(lr){4-5}
         & MAE & RMSE & MAE & RMSE \\
         \midrule
         540 & 11.10 $\pm$ 0.35 & 17.31 $\pm$ 0.47 & 11.30 $\pm$ 0.29 & 17.48 $\pm$ 0.40 \\
         544 & 9.19 $\pm$ 0.19 & 14.43 $\pm$ 0.17 & 9.02 $\pm$ 0.36 & 14.05 $\pm$ 0.41 \\
         552 & 8.45 $\pm$ 0.33 & 13.11 $\pm$ 0.43 & 8.46 $\pm$ 0.25 & 13.08 $\pm$ 0.34 \\
         559 & 9.38 $\pm$ 0.17 & 16.68 $\pm$ 0.38 & 10.01 $\pm$ 0.38 & 16.93 $\pm$ 0.44 \\
         563 & 9.52 $\pm$ 0.19 & 15.34 $\pm$ 0.28 & 9.42 $\pm$ 0.16 & 14.85 $\pm$ 0.22 \\
         567 & 11.91 $\pm$ 0.62 & 21.24 $\pm$ 0.60 & 11.72 $\pm$ 0.59 & 20.44 $\pm$ 0.53 \\
         570 & 8.23 $\pm$ 0.19 & 12.80 $\pm$ 0.27 & 8.40 $