In [None]:
import re
from pathlib import Path
import pandas as pd
import csv


# Value written into the "batch_size" column of every processed file.
# NOTE(review): applied unconditionally — confirm every run really used it.
BATCH_SIZE = 16

# Filename layout:
#   <model_name>_<seed>_testing<flag>_pinball<flag>[_<extra parts>]_metrics.csv
# Compiled once here rather than once per folder.
pat = re.compile(
    r"""^(?P<model_name>.+?)_
        (?P<seed>[^_]+)_
        testing(?P<testing_stage>[^_]+)_
        pinball(?P<pinball_loss_usage>[^_]+)    # capture pinball value
        (?:_.*)?                                # allow optional extra parts
        _metrics\.csv$""",
    re.X
)


def maybe_bool(s: str):
    """Turn "true"/"false" (any casing) into a bool; return other strings unchanged."""
    t = s.lower()
    if t == "true":
        return True
    if t == "false":
        return False
    return s


for path_temp in ['Validation', 'Testing', 'Augmentation']:
    # --- folder with CSVs ---
    folder = Path(f"TFT_Results/{path_temp}")

    # sorted() keeps the processing order (and therefore the log) deterministic;
    # the glob is deliberately broad so that unexpected CSVs are reported below.
    for csv_path in sorted(folder.glob("*.csv")):
        m = pat.match(csv_path.name)
        if not m:
            print(f"⚠️ Skipping {csv_path.name} (pattern mismatch)")
            continue

        # The flags are taken from the filename, not from the file contents.
        testing_stage = maybe_bool(m.group("testing_stage"))
        pinball_loss_usage = maybe_bool(m.group("pinball_loss_usage"))

        df = pd.read_csv(csv_path)

        # Assigning (rather than appending) columns keeps a re-run idempotent.
        df["batch_size"] = BATCH_SIZE
        df["testing_stage"] = testing_stage
        df["pinball_loss_usage"] = pinball_loss_usage

        # Overwrites the source file in place, with every value quoted.
        df.to_csv(csv_path, index=False, quoting=csv.QUOTE_ALL)
        print(f"✅ Processed {csv_path.name}")


✅ Processed TFT_42_testingTrue_pinballFalse_augTrue_fake_data_length1600_metrics.csv
✅ Processed TFT_777_testingTrue_pinballFalse_augTrue_fake_data_length400_metrics.csv
✅ Processed TFT_2021_testingTrue_pinballTrue_augTrue_fake_data_length200_metrics.csv
✅ Processed TFT_4242_testingTrue_pinballTrue_augTrue_fake_data_length400_metrics.csv
✅ Processed TFT_777_testingTrue_pinballFalse_augTrue_fake_data_length200_metrics.csv
✅ Processed TFT_2021_testingTrue_pinballTrue_augTrue_fake_data_length800_metrics.csv
✅ Processed TFT_777_testingTrue_pinballTrue_augTrue_fake_data_length800_metrics.csv
✅ Processed TFT_42_testingTrue_pinballFalse_augTrue_fake_data_length1200_metrics.csv
✅ Processed TFT_4242_testingTrue_pinballFalse_augTrue_fake_data_length1200_metrics.csv
✅ Processed TFT_2021_testingTrue_pinballFalse_augTrue_fake_data_length800_metrics.csv
✅ Processed TFT_777_testingTrue_pinballFalse_augTrue_fake_data_length1600_metrics.csv
✅ Processed TFT_4242_testingTrue_pinballTrue_augTrue_fake_data

In [None]:
import pandas as pd
from pathlib import Path
import csv

def combine_csvs(folder, out_path):
    """Concatenate every readable CSV in ``folder`` and write it to ``out_path``.

    ``out_path`` normally lives inside ``folder``; it is excluded from the
    inputs so that running this again does not fold the previous output back
    in and duplicate every row.

    Parameters
    ----------
    folder : pathlib.Path
        Directory scanned (non-recursively) for ``*.csv`` files.
    out_path : pathlib.Path
        Destination of the combined CSV (all values quoted).

    Returns
    -------
    pandas.DataFrame or None
        The combined frame, or ``None`` when no input could be loaded
        (in which case nothing is written).
    """
    dfs = []
    for csv_path in sorted(folder.glob("*.csv")):
        if csv_path == out_path:
            # Output of an earlier run — not an input.
            continue
        try:
            dfs.append(pd.read_csv(csv_path))
            print(f"✅ Loaded {csv_path.name}")
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"⚠️ Skipping {csv_path.name}: {e}")

    if not dfs:
        print("No CSVs found in folder.")
        return None

    # Stack the rows of all files; ignore_index gives a fresh 0..n-1 index.
    combined = pd.concat(dfs, ignore_index=True)
    combined.to_csv(out_path, index=False, quoting=csv.QUOTE_ALL)

    print(f"\n📁 Saved unified CSV: {out_path}")
    print(f"Rows combined: {len(combined)}")
    return combined


for path_temp in ['Validation', 'Testing', 'Augmentation']:
    # Folder with individual CSVs; the unified file is written next to them.
    folder = Path(f"TFT_Results/{path_temp}")
    out_path = folder / f"{path_temp}_all_combined.csv"
    combined = combine_csvs(folder, out_path)


✅ Loaded TFT_42_testingTrue_pinballFalse_augTrue_fake_data_length1600_metrics.csv
✅ Loaded TFT_777_testingTrue_pinballFalse_augTrue_fake_data_length400_metrics.csv
✅ Loaded TFT_2021_testingTrue_pinballTrue_augTrue_fake_data_length200_metrics.csv
✅ Loaded TFT_4242_testingTrue_pinballTrue_augTrue_fake_data_length400_metrics.csv
✅ Loaded TFT_777_testingTrue_pinballFalse_augTrue_fake_data_length200_metrics.csv
✅ Loaded TFT_2021_testingTrue_pinballTrue_augTrue_fake_data_length800_metrics.csv
✅ Loaded TFT_777_testingTrue_pinballTrue_augTrue_fake_data_length800_metrics.csv
✅ Loaded TFT_42_testingTrue_pinballFalse_augTrue_fake_data_length1200_metrics.csv
✅ Loaded TFT_4242_testingTrue_pinballFalse_augTrue_fake_data_length1200_metrics.csv
✅ Loaded TFT_2021_testingTrue_pinballFalse_augTrue_fake_data_length800_metrics.csv
✅ Loaded TFT_777_testingTrue_pinballFalse_augTrue_fake_data_length1600_metrics.csv
✅ Loaded TFT_4242_testingTrue_pinballTrue_augTrue_fake_data_length800_metrics.csv
✅ Loaded TFT_

In [4]:
import pandas as pd
import csv
from pathlib import Path
def to_str(v):
    """Render a cell value as text: strings pass through, bools become "True"/"False",
    anything else goes through ``str()``."""
    if isinstance(v, str):
        return v
    if isinstance(v, bool):
        return "True" if v else "False"
    return str(v)


def build_name(df):
    """Assemble the per-row run identifier from the configuration columns of ``df``.

    Reads the columns ``model_name``, ``scaler_name``, ``batch_size``,
    ``data_augmentation``, ``seed``, ``fake_data_length``,
    ``pinball_loss_usage`` and ``testing_stage`` (a missing column raises
    ``KeyError``) and returns a Series of strings aligned with ``df``.
    """
    return (
        df["model_name"].astype(str)
        + "-" + df["scaler_name"].astype(str)
        + "-bs" + df["batch_size"].astype(str)
        + "-aug" + df["data_augmentation"].map(to_str)
        + "--seed" + df["seed"].astype(str)
        + "--fake_data_length" + df["fake_data_length"].astype(str)
        + "---pinball_loss_usage" + df["pinball_loss_usage"].map(to_str)
        + "---testing_stage" + df["testing_stage"].map(to_str)
    )


for path_temp in ['Validation', 'Testing', 'Augmentation']:
    # Path to your combined CSV
    combined_path = Path(f"TFT_Results/{path_temp}/{path_temp}_all_combined.csv")

    # A stage without a combined file should not abort the remaining stages.
    if not combined_path.exists():
        print(f"⚠️ Skipping {combined_path} (file not found)")
        continue

    df = pd.read_csv(combined_path)

    # Rebuild Name column (overwrites any existing "Name" values)
    df["Name"] = build_name(df)

    # Save back in place, with every value quoted
    df.to_csv(combined_path, index=False, quoting=csv.QUOTE_ALL)

    print("✅ Updated Name column in combined CSV")


✅ Updated Name column in combined CSV
✅ Updated Name column in combined CSV
✅ Updated Name column in combined CSV


In [6]:
import pandas as pd
from pathlib import Path
import csv

# Files that cells of this notebook write into the Combined folder. They are
# derived from the stage CSVs, so reading them back in would duplicate rows.
DERIVED_OUTPUTS = frozenset({"All_combined.csv", "TFT_pinball.csv", "TFT_mse.csv"})


def merge_stage_csvs(folder, out_name, skip_names=frozenset()):
    """Concatenate the CSVs in ``folder`` into ``folder / out_name``.

    Parameters
    ----------
    folder : pathlib.Path
        Directory scanned (non-recursively) for ``*.csv`` files.
    out_name : str
        File name of the unified CSV, created inside ``folder``. It is always
        excluded from the inputs so a re-run does not ingest its own output.
    skip_names : collection of str, optional
        Further file names in ``folder`` that must not be treated as inputs.

    Returns
    -------
    pandas.DataFrame or None
        The combined frame, or ``None`` when no input could be loaded
        (in which case nothing is written).
    """
    excluded = set(skip_names) | {out_name}
    dfs = []
    for csv_path in sorted(folder.glob("*.csv")):
        if csv_path.name in excluded:
            continue
        try:
            dfs.append(pd.read_csv(csv_path))
            print(f"✅ Loaded {csv_path.name}")
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"⚠️ Skipping {csv_path.name}: {e}")

    if not dfs:
        print("No CSVs found in folder.")
        return None

    # Stack the rows of all files; ignore_index gives a fresh 0..n-1 index.
    combined = pd.concat(dfs, ignore_index=True)
    out_path = folder / out_name
    combined.to_csv(out_path, index=False, quoting=csv.QUOTE_ALL)

    print(f"\n📁 Saved unified CSV: {out_path}")
    print(f"Rows combined: {len(combined)}")
    return combined


# Folder with individual CSVs
folder = Path("TFT_Results/Combined")
out_path = folder / "All_combined.csv"
combined = merge_stage_csvs(folder, out_path.name, DERIVED_OUTPUTS)


✅ Loaded Augmentation_all_combined.csv
✅ Loaded Testing_all_combined.csv
✅ Loaded Validation_all_combined.csv

📁 Saved unified CSV: TFT_Results/Combined/All_combined.csv
Rows combined: 70


In [7]:
import pandas as pd
import csv
from pathlib import Path

def normalize_bool_flag(values):
    """Map a Series of "True"/"False" strings or bools (any casing) to real bools.

    Anything that is not recognisably true/false becomes NaN.
    """
    return values.astype(str).str.lower().map({"true": True, "false": False})


def split_by_flag(df, column):
    """Split ``df`` into (rows where ``column`` is True, rows where it is False).

    Rows whose flag is missing belong to neither half; their count is printed
    so the loss is visible instead of silent.
    """
    flag = df[column]
    unmapped = int(flag.isna().sum())
    if unmapped:
        print(f"⚠️ {unmapped} rows have no valid {column} value and are in neither output")
    # .eq() treats NaN as "not equal", so unmapped rows fall out of both masks.
    return df[flag.eq(True)], df[flag.eq(False)]


# Path to your combined CSV
combined_path = Path("TFT_Results/Combined/All_combined.csv")

if combined_path.exists():
    # Load combined file
    df = pd.read_csv(combined_path)

    # If pinball_loss_usage was saved as string "True"/"False", normalize it
    df["pinball_loss_usage"] = normalize_bool_flag(df["pinball_loss_usage"])

    # Split into two DataFrames
    df_pinball, df_mse = split_by_flag(df, "pinball_loss_usage")

    # Save next to the combined file
    out_pinball = combined_path.parent / "TFT_pinball.csv"
    out_mse     = combined_path.parent / "TFT_mse.csv"

    df_pinball.to_csv(out_pinball, index=False, quoting=csv.QUOTE_ALL)
    df_mse.to_csv(out_mse, index=False, quoting=csv.QUOTE_ALL)

    print(f"✅ Saved {len(df_pinball)} rows to {out_pinball}")
    print(f"✅ Saved {len(df_mse)} rows to {out_mse}")
else:
    print(f"⚠️ Skipping split: {combined_path} not found")


✅ Saved 35 rows to TFT_Results/Combined/TFT_pinball.csv
✅ Saved 35 rows to TFT_Results/Combined/TFT_mse.csv
