In [None]:
import pandas as pd
from pathlib import Path

def read_training_csv(p: Path) -> pd.DataFrame:
    """Load one training CSV, detecting the semicolon / decimal-comma dialect.

    Args:
        p: Path of the CSV file to read.

    Returns:
        The parsed frame with its ``Timestamp`` column converted via
        ``pd.to_datetime(..., errors="coerce")``; values that cannot be
        parsed become ``NaT`` instead of raising.

    Raises:
        ValueError: If the parsed frame has no ``Timestamp`` column.
    """
    # Probe the header row only. Parsing the whole file with the default comma
    # separator first would treat decimal commas in a ';'-separated file as
    # field separators, which can raise a ParserError on ragged rows before
    # the dialect is ever detected (and parses such files twice).
    header = pd.read_csv(p, nrows=0)

    # A single column whose name still contains ';' means the file is
    # semicolon-separated; those files are read with ',' as the decimal mark.
    if header.shape[1] == 1 and ";" in header.columns[0]:
        df = pd.read_csv(p, sep=";", decimal=",")
    else:
        df = pd.read_csv(p)

    # Timestamp conversion (safe)
    if "Timestamp" not in df.columns:
        raise ValueError(f"'Timestamp' column missing after parsing: {p.name}")

    df["Timestamp"] = pd.to_datetime(df["Timestamp"], errors="coerce")
    return df

def combine_training_csvs(input_dir="data/raw/training", output_path="data/combined.csv"):
    """Merge all training CSVs in ``input_dir`` into one frame and write it out.

    The pump id of each file is the part of its stem before the first
    underscore. A leading ``"<pump>_"`` is stripped from every column name so
    the files share one schema, and the id is stored in a ``pump_id`` column.

    Args:
        input_dir: Directory that is searched (non-recursively) for ``*.csv``.
        output_path: Destination of the combined CSV; parent directories are
            created if needed.

    Returns:
        The combined frame, sorted by ``pump_id`` then ``Timestamp`` with a
        fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If no input CSV files are found in ``input_dir``.
    """
    input_dir = Path(input_dir)
    output_path = Path(output_path)

    # If the output file lives inside input_dir, a re-run must not read its
    # own previous result back in as if it were another pump's data.
    out_resolved = output_path.resolve()
    files = sorted(f for f in input_dir.glob("*.csv") if f.resolve() != out_resolved)
    if not files:
        raise FileNotFoundError(f"No CSV files found in {input_dir.resolve()}")

    dfs = []
    for p in files:
        pump = p.stem.split("_")[0]
        prefix = pump + "_"
        df = read_training_csv(p)

        # normalize columns: strip the pump prefix only where it leads the
        # name. str.replace would also delete later matches, e.g. pump "R"
        # would turn "R_ACR_Mot.PV" into "ACMot.PV".
        df.columns = [c[len(prefix):] if c.startswith(prefix) else c for c in df.columns]
        df["pump_id"] = pump
        dfs.append(df)

    combined = (pd.concat(dfs, ignore_index=True)
                  .sort_values(["pump_id", "Timestamp"])
                  .reset_index(drop=True))

    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined.to_csv(output_path, index=False)
    return combined


In [63]:
# Build the combined frame with the function's default input/output paths;
# the call also writes the combined CSV to disk as a side effect.
combined = combine_training_csvs()
# Last expression of the cell: show the first rows as a quick sanity check.
combined.head()


Unnamed: 0,Timestamp,ACR_Mot.PV,ACR_Mot.SV,ACR_Mot.TV,ACR_Pmp.PV,ACR_Pmp.SV,ACR_Pmp.TV,Pres.PV,Temp.PV,Barometer,Temperature,pump_id
0,2024-04-10 12:00:00+00:00,0.001184,0.426753,17.976562,0.000464,0.607295,18.21875,0.451494,18.061707,1018.053265,20.478157,A
1,2024-04-10 12:00:05+00:00,0.001184,0.426753,17.976562,0.000464,0.607295,18.21875,0.451494,18.061707,1018.053265,20.478157,A
2,2024-04-10 12:00:10+00:00,0.001184,0.426753,17.976562,0.000464,0.607295,18.21875,0.451494,18.061707,1018.053265,20.478157,A
3,2024-04-10 12:00:15+00:00,0.001184,0.426753,17.976562,0.000464,0.607295,18.21875,0.451494,18.061707,1018.053265,20.478157,A
4,2024-04-10 12:00:20+00:00,0.001184,0.426753,17.976562,0.000464,0.607295,18.21875,0.451494,18.061707,1018.053265,20.412823,A
