# 01 · Preparación de datos (QC + splits + tasks.json)

In [1]:
# -*- coding: utf-8 -*-
"""Prepare the train/val/test splits and generate tasks.json for the continual pipeline.

What it does:
1) Loads and normalises the driving_log.csv of each recording (RUNS).
2) Normalises image paths (handles backslashes and trims them down to 'IMG/...').
3) Drops rows whose images are missing (center/left/right).
4) Performs a split stratified by 'steering' bins.
5) Saves:
   - data/processed/<run>/canonical.csv
   - data/processed/<run>/{train,val,test}.csv
   - data/processed/tasks.json
"""

import json
import sys
from pathlib import Path

import pandas as pd

# Project root: when the kernel was started inside "notebooks/", step up one
# level; otherwise the current working directory is taken as the root.
ROOT = Path.cwd()
if ROOT.name == "notebooks":
    ROOT = ROOT.parent

# Make the project's `src` package importable from the notebook.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

from src.prep.udacity_prep import (  # project-local prep module
    PrepConfig,
    run_prep,
    verify_processed_splits,
)

# Raw recordings and processed outputs, both relative to the project root.
# RAW is not referenced by the cells shown in this notebook.
RAW = ROOT / "data" / "raw" / "udacity"
PROC = ROOT / "data" / "processed"



In [2]:
# Parameters: recordings to process and the settings handed to the prep step.

RUNS = ["circuito1", "circuito2"]
CFG = PrepConfig(
    root=ROOT,
    runs=RUNS,
    use_left_right=False,  # here only QC + split (no left/right expansion)
    # NOTE(review): presumably the steering correction for the side cameras;
    # confirm in PrepConfig whether it has any effect when use_left_right=False.
    steer_shift=0.2,
    # NOTE(review): presumably the number of 'steering' bins used by the
    # stratified split — confirm against PrepConfig.
    bins=21,
    train=0.70,
    # NOTE(review): train + val = 0.85; the remaining 0.15 presumably goes to
    # the test split — confirm.
    val=0.15,
    seed=42,
    target_per_bin="auto", # balancing is done in 01A; it does not matter here
    cap_per_bin=12000,
)
# Bare expression so the notebook displays the resulting config.
CFG


PrepConfig(root=PosixPath('/home/cesar/proyectos/TFM_SNN'), runs=['circuito1', 'circuito2'], use_left_right=False, steer_shift=0.2, bins=21, train=0.7, val=0.15, seed=42, target_per_bin='auto', cap_per_bin=12000)

In [3]:
# Run the preparation step, then verify the processed splits.
# `manifest` keeps run_prep's return value; the cells shown here do not use it.
manifest = run_prep(CFG)
# Prints the expected manifest path; it does not check that the file exists.
print(f"OK: {PROC / 'prep_manifest.json'}")

# The confirmation below is only reached once verification has returned.
verify_processed_splits(PROC, RUNS)
print("OK: splits 'train/val/test' encontrados.")


OK: /home/cesar/proyectos/TFM_SNN/data/processed/prep_manifest.json
OK: splits 'train/val/test' encontrados.


In [4]:
# Quick summary: one row per run (in tasks_order) with the train/val/test CSV
# entries recorded in tasks.json. This table lists split files, not row counts.
tasks_json = json.loads((PROC / "tasks.json").read_text(encoding="utf-8"))
pd.DataFrame({
    "run": tasks_json["tasks_order"],
    # One "<split>_csv" column per split, in train → val → test order.
    **{
        f"{split}_csv": [
            tasks_json["splits"][run][split] for run in tasks_json["tasks_order"]
        ]
        for split in ("train", "val", "test")
    },
})


Unnamed: 0,run,train_csv,val_csv,test_csv
0,circuito1,/home/cesar/proyectos/TFM_SNN/data/processed/c...,/home/cesar/proyectos/TFM_SNN/data/processed/c...,/home/cesar/proyectos/TFM_SNN/data/processed/c...
1,circuito2,/home/cesar/proyectos/TFM_SNN/data/processed/c...,/home/cesar/proyectos/TFM_SNN/data/processed/c...,/home/cesar/proyectos/TFM_SNN/data/processed/c...
