# Short Script for Reading CPLD Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import logging
from pathlib import Path
from datetime import datetime, timedelta
import csv
import gc
import pandas as pd



# Silence every log record at INFO level and below, process-wide. The periodic
# progress messages are emitted at INFO, so only WARNING and above are shown.
logging.disable(logging.INFO)

def _iter_csv_times(f, name, report_interval):
    """Yield one datetime per CSV row whose ``Time`` column can be parsed."""
    reader = csv.DictReader(f)
    for row_no, row in enumerate(reader, start=1):
        if row_no % report_interval == 0:
            logging.info("%s: procesadas %d líneas...", name, row_no)
        try:
            stamp = pd.to_datetime(row['Time'])
            # An empty or missing cell parses to NaT/None instead of raising.
            # Letting it through would make every later gap comparison False,
            # so reject it here and skip the row like any other bad value.
            if pd.isna(stamp):
                raise ValueError("valor vacío o NaT")
            parsed = stamp.to_pydatetime()
        except Exception as e:
            # Deliberately broad: pandas raises several unrelated exception
            # types for bad input and a bad row must never abort the scan.
            logging.warning(
                "%s línea %d: no pude parsear Time (%s)", name, row_no, e
            )
            continue
        yield parsed


def _iter_dat_times(f, name, time_col, report_interval):
    """Yield one naive-UTC datetime per data line of a whitespace-separated file."""
    from datetime import timezone

    for line_no, raw in enumerate(f, start=1):
        if line_no % report_interval == 0:
            logging.info("%s: procesadas %d líneas...", name, line_no)
        # Skip comment lines and blank lines.
        if raw.startswith('#') or not raw.strip():
            continue
        parts = raw.split()
        if len(parts) <= time_col:
            logging.warning(
                "%s línea %d: sólo %d campos", name, line_no, len(parts)
            )
            continue
        token = parts[time_col]
        try:
            # Non-deprecated equivalent of datetime.utcfromtimestamp(): convert
            # in UTC, then drop tzinfo so the result stays a naive datetime.
            parsed = datetime.fromtimestamp(
                float(token), tz=timezone.utc
            ).replace(tzinfo=None)
        except (ValueError, OverflowError, OSError) as e:
            logging.warning(
                "%s línea %d: token inválido '%s' (%s)", name, line_no, token, e
            )
            continue
        yield parsed


def _group_into_segments(times, gap_threshold):
    """Group consecutive timestamps into (start, end) runs split at large gaps."""
    segments = []
    start = None
    previous = None
    for current in times:
        if previous is None:
            start = previous = current
            continue
        if current - previous > gap_threshold:
            segments.append((start, previous))
            start = current
        previous = current
    # Close the last open segment (absent only when no timestamp was read).
    if previous is not None:
        segments.append((start, previous))
    return segments


def extract_time_segments(
    path: Path,
    is_csv: bool = False,
    time_col: int = 0,
    gap_threshold: timedelta = timedelta(minutes=1)
) -> list:
    """
    Stream a file and return the continuous time segments it contains.

    The file is read row by row. A new segment starts whenever the difference
    between two consecutive timestamps is strictly greater than
    ``gap_threshold``. Rows whose timestamp cannot be parsed are logged as
    warnings and skipped.

    Args:
        path: File to read. A ``str`` is accepted as well as a ``Path``.
        is_csv: If True, read with ``csv.DictReader`` and parse the ``Time``
            column with ``pandas.to_datetime``. If False, treat the file as
            whitespace-separated text: lines starting with ``#`` and blank
            lines are skipped, and field ``time_col`` is read as a POSIX
            timestamp in seconds and converted to a naive UTC datetime.
        time_col: Zero-based index of the time field (non-CSV mode only).
        gap_threshold: Smallest gap that splits two segments (exclusive).

    Returns:
        A list of ``(start, end)`` datetime tuples in file order; an empty
        list when no valid timestamp was found.

    Raises:
        OSError: If the file cannot be opened.
    """
    report_interval = 100_000
    name = Path(path).name

    # The csv module needs newline='' to handle line endings inside fields.
    with open(path, 'r', newline='' if is_csv else None) as f:
        if is_csv:
            times = _iter_csv_times(f, name, report_interval)
        else:
            times = _iter_dat_times(f, name, time_col, report_interval)
        # The generators are lazy, so consume them while the file is open.
        return _group_into_segments(times, gap_threshold)



# 1) Load the CPLD file(s)
# NOTE(review): the path below ends in a `*` wildcard, so it presumably has to
# be expanded with glob before use — the expansion is not shown here; confirm.
data_path = "../data_run/cpld_data_2022_*"
