In [4]:
import glob
import pandas as pd
from io import StringIO
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from ROOT import TFile, TTree, array

In [5]:
!which python

/home/fabian/.pyenv/versions/root-py3112/bin/python


## Lectura de Ficheros

Se denotan las columnas 'time', 'lfsrTMR', 'B0', 'B1'

In [None]:
# Función parse_message actualizada (igual que antes)
def parse_message(raw: str):
    line = raw.replace('*', '').replace(' #', ',').strip()
    parts = line.split(',')
    if len(parts) < 4 or len(parts[2]) != 4 or len(parts[3]) != 4:
        raise ValueError("Formato inválido")
    ts = float(parts[0])
    time = datetime.utcfromtimestamp(ts) + timedelta(hours=2)
    lfsr = int(parts[1])
    b0, b1 = parts[2], parts[3]
    return time, lfsr, b0, b1

def count_fails(hex_str: str) -> int:
    """
    Convierte hex_str a entero, aplica máscara y negación,
    cuenta bits a '1' en los 8 bits superiores.
    """
    try:
        val = int(hex_str, 16)
        masked = (~val & 0xFF00) >> 8
        # Conteo de bits
        return bin(masked).count('1')
    except ValueError:
        return 0  # En caso de error de parseo
    
def compute_periodic(counts: list[int], window: int = 3) -> int:
    """
    Detecta patrones de incremento repetido en 'counts'.
    Retorna el número de patrones periódicos encontrados.
    """
    periodic = 0
    for i in range(window, len(counts)):
        if (counts[i]   == counts[i-1] + 1 and
            counts[i-1] == counts[i-2]     and
            counts[i-2] == counts[i-3] + 1 ):
            periodic += 1
    return periodic

# -- lectura ---
ts_threshold = datetime(2022, 1, 1) # threshold, no tendría sentido datos previo a esto
fnames = glob.glob('../0_raw/Campaign3/cpld/run/cpld_data_*.dat')
records = []
bad_records = []
bad_count = 0
drops_invalid_ts = 0
last_ts = None

for fn in fnames:
    text = open(fn).read().replace('*', '').replace(' #', '')
    for raw in text.splitlines():
        # Extraer timestamp bruto
        parts = raw.split(',')
        try:
            ts_cand = float(parts[0])
            last_ts = datetime.utcfromtimestamp(ts_cand) + timedelta(hours=2)
        except:
            pass  # mantenemos el último válido

        # Intentar parseo completo
        try:
            time, lfsr, b0, b1 = parse_message(raw)
            records.append((time, lfsr, b0, b1))
        except Exception:
            bad_count += 1
            # Solo registramos si last_ts es válido y supera el umbral
            if last_ts and last_ts >= ts_threshold:
                bad_records.append({"ts": last_ts})
            else:
                drops_invalid_ts += 1
            continue

# Construir DataFrame
df = pd.DataFrame(records, columns=['time', 'lfsrTMR', 'B0', 'B1'])
df.sort_values('time', inplace=True)
df.reset_index(drop=True, inplace=True)

# Error DF
print(f"Líneas descartadas por parseo: {bad_count}")
print(f"Bad_records registrados (ts ≥ {ts_threshold.date()}): {len(bad_records)}")
print(f"Líneas descartadas por ts inválido: {drops_invalid_ts}")

# Crear DataFrame de bad_records y agrupar
df_bad = pd.DataFrame(bad_records)
df_bad['date'] = df_bad['ts'].dt.date
errors_por_dia = df_bad.groupby('date').size().reset_index(name='bad_lines')

df_bad['hour'] = df_bad['ts'].dt.floor('h')
errors_por_hora = df_bad.groupby('hour').size().reset_index(name='bad_lines')

# Resultados
print("\nErrores por día (filtrados):")
print(errors_por_dia)

print("\nErrores por hora (filtrados):")
print(errors_por_hora)

Líneas descartadas por parseo: 20241
Bad_records registrados (ts ≥ 2022-01-01): 20235
Líneas descartadas por ts inválido: 6

Errores por día (filtrados):
         date  bad_lines
0  2022-11-09       6448
1  2022-11-10       1881
2  2022-11-11       2544
3  2022-11-12        631
4  2022-11-13       4644
5  2022-11-14       1602
6  2022-11-15       2485

Errores por hora (filtrados):
                   hour  bad_lines
0   2022-11-09 11:00:00         14
1   2022-11-09 12:00:00         85
2   2022-11-09 13:00:00        111
3   2022-11-09 14:00:00        940
4   2022-11-09 15:00:00       1659
..                  ...        ...
101 2022-11-15 06:00:00         39
102 2022-11-15 07:00:00       1834
103 2022-11-15 08:00:00         31
104 2022-11-15 09:00:00          1
105 2022-11-15 10:00:00          1

[106 rows x 2 columns]


In [7]:
len(df)

1101639

In [19]:
import time
import numpy as np
import re


# Patrón para 4 dígitos hexadecimales
hex_pat = re.compile(r'^[0-9A-Fa-f]{4}$')
valid_mask = ( # Filtrar el DataFrame original
    df['B0'].astype(str).str.fullmatch(hex_pat.pattern) &
    df['B1'].astype(str).str.fullmatch(hex_pat.pattern)
)
df_valid = df[valid_mask].reset_index(drop=True)


# --- Medidor de tiempo total ---
t_start = time.perf_counter()

# ------------------------------------ Información inicial
n_samples = len(df_valid)
print(f"[0] Número total de muestras: {n_samples}")

# Convertir columnas hex a uint16 a manera de tener el valor de los bits cambiados
t1 = time.perf_counter()
b0_int = df_valid['B0'].apply(lambda s: int(s, 16)).to_numpy(dtype=np.uint16)
b1_int = df_valid['B1'].apply(lambda s: int(s, 16)).to_numpy(dtype=np.uint16)
print(f"[1] Conversión hex→uint16: {time.perf_counter() - t1:.2f}s")


# Aplicar máscara y extraer byte alto
t2 = time.perf_counter()
masked0 = ((~b0_int) & 0xFF00) >> 8
masked1 = ((~b1_int) & 0xFF00) >> 8
print(f"[2] Enmascarado de bytes altos: {time.perf_counter() - t2:.2f}s")



[0] Número total de muestras: 1081006
[1] Conversión hex→uint16: 0.49s
[2] Enmascarado de bytes altos: 0.00s


In [20]:

# ---------------Desplegar bits: matriz (n_muestras × 8) para cada byte
t3 = time.perf_counter()

masked0_uint8 = masked0.astype(np.uint8)          # shape: (N,)
masked1_uint8 = masked1.astype(np.uint8)          # shape: (N,)
# expandir dimensión para que unpackbits opere por fila
masked0_2d = masked0_uint8[:, np.newaxis]         # shape: (N,1)
masked1_2d = masked1_uint8[:, np.newaxis]         # shape: (N,1)

# desempacar bits little-endian a lo largo de cada fila → (N,8)
bits0 = np.unpackbits(masked0_2d, axis=1, bitorder='little')  # shape: (N,8)
bits1 = np.unpackbits(masked1_2d, axis=1, bitorder='little')  # shape: (N,8)

# legacy
# bits0 = np.unpackbits(masked0.view(np.uint8), axis=0, bitorder='little')[:, :8]
# bits1 = np.unpackbits(masked1.view(np.uint8), axis=0, bitorder='little')[:, :8]

print(f"[3] Unpack de bits por byte: {time.perf_counter() - t3:.2f}s")

# 4. Concatenar para obtener (n_muestras × 16)
t4 = time.perf_counter()
bits = np.hstack([bits0, bits1]).astype(bool) 
print(bits.shape)  
print(f"[4] Concatenación de bits0+b1: {time.perf_counter() - t4:.2f}s")

# 5. Conteo instantáneo de fallas (popcount)
t5 = time.perf_counter()
fails_inst = bits.sum(axis=1)
print(fails_inst.shape)
print(f"[5] Conteo instantáneo de fallas: {time.perf_counter() - t5:.2f}s")

# 6. Detección de resets para bias
t6 = time.perf_counter()
resets = (fails_inst == 0) & (np.concatenate([[False], fails_inst[:-1] > 0]))
reset_indices = np.nonzero(resets)[0]
print(f"[6] Detección de resets ({len(reset_indices)} encontrados): {time.perf_counter() - t6:.2f}s")

# 7. Cálculo de acumulado ajustado
t7 = time.perf_counter()
cumsum = fails_inst.cumsum()
bias_array = np.zeros_like(cumsum)
for idx in reset_indices:
    bias_array[idx:] += cumsum[idx - 1]
fails_acum = np.maximum.accumulate(cumsum + bias_array)
print(f"[7] Cálculo de acumulado ajustado: {time.perf_counter() - t7:.2f}s")

# 8. Conteo de flancos de subida (edges) y acumulado por bit
t8 = time.perf_counter()
prev = np.vstack([np.zeros((1, 16), bool), bits[:-1]])
edges = bits & (~prev)
bit_counts = edges.cumsum(axis=0)
print(f"[8] Conteo de flancos de subida por bit: {time.perf_counter() - t8:.2f}s")

# 9. Detección de errores periódicos por bit
t9 = time.perf_counter()
kernel = np.array([1, 0, 1])
bit_periodic = np.zeros_like(bit_counts, dtype=int)

for k in range(16):
    # 1) Calcular la convolución en modo 'valid'
    conv = np.convolve(bit_counts[:, k], kernel, mode='valid')  # longitud = N-2

    # 2) Calcular cumsum solo donde conv == 2
    periodic_cumsum = (conv == 2).cumsum()  # longitud = N-2

    # 3) Alinear y asignar a bit_periodic:
    #    conv[i] corresponde al bit de error periódico en la muestra i+2,
    #    así que asignamos desde bit_periodic[2:, k]
    bit_periodic[2:, k] = periodic_cumsum

    if k % 4 == 0:
        print(f"  - Procesando patrón periódico para bit {k}/15")
print(f"[9] Detección de errores periódicos: {time.perf_counter() - t9:.2f}s")

# --- Asignar resultados de vuelta a df_valid ---
t10 = time.perf_counter()
df_valid['fails_inst'] = fails_inst
df_valid['fails_acum'] = fails_acum
for k in range(16):
    df_valid[f'bitn{k}']  = bit_counts[:, k]
    df_valid[f'bitnP{k}'] = bit_periodic[:, k]
print(f"[10] Escritura en DataFrame: {time.perf_counter() - t10:.2f}s")

# --- Tiempo total ---
print(f"Tiempo total aproximado: {time.perf_counter() - t_start:.2f}s")


[3] Unpack de bits por byte: 0.01s
(1081006, 16)
[4] Concatenación de bits0+b1: 0.01s
(1081006,)
[5] Conteo instantáneo de fallas: 0.02s
[6] Detección de resets (29574 encontrados): 0.00s
[7] Cálculo de acumulado ajustado: 6.09s
[8] Conteo de flancos de subida por bit: 0.39s
  - Procesando patrón periódico para bit 0/15
  - Procesando patrón periódico para bit 4/15
  - Procesando patrón periódico para bit 8/15
  - Procesando patrón periódico para bit 12/15
[9] Detección de errores periódicos: 0.55s
[10] Escritura en DataFrame: 0.28s
Tiempo total aproximado: 12.59s


In [11]:
# --- codigo old, demora 2 horas quizas ----

import re
import pandas as pd
from datetime import datetime, timedelta

# Patrón para 4 dígitos hexadecimales
hex_pat = re.compile(r'^[0-9A-Fa-f]{4}$')

# Filtrar el DataFrame original
valid_mask = (
    df['B0'].astype(str).str.fullmatch(hex_pat.pattern) &
    df['B1'].astype(str).str.fullmatch(hex_pat.pattern)
)
df_valid = df[valid_mask].reset_index(drop=True)
bad_hex_rows = len(df) - len(df_valid)
print(f"Filas removidas por hex inválido: {bad_hex_rows}/{len(df)} = {bad_hex_rows/len(df)}")

# Inicializar contadores sobre df_valid
Nbits = 16
fails_inst = []
fails_acum = []
bit_counts   = [[0]*len(df_valid) for _ in range(Nbits)]
bit_periodic = [[0]*len(df_valid) for _ in range(Nbits)]
bias = 0
cum_total = 0
bit_cum  = [0]*Nbits
bit_prev = [0]*Nbits

# Función para conteo rápido de bits
def count_ones_upper_byte(hex_str):
    val = int(hex_str, 16)
    return bin((~val & 0xFF00) >> 8).count('1')

# Bucle de cómputo sin desbordes
for i, row in df_valid.iterrows():
    f0 = count_ones_upper_byte(row['B0'])
    f1 = count_ones_upper_byte(row['B1'])
    total = f0 + f1

    # Ajuste de bias sobre la última entrada válida
    if fails_inst and total == 0 and fails_inst[-1] != 0:
        bias = fails_acum[-1]
    cum_total = max(cum_total + total, cum_total, total + bias)

    # Conteo por bit
    masked0 = ((~int(row['B0'], 16)) & 0xFF00) >> 8
    masked1 = ((~int(row['B1'], 16)) & 0xFF00) >> 8
    bits = [(masked0 >> k) & 1 for k in range(8)] + [(masked1 >> k) & 1 for k in range(8)]

    for k in range(Nbits):
        if bit_prev[k] == 0 and bits[k] == 1:
            bit_cum[k] += 1
        bit_counts[k][i]   = bit_cum[k]
        # Suponiendo compute_periodic definido antes
        bit_periodic[k][i] = compute_periodic(bit_counts[k][:i+1])
        bit_prev[k] = bits[k]

    fails_inst.append(total)
    fails_acum.append(cum_total)

# Agregar resultados al DataFrame válido
df_valid['fails_inst'] = fails_inst
df_valid['fails_acum'] = fails_acum
for k in range(Nbits):
    df_valid[f'bitn{k}']  = bit_counts[k]
    df_valid[f'bitnP{k}'] = bit_periodic[k]



Filas removidas por hex inválido: 20633/1101639 = 0.018729365971974485


KeyboardInterrupt: 

In [None]:
# Agregar al DataFrame
df['fails_inst'] = fails_inst
df['fails_acum'] = fails_acum
for k in range(Nbits):
    df[f'bitn{k}'] = bit_counts[k]
    df[f'bitnP{k}'] = bit_periodic[k]

# 4. Generación de gráficos

plt.figure(figsize=(16,8))
for k in range(Nbits):
    plt.plot(df['time'], df[f'bitn{k}'], linestyle='--', marker='.', label=f'bit{k}')
plt.xlabel('Tiempo')
plt.ylabel('Errores acumulados por bit')
plt.legend(ncol=4)
plt.grid(True)
plt.tight_layout()
plt.show()

# 5. Escritura de ROOT TTree

ofile = TFile("cpld_data.root", 'RECREATE')
tree = TTree("tr", "CPLD data")
t_arr = array('d', [0.0])
b_arr = array('i', Nbits*[0])
bP_arr = array('i', Nbits*[0])

tree.Branch("t", t_arr, "t/D")
tree.Branch("bit", b_arr, f"bit[{Nbits}]/I")
tree.Branch("bitP", bP_arr, f"bitP[{Nbits}]/I")

for i, row in df.iterrows():
    t_arr[0] = (row['time'] - datetime(1970,1,1)).total_seconds()
    for k in range(Nbits):
        b_arr[k] = row[f'bitn{k}']
        bP_arr[k] = row[f'bitnP{k}']
    tree.Fill()

ofile.Write()
ofile.Close()


In [17]:

# 3. Cálculo de errores instantáneos y acumulados

Nbits = 16
fails_inst = []
fails_acum = []
bit_counts = [[0]*len(df) for _ in range(Nbits)]
bit_periodic = [[0]*len(df) for _ in range(Nbits)]

bias = 0
cum_total = 0
bit_cum = [0]*Nbits
bit_prev = [0]*Nbits

for i, row in df.iterrows():
    f0 = count_fails(row['B0'])
    f1 = count_fails(row['B1'])
    total = f0 + f1
    # Ajuste de bias si reset detectado
    if i>0 and total == 0 and (fails_inst[i-1] != 0):
        bias = fails_acum[i-1]
    cum_total = max(cum_total + total, cum_total, total + bias)
    
    # Conteo por bit
    masked0 = ((~int(row['B0'],16)) & 0xFF00) >> 8
    masked1 = ((~int(row['B1'],16)) & 0xFF00) >> 8
    bits = [(masked0>>k)&1 for k in range(8)] + [(masked1>>k)&1 for k in range(8)]
    
    for k in range(Nbits):
        # Detectar flancos de subida para conteo
        if bit_prev[k]==0 and bits[k]==1:
            bit_cum[k] += 1
        bit_counts[k][i] = bit_cum[k]
        # Patrón periódico
        bit_periodic[k][i] = compute_periodic(bit_counts[k][:i+1])
        bit_prev[k] = bits[k]
    
    fails_inst.append(total)
    fails_acum.append(cum_total)

# Agregar al DataFrame
df['fails_inst'] = fails_inst
df['fails_acum'] = fails_acum
for k in range(Nbits):
    df[f'bitn{k}'] = bit_counts[k]
    df[f'bitnP{k}'] = bit_periodic[k]

# 4. Generación de gráficos

plt.figure(figsize=(16,8))
for k in range(Nbits):
    plt.plot(df['time'], df[f'bitn{k}'], linestyle='--', marker='.', label=f'bit{k}')
plt.xlabel('Tiempo')
plt.ylabel('Errores acumulados por bit')
plt.legend(ncol=4)
plt.grid(True)
plt.tight_layout()
plt.show()

# 5. Escritura de ROOT TTree

ofile = TFile("cpld_data.root", 'RECREATE')
tree = TTree("tr", "CPLD data")
t_arr = array('d', [0.0])
b_arr = array('i', Nbits*[0])
bP_arr = array('i', Nbits*[0])

tree.Branch("t", t_arr, "t/D")
tree.Branch("bit", b_arr, f"bit[{Nbits}]/I")
tree.Branch("bitP", bP_arr, f"bitP[{Nbits}]/I")

for i, row in df.iterrows():
    t_arr[0] = (row['time'] - datetime(1970,1,1)).total_seconds()
    for k in range(Nbits):
        b_arr[k] = row[f'bitn{k}']
        bP_arr[k] = row[f'bitnP{k}']
    tree.Fill()

ofile.Write()
ofile.Close()


ValueError: invalid literal for int() with base 16: 'FF#0'