# Cruce DMM a Beam

In [5]:
import pandas as pd

# ===== DMM ==================================
columns = ["ts", "IDC", "IAC"]
#file_path = r'..\0_raw\dmm_data6894024_2022_05_27_212453_all.dat' # este no calza con ningun beam data por los tiempos
file_path = r'..\0_raw\dmm_data_2022_06_02-14_all.dat'

# Posibles errores al leer este tipo de archivos
#Abro el fichero, reemplazo los '\x00' y guardo en memoria
with open(file_path, 'r', encoding='latin-1', errors='ignore') as f:
    text = f.read().replace('\x00', '')

from io import StringIO
df = pd.read_csv( StringIO(text), delim_whitespace=True,
    names=columns, comment="#", engine="python"
)

df["Time"] = pd.to_datetime(df["ts"], unit="s") + pd.Timedelta(hours=2) # time stamps
df = df.set_index("Time")

print(df.isna().sum())                     # Buscar NaN

# Aseguramos que 'df' tenga columna Time:
if df.index.name == "Time" or df.index.dtype.kind == "M":
    df_curr = df.reset_index()
else:
    df_curr = df.copy()
    if "Time" not in df_curr.columns:
        raise KeyError("df debe tener columna o índice llamado 'Time' en formato datetime.")
    df_curr["Time"] = pd.to_datetime(df_curr["Time"])

# ===== Beam ===================================
beam_data = pd.read_csv("./csv/beam_data.csv")

print(beam_data.isna().sum())              # Buscar NaN



ts     0
IDC    0
IAC    0
dtype: int64
Time         0
TID          0
HEH          0
N1MeV        0
run_group    0
dtype: int64


In [2]:
beam_data.iloc[0]

Time         2022-05-25 10:09:53.517351680
TID                             488.022822
HEH                   1060544258068.813232
N1MeV                 5839896442321.196289
run_group                                7
Name: 0, dtype: object

In [7]:
df_curr.iloc[0]

Time    2022-06-01 12:01:01.485782784
ts                  1654077661.485783
IDC                            0.6231
IAC                          0.000012
Name: 0, dtype: object

cruce con datos beam

In [8]:
# Ordenar ambos DataFrames por Time
df_curr = df_curr.sort_values("Time").reset_index(drop=True)
beam_data_sorted = beam_data.sort_values("Time").reset_index(drop=True)

# Merge asof con tolerancia de 1 segundo
merged = pd.merge_asof(
    df_curr,
    beam_data_sorted,
    on="Time",
    direction="nearest",
    tolerance=pd.Timedelta(seconds=1)
)

# Verificar filas sin correspondencia (NaN en columnas de beam_data)
#    Suponemos que beam_data tiene columnas ["TID", "HEH", "N1MeV", "run_group"]
beam_cols = ["TID", "HEH", "N1MeV", "run_group"]
mask_missing = merged[beam_cols].isna().any(axis=1)

if mask_missing.any():
    bad_times = merged.loc[mask_missing, "Time"]
    raise ValueError(
        f"Error al fusionar: {mask_missing.sum()} filas de 'df' no encontraron registro de beam "
        f"dentro de 1 s.\nTimestamps sin match:\n{bad_times.tolist()}"
    )

# Si todo está bien, restauramos índice en Time (opcional) y mostramos resultado
merged = merged.set_index("Time")
print("Merge exitoso. DataFrame resultante:")
print(merged.head())

MergeError: Incompatible merge dtype, dtype('O') and dtype('<M8[ns]'), both sides must have numeric dtype

In [12]:
# chatgpt try on error
# Antes de hacer el `merge_asof`, debemos garantizar que ambas columnas `Time` sean de tipo `datetime64[ns]`. El error “Incompatible merge dtype, dtype('O') and dtype('<M8[ns]')” indica que en `df_curr["Time"]` queda como objeto (string), mientras que en `beam_data["Time"]` ya es datetime. A continuación el código corregido:

# 1. Convertir `df_curr["Time"]` a `datetime64[ns]`.  
# 2. Repetir chequeo en `beam_data["Time"]`.  
# 3. Ejecutar `merge_asof` con tolerancia de 1 s.  
# 4. Lanzar error si hay filas sin match.


import pandas as pd
import numpy as np

# --- 1) Asegurar que beam_data["Time"] sea datetime64 ---
beam_data["Time"] = pd.to_datetime(beam_data["Time"])

# --- 2) Asegurar que df_curr["Time"] sea datetime64 ---
# Si 'Time' está en el índice, trasladarlo a columna primero
if df_curr.index.name == "Time" or df_curr.index.dtype.kind == "M":
    df_curr = df_curr.reset_index()

# Convertir a datetime (aún si ya parece datetime, nos aseguramos)
df_curr["Time"] = pd.to_datetime(df_curr["Time"])

# --- 3) Ordenar ambos DataFrames por Time ---
df_curr = df_curr.sort_values("Time").reset_index(drop=True)
beam_data_sorted = beam_data.sort_values("Time").reset_index(drop=True)

# --- 4) Merge asof con tolerancia de 1 segundo ---
merged = pd.merge_asof(
    beam_data_sorted,df_curr,
    on="Time",
    direction="nearest",
    tolerance=pd.Timedelta(seconds=3600 * 2)
)

# --- 5) Verificar filas sin correspondencia ---
beam_cols = ["TID", "HEH", "N1MeV", "run_group"]
mask_missing = merged[beam_cols].isna().any(axis=1)
if mask_missing.any():
    bad_times = merged.loc[mask_missing, "Time"]
    raise ValueError(
        f"Error al fusionar: {mask_missing.sum()} filas de 'df_curr' no encontraron registro de beam "
        f"dentro de 1 s.\nTimestamps sin match:\n{bad_times.tolist()}"
    )

# --- 6) Todo OK: reindexar en Time y mostrar resultado ---
merged = merged.set_index("Time")
print("Merge exitoso. DataFrame resultante:")
print(merged.head())


Merge exitoso. DataFrame resultante:
                                      TID           HEH         N1MeV  \
Time                                                                    
2022-05-25 10:09:53.517351680  488.022822  1.060544e+12  5.839896e+12   
2022-05-25 10:10:44.276027648  488.027711  1.060544e+12  5.841454e+12   
2022-05-25 10:11:35.034703616  488.025729  1.060544e+12  5.842470e+12   
2022-05-25 10:12:25.793379328  488.033527  1.060544e+12  5.843457e+12   
2022-05-25 10:13:16.552055040  488.017482  1.060544e+12  5.844933e+12   

                               run_group  ts  IDC  IAC  
Time                                                    
2022-05-25 10:09:53.517351680          7 NaN  NaN  NaN  
2022-05-25 10:10:44.276027648          7 NaN  NaN  NaN  
2022-05-25 10:11:35.034703616          7 NaN  NaN  NaN  
2022-05-25 10:12:25.793379328          7 NaN  NaN  NaN  
2022-05-25 10:13:16.552055040          7 NaN  NaN  NaN  


In [13]:
merged

Unnamed: 0_level_0,TID,HEH,N1MeV,run_group,ts,IDC,IAC
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-05-25 10:09:53.517351680,488.022822,1.060544e+12,5.839896e+12,7,,,
2022-05-25 10:10:44.276027648,488.027711,1.060544e+12,5.841454e+12,7,,,
2022-05-25 10:11:35.034703616,488.025729,1.060544e+12,5.842470e+12,7,,,
2022-05-25 10:12:25.793379328,488.033527,1.060544e+12,5.843457e+12,7,,,
2022-05-25 10:13:16.552055040,488.017482,1.060544e+12,5.844933e+12,7,,,
...,...,...,...,...,...,...,...
2022-11-15 09:33:14.725472512,322.043390,6.725460e+11,3.623988e+12,4,,,
2022-11-15 09:34:05.780378112,322.053560,6.725652e+11,3.624082e+12,4,,,
2022-11-15 09:34:56.835283456,322.058669,6.725748e+11,3.624129e+12,4,,,
2022-11-15 09:35:47.890189056,322.067587,6.725917e+11,3.624212e+12,4,,,
