# Fusion de Données : TDMS (Groupe Variable) + Xsens

Ce notebook fusionne les données d'un groupe TDMS spécifique avec les données Xsens.
**Stratégie :**
- **Base de Temps (Master)** : Xsens (Timestamp UTC).
- **TDMS** : Interpolé sur les temps Xsens.
- **Rapport** : Affiche les fréquences, durées et pertes de données.

In [29]:
import numpy as np
import pandas as pd
from nptdms import TdmsFile
from datetime import datetime, timedelta
import os
from scipy.interpolate import interp1d
import re

# ================= PARAMETERS =================
# TDMS file handed to load_tdms_group().
TDMS_PATH = r'Sync/TestSyncGPSok.tdms'
# Xsens text file handed to load_xsens().
TXT_PATH = r'Sync/TestSyncGPSok_P2.txt'
# Name of the TDMS group whose channels are loaded.
TARGET_GROUP = 'P2'
# Path of the CSV file written with the merged data.
OUTPUT_CSV = 'Merged_Data.csv'
# ==============================================

In [30]:
# Columns that together encode the UTC time of each Xsens sample.
_XSENS_UTC_COLS = ['UTC_Year', 'UTC_Month', 'UTC_Day', 'UTC_Hour',
                   'UTC_Minute', 'UTC_Second', 'UTC_Nano']


def _find_xsens_header(path):
    """Return the 0-based line number of the column-header row, or None.

    The first line starting with ``PacketCounter`` wins; failing that, the
    first line containing ``UTC_Year`` is used.
    """
    fallback = None
    with open(path, 'r', errors='ignore') as f:
        for i, line in enumerate(f):
            if line.strip().startswith('PacketCounter'):
                return i
            if fallback is None and 'UTC_Year' in line:
                fallback = i
    return fallback


def _read_xsens_table(path, header_line):
    """Parse the table as tab-separated, then as whitespace-separated.

    A tab parse of a space-separated file does not raise: it silently yields
    a single wide column. The separator is therefore judged on whether the
    UTC columns were actually recovered, not merely on the absence of errors.
    """
    first_parsed = None
    last_error = None
    for sep in ('\t', r'\s+'):
        try:
            # skiprows counts physical lines, exactly like the header scan;
            # header=<n> would instead ignore blank lines and drift.
            table = pd.read_csv(path, sep=sep, skiprows=header_line, header=0)
        except ValueError as err:  # includes pandas ParserError/EmptyDataError
            last_error = err
            continue
        table.columns = table.columns.str.strip()
        if set(_XSENS_UTC_COLS).issubset(table.columns):
            return table
        if first_parsed is None:
            first_parsed = table
    if first_parsed is None:
        raise last_error
    # No separator exposed the UTC columns; let the caller report it.
    return first_parsed


def load_xsens(path):
    """Load an Xsens text export and build a UTC timestamp column.

    Processing steps:
      1. locate the header row and parse the table (tab or whitespace
         separated);
      2. drop "ghost packets", i.e. rows where every available column among
         Acc_X / FreeAcc_E / Gyr_X is empty;
      3. drop rows with an incomplete UTC stamp, assemble ``TS_UTC`` from the
         UTC_* columns (nanosecond resolution) and remove those columns;
      4. sort by ``TS_UTC`` and drop duplicated timestamps.

    Progress and the mean sample rate are printed along the way.

    Args:
        path: Path of the Xsens text file.

    Returns:
        The cleaned DataFrame with a ``TS_UTC`` column. Any failure (missing
        file, header not found, UTC columns missing, ...) is printed and an
        empty DataFrame is returned instead of raising.
    """
    print(f"--- Chargement Xsens : {path} ---")
    req = _XSENS_UTC_COLS
    try:
        header_line = _find_xsens_header(path)
        if header_line is None:
            raise ValueError("Header introuvable")

        df = _read_xsens_table(path, header_line)
        print(f"   [Info] Lignes brutes : {len(df)}")

        # Ghost packets: rows carrying no inertial data at all.
        check_cols = [c for c in ['Acc_X', 'FreeAcc_E', 'Gyr_X'] if c in df.columns]
        if check_cols:
            len_before = len(df)
            df = df.dropna(subset=check_cols, how='all')
            dropped = len_before - len(df)
            if dropped > 0:
                print(f"   [Info] {dropped} lignes vides supprimees (Ghost Packets).")

        if not set(req).issubset(df.columns):
            raise ValueError("Colonnes UTC manquantes")

        df = df.dropna(subset=req)
        date_parts = df[req[:-1]].astype(int).rename(columns={
            'UTC_Year': 'year', 'UTC_Month': 'month', 'UTC_Day': 'day',
            'UTC_Hour': 'hour', 'UTC_Minute': 'minute', 'UTC_Second': 'second',
        })
        timestamps = pd.to_datetime(date_parts) + pd.to_timedelta(df['UTC_Nano'], unit='ns')
        df = (
            df.assign(TS_UTC=timestamps)
            .drop(columns=req)
            .sort_values('TS_UTC')
            .drop_duplicates(subset=['TS_UTC'])
        )

        # Mean rate after cleaning: (n - 1) intervals over the total span,
        # which equals 1 / mean(dt) without depending on the datetime unit.
        if len(df) > 1:
            span_s = (df['TS_UTC'].iloc[-1] - df['TS_UTC'].iloc[0]).total_seconds()
            freq_real = (len(df) - 1) / span_s
            print(f"   [RESULTAT] Fréquence calculée APRES nettoyage : {freq_real:.2f} Hz")

        print(f"Xsens chargé : {len(df)} lignes valides.")
        return df
    except Exception as e:
        # Best-effort loader: report the problem and hand back an empty frame.
        print(f"Erreur Xsens: {e}")
        return pd.DataFrame()

# Load the Xsens recording; load_xsens() returns an empty DataFrame on failure.
df_xsens = load_xsens(TXT_PATH)

In [31]:
def parse_custom_date(val):
    """Parse a day-first date/time value into a pandas timestamp.

    The value is first parsed as given; if that fails, it is converted to a
    string with commas replaced by dots (decimal-comma fractional seconds)
    and parsed again.

    Args:
        val: Value to parse, typically a string such as
            ``"25/12/2023 10:30:00,500"``.

    Returns:
        Whatever ``pd.to_datetime`` yields for the value (a ``Timestamp`` for
        a parsable string), or ``pd.NaT`` when both attempts fail. ``pd.NaT``
        is used rather than ``np.nan`` so that identity checks against
        ``pd.NaT`` and datetime dtype inference behave as expected.
    """
    parse_errors = (ValueError, TypeError, OverflowError)
    try:
        return pd.to_datetime(val, dayfirst=True)
    except parse_errors:
        pass
    try:
        return pd.to_datetime(str(val).replace(',', '.'), dayfirst=True)
    except parse_errors:
        return pd.NaT

def _find_tdms_timestamps(df):
    """Return a datetime Series built from the first usable time column, or None.

    Columns whose name contains 'Time' or 'Date' are tried first; if there is
    none, every column is tried. A column qualifies when, judging by its
    dtype or its first value, it holds:
      * native datetime64 values;
      * numbers between 3e9 and 4e9, read as seconds since 1904-01-01;
      * strings containing '/' or ':' that parse_custom_date() can parse.
    """
    candidates = [c for c in df.columns if 'Time' in c or 'Date' in c] or df.columns.tolist()
    for col in candidates:
        series = df[col]
        if pd.api.types.is_datetime64_dtype(series):
            print(f"Timestamp (Datetime) sur {col}")
            # Normalise the unit so downstream arithmetic is in nanoseconds.
            return series.astype('datetime64[ns]')
        sample = series.iloc[0]
        # numpy scalars (np.int64, np.float32, ...) are not all int/float subclasses.
        if isinstance(sample, (int, float, np.integer, np.floating)) and 3e9 < sample < 4e9:
            print(f"Timestamp (Num) sur {col}")
            return pd.to_datetime(series, unit='s', origin=pd.Timestamp('1904-01-01'))
        if isinstance(sample, str) and ('/' in sample or ':' in sample):
            # pd.notna covers both NaT and NaN failure markers.
            if pd.notna(parse_custom_date(sample)):
                print(f"Timestamp (Txt) sur {col}")
                parsed = series.apply(lambda x: parse_custom_date(x) if isinstance(x, str) else x)
                return pd.to_datetime(parsed, errors='coerce')
    return None


def load_tdms_group(path, group_name):
    """Load one group of a TDMS file into a timestamp-indexed DataFrame.

    Every channel of the group becomes a column (truncated to the shortest
    channel). A timestamp column is detected by _find_tdms_timestamps() and
    becomes the index, named ``TDMS_Timestamp``; rows whose timestamp could
    not be parsed are dropped. All columns, including the original time
    column, are kept and prefixed with ``TDMS_``.

    Args:
        path: Path of the TDMS file.
        group_name: Name of the group to load.

    Returns:
        The indexed DataFrame, or an empty DataFrame when the group is
        missing, has no channels or no samples, has no recognisable
        timestamp, or when any error occurs (the error is printed).
    """
    print(f"\n--- Chargement TDMS Group '{group_name}' ---")
    try:
        tdms = TdmsFile.read(path)
        group = next((g for g in tdms.groups() if g.name == group_name), None)
        if group is None:
            print("Groupe non trouvé")
            return pd.DataFrame()

        data = {c.name: c[:] for c in group.channels()}
        if not data:
            return pd.DataFrame()
        # Channels may differ in length; align them on the shortest one.
        l_min = min(len(v) for v in data.values())
        if l_min == 0:
            print("Groupe TDMS sans échantillons")
            return pd.DataFrame()
        df = pd.DataFrame({k: v[:l_min] for k, v in data.items()})

        ts_series = _find_tdms_timestamps(df)
        if ts_series is None:
            print("Pas de timestamp TDMS")
            return pd.DataFrame()

        # Unparseable timestamps cannot be placed on the time axis.
        valid = ts_series.notna()
        n_invalid = int((~valid).sum())
        if n_invalid:
            print(f"   [Info] {n_invalid} lignes TDMS sans timestamp valide supprimees.")
        df = df.loc[valid].copy()
        df['TDMS_Timestamp'] = ts_series[valid]
        df.set_index('TDMS_Timestamp', inplace=True)
        df.columns = [f"TDMS_{c}" for c in df.columns]
        print(f"TDMS chargé : {len(df)} lignes.")
        return df
    except Exception as e:
        # Best-effort loader: report the problem and hand back an empty frame.
        print(f"Err TDMS: {e}")
        return pd.DataFrame()

# Load the selected TDMS group; load_tdms_group() returns an empty DataFrame on failure.
df_tdms = load_tdms_group(TDMS_PATH, TARGET_GROUP)

In [32]:
# 3. Merge, analysis and report
def _elapsed_seconds(times, origin):
    """Return *times* as float seconds elapsed since the timestamp *origin*.

    Working relative to a nearby origin keeps full float64 precision and does
    not depend on the datetime64 unit of either source.
    """
    delta = pd.DatetimeIndex(times) - origin
    return np.asarray(delta / pd.Timedelta(seconds=1), dtype=float)


def _mean_rate_hz(seconds):
    """Return the mean sample rate of a time axis in seconds (0 if undefined)."""
    if len(seconds) < 2:
        return 0
    span = seconds[-1] - seconds[0]
    return (len(seconds) - 1) / span if span > 0 else 0


if not df_xsens.empty and not df_tdms.empty:
    print("\n================ RAPPORT DE FUSION ================")

    # Interpolation needs a clean, strictly increasing TDMS time axis:
    # drop missing timestamps, sort, and keep the first of any duplicates.
    tdms_clean = df_tdms[df_tdms.index.notna()].sort_index()
    tdms_clean = tdms_clean[~tdms_clean.index.duplicated(keep='first')]

    # Statistics before merging
    start_xs, end_xs = df_xsens['TS_UTC'].min(), df_xsens['TS_UTC'].max()
    start_td, end_td = df_tdms.index.min(), df_tdms.index.max()

    print("1. FICHIERS BRUTS (Avant tout traitement)")
    print(f"   Xsens: {len(df_xsens)} lignes | {start_xs} -> {end_xs}")
    print(f"   TDMS : {len(df_tdms)} lignes | {start_td} -> {end_td}")

    # Common time interval
    t_start_fused = max(start_xs, start_td)
    t_end_fused = min(end_xs, end_td)
    dur_fused = (t_end_fused - t_start_fused).total_seconds()

    if dur_fused < 0:
        # The recordings do not overlap: nothing can be interpolated.
        print("Erreur : aucun recouvrement temporel entre Xsens et TDMS, fusion impossible.")
    elif len(tdms_clean) < 2:
        # Linear interpolation needs at least two support points.
        print("Erreur : moins de 2 points TDMS valides, interpolation impossible.")
    else:
        # Temporary views restricted to the common interval
        view_xs = df_xsens[(df_xsens['TS_UTC'] >= t_start_fused) & (df_xsens['TS_UTC'] <= t_end_fused)]
        view_td = tdms_clean[(tdms_clean.index >= t_start_fused) & (tdms_clean.index <= t_end_fused)]

        nb_points_xs = len(view_xs)
        nb_points_td = len(view_td)

        # Time axes in seconds since the start of the common interval
        t_xs = _elapsed_seconds(view_xs['TS_UTC'], t_start_fused)
        t_td = _elapsed_seconds(view_td.index, t_start_fused)

        # Actual mean sample rates inside the common interval
        freq_xs = _mean_rate_hz(t_xs)
        freq_td = _mean_rate_hz(t_td)

        print(f"\n2. ANALYSE ZONE COMMUNE ({dur_fused:.2f} s)")
        print(f"   Intervalle : {t_start_fused} -> {t_end_fused}")
        print("   Données sources DISPONIBLES (Avant fusion/interpolation) :")
        print(f"     - Xsens : {nb_points_xs} points (Fréq: {freq_xs:.2f} Hz)")
        print(f"     - TDMS  : {nb_points_td} points (Fréq: {freq_td:.2f} Hz)")

        # TIME ALIGNMENT CHECK (jitter)
        # With identical sample counts in the same window the samples are
        # assumed to pair up one-to-one, so the point-to-point offset tells
        # whether the clocks are synchronous or merely run at the same rate.
        if nb_points_xs == nb_points_td and nb_points_xs > 0:
            diffs = np.abs(t_xs - t_td)  # seconds
            mean_diff = np.mean(diffs)
            max_diff = np.max(diffs)

            print("   => TEST ALIGNEMENT (Jitter) :")
            print(f"      Décalage moyen Timestamp : {mean_diff*1000:.4f} ms")
            print(f"      Décalage MAX Timestamp   : {max_diff*1000:.4f} ms")
            if mean_diff < 0.0001:
                print("      -> CONCLUSION : ALIGNEMENT PARFAIT (Données identiques).")
            else:
                print("      -> CONCLUSION : Fréquences identiques mais décalage temporel (Jitter/Phase).")
                print("         L'interpolation est NÉCESSAIRE pour recaler les valeurs TDMS sur les instants Xsens exacts.")

        # Merge: Xsens is the master time base
        df_merged = view_xs.copy()
        df_merged.set_index('TS_UTC', inplace=True)

        # Interpolate every numeric TDMS column onto the Xsens instants
        t_slave = _elapsed_seconds(tdms_clean.index, t_start_fused)
        t_master = _elapsed_seconds(df_merged.index, t_start_fused)

        skipped_cols = []
        for col in tdms_clean.columns:
            # Text/datetime columns (e.g. the raw timestamp channel) cannot be
            # linearly interpolated and would make interp1d raise.
            if not pd.api.types.is_numeric_dtype(tdms_clean[col]):
                skipped_cols.append(col)
                continue
            vals = tdms_clean[col].to_numpy(dtype=float)
            f = interp1d(t_slave, vals, kind='linear', bounds_error=False,
                         fill_value=np.nan, assume_sorted=True)
            df_merged[col] = f(t_master)
        if skipped_cols:
            print(f"   [Info] Colonnes TDMS non numériques ignorées : {skipped_cols}")

        # Remove columns that ended up entirely empty
        df_merged.dropna(axis=1, how='all', inplace=True)

        print("\n3. RÉSULTAT FINAL (Après Interpolation)")
        print(f"   Lignes générées : {len(df_merged)}")
        print("========================================================")

        print(df_merged.head())
        # Export with sub-second precision (%f writes microseconds)
        df_merged.to_csv(OUTPUT_CSV, date_format='%d/%m/%Y %H:%M:%S.%f')
        print(f"\nFichier CSV sauvegardé : {OUTPUT_CSV}")
else:
    print("Erreur données manquantes.")