# Analysis of Test Durations

Goal: Iterate over every available Xsens data file, compute the duration of each test from its first and last UTC timestamps, display a summary table, and save that summary to `Durations_Summary.csv`.

In [None]:
import pandas as pd
import glob
import os
from datetime import timedelta

# Define the root directory for data (parent of current folder)
data_dir = '../'

# Define patterns to search for. Based on previous file listings:
# Moto_Chicane_100_P1.txt
# Moto_Chicane_50_P1.txt
# Moto_Chicane_mouille_80_P1.txt
# Moto_Freinage_mouille_50_P1.txt

# We will look for all .txt files that look like Xsens data.
# glob returns matches in arbitrary order, so sort them to make the processing
# log and the saved CSV reproducible from run to run.
file_pattern = os.path.join(data_dir, 'Moto_*_P*.txt')
files = sorted(glob.glob(file_pattern))

# Columns that must all be present (and non-NaN) to build a timestamp.
req_cols = ['UTC_Year', 'UTC_Month', 'UTC_Day', 'UTC_Hour', 'UTC_Minute', 'UTC_Second', 'UTC_Nano']


def parse_test_metadata(filename):
    """Extract the speed and passage labels from a data file name.

    The name (without its extension) is split on '_'. A token of the form
    'P<digits>' is taken as the passage, a purely numeric token as the speed.
    If several tokens qualify, the last one wins.

    Args:
        filename: Base name of the file, e.g. 'Moto_Chicane_mouille_80_P1.txt'.

    Returns:
        A ``(speed, passage)`` tuple of strings; either element is 'N/A' when
        no matching token is found.
    """
    # splitext removes only the trailing extension, unlike str.replace which
    # would also delete a '.txt' occurring in the middle of the name.
    stem, _ext = os.path.splitext(filename)

    speed = 'N/A'
    passage = 'N/A'
    for token in stem.split('_'):
        if token.startswith('P') and token[1:].isdigit():
            passage = token
        elif token.isdigit():
            speed = token
    return speed, passage


def build_utc_timestamp(row):
    """Build a naive ``pd.Timestamp`` from the UTC_* fields of one data row.

    Args:
        row: Mapping (dict or ``pd.Series``) providing every key in
            ``req_cols``; values may be ints or floats.

    Returns:
        The timestamp described by the row, with no timezone attached.

    Raises:
        ValueError: If the fields do not form a valid date/time or cannot be
            converted to integers.
    """
    whole_second = pd.Timestamp(
        year=int(row['UTC_Year']),
        month=int(row['UTC_Month']),
        day=int(row['UTC_Day']),
        hour=int(row['UTC_Hour']),
        minute=int(row['UTC_Minute']),
        second=int(row['UTC_Second']),
    )
    # Add the sub-second part as nanoseconds so no precision is discarded
    # (converting to whole microseconds would truncate the last three digits).
    return whole_second + pd.Timedelta(int(row['UTC_Nano']), unit='ns')


def load_utc_rows(file_path):
    """Read one tab-separated data file and keep the rows with complete UTC fields.

    The first 12 lines of the file are skipped; the next line is used as the
    column header. Column names are stripped of surrounding whitespace.

    Args:
        file_path: Path of the file to read.

    Returns:
        The parsed DataFrame without the rows that have a NaN in any of
        ``req_cols``, or ``None`` when at least one of those columns is absent.
    """
    # A single parse is enough: the column check is done on the loaded frame
    # instead of reading the file a first time just to inspect its header.
    df = pd.read_csv(file_path, sep='\t', skiprows=12)
    df.columns = df.columns.str.strip()

    if not all(col in df.columns for col in req_cols):
        return None
    return df.dropna(subset=req_cols)


def sort_summary(summary):
    """Return ``summary`` ordered by numeric speed, then numeric passage.

    'Speed_Label' and 'Passage' hold strings, so a plain sort would put '100'
    before '50' and 'P10' before 'P2'. Labels that are not numeric (such as
    'N/A') are placed last; 'Filename' breaks remaining ties.

    Args:
        summary: DataFrame with 'Filename', 'Speed_Label' and 'Passage' columns.

    Returns:
        A re-ordered view of ``summary`` with the same columns.
    """
    sort_keys = pd.DataFrame({
        'speed': pd.to_numeric(summary['Speed_Label'], errors='coerce'),
        # Drop the leading 'P' before converting the passage number.
        'passage': pd.to_numeric(summary['Passage'].str[1:], errors='coerce'),
        'filename': summary['Filename'],
    })
    ordered_index = sort_keys.sort_values(
        by=['speed', 'passage', 'filename'], na_position='last'
    ).index
    return summary.loc[ordered_index]


results = []

print(f"Found {len(files)} files to analyze.")

for file_path in files:
    # Resolved outside the try block so the error message below can always
    # refer to the file being processed.
    filename = os.path.basename(file_path)
    print(f"Processing: {filename}")

    # Best-effort batch: a failure on one file is reported and the loop
    # carries on with the next file.
    try:
        speed, passage = parse_test_metadata(filename)

        utc_rows = load_utc_rows(file_path)
        if utc_rows is None:
            print(f"  Warning: {filename} missing UTC columns.")
            continue
        if utc_rows.empty:
            print(f"  Warning: {filename} has empty data after cleaning.")
            continue

        # Only the first and last valid rows are needed for the duration.
        start_time = build_utc_timestamp(utc_rows.iloc[0])
        end_time = build_utc_timestamp(utc_rows.iloc[-1])

        duration = end_time - start_time
        duration_sec = duration.total_seconds()

        results.append({
            'Filename': filename,
            'Speed_Label': speed,
            'Passage': passage,
            'Start_Time': start_time,
            'End_Time': end_time,
            'Duration_Sec': duration_sec,
            'Duration_Str': str(duration)
        })
    except Exception as e:
        print(f"  Error processing {filename}: {e}")

# Create Summary DataFrame
summary_df = pd.DataFrame(results)

# Display
if not summary_df.empty:
    # Reorder columns
    cols_order = ['Filename', 'Speed_Label', 'Passage', 'Duration_Sec', 'Duration_Str', 'Start_Time', 'End_Time']
    print("\n--- Test Durations Summary ---\n")
    # `display` is provided by the IPython/Jupyter environment.
    display(sort_summary(summary_df[cols_order]))

    # Optional: Save to CSV
    summary_df.to_csv('Durations_Summary.csv', index=False)
    print("\nSummary saved to Durations_Summary.csv")
else:
    print("No valid duration data found.")