In [6]:
import pandas as pd
import numpy as np
import warnings
import sys

from pathlib import Path
from scipy import stats

warnings.filterwarnings('ignore')

In [9]:
DATA_DIR = Path('Data')

dynamic_data_path = DATA_DIR / 'VED_DynamicData'

csv_files = []

veh_folders = sorted([f for f in dynamic_data_path.iterdir() if f.is_dir() and f.name.startswith('VehId_')])

print(f'Found {len(veh_folders)} VehId folders')

for veh_folder in veh_folders:
    csv_in_folder = sorted(veh_folder.glob('*.csv'))
    csv_files.extend(csv_in_folder)
    if csv_in_folder:
        print(f'{veh_folder.name}: {len(csv_in_folder)} CSV files')

csv_files = [str(f) for f in csv_files]

print(f'\nTotal CSV files found: {len(csv_files)}')
if csv_files:
    print(f'First file: {csv_files[0]}')
    print(f'Last file: {csv_files[-1]}')


Found 384 VehId folders
VehId_10: 201 CSV files
VehId_108: 27 CSV files
VehId_11: 166 CSV files
VehId_110: 18 CSV files
VehId_115: 91 CSV files
VehId_116: 44 CSV files
VehId_119: 5 CSV files
VehId_12: 121 CSV files
VehId_120: 3 CSV files
VehId_123: 46 CSV files
VehId_124: 92 CSV files
VehId_125: 93 CSV files
VehId_126: 9 CSV files
VehId_128: 91 CSV files
VehId_129: 6 CSV files
VehId_130: 57 CSV files
VehId_131: 1 CSV files
VehId_132: 88 CSV files
VehId_133: 108 CSV files
VehId_135: 147 CSV files
VehId_137: 3 CSV files
VehId_138: 11 CSV files
VehId_139: 53 CSV files
VehId_140: 151 CSV files
VehId_141: 12 CSV files
VehId_142: 33 CSV files
VehId_143: 20 CSV files
VehId_145: 177 CSV files
VehId_147: 40 CSV files
VehId_148: 16 CSV files
VehId_149: 51 CSV files
VehId_150: 3 CSV files
VehId_153: 22 CSV files
VehId_154: 109 CSV files
VehId_155: 194 CSV files
VehId_156: 164 CSV files
VehId_157: 122 CSV files
VehId_159: 3 CSV files
VehId_160: 27 CSV files
VehId_161: 85 CSV files
VehId_162: 35 CS

In [11]:
# archivo dinámico
f = csv_files[0]
df0 = pd.read_csv(f, nrows=1000)
display(df0.head())
display(df0.tail())
print('Dtypes:')
print(df0.dtypes)

Unnamed: 0,DayNum,VehId,Trip,Timestamp(ms),Latitude[deg],Longitude[deg],Vehicle Speed[km/h],MAF[g/sec],Engine RPM[RPM],Absolute Load[%],...,Air Conditioning Power[kW],Air Conditioning Power[Watts],Heater Power[Watts],HV Battery Current[A],HV Battery SOC[%],HV Battery Voltage[V],Short Term Fuel Trim Bank 1[%],Short Term Fuel Trim Bank 2[%],Long Term Fuel Trim Bank 1[%],Long Term Fuel Trim Bank 2[%]
0,1.719774,10,1558,0,42.277066,-83.763404,53.59,,,,...,,0.0,2250.0,-21.5,96.341469,386.0,,,,
1,1.719774,10,1558,200,42.277066,-83.763404,51.98,,,,...,,0.0,2250.0,-21.5,96.341469,386.0,,,,
2,1.719774,10,1558,1200,42.277066,-83.763404,50.369999,,,,...,,0.0,2250.0,-21.5,96.341469,386.0,,,,
3,1.719774,10,1558,1500,42.277066,-83.763404,50.369999,,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,
4,1.719774,10,1558,2300,42.277066,-83.763404,49.799999,,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,


Unnamed: 0,DayNum,VehId,Trip,Timestamp(ms),Latitude[deg],Longitude[deg],Vehicle Speed[km/h],MAF[g/sec],Engine RPM[RPM],Absolute Load[%],...,Air Conditioning Power[kW],Air Conditioning Power[Watts],Heater Power[Watts],HV Battery Current[A],HV Battery SOC[%],HV Battery Voltage[V],Short Term Fuel Trim Bank 1[%],Short Term Fuel Trim Bank 2[%],Long Term Fuel Trim Bank 1[%],Long Term Fuel Trim Bank 2[%]
124,1.719774,10,1558,104000,42.278445,-83.754332,37.739998,,,,...,,100.0,2000.0,-45.0,95.853661,381.5,,,,
125,1.719774,10,1558,105000,42.278445,-83.754332,36.419998,,,,...,,100.0,2000.0,-45.0,95.853661,381.5,,,,
126,1.719774,10,1558,105600,42.278445,-83.754332,36.419998,,,,...,,100.0,2000.0,-45.0,95.853661,381.5,,,,
127,1.719774,10,1558,106100,42.278445,-83.754332,34.439999,,,,...,,100.0,2000.0,-45.0,95.853661,381.5,,,,
128,1.719774,10,1558,106200,42.278445,-83.754332,34.439999,,,,...,,100.0,2000.0,-45.0,95.853661,388.5,,,,


Dtypes:
DayNum                            float64
VehId                               int64
Trip                                int64
Timestamp(ms)                       int64
Latitude[deg]                     float64
Longitude[deg]                    float64
Vehicle Speed[km/h]               float64
MAF[g/sec]                        float64
Engine RPM[RPM]                   float64
Absolute Load[%]                  float64
OAT[DegC]                         float64
Fuel Rate[L/hr]                   float64
Air Conditioning Power[kW]        float64
Air Conditioning Power[Watts]     float64
Heater Power[Watts]               float64
HV Battery Current[A]             float64
HV Battery SOC[%]                 float64
HV Battery Voltage[V]             float64
Short Term Fuel Trim Bank 1[%]    float64
Short Term Fuel Trim Bank 2[%]    float64
Long Term Fuel Trim Bank 1[%]     float64
Long Term Fuel Trim Bank 2[%]     float64
dtype: object


In [14]:
LABELS = {'DayNum', 'VehId', 'Trip', 'Timestamp(ms)', 'Latitude[deg]', 'Longitude[deg]'}

Y = {'Fuel Rate[L/hr]', 'HV Battery Current[A]'}

X = {
    # Engine parameters
    'Engine RPM[RPM]',
    'Absolute Load[%]',
    
    # Vehicle operation
    'Vehicle Speed[km/h]',
    'MAF[g/sec]',  # Mass Air Flow - indicator of engine load
    
    # Environmental
    'OAT[DegC]',  # Outside Air Temperature
    
    # Auxiliary power
    'Air Conditioning Power[kW]',
    'Air Conditioning Power[Watts]',
    'Heater Power[Watts]',
    
    # Battery system
    'HV Battery SOC[%]',
    'HV Battery Voltage[V]',
    
    # Fuel trim (affects fuel injection)
    'Short Term Fuel Trim Bank 1[%]',
    'Short Term Fuel Trim Bank 2[%]',
    'Long Term Fuel Trim Bank 1[%]',
    'Long Term Fuel Trim Bank 2[%]'
}

In [None]:
# Transform data to prediction scales: kWh/km and L/100km
# Data leakage prevention: Aggregate at trip level BEFORE using as targets
# This ensures we're predicting trip-level efficiency, not instantaneous values

dir_path = DATA_DIR / 'VED_DynamicData'
dynamic_files_raw = sorted(dir_path.glob('VehId_*/Trip_*.csv'))

if not dynamic_files_raw:
    dynamic_files_raw = sorted(dir_path.glob('*.csv'))

dynamic_files_raw = [str(f) for f in dynamic_files_raw]

print(f'Processing {len(dynamic_files_raw)} trip files for efficiency metrics...')
print('Aggregating to trip level to prevent data leakage\n')

def calculate_trip_efficiency(df):
    """
    Calculate trip-level efficiency metrics from raw time-series data.
    No data leakage: we aggregate the entire trip before calculating targets.
    
    Returns:
        dict with:
        - fuel_consumption_per_100km: L/100km (for ICE/HEV/PHEV)
        - energy_consumption_per_km: kWh/km (for HEV/PHEV/EV)
        - distance_km: total distance driven
        - duration_hours: total trip duration
    """
    
    # Calculate distance from GPS (Haversine formula approximation)
    # For now, use Vehicle Speed to estimate distance
    time_intervals = np.diff(df['Timestamp(ms)'].values) / (1000 * 3600)  # Convert ms to hours
    distances = df['Vehicle Speed[km/h]'].iloc[:-1].values * time_intervals  # km
    total_distance_km = np.sum(distances[distances > 0])  # Filter out invalid values
    
    if total_distance_km == 0:
        total_distance_km = 0.001  # Avoid division by zero
    
    # Fuel consumption: integrate fuel rate over the trip
    total_fuel_consumed = df['Fuel Rate[L/hr]'].fillna(0).sum() * (np.mean(time_intervals) if len(time_intervals) > 0 else 0)
    fuel_per_100km = (total_fuel_consumed / total_distance_km * 100) if total_distance_km > 0 else 0
    
    # Energy consumption: integrate HV Battery Current over the trip
    # HV Battery Current in Amperes, need to estimate voltage for power
    avg_voltage = df['HV Battery Voltage[V]'].fillna(0).mean()
    battery_power_kw = (df['HV Battery Current[A]'].fillna(0) * avg_voltage / 1000).sum()
    total_battery_energy_kwh = battery_power_kw * (np.mean(time_intervals) if len(time_intervals) > 0 else 0)
    energy_per_km = (total_battery_energy_kwh / total_distance_km) if total_distance_km > 0 else 0
    
    trip_duration_hours = (df['Timestamp(ms)'].iloc[-1] - df['Timestamp(ms)'].iloc[0]) / (1000 * 3600)
    
    return {
        'fuel_consumption_L_per_100km': max(0, fuel_per_100km),
        'energy_consumption_kWh_per_km': max(0, energy_per_km),
        'distance_km': max(0.001, total_distance_km),
        'duration_hours': max(0.001, trip_duration_hours)
    }

trip_level_data = []

for file_path in dynamic_files_raw:
    file_name = Path(file_path).stem
    
    df = pd.read_csv(file_path)
    
    # Skip trips with insufficient data
    if len(df) < 2:
        continue
    
    # Get trip-level efficiency metrics (no data leakage)
    efficiency_metrics = calculate_trip_efficiency(df)
    
    # Calculate average values for X features (for this trip)
    row_data = {
        'filename': file_name,
        'VehId': df['VehId'].iloc[0],
        'DayNum': df['DayNum'].iloc[0],
        'Trip': df['Trip'].iloc[0],
        
        # TARGET VARIABLES (trip-level aggregates - no data leakage)
        'Fuel_Rate_L_per_100km': efficiency_metrics['fuel_consumption_L_per_100km'],
        'Energy_Consumption_kWh_per_km': efficiency_metrics['energy_consumption_kWh_per_km'],
        
        # FEATURES: Average values during the trip
        'Avg_Vehicle_Speed_km_h': df['Vehicle Speed[km/h]'].mean(),
        'Avg_Engine_RPM': df['Engine RPM[RPM]'].mean(),
        'Avg_Absolute_Load': df['Absolute Load[%]'].mean(),
        'Avg_MAF_g_sec': df['MAF[g/sec]'].mean(),
        'Avg_OAT_DegC': df['OAT[DegC]'].mean(),
        'Avg_AC_Power_kW': df['Air Conditioning Power[kW]'].mean(),
        'Avg_Heater_Power_W': df['Heater Power[Watts]'].mean(),
        'Avg_HV_Battery_SOC': df['HV Battery SOC[%]'].mean(),
        'Avg_HV_Battery_Voltage': df['HV Battery Voltage[V]'].mean(),
        'Avg_STFT_Bank1': df['Short Term Fuel Trim Bank 1[%]'].mean(),
        'Avg_STFT_Bank2': df['Short Term Fuel Trim Bank 2[%]'].mean(),
        'Avg_LTFT_Bank1': df['Long Term Fuel Trim Bank 1[%]'].mean(),
        'Avg_LTFT_Bank2': df['Long Term Fuel Trim Bank 2[%]'].mean(),
        
        # Trip characteristics
        'Trip_Distance_km': efficiency_metrics['distance_km'],
        'Trip_Duration_hours': efficiency_metrics['duration_hours'],
    }
    
    trip_level_data.append(row_data)

df_trip_efficiency = pd.DataFrame(trip_level_data)

print(f'Created {len(df_trip_efficiency)} trip-level samples')
print(f'\nTarget Variables:')
print(f'  • Fuel_Rate_L_per_100km: Range [{df_trip_efficiency["Fuel_Rate_L_per_100km"].min():.2f}, {df_trip_efficiency["Fuel_Rate_L_per_100km"].max():.2f}]')
print(f'  • Energy_Consumption_kWh_per_km: Range [{df_trip_efficiency["Energy_Consumption_kWh_per_km"].min():.4f}, {df_trip_efficiency["Energy_Consumption_kWh_per_km"].max():.4f}]')
print(f'\nFeature Variables (14 predictors + trip characteristics)')
print(f'Shape: {df_trip_efficiency.shape}')
print(df_trip_efficiency.head())


In [16]:
# Get all VED_DynamicData trip files (new structure)
dir_path = DATA_DIR / 'VED_DynamicData'

# Get all trip files from VehId folders
dynamic_files = sorted(dir_path.glob('VehId_*/Trip_*.csv'))

if not dynamic_files:
    # Fallback for old structure if new one doesn't exist
    dynamic_files = sorted(dir_path.glob('*.csv'))

dynamic_files = [str(f) for f in dynamic_files]

print(f'Processing {len(dynamic_files)} trip files...')

def calculate_metrics(series):

    # Replace NaN with 0
    series = series.fillna(0)
    
    metrics = {
        'mean': series.mean(),
        'median': series.median(),
        'std': series.std(),
        'min': series.min(),
        'max': series.max(),
        'q25': series.quantile(0.25),
        'q75': series.quantile(0.75),
        'sum': series.sum(),
        'count': len(series),
    }
    
    try:
        mode_result = stats.mode(series, keepdims=True)
        metrics['mode'] = mode_result.mode[0]
        metrics['mode_count'] = mode_result.count[0]
    except:
        metrics['mode'] = np.nan
        metrics['mode_count'] = 0
    
    metrics['range'] = metrics['max'] - metrics['min']
    metrics['iqr'] = metrics['q75'] - metrics['q25']
    
    if metrics['mean'] != 0:
        metrics['cv'] = metrics['std'] / abs(metrics['mean'])
    else:
        metrics['cv'] = 0
    
    return metrics

# Columns to skip (identifiers - keep as-is, not calculate metrics)
IDENTIFIER_COLUMNS = {'VehId', 'DayNum', 'Trip', 'Timestamp', 'Timestamp(ms)', 'Latitude[deg]', 'Longitude[deg]'}

all_results = []

for file_path in dynamic_files:
    file_name = Path(file_path).stem

    df = pd.read_csv(file_path)
    
    row_data = {'filename': file_name}
    
    # Preserve VehId and other identifiers as-is (just take first value since same for all rows)
    for id_col in IDENTIFIER_COLUMNS:
        if id_col in df.columns:
            row_data[id_col] = df[id_col].iloc[0]
    
    # Calculate metrics only for actual feature columns
    for col in df.columns:
        if col not in IDENTIFIER_COLUMNS:
            metrics = calculate_metrics(df[col])
            
            for metric_name, metric_value in metrics.items():
                key = f'{col}_{metric_name}'
                row_data[key] = metric_value
        
    all_results.append(row_data)
        
df_metrics = pd.DataFrame(all_results)

print(f'Columns: {len(df_metrics.columns)}')
print(f'VehId preserved (unaltered): {"VehId" in df_metrics.columns}')
print(df_metrics.head())

Processing 32552 trip files...


KeyboardInterrupt: 

In [23]:
# Save results to CSV
output_file = DATA_DIR / 'VED_DynamicData_Metrics.csv'
df_metrics.to_csv(output_file, index=False)
print(f'Shape: {df_metrics.shape}')
print(f'Columns: {df_metrics.shape[1]}')

# Display summary info
print(f'Number of files processed: {len(df_metrics)}')
print(f'Number of features in original data: {len(dynamic_files) > 0 and len(pd.read_csv(dynamic_files[0]).columns) or "N/A"}')
print(f'Total columns in output CSV: {df_metrics.shape[1]}')
display(df_metrics.head())


Shape: (54, 231)
Columns: 231
Number of files processed: 54
Number of features in original data: 22
Total columns in output CSV: 231
Number of features in original data: 22
Total columns in output CSV: 231


Unnamed: 0,filename,VehId,Longitude[deg],Trip,Timestamp(ms),Latitude[deg],DayNum,Vehicle Speed[km/h]_mean,Vehicle Speed[km/h]_median,Vehicle Speed[km/h]_std,...,Long Term Fuel Trim Bank 2[%]_max,Long Term Fuel Trim Bank 2[%]_q25,Long Term Fuel Trim Bank 2[%]_q75,Long Term Fuel Trim Bank 2[%]_sum,Long Term Fuel Trim Bank 2[%]_count,Long Term Fuel Trim Bank 2[%]_mode,Long Term Fuel Trim Bank 2[%]_mode_count,Long Term Fuel Trim Bank 2[%]_range,Long Term Fuel Trim Bank 2[%]_iqr,Long Term Fuel Trim Bank 2[%]_cv
0,VED_171101_week,8,-83.69875,706,0,42.277558,1.586651,37.922803,40.0,26.670385,...,10.15625,0.0,0.0,39605.46875,489414,0.0,467322,23.4375,0.0,11.751065
1,VED_171108_week,8,-83.707059,730,0,42.26657,8.560162,39.179658,42.0,27.237791,...,25.0,0.0,0.0,181728.90625,535198,0.0,437407,50.78125,0.0,7.342588
2,VED_171115_week,10,-83.719066,1625,0,42.287548,15.85402,39.609396,42.0,27.312963,...,25.0,0.0,0.0,244036.71875,602092,0.0,454136,46.09375,0.0,6.724574
3,VED_171122_week,5,-83.680033,1366,0,42.230607,22.501284,39.195471,42.0,27.084772,...,25.0,0.0,0.0,197303.90625,474329,0.0,360271,55.46875,0.0,6.625645
4,VED_171129_week,8,-83.706693,805,0,42.266449,29.572386,40.897643,43.03125,27.694109,...,25.0,0.0,0.0,170446.09375,531856,0.0,407433,45.3125,0.0,8.636688


In [24]:
all_cols = df_metrics.columns.tolist()
all_cols.remove('filename')

# Remove identifier columns that don't have metrics
identifier_cols = {'VehId', 'DayNum', 'Trip', 'Timestamp', 'Timestamp(ms)', 'Latitude[deg]', 'Longitude[deg]'}
all_cols = [col for col in all_cols if col not in identifier_cols]

# Find a sample feature to get metrics
if all_cols:
    sample_feature_cols = [col for col in all_cols if col.startswith(all_cols[0].split('_')[0])]
    print(f'\nFeature: "{sample_feature_cols[0].rsplit("_", 1)[0]}"')
    print(f'Available metrics:')
    for col in sample_feature_cols:
        metric = col.rsplit('_', 1)[1]
        print(f'  - {metric}')

    print(f'\nTotal unique metrics per feature: {len(sample_feature_cols)}')
    print(f'Number of original features: 22')
    print(f'Total output columns: 1 (filename) + 22 features × {len(sample_feature_cols)} metrics = {1 + 22 * len(sample_feature_cols)} columns')
else:
    print("No feature columns found!")



Feature: "Vehicle Speed[km/h]"
Available metrics:
  - mean
  - median
  - std
  - min
  - max
  - q25
  - q75
  - sum
  - count
  - mode
  - count
  - range
  - iqr
  - cv

Total unique metrics per feature: 14
Number of original features: 22
Total output columns: 1 (filename) + 22 features × 14 metrics = 309 columns


In [None]:
# Fourier Analysis for Dynamic Data Files (Trip-based structure)
EXCLUDE_COLUMNS = {
    'DayNum', 'Trip', 'Timestamp(ms)', 
    'Latitude[deg]', 'Longitude[deg]'
    # Note: VehId is intentionally excluded to preserve it for identification
}

# Get all trip files from VehId folders
dir_path = DATA_DIR / 'VED_DynamicData'
dynamic_files = sorted(dir_path.glob('VehId_*/Trip_*.csv'))

if not dynamic_files:
    # Fallback for old structure if new one doesn't exist
    dynamic_files = sorted(dir_path.glob('*.csv'))

dynamic_files = [str(f) for f in dynamic_files]

print(f'Processing {len(dynamic_files)} trip files with Fourier analysis...')

def apply_fourier_analysis(series, n_components=10):
    series = series.fillna(0).values
    
    fft_result = np.fft.fft(series)
    magnitude = np.abs(fft_result)
    frequencies = np.fft.fftfreq(len(series))
    
    n_samples = len(series)
    positive_freq_end = n_samples // 2
    
    mag_positive = magnitude[1:positive_freq_end]
    freq_positive = frequencies[1:positive_freq_end]
    
    sorted_indices = np.argsort(mag_positive)[::-1]
    
    n_comp = min(n_components, len(mag_positive))
    top_indices = sorted_indices[:n_comp]
    
    result = {}
    for i in range(n_comp):
        idx = top_indices[i]
        result[f'fft_magnitude_{i}'] = mag_positive[idx]
        result[f'fft_frequency_{i}'] = freq_positive[idx]
    
    result['fft_energy'] = np.sum(magnitude ** 2)
    
    if np.sum(mag_positive) > 0:
        result['fft_spectral_centroid'] = np.sum(freq_positive * mag_positive) / np.sum(mag_positive)
    else:
        result['fft_spectral_centroid'] = 0
    
    return result

fourier_results = []
veh_ids_fourier = []  # Store VehIds from the raw data
n_fft_components = 10

for file_path in dynamic_files:
    file_name = Path(file_path).stem

    df = pd.read_csv(file_path)
    row_data = {'filename': file_name}
    
    # Extract VehId if available in the raw data
    if 'VehId' in df.columns:
        # Get the first VehId value (same for all rows in file)
        veh_id = df['VehId'].iloc[0]
        row_data['VehId'] = veh_id
        veh_ids_fourier.append(veh_id)
    
    for col in df.columns:
        if col not in EXCLUDE_COLUMNS and col != 'VehId':
            fft_metrics = apply_fourier_analysis(df[col], n_components=n_fft_components)
            
            for metric_name, metric_value in fft_metrics.items():
                key = f'{col}_{metric_name}'
                row_data[key] = metric_value
    
    fourier_results.append(row_data)
        
df_fourier = pd.DataFrame(fourier_results)

print(f'\nshape: {df_fourier.shape}')
print(f'Columns: {len(df_fourier.columns)}')
print(f'Features with Fourier analysis: {len([c for c in df.columns if c not in EXCLUDE_COLUMNS and c != "VehId"])}')
print(f'VehId preserved: {"VehId" in df_fourier.columns}')
print(df_fourier.head())

Processing with Fourier: VED_171101_week
Processing with Fourier: VED_171108_week
Processing with Fourier: VED_171108_week
Processing with Fourier: VED_171115_week
Processing with Fourier: VED_171115_week
Processing with Fourier: VED_171122_week
Processing with Fourier: VED_171122_week
Processing with Fourier: VED_171129_week
Processing with Fourier: VED_171129_week
Processing with Fourier: VED_171206_week
Processing with Fourier: VED_171206_week
Processing with Fourier: VED_171213_week
Processing with Fourier: VED_171213_week
Processing with Fourier: VED_171220_week
Processing with Fourier: VED_171220_week
Processing with Fourier: VED_171227_week
Processing with Fourier: VED_171227_week
Processing with Fourier: VED_180103_week
Processing with Fourier: VED_180103_week
Processing with Fourier: VED_180110_week
Processing with Fourier: VED_180110_week
Processing with Fourier: VED_180117_week
Processing with Fourier: VED_180117_week
Processing with Fourier: VED_180124_week
Processing with 

In [26]:
# Save Fourier results to CSV
output_file_fourier = DATA_DIR / 'VED_DynamicData_Fourier.csv'
df_fourier.to_csv(output_file_fourier, index=False)
print(f'Shape: {df_fourier.shape}')
print(f'Rows (samples): {df_fourier.shape[0]}')
print(f'Columns: {df_fourier.shape[1]}')

print(f'Number of files processed: {len(df_fourier)}')
print(f'Number of features analyzed: {(df_fourier.shape[1] - 2) // 22 if "VehId" in df_fourier.columns else (df_fourier.shape[1] - 1) // 22}')
print(f'Top frequency components per feature: 10')
print(f'Additional metrics per feature: energy, spectral_centroid')
print(f'VehId preserved for vehicle identification: {"VehId" in df_fourier.columns}')

print(f'\nFirst few rows:')
display(df_fourier.head())


Shape: (54, 354)
Rows (samples): 54
Columns: 354
Number of files processed: 54
Number of features analyzed: 16
Top frequency components per feature: 10
Additional metrics per feature: energy, spectral_centroid
VehId preserved for vehicle identification: True

First few rows:


Unnamed: 0,filename,VehId,Vehicle Speed[km/h]_fft_magnitude_0,Vehicle Speed[km/h]_fft_frequency_0,Vehicle Speed[km/h]_fft_magnitude_1,Vehicle Speed[km/h]_fft_frequency_1,Vehicle Speed[km/h]_fft_magnitude_2,Vehicle Speed[km/h]_fft_frequency_2,Vehicle Speed[km/h]_fft_magnitude_3,Vehicle Speed[km/h]_fft_frequency_3,...,Long Term Fuel Trim Bank 2[%]_fft_magnitude_6,Long Term Fuel Trim Bank 2[%]_fft_frequency_6,Long Term Fuel Trim Bank 2[%]_fft_magnitude_7,Long Term Fuel Trim Bank 2[%]_fft_frequency_7,Long Term Fuel Trim Bank 2[%]_fft_magnitude_8,Long Term Fuel Trim Bank 2[%]_fft_frequency_8,Long Term Fuel Trim Bank 2[%]_fft_magnitude_9,Long Term Fuel Trim Bank 2[%]_fft_frequency_9,Long Term Fuel Trim Bank 2[%]_fft_energy,Long Term Fuel Trim Bank 2[%]_fft_spectral_centroid
0,VED_171101_week,8,892401.478753,7.2e-05,664643.291298,3.3e-05,623817.22202,2e-06,615634.096753,0.000135,...,37015.93806,3.5e-05,35018.211231,0.000153,32911.542443,2.5e-05,32519.28673,0.000106,218171300000.0,0.086314
1,VED_171108_week,8,765811.893671,0.000245,735155.596969,2e-06,714534.448518,5.2e-05,684102.285574,7e-06,...,95027.490148,2.4e-05,93433.839763,7.1e-05,92193.439538,0.000108,88273.993931,3.7e-05,1813540000000.0,0.085907
2,VED_171115_week,10,871365.32025,5e-05,850495.485345,3e-05,813922.686091,1.2e-05,762820.148156,0.000721,...,125365.875172,0.000126,119782.795748,8.1e-05,118594.136382,0.000138,116575.361426,8e-05,2752571000000.0,0.087781
3,VED_171122_week,5,652460.291133,0.000118,639900.903583,0.000156,633348.580358,2e-06,619041.564168,1.1e-05,...,87461.235095,5.3e-05,86780.612535,0.000173,86270.026104,8e-06,85989.410944,0.000211,1747868000000.0,0.085821
4,VED_171129_week,8,730041.191683,0.000199,716313.722582,9.6e-05,677605.414804,3.4e-05,663804.703289,3.8e-05,...,120711.283273,7e-05,118442.619175,3e-05,116406.974024,4.7e-05,115480.477705,7.9e-05,2196096000000.0,0.085544
