In [12]:
#@title Create training data
import numpy as np
import pandas as pd
from scipy.stats import weibull_min

# 1. Subsystem catalogue: Weibull (shape, scale) pair for each subsystem.
#    These pairs are the arguments later handed to weibull_min.rvs.
subsystems = {
    name: {'shape': shape, 'scale': scale}
    for name, shape, scale in (
        ('hydraulic',       1.5, 100),
        ('electrical',      1.7, 120),
        ('control_surface', 1.2, 80),
        ('cabin',           1.3, 90),
        ('altimeter',       1.4, 110),
    )
}

# 2. Sensor parameterization (drift, noise, spike).
#    Each row lists one sensor's values in the field order given in the
#    zip() below: baseline mean/std, drift coefficient/exponent, additive
#    noise std, and the magnitude/offset of the pre-failure spike.
sensors_info = {
    sensor: dict(zip(
        ('mean', 'std', 'drift_coeff', 'drift_exp', 'noise_std', 'spike_mag', 'spike_offset'),
        row,
    ))
    for sensor, row in (
        ('hydraulic_pressure',         (3000,  50,  200,  2,   10,   150, 5)),
        ('hydraulic_flow',             (120,   5,   -10,  1.5, 1,    5,   4)),
        ('hydraulic_temp',             (80,    2,   20,   2,   0.5,  3,   6)),
        ('electrical_voltage',         (28,    0.5, -1,   1,   0.1,  2,   3)),
        ('electrical_current',         (5,     0.2, 0.5,  2,   0.05, 1,   5)),
        ('control_surface_deflection', (0,     1,   5,    3,   0.2,  10,  2)),
        ('cabin_pressure',             (101.3, 0.2, -0.5, 1,   0.05, 1,   4)),
        ('altimeter_drift',            (0,     0.1, 0.3,  2,   0.02, 0.5, 3)),
    )
}

# 3. Sample failure cycles per subsystem per unit
num_units = 10

# Seed NumPy's global RNG before the first random draw so that a fresh-kernel
# run of this cell regenerates the same training set. weibull_min.rvs is
# called without random_state, so it draws from this global state, as do the
# np.random.normal calls in step 4.
TRAIN_SEED = 42
np.random.seed(TRAIN_SEED)

# One array of num_units Weibull-distributed failure times per subsystem;
# entry i is the failure time used for unit i + 1.
failure_cycles = {
    sub: weibull_min.rvs(params['shape'], scale=params['scale'], size=num_units)
    for sub, params in subsystems.items()
}

# 4. Assemble one frame per unit: cycle index, per-subsystem RUL and failure
#    flags, then one degraded trace per sensor.
synthetic_data = []
for unit in range(1, num_units + 1):
    # Failure times drawn for this unit and the integer cycle each rounds up to.
    T_fails = {sub: failure_cycles[sub][unit - 1] for sub in subsystems}
    fail_cycle = {sub: int(np.ceil(t)) for sub, t in T_fails.items()}

    # The unit is simulated until 10 cycles past its last subsystem failure.
    max_cycle = max(fail_cycle.values()) + 10
    cycles = np.arange(1, max_cycle + 1)
    n_cycles = len(cycles)

    # Columns are collected in output order and turned into a frame at the end.
    columns = {'unit': unit, 'cycle': cycles}

    # RUL counts down to 0 at the failure cycle and stays there; the failure
    # flag is True only on the failure cycle itself.
    for sub, end in fail_cycle.items():
        columns[f'RUL_{sub}'] = np.maximum(0, end - cycles)
        columns[f'failure_{sub}'] = cycles == end

    for sensor, params in sensors_info.items():
        # Owning subsystem is resolved by name prefix (first match wins);
        # anything that matches none of these prefixes is treated as altimeter.
        sub = next(
            (
                prefix
                for prefix in ('hydraulic', 'electrical', 'control_surface', 'cabin')
                if sensor.startswith(prefix)
            ),
            'altimeter',
        )
        end = fail_cycle[sub]
        sub_max = end + 10  # drift is normalised by failure cycle + 10

        # Baseline + drift + noise (baseline is drawn before the noise term).
        baseline = np.random.normal(params['mean'], params['std'], size=n_cycles)
        drift = params['drift_coeff'] * (cycles / sub_max) ** params['drift_exp']
        jitter = np.random.normal(0, params['noise_std'], size=n_cycles)
        vals = baseline + drift + jitter

        # Single-cycle spike placed spike_offset cycles ahead of the failure,
        # skipped when that would land before cycle 1.
        spike_cycle = end - params['spike_offset']
        if spike_cycle > 0:
            at_spike = cycles == spike_cycle
            vals[at_spike] += params['spike_mag']

        columns[sensor] = vals

    df_unit = pd.DataFrame(columns)
    synthetic_data.append(df_unit)

# Stack all units into one frame with a fresh 0..N-1 index.
df_syn = pd.concat(synthetic_data, ignore_index=True)

# Save the training set as CSV without the index column
df_syn.to_csv('synthetic_per_subsystem_train.csv', index=False)



In [11]:
#@title Create testing data

import numpy as np
import pandas as pd
from scipy.stats import weibull_min

# 1. Subsystems and the Weibull shape/scale used to draw their failure times
subsystems = dict(
    hydraulic=dict(shape=1.5, scale=100),
    electrical=dict(shape=1.7, scale=120),
    control_surface=dict(shape=1.2, scale=80),
    cabin=dict(shape=1.3, scale=90),
    altimeter=dict(shape=1.4, scale=110),
)

# 2. Sensor parameterization (drift, noise, spike).
#    mean/std describe the per-cycle baseline draw, drift_coeff/drift_exp the
#    power-law drift term, noise_std the extra additive noise, and
#    spike_mag/spike_offset the one-cycle spike placed ahead of failure.
sensors_info = dict(
    hydraulic_pressure=dict(
        mean=3000, std=50, drift_coeff=200, drift_exp=2,
        noise_std=10, spike_mag=150, spike_offset=5,
    ),
    hydraulic_flow=dict(
        mean=120, std=5, drift_coeff=-10, drift_exp=1.5,
        noise_std=1, spike_mag=5, spike_offset=4,
    ),
    hydraulic_temp=dict(
        mean=80, std=2, drift_coeff=20, drift_exp=2,
        noise_std=0.5, spike_mag=3, spike_offset=6,
    ),
    electrical_voltage=dict(
        mean=28, std=0.5, drift_coeff=-1, drift_exp=1,
        noise_std=0.1, spike_mag=2, spike_offset=3,
    ),
    electrical_current=dict(
        mean=5, std=0.2, drift_coeff=0.5, drift_exp=2,
        noise_std=0.05, spike_mag=1, spike_offset=5,
    ),
    control_surface_deflection=dict(
        mean=0, std=1, drift_coeff=5, drift_exp=3,
        noise_std=0.2, spike_mag=10, spike_offset=2,
    ),
    cabin_pressure=dict(
        mean=101.3, std=0.2, drift_coeff=-0.5, drift_exp=1,
        noise_std=0.05, spike_mag=1, spike_offset=4,
    ),
    altimeter_drift=dict(
        mean=0, std=0.1, drift_coeff=0.3, drift_exp=2,
        noise_std=0.02, spike_mag=0.5, spike_offset=3,
    ),
)

# 3. Sample failure cycles per subsystem per unit
num_units = 10

# Seed NumPy's global RNG before the first random draw so that a fresh-kernel
# run of this cell regenerates the same test set. weibull_min.rvs is called
# without random_state, so it draws from this global state, as do the
# np.random.normal calls in step 4.
# Keep this value different from any seed used to generate the training data:
# the distributions and draw order here match the training cell, so an
# identical seed would reproduce the training set exactly.
TEST_SEED = 43
np.random.seed(TEST_SEED)

# One array of num_units Weibull-distributed failure times per subsystem;
# entry i is the failure time used for unit i + 1.
failure_cycles = {
    sub: weibull_min.rvs(params['shape'], scale=params['scale'], size=num_units)
    for sub, params in subsystems.items()
}

# 4. Build the synthetic test set: per-unit frames holding the cycle index,
#    per-subsystem RUL / failure flags, and one degraded trace per sensor.

# Resolve each sensor's owning subsystem once, by name prefix (first match
# wins); names matching none of the listed prefixes fall back to 'altimeter'.
known_prefixes = ('hydraulic', 'electrical', 'control_surface', 'cabin')
sensor_owner = {
    sensor: next((p for p in known_prefixes if sensor.startswith(p)), 'altimeter')
    for sensor in sensors_info
}

synthetic_data = []
for unit in range(1, num_units + 1):
    # This unit's sampled failure times and the integer cycle each rounds up to.
    T_fails = {sub: failure_cycles[sub][unit - 1] for sub in subsystems}
    end_cycle = {sub: int(np.ceil(t)) for sub, t in T_fails.items()}

    # Timeline runs from cycle 1 to 10 cycles past the latest subsystem failure.
    max_cycle = max(end_cycle.values()) + 10
    cycles = np.arange(1, max_cycle + 1)
    n_cycles = len(cycles)

    # Gather columns in output order; the frame is created once at the end.
    columns = {'unit': unit, 'cycle': cycles}

    # RUL is clipped at 0 after failure; the flag marks the failure cycle only.
    for sub, end in end_cycle.items():
        columns[f'RUL_{sub}'] = np.maximum(0, end - cycles)
        columns[f'failure_{sub}'] = cycles == end

    for sensor, params in sensors_info.items():
        end = end_cycle[sensor_owner[sensor]]
        sub_max = end + 10  # drift is normalised by failure cycle + 10

        # Baseline draw first, then the noise draw, with the deterministic
        # power-law drift added in between.
        baseline = np.random.normal(params['mean'], params['std'], size=n_cycles)
        drift = params['drift_coeff'] * (cycles / sub_max) ** params['drift_exp']
        jitter = np.random.normal(0, params['noise_std'], size=n_cycles)
        vals = baseline + drift + jitter

        # One-cycle spike spike_offset cycles before failure; skipped when
        # that cycle would fall before cycle 1.
        spike_cycle = end - params['spike_offset']
        if spike_cycle > 0:
            at_spike = cycles == spike_cycle
            vals[at_spike] += params['spike_mag']

        columns[sensor] = vals

    df_unit = pd.DataFrame(columns)
    synthetic_data.append(df_unit)

# Stack all units into one frame with a fresh 0..N-1 index.
df_syn = pd.concat(synthetic_data, ignore_index=True)

# Save the test set as CSV without the index column
df_syn.to_csv('synthetic_per_subsystem_test.csv', index=False)
