In [None]:
import numpy as np
import pandas as pd
import random

# Seed both generators: `random` picks each unit's failure cycle, while
# `np.random` produces every per-cycle reading. Fixed seeds make the
# generated CSV repeatable from run to run.
random.seed(42)
np.random.seed(42)

# Dataset dimensions: number of simulated units and the latest cycle at
# which a unit may fail.
num_units = 200
max_cycles = 300

# Output column names, in the same order as the values in each row.
columns = [
    'unit_number', 'cycle',
    'op_setting_1', 'op_setting_2',
    'engine_sensor_1', 'engine_sensor_2', 'engine_sensor_3',
    'hydraulic_pressure', 'hydraulic_flow', 'hydraulic_temp',
    'electrical_voltage', 'electrical_current',
    'control_surface_deflection', 'cabin_pressure', 'altimeter_drift',
    'RUL', 'subsystem_failure',
]


def _simulate_cycle(unit, cycle, failure_cycle):
    """Return one row of simulated readings for ``unit`` at ``cycle``.

    The row is a list ordered like ``columns``. Every reading is drawn from
    the global ``np.random`` stream; the draws below must stay in this exact
    order, otherwise the seeded output changes.
    """
    draw = np.random.uniform

    # Operating conditions (independent of the cycle count).
    op_setting_1 = draw(0.5, 1.5)
    op_setting_2 = draw(500, 600)

    # Engine sensors: a uniform baseline minus a fixed per-cycle decrement.
    engine_sensor_1 = draw(600, 620) - 0.02 * cycle
    engine_sensor_2 = draw(750, 770) - 0.03 * cycle
    engine_sensor_3 = draw(360, 370) - 0.01 * cycle

    # Hydraulics: the per-cycle rate itself is redrawn on every cycle.
    pressure_rate = draw(0.5, 1.5)
    flow_rate = draw(0.01, 0.05)
    temp_rate = draw(0.05, 0.2)
    hydraulic_pressure = 3000 - cycle * pressure_rate
    hydraulic_flow = 10 - cycle * flow_rate
    hydraulic_temp = 70 + cycle * temp_rate

    # Electrical system: voltage falls with the cycle count, current only
    # jitters around 5.
    voltage_rate = draw(0.005, 0.01)
    electrical_voltage = 28 - cycle * voltage_rate
    electrical_current = 5 + draw(-0.2, 0.2)

    # Control surfaces: random offset plus a cycle-scaled random term.
    deflection_base = draw(-5, 5)
    deflection_rate = draw(-0.01, 0.01)
    control_surface_deflection = deflection_base + cycle * deflection_rate

    # Cabin pressure: constant 8000 plus a cycle-scaled random term.
    cabin_pressure = 8000 + cycle * draw(-0.5, 0.5)

    # Altimeter drift: random offset plus a cycle-scaled random term.
    drift_base = draw(-10, 10)
    drift_rate = draw(-0.05, 0.05)
    altimeter_drift = drift_base + cycle * drift_rate

    # Remaining useful life: cycles left until this unit's failure cycle.
    RUL = failure_cycle - cycle

    # Failure label: 1 for the final 20 cycles of the unit (RUL 0..19).
    subsystem_failure = int(RUL < 20)

    return [
        unit, cycle, op_setting_1, op_setting_2,
        engine_sensor_1, engine_sensor_2, engine_sensor_3,
        hydraulic_pressure, hydraulic_flow, hydraulic_temp,
        electrical_voltage, electrical_current,
        control_surface_deflection, cabin_pressure, altimeter_drift,
        RUL, subsystem_failure,
    ]


# One row per (unit, cycle); each unit runs from cycle 1 up to and including
# its randomly chosen failure cycle.
records = []
for unit in range(1, num_units + 1):
    failure_cycle = random.randint(150, max_cycles)
    records.extend(
        _simulate_cycle(unit, cycle, failure_cycle)
        for cycle in range(1, failure_cycle + 1)
    )

# Assemble the table and persist it for later modeling steps.
df = pd.DataFrame(records, columns=columns)
df.to_csv("synthetic_aircraft_maintenance.csv", index=False)

print("Synthetic dataset generated successfully!")

# Preview the first rows.
print(df.head())
