
# 04_Airframe_Synthetic_Data_Generation

This notebook generates **synthetic airframe component data** for the AeroDemo project.
It simulates realistic structural integrity and stress point readings for each aircraft in the fleet.
The generated data is saved as a single CSV file (`airframe_sample.csv`) in the specified volume path, where it is later ingested by the DLT pipeline.

### Main Components:
- Simulate stress points, fatigue crack growth, temperature fluctuations, and structural integrity.
- Generate realistic `event_timestamp` values spread across the past seven days.
- Write data in an Auto Loader–compatible format to `/Volumes/arao/aerodemo/tmp/airframe`.


In [0]:

import os
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import random

def generate_airframe_data(num_records_per_aircraft=100, aircraft_ids=None,
                           seed=None, reference_time=None):
    """Build a DataFrame of synthetic airframe readings.

    One row is produced per (aircraft, record index) pair. Each row carries
    a random event timestamp up to 6 days, 23 hours and 59 minutes before
    the reference time, plus four uniformly distributed readings rounded to
    two decimals.

    Args:
        num_records_per_aircraft: Number of rows to generate per aircraft.
            Zero (or a negative value) yields an empty frame that still has
            all columns.
        aircraft_ids: Iterable of aircraft identifiers. Defaults to the
            three demo aircraft ``A320_101``, ``A330_201`` and ``B737_301``.
        seed: Optional seed. When given, private random generators are used
            so the output is reproducible and the global ``random`` /
            ``np.random`` state is left untouched. When ``None``, the global
            generators are used.
        reference_time: ``datetime`` that the random offsets are subtracted
            from. Defaults to ``datetime.now()``, captured once per call.

    Returns:
        pandas.DataFrame with columns ``airframe_id``, ``aircraft_id``,
        ``event_timestamp`` (string, ``%Y-%m-%d %H:%M:%S``),
        ``stress_points``, ``fatigue_crack_growth``,
        ``temperature_fluctuations`` and ``structural_integrity``.
    """
    if aircraft_ids is None:
        aircraft_ids = ["A320_101", "A330_201", "B737_301"]

    # With no seed, fall back to the module-level generators so callers that
    # seeded `random` / `np.random` globally keep getting the same stream.
    if seed is None:
        py_rng, np_rng = random, np.random
    else:
        py_rng, np_rng = random.Random(seed), np.random.RandomState(seed)

    # Capture the clock once so every record is offset from the same instant.
    if reference_time is None:
        reference_time = datetime.now()

    columns = [
        'airframe_id',
        'aircraft_id',
        'event_timestamp',
        'stress_points',
        'fatigue_crack_growth',
        'temperature_fluctuations',
        'structural_integrity',
    ]

    rows = []
    for aircraft_id in aircraft_ids:
        for i in range(num_records_per_aircraft):
            offset = timedelta(
                days=py_rng.randint(0, 6),
                hours=py_rng.randint(0, 23),
                minutes=py_rng.randint(0, 59),
            )
            event_time = reference_time - offset
            rows.append((
                # Aircraft id + index makes the id unique; the zero padding
                # only keeps ids fixed-width (up to 999 records).
                f'AF_{aircraft_id}_{i:03d}',
                aircraft_id,
                event_time.strftime("%Y-%m-%d %H:%M:%S"),
                round(np_rng.uniform(150, 300), 2),
                round(np_rng.uniform(1.0, 10.0), 2),
                round(np_rng.uniform(-50, 70), 2),
                round(np_rng.uniform(50, 100), 2),
            ))

    # Passing `columns` explicitly keeps the schema even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

# Build the synthetic airframe dataset using the generator's defaults.
df = generate_airframe_data()

# Destination directory and file for the CSV export.
output_path = "/Volumes/arao/aerodemo/tmp/airframe"
output_file = f"{output_path}/airframe_sample.csv"

# Create the destination directory if it is not there yet.
os.makedirs(output_path, exist_ok=True)

# Delete the export left behind by a previous run before writing a new one.
stale_export_present = os.path.exists(output_file)
if stale_export_present:
    os.remove(output_file)

# Write the frame without the pandas index column.
df.to_csv(output_file, index=False)

print(f"✅ Airframe data generated: {len(df)} rows saved to {output_file}")
