# NASA Apollo CFD - Autoencoder Approach for Heat Flux Prediction

This notebook implements an autoencoder-based approach to predict heat flux (qw) from CFD data.

**Approach:**
- **Autoencoder** (a deterministic autoencoder, not a variational one) to learn compressed latent representations
- **Encoder**: Compresses input features to latent space
- **Decoder**: Reconstructs features from latent space
- **Predictor**: Uses latent representation to predict heat flux
- **Benefits**: Denoising, regularization, feature learning


In [None]:
import pandas as pd
import numpy as np
from google.colab import drive
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
# Suppress every warning category for the remainder of the notebook.
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow with the same value so repeated runs draw the
# same random numbers.
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(42)

print("=== NASA APOLLO CFD - AUTOENCODER MODEL ===\n")

# The CSV lives on Google Drive, so the drive must be mounted before reading.
drive.mount('/content/drive')
file_path = "/content/drive/MyDrive/NASA/apollo_cfd_database.csv"
df = pd.read_csv(file_path)

print(f"Dataset shape: {df.shape}")


## 1. Data Cleaning


In [None]:
print(f"\n=== DATA CLEANING ===")
print(f"Original: {len(df):,} points")

# Row filter: keep rows with theta >= 0, Re-theta >= 1e-5 and qw inside
# [1e3, 1e7].  Comparisons against NaN evaluate to False, so rows with a
# missing value in any of these columns are dropped as well.
qw_raw = df['qw (W/m^2)']
valid_rows = (
    (df['theta (m)'] >= 0)
    & (df['Re-theta'] >= 1e-5)
    & (qw_raw >= 1e3)
    & (qw_raw <= 1e7)
)

# A single combined mask followed by one explicit copy replaces the previous
# "copy everything, then filter three times" sequence.  That avoids several
# full-size intermediate frames and makes df_clean an independent frame, so
# the column assignments in later cells do not write into a slice of df.
df_clean = df.loc[valid_rows].copy()

print(f"Removed: {len(df) - len(df_clean):,} points")
print(f"Clean: {len(df_clean):,} points")


## 2. Feature Engineering


In [None]:
print(f"\n=== FEATURE ENGINEERING ===")

# Short aliases for the raw columns that feed several derived features.
rho_col = df_clean['density (kg/m^3)']
vel_col = df_clean['velocity (m/s)']
mach_col = df_clean['mach (-)']
theta_col = df_clean['theta (m)']
aoa_rad = np.radians(df_clean['aoa (degrees)'])


def _log10_eps(values):
    """Return log10(values + 1e-10); the offset keeps log10 finite at zero."""
    return np.log10(values + 1e-10)


# NASA baseline (plain log10, no offset)
df_clean['log_density'] = np.log10(rho_col)
df_clean['log_velocity'] = np.log10(vel_col)

# Reynolds numbers.  MU_REF is the constant both Reynolds-style features are
# divided by (presumably a reference dynamic viscosity - confirm the units).
MU_REF = 1.8e-5
char_len = np.sqrt(df_clean['X']**2 + df_clean['Y']**2 + df_clean['Z']**2)
df_clean['char_length'] = char_len

reynolds = (rho_col * vel_col * char_len) / MU_REF
df_clean['reynolds'] = reynolds
df_clean['log_reynolds'] = _log10_eps(reynolds)
df_clean['sqrt_reynolds'] = np.sqrt(reynolds)

re_theta = (rho_col * vel_col * theta_col) / MU_REF
df_clean['re_theta'] = re_theta
df_clean['log_re_theta'] = _log10_eps(re_theta)

# Mach
df_clean['mach_sq'] = mach_col ** 2
df_clean['log_mach'] = _log10_eps(mach_col)
df_clean['log_me'] = _log10_eps(df_clean['Me'])

# Boundary layer
df_clean['log_delta'] = _log10_eps(df_clean['delta (m)'])
df_clean['log_theta'] = _log10_eps(theta_col)

# Stagnation: flag points whose distance from the origin is below 0.05, and
# record the in-plane (X, Y) distance.
df_clean['is_stagnation'] = (char_len < 0.05).astype(np.float32)
df_clean['radial_dist'] = np.sqrt(df_clean['X']**2 + df_clean['Y']**2)

# Sutton-Graves style term: sqrt(density) * velocity^3 / char_length
sutton_graves = (np.sqrt(rho_col) * (vel_col ** 3)) / (char_len + 1e-10)
df_clean['sutton_graves'] = sutton_graves
df_clean['log_sutton_graves'] = _log10_eps(sutton_graves)

# Angle of attack as cos/sin, plus log dynamic pressure (plain log10)
df_clean['cos_aoa'] = np.cos(aoa_rad)
df_clean['sin_aoa'] = np.sin(aoa_rad)
df_clean['log_dyn_pressure'] = np.log10(df_clean['dynamic_pressure (Pa)'])

# Model inputs, in the column order used for scaling and training.
input_features = [
    # free-stream state
    'log_density',
    'log_velocity',
    'aoa (degrees)',
    # Reynolds-number features
    'log_reynolds',
    'sqrt_reynolds',
    'log_re_theta',
    # Mach-number features
    'mach_sq',
    'log_mach',
    'log_me',
    # heating correlation
    'sutton_graves',
    'log_sutton_graves',
    # pressure and boundary layer
    'log_dyn_pressure',
    'log_delta',
    'log_theta',
    # geometry
    'is_stagnation',
    'radial_dist',
    # angle-of-attack encoding
    'cos_aoa',
    'sin_aoa',
]

target_variable = 'qw (W/m^2)'
print(f"Features: {len(input_features)}")


In [None]:
print(f"\n=== TRAJECTORY STATE SPLIT ===")

# A "state" is one unique (velocity, density, angle-of-attack) combination.
# The split is made per state, so all rows of a state land in one subset.
VEL = 'velocity (m/s)'
RHO = 'density (kg/m^3)'
AOA = 'aoa (degrees)'
STATE_COLS = [VEL, RHO, AOA]

states = df_clean[STATE_COLS].drop_duplicates().reset_index(drop=True)
print(f"Unique states: {len(states)}")

# Velocity bins for stratification.
# The quantile edges are computed first and the labels are sized to the
# number of bins that survive duplicates='drop'.  Passing a fixed list of
# five labels straight to qcut raises when duplicate edges are dropped,
# because the label count no longer matches the bin count.
_, vel_edges = pd.qcut(df_clean[VEL], q=5, retbins=True, duplicates='drop')
vel_labels = [f'v{i}' for i in range(1, len(vel_edges))]
velocity_bins = pd.cut(df_clean[VEL], bins=vel_edges, labels=vel_labels,
                       include_lowest=True)
df_clean['vel_bin'] = velocity_bins

test_keys = set()      # states held out for testing
neighbor_lock = set()  # states adjacent to a test state; kept out of validation

for aoa, grp in states.groupby(AOA, sort=False):
    # Order the states of this angle of attack by descending velocity, then
    # ascending density, so index i-1 / i+1 are the neighbours of state i.
    g = grp.sort_values([VEL, RHO], ascending=[False, True]).reset_index(drop=True)
    n = len(g)
    if n <= 2:
        # No interior state exists, so nothing can be held out here.
        continue

    # Only interior states are candidates, so every test state has a
    # neighbour on each side.
    mid_idx = np.arange(1, n-1)
    k = max(1, int(round(0.10 * len(mid_idx))))  # about 10%, at least one
    pick_pos = np.linspace(0, len(mid_idx)-1, k, dtype=int)  # evenly spread

    for i in mid_idx[pick_pos]:
        key = tuple(g.loc[i, STATE_COLS])
        left = tuple(g.loc[i-1, STATE_COLS])
        right = tuple(g.loc[i+1, STATE_COLS])
        test_keys.add(key)
        neighbor_lock.add(left)
        neighbor_lock.add(right)

# Membership of every state in the two sets, computed once instead of via
# three separate row-wise DataFrame.apply passes.
state_tuples = list(map(tuple, states[STATE_COLS].to_numpy()))
is_test = np.array([t in test_keys for t in state_tuples], dtype=bool)
is_locked = np.array([t in neighbor_lock for t in state_tuples], dtype=bool)

remaining = states[~is_test]
remain_locked = states[~is_test & is_locked]   # forced into training
remain_free = states[~is_test & ~is_locked]    # eligible for validation

target_val = int(round(0.10 * len(states)))

if len(remain_free) <= target_val:
    val_states = remain_free
else:
    # Draw validation states from each angle of attack in proportion to its
    # share of the free states.
    parts = []
    for aoa, grp in remain_free.groupby(AOA, sort=False):
        take = int(round(target_val * len(grp) / len(remain_free)))
        take = min(take, len(grp))
        if take > 0:
            parts.append(grp.sample(n=take, random_state=42))
    # pd.concat raises on an empty list, which happens when every per-group
    # quota rounds to zero; fall back to an empty validation set instead.
    val_states = pd.concat(parts).drop_duplicates() if parts else remain_free.iloc[:0]
    if len(val_states) > target_val:
        val_states = val_states.sample(n=target_val, random_state=42)

# Everything that is neither test nor validation is training data.
train_states = pd.concat([remain_locked, remain_free[~remain_free.index.isin(val_states.index)]]).drop_duplicates()

train_keys = set(map(tuple, train_states[[VEL, RHO, AOA]].to_numpy()))
val_keys = set(map(tuple, val_states[[VEL, RHO, AOA]].to_numpy()))

# Tag each row with the subset of its state via a temporary tuple key column.
print("Assigning splits...")
df_clean['state_key'] = list(zip(df_clean[VEL], df_clean[RHO], df_clean[AOA]))

df_clean['split'] = 'other'
df_clean.loc[df_clean['state_key'].isin(test_keys), 'split'] = 'test'
df_clean.loc[df_clean['state_key'].isin(val_keys), 'split'] = 'val'
df_clean.loc[df_clean['state_key'].isin(train_keys), 'split'] = 'train'

df_clean.drop('state_key', axis=1, inplace=True)

print(df_clean['split'].value_counts())

train_df = df_clean[df_clean['split'] == 'train']
val_df = df_clean[df_clean['split'] == 'val']
test_df = df_clean[df_clean['split'] == 'test']


In [None]:
print(f"\n=== DATA PREPARATION ===")

import gc
gc.collect()

# Using full dataset (no downsampling)
print(f"Train: {len(train_df):,}, Val: {len(val_df):,}, Test: {len(test_df):,}")


def _features_and_target(frame):
    """Return (feature matrix, target vector) of *frame* as float32 arrays."""
    feats = frame[input_features].values.astype(np.float32)
    target = frame[target_variable].values.astype(np.float32)
    return feats, target


X_train, y_train = _features_and_target(train_df)
X_val, y_val = _features_and_target(val_df)
X_test, y_test = _features_and_target(test_df)

# Standardise the features; the scaler is fitted on the training split only
# and then applied unchanged to validation and test.
print("Scaling features...")
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train).astype(np.float32)
X_val_scaled, X_test_scaled = (
    scaler_X.transform(block).astype(np.float32) for block in (X_val, X_test)
)

# Targets are moved to log10 space and then standardised with a second
# scaler, again fitted on the training split only.
y_train_log, y_val_log, y_test_log = (
    np.log10(vec).astype(np.float32) for vec in (y_train, y_val, y_test)
)

scaler_y = StandardScaler()
y_train_scaled = (
    scaler_y.fit_transform(y_train_log.reshape(-1, 1)).ravel().astype(np.float32)
)
y_val_scaled = (
    scaler_y.transform(y_val_log.reshape(-1, 1)).ravel().astype(np.float32)
)
y_test_scaled = (
    scaler_y.transform(y_test_log.reshape(-1, 1)).ravel().astype(np.float32)
)

print(f"✅ Data prepared: {X_train_scaled.shape}")

# Release the unscaled intermediates; only the scaled arrays and the raw
# y_* targets are kept.
del X_train, X_val, X_test, y_train_log, y_val_log
gc.collect()


## 5. Build Autoencoder Model

**Architecture:**
- **Encoder**: Compresses 18D input → latent space (16D)
- **Decoder**: Reconstructs 18D features from latent space
- **Predictor**: Maps latent features → heat flux prediction

The autoencoder learns meaningful compressed representations that:
1. Remove noise and redundancy
2. Capture essential physics
3. Regularize the feature space


In [None]:
print(f"\n=== BUILDING AUTOENCODER MODEL ===")

# Hyperparameters
input_dim = X_train_scaled.shape[1]
latent_dim = 16  # width of the latent layer
encoder_layers = [128, 64, 32]
decoder_layers = [32, 64, 128]
predictor_layers = [64, 32, 16]


def _dense_stack(tensor, widths, prefix):
    """Chain Dense(relu, L2=1e-4) -> BatchNorm -> Dropout(0.2) per width.

    Layers are named '<prefix>_<n>', '<prefix>_bn_<n>' and
    '<prefix>_dropout_<n>' with n counting from 1.  Returns the output
    tensor of the last block.
    """
    for idx, units in enumerate(widths, start=1):
        tensor = layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=keras.regularizers.l2(0.0001),
            name=f'{prefix}_{idx}',
        )(tensor)
        tensor = layers.BatchNormalization(name=f'{prefix}_bn_{idx}')(tensor)
        tensor = layers.Dropout(0.2, name=f'{prefix}_dropout_{idx}')(tensor)
    return tensor


# ----------------------------------------------------------------------------
# ENCODER: input features -> latent vector
# ----------------------------------------------------------------------------
encoder_input = layers.Input(shape=(input_dim,), name='encoder_input')
encoder_hidden = _dense_stack(encoder_input, encoder_layers, 'encoder')
latent = layers.Dense(latent_dim, activation='relu', name='latent')(encoder_hidden)
encoder = keras.Model(encoder_input, latent, name='encoder')

# ----------------------------------------------------------------------------
# DECODER: latent vector -> reconstructed input features
# ----------------------------------------------------------------------------
latent_input = layers.Input(shape=(latent_dim,), name='latent_input')
decoder_hidden = _dense_stack(latent_input, decoder_layers, 'decoder')
reconstructed = layers.Dense(input_dim, activation='linear', name='reconstructed')(decoder_hidden)
decoder = keras.Model(latent_input, reconstructed, name='decoder')

# ----------------------------------------------------------------------------
# AUTOENCODER: encoder followed by decoder
# ----------------------------------------------------------------------------
autoencoder_input = layers.Input(shape=(input_dim,), name='autoencoder_input')
encoded = encoder(autoencoder_input)
decoded = decoder(encoded)
autoencoder = keras.Model(autoencoder_input, decoded, name='autoencoder')

# ----------------------------------------------------------------------------
# PREDICTOR: latent vector -> single heat-flux output
# ----------------------------------------------------------------------------
predictor_input = layers.Input(shape=(latent_dim,), name='predictor_input')
predictor_hidden = _dense_stack(predictor_input, predictor_layers, 'predictor')
heat_flux_pred = layers.Dense(1, activation='linear', name='heat_flux_output')(predictor_hidden)
predictor = keras.Model(predictor_input, heat_flux_pred, name='predictor')

# ----------------------------------------------------------------------------
# FULL MODEL: encoder followed by predictor (shares the encoder's weights
# with the autoencoder, since the same `encoder` model object is reused)
# ----------------------------------------------------------------------------
full_input = layers.Input(shape=(input_dim,), name='full_input')
full_encoded = encoder(full_input)
full_prediction = predictor(full_encoded)
full_model = keras.Model(full_input, full_prediction, name='encoder_predictor')

print("\n=== MODEL SUMMARIES ===")
print(f"\nEncoder: {input_dim}D → {latent_dim}D")
encoder.summary()
print(f"\nDecoder: {latent_dim}D → {input_dim}D")
decoder.summary()
print(f"\nPredictor: {latent_dim}D → 1D (heat flux)")
predictor.summary()


In [None]:
print(f"\n=== PHASE 1: AUTOENCODER PRETRAINING ===")

# Reconstruction objective: mean squared error, with MAE reported alongside.
autoencoder.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae'],
)

# Stop after 20 epochs without a better validation loss and roll back to the
# best weights seen.
ae_early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)
# Halve the learning rate after 10 stagnant epochs, never going below 1e-7.
ae_lr_decay = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    min_lr=1e-7,
    verbose=1,
)
ae_callbacks = [ae_early_stop, ae_lr_decay]

# The autoencoder is fitted with the scaled features as both input and target.
ae_fit_options = dict(
    validation_data=(X_val_scaled, X_val_scaled),
    epochs=100,
    batch_size=8192,
    callbacks=ae_callbacks,
    verbose=2,
)
history_ae = autoencoder.fit(X_train_scaled, X_train_scaled, **ae_fit_options)

print(f"✅ Autoencoder pretraining complete: {len(history_ae.history['loss'])} epochs")


## 7. Train Heat Flux Predictor

Now train the predictor on the encoder's latent representations. The pretrained encoder is left trainable, so it is fine-tuned together with the predictor.


In [None]:
print(f"\n=== PHASE 2: HEAT FLUX PREDICTOR TRAINING ===")

# True  -> fine-tune the pretrained encoder together with the predictor
# False -> freeze the encoder and train only the predictor head
FINE_TUNE_ENCODER = True
encoder.trainable = FINE_TUNE_ENCODER

# Custom weighted MSE loss
def weighted_mse_loss(y_true, y_pred):
    """Mean squared error with per-sample weights that grow with |y_true|.

    Each sample is weighted by ``1 + |y_true| / (mean(|y_true|) + 1e-7)``,
    where the mean is taken over the batch passed in.  Samples whose target
    lies far from zero therefore contribute more to the loss.

    Args:
        y_true: Target values; must hold as many elements as ``y_pred``.
        y_pred: Model predictions.

    Returns:
        Scalar tensor with the weighted mean squared error.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # The training targets are 1-D while the model output is (batch, 1).
    # Aligning dtype and shape here prevents an accidental (batch, batch)
    # broadcast if the loss is ever called outside Keras' own loss wrapper.
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))

    abs_true = tf.abs(y_true)
    weights = 1.0 + abs_true / (tf.reduce_mean(abs_true) + 1e-7)
    return tf.reduce_mean(weights * tf.square(y_true - y_pred))

# Encoder + predictor are optimised with AdamW (decoupled weight decay and
# gradient-norm clipping) against the weighted MSE defined above.
pred_optimizer = keras.optimizers.AdamW(
    learning_rate=0.001,
    weight_decay=0.01,
    clipnorm=1.0,
)
full_model.compile(
    optimizer=pred_optimizer,
    loss=weighted_mse_loss,
    metrics=['mae'],
)

print(f"Total parameters: {full_model.count_params():,}")

# Stop after 30 epochs without a better validation loss and roll back to the
# best weights seen.
pred_early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=30,
    restore_best_weights=True,
    verbose=1,
)
# Halve the learning rate after 15 stagnant epochs, never going below 1e-8.
pred_lr_decay = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=15,
    min_lr=1e-8,
    verbose=1,
)
# Write the model to disk whenever the validation loss improves.
pred_checkpoint = keras.callbacks.ModelCheckpoint(
    'best_autoencoder_model.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)
pred_callbacks = [pred_early_stop, pred_lr_decay, pred_checkpoint]

# Fit on the standardised log10 heat flux.
pred_fit_options = dict(
    validation_data=(X_val_scaled, y_val_scaled),
    epochs=200,
    batch_size=16384,
    callbacks=pred_callbacks,
    verbose=2,
)
history_pred = full_model.fit(X_train_scaled, y_train_scaled, **pred_fit_options)

print(f"✅ Predictor training complete: {len(history_pred.history['loss'])} epochs")


In [None]:
print(f"\n=== EVALUATION ===")

# Undo the target transforms in reverse order: standardisation first, then
# the log10, to get predictions back in W/m^2.
y_pred_scaled = full_model.predict(X_test_scaled, verbose=0)
y_pred_log = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
y_pred = 10**y_pred_log

# Absolute-error metrics in physical units.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Per-sample relative error in percent.
rel_errors = np.abs(y_test - y_pred) / y_test * 100


def _pct_within(threshold):
    """Return the percentage of test samples with relative error <= threshold."""
    return np.sum(rel_errors <= threshold) / len(rel_errors) * 100


pct_1, pct_3, pct_5, pct_10 = (_pct_within(limit) for limit in (1, 3, 5, 10))

median_err = np.median(rel_errors)
q95_err = np.percentile(rel_errors, 95)

report_lines = (
    f"\n{'='*60}",
    f"AUTOENCODER MODEL PERFORMANCE",
    f"{'='*60}",
    f"MAE: {mae:.0f} W/m²",
    f"RMSE: {rmse:.0f} W/m²",
    f"Within ±1%: {pct_1:.1f}%",
    f"Within ±3%: {pct_3:.1f}%",
    f"Within ±5%: {pct_5:.1f}% ⭐ (NASA Target: 95%)",
    f"Within ±10%: {pct_10:.1f}%",
    f"Median error: {median_err:.2f}%",
    f"95th %ile: {q95_err:.1f}%",
    f"{'='*60}",
)
for report_line in report_lines:
    print(report_line)


## 9. Latent Space Analysis

Visualize the learned latent representations.


In [None]:
print(f"\n=== LATENT SPACE ANALYSIS ===")

# Work on a random subset of the test set to keep the plots manageable.
# The size is capped at the number of test rows: sampling without
# replacement raises ValueError when more items are requested than exist.
sample_size = min(10000, len(X_test_scaled))
sample_idx = np.random.choice(len(X_test_scaled), sample_size, replace=False)
X_sample = X_test_scaled[sample_idx]
y_sample = y_test[sample_idx]

# Encoder output for the sampled rows: one latent vector per sample.
latent_repr = encoder.predict(X_sample, verbose=0)

print(f"Latent representation shape: {latent_repr.shape}")
print(f"Latent mean: {latent_repr.mean():.4f}")
print(f"Latent std: {latent_repr.std():.4f}")

# Project the latent vectors onto their first two principal components for
# 2-D plotting.
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
latent_2d = pca.fit_transform(latent_repr)

print(f"PCA variance explained: {pca.explained_variance_ratio_.sum():.2%}")


## 10. Visualizations


In [None]:
print(f"\n=== VISUALIZATIONS ===")

# One 3x4 dashboard: row 1 = test-set accuracy, row 2 = training curves and
# error diagnostics, row 3 = latent-space views.
fig = plt.figure(figsize=(24, 16))
gs = fig.add_gridspec(3, 4, hspace=0.3, wspace=0.3)
fig.suptitle('NASA Heat Flux: Autoencoder Model', fontsize=18, fontweight='bold')

# ============================================================================
# Row 1: Model Performance
# ============================================================================

# 1.1 Share of predictions within ±5% next to the 95% target
ax1 = fig.add_subplot(gs[0, 0])
bars = ax1.bar(['Autoencoder', 'NASA Target'], [pct_5, 95.0],
               color=['green' if pct_5>=95 else 'orange', 'blue'], alpha=0.7)
ax1.set_ylabel('% Within ±5%', fontsize=12)
ax1.set_title('Performance vs NASA Target', fontsize=12, fontweight='bold')
ax1.set_ylim(0, 100)
for bar, val in zip(bars, [pct_5, 95.0]):
    # Value label just above each bar
    ax1.text(bar.get_x()+bar.get_width()/2, bar.get_height()+1,
            f'{val:.1f}%', ha='center', fontweight='bold', fontsize=11)

# 1.2 Predicted vs actual, coloured by relative error (colour scale 0-10%)
ax2 = fig.add_subplot(gs[0, 1])
scatter = ax2.scatter(y_test, y_pred, alpha=0.1, s=1, c=rel_errors,
                      cmap='RdYlGn_r', vmin=0, vmax=10)
# Dashed diagonal marks perfect agreement
ax2.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', linewidth=2)
ax2.set_xlabel('Actual Heat Flux (W/m²)', fontsize=12)
ax2.set_ylabel('Predicted Heat Flux (W/m²)', fontsize=12)
ax2.set_title('Predicted vs Actual', fontsize=12, fontweight='bold')
ax2.set_xscale('log')
ax2.set_yscale('log')
plt.colorbar(scatter, ax=ax2, label='Error (%)')

# 1.3 Histogram of relative errors (x-axis limited to 0-20%)
ax3 = fig.add_subplot(gs[0, 2])
ax3.hist(rel_errors, bins=100, alpha=0.7, color='purple', edgecolor='black')
ax3.axvline(5, color='red', linestyle='--', linewidth=2, label='NASA ±5%')
ax3.axvline(median_err, color='green', linestyle='-', linewidth=2,
            label=f'Median: {median_err:.1f}%')
ax3.set_xlabel('Relative Error (%)', fontsize=12)
ax3.set_ylabel('Frequency', fontsize=12)
ax3.set_title('Error Distribution', fontsize=12, fontweight='bold')
ax3.legend(fontsize=10)
ax3.set_xlim(0, 20)

# 1.4 Pie chart of error bands.  The pct_* values are cumulative, so each
# wedge is the difference between consecutive thresholds.
ax4 = fig.add_subplot(gs[0, 3])
w1 = pct_1
w3 = pct_3 - pct_1
w5 = pct_5 - pct_3
w10 = pct_10 - pct_5
above = 100 - pct_10
ax4.pie([w1, w3, w5, w10, above], labels=['±1%', '1-3%', '3-5%', '5-10%', '>10%'],
        colors=['darkgreen', 'lightgreen', 'yellow', 'orange', 'red'],
        autopct='%1.1f%%', startangle=90, textprops={'fontsize': 10})
ax4.set_title('Error Breakdown', fontsize=12, fontweight='bold')

# ============================================================================
# Row 2: Training History
# ============================================================================

# 2.1 Autoencoder reconstruction loss per epoch (log scale)
ax5 = fig.add_subplot(gs[1, 0])
ax5.plot(history_ae.history['loss'], label='Train', alpha=0.8, linewidth=2)
ax5.plot(history_ae.history['val_loss'], label='Val', alpha=0.8, linewidth=2)
ax5.set_xlabel('Epoch', fontsize=12)
ax5.set_ylabel('Reconstruction Loss', fontsize=12)
ax5.set_title('Autoencoder Pretraining', fontsize=12, fontweight='bold')
ax5.legend(fontsize=10)
ax5.set_yscale('log')
ax5.grid(True, alpha=0.3)

# 2.2 Predictor loss per epoch (log scale)
ax6 = fig.add_subplot(gs[1, 1])
ax6.plot(history_pred.history['loss'], label='Train', alpha=0.8, linewidth=2)
ax6.plot(history_pred.history['val_loss'], label='Val', alpha=0.8, linewidth=2)
ax6.set_xlabel('Epoch', fontsize=12)
ax6.set_ylabel('Prediction Loss', fontsize=12)
ax6.set_title('Heat Flux Predictor Training', fontsize=12, fontweight='bold')
ax6.legend(fontsize=10)
ax6.set_yscale('log')
ax6.grid(True, alpha=0.3)

# 2.3 Relative error against the true heat flux (y-axis limited to 0-30%)
ax7 = fig.add_subplot(gs[1, 2])
ax7.scatter(y_test, rel_errors, alpha=0.1, s=1, color='purple')
ax7.axhline(5, color='red', linestyle='--', linewidth=2, label='NASA ±5%')
ax7.set_xlabel('Actual Heat Flux (W/m²)', fontsize=12)
ax7.set_ylabel('Relative Error (%)', fontsize=12)
ax7.set_title('Error vs Heat Flux Magnitude', fontsize=12, fontweight='bold')
ax7.set_xscale('log')
ax7.set_ylim(0, 30)
ax7.legend(fontsize=10)
ax7.grid(True, alpha=0.3)

# 2.4 Mean absolute reconstruction error per sampled row, in scaled feature
# units
ax8 = fig.add_subplot(gs[1, 3])
X_reconstructed = autoencoder.predict(X_sample, verbose=0)
reconstruction_errors = np.mean(np.abs(X_sample - X_reconstructed), axis=1)
ax8.hist(reconstruction_errors, bins=50, alpha=0.7, color='teal', edgecolor='black')
ax8.axvline(reconstruction_errors.mean(), color='red', linestyle='--', linewidth=2,
            label=f'Mean: {reconstruction_errors.mean():.4f}')
ax8.set_xlabel('Reconstruction Error', fontsize=12)
ax8.set_ylabel('Frequency', fontsize=12)
ax8.set_title('Feature Reconstruction Quality', fontsize=12, fontweight='bold')
ax8.legend(fontsize=10)

# ============================================================================
# Row 3: Latent Space Analysis
# ============================================================================
# latent_2d holds the first two principal components of the latent vectors,
# so the axes are labelled as principal components rather than as individual
# latent dimensions.

# 3.1 PCA projection coloured by log10 of the true heat flux
ax9 = fig.add_subplot(gs[2, 0])
scatter = ax9.scatter(latent_2d[:, 0], latent_2d[:, 1],
                      c=np.log10(y_sample), s=2, alpha=0.5, cmap='viridis')
ax9.set_xlabel('Latent PC 1', fontsize=12)
ax9.set_ylabel('Latent PC 2', fontsize=12)
ax9.set_title('Latent Space (colored by log10(heat flux))', fontsize=12, fontweight='bold')
plt.colorbar(scatter, ax=ax9, label='log10(Heat Flux)')

# 3.2 PCA projection coloured by prediction error.  The sampled rows are a
# subset of the test set, so their predictions are taken from y_pred instead
# of running the model a second time; this also guarantees the colours match
# the errors reported in the evaluation.
ax10 = fig.add_subplot(gs[2, 1])
y_pred_sample = y_pred[sample_idx]
errors_sample = np.abs(y_sample - y_pred_sample) / y_sample * 100
scatter = ax10.scatter(latent_2d[:, 0], latent_2d[:, 1],
                       c=errors_sample, s=2, alpha=0.5, cmap='RdYlGn_r', vmin=0, vmax=10)
ax10.set_xlabel('Latent PC 1', fontsize=12)
ax10.set_ylabel('Latent PC 2', fontsize=12)
ax10.set_title('Latent Space (colored by prediction error)', fontsize=12, fontweight='bold')
plt.colorbar(scatter, ax=ax10, label='Error (%)')

# 3.3 Standard deviation of each latent dimension over the sample
ax11 = fig.add_subplot(gs[2, 2])
latent_stds = latent_repr.std(axis=0)
ax11.bar(range(latent_dim), latent_stds, alpha=0.7, color='steelblue', edgecolor='black')
ax11.set_xlabel('Latent Dimension', fontsize=12)
ax11.set_ylabel('Std Deviation', fontsize=12)
ax11.set_title('Latent Feature Variability', fontsize=12, fontweight='bold')
ax11.grid(True, alpha=0.3, axis='y')

# 3.4 Pairwise correlation between latent dimensions
ax12 = fig.add_subplot(gs[2, 3])
latent_corr = np.corrcoef(latent_repr.T)
im = ax12.imshow(latent_corr, cmap='coolwarm', vmin=-1, vmax=1, aspect='auto')
ax12.set_xlabel('Latent Dimension', fontsize=12)
ax12.set_ylabel('Latent Dimension', fontsize=12)
ax12.set_title('Latent Feature Correlations', fontsize=12, fontweight='bold')
plt.colorbar(im, ax=ax12, label='Correlation')

plt.tight_layout()
plt.show()

print("✅ Visualizations complete!")


In [None]:
# Fixed part of the report: architecture, training, performance and
# latent-space statistics.
summary_lines = (
    f"\n{'='*70}",
    f"AUTOENCODER MODEL - FINAL SUMMARY",
    f"{'='*70}",
    f"\n📊 ARCHITECTURE:",
    f"  Input:      {input_dim} features",
    f"  Encoder:    {input_dim} → {encoder_layers} → {latent_dim}",
    f"  Decoder:    {latent_dim} → {decoder_layers} → {input_dim}",
    f"  Predictor:  {latent_dim} → {predictor_layers} → 1",
    f"  Total Params: {full_model.count_params():,}",
    f"\n🎯 TRAINING:",
    f"  Phase 1 (Autoencoder): {len(history_ae.history['loss'])} epochs",
    f"  Phase 2 (Predictor):   {len(history_pred.history['loss'])} epochs",
    f"  Training samples:      {len(X_train_scaled):,}",
    f"  Validation samples:    {len(X_val_scaled):,}",
    f"  Test samples:          {len(X_test_scaled):,}",
    f"\n📈 PERFORMANCE:",
    f"  MAE:           {mae:.0f} W/m²",
    f"  RMSE:          {rmse:.0f} W/m²",
    f"  Within ±1%:    {pct_1:.2f}%",
    f"  Within ±3%:    {pct_3:.2f}%",
    f"  Within ±5%:    {pct_5:.2f}% {'✅' if pct_5 >= 95 else '⚠️'}  (NASA Target: 95.0%)",
    f"  Within ±10%:   {pct_10:.2f}%",
    f"  Median Error:  {median_err:.2f}%",
    f"  95th %ile:     {q95_err:.2f}%",
    f"\n🔬 LATENT SPACE:",
    f"  Dimensions:        {latent_dim}",
    f"  PCA (2D) Variance: {pca.explained_variance_ratio_.sum():.2%}",
    f"  Avg Reconstruction Error: {reconstruction_errors.mean():.6f}",
    f"\n💡 INSIGHTS:",
)

# Verdict and suggestion depend on how close the ±5% share is to the 95%
# target.
if pct_5 >= 95:
    insight_lines = (
        f"  ✅ SUCCESS: Autoencoder approach beats NASA baseline!",
        f"  The latent space effectively captures heat flux physics.",
    )
elif pct_5 >= 85:
    insight_lines = (
        f"  💪 STRONG: Close to NASA target ({95-pct_5:.1f}% gap)",
        f"  Consider: Increase latent dim, add more training data, or ensemble models.",
    )
elif pct_5 >= 70:
    insight_lines = (
        f"  📈 GOOD: Reasonable performance ({95-pct_5:.1f}% gap from target)",
        f"  Consider: Variational autoencoder (VAE), denoising autoencoder, or hybrid approach.",
    )
else:
    insight_lines = (
        f"  🔧 NEEDS WORK: {95-pct_5:.1f}% gap from NASA target",
        f"  Consider: Deeper architecture, different latent dim, or combine with physics models.",
    )

closing_lines = (
    f"\n💾 MODEL SAVED: best_autoencoder_model.keras",
    f"{'='*70}\n",
)

for summary_line in (*summary_lines, *insight_lines, *closing_lines):
    print(summary_line)
