## Multi-Layer Perceptron (MLP) Ensemble Training Notebook

This notebook handles data generation, feature scaling, model architecture definition, and training for the multi-model ensemble. It is designed to be run sequentially.

In [1]:
# --- 1. IMPORTS AND SETUP ---
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from datetime import datetime, timedelta
import os

# Ensure your stars_utils.py file is in the same directory
import stars_utils

# Set seed globally for reproducibility
RANDOM_SEED = 42
tf.keras.utils.set_random_seed(RANDOM_SEED)
os.makedirs('models', exist_ok=True)
print('Setup complete. TensorFlow version: ' + str(tf.version))

Setup complete. TensorFlow version: <module 'tensorflow._api.v2.version' from '/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/site-packages/tensorflow/_api/v2/version/__init__.py'>


In [5]:
# --- 2. MODEL CONFIGURATION ---
TARGET_PLANET = 'jupiter'
NUM_MODELS = 1
EPOCHS = 5000   
TEST_SIZE = 0.2  
BATCH_SIZE = 32

# Time Range for Data Generation (Training Data)
START_DATE = datetime(1970, 1, 1)
END_DATE = datetime(2025, 1, 1)
TIME_STEP = timedelta(days=1) 

# File Path for Scaler Persistence
MODEL_DIR = 'models'
SCALER_FILEPATH = os.path.join(MODEL_DIR, f'feature_scaler_{TARGET_PLANET}.pkl')

print('Configuration loaded.')

Configuration loaded.


In [4]:
# --- 3. DATA GENERATION, SPLIT, AND SCALING ---

print('1. Generating data for ' + TARGET_PLANET.capitalize() + '...')

# Use the utility function to get the raw ephemeris data
df_raw = stars_utils.generate_planetary_ephemeris_df(
    target_planet=TARGET_PLANET, 
    start_date=START_DATE, 
    end_date=END_DATE, 
    time_step=TIME_STEP
)

df_raw.head()

1. Generating data for Jupiter...
Note: Using 'jupiter barycenter' for target lookup.
Dataset for Jupiter created successfully with 20090 data points.


Unnamed: 0,Time_UTC,Julian_Date,RA_deg,Dec_deg,Distance_AU,X_au,Y_au,Z_au
0,1970-01-01,2440588.0,210.963608,-11.290574,5.74463,-4.830654,-2.898375,-1.124711
1,1970-01-02,2440589.0,211.094562,-11.33416,5.729532,-4.810606,-2.901321,-1.126029
2,1970-01-03,2440590.0,211.223534,-11.376928,5.714343,-4.79061,-2.903987,-1.127226
3,1970-01-04,2440591.0,211.350491,-11.418872,5.699067,-4.77067,-2.906374,-1.128302
4,1970-01-05,2440592.0,211.475399,-11.459981,5.683707,-4.750791,-2.908484,-1.129258


In [6]:
jup_df = stars_utils.add_astronomy_features(df_raw, TARGET_PLANET)

Calculated jupiter's Synodic Period with Earth: 398.88 days.
Added dynamic features (Time Index, Polynomial, Earth Cycle, Target Cycle, Synodic Cycle, Interaction) to the DataFrame.


In [18]:
print(jup_df.columns)
print(jup_df.shape)
jup_df.head()

Index(['Time_UTC', 'Julian_Date', 'RA_deg', 'Dec_deg', 'Distance_AU', 'X_au',
       'Y_au', 'Z_au', 'Time_Index', 'Time_Index_2', 'Time_Index_3',
       'Sin_Year', 'Cos_Year', 'Sin_Jupiter', 'Cos_Jupiter', 'Sin_Synodic',
       'Cos_Synodic', 'Sin_Year_Sin_Synodic', 'Sin_Year_Cos_Synodic',
       'Cos_Year_Sin_Synodic', 'Cos_Year_Cos_Synodic'],
      dtype='object')
(20090, 21)


Unnamed: 0,Time_UTC,Julian_Date,RA_deg,Dec_deg,Distance_AU,X_au,Y_au,Z_au,Time_Index,Time_Index_2,...,Sin_Year,Cos_Year,Sin_Jupiter,Cos_Jupiter,Sin_Synodic,Cos_Synodic,Sin_Year_Sin_Synodic,Sin_Year_Cos_Synodic,Cos_Year_Sin_Synodic,Cos_Year_Cos_Synodic
0,1970-01-01,2440588.0,210.963608,-11.290574,5.74463,-4.830654,-2.898375,-1.124711,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
1,1970-01-02,2440589.0,211.094562,-11.33416,5.729532,-4.810606,-2.901321,-1.126029,1.0,1.0,...,0.017202,0.999852,0.00145,0.999999,0.015752,0.999876,0.000271,0.017199,0.015749,0.999728
2,1970-01-03,2440590.0,211.223534,-11.376928,5.714343,-4.79061,-2.903987,-1.127226,2.0,4.0,...,0.034398,0.999408,0.0029,0.999996,0.031499,0.999504,0.001084,0.034381,0.031481,0.998912
3,1970-01-04,2440591.0,211.350491,-11.418872,5.699067,-4.77067,-2.906374,-1.128302,3.0,9.0,...,0.051584,0.998669,0.004351,0.999991,0.047239,0.998884,0.002437,0.051527,0.047176,0.997554
4,1970-01-05,2440592.0,211.475399,-11.459981,5.683707,-4.750791,-2.908484,-1.129258,4.0,16.0,...,0.068755,0.997634,0.005801,0.999983,0.062967,0.998016,0.004329,0.068619,0.062818,0.995654


In [38]:
FEATURES = [
    'Time_Index', 'Time_Index_2', 'Time_Index_3',
    'Sin_Year', 'Cos_Year',
    'Sin_Jupiter', 'Cos_Jupiter',
    'Sin_Synodic', 'Cos_Synodic',
    # 'Sin_Year_Cos_Synodic','Cos_Year_Sin_Synodic',
    'Sin_Year_Sin_Synodic','Cos_Year_Cos_Synodic'
]
TARGETS = ['X_au', 'Y_au', 'Z_au']

X = jup_df[FEATURES] 
y = jup_df[TARGETS] 

# Chronological Train-Test Split
split_index = int(len(jup_df) * (1 - TEST_SIZE))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Scaling the features (Julian Date)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
INPUT_SHAPE = X_train_scaled.shape[1] 

y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

# Save the fitted scaler for later use in prediction/analysis notebooks
stars_utils.save_scaler(scaler, SCALER_FILEPATH)

# Create robust, optimized TensorFlow Datasets
AUTOTUNE = tf.data.AUTOTUNE
# raw_train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train.values))
# raw_test_dataset = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test.values))
# test_dataset = raw_test_dataset.batch(BATCH_SIZE).prefetch(AUTOTUNE)

print('Data split and scaled. Scaler saved.')

Scaler saved successfully to: models/feature_scaler_jupiter.pkl
Data split and scaled. Scaler saved.


In [34]:
# --- 4. MODEL CONFIGURATION AND CALLBACKS ---
models = []
y_pred_list = []

early_stopping_callback = EarlyStopping(
    monitor='val_loss', 
    patience=150,
    restore_best_weights=True 
)

lr_on_plateau_callback = ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.5,
    patience=50,
    min_lr=1e-7
)

callbacks = [early_stopping_callback, lr_on_plateau_callback]
best_ensemble_rmse = float('inf')

print('Callbacks defined.')

Callbacks defined.


In [40]:
# --- 5. BUILD AND TRAIN THE INDIVIDUAL MLP MODELS ---
print('2. Building and training ' + str(NUM_MODELS) + ' diverse MLP models...')

for i in range(NUM_MODELS):
    # CRITICAL: Clear session to ensure fresh start for each model
    tf.keras.backend.clear_session()
    
    # Use a different random seed and shuffle seed for diversity
    current_seed = RANDOM_SEED + i
    tf.keras.utils.set_random_seed(1)

    # --- Model Architecture ---
    model = Sequential([
        Dense(128, activation='relu', kernel_regularizer=l2(0.00001), 
              input_shape=(INPUT_SHAPE,)), 
        
        Dense(256, activation='relu', kernel_regularizer=l2(0.00001)),
        
        Dense(128, activation='relu', kernel_regularizer=l2(0.00001)), 
        
        Dense(64, activation='relu', kernel_regularizer=l2(0.00001)),

        Dense(3, activation='linear') 
    ])
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    print('--- Training MLP Model ' + str(i+1) + ' (Seed: ' + str(current_seed) + ') ---')
    model.fit(
        X_train_scaled, 
        y_train_scaled,
        epochs=EPOCHS, 
        validation_data=(X_test_scaled, y_test_scaled),
        callbacks=callbacks,
        verbose=1
    )
    
    models.append(model)
    
    # Predict on the test set
    y_pred_scaled = model.predict(X_test_scaled)

    # get inverse value (becuase of the scaling)
    y_pred_mlp_au = y_scaler.inverse_transform(y_pred_scaled)

    # Calculate the loss (MSE) on the test set
    y_test_np = y_test.values
    loss_mse = model.evaluate(X_test_scaled, y_test_scaled, verbose=0)
    rmse_au = np.sqrt(loss_mse) 

    # Calculate RMSE for each coordinate individually
    rmse_x = np.sqrt(mean_squared_error(y_test_np[:, 0], y_pred_mlp_au[:, 0]))
    rmse_y = np.sqrt(mean_squared_error(y_test_np[:, 1], y_pred_mlp_au[:, 1]))
    rmse_z = np.sqrt(mean_squared_error(y_test_np[:, 2], y_pred_mlp_au[:, 2]))

    print("\n--- Model Evaluation (Neural Network Test Set) ---")
    print(f"Overall Averaged RMSE: {rmse_au:.6f} AU")
    print(f"X-coordinate RMSE: {rmse_x:.6f} AU")
    print(f"Y-coordinate RMSE: {rmse_y:.6f} AU")
    print(f"Z-coordinate RMSE: {rmse_z:.6f} AU")
    



2. Building and training 1 diverse MLP models...
--- Training MLP Model 1 (Seed: 42) ---
Epoch 1/5000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 890us/step - loss: 0.0204 - val_loss: 0.0126 - learning_rate: 0.0010
Epoch 2/5000
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 691us/step - loss: 0.0041 - val_loss: 0.0127 - learning_rate: 0.0010
Epoch 3/5000
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 686us/step - loss: 0.0037 - val_loss: 0.0155 - learning_rate: 0.0010
Epoch 4/5000
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 687us/step - loss: 0.0034 - val_loss: 0.0102 - learning_rate: 0.0010
Epoch 5/5000
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 694us/step - loss: 0.0029 - val_loss: 0.0089 - learning_rate: 0.0010
Epoch 6/5000
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 686us/step - loss: 0.0025 - val_loss: 0.0083 - learning_rate: 0.0010
Epoch 7/5000
[1m503/503[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 689us/step - loss: 0.0023 - val_loss: 0.0089 -

In [None]:
# Save the individual model (using the modern .keras format)
model_save_name = os.path.join(MODEL_DIR, f'{TARGET_PLANET}_position_predictor_mm' + str(i) + '.keras')
model.save(model_save_name)
print('Saved model to: ' + model_save_name)

In [None]:
# --- 6. ENSEMBLE PREDICTION AND EVALUATION ---
print('3. Final Ensemble Evaluation...')

# The ensemble prediction is the simple average of all individual model predictions.
y_ensemble_pred_au = np.mean(y_pred_list, axis=0)

# Evaluate the ensemble performance
overall_ensemble_rmse = np.sqrt(mean_squared_error(y_test.values, y_ensemble_pred_au))

print('Overall Ensemble RMSE: ' + '{:.6f}'.format(overall_ensemble_rmse) + ' AU')

print("Training and evaluation complete. Models and scaler saved to the 'models' directory.")