In [1]:
import sys
import os

# The notebook is expected to run from a sub-folder of the project, so the
# directory one level above the working directory serves two purposes:
# it is the import root for the project packages and the parent of the
# output folder.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, ".."))
# Same location as project_root; the separate name is kept so that any code
# referring to `parent_dir` keeps working.
parent_dir = project_root

# Make the project packages importable. The membership check keeps sys.path
# from growing by one duplicate entry every time this cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)

# Output folder for the exported message tables; created on first use.
save_dir = os.path.join(parent_dir, "final_messages", "n_body_gravity")
os.makedirs(save_dir, exist_ok=True)

# Importing self-made models and functions
from simulations.n_body_trajectory import n_body_simulation, generate_random_positions, generate_random_velocities, generate_unique_masses
from gnn_model.node_data_list import node_data_list 
from gnn_model.GNN_MLP import GNN_MLP
from gnn_model.train_model import train_model
from gnn_model.pipeline import pipeline

# Import other packages
from pysr import PySRRegressor
import numpy as np
import torch
import pandas as pd



Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [4]:
# Run the project pipeline and keep its three results: the model and the
# train/test message tables used by the cells below.
# NOTE(review): N_train=3 presumably selects 3-body training systems and
# N_test_list the body counts evaluated afterwards — confirm against
# gnn_model.pipeline.
model_3, train_messages_3, test_messages_3 = pipeline(
    train_iterations=64,
    test_iterations=16,
    N_train=3,
    N_test_list=[2, 3, 4, 5, 6],
    T=128,
    dt=0.02,
    dim=2,
    hidden_channels=128,
    m_dim=2,
    out_channels=2,
    epochs=20,
    lr=0.0001,
    G=1.0,
    single_node=False,
    testing=True,
)

Epoch 001: MSE = 1.281737, Mean Relative Error = 3.921820
Epoch 002: MSE = 1.115803, Mean Relative Error = 3.517240
Epoch 003: MSE = 0.971525, Mean Relative Error = 3.322888
Epoch 004: MSE = 0.881562, Mean Relative Error = 3.212323
Epoch 005: MSE = 0.816954, Mean Relative Error = 3.795496
Epoch 006: MSE = 0.776377, Mean Relative Error = 3.706107
Epoch 007: MSE = 0.728020, Mean Relative Error = 3.564572
Epoch 008: MSE = 0.714951, Mean Relative Error = 4.321532
Epoch 009: MSE = 0.692018, Mean Relative Error = 4.600280
Epoch 010: MSE = 0.673432, Mean Relative Error = 5.113424
Epoch 011: MSE = 0.680546, Mean Relative Error = 3.911061
Epoch 012: MSE = 0.656857, Mean Relative Error = 4.005616
Epoch 013: MSE = 0.634608, Mean Relative Error = 3.799608
Epoch 014: MSE = 0.630743, Mean Relative Error = 3.739285
Epoch 015: MSE = 0.610888, Mean Relative Error = 3.684139
Epoch 016: MSE = 0.595264, Mean Relative Error = 3.770298
Epoch 017: MSE = 0.649681, Mean Relative Error = 3.959217
Epoch 018: MSE

In [None]:
# Persist BOTH message tables before reading them back. Previously only the
# training table was written, so the read of test_messages_3.csv below failed
# on a fresh run unless a stale file happened to exist.
# NOTE(review): assumes test_messages_3 is a DataFrame like train_messages_3 —
# confirm against pipeline().
train_messages_3.to_csv(f"{save_dir}/train_messages_3.csv", index=False)
test_messages_3.to_csv(f"{save_dir}/test_messages_3.csv", index=False)

# Candidate inputs offered to the symbolic regression.
features = ['mass_j', 'dx', 'dy', 'r3']

# Reload the training table from disk and add the cube of column `r`.
train_df = pd.read_csv(f"{save_dir}/train_messages_3.csv")
train_df['r3'] = train_df['r'] ** 3

# Draw the 10% subsample ONCE and slice inputs and targets from the same rows.
# This selects the same rows as three separate .sample(random_state=42) calls
# but no longer depends on the seeds matching to keep X and y aligned.
train_sample = train_df.sample(frac=0.1, random_state=42)
train_X = train_sample[features]
train_y_x = train_sample['message_x']
train_y_y = train_sample['message_y']

# Same preparation for the (much smaller, 0.1%) test subsample.
test_df = pd.read_csv(f"{save_dir}/test_messages_3.csv")
test_df['r3'] = test_df['r'] ** 3

test_sample = test_df.sample(frac=0.001, random_state=42)
test_X = test_sample[features]
test_y_x = test_sample['message_x']
test_y_y = test_sample['message_y']

# Fit one symbolic-regression model per message component with identical
# settings. The regressor is built inside the loop so each fit gets its own
# fresh parameter objects.
fitted_models = {}
for target_name, target_values in (("message_x", train_y_x), ("message_y", train_y_y)):
    regressor = PySRRegressor(
        niterations=500,
        binary_operators=["+", "-", "*", "/"],
        model_selection="best",  # best trade-off between complexity and error
        # Previously 7 for x and 5 for y: both exceed the four available
        # columns, so no feature was ever dropped. len(features) states that
        # explicitly and keeps the two models consistent.
        select_k_features=len(features),
        verbosity=1,
        maxdepth=10,
    )
    regressor.fit(train_X.values, target_values.values, variable_names=features)

    print(f"Best expression for {target_name}:")
    print(regressor)
    fitted_models[target_name] = regressor

# Expose the fitted models under the names later cells refer to.
train_model_x = fitted_models["message_x"]
train_model_y = fitted_models["message_y"]



Using features ['mass_j' 'dx' 'dy' 'r3']
Compiling Julia backend...


[ Info: Started!



Expressions evaluated per second: 7.440e+04
Head worker occupation: 16.2%
Progress: 170 / 7500 total iterations (2.267%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
3           4.125e-01  5.314e+00  y = -0.0022156 / r3
5           1.753e-01  4.278e-01  y = dx / (0.0076315 + r3)
7           7.076e-02  4.537e-01  y = dx / ((0.015627 / mass_j) + r3)
9           3.134e-02  4.072e-01  y = (dx * mass_j) / (0.015627 + (r3 / 0.15468))
11          2.327e-02  1.488e-01  y = ((dx * mass_j) + -0.03708) / (0.015627 + (r3 / 0.15468))
13          2.277e-02  1.093e-02  y = ((dx * (mass_j - 0.38508)) + -0.099625) / (0.015627 + (r3 ...
                                  / 0.15468))
15          1.780e-02  1.230e-01  y = ((((dx / 0.24949) * mass_j) + -0.56287) / ((r3 / 0.061067)...
                                   + 0.061067)) / 1.4337
17          1.651e-02  3.758e-02  y = (((((dx / 1.0656) / 

In [2]:
# Candidate inputs offered to the symbolic regression.
features = ['mass_j', 'dx', 'dy', 'r3']

# Load the exported training messages and add the cube of column `r`.
train_df = pd.read_csv(f"{save_dir}/train_messages_3.csv")
train_df['r3'] = train_df['r'] ** 3

# Subsample 10% of the rows once, then take inputs and targets from that one
# sample. The rows are the same ones the previous three independent
# .sample(random_state=42) calls selected, but alignment between X and y is
# now guaranteed by construction instead of by matching seeds.
train_sample = train_df.sample(frac=0.1, random_state=42)
train_X = train_sample[features]
train_y_x = train_sample['message_x']
train_y_y = train_sample['message_y']

# Load the exported test messages and prepare a 0.1% subsample the same way.
test_df = pd.read_csv(f"{save_dir}/test_messages_3.csv")
test_df['r3'] = test_df['r'] ** 3

test_sample = test_df.sample(frac=0.001, random_state=42)
test_X = test_sample[features]
test_y_x = test_sample['message_x']
test_y_y = test_sample['message_y']

# Fit one symbolic-regression model per message component, here with the
# expression depth capped at 5. Both components share one configuration.
fitted_models = {}
for target_name, target_values in (("message_x", train_y_x), ("message_y", train_y_y)):
    regressor = PySRRegressor(
        niterations=500,
        binary_operators=["+", "-", "*", "/"],
        model_selection="best",  # best trade-off between complexity and error
        # Was 7 for x and 5 for y, both larger than the four columns supplied,
        # so feature selection never removed anything; len(features) makes
        # that explicit and identical for both models.
        select_k_features=len(features),
        verbosity=1,
        maxdepth=5,
    )
    regressor.fit(train_X.values, target_values.values, variable_names=features)

    print(f"Best expression for {target_name}:")
    print(regressor)
    fitted_models[target_name] = regressor

# Expose the fitted models under the names later cells refer to.
train_model_x = fitted_models["message_x"]
train_model_y = fitted_models["message_y"]



Using features ['mass_j' 'dx' 'dy' 'r3']
Compiling Julia backend...


[ Info: Started!



Expressions evaluated per second: 6.750e+04
Head worker occupation: 17.5%
Progress: 154 / 7500 total iterations (2.053%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           3.012e+00  1.594e+01  y = -1.5947
3           4.107e-01  9.963e-01  y = -0.0019755 / r3
5           2.108e-01  3.335e-01  y = dx / (0.011232 + r3)
7           8.995e-02  4.257e-01  y = dx / (-0.74691 + (0.7491 + r3))
9           3.030e-02  5.441e-01  y = (dx / (0.014925 + (r3 / 0.15365))) * mass_j
11          2.688e-02  5.987e-02  y = (dx / (0.014925 + (r3 / 0.15365))) * (mass_j - dx)
13          1.677e-02  2.359e-01  y = (mass_j - (0.16625 / dx)) * (dx / (0.023348 + (r3 / 0.1589...
                                  8)))
15          1.621e-02  1.680e-02  y = (dx / (0.023348 + (r3 / 0.15898))) * ((mass_j - dx) - (0.1...
                                  6625 / dx))
-------------------------------------

[ Info: Started!


Using features ['mass_j' 'dx' 'dy' 'r3']

Expressions evaluated per second: 8.530e+04
Head worker occupation: 15.5%
Progress: 188 / 7500 total iterations (2.507%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
3           4.951e-01  5.314e+00  y = -0.010049 / dx
5           4.633e-01  3.321e-02  y = (-0.03785 * dy) / r3
7           1.098e-01  7.197e-01  y = (dy / (r3 - -0.0043802)) * -0.84626
9           6.149e-02  2.901e-01  y = (-0.14204 * (dy / (r3 - -0.005731))) * mass_j
11          2.519e-02  4.462e-01  y = mass_j * (-0.13979 * ((dy - -0.033056) / (r3 - -0.0041335)...
                                  ))
13          2.380e-02  2.849e-02  y = (mass_j - dy) * (-0.13979 * ((dy - -0.033056) / (r3 - -0.0...
                                  041335)))
15          2.229e-02  3.260e-02  y = (-0.13979 * ((dy - -0.033056) / (r3 - -0.003336))) * ((mas...
                              

In [3]:
import numpy as np
import pandas as pd
from pysr import PySRRegressor

# Requires `train_model_x` to have been fitted by an earlier cell.
# NOTE(review): refresh() is meant to reload the stored equation table into
# the model object — confirm against the PySR documentation.
train_model_x.refresh()

# Table of discovered equations, ordered by complexity. Every step below reads
# from the SORTED frame `df`: the previous version built `df` and then kept
# reading the unsorted `df_eq`, so the ordering was never actually enforced.
df_eq = train_model_x.equations_
df = df_eq.sort_values("complexity")

if len(df) < 2:
    # The criterion compares consecutive equations, so it needs at least two.
    raise ValueError(
        "Need at least two equations to compute the selection criterion; "
        f"got {len(df)}."
    )

complexities = df["complexity"].values
mse_values = df["loss"].values  # PySR's default loss is MSE

# Drop in log-loss between consecutive equations, per unit of added
# complexity: -d(log loss) / d(complexity).
delta_log_mse = -np.diff(np.log(mse_values))
delta_complexity = np.diff(complexities)
selection_criterion = delta_log_mse / delta_complexity

# np.diff yields one value per consecutive pair, so entry i describes the step
# from row i to row i + 1; the chosen equation is the one the step lands on.
best_index = np.argmax(selection_criterion)
best_model = df.iloc[best_index + 1]

# Display best model
print(best_model)


complexity                                                  5
loss                                                 0.089596
score                                                0.761232
equation                            dx / (r3 - -0.0019755173)
sympy_format                      dx/(r3 - 1*(-0.0019755173))
lambda_format    PySRFunction(X=>dx/(r3 - 1*(-0.0019755173)))
Name: 2, dtype: object
