In [None]:
import sys
import os

# Working directory of the notebook; every path below is derived from it.
current_dir = os.getcwd()

# Make the directory one level above the working directory importable so the
# project modules imported further down can be resolved. The membership check
# keeps re-runs of this cell from stacking duplicate entries onto sys.path.
project_root = os.path.abspath(os.path.join(current_dir, ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

# Directory for the exported message tables; created if it does not exist yet.
parent_dir = os.path.dirname(current_dir)
save_dir = os.path.join(parent_dir, "final_messages", "n_body_gravity")
os.makedirs(save_dir, exist_ok=True)

# Importing self-made models and functions
from DeepLearningPH.simulations.n_body_simulation import n_body_simulation, generate_random_positions, generate_random_velocities, generate_unique_masses
from gnn_model.node_data_list import node_data_list 
from gnn_model.GNN_MLP import GNN_MLP
from gnn_model.train_model import train_model
from gnn_model.pipeline import pipeline

# Import other packages
from pysr import PySRRegressor
import numpy as np
import torch
import pandas as pd



Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [2]:
# model_1, train_messages_1, test_messages_1 = pipeline(train_iterations=30, test_iterations=10,
#                  N_train=2, N_test_list=[2, 3, 4, 5, 6], T=200, dt=0.01, dim=2, hidden_channels=128,
#                  m_dim=2, out_channels=2, epochs=100, lr=0.0001, G=1.0, single_node=False, testing=False) 

In [3]:
# model_1, train_messages_1, test_messages_1 = pipeline(train_iterations=20, test_iterations=10,
#                  N_train=2, N_test_list=[2, 3, 4, 5, 6], T=100, dt=0.01, dim=2, hidden_channels=128,
#                  m_dim=2, out_channels=2, epochs=50, lr=0.001, G=1.0, single_node=False, testing=False, model=model_1) 

In [None]:
# torch.save(model_1.state_dict(), "gnn_model_firstofmany.pt")

In [2]:
# Instantiate the network and restore its weights from the checkpoint file in
# the working directory. load_state_dict is left as the last expression so the
# key-matching report is still shown as the cell output.
model_loaded = GNN_MLP(n_f=6, hidden_channels=128,
                 m_dim=2, out_channels=2, single_node=False)
# map_location="cpu" lets a checkpoint written on a GPU machine load on a
# CPU-only one; weights_only=True restricts unpickling to tensor data instead
# of arbitrary Python objects.
model_loaded.load_state_dict(
    torch.load("gnn_model_firstofmany.pt", map_location="cpu", weights_only=True)
)

<All keys matched successfully>

In [5]:
# Run the pipeline with the restored network: training is switched off and
# testing is switched on, with a single entry (2) in the test-size list.
# NOTE(review): with training=False the train_iterations / epochs / lr values
# are presumably unused - confirm against pipeline().
model_t, train_messages_t, test_messages_t = pipeline(
    train_iterations=20,
    test_iterations=30,
    N_train=2,
    N_test_list=[2],
    T=100,
    dt=0.01,
    dim=2,
    hidden_channels=128,
    m_dim=2,
    out_channels=2,
    epochs=50,
    lr=0.001,
    G=1.0,
    single_node=False,
    testing=True,
    training=False,
    model=model_loaded,
)

  y_target = torch.tensor(acceleration, dtype=torch.float32)


2970
average loss per/over timestep N=2:   4.676572133784468


In [None]:
# test_messages_t[2].to_csv(f"{save_dir}/messages_test_2D_trained_firstofmany.csv", index=False)

# for i in range(2,7):
#     test_messages_1[i].to_csv(f"{save_dir}/N_{i}_messages_test_cleaned_presentation2.csv", index=False)

In [None]:
# Read the exported message table and append the pairwise geometry columns:
# separation components (dx, dy), distance r, and r cubed. Each assign() entry
# may use the columns created by the entries before it.
train_df = pd.read_csv(f"{save_dir}/messages_test_2D_trained_firstofmany.csv").assign(
    dx=lambda t: t['pos_i_x'] - t['pos_j_x'],
    dy=lambda t: t['pos_i_y'] - t['pos_j_y'],
    r=lambda t: np.sqrt(t['dx']**2 + t['dy']**2),
    r3=lambda t: t['r']**3,
)

features = ['mass_j','mass_i', 'dx', 'dy', 'r']

# Alternatives kept for reference: force targets (message * mass_i) and a
# 40% random subsample of inputs and targets.
# train_df['force_x'] = train_df['message_x'] * train_df['mass_i']
# train_df['force_y'] = train_df['message_y'] * train_df['mass_i']

# train_X = train_df[features].sample(frac=0.4, random_state=42)
# train_y_x = train_df['message_x'].sample(frac=0.4, random_state=42)
# train_y_y = train_df['message_y'].sample(frac=0.4, random_state=42)

# One input table per message component: both share the masses and r, and
# differ only in which separation component they carry.
train_X, train_Y = (
    train_df[['mass_j','mass_i', 'dx', 'r']],
    train_df[['mass_j','mass_i', 'dy', 'r']],
)
# Regression targets: the message components stored in the table.
train_y_x, train_y_y = train_df['message_x'], train_df['message_y']

In [None]:

def _make_message_regressor():
    """Return a fresh, unfitted PySRRegressor.

    Both message components are searched with the same settings, so the
    configuration lives in one place instead of being copied per fit.
    """
    # NOTE(review): "inv_r3" is mapped for sympy but no operator of that name
    # appears in the operator lists, so the mapping is presumably never used;
    # the operator set also has no division. Confirm whether an inverse-power
    # operator was meant to be part of the search.
    return PySRRegressor(
        niterations=100,
        binary_operators=["+", "-", "*"],
        model_selection="score",  # Select best tradeoff between complexity and error
        select_k_features=4,
        extra_sympy_mappings={"inv_r3": lambda r: 1 / r**3},
        # verbosity=1,
        # maxdepth=5,
    )


# Symbolic regression for message_x on the dx feature table.
# variable_names is taken from the frame so names always match the columns.
train_model_x = _make_message_regressor()
train_model_x.fit(train_X.values, train_y_x.values, variable_names=list(train_X.columns))

# Print best expression for message_x
print("Best expression for message_x:")
print(train_model_x)

# Symbolic regression for message_y. It must be fitted on train_Y (the table
# holding dy); fitting on train_X would feed dx values under the name 'dy'.
train_model_y = _make_message_regressor()
train_model_y.fit(train_Y.values, train_y_y.values, variable_names=list(train_Y.columns))
print("Best expression for message_y:")
print(train_model_y)



Using features ['mass_j' 'mass_i' 'dx' 'r']


[ Info: Started!



Expressions evaluated per second: 8.860e+02
Head worker occupation: 6.5%
Progress: 7 / 1500 total iterations (0.467%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
3           6.714e+01  5.314e+00  y = 0.40956 - r
5           1.819e+01  6.530e-01  y = (0.40956 * r) - r
7           1.743e+01  2.119e-02  y = (0.40956 * (r + -0.77995)) - r
9           1.412e+01  1.052e-01  y = (((dx * 0.40956) + r) * 0.40956) - r
11          1.233e+01  6.804e-02  y = (((dx * 0.40956) + r) * 0.40956) - (r - -1.306)
13          1.164e+01  2.875e-02  y = (0.40956 * (((dx - mass_i) * 0.40956) + (r + -0.77995))) -...
                                   r
15          1.103e+01  2.708e-02  y = (((0.16007 * 0.90788) * dx) - ((1.4892 + -1.0728) * r)) + ...
                                  (-1.8463 + -2.8759)
17          1.102e+01  2.493e-04  y = (((r - ((dx + (r - (mass_j + mass_j))) * 0.27096)) - mass_..

In [5]:
import numpy as np
import pandas as pd
from pysr import PySRRegressor

# Pick a "best" equation from the fitted message_x regressor: the row whose
# loss falls the most, in log space, per unit of added complexity relative to
# the next-simpler row.
train_model_x.refresh()  # reload stored results in case they are not in memory

# Discovered equations, ordered from simplest to most complex
df = train_model_x.equations_.sort_values("complexity")

complexities = df["complexity"].values
mse_values = df["loss"].values  # NOTE(review): presumably MSE - confirm the configured loss

# Row-to-row changes between neighbouring equations (one entry fewer than df)
log_mse = np.log(mse_values)
delta_log_mse = log_mse[:-1] - log_mse[1:]
delta_complexity = complexities[1:] - complexities[:-1]

# Drop in log-loss per unit of extra complexity
selection_criterion = delta_log_mse / delta_complexity

# Entry k of the criterion compares rows k and k+1, so the winning row sits
# one position after the arg-max.
best_index = np.argmax(selection_criterion)
best_model = df.iloc[best_index + 1]

# Show the selected row
print(best_model)


complexity                                      3
loss                                      17.3059
score                                    0.121111
equation                          r * -0.63643813
sympy_format                      r*(-0.63643813)
lambda_format    PySRFunction(X=>r*(-0.63643813))
Name: 1, dtype: object


In [7]:
# Equation string of the row labelled 1 (a label lookup, not a position).
print(df.at[1, 'equation'])
# Other renderings of a row, e.g. the one labelled 4:
# print(df.at[4, 'sympy_format'])
# print(df.at[4, 'lambda_format'])

r * -0.63643813
