In [1]:
import sys
import os

# Get directory
current_dir = os.getcwd()

# Create project_root for module imports
project_root = os.path.abspath(os.path.join(current_dir, ".."))
sys.path.append(project_root)

# Create data directory path
parent_dir = os.path.dirname(current_dir)
save_dir = os.path.join(parent_dir, "final_messages", "n_body_gravity")
os.makedirs(save_dir, exist_ok=True)

# Importing self-made models and functions
from simulations.n_body_trajectory import n_body_simulation, generate_random_positions, generate_random_velocities, generate_unique_masses
from gnn_model.node_data_list import node_data_list 
from gnn_model.GNN_MLP import GNN_MLP
from gnn_model.train_model import train_model
from gnn_model.pipeline import pipeline

# Import other packages
from pysr import PySRRegressor
import numpy as np
import torch
import pandas as pd



Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [2]:
model_4, train_messages_4, test_messages_4 = pipeline(train_iterations=256, test_iterations=32,
                 N_train=3, N_test_list=[2, 3, 4, 5, 6], T=256, dt=0.02, dim=2, hidden_channels=128,
                 m_dim=2, out_channels=2, epochs=64, lr=0.0001, G=1.0, single_node=False, testing=True)

  y_target = torch.tensor(acceleration, dtype=torch.float32)


Epoch 001: MSE = 0.536645, Mean Relative Error = 180.279190
Epoch 002: MSE = 0.360248, Mean Relative Error = 230.795729
Epoch 003: MSE = 0.306396, Mean Relative Error = 183.696835
Epoch 004: MSE = 0.268244, Mean Relative Error = 92.869452
Epoch 005: MSE = 0.235546, Mean Relative Error = 153.906072
Epoch 006: MSE = 0.218512, Mean Relative Error = 145.648796
Epoch 007: MSE = 0.203588, Mean Relative Error = 149.549176
Epoch 008: MSE = 0.189203, Mean Relative Error = 142.006546
Epoch 009: MSE = 0.181591, Mean Relative Error = 145.901895
Epoch 010: MSE = 0.163067, Mean Relative Error = 153.078382
Epoch 011: MSE = 0.165738, Mean Relative Error = 130.675896
Epoch 012: MSE = 0.150119, Mean Relative Error = 184.501749
Epoch 013: MSE = 0.149359, Mean Relative Error = 137.455619
Epoch 014: MSE = 0.150505, Mean Relative Error = 111.010317
Epoch 015: MSE = 0.143814, Mean Relative Error = 97.027595
Epoch 016: MSE = 0.136604, Mean Relative Error = 207.486601
Epoch 017: MSE = 0.134521, Mean Relative E

In [None]:
torch.save(model_4.state_dict(), "gnn_model_firstofmany_4.pt")

In [6]:
model_loaded = GNN_MLP(n_f=6, m_dim=2, hidden_channels=128, out_channels=2,single_node=False)
model_loaded.load_state_dict(torch.load("../symbolic_regression/gnn_model_firstofmany_4.pt"))

<All keys matched successfully>

In [7]:
model_5, train_messages_5, test_messages_5 = pipeline(train_iterations=512, test_iterations=32,
                 N_train=4, N_test_list=[4, 5, 6], T=2048, dt=0.002, dim=2, hidden_channels=128,
                 m_dim=2, out_channels=2, epochs=64, lr=0.0001, G=1.0, single_node=False, testing=True, model = model_loaded)

  y_target = torch.tensor(acceleration, dtype=torch.float32)


Epoch 001: MSE = 0.215257, Mean Relative Error = 171.640691
Epoch 002: MSE = 0.211219, Mean Relative Error = 175.868399
Epoch 003: MSE = 0.212989, Mean Relative Error = 179.950772
Epoch 004: MSE = 0.207648, Mean Relative Error = 236.808849
Epoch 005: MSE = 0.198226, Mean Relative Error = 211.998826
Epoch 006: MSE = 0.197190, Mean Relative Error = 167.120840
Epoch 007: MSE = 0.188891, Mean Relative Error = 194.852096
Epoch 008: MSE = 0.188050, Mean Relative Error = 230.339908
Epoch 009: MSE = 0.183576, Mean Relative Error = 239.508869
Epoch 010: MSE = 0.179441, Mean Relative Error = 223.726229
Epoch 011: MSE = 0.171252, Mean Relative Error = 193.127812
Epoch 012: MSE = 0.169045, Mean Relative Error = 205.356773
Epoch 013: MSE = 0.166677, Mean Relative Error = 211.034084
Epoch 014: MSE = 0.162251, Mean Relative Error = 182.503647
Epoch 015: MSE = 0.158973, Mean Relative Error = 195.827645
Epoch 016: MSE = 0.155783, Mean Relative Error = 207.982837
Epoch 017: MSE = 0.152725, Mean Relative

In [8]:
torch.save(model_5.state_dict(), "gnn_model_5.pt")

In [None]:
train_messages_5.to_csv(f"{save_dir}/train_messages_4.csv", index=False)

In [5]:
# Load your cleaned DataFrame
train_df = pd.read_csv(f"{save_dir}/train_messages_4.csv")
train_df['r3'] = train_df['r'] ** 3
features = ['mass_j', 'dx', 'dy', 'r3']

train_X = train_df[features].sample(frac=0.01, random_state=42)
train_y_x = train_df['message_x'].sample(frac=0.01, random_state=42)
train_y_y = train_df['message_y'].sample(frac=0.01, random_state=42)

In [6]:
combined_df = pd.concat(
    [df.assign(N_test=N) for N, df in test_messages_4.items()],
    ignore_index=True
)

combined_df.to_csv(f"{save_dir}/test_messages_4.csv", index=False)

In [7]:
# Load your cleaned DataFrame
test_df = pd.read_csv(f"{save_dir}/test_messages_4.csv")
test_df['r3'] = test_df['r'] ** 3
features = ['mass_j', 'dx', 'dy', 'r3']

test_X = test_df[features].sample(frac=0.01, random_state=42)
test_y_x = test_df['message_x'].sample(frac=0.01, random_state=42)
test_y_y = test_df['message_y'].sample(frac=0.01, random_state=42)

In [8]:

# Create and fit SR model for message_x
train_model_x = PySRRegressor(
    niterations=1500,
    binary_operators=["+", "-", "*", "/"],
    # unary_operators=["cube"],
    model_selection="accuracy",  # Select best tradeoff between complexity and error
    select_k_features=5,  # small number of features
    verbosity=1,
    maxdepth=5,
    maxsize=15,
    complexity_of_operators={"+":4, "-":1, "*":1, "/": 1}
)

train_model_x.fit(train_X.values, train_y_x.values, variable_names = features)

# Print best expression for message_x
print("Best expression for message_x:")
print(train_model_x)

# Optionally: model for message_y too
train_model_y = PySRRegressor(
    niterations=1500,
    binary_operators=["+", "-", "*", "/"],
    # unary_operators=["cube"],
    model_selection="accuracy",
    select_k_features=5,  # small number of features
    verbosity=1,
    maxdepth=5,
    maxsize=15,
    complexity_of_operators={"+":4, "-":1, "*":1, "/": 1}
)

train_model_y.fit(train_X.values, train_y_y.values, variable_names = features)
print("Best expression for message_y:")
print(train_model_y)



Using features ['mass_j' 'dx' 'dy' 'r3']
Compiling Julia backend...


[ Info: Started!



Expressions evaluated per second: 1.290e+05
Head worker occupation: 10.1%
Progress: 293 / 22500 total iterations (1.302%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           2.064e-01  1.594e+01  y = 0.074052
3           1.191e-01  2.750e-01  y = 0.0048793 / r3
5           7.240e-02  2.489e-01  y = (dx / r3) * 0.10844
7           4.690e-02  2.171e-01  y = (mass_j / r3) / (24.794 / dx)
9           2.634e-02  2.884e-01  y = dx / (0.40296 - ((0.40076 - r3) - r3))
13          2.574e-02  5.792e-03  y = (0.55036 - (dy / mass_j)) * (dx / (0.76969 - (0.76835 - r3...
                                  )))
15          2.254e-02  6.647e-02  y = (0.61224 - (dy * (-0.14219 / dx))) * (dx / (0.76969 - (0.7...
                                  6835 - r3)))
---------------------------------------------------------------------------------------------------
Press 'q' and then <enter> to sto

[ Info: Started!


Using features ['mass_j' 'dx' 'dy' 'r3']

Expressions evaluated per second: 1.330e+05
Head worker occupation: 13.8%
Progress: 297 / 22500 total iterations (1.320%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           4.263e-01  1.594e+01  y = 0.00042819
3           2.671e-01  2.338e-01  y = 0.0063903 / r3
5           1.084e-01  4.511e-01  y = (-0.25265 * dy) / r3
7           2.651e-02  7.040e-01  y = (0.54113 / r3) * (-0.020852 - dy)
9           1.331e-02  3.443e-01  y = (-0.0143 - dy) / ((r3 / mass_j) / 0.12214)
11          1.223e-02  4.240e-02  y = ((dy * -0.10523) - 0.0013046) / (r3 / (1.1261 * mass_j))
13          9.780e-03  1.118e-01  y = (((-1.0491 / mass_j) * 0.098561) - (dy * mass_j)) / (r3 / ...
                                  0.12424)
15          9.714e-03  3.358e-03  y = (((1.2737 * -0.04583) - dy) - (dy * (mass_j - dy))) / (r3 ...
                            

In [9]:

# Create and fit SR model for message_x
test_model_x = PySRRegressor(
    niterations=1500,
    binary_operators=["+", "-", "*", "/"],
    # unary_operators=["cube"],
    model_selection="accuracy",  # Select best tradeoff between complexity and error
    select_k_features=5,  # small number of features
    verbosity=1,
    maxdepth=5,
    maxsize=15,
    complexity_of_operators={"+":4, "-":1, "*":1, "/": 1}
)

test_model_x.fit(test_X.values, test_y_x.values, variable_names = features)

# Print best expression for message_x
print("Best expression for message_x:")
print(test_model_x)

# Optionally: model for message_y too
test_model_y = PySRRegressor(
    niterations=1500,
    binary_operators=["+", "-", "*", "/"],
    # unary_operators=["cube"],
    model_selection="accuracy",
    select_k_features=5,  # small number of features
    verbosity=1,
    maxdepth=5,
    maxsize=15,
    complexity_of_operators={"+":4, "-":1, "*":1, "/": 1}
)

test_model_y.fit(test_X.values, test_y_y.values, variable_names = features)
print("Best expression for message_y:")
print(test_model_y)

[ Info: Note: you are running with more than 10,000 datapoints. You should consider turning on batching (`options.batching`), and also if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form.
[ Info: Started!


Using features ['mass_j' 'dx' 'dy' 'r3']

Expressions evaluated per second: 4.820e+04
Head worker occupation: 10.3%
Progress: 119 / 22500 total iterations (0.529%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           1.883e+01  1.594e+01  y = mass_j
3           1.143e-03  4.854e+00  y = -1.6682 * -0.036153
5           1.088e-03  2.445e-02  y = 0.060664 - (0.14121 / r3)
7           1.784e-04  9.040e-01  y = 0.060388 - ((-0.0043293 - dx) / r3)
9           1.336e-04  1.448e-01  y = 0.060388 - ((-0.14701 / r3) * (dx * mass_j))
11          9.391e-05  1.761e-01  y = 0.060388 - (((-0.14029 * 1.2954) / (r3 / mass_j)) * dx)
13          8.069e-05  7.587e-02  y = 0.060388 - ((-0.14029 / (r3 / mass_j)) * ((dx - -0.033303)...
                                   / 0.72853))
15          8.053e-05  9.901e-04  y = 0.060388 - ((-0.14029 / (r3 / mass_j)) * ((dx * 1.2619) - ...
               

[ Info: Note: you are running with more than 10,000 datapoints. You should consider turning on batching (`options.batching`), and also if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form.
[ Info: Started!


Using features ['mass_j' 'dx' 'dy' 'r3']

Expressions evaluated per second: 4.110e+04
Head worker occupation: 10.0%
Progress: 106 / 22500 total iterations (0.471%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
3           9.425e-04  5.314e+00  y = 0.22467 / r3
5           2.352e-04  6.941e-01  y = -0.0091277 - (dy / r3)
9           2.069e-04  3.199e-02  y = (dx - (dy * mass_j)) / (r3 / 0.16913)
11          1.425e-04  1.865e-01  y = ((dx - (dy * mass_j)) / (r3 / 0.16913)) - 0.01373
13          1.040e-04  1.577e-01  y = ((dx - (dy * mass_j)) / ((r3 - -0.14975) / 0.2255)) - 0.01...
                                  373
---------------------------------------------------------------------------------------------------
Press 'q' and then <enter> to stop execution early.

Expressions evaluated per second: 4.690e+04
Head worker occupation: 11.0%
Progress: 219 / 22500 total iterations 

In [None]:
import numpy as np
import pandas as pd
from pysr import PySRRegressor

# Assume you have already trained a PySR model
train_model_x.refresh()  # Load results if not already loaded
train_model_x.refresh()
# Extract discovered equations
df_eq = train_model_x.equations_
df = df_eq.sort_values("complexity")  # Ensure sorting by complexity

# Compute the selection criterion
complexities = df_eq["complexity"].values
mse_values = df_eq["loss"].values  # Loss is usually MSE in PySR

# Compute fractional drop in log MSE
delta_log_mse = -np.diff(np.log(mse_values))
delta_complexity = np.diff(complexities)

# Compute selection criterion
selection_criterion = delta_log_mse / delta_complexity

# Find the best model according to the criterion
best_index = np.argmax(selection_criterion)
best_model = df_eq.iloc[best_index + 1]  # +1 because diff reduces size by 1

# Display best model
print(best_model)


complexity                                                      11
loss                                                      0.010319
score                                                     0.456285
equation         (dy + dx) / (((r3 / -0.020735774) + -0.0173932...
sympy_format     (dx + dy)/(((r3/(-0.020735774) - 0.017393213)/...
lambda_format    PySRFunction(X=>(dx + dy)/(((r3/(-0.020735774)...
Name: 5, dtype: object


In [10]:
print(df.loc[5, 'equation'])
# print(df.loc[4, 'sympy_format'])
# print(df.loc[4, 'lambda_format'])

NameError: name 'df' is not defined