In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch_geometric.nn import MessagePassing
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import os
import sys
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, ".."))
sys.path.append(project_root)
# from gnn_model.train_model import train_model
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
save_dir = os.path.join(parent_dir, "final_messages", "projectile_trajectory_2d")
os.makedirs(save_dir, exist_ok=True)
save_path = os.path.join(save_dir, f"final_epoch_messages_projectile_trajectory_2d.pkl")


In [2]:
# To save the final messages during training
def save_messages(sim_name, model):
    """
    Saves the message storage dictionary inside "/final_messages/{sim_name}/".
    
    Parameters:
    - sim_name: Name of the simulation (used for subfolder and file naming)
    - model: Trained GNN model containing the message storage dictionary
    """
    current_dir = os.getcwd()
    repo_root = os.path.dirname(current_dir)    
    final_messages_dir = os.path.join(repo_root, "final_messages", sim_name)
    os.makedirs(final_messages_dir, exist_ok=True)
    save_path = os.path.join(final_messages_dir, f"final_epoch_messages_{sim_name}.pkl")
    with open(save_path, "wb") as f:
        pickle.dump(model.message_storage, f)
    return save_path


In [None]:
import numpy as np
import torch

def projectile_trajectory_2d(m, dt, theta, v0, g = 9.81, x0 = 0, y0 = 0, N=1, dim=2):
    """
    Calculates a 2D cartesian trajectory of a projectile of mass m launched at angle theta with initial velocity v0.  
    
        Parameters:
    theta (int or float): angle between projectile trajectory and x-axis at t=0s in degrees \\
    v0 (int or float): the intial velocity in m/s \\
    m (int or float): mass of the projectile in kg \\
    dt (int or float): time step for simulation in s \\
    g (int or float): gravity constant (standard is 9.81) in m/s^2 \\
    x0, y0 (int or float): initial position of ball in m \\

        Returns: 
    The output is a dictionary containing the {time, positions, velocities, masses} coordinates of the projectile
    """

    # Convert theta to radians
    theta_rad = np.radians(theta)

    # Initial velocity components
    vx0 = v0 * np.cos(theta_rad)
    vy0 = v0 * np.sin(theta_rad)

    # Time array
    t_max = (vy0 + np.sqrt(vy0**2+2*g*y0)) / g
    t = np.arange(0, t_max, dt)
    T = len(t)
    print(T)
    
    # Compute trajectory
    x = x0 + vx0 * t
    y = y0 + vy0 * t - 0.5 * g * t**2

    # Remove negative y values and make np array
    valid = y >= 0
    x, y, t = x[valid], y[valid], t[valid]
    trajectory = np.column_stack((x,y))

    vx = np.full_like(x, vx0)
    vy = vy0 - g * t
    trajectory_velocities = np.column_stack((vx,vy))
    
    trajectory_data = {
        "time": torch.arange(T, dtype=torch.float64),
        "positions": torch.zeros((T, N, dim), dtype=torch.float64),
        "velocities": torch.zeros((T, N, dim), dtype=torch.float64),
        "masses": torch.arange(N, dtype=torch.float64)
    }
    
    trajectory_data["time"] = torch.tensor(t, dtype=torch.float64)
    trajectory_data["positions"] = torch.tensor(trajectory[:, :].reshape(T, N, dim), dtype=torch.float64)
    trajectory_data["velocities"] = torch.tensor(trajectory_velocities[:, :].reshape(T, N, dim), dtype=torch.float64)
    trajectory_data["masses"] = torch.tensor(m, dtype=torch.float64)

    return trajectory_data


In [4]:
import torch
from torch_geometric.data import Data

def node_data_list(trajectory_dict, self_loop=True, complete_graph=True):
    """
    Converts a trajectory dictionary into a list of PyTorch Geometric `Data` objects 
    representing graph-structured data for a node-based learning task.

    Parameters:
    -----------
    trajectory_dict : dict
        A dictionary containing simulation data with the following keys:
        - "masses" (torch.Tensor): Mass values for the nodes (particles).
        - "time" (torch.Tensor): Time steps in the trajectory.
        - "positions" (list of torch.Tensor): List of position tensors at each time step.
        - "velocities" (list of torch.Tensor): List of velocity tensors at each time step.

    self_loop : bool, optional (default=True)
        If True, self-loops (edges from a node to itself) are included in the graph.

    complete_graph : bool, optional (default=True)
        If True, a fully connected graph is created where each node is connected to every other node.

    Returns:
    --------
    data_list : list of torch_geometric.data.Data
        A list of `Data` objects, each representing a graph at a given time step.
        Each `Data` object contains:
        - `x` (torch.Tensor): Node features with shape (N, num_features), where `num_features` includes
          position, velocity, mass, and time.
        - `y` (torch.Tensor): Target values representing velocity updates.
        - `edge_index` (torch.Tensor): Graph connectivity in COO format with shape (2, num_edges).

    Notes:
    ------
    - The function assumes a constant number of nodes (N) throughout the trajectory.
    - If `masses` is a scalar, it is broadcasted to all nodes.
    - Edge indices are stored in COO format (two-row tensor).

    Example:
    --------
    >>> trajectory_dict = {
    ...     "masses": torch.tensor(1.0),
    ...     "time": torch.arange(3),
    ...     "positions": [torch.rand(5, 2) for _ in range(3)],
    ...     "velocities": [torch.rand(5, 2) for _ in range(3)]
    ... }
    >>> graphs = node_data_list(trajectory_dict)
    >>> print(graphs[0])
    Data(x=[5, 6], y=[5, 2], edge_index=[2, 25])
    """

    data_list = []

    N = trajectory_dict["masses"].numel()
    # Ensure masses has the correct shape (N, 1)
    mass_data = trajectory_dict["masses"]

    # If masses is scalar (0-dimensional), repeat it for N particles
    if mass_data.dim() == 0:  # scalar case
        mass_data = mass_data.repeat(N, 1)  # shape (N, 1)

    # If masses is already a 1D tensor of shape (N,)
    elif mass_data.dim() == 1:  # 1D tensor case
        mass_data = mass_data.unsqueeze(1)  # shape (N, 1)
    
    for i in range(len(trajectory_dict["time"]) - 1):
        time_feature = trajectory_dict["time"][i].repeat(N,1)
        # print(time_feature.shape)
        # position_features = trajectory_dict["positions"][i]

        x_features = torch.cat([
            trajectory_dict["positions"][i],    # (N,dim)
            trajectory_dict["velocities"][i],   # (N, dim)
            mass_data,       # (N, 1)
            time_feature    # (N, 1)
        ], dim=1)   # tensor of shape (N, 2 + 2 + 1 + 1) + (N, 6) if dim = 2

        velocity_update = trajectory_dict["velocities"][i+1] - trajectory_dict["velocities"][i]
        acceleration = velocity_update / (trajectory_dict["time"][i+1] - trajectory_dict["time"][i])

        y_target = torch.tensor(acceleration, dtype=torch.float32)
        # y_target = torch.tensor(velocity_update, dtype=torch.float32)

        edge_list = []
        
        if self_loop:
            edge_list.extend([j,j] for j in range(N))
        
        if complete_graph:
            edge_list.extend([k,j] for k in range(N) for j in range(N) if k != j)           
   
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()        

        data_list.append(Data(x=x_features.float(), y=y_target, edge_index=edge_index))

    return data_list


In [5]:
traj_proj = projectile_trajectory_2d(3, 0.2, 40, 90)
# print(traj_proj["time"].shape)

59


In [6]:
traj_graph = node_data_list(traj_proj, self_loop=True, complete_graph=False)

  y_target = torch.tensor(acceleration, dtype=torch.float32)


In [7]:
### --- Jacob's code (doesn't work) --- ###

# import torch
# import torch.nn as nn
# import torch.optim as optim
# import numpy as np
# from torch_geometric.nn import MessagePassing
# from torch_geometric.data import Data
# from torch_geometric.loader import DataLoader

# # define the message passing class to initiate MLP's among the nodes
# class GNN_MLP(MessagePassing):
#     def __init__(self, n_f, m_dim, hidden_channels, out_channels, single_node = False):
#         super(GNN_MLP, self).__init__(aggr='add')  # "Add" aggregation for summing over forces
        
#         # initialising the MLP by creating the self.MLP attribute. 2 * in_channels to account for the fact that it may use both it's own and the other nodes features.
#         self.mess_mlp = nn.Sequential(
#             nn.Linear(2 * n_f, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, m_dim)
#         )

#         self.agg_mlp = nn.Sequential(
#             nn.Linear(m_dim + n_f, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, out_channels)
#         )
        
#         self.single_node = single_node
#         self.message_storage = {}
#         self.store_messages = False  

#     def forward(self, x, edge_index, final_epoch=False):
#         """
#         Forward calls propagate to initiate message passing for all nodes in edge_index
#         """
#         self.store_messages = final_epoch
#         return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x)  # Triggers message passing
    
#     def message(self, x_i, x_j, edge_index):
#         """
#         Applying MLP to every directed edge in edge_index for [x_i, x_j].
#         Also stores positions, times, and velocities for final epoch.
#         """
#         edge_features = torch.cat([x_i, x_j], dim=1)  # Concatenate node features
#         messages = self.mess_mlp(edge_features)  # Compute messages

#         if self.store_messages:  # Store messages in final epoch
#             edges = list(zip(edge_index[0].tolist(), edge_index[1].tolist()))

#             for (src, dest), msg, x_src, x_dest in zip(edges, messages, x_i, x_j):
#                 # Extract physical attributes from feature vector x_i, x_j
#                 position_src, position_dest = x_src[:3].cpu().numpy(), x_dest[:3].cpu().numpy()
#                 velocity_src, velocity_dest = x_src[3:6].cpu().numpy(), x_dest[3:6].cpu().numpy()
#                 time_src, time_dest = x_src[6].item(), x_dest[6].item()  # Assuming time is 7th feature

#                 # Store structured message data
#                 self.message_storage[(src, dest)] = {
#                     "message": msg.detach().cpu().numpy(),
#                     "position_src": position_src,
#                     "position_dest": position_dest,
#                     "velocity_src": velocity_src,
#                     "velocity_dest": velocity_dest,
#                     "time_src": time_src,
#                     "time_dest": time_dest
#                 }

#         return messages  # Pass messages to aggregation

#     def update(self, aggr_out, x=None):
#         """
#         Updates node features with passed messages.
#         """
#         if self.single_node:
#             return aggr_out
        
#         else:
#             update_features = torch.cat([x, aggr_out], dim=1)
#             return self.agg_mlp(update_features)

In [8]:
# ### --- This works --- ###

# import torch
# import torch.nn as nn
# import torch.optim as optim
# import numpy as np
# from torch_geometric.nn import MessagePassing
# from torch_geometric.data import Data
# from torch_geometric.loader import DataLoader

# # define the message passing class to initiate MLP's among the nodes
# class GNN_MLP(MessagePassing):
#     def __init__(self, n_f, m_dim, hidden_channels, out_channels, single_node = False):
#         super(GNN_MLP, self).__init__(aggr='add')  # "Add" aggregation for summing over forces
        
#         # initialising the MLP by creating the self.MLP attribute. 2 * in_channels to account for the fact that it may use both it's own and the other nodes features.
#         self.mess_mlp = nn.Sequential(
#             nn.Linear(2 * n_f, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, m_dim)
#         )

#         self.agg_mlp = nn.Sequential(
#             nn.Linear(m_dim + n_f, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, hidden_channels),
#             nn.ReLU(),
#             nn.Linear(hidden_channels, out_channels)
#         )
        
#         self.single_node = single_node
#         self.message_storage = {}
#         self.store_messages = False  

#     def forward(self, x, edge_index, final_epoch=False):
#         """
#         Forward calls propagate to initiate message passing for all nodes in edge_index
#         """
#         self.store_messages = final_epoch
#         return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x)  # Triggers message passing
    
#     def message(self, x_i, x_j, edge_index):
#         """
#         Applying mlp to every directed edge in edge_index for [x_i, x_j]
#         """
#         edge_features = torch.cat([x_i, x_j], dim=1)  # Concatenating node features for edge
#         messages = self.mess_mlp(edge_features)

#         if self.store_messages: # Store messages for final epoch
#             edges = list(zip(edge_index[0].tolist(), edge_index[1].tolist()))
#             for e, msg in zip(edges, messages):
#                 self.message_storage[e] = msg.detach().cpu().numpy() # detach from tree, found a new favourite
#         return messages  # Pass through MLP

#     def update(self, aggr_out, x=None):
#         """
#         Updates node features with passed messages.
#         """
#         if self.single_node:
#             return aggr_out
        
#         else:
#             update_features = torch.cat([x, aggr_out], dim=1)
#             return self.agg_mlp(update_features)

In [None]:
class GNN_MLP(MessagePassing):
    def __init__(self, n_f, m_dim, hidden_channels, out_channels, single_node=False):
        super(GNN_MLP, self).__init__(aggr='add')
        
        self.mess_mlp = nn.Sequential(
            nn.Linear(2 * n_f, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, m_dim)
        )

        self.agg_mlp = nn.Sequential(
            nn.Linear(m_dim + n_f, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, out_channels)
        )
        
        self.single_node = single_node
        self.message_storage = []
        self.store_messages = False
        self.current_time = None
        self.current_mass = None

    def forward(self, x, edge_index, pos=None, final_epoch=False):
        self.store_messages = final_epoch
        # Extract the time from the last column of x (assumes time is broadcasted to all nodes)
        if final_epoch:
            # self.current_time = round(x[0, -1].item(), 4) # Just take it from the first node
            self.current_time = x[0, -1].item()

        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, pos=pos)

    def message(self, x_i, x_j, edge_index_i, edge_index_j):
        edge_features = torch.cat([x_i, x_j], dim=1)
        messages = self.mess_mlp(edge_features)

        if self.store_messages:
            for i in range(messages.size(0)):
                difference = x_i[i, :2] - x_j[i, :2]
                distance = torch.norm(difference, p=2)

                record = {
                    'edge': (edge_index_i[i].item(), edge_index_j[i].item()),
                    'message': messages[i].detach().cpu().numpy(),
                    'pos_i': x_i[i, :2].detach().cpu().numpy(),  # Extract position (assuming 2D)
                    'pos_j': x_j[i, :2].detach().cpu().numpy(),
                    'mass_i': x_i[0, -2].item(),  # Extract mass
                    'mass_j': x_j[0, -2].item(),
                    'distance': distance.item(),
                    'time': self.current_time,
                    
                }
                self.message_storage.append(record)

        return messages

    def update(self, aggr_out, x=None):
        if self.single_node:
            return aggr_out
        else:
            update_features = torch.cat([x, aggr_out], dim=1)
            return self.agg_mlp(update_features)


In [10]:
import numpy as np
import torch
import torch.optim as optim
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import pickle
import os
current_dir = os.getcwd()
# Move one level up (assuming your notebook is inside "Test" and "final_messages" is in the parent)
parent_dir = os.path.dirname(current_dir)

# Define the correct path to the `final_messages` folder

def train_model(model, train_data, sim_name:str, epochs=100, lr=0.01):
    """
    Train a GNN model using the provided training and validation loaders.
    
    Parameters:
    -----------
    model : torch.nn.Module
        A PyTorch module representing the GNN model.
    train_loader : torch_geometric.loader.DataLoader
        A DataLoader object containing training data.
    val_loader : torch_geometric.loader.DataLoader
        A DataLoader object containing validation data.
    epochs : int
        Number of epochs to train the model.
    lr : float
        Learning rate for the optimizer.
    
    Returns:
    --------
    model : torch.nn.Module
        The trained model.
    """
    
    # Only convert to DataLoader if not already in DataLoader format
    if isinstance(train_data, DataLoader):
        train_data = DataLoader(train_data, batch_size=1, shuffle=False)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    
    for epoch in range(epochs):
        total_loss = 0
        final_epoch = (epoch == epochs - 1)
        for data in train_data:
            optimizer.zero_grad()
            out = model(data.x, data.edge_index, final_epoch=final_epoch)
            loss = criterion(out, data.y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
    save_messages(sim_name, model)
    
    return model


In [11]:
model = GNN_MLP(n_f=6, m_dim=2, out_channels=2, hidden_channels=128, single_node=True)
train_model(model, traj_graph, lr=0.01, epochs=100, sim_name="projectile_trajectory_2d")

GNN_MLP()

In [12]:
# Output results
print("\nSimulation Results:")
for idx, data in enumerate(traj_graph):
    with torch.no_grad():
        pred_force = model(data.x, data.edge_index)  # Model predicts net force
        
        # Ground truth force
        ground_truth_force = data.y
        
        # Ensure correct shapes and conversion to NumPy for printing
        pred_force_np = pred_force.cpu().numpy().flatten()  # Convert to NumPy array and flatten
        ground_truth_np = ground_truth_force.cpu().numpy().flatten()  # Convert to NumPy and flatten

        # Print each component of the force vector
        print(f"Sample {idx + 1}:")
        for i in range(len(pred_force_np)):
            print(f"   Component {i}: Ground Truth = {ground_truth_np[i]:.6f}, Predicted = {pred_force_np[i]:.6f}")



Simulation Results:
Sample 1:
   Component 0: Ground Truth = 0.000000, Predicted = -0.000184
   Component 1: Ground Truth = -9.810000, Predicted = -9.766108
Sample 2:
   Component 0: Ground Truth = 0.000000, Predicted = -0.002297
   Component 1: Ground Truth = -9.810000, Predicted = -9.900576
Sample 3:
   Component 0: Ground Truth = 0.000000, Predicted = -0.004300
   Component 1: Ground Truth = -9.810000, Predicted = -10.027980
Sample 4:
   Component 0: Ground Truth = 0.000000, Predicted = -0.006191
   Component 1: Ground Truth = -9.810000, Predicted = -10.148320
Sample 5:
   Component 0: Ground Truth = 0.000000, Predicted = -0.007971
   Component 1: Ground Truth = -9.810000, Predicted = -10.261597
Sample 6:
   Component 0: Ground Truth = 0.000000, Predicted = -0.009239
   Component 1: Ground Truth = -9.810000, Predicted = -10.342231
Sample 7:
   Component 0: Ground Truth = 0.000000, Predicted = -0.008087
   Component 1: Ground Truth = -9.810000, Predicted = -10.268961
Sample 8:
   Co

In [13]:
# Print stored messages
print("\nStored Messages:")
for i, record in enumerate(model.message_storage):
    print(f"Message {i + 1}")
    print(f"  Edge        : {record['edge']}")
    print(f"  Message     : {record['message']}")
    print(f"  pos_i       : {record['pos_i']}")
    print(f"  pos_j       : {record['pos_j']}")
    print(f"  mass_i      : {record['mass_i']}")
    print(f"  mass_j      : {record['mass_j']}")
    print(f"  Time        : {record['time']}")
    print(f"  Distance    : {record['distance']}")
    print("-" * 40)



Stored Messages:
Message 1
  Edge        : (0, 0)
  Message     : [-4.4512898e-03 -1.0190016e+01]
  pos_i       : [0. 0.]
  pos_j       : [0. 0.]
  mass_i      : 3.0
  mass_j      : 3.0
  Time        : 0.0
  Distance    : 0.0
----------------------------------------
Message 2
  Edge        : (0, 0)
  Message     : [ 8.7300539e-03 -1.0065115e+01]
  pos_i       : [13.7888   11.373977]
  pos_j       : [13.7888   11.373977]
  mass_i      : 3.0
  mass_j      : 3.0
  Time        : 0.20000000298023224
  Distance    : 0.0
----------------------------------------
Message 3
  Edge        : (0, 0)
  Message     : [ 3.818348e-03 -9.851719e+00]
  pos_i       : [27.5776   22.355555]
  pos_j       : [27.5776   22.355555]
  mass_i      : 3.0
  mass_j      : 3.0
  Time        : 0.4000000059604645
  Distance    : 0.0
----------------------------------------
Message 4
  Edge        : (0, 0)
  Message     : [-8.005038e-03 -9.661491e+00]
  pos_i       : [41.3664   32.944733]
  pos_j       : [41.3664   32.

In [28]:
df = pd.DataFrame(model.message_storage)

AttributeError: 'PySRRegressor' object has no attribute 'message_storage'

In [29]:
df[['pos_i_x', 'pos_i_y']] = pd.DataFrame(df['pos_i'].tolist(), index=df.index)
df[['pos_j_x', 'pos_j_y']] = pd.DataFrame(df['pos_j'].tolist(), index=df.index)
df[['message_x', 'message_y']] = pd.DataFrame(df['message'].tolist(), index=df.index)

# Drop original array columns
df = df.drop(columns=['pos_i', 'pos_j', 'message', 'edge'])  # optional


KeyError: 'pos_i'

In [16]:
df.to_csv(f"{save_dir}/messages_cleaned.csv", index=False)


In [17]:
import pandas as pd
from pysr import PySRRegressor




Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [32]:
# Load your cleaned DataFrame
dff = pd.read_csv(f"{save_dir}/messages_cleaned.csv")

# Define input features (X) and target (y)
# features = ['pos_i_x', 'pos_i_y', 'pos_j_x', 'pos_j_y', 'mass_i', 'mass_j', 'time', 'distance']
features = [['mass_i']]

dff['force_x'] = dff['message_x'] * dff['mass_i']
dff['force_y'] = dff['message_y'] * dff['mass_i']

X = dff[features]
y_x = dff['message_x']
y_y = dff['message_y']


KeyError: "None of [Index([('mass_i',)], dtype='object')] are in the [columns]"

In [33]:

# Create and fit SR model for message_x
model_x = PySRRegressor(
    niterations=100,
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["sin", "cos", "exp", "log"],
    model_selection="best",  # Select best tradeoff between complexity and error
    select_k_features=1,  # small number of features
    verbosity=1,
)

model_x.fit(X.values, y_x.values)

# Print best expression for message_x
print("Best expression for message_x:")
print(model_x)

# Optionally: model for message_y too
model_y = PySRRegressor(
    niterations=100,
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["sin", "cos", "exp", "log"],
    model_selection="best",
    select_k_features=1,  # small number of features
    verbosity=1,
)

model_y.fit(X.values, y_y.values)

print("Best expression for message_y:")
print(model_y)


[ Info: Started!


Using features ['x0']
Best expression for message_x:
PySRRegressor.equations_ = [
	   pick      score          equation      loss  complexity
	0         0.000000        -0.8583437  0.736827           1
	1  >>>>  11.183283  sin(3.660396e-5)  0.000010           2
]
Using features ['x0']


[ Info: Started!



Expressions evaluated per second: 6.580e+05
Head worker occupation: 17.0%
Progress: 1439 / 1500 total iterations (95.933%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           8.330e+01  1.594e+01  y = -0.68413
2           7.781e+01  6.817e-02  y = cos(x0)
3           1.025e-02  8.934e+00  y = x0 + -12.81
---------------------------------------------------------------------------------------------------
Press 'q' and then <enter> to stop execution early.
Best expression for message_y:
PySRRegressor.equations_ = [
	   pick     score         equation       loss  complexity
	0        0.000000      -0.68412554  83.295784           1
	1        0.068172          cos(x0)  77.806610           2
	2  >>>>  8.934647  x0 + -12.810217   0.010251           3
]


In [20]:

df['force_x']
df['force_y']

0    -30.570048
1    -30.195345
2    -29.555157
3    -28.984473
4    -28.701384
5    -28.772622
6    -28.768518
7    -28.948053
8    -29.290395
9    -29.647086
10   -29.857449
11   -29.822664
12   -29.564796
13   -29.223009
14   -29.030241
15   -29.100078
16   -29.328705
17   -29.586930
18   -29.717112
19   -29.727942
20   -29.768742
21   -29.616603
22   -29.369997
23   -29.181963
24   -29.164884
25   -29.322546
26   -29.545224
27   -29.677014
28   -29.626197
29   -29.438487
30   -29.262372
31   -29.232075
32   -29.364045
33   -29.543691
34   -29.617035
35   -29.524779
36   -29.357877
37   -29.272140
38   -29.342649
39   -29.494101
40   -29.572785
41   -29.502042
42   -29.363685
43   -29.308278
44   -29.391051
45   -29.512377
46   -29.530662
47   -29.429571
48   -29.337600
49   -29.367534
50   -29.475003
51   -29.515791
52   -29.438331
53   -29.355258
54   -29.383656
55   -29.476716
56   -29.492469
57   -29.410461
Name: force_y, dtype: float64

In [25]:
import pandas as pd
import numpy as np

# Simulate data
np.random.seed(42)
n_samples = 100

# Random masses between 1 and 10 kg
mass_i = np.random.uniform(1, 10, size=n_samples)

# Constant acceleration (e.g. gravity)
acceleration = -9.8  # m/s^2

# Compute force using Newton's second law: F = m * a
force_y = mass_i * acceleration

# Optionally, add a small bit of noise
force_y_noisy = force_y + np.random.normal(0, 0.05, size=n_samples)

# Simulate dummy features for testing
pos_i_x = np.random.randn(n_samples)
pos_i_y = np.random.randn(n_samples)
pos_j_x = np.random.randn(n_samples)
pos_j_y = np.random.randn(n_samples)
mass_j = np.random.uniform(1, 10, size=n_samples)
time = np.linspace(0, 10, n_samples)
distance = np.sqrt((pos_j_x - pos_i_x)**2 + (pos_j_y - pos_i_y)**2)

# Construct the DataFrame
df = pd.DataFrame({
    'pos_i_x': pos_i_x,
    'pos_i_y': pos_i_y,
    'pos_j_x': pos_j_x,
    'pos_j_y': pos_j_y,
    'mass_i': mass_i,
    'mass_j': mass_j,
    'time': time,
    'distance': distance,
    'message_y': force_y_noisy
})

# Peek at the DataFrame
print(df.head())


    pos_i_x   pos_i_y   pos_j_x   pos_j_y    mass_i    mass_j     time  \
0  0.013002  0.199060  0.872321  0.576557  4.370861  9.158249  0.00000   
1  1.453534 -0.600217  0.183342  0.311250  9.556429  6.618142  0.10101   
2 -0.264657  0.069802  2.189803  3.078881  7.587945  2.052082  0.20202   
3  2.720169 -0.385314 -0.808298  1.119575  6.387926  9.458489  0.30303   
4  0.625667  0.113517 -0.839722 -0.127918  2.404168  6.649372  0.40404   

   distance  message_y  
0  0.938580 -42.830086  
1  1.563381 -93.667952  
2  3.883159 -74.357278  
3  3.835984 -62.701057  
4  1.485145 -23.571828  


In [26]:
X = df[['mass_i']]
y = df['message_y']

model = PySRRegressor(
    niterations=100,
    binary_operators=["+", "-", "*", "/"],
    unary_operators=[],
    model_selection="best",
    loss="loss(x, y) = (x - y)^2",
    verbosity=1,
    maxsize=10,
    select_k_features=1,  # small number of features
)

model.fit(X, y)

print(model)




Using features ['mass_i']


[ Info: Started!


PySRRegressor.equations_ = [
	   pick     score                                           equation  \
	0        0.000000                                         -51.269714   
	1  >>>>  6.356709                                 mass_i * -9.800534   
	2        0.008969                 (-9.80255 * mass_i) - -0.013288184   
	3        0.008694      (-9.8013735 * mass_i) + (0.02889725 / mass_i)   
	4        0.009862  (((0.0014348597 * mass_i) - 9.817994) * mass_i...   
	
	         loss  complexity  
	0  681.935000           1  
	1    0.002053           3  
	2    0.002016           5  
	3    0.001982           7  
	4    0.001943           9  
]


In [38]:
trajectory_list = []
for i in range (10):
    trajectory_list.append(projectile_trajectory_2d(1 + i, dt = 0.05, theta = 40 + 1 * i, v0 = 65 + 5 * i))

171
188
205
223
241
260
279
299
319
339


In [40]:
graphs_list = []
for i in range(len(trajectory_list)):
    graphs_list.append(node_data_list(trajectory_list[i], self_loop=True, complete_graph=False))


  y_target = torch.tensor(acceleration, dtype=torch.float32)


In [None]:
model = GNN_MLP(n_f=6, m_dim=2, out_channels=2, hidden_channels=128, single_node=True)

messages_list = []

for i in range(len(graphs_list)):
    train_model(model, graphs_list[i], lr=0.01, epochs=100, sim_name="projectile_trajectory_2d")
    for j, record in enumerate(model.message_storage):
            messages_list.append(record.copy())
