In [26]:
import sys
import os

# Working directory of the kernel. The parent of this directory is treated as
# the project root below (assumes the notebook is launched from a sub-folder of
# the project -- TODO confirm if the notebook is moved).
current_dir = os.getcwd()

# Make the project root importable so the `gnn_model` package can be resolved.
project_root = os.path.abspath(os.path.join(current_dir, ".."))
# Guard the append: re-running this cell must not stack duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

# Output directory for the extracted message tables (created if missing).
parent_dir = os.path.dirname(current_dir)
save_dir = os.path.join(parent_dir, "final_messages", "n_body_gravity")
os.makedirs(save_dir, exist_ok=True)

# Import relevant modules
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch_geometric.loader import DataLoader
import torch
import random
from gnn_model.graph_structure_from_trajecotry import node_data_list
from gnn_model.message_passing_MLP import GNN_MLP
from gnn_model.train_model import train_model

In [38]:
def generate_random_positions(N, dim, min_dist, box_size, max_attempts=100_000):
    """Sample N well-separated points in a box by rejection sampling.

    Each candidate is drawn as ``torch.rand(dim) * box_size`` and is accepted
    only if its Euclidean distance to every previously accepted point is at
    least ``min_dist``.

    Args:
        N: Number of points to generate.
        dim: Number of spatial dimensions.
        min_dist: Minimum allowed distance between any two points.
        box_size: Edge length of the sampling box.
        max_attempts: Upper bound on the total number of candidates drawn.
            Prevents an endless loop when the separation constraint cannot be
            met (for example ``min_dist`` larger than the box diagonal).

    Returns:
        Tensor of shape ``(N, dim)``; an empty ``(0, dim)`` tensor when
        ``N <= 0``.

    Raises:
        RuntimeError: If fewer than N points were accepted after
            ``max_attempts`` candidates.
    """
    if N <= 0:
        # torch.stack cannot handle an empty list, so answer directly.
        return torch.empty((0, dim))

    positions = []
    attempts = 0
    while len(positions) < N:
        if attempts >= max_attempts:
            raise RuntimeError(
                f"Could not place {N} points with min_dist={min_dist} in a box of "
                f"size {box_size} after {max_attempts} attempts "
                f"({len(positions)} placed)."
            )
        attempts += 1
        pos = torch.rand(dim) * box_size
        # Accept only candidates far enough from every point kept so far.
        if all(torch.norm(pos - p) >= min_dist for p in positions):
            positions.append(pos)
    return torch.stack(positions)

def generate_random_velocities(N, dim, velocity_scale=1.0):
    """Draw an ``(N, dim)`` tensor of random velocity components.

    Uniform samples from ``torch.rand`` are shifted by 0.5 and rescaled, so
    every component lies within ``[-velocity_scale, velocity_scale]``.
    """
    unit_samples = torch.rand((N, dim))
    centred = unit_samples - 0.5
    return centred * 2 * velocity_scale

def compute_gravitational_forces(positions, masses, G=1.0, eps=1e-5):
    """Compute the net Newtonian gravitational force on every body.

    For each pair ``(i, j)`` the force on body ``i`` points towards body ``j``
    with magnitude ``G * m_i * m_j / d**2``, where ``d`` is the Euclidean
    distance between the two bodies plus ``eps``. All pairs are evaluated at
    once with broadcasting instead of a Python double loop, because this
    function is called on every integration step.

    Args:
        positions: Tensor of shape ``(N, dim)`` with the body positions.
        masses: Length-N tensor (or sequence) of body masses.
        G: Gravitational constant.
        eps: Small offset added to every pair distance to avoid division by
            zero for coincident bodies.

    Returns:
        Tensor of shape ``(N, dim)`` with the net force on each body, in the
        dtype of ``positions``.
    """
    num_bodies = positions.shape[0]
    masses = torch.as_tensor(masses, dtype=positions.dtype, device=positions.device)

    # separation[i, j] = positions[j] - positions[i]
    separation = positions.unsqueeze(0) - positions.unsqueeze(1)
    distance = torch.linalg.norm(separation, dim=-1) + eps

    # A body exerts no force on itself. Setting the self-distance to infinity
    # zeroes those terms and keeps eps=0 free of 0/0 NaNs on the diagonal.
    self_pairs = torch.eye(num_bodies, dtype=torch.bool, device=positions.device)
    distance = distance.masked_fill(self_pairs, float("inf"))

    magnitude = G * masses.unsqueeze(1) * masses.unsqueeze(0) / distance**2
    # (magnitude / distance) * separation == magnitude * unit_direction
    pair_forces = (magnitude / distance).unsqueeze(-1) * separation

    # Sum the contributions of all partners j acting on each body i.
    return pair_forces.sum(dim=1).to(positions.dtype)

def generate_unique_masses(N, mass_range, resolution=25):
    """Pick N distinct masses from an evenly spaced grid over ``mass_range``.

    Sampling uses Python's ``random`` module (not the torch RNG), so call
    ``random.seed`` to make the result reproducible.

    Args:
        N: Number of masses to draw.
        mass_range: ``(low, high)`` bounds of the grid, both inclusive.
        resolution: Number of grid points. When ``N`` exceeds it, the grid is
            widened to ``N`` points so that N distinct values always exist.

    Returns:
        1-D float32 tensor with N masses drawn without replacement.
    """
    # random.sample raises ValueError when asked for more items than the
    # population holds, so make sure the grid has at least N candidates.
    grid_size = max(resolution, N)
    mass_grid = torch.linspace(mass_range[0], mass_range[1], grid_size).tolist()

    # Draw without replacement so that no two bodies share a mass.
    unique_masses = random.sample(mass_grid, N)

    return torch.tensor(unique_masses, dtype=torch.float32)

def n_body_simulation(N=5, T=100, dt=0.01, dim=2,
                      mass_range=(1.0, 30.0), min_dist=0.5,
                      box_size=10.0, velocity_scale=1.0):
    """Simulate N gravitating bodies for T time steps from random initial conditions.

    Args:
        N: Number of bodies.
        T: Number of recorded time steps.
        dt: Integration time step.
        dim: Number of spatial dimensions.
        mass_range: ``(low, high)`` range the body masses are drawn from.
        min_dist: Minimum initial separation between any two bodies.
        box_size: Edge length of the box the initial positions are drawn in.
        velocity_scale: Bound on the magnitude of each initial velocity component.

    Returns:
        Dict with keys ``"time"`` (shape ``(T,)``), ``"positions"`` and
        ``"velocities"`` (both shape ``(T, N, dim)``) and ``"masses"``
        (shape ``(N,)``).
    """
    # Random initial state.
    masses = generate_unique_masses(N, mass_range)
    positions = generate_random_positions(N, dim, min_dist, box_size)
    velocities = generate_random_velocities(N, dim, velocity_scale)

    # Pre-allocated result buffers.
    trajectory = torch.zeros((T, N, dim), dtype=torch.float32)
    trajectory_velocities = torch.zeros((T, N, dim), dtype=torch.float32)
    # Build the time axis from an integer index: torch.arange with a float step
    # can return T + 1 samples because of rounding (e.g. T=3, dt=0.1), which
    # would no longer line up with the (T, N, dim) trajectories.
    t_array = torch.arange(T, dtype=torch.float32) * dt

    for t in range(T):
        # Record the state *before* advancing it, so index 0 is the initial state.
        trajectory[t] = positions
        trajectory_velocities[t] = velocities

        # Semi-implicit (symplectic) Euler step: velocities are advanced first
        # and the updated velocities are then used to advance the positions.
        forces = compute_gravitational_forces(positions, masses)
        accelerations = forces / masses[:, None]
        velocities = velocities + accelerations * dt
        positions = positions + velocities * dt

    trajectory_data = {
        "time": t_array,
        "positions": trajectory,
        "velocities": trajectory_velocities,
        "masses": masses
    }

    return trajectory_data



In [39]:

from sklearn.model_selection import train_test_split
import pandas as pd

def run_pipeline(iterations=10, train_fraction=0.7,
                 N=5, T=100, dt=0.01, dim=2, hidden_channels=64,
                 m_dim=2, out_channels=2, epochs=100, lr=0.001):
    """Simulate, train a message-passing model and collect its edge messages.

    Graphs from all simulated trajectories are pooled and split into train and
    test sets, stratified by the trajectory each graph came from.

    Args:
        iterations: Number of independent simulations to run.
        train_fraction: Fraction of the pooled graphs used for training.
        N, T, dt, dim: Forwarded to ``n_body_simulation``.
        hidden_channels, m_dim, out_channels: Forwarded to ``GNN_MLP``.
        epochs, lr: Forwarded to ``train_model``.

    Returns:
        ``(model, train_messages, test_messages)`` where the last two are
        DataFrames with one column per component of ``pos_i``, ``pos_j`` and
        ``message``.
    """

    def _flatten_messages(records):
        """Expand the vector-valued message fields into one scalar column each."""
        frame = pd.DataFrame(records)
        for field in ("pos_i", "pos_j", "message"):
            components = pd.DataFrame(frame[field].tolist(), index=frame.index)
            width = components.shape[1]
            # Two components yield the familiar *_x / *_y columns; the labels
            # are derived from the data so other dim / m_dim values also work.
            labels = "xyz"[:width] if width <= 3 else [str(k) for k in range(width)]
            for position, label in enumerate(labels):
                frame[f"{field}_{label}"] = components.iloc[:, position]
        return frame.drop(columns=["pos_i", "pos_j", "message", "edge"])

    # 1) Run simulations
    all_trajectories = [n_body_simulation(N=N, T=T, dt=dt, dim=dim) for _ in range(iterations)]

    # 2) Convert to PyG data objects, remembering the source trajectory of each graph
    all_graph_data = []
    dataset_index = []
    for idx, traj in enumerate(all_trajectories):
        graphs = node_data_list(traj, self_loop=False, complete_graph=True)
        all_graph_data.extend(graphs)
        dataset_index.extend([idx] * len(graphs))

    # 3) Split into train/test, stratified so every trajectory contributes to both sets
    indices = list(range(len(all_graph_data)))
    train_idx, test_idx = train_test_split(indices, train_size=train_fraction, stratify=dataset_index)

    train_data = [all_graph_data[i] for i in train_idx]
    test_data = [all_graph_data[i] for i in test_idx]

    # 4) Initialize the model; the input width is read from the node features
    input_dim = train_data[0].x.shape[1]
    model = GNN_MLP(n_f=input_dim, m_dim=m_dim, hidden_channels=hidden_channels,
                    out_channels=out_channels, single_node=False)

    # 5) Train model (train_model is expected to leave the final-epoch messages
    #    in model.message_storage -- TODO confirm against train_model)
    model = train_model(model, train_data, epochs=epochs, lr=lr)

    # 6) Extract training messages
    train_messages = _flatten_messages(model.message_storage)

    # 7) Run the held-out graphs through the model and store their messages.
    #    No gradients are needed for evaluation, so skip building autograd graphs.
    model.message_storage = []  # Reset
    with torch.no_grad():
        for data in test_data:
            model(data.x, data.edge_index, save_messages=True)

    test_messages = _flatten_messages(model.message_storage)

    return model, train_messages, test_messages


In [40]:
from sklearn.model_selection import train_test_split
import pandas as pd

def run_pipeline_multi(iterations=10, train_fraction=0.7,
                 N_train=2, N_test_list=(3, 4, 5, 6), T=100, dt=0.01, dim=2, hidden_channels=64,
                 m_dim=2, out_channels=2, epochs=100, lr=0.001):
    """Train on N_train-body systems and collect messages on systems of other sizes.

    ``iterations`` is split into training and test simulation counts. All
    training simulations use ``N_train`` bodies; for every entry of
    ``N_test_list`` a fresh set of test simulations is generated and passed
    through the trained model.

    Args:
        iterations: Total simulation budget shared between training and testing.
        train_fraction: Share of ``iterations`` used for training simulations.
        N_train: Number of bodies in the training simulations.
        N_test_list: Body counts to evaluate the trained model on.
        T, dt, dim: Forwarded to ``n_body_simulation``.
        hidden_channels, m_dim, out_channels: Forwarded to ``GNN_MLP``.
        epochs, lr: Forwarded to ``train_model``.

    Returns:
        ``(model, train_messages, test_messages_all)`` where
        ``test_messages_all`` maps each tested body count to its DataFrame.

    Raises:
        ValueError: If the split leaves no training simulation, or no test
            simulation while ``N_test_list`` is non-empty.
    """

    def _flatten_messages(records):
        """Expand the vector-valued message fields into one scalar column each."""
        frame = pd.DataFrame(records)
        for field in ("pos_i", "pos_j", "message"):
            components = pd.DataFrame(frame[field].tolist(), index=frame.index)
            width = components.shape[1]
            # Two components yield the familiar *_x / *_y columns; the labels
            # are derived from the data so other dim / m_dim values also work.
            labels = "xyz"[:width] if width <= 3 else [str(k) for k in range(width)]
            for position, label in enumerate(labels):
                frame[f"{field}_{label}"] = components.iloc[:, position]
        return frame.drop(columns=["pos_i", "pos_j", "message", "edge"])

    def _graphs_from(trajectories):
        """Convert trajectories into one flat list of graph objects."""
        graphs = []
        for traj in trajectories:
            graphs.extend(node_data_list(traj, self_loop=False, complete_graph=True))
        return graphs

    N_test_list = list(N_test_list)

    # Round instead of truncating: int((1 - 0.9) * 10) evaluates to 0 because of
    # floating-point error, which silently produced no test simulations at all.
    n_train_runs = round(train_fraction * iterations)
    n_test_runs = iterations - n_train_runs
    if n_train_runs < 1:
        raise ValueError(
            f"train_fraction={train_fraction} with iterations={iterations} "
            "leaves no training simulations."
        )
    if N_test_list and n_test_runs < 1:
        raise ValueError(
            f"train_fraction={train_fraction} with iterations={iterations} "
            "leaves no test simulations."
        )

    # 1) Run training simulations with N_train
    train_trajectories = [n_body_simulation(N=N_train, T=T, dt=dt, dim=dim)
                          for _ in range(n_train_runs)]

    # 2) Convert training data to PyG objects (every training graph is used)
    train_data = _graphs_from(train_trajectories)

    # 3) Initialize model; the input width is read from the node features
    input_dim = train_data[0].x.shape[1]
    model = GNN_MLP(n_f=input_dim, m_dim=m_dim, hidden_channels=hidden_channels,
                    out_channels=out_channels, single_node=False)

    # 4) Train model
    model = train_model(model, train_data, epochs=epochs, lr=lr)

    # 5) Extract training messages
    train_messages = _flatten_messages(model.message_storage)

    # 6) Run and store test messages for each N in N_test_list
    test_messages_all = {}
    for N_test in N_test_list:
        test_trajectories = [n_body_simulation(N=N_test, T=T, dt=dt, dim=dim)
                             for _ in range(n_test_runs)]
        test_graph_data = _graphs_from(test_trajectories)

        # Start from an empty store so the messages belong to this N_test only.
        model.message_storage = []
        # Evaluation needs no gradients, so skip building autograd graphs.
        with torch.no_grad():
            for data in test_graph_data:
                model(data.x, data.edge_index, save_messages=True)

        test_messages_all[N_test] = _flatten_messages(model.message_storage)

    return model, train_messages, test_messages_all


In [41]:
# Seed every RNG used here before the stochastic run so that a fresh
# "Restart & Run All" regenerates the same simulations and the same messages.
# `random` drives the mass sampling and `torch` the initial positions and
# velocities; numpy is seeded as well in case the imported training code uses it.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Train on 2-body systems, then collect messages on 3- to 7-body systems.
model_1, train_messages_1, test_messages_1 = run_pipeline_multi(
    iterations=10,
    train_fraction=0.7,
    N_train=2,
    N_test_list=[3, 4, 5, 6, 7],
    T=100,
    dt=0.01,
    dim=2,
    hidden_channels=128,
    m_dim=2,
    out_channels=2,
    epochs=100,
    lr=0.001,
)

  y_target = torch.tensor(acceleration, dtype=torch.float32)
  y_target = torch.tensor(acceleration, dtype=torch.float32)
  y_target = torch.tensor(acceleration, dtype=torch.float32)
  y_target = torch.tensor(acceleration, dtype=torch.float32)
  y_target = torch.tensor(acceleration, dtype=torch.float32)
  y_target = torch.tensor(acceleration, dtype=torch.float32)


In [42]:
# Persist the training messages.
train_messages_1.to_csv(os.path.join(save_dir, "messages_cleaned.csv"), index=False)

# Write one file per tested system size. Iterating over the result dict keeps
# the export in sync with whatever N_test_list the pipeline was run with,
# instead of relying on a hard-coded range that has to be updated by hand.
for n_bodies, messages in test_messages_1.items():
    messages.to_csv(os.path.join(save_dir, f"N_{n_bodies}_messages_test_cleaned.csv"), index=False)