# DATASETS

In [1]:
def unsorted_segment_sum(data, segment_ids, num_segments):
    """Custom PyTorch op to replicate TensorFlow's `unsorted_segment_sum`."""
    result_shape = (num_segments, data.size(1))
    result = data.new_full(result_shape, 0)  # Init empty result tensor.
    segment_ids = segment_ids.unsqueeze(-1).expand(-1, data.size(1))
    result.scatter_add_(0, segment_ids, data)
    return result

In [55]:
import torch
# from n_body_system.dataset_nbody import NBodyDataset
import dataset as dataset_nbody
from dataset import NBodyDataset
# from n_body_system.model import EGNN_vel
from EGNN import EGNN_vel
# from functions_utils import unsorted_segment_mean
import os
from torch import nn, optim
import json

In [21]:
model_name = 'egnn_task2'
batch_size = 100
epochs = 1000
no_cuda = False
seed = 1
log_interval = 1
test_interval = 5
outf = './logs'
lr = 5e-4
nf = 64
attention = 0
n_layers = 4
degree = 2
max_training_samples = 3000
dataset = "nbody_small"
sweep_training = 0
time_exp = 0
weight_decay = 1e-12
div = 1
norm_diff = False
tanh = False

time_exp_dic = {'time': 0, 'counter': 0}

cuda = not no_cuda and torch.cuda.is_available()

In [7]:
# Create log directory
try:
    os.makedirs(outf)
except OSError:
    pass

try:
    os.makedirs(outf + "/" + model_name)
except OSError:
    pass

In [8]:
device = torch.device("cuda" if cuda else "cpu")
loss_mse = nn.MSELoss()

In [9]:
def get_velocity_attr(loc, vel, rows, cols):

    diff = loc[cols] - loc[rows]
    norm = torch.norm(diff, p=2, dim=1).unsqueeze(1)
    u = diff/norm
    va, vb = vel[rows] * u, vel[cols] * u
    va, vb = torch.sum(va, dim=1).unsqueeze(1), torch.sum(vb, dim=1).unsqueeze(1)
    return va

In [10]:
# # Datasets and loaders
# dataset_train = NBodyDataset(partition='train', dataset_name=dataset,
#                                  max_samples=max_training_samples)
# loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, drop_last=True)

# dataset_val = NBodyDataset(partition='val', dataset_name="nbody_small")
# loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=batch_size, shuffle=False, drop_last=False)

# dataset_test = NBodyDataset(partition='test', dataset_name="nbody_small")
# loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size, shuffle=False, drop_last=False)

In [88]:
import glob
from torch_geometric.loader import DataLoader

paths = glob.glob("data/task1_2/train/*.npz")

dataset_train = NBodyDataset(paths)
dataset_size = len(dataset_train)
train_size = int(0.8 * dataset_size)
val_size = int(0.2 * dataset_size)

train_data, val_data = torch.utils.data.random_split(dataset_train, [train_size, val_size])

loader_train = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
loader_val = DataLoader(val_data, batch_size=batch_size, shuffle=False)


paths_test = glob.glob("data/task1_2/test/*.npz")
test_data = NBodyDataset(paths_test)
loader_test = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [31]:
dataset_train[0]

Data(x=[4, 1], edge_index=[2, 12], edge_attr=[12, 2], loc=[4, 2], vel=[4, 2], next_loc=[4, 2], next_vel=[4, 2])

In [13]:
print(type(loader_train))
next(iter(loader_train))


<class 'torch_geometric.loader.dataloader.DataLoader'>


DataBatch(x=[339, 1], edge_index=[2, 834], edge_attr=[834, 2], loc=[339, 2], vel=[339, 2], next_loc=[339, 2], next_vel=[339, 2], batch=[339], ptr=[101])

In [17]:
# # Location at time 0, velocity at time 0, edge_attr, charges, location at time T
# dataset_train[0][0].shape, dataset_train[0][1].shape, dataset_train[0][2].shape, dataset_train[0][3].shape, dataset_train[0][4].shape

In [18]:
# print(dataset_train.edges)

In [19]:
# print(dataset_train.get_edges(1, 3))

# MODEL

In [34]:
from torch_geometric.nn import summary

model = EGNN_vel(in_node_nf=1, in_edge_nf=2, hidden_nf=nf, device=device, n_layers=n_layers, recurrent=True, norm_diff=norm_diff, tanh=tanh).to(device)

dummy_data = dataset[0]
x = dummy_data.x.to(device)
loc = dummy_data.loc.to(device)
edge_index = dummy_data.edge_index.to(device)
vel = dummy_data.vel.to(device)
edge_attr = dummy_data.edge_attr.to(device)
print(summary(model, x, loc, edge_index, vel, edge_attr))


+----------------------------------+------------------------------------------+--------------------------+----------+
| Layer                            | Input Shape                              | Output Shape             | #Param   |
|----------------------------------+------------------------------------------+--------------------------+----------|
| EGNN_vel                         | [4, 1], [4, 2], [2, 12], [4, 2], [12, 2] | [4, 2], [4, 2]           | 134,150  |
| ├─(embedding)Linear              | [4, 1]                                   | [4, 64]                  | 128      |
| ├─(gcl_0)E_GCL_vel               | [4, 64], [2, 12], [4, 2], [4, 2]         | [4, 64], [4, 2], [12, 2] | 33,473   |
| │    └─(edge_mlp)Sequential      | [12, 131]                                | [12, 64]                 | 12,608   |
| │    │    └─(0)Linear            | [12, 131]                                | [12, 64]                 | 8,448    |
| │    │    └─(1)SiLU              | [12, 64]           

# TRAINING

In [35]:
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

In [36]:
def training_procedure(model, optimizer, loader_train, loader_val, loader_test, epochs, test_interval, model_name, outf):
    results = {'epochs': [], 'losess': []}
    best_val_loss = 1e8
    best_test_loss = 1e8
    best_epoch = 0
    for epoch in range(0, epochs):
        train(model, optimizer, epoch, loader_train)
        if epoch % test_interval == 0:
            #train(epoch, loader_train, backprop=False)
            val_loss = train(model, optimizer, epoch, loader_val, backprop=False)
            test_loss = train(model, optimizer, epoch, loader_test, backprop=False)
            results['epochs'].append(epoch)
            results['losess'].append(test_loss)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_test_loss = test_loss
                best_epoch = epoch
                torch.save(model.state_dict(), outf + "/" + model_name + f"/{model_name}.pth")
            print("*** Best Val Loss: %.5f \t Best Test Loss: %.5f \t Best epoch %d" % (best_val_loss, best_test_loss, best_epoch))

        json_object = json.dumps(results, indent=4)
        with open(outf + "/" + model_name + "/losess.json", "w") as outfile:
            outfile.write(json_object)
    return best_val_loss, best_test_loss, best_epoch
    

In [46]:
def train(model, optimizer, epoch, loader, backprop=True):
    if backprop:
        model.train()
    else:
        model.eval()

    res = {'epoch': epoch, 'loss': 0, 'coord_reg': 0, 'counter': 0}

    # for batch_idx, data in enumerate(loader):
    for data in loader:
        # print(f"Data type: {type(data)}")
        # print(f"Data: {data}")
        # # print("\nBatch idx: ", batch_idx)
        # print("Data: ", len(data))
        # batch_size, n_nodes, _ = data[0].size()
        # print("Batch size: ", batch_size)
        # print("Number of nodes: ", n_nodes)
        
        # data = [d.to(device) for d in data]

        # print("Data: ", data[0].shape)
        # data = [d.view(-1, d.size(2)) for d in data] # Remove the batch dimension
        # print("Data: ", data[0].shape)

        # loc, vel, edge_attr, charges, loc_end = data
        loc = data.loc.to(device)
        vel = data.vel.to(device)
        x = data.x.to(device)
        edges = data.edge_index.to(device)
        edge_attr = data.edge_attr.to(device)
        loc_end = data.next_loc.to(device)
        vel_end = data.next_vel.to(device)

        # loc, vel, x, edges, edge_attr, loc_end = data # TODO posso farlgli passare anche gli edges

        # edges = loader.dataset.get_edges(batch_size, n_nodes) # TODO questa va tolta
        edges = [edges[0].to(device), edges[1].to(device)]

        """
            Batch idx:  0
            Data: ...
            Batch size:  100
            Number of nodes:  5
            Data:  torch.Size([100, 5, 3])
            Data:  torch.Size([500, 3])
            train epoch 18 avg loss: 0.01948
        """

        optimizer.zero_grad()

        # if time_exp:
        #     torch.cuda.synchronize()
        #     t1 = time.time()
        

        # nodes = torch.sqrt(torch.sum(vel ** 2, dim=1)).unsqueeze(1).detach()
        # rows, cols = edges
        # loc_dist = torch.sum((loc[rows] - loc[cols])**2, 1).unsqueeze(1)  # relative distances among locations
        # edge_attr = torch.cat([edge_attr, loc_dist], 1).detach()  # concatenate all edge properties

        # loc_pred = model(nodes, loc.detach(), edges, vel, edge_attr)
        # loc_pred = model(x, loc.detach(), edges, vel, edge_attr)
        loc_pred, vel_pred = model(x, loc.detach(), edges, vel, edge_attr)


        # if time_exp:
        #     torch.cuda.synchronize()
        #     t2 = time.time()
        #     time_exp_dic['time'] += t2 - t1
        #     time_exp_dic['counter'] += 1

            # print("Forward average time: %.6f" % (time_exp_dic['time'] / time_exp_dic['counter']))
        
        # loss = loss_mse(loc_pred, loc_end)
        loss = loss_mse(vel_pred, vel_end)

        if backprop:
            loss.backward()
            optimizer.step()

        res['loss'] += loss.item()*batch_size
        res['counter'] += batch_size

        # if batch_idx % log_interval == 0 and (model == "se3_transformer" or model == "tfn"):
        #     print('===> {} Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(loader.dataset.partition,
        #         epoch, batch_idx * batch_size, len(loader.dataset),
        #         100. * batch_idx / len(loader),
        #         loss.item()))

    if not backprop:
        prefix = "==> "
    else:
        prefix = ""
    print('%s epoch %d avg loss: %.5f' % (prefix, epoch, res['loss'] / res['counter']))
    # print('%s epoch %d avg loss: %.5f' % (prefix+loader.dataset.partition, epoch, res['loss'] / res['counter']))

    return res['loss'] / res['counter']


In [47]:
# training_procedure(model, optimizer, loader_train, loader_val, loader_test, epochs, test_interval, model_name, outf)

# Other Tests

In [48]:
# Smaller batch size
model_name = 'egnn_task2_2'
batch_size = 4
epochs = 300
no_cuda = False
seed = 1
log_interval = 1
test_interval = 5
outf = './logs'
lr = 5e-4
nf = 64
attention = 0
n_layers = 4
degree = 2
max_training_samples = 3000
dataset = "nbody_small"
sweep_training = 0
time_exp = 0
weight_decay = 1e-12
div = 1
norm_diff = False
tanh = False

time_exp_dic = {'time': 0, 'counter': 0}

# Create log directory
try:
    os.makedirs(outf)
except OSError:
    pass

try:
    os.makedirs(outf + "/" + model_name)
except OSError:
    pass
# __________________________________________________________________________________________________ #

model = EGNN_vel(in_node_nf=1, in_edge_nf=2, hidden_nf=nf, device=device, n_layers=n_layers, recurrent=True, norm_diff=norm_diff, tanh=tanh).to(device)

# dummy_data = dataset[0]
# print(summary(model, dummy_data.x, dummy_data.loc, dummy_data.edge_index, dummy_data.vel, dummy_data.edge_attr))

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
training_procedure(model, optimizer, loader_train, loader_val, loader_test, epochs, test_interval, model_name, outf)

 epoch 0 avg loss: 0.33256
==>  epoch 0 avg loss: 0.31569
==>  epoch 0 avg loss: 0.30969
*** Best Val Loss: 0.31569 	 Best Test Loss: 0.30969 	 Best epoch 0
 epoch 1 avg loss: 0.31748
 epoch 2 avg loss: 0.31677
 epoch 3 avg loss: 0.31584
 epoch 4 avg loss: 0.31450
 epoch 5 avg loss: 0.31357
==>  epoch 5 avg loss: 0.31332
==>  epoch 5 avg loss: 0.30697
*** Best Val Loss: 0.31332 	 Best Test Loss: 0.30697 	 Best epoch 5
 epoch 6 avg loss: 0.31268
 epoch 7 avg loss: 0.31195
 epoch 8 avg loss: 0.31087
 epoch 9 avg loss: 0.30980
 epoch 10 avg loss: 0.30847
==>  epoch 10 avg loss: 0.30799
==>  epoch 10 avg loss: 0.30135
*** Best Val Loss: 0.30799 	 Best Test Loss: 0.30135 	 Best epoch 10
 epoch 11 avg loss: 0.30734
 epoch 12 avg loss: 0.30610
 epoch 13 avg loss: 0.30480
 epoch 14 avg loss: 0.30370
 epoch 15 avg loss: 0.30255
==>  epoch 15 avg loss: 0.30354
==>  epoch 15 avg loss: 0.29833
*** Best Val Loss: 0.30354 	 Best Test Loss: 0.29833 	 Best epoch 15
 epoch 16 avg loss: 0.30006
 epoch 1

KeyboardInterrupt: 

In [None]:
# Smaller model
model_name = 'egnn_task2_3'
batch_size = 16
epochs = 300
no_cuda = False
seed = 1
log_interval = 1
test_interval = 5
outf = './logs'
lr = 5e-4
nf = 32
attention = 0
n_layers = 4
degree = 2
max_training_samples = 3000
dataset = "nbody_small"
sweep_training = 0
time_exp = 0
weight_decay = 1e-12
div = 1
norm_diff = False
tanh = False

time_exp_dic = {'time': 0, 'counter': 0}

# Create log directory
try:
    os.makedirs(outf)
except OSError:
    pass

try:
    os.makedirs(outf + "/" + model_name)
except OSError:
    pass

cuda = not no_cuda and torch.cuda.is_available()

# __________________________________________________________________________________________________ #

model = EGNN_vel(in_node_nf=1, in_edge_nf=2, hidden_nf=nf, device=device, n_layers=n_layers, recurrent=True, norm_diff=norm_diff, tanh=tanh).to(device)

# dummy_data = dataset[0]
# print(summary(model, dummy_data.x, dummy_data.loc, dummy_data.edge_index, dummy_data.vel, dummy_data.edge_attr))

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
training_procedure(model, optimizer, loader_train, loader_val, loader_test, epochs, test_interval, model_name, outf)

In [None]:
# Lowe lr
model_name = 'egnn_task2_4'
batch_size = 16
epochs = 300
no_cuda = False
seed = 1
log_interval = 1
test_interval = 5
outf = './logs'
lr = 1e-4
nf = 64
attention = 0
n_layers = 4
degree = 2
max_training_samples = 3000
dataset = "nbody_small"
sweep_training = 0
time_exp = 0
weight_decay = 1e-12
div = 1
norm_diff = False
tanh = False

time_exp_dic = {'time': 0, 'counter': 0}

cuda = not no_cuda and torch.cuda.is_available()

# Create log directory
try:
    os.makedirs(outf)
except OSError:
    pass

try:
    os.makedirs(outf + "/" + model_name)
except OSError:
    pass

# __________________________________________________________________________________________________ #

model = EGNN_vel(in_node_nf=1, in_edge_nf=2, hidden_nf=nf, device=device, n_layers=n_layers, recurrent=True, norm_diff=norm_diff, tanh=tanh).to(device)

dummy_data = dataset_train[0]
print(summary(model, dummy_data.x, dummy_data.loc, dummy_data.edge_index, dummy_data.vel, dummy_data.edge_attr))

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
training_procedure(model, optimizer, loader_train, loader_val, loader_test, epochs, test_interval, model_name, outf)

In [184]:
#create a new model EGNN_vel and load the best model from the previous training
model = EGNN_vel(in_node_nf=1, in_edge_nf=2, hidden_nf=nf, device=device, n_layers=n_layers, recurrent=True, norm_diff=norm_diff, tanh=tanh).to(device)
model.load_state_dict(torch.load(outf + "/" + model_name + f"/{model_name}.pth"))



<All keys matched successfully>

In [192]:
graph = loader_train.dataset[1000]

In [193]:
print(graph.next_vel)
print(graph.next_loc)

tensor([[-0.0657,  0.7710],
        [ 0.2821,  0.0322],
        [ 0.6056,  0.2085],
        [ 0.2146,  0.0398],
        [-0.1661, -0.5974]])
tensor([[15.3184, 17.6959],
        [12.0323, 18.8535],
        [12.5798, 14.2019],
        [12.1333,  7.2062],
        [ 1.3248,  5.5971]])


In [194]:
x = graph.x.to(device)
loc = graph.loc.to(device)
vel = graph.vel.to(device)
edges = graph.edge_index.to(device)
edge_attr = graph.edge_attr.to(device)
edges = [edges[0].to(device), edges[1].to(device)]

loc_pred, vel_pred = model(x, loc.detach(), edges, vel, edge_attr)
print(f"vel_pred: {vel_pred}")
print(f"loc_pred: {loc_pred}")


vel_pred: tensor([[ 0.0349,  0.5218],
        [ 0.2865,  0.0644],
        [ 0.9613,  0.3073],
        [ 0.1873,  0.1281],
        [-0.0262, -0.5257]], device='cuda:0', grad_fn=<AddmmBackward0>)
loc_pred: tensor([[12.4386, 13.7175],
        [10.1018, 15.0359],
        [ 9.6235, 12.3083],
        [10.9564, 10.4238],
        [ 9.7249, 12.3771]], device='cuda:0', grad_fn=<AddBackward0>)
