In [208]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

import random
import math
import numpy as np
import torch
from torch.nn import functional as F
import torch_geometric
from torch_geometric.data import Data, Batch
from torch_geometric.loader import DataLoader
from torch_geometric.utils import is_undirected, to_undirected, remove_self_loops, to_dense_adj, dense_to_sparse
import e3nn
from e3nn import o3
from functools import partial

print("PyTorch version {}".format(torch.__version__))
print("PyG version {}".format(torch_geometric.__version__))
print("e3nn version {}".format(e3nn.__version__))

from src.utils.plot_utils import plot_2d, plot_3d
from src.utils.train_utils import run_experiment
from src.models import MACEModel
from cartesian_mace.models.model import CartesianMACE

from typing import List
import string

# Check PyTorch has access to MPS (Metal Performance Shader, Apple's GPU architecture)
# print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
# print(f"Is MPS available? {torch.backends.mps.is_available()}")

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
# device = torch.device("cpu")
print(f"Using device: {device}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
PyTorch version 1.13.1
PyG version 2.0.3
e3nn version 0.5.1
Using device: cpu


In [223]:
def create_rotsym_envs(fold=3):
    dataset = []

    # Environment 0
    atoms = torch.LongTensor([ 0 ] + [ 0 ] * fold)
    edge_index = torch.LongTensor( [ [0] * fold, [i for i in range(1, fold+1)] ] )
    x = torch.Tensor([1,0,0])
    pos = [
        torch.Tensor([0,0,0]),  # origin
        x,   # first spoke
    ]
    for count in range(1, fold):
        R = o3.matrix_z(torch.Tensor([2*math.pi/fold * count])).squeeze(0)
        pos.append(x @ R.T)
    pos = torch.stack(pos)
    y = torch.LongTensor([0])  # Label 0
    data1 = Data(atoms=atoms, edge_index=edge_index, pos=pos, y=y)
    data1.edge_index = to_undirected(data1.edge_index)
    dataset.append(data1)

    # Environment 1
    q = 2*math.pi/(fold + random.randint(1, fold))
    assert q < 2*math.pi/fold
    Q = o3.matrix_z(torch.Tensor([q])).squeeze(0)
    pos = pos @ Q.T
    y = torch.LongTensor([1])  # Label 1
    data2 = Data(atoms=atoms, edge_index=edge_index, pos=pos, y=y)
    data2.edge_index = to_undirected(data2.edge_index)
    dataset.append(data2)

    return dataset

In [224]:
# Set parameters
correlation = 2
max_ell = 3
fold = 2

# Create dataset
dataset = create_rotsym_envs(fold)
# for data in dataset:
    # plot_2d(data, lim=1)

# Create dataloaders
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(dataset, batch_size=1, shuffle=False)

n_layers = 1

cmace_model = CartesianMACE(n_layers=n_layers, dim=3, n_channels=3, self_tp_rank_max=max_ell, basis_rank_max=max_ell, feature_rank_max=max_ell, nu_max=correlation)

mace_model = MACEModel(scalar_pred=False, correlation=correlation, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)



In [225]:
best_val_acc, test_acc, train_time = run_experiment(
    mace_model,
    dataloader,
    val_loader,
    test_loader,
    n_epochs=100,
    n_times=2,
    device=device,
    verbose=False
)

Running experiment for MACEModel (cpu).


100%|██████████| 2/2 [00:03<00:00,  1.84s/it]


Done! Averaged over 2 runs: 
 - Training time: 1.83s ± 0.07. 
 - Best validation accuracy: 75.000 ± 25.000. 
- Test accuracy: 75.0 ± 25.0. 






In [226]:
from src.utils.train_utils import eval
from torch.nn.functional import cross_entropy, softmax
eval(mace_model, val_loader, 'cpu')

100.0

In [227]:
softmax(torch.Tensor([-0.5387, -1.2]))

  softmax(torch.Tensor([-0.5387, -1.2]))


tensor([0.6596, 0.3404])

In [214]:
for batch in val_loader:
    print(batch.y)
    print(cross_entropy(mace_model(batch), batch.y))
    print(mace_model(batch))

tensor([0])
tensor(0.5032, grad_fn=<NllLossBackward0>)
tensor([[ 0.2055, -0.2191]], grad_fn=<AddmmBackward0>)
tensor([1])
tensor(0.5417, grad_fn=<NllLossBackward0>)
tensor([[0.0409, 0.3710]], grad_fn=<AddmmBackward0>)


In [215]:
def linearise_features(h: List[torch.Tensor]) -> torch.Tensor:
    # i from 0 to feature_rank_max e.g. for feature_rank_max = 2
    # in: list with h[i] = [ [n_nodes, n_channels, 1], [n_nodes, n_channels, 2], [n_nodes, n_channels, 2, 2] ]
    # out: [n_nodes, (1 * n_channels) + (2 * n_channels) + (2 * 2 * n_channels)]
    # we do this such that we have n_channel lots of each feature in order then the next

    h_flattened = []

    # essentially need to flatten all the dimensions
    for h_i in h:

        h_flattened.append(
            h_i.flatten(start_dim=1, end_dim=-1)
        )

    return torch.cat(h_flattened, dim=1)

In [230]:
def run_cmace_rotsym_test(fold: int, max_ell: int, n_times: int, cmace: bool=True) -> None:

    torch.manual_seed(1)

    dataset = create_rotsym_envs(fold)
    # for data in dataset:
        # plot_2d(data, lim=1)

    # Create dataloaders
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
    val_loader = DataLoader(dataset, batch_size=1, shuffle=False)
    test_loader = DataLoader(dataset, batch_size=1, shuffle=False)

    if cmace:
        model = CartesianMACE(n_layers=1, dim=3, n_channels=3, self_tp_rank_max=max_ell, basis_rank_max=max_ell, feature_rank_max=max_ell, nu_max=correlation)
    else:
        model = MACEModel(scalar_pred=False, correlation=correlation, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)

    print(model(list(val_loader)[0]))


    best_val_acc, test_acc, train_time = run_experiment(
        model,
        dataloader,
        val_loader,
        test_loader,
        n_epochs=1000,
        n_times=n_times,
        device=device,
        verbose=False
    )

In [231]:
for fold in [2,3,5,10]:
    print(f'Testing {fold}-fold symmetry:')

    run_cmace_rotsym_test(fold=fold, max_ell=2, n_times=1, cmace=True)

Testing 2-fold symmetry:
tensor([[ 5.0635, -0.4064]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:27<00:00, 27.66s/it]



Done! Averaged over 1 runs: 
 - Training time: 27.66s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 

Testing 3-fold symmetry:
tensor([[6.5633, 6.6753]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:25<00:00, 25.78s/it]



Done! Averaged over 1 runs: 
 - Training time: 25.78s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 

Testing 5-fold symmetry:
tensor([[11.8595, 64.1019]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:28<00:00, 28.53s/it]



Done! Averaged over 1 runs: 
 - Training time: 28.52s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 

Testing 10-fold symmetry:
tensor([[-1049.5126,   812.5063]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:29<00:00, 29.19s/it]


Done! Averaged over 1 runs: 
 - Training time: 29.19s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 






In [232]:
for fold in [2,3,5,10]:
    print(f'Testing {fold}-fold symmetry:')

    run_cmace_rotsym_test(fold=fold, max_ell=3, n_times=1, cmace=True)

Testing 2-fold symmetry:
tensor([[ 1.3711e+09, -4.8328e+08]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [02:22<00:00, 142.58s/it]



Done! Averaged over 1 runs: 
 - Training time: 142.57s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 

Testing 3-fold symmetry:
tensor([[ 9.8248e+08, -5.4012e+09]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [01:50<00:00, 110.15s/it]



Done! Averaged over 1 runs: 
 - Training time: 110.14s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 

Testing 5-fold symmetry:
tensor([[-5.0708e+09, -3.2243e+10]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [02:03<00:00, 123.68s/it]



Done! Averaged over 1 runs: 
 - Training time: 123.67s ± 0.00. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 

Testing 10-fold symmetry:
tensor([[ 4.7400e+11, -2.8252e+11]], grad_fn=<AddmmBackward0>)
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [02:26<00:00, 146.98s/it]


Done! Averaged over 1 runs: 
 - Training time: 146.97s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 






In [200]:
def model_var(n: int, cmace: Optional[bool] = True) -> torch.Tensor:

    results = []

    for _ in range(n):

        if cmace:
            model = CartesianMACE(n_layers=1, dim=3, n_channels=3, self_tp_rank_max=max_ell, basis_rank_max=1, feature_rank_max=1, nu_max=correlation)
        else:
            model = MACEModel(scalar_pred=False, correlation=correlation, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)


        for batch in val_loader:
            results.append(
                model(batch)
            )

    results = [torch.abs(t) for t in results]

    # Sum the absolute values
    results = sum([torch.sum(t) for t in results])

    return results/(2*n)

In [201]:
model_var(100, cmace=True), model_var(100, cmace=False) # look at the difference - not good!

(tensor(22.8486, grad_fn=<DivBackward0>),
 tensor(0.9643, grad_fn=<DivBackward0>))

In [202]:
for i in range(20):

    torch.manual_seed(i)

    cmace = CartesianMACE(n_layers=1, dim=3, n_channels=3, self_tp_rank_max=max_ell, basis_rank_max=1, feature_rank_max=1, nu_max=correlation)

    # mace = MACEModel(scalar_pred=False, correlation=correlation, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)

    # cmace.apply(he_init)

    print(f'\n\n{i}:\n')
    for batch in val_loader:
        print(cmace(batch))



0:

tensor([[21.8927, 31.9994]], grad_fn=<AddmmBackward0>)
tensor([[17.0885, 32.3366]], grad_fn=<AddmmBackward0>)


1:

tensor([[-3.1212,  2.2876]], grad_fn=<AddmmBackward0>)
tensor([[-3.1207,  2.3620]], grad_fn=<AddmmBackward0>)


2:

tensor([[3.5849, 1.4479]], grad_fn=<AddmmBackward0>)
tensor([[3.2797, 1.6850]], grad_fn=<AddmmBackward0>)


3:

tensor([[-0.6088, -0.3887]], grad_fn=<AddmmBackward0>)
tensor([[-0.4987, -0.6045]], grad_fn=<AddmmBackward0>)


4:

tensor([[-17.1576,  13.1661]], grad_fn=<AddmmBackward0>)
tensor([[-5.8083,  1.3818]], grad_fn=<AddmmBackward0>)


5:

tensor([[-5.8845,  5.2995]], grad_fn=<AddmmBackward0>)
tensor([[-5.6594,  4.0376]], grad_fn=<AddmmBackward0>)


6:

tensor([[-14.7398, -10.2856]], grad_fn=<AddmmBackward0>)
tensor([[1.9204, 3.1388]], grad_fn=<AddmmBackward0>)


7:

tensor([[-10.7634,  10.7773]], grad_fn=<AddmmBackward0>)
tensor([[-14.2322,   3.7056]], grad_fn=<AddmmBackward0>)


8:

tensor([[-5.3279, -2.0156]], grad_fn=<AddmmBackward0>)
tensor([[

In [203]:
torch.manual_seed(19)
cmace = CartesianMACE(n_layers=1, dim=3, n_channels=3, self_tp_rank_max=max_ell, basis_rank_max=1, feature_rank_max=1, nu_max=correlation)

for fold in [2,3,5,10]:
    print(f'Testing {fold}-fold symmetry:')

    run_cmace_rotsym_test(fold=fold, max_ell=3, n_times=1, cmace=True)

Testing 2-fold symmetry:
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:07<00:00,  7.93s/it]



Done! Averaged over 1 runs: 
 - Training time: 7.92s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 

Testing 3-fold symmetry:
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:08<00:00,  8.24s/it]



Done! Averaged over 1 runs: 
 - Training time: 8.24s ± 0.00. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 

Testing 5-fold symmetry:
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:08<00:00,  8.46s/it]



Done! Averaged over 1 runs: 
 - Training time: 8.45s ± 0.00. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 

Testing 10-fold symmetry:
Running experiment for CartesianMACE (cpu).


100%|██████████| 1/1 [00:08<00:00,  8.23s/it]


Done! Averaged over 1 runs: 
 - Training time: 8.22s ± 0.00. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 






In [120]:
import torch

# Define the dimensions of the matrix
n = 3

# Create an empty tensor of the desired size
orthogonal_matrix = torch.empty(10, n, n)

# Initialize the tensor with a random orthogonal matrix
torch.nn.init.orthogonal_(orthogonal_matrix)

print(orthogonal_matrix)


tensor([[[-1.9854e-01, -3.5726e-01, -3.9072e-01],
         [-3.2756e-01, -3.3672e-01,  5.4324e-02],
         [-4.7563e-01, -3.6555e-01, -1.9465e-01]],

        [[ 2.6603e-01,  6.9126e-02,  2.4355e-02],
         [-5.6519e-02, -3.9547e-01,  5.3077e-02],
         [ 5.7877e-01, -2.4843e-01, -6.0395e-01]],

        [[-1.5657e-01, -3.3078e-02, -1.4469e-01],
         [ 3.5730e-01,  4.2795e-01,  3.7995e-01],
         [ 6.7736e-02, -6.4437e-01, -2.7703e-02]],

        [[ 1.9814e-01, -6.4704e-01,  2.6346e-01],
         [ 1.4470e-01,  4.3917e-01, -1.6734e-01],
         [-7.4332e-02,  1.2774e-01, -4.1656e-01]],

        [[ 2.6841e-01, -7.5766e-02, -3.0645e-01],
         [ 5.1275e-01, -1.6116e-01,  3.7877e-01],
         [ 8.4154e-02,  1.6264e-01,  1.9937e-01]],

        [[ 7.3956e-01, -1.7768e-01, -2.0547e-01],
         [-5.9323e-02, -1.0115e-01,  7.6055e-02],
         [-2.3259e-01,  2.0006e-02,  1.4674e-01]],

        [[ 1.5771e-01,  5.7383e-01, -6.3143e-02],
         [-3.7634e-02,  2.6043e-01,  2

In [132]:
channel_weights = nn.Parameter(data=torch.randn(4, 4))
channel_weights = torch.nn.init.orthogonal_(channel_weights)

channel_weights[0] @ channel_weights[0].T

tensor(1.0000, grad_fn=<DotBackward0>)

In [134]:
channel_weights @ channel_weights.T

tensor([[ 1.0000e+00, -1.7393e-08, -7.4113e-08, -4.7840e-09],
        [-1.7393e-08,  1.0000e+00, -5.9454e-08, -5.3868e-08],
        [-7.4113e-08, -5.9454e-08,  1.0000e+00,  1.1606e-07],
        [-4.7840e-09, -5.3868e-08,  1.1606e-07,  1.0000e+00]],
       grad_fn=<MmBackward0>)

In [153]:
channel_weights = nn.Parameter(data=torch.randn(3, 2, 2))
channel_weights = torch.nn.init.orthogonal_(channel_weights)

# torch.nn.utils.parametrizations.orthogonal(channel_weights)
channel_weights

Parameter containing:
tensor([[[ 0.8572, -0.0587],
         [-0.5013, -0.1021]],

        [[ 0.2161,  0.5237],
         [ 0.4490, -0.6910]],

        [[ 0.1597, -0.8346],
         [ 0.4323, -0.3017]]], requires_grad=True)

In [154]:
[Q @ Q.T for Q in channel_weights]

[tensor([[ 0.7383, -0.4237],
         [-0.4237,  0.2617]], grad_fn=<MmBackward0>),
 tensor([[ 0.3210, -0.2648],
         [-0.2648,  0.6790]], grad_fn=<MmBackward0>),
 tensor([[0.7221, 0.3209],
         [0.3209, 0.2779]], grad_fn=<MmBackward0>)]

In [161]:
from scipy.stats import ortho_group
x = torch.from_numpy(ortho_group.rvs(3))

tensor([[ 1.0000e+00,  0.0000e+00,  1.2490e-16],
        [ 0.0000e+00,  1.0000e+00, -2.7756e-17],
        [ 1.2490e-16, -2.7756e-17,  1.0000e+00]], dtype=torch.float64)

In [197]:
weights = torch.randn(10, 3) # paths x channels
weights /= weights.std(dim=0)

weights

tensor([[-1.3964, -0.2122, -0.9179],
        [ 0.9133,  0.5158, -0.7792],
        [ 0.0905, -0.6632,  1.7903],
        [-0.5818, -0.6062, -0.2633],
        [ 0.3076, -0.5345, -0.0529],
        [-2.6448,  1.7043, -0.1977],
        [-0.5439,  1.5979, -0.0913],
        [ 0.2242,  0.0512, -1.8578],
        [-0.2327,  1.9302, -0.7186],
        [-0.2226,  0.4280,  0.8668]])

In [189]:
x = torch.randn(10, 3)

sum(x[:,0])

tensor(2.8319)

In [196]:
x.std(dim=0)

tensor([1.1466, 1.0878, 0.6871])