# Current issues with current project

1. Unit tests don't seem to be working
2. Speed tests don't work/what is the equivalent for MACE

In [1]:
# import necessary stuff to run the unit tests

%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

import random
import math
import numpy as np
import torch
from torch.nn import functional as F
import torch_geometric
from torch_geometric.data import Data, Batch
from torch_geometric.loader import DataLoader
from torch_geometric.utils import is_undirected, to_undirected, remove_self_loops, to_dense_adj, dense_to_sparse
import e3nn
from e3nn import o3
from functools import partial

print("PyTorch version {}".format(torch.__version__))
print("PyG version {}".format(torch_geometric.__version__))
print("e3nn version {}".format(e3nn.__version__))

from src.utils.plot_utils import plot_2d, plot_3d
from src.utils.train_utils import run_experiment
from src.models import MACEModel
from cartesian_mace.models.model import CartesianMACE

from typing import List
import string

# Check PyTorch has access to MPS (Metal Performance Shader, Apple's GPU architecture)
# print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
# print(f"Is MPS available? {torch.backends.mps.is_available()}")

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
# device = torch.device("cpu")
print(f"Using device: {device}")

PyTorch version 1.13.1
PyG version 2.0.3
e3nn version 0.5.1
Using device: cpu


In [2]:
def create_rotsym_envs(fold=3):
    dataset = []

    # Environment 0
    atoms = torch.LongTensor([ 0 ] + [ 0 ] * fold)
    edge_index = torch.LongTensor( [ [0] * fold, [i for i in range(1, fold+1)] ] )
    x = torch.Tensor([1,0,0])
    pos = [
        torch.Tensor([0,0,0]),  # origin
        x,   # first spoke
    ]
    for count in range(1, fold):
        R = o3.matrix_z(torch.Tensor([2*math.pi/fold * count])).squeeze(0)
        pos.append(x @ R.T)
    pos = torch.stack(pos)
    y = torch.LongTensor([0])  # Label 0
    data1 = Data(atoms=atoms, edge_index=edge_index, pos=pos, y=y)
    data1.edge_index = to_undirected(data1.edge_index)
    dataset.append(data1)

    # Environment 1
    q = 2*math.pi/(fold + random.randint(1, fold))
    assert q < 2*math.pi/fold
    Q = o3.matrix_z(torch.Tensor([q])).squeeze(0)
    pos = pos @ Q.T
    y = torch.LongTensor([1])  # Label 1
    data2 = Data(atoms=atoms, edge_index=edge_index, pos=pos, y=y)
    data2.edge_index = to_undirected(data2.edge_index)
    dataset.append(data2)

    return dataset

In [8]:
# Create dataloaders
fold = 3

dataset = create_rotsym_envs(fold)

dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(dataset, batch_size=1, shuffle=False)

In [14]:
max_ell = 3
n_layers = 1
nu_max = 2
fold = 2
n_epochs = 100

# first of all lets run the test using a mace model that is in theory expressive enough (i.e. max_ell >= fold).

mace_model = MACEModel(scalar_pred=False, correlation=nu_max, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)


best_val_acc, test_acc, train_time = run_experiment(
    mace_model,
    dataloader,
    val_loader,
    test_loader,
    n_epochs=n_epochs,
    n_times=2,
    device=device,
    verbose=False
)

# If we only use 100 epochs, there isn't enough time to learn and so we only get 50%



Running experiment for MACEModel (cpu).


100%|██████████| 2/2 [00:03<00:00,  1.63s/it]


Done! Averaged over 2 runs: 
 - Training time: 1.63s ± 0.11. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 






In [15]:
n_epochs = 200

mace_model = MACEModel(scalar_pred=False, correlation=nu_max, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)


best_val_acc, test_acc, train_time = run_experiment(
    mace_model,
    dataloader,
    val_loader,
    test_loader,
    n_epochs=n_epochs,
    n_times=2,
    device=device,
    verbose=False
)

# now if we use 200 epochs we get 75% which basically means after 200 epochs it got 50% and then after 200 more epochs (we keep training this same instantiation of the model) the model scored 100%



Running experiment for MACEModel (cpu).


100%|██████████| 2/2 [00:06<00:00,  3.12s/it]


Done! Averaged over 2 runs: 
 - Training time: 3.12s ± 0.07. 
 - Best validation accuracy: 75.000 ± 25.000. 
- Test accuracy: 75.0 ± 25.0. 






In [17]:
n_epochs = 500
mace_model = MACEModel(scalar_pred=False, correlation=nu_max, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)


best_val_acc, test_acc, train_time = run_experiment(
    mace_model,
    dataloader,
    val_loader,
    test_loader,
    n_epochs=n_epochs,
    n_times=2,
    device=device,
    verbose=False
)

# finally at 500 epochs, both times the test gives 100%! (not surprising that the second time, we get 100% if the first time we did too!)



Running experiment for MACEModel (cpu).


100%|██████████| 2/2 [00:15<00:00,  7.98s/it]


Done! Averaged over 2 runs: 
 - Training time: 7.98s ± 0.12. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 






In [31]:
# now set n_epochs=500 i.e. enough time to learn
n_epochs = 1000
max_ell = 3 # i.e. should get 100% on 2,3 and 50% on 5,10
nu_max = 2
n_layers=1

for fold in [2,3,5,10]:

    dataset = create_rotsym_envs(fold)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
    val_loader = DataLoader(dataset, batch_size=1, shuffle=False)
    test_loader = DataLoader(dataset, batch_size=1, shuffle=False)

    mace_model = MACEModel(scalar_pred=False, correlation=nu_max, num_layers=n_layers, out_dim=2, max_ell=max_ell, emb_dim=3)

    print(f'{fold} fold symmetry:')

    best_val_acc, test_acc, train_time = run_experiment(
        mace_model,
        dataloader,
        val_loader,
        test_loader,
        n_epochs=n_epochs,
        n_times=1,
        device=device,
        verbose=False
    )



2 fold symmetry:
Running experiment for MACEModel (cpu).


100%|██████████| 1/1 [00:16<00:00, 16.05s/it]



Done! Averaged over 1 runs: 
 - Training time: 16.04s ± 0.00. 
 - Best validation accuracy: 100.000 ± 0.000. 
- Test accuracy: 100.0 ± 0.0. 

3 fold symmetry:
Running experiment for MACEModel (cpu).


100%|██████████| 1/1 [00:15<00:00, 15.84s/it]



Done! Averaged over 1 runs: 
 - Training time: 15.84s ± 0.00. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 

5 fold symmetry:
Running experiment for MACEModel (cpu).


100%|██████████| 1/1 [00:17<00:00, 17.89s/it]



Done! Averaged over 1 runs: 
 - Training time: 17.89s ± 0.00. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 

10 fold symmetry:
Running experiment for MACEModel (cpu).


100%|██████████| 1/1 [00:17<00:00, 17.42s/it]


Done! Averaged over 1 runs: 
 - Training time: 17.42s ± 0.00. 
 - Best validation accuracy: 50.000 ± 0.000. 
- Test accuracy: 50.0 ± 0.0. 




