In [None]:
# prelude
import energnn
from ase.visualize import view
from ase.build import molecule
import torch_geometric.loader as tcg_loader
import torch as tc
from functools import reduce
def count_parameters(model):
    """Count the parameters of a model.

    Args:
        model: Any object whose ``parameters()`` yields items exposing
            ``numel()`` and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Returns:
        tuple: ``(total_params, trainable_params)`` where the second entry
        only counts parameters whose ``requires_grad`` is truthy.
    """
    total_params = 0
    trainable_params = 0
    for param in model.parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size
    return total_params, trainable_params


# Data Preparation
This code only needs to be executed the first time.

In [None]:
# Download the files that are needed (one-time setup step).
import energnn as erg
erg.Toolbelt.download_environment_files()

# BasicGNN

In [None]:
# Prepare the datasets.
import pickle
from energnn import DatasetAlexandria
# WBM dataset object; it is passed as `dataset` to the tester calls in later cells.
wbm = energnn.DatasetWBM()



In [None]:
#print(alex[60].y)
#print(wbm[880].y)
from energnn import DatasetAlexandria
# Build an Alexandria dataset from files '005'..'009' with cutoff=8.0 and dump it to disk.
# The BasicGNN training cell reads ".../alexset005-009_cutoff8.dump" back with pickle.load.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
DatasetAlexandria(load_files=['005', '006', '007', '008', '009'], cutoff=8.0).dump("/home/aylwin/Projects/matbench/tmp/alexset005-009_cutoff8.dump")

## Train model

In [None]:
# Create a new ModelBasicGNNDeeper instance (constructed with its default arguments).
# Re-running this cell rebinds `model`, discarding whatever the previous instance had learned.
model = energnn.ModelBasicGNNDeeper()

In [None]:
%%prun
l1loss = tc.nn.L1Loss()
mse_loss = tc.nn.MSELoss()
def train_loss_fn(y, pred):
    # This function will just train on e_above_hull aka y[:, 1]
    return mse_loss(y[:, 1], pred[:, 1])
def test_loss():
    n = 0
    energy_sum, e_above_hull_sum = 0, 0
    num_Acc, n_Acc = 0, 0
    def return_loss(y:tc.Tensor, pred:tc.Tensor)->dict:
        nonlocal n, energy_sum, e_above_hull_sum, num_Acc, n_Acc
        energy = l1loss(y[:, 0], pred[:, 0])
        e_above_hull = l1loss(y[:, 1], pred[:, 1])
        n+=1
        # determine the Acc.  *----XOR---*
        num_Acc += (lambda x, y:(1-(x+y)%2).sum())((y[:, 1]>0).int(), (pred[:, 1]>0).int())
        #print((y[:, 1]>0).int().sum())
        n_Acc += y.shape[0]
        # add energy and e_above_hull_sum
        energy_sum += float(energy)
        e_above_hull_sum += float(e_above_hull)
        return {"avr_energy":energy_sum/n, "avr_e_above_hull":e_above_hull_sum/n, "avr_accuracy":num_Acc/n_Acc}
    return return_loss
for epoch in range(40):
    print(f"Epoch {epoch} ================>")
    print("train:")
    for train_name in ('000-004', '005-009', '010-014'):
        with open(f"/home/aylwin/Projects/matbench/tmp/alexset{train_name}_cutoff8.dump", 'rb') as f:
            train_set = pickle.load(f)
        model, log = mb.tcg_trainer(
            model=model,
            dataset=train_set,
            optimizer=tc.optim.Adam(model.parameters(), lr=0.0001),
            device='cuda',
            epoch=1, 
            batch_size=64,
            num_workers=4,
            loss_fn = train_loss_fn
        )
    print("test:")
    mb.tcg_tester(
        model=model,
        dataset=wbm,
        device='cuda',
        batch_size=64,
        num_workers=4,
        loss_fn=test_loss,
    )


In [None]:
# Evaluate the current model on WBM and on the Alexandria train/test splits.
l1loss = tc.nn.L1Loss()


def test_loss():
    """Create a stateful metric callback that keeps running averages.

    Returns:
        A function ``return_loss(y, pred) -> dict`` that folds one batch into
        the running totals and returns the averages so far under the keys
        ``avr_energy`` (L1 on column 0), ``avr_e_above_hull`` (L1 on column 1)
        and ``avr_accuracy`` (share of samples where ``y[:, 1] > 0`` and
        ``pred[:, 1] > 0`` agree).
    """
    calls = 0
    samples_seen = 0
    agree_total = 0
    energy_total = 0
    hull_total = 0

    def return_loss(y:tc.Tensor, pred:tc.Tensor)->dict:
        nonlocal calls, samples_seen, agree_total, energy_total, hull_total
        energy_mae = l1loss(y[:, 0], pred[:, 0])
        hull_mae = l1loss(y[:, 1], pred[:, 1])
        calls += 1
        # Count the samples whose target and prediction agree on "value > 0".
        target_positive = y[:, 1] > 0
        pred_positive = pred[:, 1] > 0
        agree_total += (target_positive == pred_positive).sum()
        samples_seen += y.shape[0]
        # Running sums are stored as Python floats.
        energy_total += float(energy_mae)
        hull_total += float(hull_mae)
        return {
            "avr_energy": energy_total / calls,
            "avr_e_above_hull": hull_total / calls,
            "avr_accuracy": agree_total / samples_seen,
        }

    return return_loss


# Settings shared by the three evaluation runs below.
eval_kwargs = dict(
    model=model,
    device='cuda',
    batch_size=256,
    num_workers=32,
    loss_fn=test_loss,
)
energnn.tcg_tester(dataset=wbm, **eval_kwargs)
# NOTE(review): `alex_train` and `alex_test` are not defined in any visible
# cell; they must already exist in the kernel for the next two calls to run.
energnn.tcg_tester(dataset=alex_train, **eval_kwargs)
energnn.tcg_tester(dataset=alex_test, **eval_kwargs)

In [None]:
# Save the model.
import pickle
# This pickles the whole model object rather than only its weights.
# NOTE(review): unpickling presumably needs the same model class to be importable — confirm before sharing the file.
with open("/home/aylwin/Projects/matbench/tmp/good_model.ptm", "wb") as f:
    pickle.dump(model, f)

In [None]:
# Display the (total, trainable) parameter counts of the current model.
count_parameters(model)

# EnerGNN
---
This model's `.forward` returns the total energy of the input structure.

## Train Model

In [None]:
# Here is an example of how to train the EnerGNN model on the Alexandria dataset.
import energnn as erg
import torch as tc

# The dumped shards are loaded inside the loop below. To build a shard from
# scratch (only needed the first time), use for example:
#dataset = erg.datasetAlexandriaNeo(load_files=['000'], cutoff=6.0)

# Initialise the model and the loss.
model = erg.EnerG()
l1loss = tc.nn.L1Loss()
for ep in range(20):
    # Fixed: the banner used `i`, which is not assigned until the inner loop
    # starts (NameError on a fresh kernel); the epoch counter is `ep`.
    print(f"Epoch {ep} >>>>>>>>>>>>>> ")
    for file_id in ['000', '001', '002', '003', '004']:
        # Load one dumped shard at a time and train on it for a single pass.
        dataset = erg.Toolbelt.pk_load(f"/Alexandria/{file_id}_cutoff6.dump")
        # NOTE(review): a new Adam optimizer is created for every shard, so its
        # moment estimates restart each time — confirm this is intended.
        model, log = erg.tcg_trainer(
            model=model,
            dataset=dataset,
            optimizer=tc.optim.Adam(model.parameters(), lr=0.0001),
            device='cuda',
            epoch=1,
            batch_size=64,
            num_workers=4,
            loss_fn=l1loss
        )


## Test model

In [None]:
# Evaluate an EnerG model on Alexandria file '001'.
import energnn as erg
import torch as tc
import numpy as np
# NOTE(review): this constructs a new EnerG instance instead of using a trained
# one; to evaluate a saved model, swap in the commented-out pk_load call.
model = erg.EnerG()#erg.Toolbelt.pk_load("/model.dump")
dataset = erg.datasetAlexandriaNeo(load_files=['001'], cutoff=6.0)


def loss_fn_gen():
    """Build a stateful callback that returns the running mean L1 loss.

    Returns:
        A function ``ans(y, pred)`` which adds the L1 loss of the current call
        to a running sum and returns that sum divided by the number of calls.
        It also prints a random ~0.5% of the (y, pred) pairs for spot checks.
    """
    n = 0
    loss_sum = 0
    l1loss = tc.nn.L1Loss()

    def ans(y, pred):
        nonlocal n, loss_sum
        # float(...) needs single-element tensors, which holds for the
        # batch_size=1 used below.
        if np.random.rand() > 0.995:
            print("y>", float(y), "predict>", float(pred))
        ret = l1loss(y, pred)
        # Detach before accumulating so the running sum never keeps autograd
        # history alive across batches.
        loss_sum += ret.detach()
        n += 1
        return loss_sum/n

    return ans


# `loss_fn_gen` is passed uncalled (as a factory); presumably tcg_tester calls
# it to obtain the callback — verify against its implementation.
erg.tcg_tester(
    model=model,
    dataset=dataset,
    loss_fn=loss_fn_gen,
    batch_size=1,
)

# Playground

In [None]:
import energnn as erg
import torch as tc
# Load a previously dumped model through Toolbelt.pk_load.
model = erg.Toolbelt.pk_load("/model.dump")
# Load the dataset.
dataset = erg.datasetAlexandriaNeo(load_files=['001'], cutoff=6.0) # Only needs to run the first time; afterwards a dumped copy can be loaded instead (see next line).
#dataset = erg.Toolbelt.pk_load("/Alexandria/000_cutoff6.dump")
# Display the first sample of the dataset.
dataset[0]

In [None]:
from ase.io import read as ase_read # Reads an .extxyz file into an ase.atoms.Atoms object.
import zipfile # Needed because the ASE dataset is stored as a zip archive.
import pandas as pd
import matbench_discovery.data as matbench_data
DataFiles = matbench_data.DataFiles
df_wbm_summary:pd.DataFrame = pd.read_csv(DataFiles.wbm_summary.path) # Load the WBM summary dataframe.


In [None]:
import energnn as erg
import torch as tc
# Build the datasets and dump them so that later runs can load them directly.
#dataset = erg.datasetAlexandriaNeo(load_files=['000'], cutoff=6.0) # Only needs to run the first time.
# For each Alexandria file id: build the dataset with cutoff=6.0, then dump it via Toolbelt.pk_dump.
# NOTE(review): the dump target is "/{i}_cutoff6.dump" while the EnerGNN training cell loads
# "/Alexandria/{i}_cutoff6.dump" — confirm how Toolbelt resolves these paths.
for i in ['000', '001', '002', '003', '004', '005', '006', '007', '008', '009', '010']:
    dataset = erg.datasetAlexandriaNeo(load_files=[i], cutoff=6.0)
    erg.Toolbelt.pk_dump(dataset, f"/{i}_cutoff6.dump")

Loading Alexandria dataset ==== 


  syms: list[str] = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x])
  corr = sum(ufloat(ea.value, ea.uncertainty) for ea in self.energy_adjustments if ea.value) or ufloat(
100%|██████████| 100000/100000 [02:12<00:00, 752.41it/s]


Loading Alexandria dataset ==== 


100%|██████████| 100000/100000 [01:59<00:00, 836.55it/s]


Loading Alexandria dataset ==== 


  syms: list[str] = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x])
100%|██████████| 100000/100000 [02:07<00:00, 785.66it/s]


Loading Alexandria dataset ==== 


100%|██████████| 100000/100000 [02:29<00:00, 670.53it/s]


Loading Alexandria dataset ==== 


100%|██████████| 100000/100000 [03:32<00:00, 471.16it/s]


Loading Alexandria dataset ==== 


100%|██████████| 100000/100000 [02:54<00:00, 573.22it/s]


Loading Alexandria dataset ==== 


  syms: list[str] = sorted(sym_amt, key=lambda x: [get_el_sp(x).X, x])
100%|██████████| 100000/100000 [02:45<00:00, 602.82it/s]


Loading Alexandria dataset ==== 


100%|██████████| 100000/100000 [03:51<00:00, 431.12it/s]


Loading Alexandria dataset ==== 
