This script only works with the most recent models I trained (e.g. h_crystalls_15).

In [None]:
import sys
sys.path.insert(0, '/home/alapena/GitHub/graph2mat4abn')
import os
os.chdir('/home/ICN2/alapena/GitHub/graph2mat4abn') # Change to the root directory of the project

from graph2mat4abn.tools.import_utils import load_config, get_object_from_module
from graph2mat4abn.tools.tools import get_basis_from_structures_paths, get_kwargs, load_model
from graph2mat4abn.tools.scripts_utils import get_model_dataset, init_mace_g2m_model
from graph2mat4abn.tools.script_plots import update_loss_plots, plot_grad_norms
from pathlib import Path
from e3nn import o3
from mace.modules import MACE, RealAgnosticResidualInteractionBlock
from graph2mat.models import MatrixMACE
from graph2mat.bindings.e3nn import E3nnGraph2Mat
import torch
import warnings
from graph2mat import BasisTableWithEdges

warnings.filterwarnings("ignore", message="The TorchScript type system doesn't support")
warnings.filterwarnings("ignore", message=".*is not a known matrix type key.*")

In [None]:
# The current model:
# Directory of the training run to inspect (relative to the current working directory).
model_dir = Path("results/block_type_mse_nonzero_globalsquarenorm_1e-3_2-8ATOMS")
# Name of the checkpoint file inside model_dir that is loaded further below.
filename = "train_best_model.tar"
# Configuration stored alongside the run; it is used to build the basis and the model.
config = load_config(model_dir / "config.yaml")

# Basis generation (needed to initialize the model)
# The basis is built from the training and validation structure paths together.
train_paths, val_paths = get_model_dataset(model_dir, verbose=False)
paths = train_paths + val_paths
# "num_unique_z" is optional in the config; None is passed when it is absent.
basis = get_basis_from_structures_paths(paths, verbose=True, num_unique_z=config["dataset"].get("num_unique_z", None))
table = BasisTableWithEdges(basis)

print("Initializing model...")
# Builds the model together with its optimizer, LR scheduler and loss function from the config.
model, optimizer, lr_scheduler, loss_fn = init_mace_g2m_model(config, table)

In [None]:
# Load the model
# The checkpoint is loaded on CPU. Note that `lr_scheduler` is rebound to whatever
# `load_model` returns for it (None is passed in here).
model_path = model_dir / filename
model, checkpoint, optimizer, lr_scheduler = load_model(model, optimizer, model_path, lr_scheduler=None, initial_lr=None, device='cpu')
# Per-epoch training history stored in the checkpoint; used by the cells below.
history = checkpoint["history"]
# Keys are single-quoted inside the f-string: reusing the enclosing double quotes
# is a SyntaxError on Python versions older than 3.12.
print(f"Loaded model in epoch {checkpoint['epoch']} with training loss {checkpoint['train_loss']} and validation loss {checkpoint['val_loss']}.")

In [None]:
# Total training time expressed in days: the summed epoch times are divided by
# 60 * 60 * 24 (assumes "epoch_times" is stored in seconds — TODO confirm).
sum(history["epoch_times"]) / 60 / 60 / 24

In [None]:
# Scratch arithmetic: a fraction of a day converted to hours (x24).
# Presumably the fractional part of the training time in days — confirm.
.1923520622733568*24

In [None]:
# Scratch arithmetic: a fraction of an hour converted to minutes (x60).
# The literal equals the fractional part of .1923520622733568*24.
.616449494560563*60

In [None]:
# Scratch arithmetic: a fraction of a minute converted to seconds (x60).
# The literal equals the fractional part of .616449494560563*60.
.98696967363378*60

# Plots

## Loss curves

In [None]:
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go


def update_loss_plots(history, verbose=False):
    """Build a Plotly figure with the loss curves and the learning rate.

    The train/validation total, edge and node losses are drawn against the
    primary y-axis; the learning rate is drawn against a secondary,
    logarithmic y-axis on the right.

    NOTE: this notebook-local definition replaces the ``update_loss_plots``
    imported from ``graph2mat4abn.tools.script_plots``.

    Args:
        history: Mapping that provides the keys ``"train_loss"``,
            ``"val_loss"``, ``"train_edge_loss"``, ``"val_edge_loss"``,
            ``"train_node_loss"``, ``"val_node_loss"`` and
            ``"learning_rate"``. Each value is a sequence (presumably one
            entry per epoch) whose items are scalar ``torch.Tensor`` objects,
            plain numbers or ``None``. All sequences must have the same
            length.
        verbose: Unused; kept so existing callers keep working.

    Returns:
        The Plotly figure created by ``make_subplots``.

    Raises:
        KeyError: If one of the required keys is missing from ``history``.
        ValueError: If the sequences do not all have the same length.
    """

    def to_floats(values):
        # Tensors are unwrapped to Python scalars. Missing entries (None)
        # become NaN so every column is numeric; they show up as gaps, which
        # the loss traces bridge through ``connectgaps=True``.
        missing = float("nan")
        return [
            v.detach().item() if isinstance(v, torch.Tensor)
            else missing if v is None
            else float(v)
            for v in values
        ]

    # Column label -> key in ``history``. The learning rate goes last and is
    # plotted separately from the losses.
    lr_label = "Learning rate"
    column_keys = {
        "Train total": "train_loss",
        "Val total": "val_loss",
        "Train edge": "train_edge_loss",
        "Val edge": "val_edge_loss",
        "Train node": "train_node_loss",
        "Val node": "val_node_loss",
        lr_label: "learning_rate",
    }
    df = pd.DataFrame(
        {label: to_floats(history[key]) for label, key in column_keys.items()}
    )
    loss_labels = [label for label in df.columns if label != lr_label]

    # Create figure with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Loss labels have the form "<split> <component>": the split selects the
    # colour, the component selects the dash pattern and the legend group.
    split_colors = {"Train": "blue", "Val": "red"}
    component_dashes = {"total": "solid", "edge": "dash", "node": "dot"}

    # Add loss traces (primary y-axis)
    for label in loss_labels:
        split, component = label.split()
        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[label],
                name=label,
                line=dict(
                    color=split_colors[split],
                    dash=component_dashes[component],
                ),
                legendgroup=component,
                connectgaps=True,
            ),
            secondary_y=False,
        )

    # Add learning rate trace (secondary y-axis)
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df[lr_label],
            name=lr_label,
            line=dict(color="lightgreen", dash="solid"),
            legendgroup=lr_label,
        ),
        secondary_y=True,
    )

    # Upper limit of the loss axis: 95th percentile of the recorded losses
    # plus some headroom, so a few large outliers do not flatten the curves.
    # Missing/non-finite values are ignored; with no usable value at all the
    # headroom alone is used instead of failing on ``int(nan)``.
    loss_values = df[loss_labels].to_numpy(dtype=float)
    finite_losses = loss_values[np.isfinite(loss_values)]
    headroom = 10
    if finite_losses.size:
        ylim_up = int(np.percentile(finite_losses, 95)) + headroom
    else:
        ylim_up = headroom

    # Set axis titles and layout
    fig.update_layout(
        title="Loss curves",
        xaxis_title="Epoch number",
        yaxis=dict(
            title="Loss (eV²)",
            showgrid=True,
            range=[-5, ylim_up],
        ),
        yaxis2=dict(
            title="Learning rate",
            showgrid=False,
            type="log",
            side="right",
            tickformat=".0e",
            dtick=1,
        ),
        grid=dict(xside="bottom", yside="left"),
        # The legend is placed to the right of the plot area; the wider right
        # margin makes room for it.
        legend=dict(
            x=1.1,
            xanchor="left",
            y=1.0,
            yanchor="top",
        ),
        margin=dict(r=150),
    )
    return fig

# Calls the update_loss_plots defined in this cell, which shadows the function of
# the same name imported from graph2mat4abn.tools.script_plots. As the last
# expression of the cell, the returned figure is displayed by the notebook.
update_loss_plots(history)


## Gradients

In [None]:
# Plot the gradient norms stored in the checkpoint history under "grad_norms".
plot_grad_norms(history['grad_norms'])

Good! There does not seem to be any vanishing-gradient problem. All the gradients remained approximately constant during training, and the model is still learning.