# Lennard-Jones potential example
This example notebook shows how to use TorchIP to build a high-dimensional neural network potential (HDNNP) that reproduces the Lennard-Jones potential.

In [None]:
# Run the external `gpustat` command through IPython's shell escape
# (the command must be installed on this machine).
!gpustat

### Imports

In [None]:
import sys
sys.path.append('../')

import torchip as tip
from torchip import logger
from torchip.datasets import RunnerStructureDataset, ToStructure
from torchip.potentials import NeuralNetworkPotential
from torchip.utils import gradient, get_value
from torchip.logger import LoggingContextManager

import torch
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict
import matplotlib.pylab as plt
from torch.utils.data import DataLoader
from ase.visualize import view
from ase.io.vasp import write_vasp

In [None]:
# from dask.distributed import Client
# from torchip.config import TaskClient
# TaskClient.client = Client(memory_limit='3GB', n_workers=2, processes=False, threads_per_worker=2, dashboard_address=':8791')
# TaskClient.client

In [None]:
# Seed TorchIP with a fixed value (presumably to make runs repeatable -- confirm
# against the TorchIP API).
tip.manual_seed(2022)
# Select the CPU as the global TorchIP device.
tip.device.DEVICE = torch.device("cpu")

# Optional settings, disabled by default:
# tip.dtype.FLOAT = torch.float64
# tip.set_logging_level(logging.DEBUG)

# Package information, disabled by default:
# print(tip.__doc__)
# print(f"version: {tip.__version__}")

In [None]:
# Directory of this example's input files; "input.data" and "input.nn" are read
# from it further down in the notebook.
potdir = Path('./LJ')

## Dataset

In [None]:
# Read the reference structures from "input.data" inside the potential directory.
structures = RunnerStructureDataset(potdir / "input.data", persist=True)
# Disabled variant that additionally passes a ToStructure transform:
# structures = RunnerStructureDataset(potdir / "input.data", transform=ToStructure(r_cutoff=3.0), persist=True)
print("Total number of structures:", len(structures))
structures

In [None]:
# structure = structures[0]
# atom_energy = {'O': 2.4, 'H': 1.2}

# structure.add_energy_offset(atom_energy)
# structure.total_energy

In [None]:
# with LoggingContextManager(level=logging.DEBUG):
#     structures[0].to_dict()

In [None]:
# validation_split = 0.2
# nsamples = len(structures)
# split = int(np.floor(validation_split * nsamples))
# train_structures, valid_structures = torch.utils.data.random_split(structures, lengths=[nsamples-split, split])

In [None]:
# with LoggingContextManager(level=logging.DEBUG):
#     structures[0].compare(structures[1])

#### Visualization

In [None]:
# atoms = structures[5].to_ase_atoms()
# atoms
# view(atoms)
# write_vasp('POSCAR', atoms)

## Potential

In [None]:
# Create the potential from the "input.nn" file inside the potential directory.
nnp = NeuralNetworkPotential(potdir / "input.nn")
nnp

### Descriptors

#### Scaler

In [None]:
# Fit the descriptor scaler of the potential on the full set of structures.
nnp.fit_scaler(structures)
# Disabled alternative (presumably restores a previously fitted scaler -- confirm):
# nnp.load_scaler()

### Model

#### Training

In [None]:
# Train the model on all structures for 100 epochs with validation_split=0.1
# (presumably the fraction of structures held out for validation -- confirm).
# The returned `history` is plotted in the "Loss and error metrics" cell.
# `%time` is the IPython line magic that reports how long the statement took.
%time history = nnp.fit_model(structures, epochs=100, validation_split=0.1)
# Disabled alternatives:
# %time history = nnp.fit_model(train_structures, epochs=10, validation_dataset=valid_structures)
# %time history = profile(nnp.fit_model, structures, epochs=0, validation_split=0.20)
# nnp.load_model()

## Results

#### Loss and error metrics

### Observations
- Results have to be validated against the N2P2 package.
- Force values are fine.
- Energy values are noticeably different (w.r.t. N2P2) - probably the training algorithm has to be improved.

We need a toolset to make the N2P2 comparison and the validation of forces, energies, descriptors, etc. easier and faster.

In [None]:
# Three side-by-side panels built from the training history: loss, energy error
# and force error, each comparing the training and the validation curve.
fig, ax = plt.subplots(1, 3, figsize=(12, 4))

df = pd.DataFrame(history)
panels = (
    ["train_loss", "valid_loss"],
    ["train_energy_error", "valid_energy_error"],
    ["train_force_error", "valid_force_error"],
)
# One panel per pair of history columns, drawn left to right.
for axis, columns in zip(ax, panels):
    df[columns].plot(ax=axis)
plt.tight_layout()

In [None]:
# Show the last rows of the training-history table (DataFrame.tail defaults to five rows).
df.tail()

#### Energy and Forces

In [None]:
# Evaluate the trained potential on every structure and collect, per structure,
# the predicted and reference energy/force together with their errors.
# The resulting table `df` is plotted in the "Validation" cell.
err_metric = nnp.trainer.error_metric

# Columns are accumulated under their own name so that `df` is only ever bound
# to a DataFrame (it was previously reused for the intermediate dict as well).
records = defaultdict(list)
for structure in structures:

    # Distance reported for atom 0 (neighbors=1); its first entry is the x-axis
    # value of the validation plots.
    r = get_value(structure.calculate_distance(aid=0, neighbors=1))
    records['r'].append(r[0])
    # NOTE(review): this sets an attribute on the metric object owned by the
    # trainer; the force error below is called without an explicit natoms and
    # presumably relies on it -- confirm against the TorchIP metric API.
    err_metric.natoms = structure.natoms

    # Energy: prediction, reference, metric value and signed difference.
    energy = nnp(structure)
    E_pred = get_value(energy)
    E_true = get_value(structure.total_energy)
    E_diff = (E_true - E_pred)[0]
    records['E_pred'].append(E_pred[0])
    records['E_true'].append(E_true[0])
    records['E_error'].append(float(get_value(err_metric(energy, structure.total_energy, structure.natoms))))
    records['E_err'].append(E_diff)
    records['E_err/atom'].append(E_diff / structure.natoms)

    # Force is the negative gradient of the predicted energy w.r.t. the positions.
    force = -gradient(energy, structure.position)

    F_pred = get_value(force)
    F_true = get_value(structure.force)

    # Only the [0][0] entry of the force arrays is tabulated (presumably the
    # first component on the first atom -- confirm the array layout).
    records['F_pred'].append(F_pred[0][0])
    records['F_true'].append(F_true[0][0])
    records['F_error'].append(float(get_value(err_metric(force, structure.force))))
    records['F_err'].append((F_true - F_pred)[0][0])

df = pd.DataFrame(records)
# `.6f` fixes the number of decimals; the previous `6f` only set a minimum field width.
print(f"Max  --> E_error={df['E_error'].max():.6f} F_error={df['F_error'].max():.6f}")
print(f"Mean --> E_error={df['E_error'].mean():.6f} F_error={df['F_error'].mean():.6f}")
print()
df

#### Validation 

In [None]:
# Validation figure: left column shows energies, right column shows forces.
# Rows: predicted vs. reference value over r, error metric over r, and a
# histogram of the signed errors.
fig, ax = plt.subplots(3, 2, figsize=(11, 8))

# Target accuracies, given in eV-based units and converted with EV_TO_HARTREE.
EV_TO_HARTREE = 0.0367493
ENERGY_ERR = 2.0E-3 * EV_TO_HARTREE  # 2.0 meV/atom
FORCE_ERR = 0.1 * EV_TO_HARTREE      # 0.1 eV/Bohr

df.plot(x='r', y=['E_pred', 'E_true'], ax=ax[0][0], style='.-', lw=0.5)
df.plot(x='r', y=['E_error'], ax=ax[1][0], style='.-', lw=0.5)
df.plot.hist(y=['E_err/atom'], ax=ax[2][0])

df.plot(x='r', y=['F_pred', 'F_true'], ax=ax[0][1], style='.-', lw=0.5)
df.plot(x='r', y=['F_error'], ax=ax[1][1], style='.-', lw=0.5)
df.plot.hist(y=['F_err'], ax=ax[2][1])

# The threshold lines are drawn only when the metric identifies itself as
# "RMSEpa"; for any other metric the error panels are left without them.
if str(err_metric) == "RMSEpa":
    ax[1][0].axhline(ENERGY_ERR, ls='--', c='r')
    print(f"Energy  Thrsh.: {ENERGY_ERR:.10f}")
    ax[1][1].axhline(FORCE_ERR, ls='--', c='r')
    print(f"Force   Thrsh.: {FORCE_ERR:.10f}")
plt.tight_layout()