# TorchIP: Lennard-Jones potential
An example notebook that shows how to reconstruct a Lennard-Jones potential using a high-dimensional neural network potential (HDNNP).


TODOs
- [ ] GPU: implementation
- [ ] optimization: multi-thread/process
- [ ] Improve training algorithm

In [None]:
# IPython shell escape: runs the external `gpustat` command (it must be installed and on PATH).
!gpustat

### Imports

In [None]:
import sys
sys.path.append('../')

import torchip
from torchip import logger
from torchip.datasets import RunnerStructureDataset, ToStructure
from torchip.potentials import NeuralNetworkPotential
from torchip.utils import gradient, get_value


import torch
import logging
import numpy as np
import pandas as pd
from pathlib import Path
from collections import defaultdict
import matplotlib.pylab as plt
from torch.utils.data import DataLoader

In [None]:
# from dask.distributed import Client
# from torchip.config import TaskClient
# TaskClient.client = Client(memory_limit='32GB', n_workers=1, processes=False, threads_per_worker=4, dashboard_address=':8791')
# TaskClient.client

In [None]:
# Uncomment to switch the library to DEBUG-level logging.
# torchip.set_logging_level(logging.DEBUG)
# Fixed seed (presumably seeds the library's random number generators so runs are repeatable).
torchip.manual_seed(2022)
# Select the CPU as the compute device; GPU support is still a TODO in this notebook's header.
torchip.device.DEVICE = torch.device("cpu")

# print(torchip.__doc__)
# print(f"Version: {torchip.__version__}")

In [None]:
# Directory with this example's files; "input.data" and "input.nn" are read from it below.
potdir = Path('./LJ')

## Dataset

In [None]:
# Load the structure dataset from "input.data" inside the potential directory.
structures = RunnerStructureDataset(potdir / "input.data", persist=True)
# Variant that additionally applies a ToStructure transform with r_cutoff=3.0:
# structures = RunnerStructureDataset(potdir / "input.data", transform=ToStructure(r_cutoff=3.0), persist=True)

In [None]:
# validation_split = 0.2
# nsamples = len(structures)
# split = int(np.floor(validation_split * nsamples))
# train_structures, valid_structures = torch.utils.data.random_split(structures, lengths=[nsamples-split, split])

## Potential

In [None]:
# Create the neural network potential from "input.nn" inside the potential directory.
nnp = NeuralNetworkPotential(potdir / "input.nn")

### Descriptors

#### Scaler

In [None]:
# Fit the potential's descriptor scaler on the loaded structures.
nnp.fit_scaler(structures)
# Alternative (presumably restores a previously saved scaler instead of refitting):
# nnp.load_scaler()

### Model

#### Training

In [None]:
# Train the model on `structures` for 100 epochs with validation_split=0.20
# (presumably 20% of the data is held out for validation). The returned
# `history` is turned into a DataFrame and plotted in the Results section.
# `%time` is an IPython magic that reports how long the statement took.
%time history = nnp.fit_model(structures, epochs=100, validation_split=0.20)
# Alternatives kept for reference:
#   - explicit train/validation datasets (needs the commented-out random_split cell above)
# %time history = nnp.fit_model(train_structures, epochs=10, validation_dataset=valid_structures)
#   - run through a `profile` helper (not defined in this notebook)
# %time history = profile(nnp.fit_model, structures, epochs=0, validation_split=0.20)
#   - presumably loads a previously trained model instead of training
# nnp.load_model()

## Results

#### Loss and error metrics

In [None]:
# Training curves, one panel per metric: loss, force RMSE and energy RMSE,
# each showing the training and validation series side by side.
fig, ax = plt.subplots(1, 3, figsize=(12,4))

df = pd.DataFrame(history)
metric_pairs = (
    ["train_loss", "valid_loss"],
    ["train_force_rmse", "valid_force_rmse"],
    ["train_energy_rmse", "valid_energy_rmse"],
)
for panel, columns in zip(ax, metric_pairs):
    df[columns].plot(ax=panel)
plt.tight_layout()

In [None]:
# Show the last rows of the training-history table built in the previous cell.
df.tail()

#### Energy and Forces

In [None]:
def mse(predictions, targets):
    """Return the mean squared error between *predictions* and *targets*.

    When *predictions* has more than one dimension the mean is taken along
    axis 0 only, giving one value per remaining index (e.g. per column of a
    2-D array). Otherwise the mean runs over all elements and a single value
    is returned.
    """
    # Decide on the reduction first, exactly as the dimensionality check dictates.
    reduce_along_first_axis = predictions.ndim > 1
    squared_error = (predictions - targets) ** 2
    if reduce_along_first_axis:
        return squared_error.mean(axis=0)
    return squared_error.mean()

def rmse(predictions, targets):
    """Return the root mean squared error: ``np.sqrt`` applied to ``mse``.

    The result has the same shape as the value returned by ``mse`` for the
    same arguments.
    """
    mean_squared_error = mse(predictions, targets)
    return np.sqrt(mean_squared_error)

# Evaluate the potential on every structure and tabulate predicted vs.
# reference energies and forces together with their errors.
df = defaultdict(list)
for structure in structures:

    # NOTE(review): presumably the distance from atom 0 to its neighbor; it is
    # used as the x-axis of the validation plots. Confirm against torchip.
    r = get_value(structure.calculate_distance(aid=0, neighbors=1))

    # Energy: model prediction vs. the reference value stored on the structure.
    energy = nnp(structure)
    E_pred = get_value(energy)
    E_true = get_value(structure.total_energy)
    E_rmse = rmse(E_pred, E_true)
    E_err = (E_true - E_pred)[0]

    # Force: negative gradient of the predicted energy w.r.t. the positions.
    force = -gradient(energy, structure.position)
    F_pred = get_value(force)
    F_true = get_value(structure.force)

    # One record per structure; key order defines the column order of the table.
    # Force entries keep only the first element ([0][0] / [0]) of each array.
    record = {
        'r': r[0],
        'E_pred': E_pred[0],
        'E_true': E_true[0],
        'E_mse': mse(E_pred, E_true),
        'E_rmse': E_rmse,
        'E_rmse/atom': E_rmse / structure.natoms,
        'E_err': E_err,
        'E_err/atom': E_err / structure.natoms,
        'F_pred': F_pred[0][0],
        'F_true': F_true[0][0],
        'F_mse': mse(F_pred, F_true)[0],
        'F_rmse': rmse(F_pred, F_true)[0],
        'F_err': (F_true - F_pred)[0][0],
    }
    for column, value in record.items():
        df[column].append(value)

    # For debugging, print E_pred / E_true / F_pred / F_true and their
    # mse(...) / rmse(...) values here.

df = pd.DataFrame(df)
print(f"Max  --> E_rmse={df['E_rmse'].max():6f} F_rmse={df['F_rmse'].max():6f}")
print(f"Mean --> E_rmse={df['E_rmse'].mean():6f} F_rmse={df['F_rmse'].mean():6f}")
print()
df

#### Validation 

In [None]:
# Validation figure: left column = energy, right column = force.
# Rows: predicted vs. reference over r, RMSE over r with a target line,
# and a histogram of the signed errors.
fig, ax = plt.subplots(3, 2, figsize=(11,8))
EV_TO_HARTREE = 0.0367493
ENERGY_ERR    = 2.0E-3*EV_TO_HARTREE  # 2.0 meV/atom, converted with EV_TO_HARTREE
FORCE_ERR     = 0.1*EV_TO_HARTREE     # 0.1 eV/Bohr, converted with EV_TO_HARTREE

# (figure column, predicted/reference columns, RMSE column, target line, error column)
panel_specs = (
    (0, ['E_pred', 'E_true'], 'E_rmse/atom', ENERGY_ERR, 'E_err/atom'),
    (1, ['F_pred', 'F_true'], 'F_rmse', FORCE_ERR, 'F_err'),
)
for col, value_columns, rmse_column, target, error_column in panel_specs:
    df.plot(x='r', y=value_columns, ax=ax[0][col], style='.-', lw=0.5)
    df.plot(x='r', y=[rmse_column], ax=ax[1][col], style='.-', lw=0.5)
    ax[1][col].axhline(target, ls='--', c='r')
    df.plot.hist(y=[error_column], ax=ax[2][col])
plt.tight_layout()