<a href="https://colab.research.google.com/github/gautamankitkumar/ankitgau-ms-report-data/blob/main/notebooks/run-mc-simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Goal

We run a Monte Carlo simulation.

## Fetch relevant packages and data

In [None]:
# Install ASE; the `ase.db` and `ase.build` modules are imported in the next cell.
! pip install ase
# Clone the repository that contains the notebooks and the `utils` helper package.
! git clone https://github.com/gautamankitkumar/ankitgau-ms-report-data.git
# Step into notebooks/utils and run the builder script there.
# NOTE(review): presumably this compiles the symmetry-function library used by
# `utils.fp_calculator` — confirm against libsymf_builder.py.
% cd ankitgau-ms-report-data
% cd notebooks
% cd utils
! python3 libsymf_builder.py
# Go back up to the notebooks directory; the relative paths used later
# (./datasets/..., utils.*) are resolved from there.
% cd ..

In [5]:
import os
import torch
import numpy as np
from ase.db import connect
from utils.fcc_helpers import cal_nrg
from utils.train_agent import BPNN
from utils.fp_calculator import set_sym
from ase.build import fcc111
import matplotlib.pyplot as plt
from utils.fp_calculator import set_sym, db_to_fp
from utils.train_agent import Agent, get_scaling
# Environment flag consulted by the Intel OpenMP runtime.
# NOTE(review): presumably a workaround for the "duplicate OpenMP library"
# abort when torch and numpy each load their own runtime — confirm it is needed.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

m = 1  # not used anywhere in the visible code
n = 4000  # structures with ids 1..n are read from the full database
np.random.seed(seed=42)  # fixed seed so the split below is reproducible

full_data = connect('./datasets/CuAgAu.db')

# Remove split databases left over from a previous run so that the writes
# below populate fresh files.
for split_path in ('./datasets/test.db', './datasets/train.db', './datasets/valid.db'):
    if os.path.exists(split_path):
        os.remove(split_path)

train_data = connect('./datasets/train.db')
valid_data = connect('./datasets/valid.db')
test_data = connect('./datasets/test.db')

# Hold out n//5 distinct ids: the first n//10 drawn go to validation, the
# remainder to test; every other id is used for training.
# NOTE(review): the candidate pool is 1..n-1, so id n can never be held out and
# always ends up in the training set. Left as-is to keep the seeded split
# unchanged — confirm whether np.arange(1, n+1) was intended.
valid_and_test_ids = np.random.choice(np.arange(1,n),n//5,replace=False)
valid_ids = valid_and_test_ids[:n//10]
test_ids = valid_and_test_ids[n//10:]

# Build sets once: `i in ndarray` compares against every element on each
# lookup, whereas set membership is O(1). The resulting split is identical.
valid_id_set = set(valid_ids.tolist())
test_id_set = set(test_ids.tolist())

# Write the corresponding atoms object into each database
for i in range(1,n+1):
    if i%100==0:
        print(i)  # coarse progress indicator
    row = full_data.get_atoms(selection = i)  # result of get_atoms for id i
    if i in test_id_set:
        test_data.write(row)
    elif i in valid_id_set:
        valid_data.write(row)
    else:
        train_data.write(row)
# Name of the system; also the directory that receives all output files.
Name = 'CuAgAu'
elements = ['Cu', 'Ag', 'Au']

# Symmetry-function settings handed to set_sym.
# NOTE(review): Gs = [2] presumably selects only G2 (radial) functions and
# cutoff is presumably in Angstrom — confirm against utils.fp_calculator.
Gs = [2]
cutoff = 6.0
g2_etas = [0.05, 4.0, 20.0, 80.0]
g2_Rses = [0.0]

params_set = set_sym(elements, Gs, cutoff, g2_etas=g2_etas, g2_Rses=g2_Rses)

# torch.save does not create missing parent directories, so make sure the
# output folder exists before the first write.
os.makedirs(f'./{Name}', exist_ok=True)

# calculate fingerprints for databases and store each result on disk
train_db = connect('./datasets/train.db')
train_data = db_to_fp(train_db, params_set)
torch.save(train_data, f'./{Name}/CuAgAu-train-dft.sav')

valid_db = connect('./datasets/valid.db')
valid_data = db_to_fp(valid_db, params_set)
torch.save(valid_data, f'./{Name}/CuAgAu-valid-dft.sav')

test_db = connect('./datasets/test.db')
test_data = db_to_fp(test_db, params_set)
torch.save(test_data, f'./{Name}/CuAgAu-test-dft.sav')

# Location of the cached scaling parameters.
scale_file = f'./{Name}/scale.sav'

# Read the fingerprint files back from disk.
train_data = torch.load(f'./{Name}/CuAgAu-train-dft.sav')
valid_data = torch.load(f'./{Name}/CuAgAu-valid-dft.sav')
test_data = torch.load(f'./{Name}/CuAgAu-test-dft.sav')

# Reuse the cached scaling if one exists; otherwise derive it from the
# training set and cache it.
# NOTE(review): a scale file left over from an earlier run is reused even
# though the training data was just regenerated — confirm this is intended.
if os.path.isfile(scale_file):
    scale = torch.load(scale_file)
else:
    scale = get_scaling(train_data)
    torch.save(scale, scale_file)

# Min-max scale the 'b_fp' entry of every split with the same parameters.
# NOTE(review): a feature with fp_max == fp_min would divide by zero here —
# verify whether get_scaling guards against that.
fp_min = scale['fp_min']
fp_span = scale['fp_max'] - fp_min
for dataset in (train_data, valid_data, test_data):
    dataset['b_fp'] = (dataset['b_fp'] - fp_min) / fp_span

# Training runs on the CPU. The data dictionaries are not moved explicitly;
# only the device handle is passed to the Agent.
device = torch.device('cpu')

# Atomic numbers of the species; should have the same order as `elements` above.
element = torch.tensor([29, 47, 79])

# Output locations: one model file per atomic number, plus the training log.
log_name = f'./{Name}/train_log.txt'
model_paths = [f'./{Name}/model_for_{i}.sav' for i in element.tolist()]

# Network layout and learning rate forwarded to the Agent.
layer_nodes = [10,10]
activations = ['tanh','tanh']
lr = 0.1

# create model and train
agent = Agent(
    train_data=train_data,
    valid_data=valid_data,
    test_data=test_data,
    model_paths=model_paths,
    layer_nodes=layer_nodes,
    activation=activations,
    lr=lr,
    max_iter=20,
    history_size=100,
    device=device,
)

# Energy convergence in meV, Force convergence in meV/Angstrom.
# is_force=False: no force fitting from the data.
agent.train(
    log_name=log_name,
    n_epoch=50,
    interupt=True,
    val_interval=1,
    is_force=False,
    nrg_convg=2,
    force_convg=20,
    nrg_coef=1,
    force_coef=1,
)