In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
from sklearn.ensemble import RandomForestRegressor as rfr
from sklearn.model_selection import train_test_split
import sys
sys.path.append("../Libs")
from Repres_utils import find_path,find_all_paths,distmat,bm_to_graph,append_dict,build_i_idx,integrity\
        ,angle_cos,dihedral_cos

from sklearn.metrics import mean_absolute_error as MAE

Notebook for the prediction of the Hessian elements corresponding to the second derivative of the energy with respect to an angle and to one of the bonds that form it (i.e. the bond is a side of the angle).

<img src="Figures/BAi.png" width="200" height="200" />


In [None]:
# Load the pre-computed data set; the archive must provide arrays under the keys "x" and "y".
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute arbitrary
# code -- only load archives that come from a trusted source.
data = np.load("../Read Data/DataSet_H_IC.npz", allow_pickle=True)
X_IC,Y_IC = data["x"], data["y"]
# Stack X_IC.T on top of Y_IC and transpose back, so the content of Y_IC ends up as the
# trailing column(s) of Data.  Presumably one row per calculation -- TODO confirm.
Data=np.vstack((X_IC.T,Y_IC)).T
# Last expression of the cell: displayed as the cell output.
Data.shape

In [None]:
from Nondiag_representation import bond_angle_incl_repr

In [None]:
def add_repr2mols(calcs):
    """Build the labelled bond/angle samples for a batch of calculations.

    Parameters
    ----------
    calcs : iterable
        Each item must unpack into eight fields:
        ``charges, xyzcoords, BOM, idxs, q, B, g_ic, h_ic``.  ``B`` and ``g_ic``
        are unpacked but not used here.  ``h_ic`` is indexed as a 2-D matrix
        (presumably the Hessian in internal coordinates -- confirm against the
        code that writes the data set).

    Returns
    -------
    list of list
        One inner list per calculation.  Every three-index entry ``(i, j, k)``
        of ``idxs`` contributes two samples, one for ``(i, j, k)`` and one for
        the reversed ``(k, j, i)``.  A sample is ``[label, vector]`` where

        * ``label`` is the tuple of ``charges`` of the three atoms, and
        * ``vector`` is a NumPy array holding three path counts from
          ``find_all_paths``, the output of ``bond_angle_incl_repr`` and, as the
          last element, ``h_ic[b, i_idxs[(first, centre)]]`` with ``b`` the
          position of the angle in ``idxs``.

        Entries of ``idxs`` whose length is not 3 are ignored.
    """
    molecules = []
    for charges, xyzcoords, BOM, idxs, q, _B, _g_ic, h_ic in calcs:
        graph = bm_to_graph(BOM)
        i_idxs = build_i_idx(idxs)
        samples = []
        for row, idx in enumerate(idxs):
            if len(idx) != 3:
                continue
            first, centre, last = idx
            # Each angle is used twice: once per bond that forms one of its sides.
            for a, c in ((first, last), (last, first)):
                triple = (a, centre, c)
                path_counts = [
                    len(find_all_paths(graph, u, v))
                    for u, v in ((centre, c), (a, centre), (a, c))
                ]
                features = bond_angle_incl_repr(
                    charges, xyzcoords, BOM, triple, i_idxs, graph, q
                )
                # Target value: coupling between this angle and the bond (a, centre).
                target = h_ic[row, i_idxs[(a, centre)]]
                vector = np.array([*path_counts, *features, target])
                samples.append([tuple(charges[x] for x in triple), vector])
        molecules.append(samples)
    return molecules

In [None]:
from multiprocessing import Pool
from functools import partial
def multi_process_repr(arr,num_processes = 35):
    """Run ``add_repr2mols`` over ``arr`` in parallel and concatenate the results.

    Parameters
    ----------
    arr : sequence
        Calculations to process; split into contiguous chunks with
        ``np.array_split``.
    num_processes : int, optional
        Upper bound on the number of worker processes (default 35).  Never more
        workers than items are started, and at least one is always used.

    Returns
    -------
    list
        The per-calculation results of ``add_repr2mols``, in the same order as
        the items of ``arr``.
    """
    # Do not spawn workers that would only receive an empty chunk.
    n_workers = max(1, min(num_processes, len(arr)))
    chunks = np.array_split(arr, n_workers)
    # The context manager shuts the pool down, so repeated calls from the
    # notebook do not accumulate idle worker processes.
    with Pool(processes=n_workers) as pool:
        results = pool.map(add_repr2mols, chunks)
    return [item for list_ in results for item in list_]
# Build the representation of every calculation (runs in parallel worker processes).
Mols=multi_process_repr(Data)
# Fixed seed so that "Restart & Run All" yields the same train/test partition.
# The random forests trained further down are still unseeded.
train,test =train_test_split(Mols, random_state=0)


In [None]:
# Group the samples of each split by their label (the tuple of atomic charges),
# using the project helper append_dict to collect them per key.
Bond_Angle_train, Bond_Angle_test = {}, {}
for subset, grouped in ((train, Bond_Angle_train), (test, Bond_Angle_test)):
    for mol in subset:
        for label, repres in mol:
            append_dict(grouped, label, repres)

In [None]:
# Replace, in place, the collected samples of every label by a single NumPy array
# (test split first, then train split).
for grouped in (Bond_Angle_test, Bond_Angle_train):
    for bex in list(grouped):
        grouped[bex] = np.asarray(grouped[bex])

In [None]:
# Fit one random forest per label and keep the held-out targets together with the
# model's predictions.  Each entry is stored as (y_test, y_pred) -- in that order.
predictions = {}
shared_keys = [k for k in Bond_Angle_test if k in Bond_Angle_train]
for key in shared_keys:
    train_rows = Bond_Angle_train[key]
    test_rows = Bond_Angle_test[key]
    # The last column is the target; everything before it is the feature vector.
    x_train, y_train = train_rows[:, :-1], train_rows[:, -1]
    x_test, y_test = test_rows[:, :-1], test_rows[:, -1]
    # Skip labels with fewer than 10 samples overall.
    if len(y_test) + len(y_train) < 10:
        continue
    rf = rfr(n_estimators=100, n_jobs=32)
    rf.fit(x_train, y_train)
    y_pred = rf.predict(x_test)
    predictions[key] = (y_test, y_pred)

In [None]:
# One parity plot per label.  predictions[key] is stored as (y_test, y_pred), so it
# has to be unpacked in that order; otherwise the "PREDICTED" axis would show the
# true values and the "TRUE" axis the predictions.
for key in predictions:
    y_test, y_pred = predictions[key]
    plt.figure(figsize=(8,8))
    plt.title(key)
    plt.scatter(y_pred, y_test, s=3)
    plt.xlabel("PREDICTED")
    plt.ylabel("TRUE")
    print("MAE = ", MAE(y_test, y_pred))
    # Diagonal y = x spanning the joint range of both series.
    ml,Ml=min(min(y_pred),min(y_test)),max(max(y_pred),max(y_test))
    plt.plot([ml,Ml],[ml,Ml],ls=":",c="k")
    plt.show()

In [None]:
# Single parity plot with the points of every label.  predictions[key] is stored as
# (y_test, y_pred), so unpack in that order to put predictions on x and truth on y.
plt.figure(figsize=(8,8))
for key in predictions:
    y_test, y_pred = predictions[key]
    plt.scatter(y_pred, y_test, s=3, c='C0')
    # Diagonal y = x over the range covered by this label.
    ml,Ml=min(min(y_pred),min(y_test)),max(max(y_pred),max(y_test))
    plt.plot([ml,Ml],[ml,Ml],ls=":",c="k")
plt.xlabel("PREDICTED")
plt.ylabel("TRUE")

In [None]:
# Final models: pool every sample of Mols (no train/test split) by label ...
Bond_Angle_all = {}
for mol in Mols:
    for label, repres in mol:
        append_dict(Bond_Angle_all, label, repres)
for bex in list(Bond_Angle_all):
    Bond_Angle_all[bex] = np.asarray(Bond_Angle_all[bex])

# ... and fit one random forest per label on all of its samples.
Models = {}
for key, samples in Bond_Angle_all.items():
    # The last column is the target; the preceding columns are the features.
    x_train = samples[:, :-1]
    y_train = samples[:, -1]
    rf = rfr(n_estimators=100, n_jobs=32)
    rf.fit(x_train, y_train)
    # Fit in parallel, but store the model with n_jobs=1 so that it is
    # single-threaded when used after loading.
    rf.n_jobs = 1
    Models[key] = rf

In [None]:
import os
from joblib import dump as jl_dump
from joblib import load as jl_load

# One file per label, named by concatenating the three atomic charges of the label.
# NOTE(review): plain concatenation is ambiguous for multi-digit charges
# (e.g. (1, 6, 11) and (16, 1, 1) both give "1611"); the naming is kept unchanged
# because code that loads these files presumably relies on it -- confirm.
model_path_template = "./Saved_Models/BA_inc/{}{}{}.joblib"
# joblib does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(model_path_template), exist_ok=True)
for key in Models:
    jl_dump(Models[key], model_path_template.format(*key))