In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pickle

from sklearn.ensemble import RandomForestRegressor as rfr
from sklearn.model_selection import train_test_split
import sys
sys.path.append("../Libs")
from Repres_utils import find_path,find_all_paths,distmat,bm_to_graph,append_dict,build_i_idx,integrity\
        ,angle_cos,dihedral_cos
from sklearn.metrics import mean_absolute_error as MAE

In [None]:
def angle_angle_vert_repr(charges,xyzcoords,BOM,idx,i_idx,molg,q):
    """Build the feature vector for two angles, i-v-j and k-v-l, sharing the vertex v.

    Note that a later cell imports a function with the same name from
    ``Nondiag_representation``; once that cell has run, the imported function
    replaces this definition in the notebook namespace.

    Parameters
    ----------
    charges : indexable
        Per-atom values indexed by atom index (presumably nuclear charges -- confirm).
    xyzcoords :
        Atomic coordinates; passed unchanged to ``distmat``, ``angle_cos`` and
        ``dihedral_cos``.
    BOM : 2-D indexable
        Indexed as ``BOM[a, b]`` (presumably the bond-order matrix -- confirm).
    idx : sequence of 5 atom indices
        ``(i, j, k, l, v)``: the four outer atoms followed by the shared vertex.
    i_idx, q :
        Not used by this function; kept so the signature stays unchanged.
    molg : indexable of lists
        ``molg[a]`` lists the neighbours of atom ``a``. ``v`` must be listed as a
        neighbour of each of ``i, j, k, l`` (``list.remove`` raises ``ValueError``
        otherwise).

    Returns
    -------
    tuple
        16 scalar descriptors of the i/j/k/l/v frame followed by 60 values
        describing the neighbours of the outer atoms (4 atoms x 3 neighbours x
        5 features, zero padded).
    """
    i,j,k,l,v=idx
    DM=distmat(xyzcoords)
    outer=(i,j,k,l)

    # Neighbours of every outer atom, with the shared vertex taken out.
    neighbours=[]
    for centre in outer:
        others=molg[centre].copy()
        others.remove(v)
        neighbours.append(others)

    max_neigh=3                  # neighbour slots reserved per outer atom
    n_feat=5                     # features stored per neighbour
    seg_len=max_neigh*n_feat     # 15 entries per outer atom
    za=np.zeros(len(outer)*seg_len)

    # All four outer atoms are checked (the neighbours of l are stored in a
    # 15-entry segment exactly like the others). Only a warning is printed;
    # an oversized neighbour list is not otherwise handled below.
    if any(len(others)>max_neigh for others in neighbours):
        print("error!!")

    # NOTE(review): the neighbour features of *all* four atoms are skipped when
    # j has no neighbour besides v. Empty neighbour lists are handled fine by
    # the loop itself, so this guard looks unintended -- confirm before removing,
    # since dropping it changes the representation.
    if len(neighbours[1]):
        for n_g,(centre,others) in enumerate(zip(outer,neighbours)):
            # dihedral reference atom: k for the first angle (i, j), i for the second (k, l)
            ref=k if n_g<2 else i
            adj_ar=[[charges[atom],BOM[atom,centre],DM[atom,centre],
                     angle_cos(xyzcoords,(atom,centre,v)),
                     dihedral_cos(xyzcoords,(atom,centre,v,ref))]
                    for atom in others]
            # sort the per-neighbour rows so the result does not depend on neighbour order
            adj_ar.sort()
            flat=[x for ar in adj_ar for x in ar]
            start=n_g*seg_len
            za[start:start+len(flat)]=np.asarray(flat)

    return  BOM[i,v],DM[i,v],BOM[j,v],DM[j,v],angle_cos(xyzcoords,(i,v,j)),\
         BOM[v,l],DM[v,l],BOM[v,k],DM[v,k],angle_cos(xyzcoords,(k,v,l)),\
        BOM[i,j],BOM[i,k],BOM[i,l],BOM[j,k],BOM[j,l],BOM[k,l],*za 

Notebook for predicting the Hessian elements that correspond to the second derivative of the energy with respect to a pair of opposite angles sharing the same vertex atom.

![figure](Figures/AAV.png)

In [None]:
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load files from a trusted source.
# The context manager closes the underlying .npz archive once both arrays have been read.
with np.load("../Read Data/DataSet_H_IC.npz", allow_pickle=True) as data:
    X_IC,Y_IC = data["x"], data["y"]
# Stack the columns of X_IC and Y_IC side by side: one row per entry of the data set.
Data=np.vstack((X_IC.T,Y_IC)).T
Data.shape

In [None]:
from Nondiag_representation import angle_angle_vert_repr

In [None]:
def add_repr2mols(calcs):
    """Build the angle-angle (shared vertex) samples for a batch of calculations.

    Parameters
    ----------
    calcs : iterable
        Each element unpacks into
        ``(charges, xyzcoords, BOM, idxs, q, B, g_ic, h_ic)``. ``idxs`` holds the
        atom-index tuples of the internal coordinates and ``h_ic`` is indexed by
        the positions of two internal coordinates in ``idxs``.

    Returns
    -------
    list of list
        One list per calculation. Every entry is ``[label, rv]`` where ``label``
        is the tuple of ``charges`` of ``(a1, a2, a3, a4, v)`` and ``rv`` is the
        output of ``angle_angle_vert_repr`` with the matching ``h_ic`` element
        appended as the last value.
    """
    Mols=[]
    for calc in calcs:
        charges,xyzcoords,BOM,idxs,q,B,g_ic,h_ic=calc
        Mol=[]
        molg=bm_to_graph(BOM)
        # Position of every internal coordinate in idxs, reachable in both atom orders.
        i_idxs={}
        for n_ic,idx_ic in enumerate(idxs):
            i_idxs[idx_ic]=n_ic
            i_idxs[idx_ic[::-1]]=n_ic
        for idx in idxs:
            if len(idx)!=3: continue           # only 3-index coordinates (angles) are used
            i,v,j=idx
            molgv=molg[v].copy()
            molgv.remove(i)
            molgv.remove(j)
            if len(molgv)!=2: continue         # vertex needs exactly two other neighbours
            k,l=molgv[0],molgv[1]
            if max(k,l)>max(i,j):continue #avoid double repres.
            hi_kl,lo_kl=max(charges[k],charges[l]),min(charges[k],charges[l])
            hi_ij,lo_ij=max(charges[i],charges[j]),min(charges[i],charges[j])
            # The pair with the larger charges becomes (a1, a2); inside each pair
            # the atom with the larger charge comes first.
            if hi_kl>hi_ij or (hi_kl==hi_ij and lo_kl>lo_ij):
                if charges[k]>charges[l]: a1,a2=k,l
                else: a1,a2=l,k
                if charges[i]>charges[j]: a3,a4=i,j
                else: a4,a3=i,j
            else:
                if charges[k]>=charges[l]: a3,a4=k,l
                else: a3,a4=l,k
                if charges[i]>=charges[j]: a1,a2=i,j
                else: a2,a1=i,j
            # Emitted for BOTH orderings above: previously this sat inside the
            # else-branch, so pairs where (k, l) ranked first were silently dropped.
            rv=np.array([*angle_angle_vert_repr(charges,xyzcoords,BOM,(a1,a2,a3,a4,v),i_idxs,molg,q),\
                         h_ic[i_idxs[(a1,v,a2)],i_idxs[(a3,v,a4)]]])
            Mol.append([tuple(charges[x] for x in (a1,a2,a3,a4,v)),rv])
        Mols.append(Mol)
    return Mols

In [None]:
from multiprocessing import Pool
from functools import partial
def multi_process_repr(arr,num_processes = 35):
    """Run ``add_repr2mols`` over ``arr`` in parallel worker processes.

    ``arr`` is split into ``num_processes`` chunks with ``np.array_split``; each
    chunk is handed to one ``add_repr2mols`` call and the per-chunk results are
    concatenated in order into a single flat list.
    """
    chunks=np.array_split(arr,num_processes)
    # The context manager shuts the worker processes down once map() has returned,
    # instead of leaving the pool (and its processes) alive.
    with Pool(processes=num_processes) as pool:
        results = pool.map(add_repr2mols,chunks)
    return  [item for list_ in results for item in list_]

In [None]:
Mols=multi_process_repr(Data)
# Fixed seed so the train/test partition is the same on every run.
train,test =train_test_split(Mols,random_state=0)

In [None]:
# Collect the entries of every molecule under their label, separately for each split.
Angle_Angle_train,Angle_Angle_test={},{}
for grouped,split in ((Angle_Angle_train,train),(Angle_Angle_test,test)):
    for mol in split:
        for label,repres in mol:
            append_dict(grouped,label,repres)

In [None]:
# Turn the collected entries of every label into a single array (values are replaced in place).
for grouped in (Angle_Angle_test,Angle_Angle_train):
    for bex,rows in grouped.items():
        grouped[bex]=np.asarray(rows)

In [None]:
# Train one random forest per label on the training split and predict the test split.
predictions={}
for key in Angle_Angle_train:
    if key not in Angle_Angle_test: continue 
    train_rows,test_rows=Angle_Angle_train[key],Angle_Angle_test[key]
    if (len(test_rows)+len(train_rows))<10:  # excluding the pairs which do not occur at least 10 times in the dataset
        continue
    # the last column is the target, everything before it is the representation
    x_train,y_train=train_rows[:,:-1],train_rows[:,-1]
    x_test,y_test=test_rows[:,:-1],test_rows[:,-1]
    # fixed seed so that repeated runs give the same forest and the same predictions
    rf = rfr(n_estimators=100,n_jobs=32,random_state=0)
    rf.fit(x_train, y_train)
    y_pred=rf.predict(x_test)
    predictions[key]=(y_test,y_pred)

In [None]:
# One parity plot (true vs. predicted) per label, with the MAE printed alongside.
for key,(y_test,y_pred) in predictions.items():
    fig,ax=plt.subplots(figsize=(8,8))
    ax.set_title(key)
    ax.scatter(y_test,y_pred,s=5)
    ax.set_xlabel("TRUE")
    ax.set_ylabel("PREDICTED")
    # common range of both axes, used to draw the diagonal
    ml=min(min(y_pred),min(y_test))
    Ml=max(max(y_pred),max(y_test))
    print("MAE = ", MAE(y_pred,y_test))
    ax.plot([ml,Ml],[ml,Ml],ls=":",c="k")
    plt.show()

In [None]:
# Group the entries of ALL molecules (train and test together) by label.
Angle_Angle_all={}
for mol in Mols:
    for label,repres in mol:
        append_dict(Angle_Angle_all,label,repres)
for bex in Angle_Angle_all:
    Angle_Angle_all[bex]=np.asarray(Angle_Angle_all[bex])

# Fit one model per label on the full data set.
# NOTE(review): unlike the evaluation cell, no minimum sample count is applied here,
# so labels with very few samples also get a model -- confirm this is intended.
Models={}
for key in Angle_Angle_all:
    # the last column is the target, everything before it is the representation
    x_train,y_train=Angle_Angle_all[key][:,:-1],Angle_Angle_all[key][:,-1]
    # fixed seed so that the stored models can be regenerated exactly
    rf = rfr(n_estimators=100,n_jobs=32,random_state=0)
    rf.fit(x_train, y_train)
    rf.n_jobs=1   # reset before the model is stored in Models
    Models[key]=rf

In [None]:
import os

from joblib import dump as jl_dump
from joblib import load as jl_load

# Create the output directory if it is missing; jl_dump fails when it does not exist.
os.makedirs("./Saved_Models/AAV",exist_ok=True)
# NOTE(review): the five label entries are concatenated without a separator in the
# file name; kept unchanged because code loading these files presumably relies on it.
for i in Models:
    jl_dump(Models[i],"./Saved_Models/AAV/{}{}{}{}{}.joblib".format(*i))