In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pickle

from sklearn.ensemble import RandomForestRegressor as rfr
from sklearn.model_selection import train_test_split
import sys
sys.path.append("../Libs")
from Repres_utils import find_path,find_all_paths,distmat,bm_to_graph,append_dict,build_i_idx,integrity,angle_cos


This notebook trains models that predict the Hessian elements corresponding to the second derivative of the energy with respect to a pair of adjacent angles, i.e. two angles that share the vertex and one side.  

![figure](Figures/AAa.png)

In [None]:
# Load the internal-coordinate Hessian dataset.
# NOTE(review): allow_pickle=True unpickles arbitrary objects while reading the
# archive; only load files that come from a trusted source.
# The `with` block closes the underlying .npz archive once both arrays have
# been read, instead of leaving the file handle open for the whole session.
with np.load("../Read Data/DataSet_H_IC.npz", allow_pickle=True) as data:
    X_IC,Y_IC = data["x"], data["y"]
# One row per calculation: the columns of "x" followed by "y".
# add_repr2mols unpacks each row as
# (charges, xyzcoords, BOM, idxs, q, B, g_ic, h_ic).
Data=np.vstack((X_IC.T,Y_IC)).T
Data.shape

In [None]:
from Nondiag_representation import  angle_angle_adj_repr

In [None]:
def add_repr2mols(calcs):
    """Collect the adjacent angle-angle samples of every calculation in ``calcs``.

    Every calculation is unpacked as
    ``(charges, xyzcoords, BOM, idxs, q, B, g_ic, h_ic)``.  For each
    three-index entry ``(i, j, k)`` of ``idxs`` (an angle, ``j`` being the
    middle atom) and each further neighbour ``l`` of ``j`` in the graph built
    by ``bm_to_graph(BOM)``, two adjacent-angle pairs are considered:

    * the pair sharing the side ``i-j`` (angles ``i-j-k`` and ``i-j-l``),
      kept only when ``l > k``;
    * the pair sharing the side ``k-j`` (angles ``k-j-i`` and ``k-j-l``),
      kept only when ``l > i``.

    The ``l > ...`` test avoids counting the same pair twice.  The atoms are
    arranged as ``(a1, a2, a3, a4)``: ``a1-a2`` is the shared side (``a2`` is
    the vertex) and ``a3``/``a4`` are the two outer atoms, ``a3`` being the
    one with the strictly larger charge (on a tie the atom of the current
    angle comes first).

    Returns
    -------
    list
        One list per calculation.  Each element is ``[label, rv]`` where
        ``label`` is the tuple of the charges of ``(a1, a2, a3, a4)`` and
        ``rv`` is a 1-D array holding five path counts from
        ``find_all_paths``, the output of ``angle_angle_adj_repr`` and, as
        last entry, the Hessian element coupling the two angles.
    """
    all_mols = []
    for calc in calcs:
        charges, xyzcoords, BOM, idxs, q, B, g_ic, h_ic = calc
        graph = bm_to_graph(BOM)

        # Position of every internal coordinate in idxs, reachable through
        # the index tuple read in either direction.
        ic_position = {}
        for position, ic in enumerate(idxs):
            ic_position[ic] = position
            ic_position[ic[::-1]] = position

        samples = []
        for ic in idxs:
            if len(ic) != 3:
                # only angles (three indices) generate samples
                continue
            i, j, k = ic
            # neighbours of the vertex other than the two atoms of this angle
            others = graph[j].copy()
            others.remove(k)
            others.remove(i)
            for l in others:
                # first the shared side is i-j (outer atom k),
                # then the shared side is k-j (outer atom i)
                for a1, outer in ((i, k), (k, i)):
                    a2 = j
                    if l > outer:  # avoid double counting the pair
                        if charges[l] > charges[outer]:
                            a3, a4 = l, outer
                        else:
                            a3, a4 = outer, l
                        atoms = (a1, a2, a3, a4)
                        # number of paths between five atom pairs of the fragment
                        path_counts = tuple(
                            len(find_all_paths(graph, start, end))
                            for start, end in ((a2, a1), (a2, a3), (a1, a3), (a1, a4), (a3, a4))
                        )
                        features = angle_angle_adj_repr(
                            charges, xyzcoords, BOM, atoms, ic_position, graph, q
                        )
                        # off-diagonal Hessian element between the two adjacent angles
                        target = h_ic[ic_position[(a1, a2, a3)], ic_position[(a1, a2, a4)]]
                        rv = np.array([*path_counts, *features, target])
                        label = tuple(charges[x] for x in atoms)
                        samples.append([label, rv])
        all_mols.append(samples)
    return all_mols

In [None]:
from multiprocessing import Pool
from functools import partial
def multi_process_repr(arr,num_processes = 35):
    """Run ``add_repr2mols`` on ``arr`` in parallel and concatenate the results.

    Parameters
    ----------
    arr : sequence
        Rows to process; it is split along its first axis with
        ``np.array_split`` and each chunk is handed to ``add_repr2mols``.
    num_processes : int, optional
        Upper bound on the number of worker processes (default 35).  No more
        workers than rows are started, since the extra ones would only receive
        empty chunks.

    Returns
    -------
    list
        The per-row results of ``add_repr2mols`` in the original row order.
    """
    if len(arr) == 0:
        # nothing to do: do not start a pool just to process empty chunks
        return []
    n_workers = min(num_processes, len(arr))
    chunks = np.array_split(arr, n_workers)
    # The context manager shuts the worker processes down once map() has
    # returned; previously the pool was never closed and its workers leaked.
    with Pool(processes=n_workers) as pool:
        results = pool.map(add_repr2mols, chunks)
    return [item for list_ in results for item in list_]

In [None]:
# Build the adjacent angle-angle samples for the whole dataset in parallel.
# Mols holds one list per calculation; each entry of that list is a
# [label, feature-vector] pair as produced by add_repr2mols.
Mols=multi_process_repr(Data)

In [None]:
# Split by molecule, so all samples of one molecule end up on the same side.
# random_state is fixed so that the split (and therefore every score and plot
# derived from it) is the same on each Restart & Run All.
train,test =train_test_split(Mols,random_state=0)

In [None]:
# Regroup the samples by label (the tuple of the four charges), separately for
# the training and for the test molecules.
# presumably append_dict collects the values of a key in a list; see Repres_utils
Angle_Angle_train, Angle_Angle_test = {}, {}
for molecules, grouped in ((train, Angle_Angle_train), (test, Angle_Angle_test)):
    for mol in molecules:
        for label, repres in mol:
            append_dict(grouped, label, repres)

In [None]:
# Turn the collected sample vectors of every label into one 2-D array
# (one row per sample; the last column is the Hessian element to predict).
for grouped in (Angle_Angle_test, Angle_Angle_train):
    for bex, rows in grouped.items():
        grouped[bex] = np.vstack(rows)

In [None]:
# Hold-out evaluation: one random forest per label, trained on the training
# molecules and evaluated on the test molecules.
# predictions[key] = (true Hessian elements, predicted Hessian elements)
predictions={}
for key in Angle_Angle_train:
    # a label that never occurs in the test set cannot be evaluated
    if key not in Angle_Angle_test: continue 
    # the last column is the target, everything before it is the representation
    x_train,y_train=Angle_Angle_train[key][:,:-1],Angle_Angle_train[key][:,-1]
    x_test,y_test=Angle_Angle_test[key][:,:-1],Angle_Angle_test[key][:,-1]
    if (len(y_test)+len(y_train))<10:  # excluding the pairs which do not occur at least 10 times in the dataset
        continue
    # random_state is fixed so that the evaluation is reproducible across runs
    rf = rfr(n_estimators=100,n_jobs=32,random_state=0)
    rf.fit(x_train, y_train)
    y_pred=rf.predict(x_test)
    predictions[key]=(y_test,y_pred)


In [None]:
# One parity plot (true vs. predicted Hessian element) per label; the dotted
# diagonal marks a perfect prediction.
for key, (y_test, y_pred) in predictions.items():
    fig, ax = plt.subplots(figsize=(8,8))
    ax.set_title(key)
    ax.scatter(y_test, y_pred, s=4)
    ax.set_xlabel("TRUE")
    ax.set_ylabel("PREDICTED")
    # common range of both axes, used to draw the diagonal
    lowest = min(min(y_pred), min(y_test))
    highest = max(max(y_pred), max(y_test))
    ax.plot([lowest, highest], [lowest, highest], ls=":", c="k")
    plt.show()

In [None]:
# Regroup the samples of ALL molecules (train and test together) by label,
# then convert the samples of every label to an array.
# presumably append_dict collects the values of a key in a list; see Repres_utils
Angle_Angle_all = {}
for molecule in Mols:
    for label, repres in molecule:
        append_dict(Angle_Angle_all, label, repres)
for label, rows in Angle_Angle_all.items():
    Angle_Angle_all[label] = np.asarray(rows)

In [None]:
# Final models: one random forest per label, trained on the full dataset.
Models={}
for key in Angle_Angle_all:
    # the last column is the target, everything before it is the representation
    x_train,y_train=Angle_Angle_all[key][:,:-1],Angle_Angle_all[key][:,-1]
    # random_state is fixed so that re-running the notebook rebuilds the same models
    rf = rfr(n_estimators=100,n_jobs=32,random_state=0)
    rf.fit(x_train, y_train)
    # the stored model predicts on a single core, whatever was used for training
    rf.n_jobs=1
    Models[key]=rf

In [None]:
import os
from joblib import dump as jl_dump
from joblib import load as jl_load
# Create the output directory if needed: jl_dump does not create missing
# directories, so a fresh checkout would otherwise fail on the first model.
os.makedirs("./Saved_Models/AA_adj", exist_ok=True)
# One file per label, named after the four charges of the label.
# NOTE(review): the charges are concatenated without a separator, so two
# different labels can map to the same file name once multi-digit charges
# occur (e.g. (1, 6, 16, 1) and (16, 1, 6, 1)); the naming is left unchanged
# here because whatever loads these files depends on it.
for i in Models:
    jl_dump(Models[i],"./Saved_Models/AA_adj/{}{}{}{}.joblib".format(*i))