In [None]:
import copy
import os
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
from sklearn.ensemble import RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split
%load_ext autoreload
%autoreload 2
sys.path.append("../Libs")
from Repres_utils import bm_to_graph,find_path,find_all_paths,distmat,append_dict,build_i_idx,get_dihedral,\
                        dihedral_cos,angle_cos,mol_integrity,ordered_charges

from parallel_representations import multi_process_repr
from Representations import build_bond_repr,build_angle_repr,build_dihedral_repr

np.set_printoptions(precision=2)


In [None]:
# Load the dataset archive and merge inputs and targets into a single table.
# NOTE(review): allow_pickle=True un-pickles object arrays, which can execute
# arbitrary code -- only load archives that come from a trusted source.
data = np.load("../Read Data/DataSet_H_IC.npz", allow_pickle=True)
X_IC = data["x"]
Y_IC = data["y"]
# Stack Y_IC underneath the transposed inputs and transpose back, so that the
# values from Y_IC end up as the trailing column(s) of Dat.
Dat = np.vstack((X_IC.T, Y_IC)).T
Dat.shape

In [None]:
# Build the "angles" representation for the whole of Dat with the project helper
# multi_process_repr (presumably spread over worker processes -- see its source).
# NOTE(review): num_processes = 35 is hard-coded; confirm it suits the host machine.
# Later cells iterate each element of `mols` and unpack every entry as
# (ring, label, repres).
mols=multi_process_repr(Dat[:],"angles",num_processes = 35)

In [None]:
# Hold out part of the molecules for evaluation (scikit-learn's default split
# proportions are used). A fixed random_state makes the split -- and therefore
# every evaluation that depends on it -- reproducible after a kernel restart.
train, test = train_test_split(mols, random_state=42)
len(mols), len(train), len(test)

In [None]:
def _group_by_label(molecules):
    """Group the angle entries of `molecules` by label, split by ring flag.

    Every molecule is iterated and each of its entries is unpacked as
    ``(ring, label, repres)``. `repres` is stored under `label` through
    `append_dict`: in the ring dictionary when `ring` is truthy, in the
    non-ring dictionary otherwise.

    Args:
        molecules: iterable of molecules, each an iterable of
            ``(ring, label, repres)`` triples.

    Returns:
        ``(non_ring, in_ring)``: two dicts mapping each label to
        ``np.asarray`` of everything collected under that label.
    """
    non_ring, in_ring = {}, {}
    for mol in molecules:
        for ring, label, repres in mol:
            append_dict(in_ring if ring else non_ring, label, repres)
    # Convert only after all molecules were visited, so each label is turned
    # into an array exactly once.
    non_ring = {label: np.asarray(reps) for label, reps in non_ring.items()}
    in_ring = {label: np.asarray(reps) for label, reps in in_ring.items()}
    return non_ring, in_ring


# Same grouping for both splits; the *_r dictionaries hold the ring entries.
train_angles, train_angles_r = _group_by_label(train)
test_angles, test_angles_r = _group_by_label(test)

In [None]:
# Fit one random forest per non-ring angle label on the training split and keep
# (reference, prediction) pairs for the test split.
predict_linear = {}
for bex in test_angles:
    if bex not in train_angles:
        continue  # label only occurs in the test split: nothing to train on
    # The last column is the regression target, all preceding columns are features.
    x_train, y_train = train_angles[bex][:, :-1], train_angles[bex][:, -1]
    x_test, y_test = test_angles[bex][:, :-1], test_angles[bex][:, -1]
    # Skip labels with fewer than 100 samples over both splits combined.
    if len(y_test) + len(y_train) < 100:
        continue
    # random_state pins bootstrap and feature sampling so reruns give the same
    # predictions and plots.
    rfr = RandomForestRegressor(n_estimators=100, n_jobs=32, random_state=42)
    rfr.fit(x_train, y_train.flatten())
    y_predict = rfr.predict(x_test)
    predict_linear[bex] = (y_test, y_predict)

In [None]:
# Parity plot (prediction against reference) for every evaluated non-ring label.
for lp in predict_linear:
    print(lp)
    y_test, y_predict = predict_linear[lp]
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(y_test, y_predict, s=2)
    # Vectorised extrema: the builtin min/max would walk the arrays in Python.
    ml = min(np.min(y_predict), np.min(y_test))
    Ml = max(np.max(y_predict), np.max(y_test))
    # Dotted diagonal marks perfect agreement between prediction and reference.
    ax.plot([ml, Ml], [ml, Ml], ls=":", c="k")
    ax.set(
        title="Non-ring angle {}".format(lp),
        xlabel="reference value",
        ylabel="predicted value",
    )
    plt.show()
    # Release the figure so a long loop does not accumulate open figures.
    plt.close(fig)

In [None]:
# Fit one random forest per ring angle label on the training split and keep
# (reference, prediction) pairs for the test split.
predict_rings = {}
for bex in test_angles_r:
    if bex not in train_angles_r:
        continue  # label only occurs in the test split: nothing to train on
    # The last column is the regression target, all preceding columns are features.
    x_train, y_train = train_angles_r[bex][:, :-1], train_angles_r[bex][:, -1]
    x_test, y_test = test_angles_r[bex][:, :-1], test_angles_r[bex][:, -1]
    # Skip labels with fewer than 100 samples over both splits combined.
    if len(y_test) + len(y_train) < 100:
        continue
    # random_state pins bootstrap and feature sampling so reruns give the same
    # predictions and plots.
    rfr = RandomForestRegressor(n_estimators=100, n_jobs=32, random_state=42)
    rfr.fit(x_train, y_train.flatten())
    y_predict = rfr.predict(x_test)
    predict_rings[bex] = (y_test, y_predict)

In [None]:
# Parity plot (prediction against reference) for every evaluated ring label.
for rp in predict_rings:
    print(rp)
    y_test, y_predict = predict_rings[rp]
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(y_test, y_predict, s=2)
    # Vectorised extrema: the builtin min/max would walk the arrays in Python.
    ml = min(np.min(y_predict), np.min(y_test))
    Ml = max(np.max(y_predict), np.max(y_test))
    # Dotted diagonal marks perfect agreement between prediction and reference.
    ax.plot([ml, Ml], [ml, Ml], ls=":", c="k")
    ax.set(
        title="Ring angle {}".format(rp),
        xlabel="reference value",
        ylabel="predicted value",
    )
    plt.show()
    # Release the figure so a long loop does not accumulate open figures.
    plt.close(fig)

# Save Models

In [None]:
# Regroup the complete data set (no train/test split) by label: ring entries go
# to `rings`, everything else to `lins`.
lins, rings = {}, {}
for mol in mols:
    for ring, label, repres in mol:
        append_dict(rings if ring else lins, label, repres)

# Turn every per-label collection into an array once all entries are gathered.
lins = {bex: np.asarray(collected) for bex, collected in lins.items()}
rings = {bex: np.asarray(collected) for bex, collected in rings.items()}

In [None]:
# Final non-ring models: one random forest per label, fitted on all samples.
models_lin = {}

for bex in lins:
    # The last column is the regression target, all preceding columns are features.
    x_train, y_train = lins[bex][:, :-1], lins[bex][:, -1]
    # random_state makes the stored model reproducible from the same data.
    rfr = RandomForestRegressor(n_estimators=100, n_jobs=32, random_state=42)
    rfr.fit(x_train, y_train.flatten())
    # n_jobs also applies to predict(); reset it so the stored estimator does
    # not request 32 parallel jobs wherever it is used later.
    rfr.n_jobs = 1
    models_lin[bex] = rfr

In [None]:
# Final ring models: one random forest per label, fitted on all samples.
models_ring = {}
for bex in rings:
    # The last column is the regression target, all preceding columns are features.
    x_train, y_train = rings[bex][:, :-1], rings[bex][:, -1]
    # random_state makes the stored model reproducible from the same data.
    rfr = RandomForestRegressor(n_estimators=100, n_jobs=32, random_state=42)
    rfr.fit(x_train, y_train.flatten())
    # n_jobs also applies to predict(); reset it so the stored estimator does
    # not request 32 parallel jobs wherever it is used later.
    rfr.n_jobs = 1
    models_ring[bex] = rfr

In [None]:
from joblib import dump as jl_dump
from joblib import load as jl_load

In [None]:
# Write every fitted model to its own joblib file.
# Create the target directory first: dumping into a missing directory fails.
os.makedirs("./Saved_Models/Angles", exist_ok=True)

# NOTE(review): the three label components are concatenated without a
# separator, so labels with multi-character components could map to the same
# file name. The format is left unchanged because code that loads these files
# may rely on it -- confirm before changing.
for i in models_lin:
    jl_dump(models_lin[i], "./Saved_Models/Angles/lin_{}{}{}.joblib".format(*i))
for i in models_ring:
    jl_dump(models_ring[i], "./Saved_Models/Angles/ring_{}{}{}.joblib".format(*i))