In [None]:
import copy
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import norm
from sklearn.ensemble import RandomForestRegressor as rfr
from sklearn.model_selection import train_test_split

sys.path.append("../Libs")
%load_ext autoreload
%autoreload 2
from Repres_utils import bm_to_graph,find_path,find_all_paths,distmat,append_dict,build_i_idx,get_dihedral,\
                        dihedral_cos,angle_cos,mol_integrity,ordered_charges
from parallel_representations import multi_process_repr

In [None]:
# Read the arrays stored under the keys "x" and "y" of the data-set archive.
# SECURITY: allow_pickle=True unpickles object arrays, which can execute
# arbitrary code -- only load archives that come from a trusted source.
with np.load("../Read Data/DataSet_H_IC.npz", allow_pickle=True) as data:
    # Both arrays are read into memory here, so they remain usable after the
    # archive's file handle is closed at the end of the with-block.
    X_IC, Y_IC = data["x"], data["y"]
len(X_IC)

In [None]:
# Stack Y_IC underneath the transposed X_IC, then transpose the result back.
# assumes Y_IC holds one value per entry of X_IC, so that it ends up as the
# last column of Dat -- TODO confirm against the data set
Dat = np.transpose(np.vstack([np.transpose(X_IC), Y_IC]))
Dat.shape

In [None]:
# Compute the "bonds" representation of Dat. Later cells iterate the result as
# a collection of molecules, each a sequence of (label, ring, repres) entries.
# NOTE(review): num_processes=35 is hard-coded -- presumably the worker count
# chosen for the original machine; confirm it suits the current host.
mols = multi_process_repr(
    Dat,
    "bonds",
    num_processes=35,
)

In [None]:
# Split the molecules into a training and a test subset using
# train_test_split's default test fraction. The fixed random_state makes this
# call return the same partition on every run.
train, test = train_test_split(mols, random_state=0)
len(train), len(test)

In [None]:
def _collect_bonds(molecules):
    """Group the bond representations of ``molecules`` by bond label.

    Parameters
    ----------
    molecules : iterable
        Molecules, each an iterable of ``(label, ring, repres)`` bond entries.

    Returns
    -------
    tuple of dict
        ``(non_ring, in_ring)``. Each dict maps a bond label to the result of
        ``np.asarray`` applied to the representations collected for that
        label. Entries whose ``ring`` flag is truthy go to ``in_ring``, all
        others to ``non_ring``.
    """
    non_ring, in_ring = {}, {}
    for mol in molecules:
        for label, ring, repres in mol:
            append_dict(in_ring if ring else non_ring, label, repres)
    # Convert the per-label collections to arrays once everything is gathered.
    non_ring = {label: np.asarray(rows) for label, rows in non_ring.items()}
    in_ring = {label: np.asarray(rows) for label, rows in in_ring.items()}
    return non_ring, in_ring


# NOTE(review): the previous cell also calls train_test_split(mols); this call
# overwrites that result, so the seed is set here, where the partition that is
# actually used for training and evaluation is created.
train, test = train_test_split(mols, random_state=0)

# Same grouping for both subsets; the "_r" dicts hold the ring bonds.
train_bonds, train_bonds_r = _collect_bonds(train)
test_bonds, test_bonds_r = _collect_bonds(test)

In [None]:
# Non-ring bonds: for every bond label present in both subsets, fit a random
# forest on the training rows and plot its predictions against the test
# references. Results are kept in bonds_pred_linear[label] = (y_test, y_pred).
bonds_pred_linear = {}
for bex in test_bonds:
    if bex not in train_bonds:
        continue  # nothing to train on for this label

    # The last column is the target, all preceding columns are the features.
    train_arr, test_arr = train_bonds[bex], test_bonds[bex]
    x_train, y_train = train_arr[:, :-1], train_arr[:, -1:]
    x_test, y_test = test_arr[:, :-1], test_arr[:, -1:]

    # Fixed random_state so the fitted forest, and therefore the plot, is the
    # same on every run.
    regr = rfr(n_estimators=100, n_jobs=32, random_state=0)
    regr.fit(x_train, y_train.ravel())
    y_pred = regr.predict(x_test)

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_title(bex)
    # Diagonal reference line: points on it are perfect predictions.
    ax.plot([0, .8], [0, .8], c="r", lw=1, ls="-.")
    ax.scatter(y_test, y_pred, s=4, alpha=1)
    ax.set_xlabel("Reference")
    ax.set_ylabel("Prediction")
    plt.show()
    plt.close(fig)  # one figure per label: release it once displayed

    # NOTE(review): y_test keeps its column shape from the [:, -1:] slice
    # while y_pred is 1-D; ravel y_test before element-wise comparisons.
    bonds_pred_linear[bex] = (y_test, y_pred)


In [None]:
# Ring bonds: for every bond label present in both subsets, fit a random
# forest on the training rows and plot its predictions against the test
# references. Results are kept in bonds_pred_ring[label] = (y_test, y_pred).
bonds_pred_ring = {}
for bex in test_bonds_r:
    if bex not in train_bonds_r:
        continue  # nothing to train on for this label

    # The last column is the target, all preceding columns are the features.
    train_arr, test_arr = train_bonds_r[bex], test_bonds_r[bex]
    x_train, y_train = train_arr[:, :-1], train_arr[:, -1:]
    x_test, y_test = test_arr[:, :-1], test_arr[:, -1:]
    # NOTE(review): a filter skipping labels with fewer than 100 samples in
    # total (len(y_test) + len(y_train) < 100) is currently disabled here.

    # Fixed random_state so the fitted forest, and therefore the plot, is the
    # same on every run.
    regr = rfr(n_estimators=100, n_jobs=32, random_state=0)
    regr.fit(x_train, y_train.ravel())
    y_pred = regr.predict(x_test)

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_title(bex)
    # Diagonal reference line: points on it are perfect predictions.
    ax.plot([0, .8], [0, .8], c="r", lw=1, ls="-.")
    ax.scatter(y_test, y_pred, s=4, alpha=1)
    # Axis labels match the non-ring evaluation plots.
    ax.set_xlabel("Reference")
    ax.set_ylabel("Prediction")
    plt.show()
    plt.close(fig)  # one figure per label: release it once displayed

    # NOTE(review): y_test keeps its column shape from the [:, -1:] slice
    # while y_pred is 1-D; ravel y_test before element-wise comparisons.
    bonds_pred_ring[bex] = (y_test, y_pred)


# Save Models

In [None]:
# Regroup the bond representations of all molecules (no train/test split) by
# bond label: entries with a truthy ring flag go to `rings`, the rest to `lins`.
rings, lins = {}, {}
for mol in mols:
    for label, ring, repres in mol:
        bucket = rings if ring else lins
        append_dict(bucket, label, repres)

# Turn each per-label collection into an array.
for grouped in (rings, lins):
    for bex, rows in grouped.items():
        grouped[bex] = np.asarray(rows)

In [None]:
# Fit one random forest per non-ring bond label on the complete data set.
models_lin = {}

for bex, samples in lins.items():
    # The last column is the target, all preceding columns are the features.
    x_train, y_train = samples[:, :-1], samples[:, -1]
    # Fixed random_state so that re-running the notebook rebuilds the same
    # model for each label.
    rf = rfr(n_estimators=100, n_jobs=32, random_state=0)
    rf.fit(x_train, y_train)
    # Train in parallel, but store the estimator with n_jobs=1 so it does not
    # request 32 workers when it is used for prediction later.
    rf.n_jobs = 1
    models_lin[bex] = rf


In [None]:
# Fit one random forest per ring bond label on the complete data set.
models_ring = {}

for bex, samples in rings.items():
    # The last column is the target, all preceding columns are the features.
    # Indexing with [:, -1] already yields a 1-D target, so no flattening is
    # needed before fitting.
    x_train, y_train = samples[:, :-1], samples[:, -1]
    # Fixed random_state so that re-running the notebook rebuilds the same
    # model for each label.
    rf = rfr(n_estimators=100, n_jobs=32, random_state=0)
    rf.fit(x_train, y_train)
    # Train in parallel, but store the estimator with n_jobs=1 so it does not
    # request 32 workers when it is used for prediction later.
    rf.n_jobs = 1
    models_ring[bex] = rf

In [None]:
from joblib import dump as jl_dump
from joblib import load as jl_load

In [None]:
# Persist every fitted model with joblib, one file per bond label.
# Create the target directory first so the dump does not fail on a fresh
# checkout where it does not exist yet.
os.makedirs("./Saved_Models/Bonds", exist_ok=True)

# Each label is unpacked into the two "{}" placeholders of the file name, so
# labels need at least two elements.
for label, model in models_lin.items():
    jl_dump(model, "./Saved_Models/Bonds/lin_{}{}.joblib".format(*label))
for label, model in models_ring.items():
    jl_dump(model, "./Saved_Models/Bonds/ring_{}{}.joblib".format(*label))