In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neural_network import MLPRegressor as MLP
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE
from sklearn.preprocessing import StandardScaler
from numpy import linalg as LA

# Read data from descriptor files

In [None]:
# Load the descriptor matrix (X), the target values (Y, forced to a column
# vector) and the per-sample experimental error, then split 90% / 10% into
# training and test sets with a fixed random_state so the split is repeatable.
X = np.genfromtxt('../datasets/Selective_descriptors_X')
Y = np.genfromtxt('../datasets/Selective_descriptors_Y').reshape((X.shape[0],1))
Y_error = np.genfromtxt('../datasets/Selective_descriptors_error')

# Split BEFORE standardizing: fitting the scaler on the full dataset would let
# the mean/variance of the held-out rows influence the training features
# (data leakage) and make the test error optimistic.
X_train,X_test,Y_train,Y_test,e_train,e_test = train_test_split(
    X,Y,Y_error,test_size=0.10,random_state=1010)

# The scaler learns its statistics from the training rows only and is then
# applied unchanged to every other array.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)  # keep X bound to a standardized matrix, as before

n_train = X_train.shape[0]   # number of training samples
n_test = X_test.shape[0]     # number of test samples
d = X_train.shape[1]         # number of input descriptors (features)
print(X_train.shape,X_test.shape)
print(Y_train.shape,Y_test.shape)
print(e_train.shape,e_test.shape)

# Initialize hyperparameters for the ANN

In [None]:
# Hyper-parameters, initial weights and pre-allocated buffers for a network
# with one hidden layer: input (d) -> hidden (hdnode) -> output (1).

# Seed NumPy's global RNG so that the weight initialisation below and the
# np.random.shuffle calls made during training give the same result on every
# Restart & Run All.
SEED = 1010
np.random.seed(SEED)

# Parameters
hdnode = 100                                            # hidden-layer width
w1 = np.random.normal(0, 0.001, size=(d, hdnode))       # input -> hidden weights
d1 = np.zeros((d,hdnode))                               # update buffer for w1
w2 = np.random.normal(0, 0.001, size=(hdnode, 1))       # hidden -> output weights
d2 = np.zeros(hdnode)                                   # placeholder; reassigned in the training loop
h  = np.zeros(hdnode)                                   # placeholder; reassigned in the training loop

mb = 100 #minibatch size
# Number of complete minibatches per epoch. The training loop slices
# [k*mb, (k+1)*mb) for k in batch, so the trailing n_train % mb samples are
# not part of any minibatch.
m = n_train // mb
batch = np.arange(m)   # minibatch indices, shuffled in place every epoch
lr = 0.00020# learning rate
EP =20000# maximum epoch
y = np.zeros((mb,1))          # minibatch prediction buffer
yh = np.zeros((n_train,1))    # predictions on the training set
yh2 = np.zeros((n_test,1))    # predictions on the test set

# Per-epoch error curves; index 0 is left unused because the training loop
# increments the epoch counter before writing.
L_train= np.zeros(EP+1)
L_test = np.zeros(EP+1)

# Allocated but not referenced by any of the cells visible here.
L01_train = np.zeros((EP+1))
L01_test = np.zeros((EP+1))

# Disabled ReLU variant of the activation pair, kept as an inert string literal.
# NOTE(review): the disabled gd clamps A to [0, 1] instead of returning a 0/1
# step, so it would need checking before being re-enabled.
#relu
"""def g(A):
    return (np.maximum(A,0))

def gd(A):
    return (np.minimum(np.maximum(A,0),1))"""


# Active activation pair: hyperbolic tangent.
#tanh
def g(A):
    """Apply the tanh activation element-wise to A."""
    activated = np.tanh(A)
    return activated


def gd(A):
    """Return 1 - tanh(A)**2 element-wise, i.e. the derivative of g at A."""
    t = np.tanh(A)
    return 1 - t * t


# Epoch counter; the training loop reads it and advances it by one per epoch.
ep = 0

# Machine learning
## Change EP to change the training time

In [None]:
# Mini-batch gradient-descent training of the one-hidden-layer network.
# Raise EP and re-run this cell to continue training from the current weights.
EP = 20000

# L_train / L_test were sized for the EP in effect when the setup cell ran.
# Grow them if EP has been raised here, otherwise L_train[ep] would index past
# the end of the array.
if L_train.shape[0] < EP + 1:
    L_train = np.concatenate((L_train, np.zeros(EP + 1 - L_train.shape[0])))
if L_test.shape[0] < EP + 1:
    L_test = np.concatenate((L_test, np.zeros(EP + 1 - L_test.shape[0])))

while ep < EP:
    ep += 1

    # Full-set predictions with the weights as they stand at the start of
    # this epoch.
    yh = g(X_train.dot(w1)).dot(w2)
    yh2 = g(X_test.dot(w1)).dot(w2)

    # Recorded error: L2 (Frobenius) norm of the residual divided by the
    # number of samples. This is not a mean of squared residuals.
    L_train[ep] = LA.norm(yh-Y_train)/n_train
    L_test[ep]  = LA.norm(yh2-Y_test)/n_test

    #print(ep,L_train[ep],L_test[ep])

    # Visit the minibatches in a fresh random order every epoch.
    np.random.shuffle(batch)
    for i in range(m):
        st = batch[i]*mb
        ed = (batch[i]+1)*mb
        xb = X_train[st:ed]

        # Forward pass; the pre-activation is computed once and reused by gd.
        z = xb.dot(w1)
        h = g(z)
        y = h.dot(w2)

        # Residual (target - prediction), shared by both update terms.
        err = Y_train[st:ed]-y

        # Both update directions are computed from the pre-update weights
        # before either weight matrix is modified.
        d2 = h.T.dot(err)
        d1 = xb.T.dot(np.multiply(err.dot(w2.T),gd(z)))

        w2 += lr*d2
        w1 += lr*d1

# Inside the loop yh / yh2 are evaluated before that epoch's updates, so after
# the last epoch they lag the weights by one epoch. Recompute them so the
# predictions correspond to the final w1 / w2.
yh = g(X_train.dot(w1)).dot(w2)
yh2 = g(X_test.dot(w1)).dot(w2)

# Save the current model

In [None]:
# Write the weights (file names tagged with EP), the per-epoch error curves and
# the current train/test predictions to plain-text files, one array per file.
for fname, arr in (
    ("w1_{}.txt".format(EP), w1),
    ("w2_{}.txt".format(EP), w2),
    ("L_train.txt", L_train),
    ("L_test.txt", L_test),
    ("prediction_train.txt", yh),
    ("prediction_test.txt", yh2),
):
    np.savetxt(fname, arr)

# Plot experiment vs prediction

In [None]:
# Parity plots (experiment vs. prediction) for the training and the test set.
def _parity_panel(ax, y_true, y_pred, title, marker_size):
    """Draw one experiment-vs-prediction scatter on `ax` with a y = x reference line.

    ax          -- matplotlib Axes to draw on
    y_true      -- experimental values (x axis)
    y_pred      -- predicted values (y axis)
    title       -- panel title
    marker_size -- scatter marker size
    """
    ax.scatter(y_true, y_pred, s=marker_size, color='blue')
    # Straight reference line from (0, 0) to (12, 12): perfect prediction.
    ax.plot([0, 12], [0, 12], color='black')
    ax.set_title(title, fontsize=25)
    ax.set_xlim((0,12))
    ax.set_ylim((0,12))
    ax.set_xlabel("Experiment($S*m^2/mol$)",fontsize =20)
    ax.set_ylabel("Prediction($S*m^2/mol$)",fontsize =20)
    ax.tick_params(axis='both', which='major',labelsize =15)


# Reload the predictions written by the save cell.
yh = np.genfromtxt("prediction_train.txt").reshape((n_train,1))
yh2 = np.genfromtxt("prediction_test.txt").reshape((n_test,1))

fig, (ax_train, ax_test) = plt.subplots(1, 2, figsize=(20,10))
_parity_panel(ax_train, Y_train, yh, 'MLP Regressor Prediction on Training Data', 4)
_parity_panel(ax_test, Y_test, yh2, 'MLP Regressor Prediction on Test Data', 10)
# The original referenced plt.tight_layout without calling it, so the layout
# was never adjusted; call it once, after all labels are in place.
fig.tight_layout()
plt.show()

# Plot experiments, predictions and error bars

In [None]:
# Experiment, prediction and experimental-error band per data point, with the
# points ordered by experimental value; one panel for training, one for test.
def _sorted_results(y_true, y_pred, y_err):
    """Return a DataFrame of experiment / prediction / error sorted ascending
    by experiment, then by prediction.

    Inputs are flattened to 1-D first so that (n, 1) column vectors can be
    stored as ordinary columns.
    """
    frame = pd.DataFrame({
        'Experiment': np.ravel(y_true),
        'Prediction': np.ravel(y_pred),
        'error': np.ravel(y_err),
    })
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    return frame.sort_values(['Experiment','Prediction'],ascending=[True,True])


def _error_panel(ax, frame, title, xmax, size):
    """Scatter experiment, prediction and experiment +/- error against row position.

    ax    -- matplotlib Axes to draw on
    frame -- sorted DataFrame produced by _sorted_results
    title -- panel title
    xmax  -- upper x-axis limit
    size  -- scatter marker size
    """
    positions = np.arange(len(frame))
    ax.set_xlim((0,xmax))
    ax.set_ylim((-1,13))
    ax.scatter(positions,frame['Experiment'],color="blue",s=size,label='Experiment')
    ax.scatter(positions,frame['Prediction'],color="red",s=size,label='Prediction')
    ax.scatter(positions,frame['Experiment']+frame['error'],color="green",s=size,label='Experiment Error')
    ax.scatter(positions,frame['Experiment']-frame['error'],color="green",s=size)
    ax.set_title(title,fontsize=25)
    ax.legend(loc='upper left',fontsize=20)
    ax.tick_params(axis='both', which='major',labelsize =15)


fig = plt.figure(figsize=(20,16))
ax = fig.add_subplot(111)    # full-figure axes, used only for the shared axis labels
ax1 = fig.add_subplot(211)   # training panel
ax2 = fig.add_subplot(212)   # test panel

# x-limits (2300 / 260) are the original hard-coded values.
# NOTE(review): presumably chosen to cover n_train / n_test - confirm.
_error_panel(ax1, _sorted_results(Y_train, yh, e_train),
             'MLP Regressor Prediction on Training Data', xmax=2300, size=2)

result = _sorted_results(Y_test, yh2, e_test)
_error_panel(ax2, result,
             'MLP Regressor Prediction on Test Data', xmax=260, size=8)

ax.set_xlabel('Data points',fontsize=20)
ax.set_ylabel('Conductivity($S*m^2/mol$)',fontsize=20)
# Hide the frame and ticks of the full-figure axes so only its labels remain.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
# tick_params expects booleans here; the string 'off' is truthy and would not
# switch the ticks off.
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
fig.tight_layout()
plt.show()

# Plot the error over the course of training

In [None]:
# Training / test error per epoch.
# The training loop increments ep before storing, so the recorded values live
# at indices 1..ep; index 0 is never written. Slicing [1:ep+1] lines the values
# up with epochs 1..ep (the original [:ep] plotted the empty slot 0 and dropped
# the last epoch).
epochs = np.arange(1, ep + 1)

plt.figure(figsize=(10,4))
plt.xlim((0,ep))
plt.ylim((0,0.06))
plt.plot(epochs,L_train[1:ep+1],c="orange",label='Train error')
plt.plot(epochs,L_test[1:ep+1],c="blue",label='Test error')
# The stored quantity is norm(prediction - target) / n_samples, not a mean of
# squared residuals, so the title and axis label name it as what it is.
plt.title("Training and test error of MLP Regressor")
plt.xlabel("Epoch")
plt.ylabel(r"$\|\hat{y}-y\|_2\,/\,n$")
plt.legend(loc="upper right")
plt.show()

In [None]:
# Reload the error curves written by the save cell so they can be plotted
# without retraining.
L_train = np.atleast_1d(np.genfromtxt("L_train.txt"))
L_test = np.atleast_1d(np.genfromtxt("L_test.txt"))
# The curves are indexed by epoch with slot 0 unused, so the last epoch number
# is length - 1. For the default 20001-entry file this is 20000, the value that
# was previously hard-coded.
ep = L_train.shape[0] - 1

# Plot relative error

In [None]:
# Signed relative error of the test predictions, plotted against the
# experimental value.
plt.figure(figsize=(8,6))
plt.title("Relative error percent of MLP Regressor")
# The title and y-label announce a percentage, so scale the fraction by 100.
# Any sample with Y_test == 0 yields a non-finite value here.
plt.scatter(Y_test,100*(yh2-Y_test)/Y_test,c='blue',s=2,alpha=0.5)
plt.xlabel("Experiment Values of Electric Conductivity")
plt.ylabel("Relative error%")
#plt.ylim((-200,200))
plt.show()