In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler


In [2]:
def WMAE(predict,Y,weight):
    """Weighted mean absolute error between `predict` and `Y`.

    Each absolute residual |predict - Y| is multiplied by `weight` (a scalar
    or anything that broadcasts against the residuals); the products are
    summed and the total is divided by the length of the first axis of `Y`.
    """
    n_rows = Y.shape[0]
    weighted_residuals = weight * np.abs(predict - Y)
    return np.sum(weighted_residuals) / n_rows
    
def NAE(predict,Y):
    """Normalized absolute error between `predict` and `Y`.

    Every absolute residual |predict - Y| is divided element-wise by the
    matching entry of `Y`; the ratios are summed and the total is divided by
    the length of the first axis of `Y`.
    """
    relative_residuals = np.abs(predict - Y) / Y
    return np.sum(relative_residuals) / Y.shape[0]

def err(predict,Y):
    """Print per-target and summed WMAE and NAE for a three-column prediction.

    `predict` and `Y` are indexed as 2-D arrays with at least three columns.
    Columns 0, 1 and 2 are reported as penetration rate, mesh size and alpha
    respectively, and are weighted 300, 1 and 200 in the WMAE part.
    Nothing is returned; the four report lines go to stdout.
    """
    n_rows = Y.shape[0]
    weights = [300, 1, 200]
    abs_err = np.abs(predict - Y)

    def column_means(table):
        # Sum each of the first three columns on its own, then divide by the row count.
        return [np.sum(table[:, col]) / n_rows for col in range(3)]

    # Weighted absolute error, reported per target and as a total.
    rate_err, mesh_err, alpha_err = column_means(np.multiply(abs_err, weights))
    print('WMAE: alpha err = {0}, mesh_size err = {1}, penetration rate err = {2}'.format(alpha_err,mesh_err,rate_err))
    total_wmae = alpha_err + mesh_err + rate_err
    print('WMAE = {0}'.format(total_wmae))

    # Absolute error relative to the true value, reported the same way.
    rate_err, mesh_err, alpha_err = column_means(np.divide(abs_err, Y))
    print('NAE: alpha err = {0}, mesh_size err = {1}, penetration rate err = {2}'.format(alpha_err,mesh_err,rate_err))
    total_nae = alpha_err + mesh_err + rate_err
    print('NAE = {0}'.format(total_nae))


In [63]:
# Read the three arrays; each archive is accessed through its 'arr_0' entry.
with np.load('X_train.npz') as data:
    X_tr = data['arr_0']
with np.load('Y_train.npz') as data:
    Y_tr = data['arr_0']
with np.load('X_test.npz') as data:
    X_te = data['arr_0']

# Hold out 20% of the training rows as a validation split (fixed seed).
X_tr, X_val, Y_tr, Y_val = train_test_split(
    X_tr, Y_tr, test_size=0.2, random_state=11
)

# Standardize: the scaler is fitted on the training split only and then
# applied unchanged to the training, validation and test features.
sc = StandardScaler()
sc.fit(X_tr)
X_tr, X_val, X_te = [sc.transform(split) for split in (X_tr, X_val, X_te)]



In [91]:
# Linear regression on the leading feature columns: sweep how many of the
# first columns are used (1000, 2000, ..., 5000) and record the validation
# WMAE of each of the three targets.

rate_loss, mesh_loss, alpha_loss = [], [], []

# Target columns 0, 1, 2 (rate, mesh size, alpha). The design matrix is the
# same for all three, so they are fitted jointly: LinearRegression accepts a
# 2-D y and solves an independent least-squares problem per column, which
# replaces three fits on identical features with one.
targets_tr = Y_tr[:, :3]

for i in range(5):
    k = 1000 + 1000 * i
    x_tr = X_tr[:, :k]
    x_val = X_val[:, :k]

    # ordinary least squares, all three targets at once
    clf = LinearRegression().fit(x_tr, targets_tr)
    # predict() returns one column per target, in the order of targets_tr
    rate_val, mesh_val, alpha_val = clf.predict(x_val).T
    print('iteration {0} is done.'.format(i))
    rate_loss.append(WMAE(rate_val,Y_val[:,0],300))
    mesh_loss.append(WMAE(mesh_val,Y_val[:,1],1))
    alpha_loss.append(WMAE(alpha_val,Y_val[:,2],200))

iteration 0 is done.
iteration 1 is done.
iteration 2 is done.
iteration 3 is done.
iteration 4 is done.


In [92]:
# Plot the validation WMAE of each target against the number of leading
# features used in the sweep, label the minimum of every curve, and save the
# figure to 'loss.png'.
k = range(5)
fig, ax = plt.subplots()
for losses, color, label in ((rate_loss, 'red', 'rate'),
                             (mesh_loss, 'blue', 'mesh size'),
                             (alpha_loss, 'black', 'alpha')):
    ax.plot(k, losses, color=color, label=label)
    ax.scatter(k, losses, color=color, s=30)
ax.set_xlabel('MSD feature ( starting from feature 0 )')
ax.set_ylabel('Error')

# Sweep index with the lowest loss for each target.
a, b, c = np.argmin(rate_loss), np.argmin(mesh_loss), np.argmin(alpha_loss)
print(a,b,c)
for best, losses, color in ((a, rate_loss, 'red'),
                            (b, mesh_loss, 'blue'),
                            (c, alpha_loss, 'black')):
    # Text sits one unit above the minimum; sweep index i covers columns 0..1000+1000*i.
    ax.text(k[best], losses[best]+1,
            'feature {0}-{1} : {2:.4f}'.format(0,1000+best*1000,losses[best]),
            fontsize=8, color=color)

# Tick positions are sweep indices; tick labels are the matching feature counts.
ax.set_xticks(list(range(5)))
ax.set_xticklabels(list(range(1000,6000,1000)))
ax.legend(loc='upper right')
fig.savefig('loss.png', dpi=300)
# Close (rather than clear) the figure so no empty figure is left open for
# the notebook to render after the file has been written.
plt.close(fig)

0 0 0


<Figure size 432x288 with 0 Axes>

In [88]:
# Linear regression on the feature block that starts at column 5300: sweep
# the block width (100, 200, ..., 700 columns) and record the validation WMAE
# of the third target (column 2) only.
# NOTE: rate_loss and mesh_loss are reset to empty lists here as well, even
# though this sweep appends only to alpha_loss.
rate_loss, mesh_loss, alpha_loss = [], [], []

s = 5300
y2 = Y_tr[:,2]  # third target; identical for every iteration
for i in range(7):
    k = 5400 + i * 100
    x_tr = X_tr[:,s:k]
    x_val = X_val[:,s:k]

    # ordinary least squares on the third target
    clf = LinearRegression()
    clf.fit(x_tr, y2)
    alpha_val = clf.predict(x_val)
    print('iteration {0} is done.'.format(i))
    alpha_loss.append(WMAE(alpha_val,Y_val[:,2],200))


iteration 0 is done.
iteration 1 is done.
iteration 2 is done.
iteration 3 is done.
iteration 4 is done.
iteration 5 is done.
iteration 6 is done.


In [90]:
# Plot the validation WMAE of the third target against the width of the
# feature block that starts at column 5300, label the minimum, and save the
# figure to 'vac_loss_v4.png'.
k = range(7)
fig, ax = plt.subplots()
ax.plot(k,alpha_loss,color='black',label='alpha')
ax.scatter(k,alpha_loss,color='black', s=30)
ax.set_xlabel('VAC feature (starting from feature 5300)')
ax.set_ylabel('Error')

# Sweep index with the lowest loss; index i covers columns 5300..5400+100*i.
c = np.argmin(alpha_loss)
ax.text(k[c]-1,alpha_loss[c]+0.01,
        'feature {0}-{1} : {2:.4f}'.format(5300,5400+c*100,alpha_loss[c]),
        fontsize=8,color='black')

# Tick positions are sweep indices; tick labels are the matching block widths.
ax.set_xticks(list(range(7)))
ax.set_xticklabels(list(range(100,800,100)))
ax.legend(loc='upper right')
fig.savefig('vac_loss_v4.png', dpi=300)
# Close (rather than clear) the figure so no empty figure is left open for
# the notebook to render after the file has been written.
plt.close(fig)

<Figure size 432x288 with 0 Axes>

In [53]:
# Read the three per-target prediction files, place them side by side as
# columns (file order: y0, y1, y2) and write the result as one CSV.
rate_te, meshsize_te, alpha_te = [
    np.loadtxt(path) for path in ('gbm_y0.csv', 'gbm_y1.csv', 'gbm_y2.csv')
]
Y_pre_te = np.transpose(np.array([rate_te, meshsize_te, alpha_te]))
np.savetxt('gbm.csv', Y_pre_te, delimiter=',')