In [8]:
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler


In [2]:
def WMAE(predict,Y,weight):
    """Weighted mean absolute error.

    Computes ``sum(weight * |predict - Y|) / Y.shape[0]``.

    Parameters
    ----------
    predict : array-like broadcastable against ``Y``
        Predicted values.
    Y : numpy array
        Reference values; its first dimension is the divisor.
    weight : scalar or array broadcastable against ``Y``
        Multiplier applied to every absolute error before summing.

    Returns
    -------
    The weighted absolute-error sum divided by the number of rows of ``Y``.
    """
    abs_error = np.abs(predict - Y)
    return np.sum(weight * abs_error) / Y.shape[0]
    
def NAE(predict,Y):
    """Normalised absolute error.

    Computes ``sum(|predict - Y| / Y) / Y.shape[0]``. The division is by ``Y``
    itself (not its absolute value), so zero or negative reference values are
    not treated specially.

    Parameters
    ----------
    predict : array-like broadcastable against ``Y``
        Predicted values.
    Y : numpy array
        Reference values; also the per-element normaliser.

    Returns
    -------
    The summed relative error divided by the number of rows of ``Y``.
    """
    relative_error = np.abs(predict - Y) / Y
    return np.sum(relative_error) / Y.shape[0]

def err(predict,Y):
    """Print per-target and total WMAE and NAE for a three-column prediction.

    Columns 0, 1 and 2 are reported as penetration rate, mesh size and alpha,
    and are weighted by 300, 1 and 200 respectively in the WMAE. Nothing is
    returned; the four summary lines are written to stdout.

    Parameters
    ----------
    predict : 2-D numpy array with at least three columns
        Predicted targets.
    Y : 2-D numpy array of the same shape
        Reference targets; also the normaliser for the NAE.
    """
    n_rows = Y.shape[0]
    abs_diff = np.abs(predict - Y)

    def column_means(table):
        # Sum each of the first three columns separately, then divide by the row count.
        return [np.sum(table[:, col]) / n_rows for col in range(3)]

    # Weighted absolute error, in column order (rate, mesh size, alpha).
    rate_err, mesh_err, alpha_err = column_means(abs_diff * np.array([300, 1, 200]))
    print('WMAE: alpha err = {0}, mesh_size err = {1}, penetration rate err = {2}'.format(alpha_err,mesh_err,rate_err))
    print('WMAE = {0}'.format(alpha_err + mesh_err + rate_err))

    # Relative absolute error: each error is divided by its reference value.
    rate_err, mesh_err, alpha_err = column_means(abs_diff / Y)
    print('NAE: alpha err = {0}, mesh_size err = {1}, penetration rate err = {2}'.format(alpha_err,mesh_err,rate_err))
    print('NAE = {0}'.format(alpha_err + mesh_err + rate_err))


In [33]:
def _load_npz_array(path):
    """Return the array stored under key 'arr_0' in the .npz archive at `path`."""
    with np.load(path) as archive:
        return archive['arr_0']


# Hold out 20% of the labelled data for validation (fixed seed for a stable split).
X_tr, X_val, Y_tr, Y_val = train_test_split(
    _load_npz_array('X_train.npz'),
    _load_npz_array('Y_train.npz'),
    test_size=0.2,
    random_state=11,
)
X_te = _load_npz_array('X_test.npz')

# Standardize: the scaler is fitted on the training split only and then
# applied unchanged to the validation and test features.
sc = StandardScaler().fit(X_tr)
X_tr, X_val, X_te = (sc.transform(part) for part in (X_tr, X_val, X_te))


In [4]:
# Row counts of each split and the feature dimension, reused by later cells.
N_train, D = X_tr.shape[:2]
N_val, N_test = X_val.shape[0], X_te.shape[0]

In [104]:

# Ordinary-least-squares baseline: one linear model per target column,
# fitted on every feature, scored by validation WMAE.
y0, y1, y2 = Y_tr[:,0], Y_tr[:,1], Y_tr[:,2]
clf = LinearRegression()


def _fit_then_predict(model, target):
    """Refit `model` on the training features and predict the train and validation sets."""
    model.fit(X_tr, target)
    return model.predict(X_tr), model.predict(X_val)


# first target (rate)
rate_tr, rate_val = _fit_then_predict(clf, y0)
# second target (mesh size)
mesh_tr, mesh_val = _fit_then_predict(clf, y1)
# third target (alpha)
alpha_tr, alpha_val = _fit_then_predict(clf, y2)

# Validation WMAE per target, using the per-target weights 300 / 1 / 200.
for prediction, column, weight in ((rate_val, 0, 300), (mesh_val, 1, 1), (alpha_val, 2, 200)):
    print(WMAE(prediction, Y_val[:,column], weight))


57.39897974014383
34.83628206536435
1.7447335637657755


In [96]:
# Linear-regression feature-window sweep.
# For i = 0..8 the models are fitted on feature columns [100, 200 + 100*i)
# and the validation WMAE of each target is appended to its loss list.
# Changes from the earlier version: the never-read timer `st` and the
# duplicate slice `X` (identical to `x_tr`) are gone, and the targets and
# window start are set once instead of on every iteration.

rate_loss, mesh_loss, alpha_loss = list(),list(),list()

# The targets and the window start do not depend on the iteration.
y0 = Y_tr[:,0]
y1 = Y_tr[:,1]
y2 = Y_tr[:,2]
s = 100

for i in range(9):
    k = 200 + i*100

    x_tr = X_tr[:,s:k]
    x_val = X_val[:,s:k]

    # ordinary least squares; the same estimator is refitted for each target
    clf = LinearRegression()
    # first target
    clf.fit(x_tr, y0)
    rate_tr = clf.predict(x_tr)
    rate_val = clf.predict(x_val)
    # second target
    clf.fit(x_tr, y1)
    mesh_tr = clf.predict(x_tr)
    mesh_val = clf.predict(x_val)
    # third target
    clf.fit(x_tr, y2)
    alpha_tr = clf.predict(x_tr)
    alpha_val = clf.predict(x_val)
    print('iteration {0} is done.'.format(i))
    rate_loss.append(WMAE(rate_val,Y_val[:,0],300))
    mesh_loss.append(WMAE(mesh_val,Y_val[:,1],1))
    alpha_loss.append(WMAE(alpha_val,Y_val[:,2],200))


iteration 0 is done.
iteration 1 is done.
iteration 2 is done.
iteration 3 is done.
iteration 4 is done.
iteration 5 is done.
iteration 6 is done.
iteration 7 is done.
iteration 8 is done.


In [97]:
# Plot the validation loss of every target against the sweep index and save
# the figure to disk; the best window of each target is annotated.
k = range(9)
loss_series = (
    (rate_loss, 'red', 'rate'),
    (mesh_loss, 'blue', 'mesh size'),
    (alpha_loss, 'black', 'alpha'),
)

# One line plus markers per target.
for losses, colour, name in loss_series:
    plt.plot(k, losses, color=colour, label=name)
    plt.scatter(k, losses, color=colour, s=30)
plt.xlabel('MSD feature ( starting from feature 100 )')
plt.ylabel('Error')

# Index of the lowest loss for each target.
a, b, c = (np.argmin(losses) for losses, _, _ in loss_series)
print(a,b,c)
for best, (losses, colour, _) in zip((a, b, c), loss_series):
    # Label sits one unit above the minimum and names the window 100..(200 + 100*best).
    plt.text(k[best], losses[best]+1,
             'feature {0}-{1} : {2:.4f}'.format(100, 200+best*100, losses[best]),
             fontsize=8, color=colour)

plt.xticks(range(9),range(100,1000,100))
plt.legend(loc='upper right')
plt.savefig('loss.png', dpi=300)
# Clear the current figure after saving.
plt.clf()

0 1 0


<Figure size 432x288 with 0 Axes>

In [31]:
# Uniform voting (plain averaging) of linear regressions fitted on random row
# subsamples. Every round draws 60% of the training rows without replacement
# and fits one model per target on that target's leading feature columns
# (200 / 300 / 100 columns for targets 0 / 1 / 2).
#
# NOTE: the earlier version built stride-based feature subsets
# (`random_step`, `t*_selected_features`) but never passed them to a model,
# so no feature bagging took place. That dead code is removed; the ensemble
# varies the rows only, exactly as the fitted models always did.
rate_tr, rate_val, rate_te = np.zeros(N_train), np.zeros(N_val), np.zeros(N_test)
mesh_tr, mesh_val, mesh_te = np.zeros(N_train), np.zeros(N_val), np.zeros(N_test)
alpha_tr, alpha_val, alpha_te = np.zeros(N_train), np.zeros(N_val), np.zeros(N_test)

n_iter = 300
# Seeded generator so the row subsamples, and therefore the averaged
# predictions, are repeatable after a kernel restart.
bagging_rng = np.random.RandomState(11)

# (target column in Y, number of leading feature columns, (train, val, test) accumulators)
bagging_targets = (
    (0, 200, (rate_tr, rate_val, rate_te)),
    (1, 300, (mesh_tr, mesh_val, mesh_te)),
    (2, 100, (alpha_tr, alpha_val, alpha_te)),
)
feature_sets = (X_tr, X_val, X_te)

st = time.time()
for i in range(n_iter):
    random_id = bagging_rng.choice(N_train, size=int(0.6 * N_train), replace=False)
    X = X_tr[random_id,:]

    for column, n_features, accumulators in bagging_targets:
        # ordinary least squares on this round's row subsample
        clf = LinearRegression()
        clf.fit(X[:,0:n_features], Y_tr[random_id,column])
        for total, features in zip(accumulators, feature_sets):
            # In-place add, so the named accumulator arrays above are updated.
            total += clf.predict(features[:,0:n_features])

print('Training is complete. Total time: {:>5.2f}s'.format(time.time()-st))

# Turn the accumulated sums into uniform averages.
rate_tr, rate_val, rate_te = rate_tr / n_iter, rate_val / n_iter, rate_te / n_iter
mesh_tr, mesh_val, mesh_te = mesh_tr / n_iter, mesh_val / n_iter, mesh_te / n_iter
alpha_tr, alpha_val, alpha_te = alpha_tr / n_iter, alpha_val / n_iter, alpha_te / n_iter

# Report train and validation metrics per target. The validation lines used to
# be printed with a "train" label, which made the two splits indistinguishable.
for train_pred, val_pred, column, weight in (
    (rate_tr, rate_val, 0, 300),
    (mesh_tr, mesh_val, 1, 1),
    (alpha_tr, alpha_val, 2, 200),
):
    print('train WMAE = {}'.format(WMAE(train_pred,Y_tr[:,column],weight)))
    print('train NAE = {}'.format(NAE(train_pred,Y_tr[:,column])))
    print('val WMAE = {}'.format(WMAE(val_pred,Y_val[:,column],weight)))
    print('val NAE = {}'.format(NAE(val_pred,Y_val[:,column])))

Training is complete. Total time: 1301.74s
train WMAE = 58.22361677752707
train NAE = 2.7372502885691423
train WMAE = 58.42297013761044
train NAE = 2.1177507249906844
train WMAE = 36.54169863859226
train NAE = 0.39097981329567755
train WMAE = 36.88138398480758
train NAE = 0.3920789915358639
train WMAE = 2.2224555204023813
train NAE = 0.01599681788742148
train WMAE = 2.2237117628029455
train NAE = 0.0159694880908934


In [99]:
# MLP for the first target (rate), trained on the first 200 feature columns.
st = time.time()
# NOTE(review): per the scikit-learn docs, `learning_rate` is only used with
# solver='sgd'; with 'adam' it is presumably a no-op. Confirm before tuning it.
model = MLPRegressor(hidden_layer_sizes=(16,16),activation='relu', random_state=26,n_iter_no_change=10,
                                       solver='adam',learning_rate='adaptive', verbose=False,
                                       max_iter=3000,learning_rate_init=0.01,alpha= 0.01)

# The target is multiplied by this factor for training and every prediction is
# divided by it again, so the reported metrics are in the original units.
rate_scale = 100
rate_n_features = 200

y = Y_tr[:,0] * rate_scale
model.fit(X_tr[:,0:rate_n_features],y)
print('Training is complete. Total time: {:>5.2f}s'.format(time.time()-st))

rate_tr = model.predict(X_tr[:,0:rate_n_features]) / rate_scale
rate_val = model.predict(X_val[:,0:rate_n_features]) / rate_scale
rate_te = model.predict(X_te[:,0:rate_n_features]) / rate_scale

print('train WMAE = {}'.format(WMAE(rate_tr,Y_tr[:,0],300)))
print('train NAE = {}'.format(NAE(rate_tr,Y_tr[:,0])))
# Validation metrics: previously printed with a "train" label.
print('val WMAE = {}'.format(WMAE(rate_val,Y_val[:,0],300)))
print('val NAE = {}'.format(NAE(rate_val,Y_val[:,0])))

Training is complete. Total time: 13.64s
train WMAE = 28.162861471139355
train NAE = 0.7037944755177846
train WMAE = 28.236324692480892
train NAE = 0.5195836839333107


In [101]:
# MLP for the second target (mesh size), trained on the first 300 feature
# columns. The target is used unscaled here.
st = time.time()

# NOTE(review): per the scikit-learn docs, `learning_rate` is only used with
# solver='sgd'; with 'adam' it is presumably a no-op. Confirm before tuning it.
model_y1 = MLPRegressor(hidden_layer_sizes=(15,15),activation='relu', random_state=26,n_iter_no_change=15,
                                       solver='adam',learning_rate='adaptive', verbose=False,
                                       max_iter=3000,learning_rate_init=0.001)
y1 = Y_tr[:,1]
model_y1.fit(X_tr[:,0:300], y1)
print('Training is complete. Total time: {:>5.2f}s'.format(time.time()-st))

meshsize_tr = model_y1.predict(X_tr[:,0:300])
meshsize_val = model_y1.predict(X_val[:,0:300])
meshsize_te = model_y1.predict(X_te[:,0:300])

print('train WMAE = {}'.format(WMAE(meshsize_tr,Y_tr[:,1],1)))
print('train NAE = {}'.format(NAE(meshsize_tr,Y_tr[:,1])))
# Validation metrics: previously printed with a "train" label.
print('val WMAE = {}'.format(WMAE(meshsize_val,Y_val[:,1],1)))
print('val NAE = {}'.format(NAE(meshsize_val,Y_val[:,1])))

Training is complete. Total time: 60.35s
train WMAE = 18.777947638634416
train NAE = 0.15565746850361248
train WMAE = 18.893850114347476
train NAE = 0.15811205307176654


In [102]:

# MLP for the third target (alpha), trained on the first 100 feature columns.
st = time.time()
# NOTE(review): per the scikit-learn docs, `learning_rate` is only used with
# solver='sgd'; with 'adam' it is presumably a no-op. Confirm before tuning it.
model = MLPRegressor(hidden_layer_sizes=(12,12),activation='relu', random_state=26,n_iter_no_change=10,
                                       solver='adam',learning_rate='adaptive', verbose=False,
                                       max_iter=1000,learning_rate_init=0.01)

# The target is multiplied by this factor for training and every prediction is
# divided by it again, so the reported metrics are in the original units.
alpha_scale = 1000
alpha_n_features = 100

y = Y_tr[:,2] * alpha_scale
model.fit(X_tr[:,0:alpha_n_features],y)
print('Training is complete. Total time: {:>5.2f}s'.format(time.time()-st))

alpha_tr = model.predict(X_tr[:,0:alpha_n_features]) / alpha_scale
alpha_val = model.predict(X_val[:,0:alpha_n_features]) / alpha_scale
alpha_te = model.predict(X_te[:,0:alpha_n_features]) / alpha_scale

print('train WMAE = {}'.format(WMAE(alpha_tr,Y_tr[:,2],200)))
print('train NAE = {}'.format(NAE(alpha_tr,Y_tr[:,2])))
# Validation metrics: previously printed with a "train" label.
print('val WMAE = {}'.format(WMAE(alpha_val,Y_val[:,2],200)))
print('val NAE = {}'.format(NAE(alpha_val,Y_val[:,2])))

Training is complete. Total time:  7.40s
train WMAE = 1.226322915369723
train NAE = 0.008629638532114969
train WMAE = 1.2340058740284179
train NAE = 0.008642544321601796


In [103]:
# Stack the per-target predictions as columns in the order (rate, mesh size,
# alpha), one matrix per split.
Y_pre_tr, Y_pre_val, Y_pre_te = (
    np.asarray(columns).T
    for columns in (
        (rate_tr, meshsize_tr, alpha_tr),
        (rate_val, meshsize_val, alpha_val),
        (rate_te, meshsize_te, alpha_te),
    )
)

# Write the test-set predictions to disk, then print the train and validation
# error summaries.
np.savetxt('MLP.csv', Y_pre_te, delimiter=',')
for predicted, actual in ((Y_pre_tr, Y_tr), (Y_pre_val, Y_val)):
    err(predicted, actual)

WMAE: alpha err = 1.226322915369723, mesh_size err = 18.777947638634416, penetration rate err = 28.162861471139355
WMAE = 48.167132025143495
NAE: alpha err = 0.008629638532114969, mesh_size err = 0.15565746850361248, penetration rate err = 0.7037944755177846
NAE = 0.8680815825535121
WMAE: alpha err = 1.2340058740284179, mesh_size err = 18.893850114347476, penetration rate err = 28.236324692480892
WMAE = 48.36418068085679
NAE: alpha err = 0.008642544321601796, mesh_size err = 0.15811205307176654, penetration rate err = 0.5195836839333107
NAE = 0.686338281326679


In [6]:
# Read the three per-target prediction files (presumably gradient-boosting
# outputs, judging by the file names; confirm) and write them side by side
# as one three-column CSV.
rate_te, meshsize_te, alpha_te = (
    np.loadtxt(path)
    for path in ('gbm_y0_diff.csv', 'gbm_y1_diff.csv', 'gbm_y2_diff.csv')
)
Y_pre_te = np.asarray((rate_te, meshsize_te, alpha_te)).T
np.savetxt('gbm_diff_v3.csv', Y_pre_te, delimiter=',')