In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_log_error 

In [2]:
df = pd.read_csv("../Data/Fixed-Fixed.csv")
df.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.0,0,0,1,52.976,18.084,9.0351
1,0.0,0,0,2,145.92,17.293,9.3771
2,0.0,0,0,3,285.99,17.43,9.5803
3,0.0,0,0,4,323.4,28.366,10.872
4,0.0,0,0,5,472.81,17.537,9.719


In [3]:
seed=140

# Preprocessing

In [4]:
X = df[["depth", "l1", "l2", "Mode"]].values
Y = df[["Frequency", "Max_Deflection", "Average_Deflection"]].values

In [5]:
# Split the data

_X_train, _X_test, _y_train, _y_test = train_test_split(X, Y, test_size=0.30, random_state=seed)

In [6]:
# Standard Scalar

std_X = StandardScaler()
std_X.fit(_X_train[:, :-1]) # Fitting on the Numerical Part of the Data

std_Y = StandardScaler()
std_Y.fit(_y_train) # Fitting on the Numerical Part of the Data

StandardScaler(copy=True, with_mean=True, with_std=True)

In [7]:
get_feature = lambda x: np.hstack((std_X.transform(x[:, :-1]), np.eye(6)[x[:, -1].astype(np.int8) - 1]))

In [8]:
get_train_output = lambda y: std_Y.transform(y)
get_output = lambda y: std_Y.inverse_transform(y)

# Data

In [9]:
X_train = get_feature(_X_train)
y_train = get_train_output(_y_train)

In [10]:
X_test = get_feature(_X_test)
y_test = _y_test

# XGradient Boosted Trees Method 1

### 1. Training

In [11]:
xgb = XGBRegressor(base_score=.95, 
                   n_jobs=4, 
                   n_random_state=seed, 
                   n_estimators=500, 
                   subsample=0.8, 
                   reg_alpha=0.001, 
                   reg_lambda=0.001)

In [12]:
xgb.get_params()

{'base_score': 0.95,
 'booster': 'gbtree',
 'colsample_bylevel': 1,
 'colsample_bytree': 1,
 'gamma': 0,
 'learning_rate': 0.1,
 'max_delta_step': 0,
 'max_depth': 3,
 'min_child_weight': 1,
 'missing': None,
 'n_estimators': 500,
 'n_jobs': 4,
 'nthread': None,
 'objective': 'reg:linear',
 'random_state': 0,
 'reg_alpha': 0.001,
 'reg_lambda': 0.001,
 'scale_pos_weight': 1,
 'seed': None,
 'silent': True,
 'subsample': 0.8,
 'n_random_state': 140}

In [13]:
parametrs = {
    'learning_rate': [0.05, 0.1, 0.2],
    'subsample': [1, 0.75, 0.5]
}

In [14]:
model1 = GridSearchCV(xgb, parametrs)
model2 = GridSearchCV(xgb, parametrs)
model3 = GridSearchCV(xgb, parametrs)

In [15]:
model1.fit(X_train, y_train[:,0])

GridSearchCV(cv=None, error_score=nan,
             estimator=XGBRegressor(base_score=0.95, booster='gbtree',
                                    colsample_bylevel=1, colsample_bytree=1,
                                    gamma=0, learning_rate=0.1,
                                    max_delta_step=0, max_depth=3,
                                    min_child_weight=1, missing=None,
                                    n_estimators=500, n_jobs=4,
                                    n_random_state=140, nthread=None,
                                    objective='reg:linear', random_state=0,
                                    reg_alpha=0.001, reg_lambda=0.001,
                                    scale_pos_weight=1, seed=None, silent=True,
                                    subsample=0.8),
             iid='deprecated', n_jobs=None,
             param_grid={'learning_rate': [0.05, 0.1, 0.2],
                         'subsample': [1, 0.75, 0.5]},
             pre_dispatch='2*n_jobs', re

In [16]:
# Best Parametrs

model1.best_params_

{'learning_rate': 0.1, 'subsample': 0.75}

In [17]:
model2.fit(X_train, y_train[:,1])

GridSearchCV(cv=None, error_score=nan,
             estimator=XGBRegressor(base_score=0.95, booster='gbtree',
                                    colsample_bylevel=1, colsample_bytree=1,
                                    gamma=0, learning_rate=0.1,
                                    max_delta_step=0, max_depth=3,
                                    min_child_weight=1, missing=None,
                                    n_estimators=500, n_jobs=4,
                                    n_random_state=140, nthread=None,
                                    objective='reg:linear', random_state=0,
                                    reg_alpha=0.001, reg_lambda=0.001,
                                    scale_pos_weight=1, seed=None, silent=True,
                                    subsample=0.8),
             iid='deprecated', n_jobs=None,
             param_grid={'learning_rate': [0.05, 0.1, 0.2],
                         'subsample': [1, 0.75, 0.5]},
             pre_dispatch='2*n_jobs', re

In [18]:
# Best Parametrs

model2.best_params_

{'learning_rate': 0.1, 'subsample': 1}

In [19]:
model3.fit(X_train, y_train[:,2])

GridSearchCV(cv=None, error_score=nan,
             estimator=XGBRegressor(base_score=0.95, booster='gbtree',
                                    colsample_bylevel=1, colsample_bytree=1,
                                    gamma=0, learning_rate=0.1,
                                    max_delta_step=0, max_depth=3,
                                    min_child_weight=1, missing=None,
                                    n_estimators=500, n_jobs=4,
                                    n_random_state=140, nthread=None,
                                    objective='reg:linear', random_state=0,
                                    reg_alpha=0.001, reg_lambda=0.001,
                                    scale_pos_weight=1, seed=None, silent=True,
                                    subsample=0.8),
             iid='deprecated', n_jobs=None,
             param_grid={'learning_rate': [0.05, 0.1, 0.2],
                         'subsample': [1, 0.75, 0.5]},
             pre_dispatch='2*n_jobs', re

In [20]:
# Best Parametrs

model3.best_params_

{'learning_rate': 0.2, 'subsample': 1}

### 2. Testing

In [21]:
res1 = model1.predict(X_test)
res2 = model2.predict(X_test)
res3 = model3.predict(X_test)

y_pred_mode1 = get_output(np.hstack((res1.reshape(-1, 1), res2.reshape(-1, 1), res3.reshape(-1, 1))))

In [22]:
assert y_pred_mode1.shape == y_test.shape

### 3. Evaluation

In [23]:
# Frequency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode1[:,0], y_test[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode1[:,0], y_test[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode1[:,0], y_test[:,0])))

Explained Varaince Score: 99.995569%
Mean Absolute Error: 0.578679
Mean Squared Error: 0.000015


In [24]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode1[:,1], y_test[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode1[:,1], y_test[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode1[:,1], y_test[:,1])))

Explained Varaince Score: 99.925420%
Mean Absolute Error: 0.057078
Mean Squared Error: 0.000032


In [25]:
# Average Defelction Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode1[:,2], y_test[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode1[:,2], y_test[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode1[:,2], y_test[:,2])))

Explained Varaince Score: 90.237347%
Mean Absolute Error: 0.184753
Mean Squared Error: 0.000588


# XGradient Boosted Trees Method 2

In [26]:
xgb = MultiOutputRegressor(XGBRegressor(base_score=.95, 
                                       n_jobs=4, 
                                       n_random_state=seed, 
                                       n_estimators=500, 
                                       subsample=0.8, 
                                       reg_alpha=0.001, 
                                       reg_lambda=0.001))

In [27]:
parametrs = {
    'estimator__learning_rate': [0.05, 0.1, 0.2],
    'estimator__subsample': [1, 0.75, 0.5]
}

In [28]:
model4 = GridSearchCV(xgb, parametrs)
model4.fit(X_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=MultiOutputRegressor(estimator=XGBRegressor(base_score=0.95,
                                                                   booster='gbtree',
                                                                   colsample_bylevel=1,
                                                                   colsample_bytree=1,
                                                                   gamma=0,
                                                                   learning_rate=0.1,
                                                                   max_delta_step=0,
                                                                   max_depth=3,
                                                                   min_child_weight=1,
                                                                   missing=None,
                                                                   n_estimators=500,
                                     

### 2. Validation

In [29]:
res4 = model4.predict(X_test)
y_pred_mode2 = get_output(res4)

### 3. Evaluation

In [30]:
# Requency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode2[:,0], y_test[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode2[:,0], y_test[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode2[:,0], y_test[:,0])))

Explained Varaince Score: 99.995034%
Mean Absolute Error: 0.626202
Mean Squared Error: 0.000015


In [31]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode2[:,1], y_test[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode2[:,1], y_test[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode2[:,1], y_test[:,1])))

Explained Varaince Score: 99.921480%
Mean Absolute Error: 0.059166
Mean Squared Error: 0.000033


In [32]:
# Average Defelction Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode2[:,2], y_test[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode2[:,2], y_test[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode2[:,2], y_test[:,2])))

Explained Varaince Score: 90.237347%
Mean Absolute Error: 0.184753
Mean Squared Error: 0.000588


# Final Testing Phase

In this phase separeate simulations were done with new set of points generated random;y with different random seed.

In [33]:
df_test = pd.read_csv("../Data/Fixed-Fixed-Test.csv")
df_test.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.5,63,510,1,52.613,7.554,3.803
1,0.5,63,510,2,144.79,7.2355,3.6191
2,0.5,63,510,3,283.54,7.3149,4.0421
3,0.5,63,510,4,322.53,11.855,4.5453
4,0.5,63,510,5,468.26,7.4041,3.8655


In [34]:
test_X = df_test[["depth", "l1", "l2", "Mode"]].values
test_X = get_feature(test_X)

test_Y = df_test[["Frequency", "Max_Deflection", "Average_Deflection"]].values

### Mode 1

In [35]:
test_res1 = model1.predict(test_X)
test_res2 = model2.predict(test_X)
test_res3 = model3.predict(test_X)

test_pred_y_mode_1 = get_output(np.hstack((test_res1.reshape(-1, 1), test_res2.reshape(-1, 1), test_res3.reshape(-1, 1))))

In [36]:
assert test_pred_y_mode_1.shape == test_Y.shape

In [37]:
# Fequency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_1[:,0], test_Y[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_1[:,0], test_Y[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_1[:,0], test_Y[:,0])))

Explained Varaince Score: 99.983048%
Mean Absolute Error: 0.894400
Mean Squared Error: 0.000035


In [38]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_1[:,1], test_Y[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_1[:,1], test_Y[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_1[:,1], test_Y[:,1])))

Explained Varaince Score: 66.156888%
Mean Absolute Error: 11.324528
Mean Squared Error: 0.645119


In [39]:
# Average Defelction Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_1[:,2], test_Y[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_1[:,2], test_Y[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_1[:,2], test_Y[:,2])))

Explained Varaince Score: 64.810115%
Mean Absolute Error: 5.792947
Mean Squared Error: 0.566364


### Mode 2

In [40]:
test_res4 = model4.predict(test_X)
test_pred_y_mode_2 = get_output(test_res4)

In [41]:
assert test_pred_y_mode_2.shape == test_Y.shape

In [42]:
# Fequency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_2[:,0], test_Y[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_2[:,0], test_Y[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_2[:,0], test_Y[:,0])))

Explained Varaince Score: 99.982764%
Mean Absolute Error: 0.907481
Mean Squared Error: 0.000037


In [43]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_2[:,1], test_Y[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_2[:,1], test_Y[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_2[:,1], test_Y[:,1])))

Explained Varaince Score: 66.158463%
Mean Absolute Error: 11.327694
Mean Squared Error: 0.645375


In [44]:
# Average Defelction Outcome

print("Explained Variance Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_2[:,2], test_Y[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_2[:,2], test_Y[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_2[:,2], test_Y[:,2])))

Explained Variance Score: 64.810115%
Mean Absolute Error: 5.792947
Mean Squared Error: 0.566364
