In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_log_error 

In [2]:
df = pd.read_csv("../Data/Fixed-Fixed.csv")
df.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.0,0,0,1,52.976,18.084,9.0351
1,0.0,0,0,2,145.92,17.293,9.3771
2,0.0,0,0,3,285.99,17.43,9.5803
3,0.0,0,0,4,323.4,28.366,10.872
4,0.0,0,0,5,472.81,17.537,9.719


In [3]:
seed=140

# Preprocessing

In [4]:
X = df[["depth", "l1", "l2", "Mode"]].values
Y = df[["Frequency", "Max_Deflection", "Average_Deflection"]].values

In [5]:
# Split the data

_X_train, _X_test, _y_train, _y_test = train_test_split(X, Y, test_size=0.30, random_state=seed)

In [6]:
# Standard Scalar

std_X = StandardScaler()
std_X.fit(_X_train[:, :-1]) # Fitting on the Numerical Part of the Data

std_Y = StandardScaler()
std_Y.fit(_y_train) # Fitting on the Numerical Part of the Data

StandardScaler(copy=True, with_mean=True, with_std=True)

In [7]:
get_feature = lambda x: np.hstack((std_X.transform(x[:, :-1]), np.eye(6)[x[:, -1].astype(np.int8) - 1]))

In [8]:
get_train_output = lambda y: std_Y.transform(y)
get_output = lambda y: std_Y.inverse_transform(y)

# Data

In [9]:
X_train = get_feature(_X_train)
y_train = get_train_output(_y_train)

In [10]:
X_test = get_feature(_X_test)
y_test = _y_test

# Gradient Boosted Trees Method 1

### 1. Training

In [11]:
gb = GradientBoostingRegressor(loss="huber", min_samples_split=2, n_estimators=100, random_state=seed)

In [12]:
gb.get_params()

{'alpha': 0.9,
 'ccp_alpha': 0.0,
 'criterion': 'friedman_mse',
 'init': None,
 'learning_rate': 0.1,
 'loss': 'huber',
 'max_depth': 3,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_iter_no_change': None,
 'presort': 'deprecated',
 'random_state': 140,
 'subsample': 1.0,
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [13]:
parametrs = {
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 5],
    'subsample': [1.0, 0.95, 0.8],
}

In [14]:
model1 = GridSearchCV(gb, parametrs)
model2 = GridSearchCV(gb, parametrs)
model3 = GridSearchCV(gb, parametrs)

In [15]:
model1.fit(X_train, y_train[:,0])

GridSearchCV(cv=None, error_score=nan,
             estimator=GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0,
                                                 criterion='friedman_mse',
                                                 init=None, learning_rate=0.1,
                                                 loss='huber', max_depth=3,
                                                 max_features=None,
                                                 max_leaf_nodes=None,
                                                 min_impurity_decrease=0.0,
                                                 min_impurity_split=None,
                                                 min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=100,
                                                 n_iter_no_change=None,
           

In [16]:
# Best Parametrs

model1.best_params_

{'learning_rate': 0.2, 'max_depth': 5, 'subsample': 0.8}

In [17]:
model2.fit(X_train, y_train[:,1])

GridSearchCV(cv=None, error_score=nan,
             estimator=GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0,
                                                 criterion='friedman_mse',
                                                 init=None, learning_rate=0.1,
                                                 loss='huber', max_depth=3,
                                                 max_features=None,
                                                 max_leaf_nodes=None,
                                                 min_impurity_decrease=0.0,
                                                 min_impurity_split=None,
                                                 min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=100,
                                                 n_iter_no_change=None,
           

In [18]:
# Best Parametrs

model2.best_params_

{'learning_rate': 0.2, 'max_depth': 5, 'subsample': 0.8}

In [19]:
model3.fit(X_train, y_train[:,2])

GridSearchCV(cv=None, error_score=nan,
             estimator=GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0,
                                                 criterion='friedman_mse',
                                                 init=None, learning_rate=0.1,
                                                 loss='huber', max_depth=3,
                                                 max_features=None,
                                                 max_leaf_nodes=None,
                                                 min_impurity_decrease=0.0,
                                                 min_impurity_split=None,
                                                 min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=100,
                                                 n_iter_no_change=None,
           

In [20]:
# Best Parametrs

model3.best_params_

{'learning_rate': 0.2, 'max_depth': 5, 'subsample': 0.8}

### 2. Testing

In [21]:
res1 = model1.predict(X_test)
res2 = model2.predict(X_test)
res3 = model3.predict(X_test)

y_pred_mode1 = get_output(np.hstack((res1.reshape(-1, 1), res2.reshape(-1, 1), res3.reshape(-1, 1))))

In [22]:
assert y_pred_mode1.shape == y_test.shape

### 3. Evaluation

In [23]:
# Frequency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode1[:,0], y_test[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode1[:,0], y_test[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode1[:,0], y_test[:,0])))

Explained Varaince Score: 99.995443%
Mean Absolute Error: 0.541385
Mean Squared Error: 0.000011


In [24]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode1[:,1], y_test[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode1[:,1], y_test[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode1[:,1], y_test[:,1])))

Explained Varaince Score: 99.926909%
Mean Absolute Error: 0.050356
Mean Squared Error: 0.000031


In [25]:
# Average Defelction Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode1[:,2], y_test[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode1[:,2], y_test[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode1[:,2], y_test[:,2])))

Explained Varaince Score: 85.902303%
Mean Absolute Error: 0.217440
Mean Squared Error: 0.000812


# Gradient Boosted Trees Method 2

### 1. Learning

In [26]:
ab = MultiOutputRegressor(GradientBoostingRegressor(random_state=seed))
ab.get_params()

{'estimator__alpha': 0.9,
 'estimator__ccp_alpha': 0.0,
 'estimator__criterion': 'friedman_mse',
 'estimator__init': None,
 'estimator__learning_rate': 0.1,
 'estimator__loss': 'ls',
 'estimator__max_depth': 3,
 'estimator__max_features': None,
 'estimator__max_leaf_nodes': None,
 'estimator__min_impurity_decrease': 0.0,
 'estimator__min_impurity_split': None,
 'estimator__min_samples_leaf': 1,
 'estimator__min_samples_split': 2,
 'estimator__min_weight_fraction_leaf': 0.0,
 'estimator__n_estimators': 100,
 'estimator__n_iter_no_change': None,
 'estimator__presort': 'deprecated',
 'estimator__random_state': 140,
 'estimator__subsample': 1.0,
 'estimator__tol': 0.0001,
 'estimator__validation_fraction': 0.1,
 'estimator__verbose': 0,
 'estimator__warm_start': False,
 'estimator': GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                           init=None, learning_rate=0.1, loss='ls', max_depth=3,
                           max_features=None, max_l

In [27]:
parametrs = {
    'estimator__learning_rate': [0.05, 0.1, 0.2],
    'estimator__max_depth': [3, 5],
    'estimator__subsample': [1.0, 0.95, 0.8],
}

In [28]:
model4 = GridSearchCV(ab, parametrs)
model4.fit(X_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=MultiOutputRegressor(estimator=GradientBoostingRegressor(alpha=0.9,
                                                                                ccp_alpha=0.0,
                                                                                criterion='friedman_mse',
                                                                                init=None,
                                                                                learning_rate=0.1,
                                                                                loss='ls',
                                                                                max_depth=3,
                                                                                max_features=None,
                                                                                max_leaf_nodes=None,
                                                                                min_impurity_de

### 2. Validation

In [29]:
res4 = model4.predict(X_test)
y_pred_mode2 = get_output(res4)

### 3. Evaluation

In [30]:
# Requency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode2[:,0], y_test[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode2[:,0], y_test[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode2[:,0], y_test[:,0])))

Explained Varaince Score: 99.995437%
Mean Absolute Error: 0.548918
Mean Squared Error: 0.000010


In [31]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode2[:,1], y_test[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode2[:,1], y_test[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode2[:,1], y_test[:,1])))

Explained Varaince Score: 99.926628%
Mean Absolute Error: 0.053187
Mean Squared Error: 0.000031


In [32]:
# Average Defelction Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(y_pred_mode2[:,2], y_test[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(y_pred_mode2[:,2], y_test[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(y_pred_mode2[:,2], y_test[:,2])))

Explained Varaince Score: 86.432315%
Mean Absolute Error: 0.210537
Mean Squared Error: 0.000800


# Final Testing Phase

In this phase separeate simulations were done with new set of points generated random;y with different random seed.

In [33]:
df_test = pd.read_csv("../Data/Fixed-Fixed-Test.csv")
df_test.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.5,63,510,1,52.613,7.554,3.803
1,0.5,63,510,2,144.79,7.2355,3.6191
2,0.5,63,510,3,283.54,7.3149,4.0421
3,0.5,63,510,4,322.53,11.855,4.5453
4,0.5,63,510,5,468.26,7.4041,3.8655


In [34]:
test_X = df_test[["depth", "l1", "l2", "Mode"]].values
test_X = get_feature(test_X)

test_Y = df_test[["Frequency", "Max_Deflection", "Average_Deflection"]].values

### Mode 1

In [35]:
test_res1 = model1.predict(test_X)
test_res2 = model2.predict(test_X)
test_res3 = model3.predict(test_X)

test_pred_y_mode_1 = get_output(np.hstack((test_res1.reshape(-1, 1), test_res2.reshape(-1, 1), test_res3.reshape(-1, 1))))

In [36]:
assert test_pred_y_mode_1.shape == test_Y.shape

In [37]:
# Fequency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_1[:,0], test_Y[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_1[:,0], test_Y[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_1[:,0], test_Y[:,0])))

Explained Varaince Score: 99.983212%
Mean Absolute Error: 0.837889
Mean Squared Error: 0.000032


In [38]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_1[:,1], test_Y[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_1[:,1], test_Y[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_1[:,1], test_Y[:,1])))

Explained Varaince Score: 66.183898%
Mean Absolute Error: 11.327208
Mean Squared Error: 0.645350


In [39]:
# Average Defelction Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_1[:,2], test_Y[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_1[:,2], test_Y[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_1[:,2], test_Y[:,2])))

Explained Varaince Score: 64.564100%
Mean Absolute Error: 5.790220
Mean Squared Error: 0.566069


### Mode 2

In [40]:
test_res4 = model4.predict(test_X)
test_pred_y_mode_2 = get_output(test_res4)

In [41]:
assert test_pred_y_mode_2.shape == test_Y.shape

In [42]:
# Fequency Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_2[:,0], test_Y[:,0])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_2[:,0], test_Y[:,0])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_2[:,0], test_Y[:,0])))

Explained Varaince Score: 99.983357%
Mean Absolute Error: 0.869900
Mean Squared Error: 0.000033


In [43]:
# Max Deflection Outcome

print("Explained Varaince Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_2[:,1], test_Y[:,1])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_2[:,1], test_Y[:,1])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_2[:,1], test_Y[:,1])))

Explained Varaince Score: 66.204763%
Mean Absolute Error: 11.324687
Mean Squared Error: 0.645165


In [44]:
# Average Defelction Outcome

print("Explained Variance Score: {:0.6f}%".format(100*explained_variance_score(test_pred_y_mode_2[:,2], test_Y[:,2])))
print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(test_pred_y_mode_2[:,2], test_Y[:,2])))
print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(test_pred_y_mode_2[:,2], test_Y[:,2])))

Explained Variance Score: 63.888871%
Mean Absolute Error: 5.798547
Mean Squared Error: 0.567207
