In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_log_error 

In [2]:
df = pd.read_csv("../Data/Fixed-Fixed.csv")
df.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.0,0,0,1,52.976,18.084,9.0351
1,0.0,0,0,2,145.92,17.293,9.3771
2,0.0,0,0,3,285.99,17.43,9.5803
3,0.0,0,0,4,323.4,28.366,10.872
4,0.0,0,0,5,472.81,17.537,9.719


In [3]:
seed=140

# Utility

In [4]:
def evaluate(pred, true, outcome):
    print("Explained Variance Score: {:0.6f}%".format(100*explained_variance_score(pred[:,outcome], true[:,outcome])))
    print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(pred[:,outcome], true[:,outcome])))
    print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(pred[:,outcome], true[:,outcome])))

# Preprocessing

In [5]:
X = df[["depth", "l1", "l2", "Mode"]].values
Y = df[["Frequency", "Max_Deflection", "Average_Deflection"]].values

In [6]:
# Split the data

_X_train, _X_test, _y_train, _y_test = train_test_split(X, Y, test_size=0.30, random_state=42)

In [7]:
# Standard Scalar

std_X = StandardScaler()
std_X.fit(_X_train[:, :-1]) # Fitting on the Numerical Part of the Data

std_Y = StandardScaler()
std_Y.fit(_y_train) # Fitting on the Numerical Part of the Data

StandardScaler(copy=True, with_mean=True, with_std=True)

In [8]:
get_feature = lambda x: np.hstack((std_X.transform(x[:, :-1]), np.eye(6)[x[:, -1].astype(np.int8) - 1]))

In [9]:
get_train_output = lambda y: std_Y.transform(y)
get_output = lambda y: std_Y.inverse_transform(y)

# Data

In [10]:
X_train = get_feature(_X_train)
y_train = get_train_output(_y_train)

In [11]:
X_test = get_feature(_X_test)
y_test = _y_test

# Adaptive Boosting in Trees Method 1

### 1. Training

In [12]:
ab = AdaBoostRegressor(random_state=seed)

In [13]:
ab.get_params()

{'base_estimator': None,
 'learning_rate': 1.0,
 'loss': 'linear',
 'n_estimators': 50,
 'random_state': 140}

In [14]:
parametrs = {
    'learning_rate': [0.1, 0.5, 1.0],
    'loss': ['linear', 'square', 'exponential'],
    'n_estimators': [50, 100, 500]
}

In [15]:
model1 = GridSearchCV(ab, parametrs)
model2 = GridSearchCV(ab, parametrs)
model3 = GridSearchCV(ab, parametrs)

In [16]:
model1.fit(X_train, y_train[:,0])

GridSearchCV(cv=None, error_score=nan,
             estimator=AdaBoostRegressor(base_estimator=None, learning_rate=1.0,
                                         loss='linear', n_estimators=50,
                                         random_state=140),
             iid='deprecated', n_jobs=None,
             param_grid={'learning_rate': [0.1, 0.5, 1.0],
                         'loss': ['linear', 'square', 'exponential'],
                         'n_estimators': [50, 100, 500]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [17]:
# Best Parametrs

model1.best_params_

{'learning_rate': 1.0, 'loss': 'exponential', 'n_estimators': 500}

In [18]:
model2.fit(X_train, y_train[:,1])

GridSearchCV(cv=None, error_score=nan,
             estimator=AdaBoostRegressor(base_estimator=None, learning_rate=1.0,
                                         loss='linear', n_estimators=50,
                                         random_state=140),
             iid='deprecated', n_jobs=None,
             param_grid={'learning_rate': [0.1, 0.5, 1.0],
                         'loss': ['linear', 'square', 'exponential'],
                         'n_estimators': [50, 100, 500]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [19]:
# Best Parametrs

model2.best_params_

{'learning_rate': 0.1, 'loss': 'linear', 'n_estimators': 50}

In [20]:
model3.fit(X_train, y_train[:,2])

GridSearchCV(cv=None, error_score=nan,
             estimator=AdaBoostRegressor(base_estimator=None, learning_rate=1.0,
                                         loss='linear', n_estimators=50,
                                         random_state=140),
             iid='deprecated', n_jobs=None,
             param_grid={'learning_rate': [0.1, 0.5, 1.0],
                         'loss': ['linear', 'square', 'exponential'],
                         'n_estimators': [50, 100, 500]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [21]:
# Best Parametrs

model3.best_params_

{'learning_rate': 1.0, 'loss': 'square', 'n_estimators': 50}

### 2. Testing

In [22]:
res1 = model1.predict(X_test)
res2 = model2.predict(X_test)
res3 = model3.predict(X_test)

y_pred_mode1 = get_output(np.hstack((res1.reshape(-1, 1), res2.reshape(-1, 1), res3.reshape(-1, 1))))

In [23]:
assert y_pred_mode1.shape == y_test.shape

### 3. Evaluation

In [24]:
# Frequency Outcome

evaluate(y_pred_mode1, y_test, 0)

Explained Variance Score: 99.968094%
Mean Absolute Error: 2.060535
Mean Squared Error: 0.000082


In [25]:
# Max Deflection Outcome

evaluate(y_pred_mode1, y_test, 1)

Explained Variance Score: 99.859101%
Mean Absolute Error: 0.077768
Mean Squared Error: 0.000059


In [26]:
# Average Defelction Outcome

evaluate(y_pred_mode1, y_test, 2)

Explained Variance Score: 41.078764%
Mean Absolute Error: 0.407339
Mean Squared Error: 0.002327


# Adaptive Boosting in Trees Method 2

### 1. Training

In [27]:
ab = MultiOutputRegressor(AdaBoostRegressor(random_state=seed))
ab.get_params()

{'estimator__base_estimator': None,
 'estimator__learning_rate': 1.0,
 'estimator__loss': 'linear',
 'estimator__n_estimators': 50,
 'estimator__random_state': 140,
 'estimator': AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                   n_estimators=50, random_state=140),
 'n_jobs': None}

In [28]:
parametrs = {
    'estimator__learning_rate': [0.1, 0.5, 1.0],
    'estimator__loss': ['linear', 'square', 'exponential'],
    'estimator__n_estimators': [50, 100, 500]
}

In [29]:
model4 = GridSearchCV(ab, parametrs)
model4.fit(X_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=MultiOutputRegressor(estimator=AdaBoostRegressor(base_estimator=None,
                                                                        learning_rate=1.0,
                                                                        loss='linear',
                                                                        n_estimators=50,
                                                                        random_state=140),
                                            n_jobs=None),
             iid='deprecated', n_jobs=None,
             param_grid={'estimator__learning_rate': [0.1, 0.5, 1.0],
                         'estimator__loss': ['linear', 'square', 'exponential'],
                         'estimator__n_estimators': [50, 100, 500]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

### 2. Validation

In [30]:
res4 = model4.predict(X_test)
y_pred_mode2 = get_output(res4)

### 3. Evaluation

In [31]:
# Frequency Outcome

evaluate(y_pred_mode2, y_test, 0)

Explained Variance Score: 99.952504%
Mean Absolute Error: 2.193742
Mean Squared Error: 0.000152


In [32]:
# Max Deflection Outcome

evaluate(y_pred_mode2, y_test, 1)

Explained Variance Score: 99.707554%
Mean Absolute Error: 0.149283
Mean Squared Error: 0.000125


In [33]:
# Average Defelction Outcome

evaluate(y_pred_mode2, y_test, 2)

Explained Variance Score: 36.775975%
Mean Absolute Error: 0.409129
Mean Squared Error: 0.002315


# Final Testing Phase

In this phase separeate simulations were done with new set of points generated random;y with different random seed.

In [34]:
df_test = pd.read_csv("../Data/Fixed-Fixed-Test.csv")
df_test.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.5,63,510,1,52.613,7.554,3.803
1,0.5,63,510,2,144.79,7.2355,3.6191
2,0.5,63,510,3,283.54,7.3149,4.0421
3,0.5,63,510,4,322.53,11.855,4.5453
4,0.5,63,510,5,468.26,7.4041,3.8655


In [35]:
test_X = df_test[["depth", "l1", "l2", "Mode"]].values
test_X = get_feature(test_X)

test_Y = df_test[["Frequency", "Max_Deflection", "Average_Deflection"]].values

### Mode 1

In [36]:
test_res1 = model1.predict(test_X)
test_res2 = model2.predict(test_X)
test_res3 = model3.predict(test_X)

test_pred_y_mode_1 = get_output(np.hstack((test_res1.reshape(-1, 1), test_res2.reshape(-1, 1), test_res3.reshape(-1, 1))))

In [37]:
assert test_pred_y_mode_1.shape == test_Y.shape

In [38]:
# Fequency Outcome

evaluate(test_pred_y_mode_1, test_Y, 0)

Explained Variance Score: 99.961882%
Mean Absolute Error: 2.123450
Mean Squared Error: 0.000086


In [39]:
# Max Deflection Outcome

evaluate(test_pred_y_mode_1, test_Y, 1)

Explained Variance Score: 66.187701%
Mean Absolute Error: 11.326095
Mean Squared Error: 0.645299


In [40]:
# Average Defelction Outcome

evaluate(test_pred_y_mode_1, test_Y, 2)

Explained Variance Score: 55.651597%
Mean Absolute Error: 5.738065
Mean Squared Error: 0.561868


### Mode 2

In [41]:
test_res4 = model4.predict(test_X)
test_pred_y_mode_2 = get_output(test_res4)

In [42]:
assert test_pred_y_mode_2.shape == test_Y.shape

In [43]:
# Fequency Outcome

evaluate(test_pred_y_mode_2, test_Y, 0)

Explained Variance Score: 99.947846%
Mean Absolute Error: 2.047152
Mean Squared Error: 0.000136


In [44]:
# Max Deflection Outcome

evaluate(test_pred_y_mode_2, test_Y, 1)

Explained Variance Score: 66.240574%
Mean Absolute Error: 11.327536
Mean Squared Error: 0.645599


In [45]:
# Average Defelction Outcome

evaluate(test_pred_y_mode_2, test_Y, 2)

Explained Variance Score: 56.874099%
Mean Absolute Error: 5.735692
Mean Squared Error: 0.561767
