In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_log_error 

  from numpy.core.umath_tests import inner1d


In [2]:
df = pd.read_csv("../Data/Fixed-Fixed.csv")
df.head(18)

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.0,0,0,1,52.976,18.084,9.0351
1,0.0,0,0,2,145.92,17.293,9.3771
2,0.0,0,0,3,285.99,17.43,9.5803
3,0.0,0,0,4,323.4,28.366,10.872
4,0.0,0,0,5,472.81,17.537,9.719
5,0.0,0,0,6,489.85,17.734,9.2553
6,0.5,60,110,1,52.618,18.051,7.3559
7,0.5,60,110,2,144.89,17.276,8.1888
8,0.5,60,110,3,283.94,17.488,9.0011
9,0.5,60,110,4,322.78,28.34,9.2718


# Preprocessing

In [3]:
X = df[["depth", "l1", "l2", "Mode"]].values
Y = df[["Frequency", "Max_Deflection", "Average_Deflection"]].values

In [4]:
# Split the data

_X_train, _X_test, _y_train, _y_test = train_test_split(X, Y, test_size=0.30, random_state=42)

In [5]:
# Standard Scalar

std_X = StandardScaler()
std_X.fit(_X_train[:, :-1]) # Fitting on the Numerical Part of the Data

std_Y = StandardScaler()
std_Y.fit(_y_train) # Fitting on the Numerical Part of the Data

StandardScaler(copy=True, with_mean=True, with_std=True)

In [6]:
get_feature = lambda x: np.hstack((std_X.transform(x[:, :-1]), np.eye(6)[x[:, -1].astype(np.int8) - 1]))

In [7]:
get_train_output = lambda y: std_Y.transform(y)
get_output = lambda y: std_Y.inverse_transform(y)

# Data

In [8]:
X_train = get_feature(_X_train)
y_train = get_train_output(_y_train)

In [9]:
X_test = get_feature(_X_test)
y_test = _y_test

# Adaptive Boosting in Trees

### 1. Training

In [10]:
ab = AdaBoostRegressor(random_state=42)

In [11]:
ab.get_params()

{'base_estimator': None,
 'learning_rate': 1.0,
 'loss': 'linear',
 'n_estimators': 50,
 'random_state': 42}

In [12]:
parametrs = {
    'learning_rate': [0.1, 0.5, 1.0],
    'loss': ['linear', 'square', 'exponential'],
    'n_estimators': [50, 100, 500]
}

In [13]:
model1 = GridSearchCV(ab, parametrs)
model2 = GridSearchCV(ab, parametrs)
model3 = GridSearchCV(ab, parametrs)

In [14]:
model1.fit(X_train, y_train[:,0])

GridSearchCV(cv=None, error_score='raise',
       estimator=AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
         n_estimators=50, random_state=42),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'learning_rate': [0.1, 0.5, 1.0], 'loss': ['linear', 'square', 'exponential'], 'n_estimators': [50, 100, 500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [15]:
# Best Parametrs

model1.best_params_

{'learning_rate': 1.0, 'loss': 'exponential', 'n_estimators': 500}

In [16]:
model2.fit(X_train, y_train[:,1])

GridSearchCV(cv=None, error_score='raise',
       estimator=AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
         n_estimators=50, random_state=42),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'learning_rate': [0.1, 0.5, 1.0], 'loss': ['linear', 'square', 'exponential'], 'n_estimators': [50, 100, 500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [17]:
# Best Parametrs

model2.best_params_

{'learning_rate': 0.1, 'loss': 'linear', 'n_estimators': 50}

In [18]:
model3.fit(X_train, y_train[:,2])

GridSearchCV(cv=None, error_score='raise',
       estimator=AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
         n_estimators=50, random_state=42),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'learning_rate': [0.1, 0.5, 1.0], 'loss': ['linear', 'square', 'exponential'], 'n_estimators': [50, 100, 500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [19]:
# Best Parametrs

model3.best_params_

{'learning_rate': 1.0, 'loss': 'square', 'n_estimators': 50}

### 2. Testing

In [20]:
res1 = model1.predict(X_test)
res2 = model2.predict(X_test)
res3 = model3.predict(X_test)

y_pred = get_output(np.hstack((res1.reshape(-1, 1), res2.reshape(-1, 1), res3.reshape(-1, 1))))

In [21]:
assert y_pred.shape == y_test.shape

### 3. Evaluation

In [22]:
explained_variance_score(y_pred, y_test)

0.8030879229961089

In [23]:
mean_absolute_error(y_pred, y_test)

0.9951165546373404

In [24]:
mean_squared_log_error(y_pred, y_test)

0.0008386579694197477

# Final Testing Phase

In this phase separeate simulations were done with new set of points generated random;y with different random seed.

In [25]:
df_test = pd.read_csv("../Data/Fixed-Fixed-Test.csv")
df_test.head(18)

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.5,63,510,1,52.613,7.554,3.803
1,0.5,63,510,2,144.79,7.2355,3.6191
2,0.5,63,510,3,283.54,7.3149,4.0421
3,0.5,63,510,4,322.53,11.855,4.5453
4,0.5,63,510,5,468.26,7.4041,3.8655
5,0.5,63,510,6,489.25,7.4189,3.8899
6,0.5,255,742,1,52.73,7.5491,3.8768
7,0.5,255,742,2,145.0,7.2373,4.59
8,0.5,255,742,3,284.56,7.3335,4.6257
9,0.5,255,742,4,322.74,11.865,4.7365


In [26]:
test_X = df_test[["depth", "l1", "l2", "Mode"]].values
test_X = get_feature(test_X)

test_Y = df_test[["Frequency", "Max_Deflection", "Average_Deflection"]].values

In [27]:
test_res1 = model1.predict(test_X)
test_res2 = model2.predict(test_X)
test_res3 = model3.predict(test_X)

test_pred_y = get_output(np.hstack((test_res1.reshape(-1, 1), test_res2.reshape(-1, 1), test_res3.reshape(-1, 1))))

In [28]:
assert y_pred.shape == y_test.shape

In [29]:
explained_variance_score(test_pred_y, test_Y)

0.7449289612875316

In [30]:
mean_absolute_error(test_pred_y, test_Y)

6.55446786165716

In [31]:
mean_squared_log_error(test_pred_y, test_Y)

0.40319820979655785