In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import explained_variance_score, mean_absolute_error, mean_squared_log_error 

In [2]:
df = pd.read_csv("../Data/Fixed-Fixed.csv")
df.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.0,0,0,1,52.976,18.084,9.0351
1,0.0,0,0,2,145.92,17.293,9.3771
2,0.0,0,0,3,285.99,17.43,9.5803
3,0.0,0,0,4,323.4,28.366,10.872
4,0.0,0,0,5,472.81,17.537,9.719


In [3]:
seed=140

# Utility

In [4]:
def evaluate(pred, true, outcome):
    print("Explained Variance Score: {:0.6f}%".format(100*explained_variance_score(pred[:,outcome], true[:,outcome])))
    print("Mean Absolute Error: {:0.6f}".format(mean_absolute_error(pred[:,outcome], true[:,outcome])))
    print("Mean Squared Error: {:0.6f}".format(mean_squared_log_error(pred[:,outcome], true[:,outcome])))

# Preprocessing

In [5]:
X = df[["depth", "l1", "l2", "Mode"]].values
Y = df[["Frequency", "Max_Deflection", "Average_Deflection"]].values

In [6]:
# Split the data

_X_train, _X_test, _y_train, _y_test = train_test_split(X, Y, test_size=0.30, random_state=140)

In [7]:
# Standard Scalar

std_X = StandardScaler()
std_X.fit(_X_train[:, :-1]) # Fitting on the Numerical Part of the Data

std_Y = StandardScaler()
std_Y.fit(_y_train) # Fitting on the Numerical Part of the Data

StandardScaler(copy=True, with_mean=True, with_std=True)

In [8]:
get_feature = lambda x: np.hstack((std_X.transform(x[:, :-1]), np.eye(6)[x[:, -1].astype(np.int8) - 1]))

In [9]:
get_train_output = lambda y: std_Y.transform(y)
get_output = lambda y: std_Y.inverse_transform(y)

# Data

In [10]:
X_train = get_feature(_X_train)
y_train = get_train_output(_y_train)

In [11]:
X_test = get_feature(_X_test)
y_test = _y_test

# Decision Trees Method 1

### 1. Training

In [12]:
dt = DecisionTreeRegressor(max_depth=5, min_samples_split=2, random_state=seed)

In [13]:
dt.get_params()

{'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': 5,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'presort': 'deprecated',
 'random_state': 140,
 'splitter': 'best'}

In [14]:
parametrs = {
    'max_leaf_nodes': [15, 20],
    'criterion': ['mse', 'mae']
}

In [15]:
model1 = GridSearchCV(dt, parametrs)
model2 = GridSearchCV(dt, parametrs)
model3 = GridSearchCV(dt, parametrs)

In [16]:
model1.fit(X_train, y_train[:, 0])

GridSearchCV(cv=None, error_score=nan,
             estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse',
                                             max_depth=5, max_features=None,
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             presort='deprecated',
                                             random_state=140,
                                             splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'criterion': ['mse', 'mae'],
                         'max_leaf_nodes': [15, 20]},
             pre_dispatch='2*n_jobs', refit=True

In [17]:
# Best Parametrs

model1.best_params_

{'criterion': 'mse', 'max_leaf_nodes': 20}

In [18]:
model2.fit(X_train, y_train[:, 1])

GridSearchCV(cv=None, error_score=nan,
             estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse',
                                             max_depth=5, max_features=None,
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             presort='deprecated',
                                             random_state=140,
                                             splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'criterion': ['mse', 'mae'],
                         'max_leaf_nodes': [15, 20]},
             pre_dispatch='2*n_jobs', refit=True

In [19]:
# Best Parametrs

model2.best_params_

{'criterion': 'mse', 'max_leaf_nodes': 20}

In [20]:
model3.fit(X_train, y_train[:, 2])

GridSearchCV(cv=None, error_score=nan,
             estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse',
                                             max_depth=5, max_features=None,
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             presort='deprecated',
                                             random_state=140,
                                             splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'criterion': ['mse', 'mae'],
                         'max_leaf_nodes': [15, 20]},
             pre_dispatch='2*n_jobs', refit=True

In [21]:
# Best Parametrs

model3.best_params_

{'criterion': 'mse', 'max_leaf_nodes': 20}

### 2. Testing

In [22]:
res1 = model1.predict(X_test)
res2 = model2.predict(X_test)
res3 = model3.predict(X_test)

y_pred_mode1 = get_output(np.hstack((res1.reshape(-1, 1), res2.reshape(-1, 1), res3.reshape(-1, 1))))

In [23]:
assert y_pred_mode1.shape == y_test.shape

### 3. Evaluation

In [24]:
# Frequency Outcome

evaluate(y_pred_mode1, y_test, 0)

Explained Variance Score: 99.984083%
Mean Absolute Error: 1.177243
Mean Squared Error: 0.000030


In [25]:
# Max Deflection Outcome

evaluate(y_pred_mode1, y_test, 1)

Explained Variance Score: 99.897188%
Mean Absolute Error: 0.064888
Mean Squared Error: 0.000044


In [26]:
# Average Defelction Outcome

evaluate(y_pred_mode1, y_test, 2)

Explained Variance Score: 58.216720%
Mean Absolute Error: 0.384549
Mean Squared Error: 0.002166


# Decision Trees Method 2

In [27]:
dt = DecisionTreeRegressor(max_depth=5, min_samples_split=2, random_state=seed)

In [28]:
parametrs = {
    'max_leaf_nodes': [15, 20],
    'criterion': ['mse', 'mae']
}

In [29]:
model4 = GridSearchCV(dt, parametrs)
model4.fit(X_train, y_train)





GridSearchCV(cv=None, error_score=nan,
             estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse',
                                             max_depth=5, max_features=None,
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             presort='deprecated',
                                             random_state=140,
                                             splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'criterion': ['mse', 'mae'],
                         'max_leaf_nodes': [15, 20]},
             pre_dispatch='2*n_jobs', refit=True

In [30]:
res4 = model4.predict(X_test)
y_pred_mode2 = get_output(res4)

In [31]:
assert y_pred_mode2.shape == y_test.shape

### 3. Evaluation

In [32]:
# Frequency Outcome

evaluate(y_pred_mode2, y_test, 0)

Explained Variance Score: 99.980952%
Mean Absolute Error: 1.405444
Mean Squared Error: 0.000052


In [33]:
# Max Deflection Outcome

evaluate(y_pred_mode2, y_test, 1)

Explained Variance Score: 99.880679%
Mean Absolute Error: 0.071880
Mean Squared Error: 0.000051


In [34]:
# Average Defelction Outcome

evaluate(y_pred_mode2, y_test, 2)

Explained Variance Score: 47.992268%
Mean Absolute Error: 0.407766
Mean Squared Error: 0.002522


# Final Testing Phase

In this phase separeate simulations were done with new set of points generated random;y with different random seed.

In [35]:
df_test = pd.read_csv("../Data/Fixed-Fixed-Test.csv")
df_test.head()

Unnamed: 0,depth,l1,l2,Mode,Frequency,Max_Deflection,Average_Deflection
0,0.5,63,510,1,52.613,7.554,3.803
1,0.5,63,510,2,144.79,7.2355,3.6191
2,0.5,63,510,3,283.54,7.3149,4.0421
3,0.5,63,510,4,322.53,11.855,4.5453
4,0.5,63,510,5,468.26,7.4041,3.8655


In [36]:
test_X = df_test[["depth", "l1", "l2", "Mode"]].values
test_X = get_feature(test_X)

test_Y = df_test[["Frequency", "Max_Deflection", "Average_Deflection"]].values

### Mode 1

In [37]:
test_res1 = model1.predict(test_X)
test_res2 = model2.predict(test_X)
test_res3 = model3.predict(test_X)

test_pred_y_mode_1 = get_output(np.hstack((test_res1.reshape(-1, 1), test_res2.reshape(-1, 1), test_res3.reshape(-1, 1))))

In [38]:
assert test_pred_y_mode_1.shape == test_Y.shape

In [39]:
# Fequency Outcome

evaluate(test_pred_y_mode_1, test_Y, 0)

Explained Variance Score: 99.977620%
Mean Absolute Error: 1.236102
Mean Squared Error: 0.000044


In [40]:
# Max Deflection Outcome

evaluate(test_pred_y_mode_1, test_Y, 1)

Explained Variance Score: 66.185945%
Mean Absolute Error: 11.319255
Mean Squared Error: 0.644738


In [41]:
# Average Defelction Outcome

evaluate(test_pred_y_mode_1, test_Y, 2)

Explained Variance Score: 56.106064%
Mean Absolute Error: 5.789357
Mean Squared Error: 0.568323


### Mode 2

In [42]:
test_res4 = model4.predict(test_X)
test_pred_y_mode_2 = get_output(test_res4)

In [43]:
assert test_pred_y_mode_2.shape == test_Y.shape

In [44]:
# Fequency Outcome

evaluate(test_pred_y_mode_2, test_Y, 0)

Explained Variance Score: 99.975952%
Mean Absolute Error: 1.415146
Mean Squared Error: 0.000056


In [45]:
# Max Deflection Outcome

evaluate(test_pred_y_mode_2, test_Y, 1)

Explained Variance Score: 66.133751%
Mean Absolute Error: 11.323011
Mean Squared Error: 0.645011


In [46]:
# Average Defelction Outcome

evaluate(test_pred_y_mode_2, test_Y, 2)

Explained Variance Score: 52.068961%
Mean Absolute Error: 5.732701
Mean Squared Error: 0.560808
