# Importing Libraries

In [1]:
import pandas as pd
from scipy.stats import norm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Reading Dataset

In [2]:
df = pd.read_csv("Dataset_all.csv")
df

Unnamed: 0,Lattice Type,X,Y,Z,Thickness,Force (N),Strain (mm)
0,SplitP,10.0,8.7,9.5,1.1,0.2190,0.0001
1,SplitP,10.0,8.7,9.5,1.1,0.2675,0.0023
2,SplitP,10.0,8.7,9.5,1.1,0.3566,0.0111
3,SplitP,10.0,8.7,9.5,1.1,0.2351,0.0255
4,SplitP,10.0,8.7,9.5,1.1,0.3353,0.0407
...,...,...,...,...,...,...,...
76756,Lidinoid,8.3,7.8,6.1,0.7,1267.1747,11.3756
76757,Lidinoid,8.3,7.8,6.1,0.7,1267.6182,11.3923
76758,Lidinoid,8.3,7.8,6.1,0.7,1268.4763,11.4090
76759,Lidinoid,8.3,7.8,6.1,0.7,1269.4996,11.4256


# Normalize Dataset using RIN

In [6]:
def rank_and_normalize(series):
    ranked_series = series.rank()
    normalized_series = (ranked_series - 0.5) / len(series)
    result = norm.ppf(normalized_series)
    return result

columns_to_normalize = ['X', 'Y', 'Z', 'Thickness', 'Force (N)', 'Strain (mm)']
for column in columns_to_normalize:
    df[column] = rank_and_normalize(df[column])
df

Unnamed: 0,Lattice Type,X,Y,Z,Thickness,Force (N),Strain (mm)
0,SplitP,2.304713,0.806729,1.516002,0.014924,-2.549112,-3.236146
1,SplitP,2.304713,0.806729,1.516002,0.014924,-2.479119,-2.871088
2,SplitP,2.304713,0.806729,1.516002,0.014924,-2.394080,-2.715482
3,SplitP,2.304713,0.806729,1.516002,0.014924,-2.523859,-2.609514
4,SplitP,2.304713,0.806729,1.516002,0.014924,-2.414051,-2.532233
...,...,...,...,...,...,...,...
76756,Lidinoid,0.208935,0.226942,-0.708049,-0.819751,1.362283,0.907819
76757,Lidinoid,0.208935,0.226942,-0.708049,-0.819751,1.362572,0.911870
76758,Lidinoid,0.208935,0.226942,-0.708049,-0.819751,1.363398,0.915836
76759,Lidinoid,0.208935,0.226942,-0.708049,-0.819751,1.364143,0.919817


# Converting Lattice Type Variable to Numerical

In [9]:
le = preprocessing.LabelEncoder()
df['Lattice Type'] = le.fit_transform(df['Lattice Type'])
df

Unnamed: 0,Lattice Type,X,Y,Z,Thickness,Force (N),Strain (mm)
0,4,2.304713,0.806729,1.516002,0.014924,-2.549112,-3.236146
1,4,2.304713,0.806729,1.516002,0.014924,-2.479119,-2.871088
2,4,2.304713,0.806729,1.516002,0.014924,-2.394080,-2.715482
3,4,2.304713,0.806729,1.516002,0.014924,-2.523859,-2.609514
4,4,2.304713,0.806729,1.516002,0.014924,-2.414051,-2.532233
...,...,...,...,...,...,...,...
76756,2,0.208935,0.226942,-0.708049,-0.819751,1.362283,0.907819
76757,2,0.208935,0.226942,-0.708049,-0.819751,1.362572,0.911870
76758,2,0.208935,0.226942,-0.708049,-0.819751,1.363398,0.915836
76759,2,0.208935,0.226942,-0.708049,-0.819751,1.364143,0.919817


# Defining Features and Targets

In [10]:
x = df[['Force (N)', 'Strain (mm)']]
y = df[['Lattice Type', 'X', 'Y', 'Z', 'Thickness']]
x

Unnamed: 0,Force (N),Strain (mm)
0,-2.549112,-3.236146
1,-2.479119,-2.871088
2,-2.394080,-2.715482
3,-2.523859,-2.609514
4,-2.414051,-2.532233
...,...,...
76756,1.362283,0.907819
76757,1.362572,0.911870
76758,1.363398,0.915836
76759,1.364143,0.919817


# Splitting to Train and Test

In [13]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0, shuffle=True)

# Hyperparameters of RF using GridSearchCV

In [18]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
}

rf_regressor = RandomForestRegressor(random_state=42)

grid_search = GridSearchCV(estimator=rf_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1,
                           n_jobs=1)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits


GridSearchCV(cv=5, estimator=RandomForestRegressor(random_state=42), n_jobs=1,
             param_grid={'max_depth': [None, 10, 20, 30],
                         'n_estimators': [50, 100, 200]},
             scoring='neg_mean_squared_error', verbose=1)

## Results of GridSearchCV

In [19]:
df_cv_results = pd.DataFrame(grid_search.cv_results_)
df_cv_results = df_cv_results[['mean_fit_time', 'mean_score_time', 'param_max_depth','param_n_estimators',
                               'mean_test_score', 'rank_test_score']]
df_cv_results.sort_values(by='rank_test_score', inplace=True)
df_cv_results

Unnamed: 0,mean_fit_time,mean_score_time,param_max_depth,param_n_estimators,mean_test_score,rank_test_score
8,25.165238,0.68942,20.0,200,-0.581645,1
7,14.609992,0.35295,20.0,100,-0.584061,2
6,9.503448,0.267663,20.0,50,-0.58835,3
11,26.169496,0.755098,30.0,200,-0.603744,4
2,25.797339,0.708218,,200,-0.603883,5
1,12.721337,0.366844,,100,-0.606306,6
10,12.834673,0.359748,30.0,100,-0.606395,7
0,6.466368,0.171236,,50,-0.611369,8
9,6.738884,0.188522,30.0,50,-0.611411,9
5,17.304656,0.284166,10.0,200,-0.762939,10


## Best Hyperparameters Values

In [20]:
print(grid_search.best_score_)
print(grid_search.best_params_)

-0.581644563461008
{'max_depth': 20, 'n_estimators': 200}


## Evaluation Metrics of the Best RF Model

In [21]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

best_model = grid_search.best_estimator_
y_pred_test = best_model.predict(X_test)

mse_test = mean_squared_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)
rmse_test = mean_squared_error(y_test, y_pred_test, squared=False)
mae_test = mean_absolute_error(y_test, y_pred_test)

print("MSE Test:", mse_test)
print("R2 score Test:", r2)
print("RMSE Test:", rmse_test)
print("Mean Absolute Error (MAE) Test:", mae_test)

MSE Test: 0.568084195965912
R2 score Test: 0.49864184146944296
RMSE Test: 0.7376204867632243
Mean Absolute Error (MAE) Test: 0.49054787882327505


# Computation Time

In [22]:
fit_time = df_cv_results['mean_fit_time'].sum()
score_time = df_cv_results['mean_score_time'].sum()
mean_time = fit_time + score_time

print("Time to Compute Best Model: {:.2f} hours".format(mean_time / 3600))

Time to Compute Best Model: 0.05 hours
