In [28]:
import sys
dirname = '/Users/m.daffarobani/Documents/personal_research/smt'
if dirname not in sys.path:
    sys.path.append(dirname)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from smt.utils.design_space import (
    DesignSpace,
    FloatVariable,
    IntegerVariable,
    OrdinalVariable,
    CategoricalVariable,
)
from smt.surrogate_models import KRG, MixIntKernelType
from sklearn.metrics import mean_squared_error

In [2]:
df = pd.read_csv("../tutorial/VTF_properties.csv")

In [5]:
df.head()

Unnamed: 0,Carbon_fibre,Glass_fibre,lf,Vc,SmAvg,G,GiicmAvg,Initial_stiffness,Ultimate_strain,Pseudo_ductile_strain,Ultimate_strength,Yield_strength
0,XN-90,GF,10533.48204,0.966017,82.354229,1056.370868,0.734474,456482.1982,0.24,0.050521,864.937397,864.937397
1,XN-90,XN-05,7808.946657,0.996472,60.848804,1741.249859,0.849103,466897.1657,0.24,0.040624,930.878853,930.878853
2,XN-90,GF,9323.678328,0.242948,53.26938,1523.664676,0.671576,169973.9748,0.18,0.033303,249.346203,249.346203
3,P120J,GF,5788.757208,0.453632,79.635912,1662.767083,0.776173,240985.5142,0.26,0.069889,458.139055,458.139055
4,XN-90,XN-05,11435.54728,0.837472,61.736593,1340.566744,0.658034,389829.4153,0.22,0.042216,693.053223,693.053223


In [4]:
df['Carbon_fibre'].value_counts()

K13D                2834
XN-90               1601
C124                1534
P120J                856
T1000GB              671
HTA5131              476
C320                 385
M60JB                274
M40B                 255
T800H                215
P75S                 204
T300                 176
C100                 101
FliteStrand_S_ZT      16
XN-05                 13
GF                     1
Name: Carbon_fibre, dtype: int64

In [6]:
df['Glass_fibre'].value_counts()

C100                3001
C124                2633
C320                 685
T300                 535
T1000GB              507
P120J                350
XN-05                325
T800H                309
XN-90                272
P75S                 251
HTA5131              211
FliteStrand_S_ZT     185
M60JB                166
GF                   120
M40B                  62
Name: Glass_fibre, dtype: int64

In [53]:
df = pd.read_csv("../tutorial/VTF_properties.csv")
df = df.sample(n=1000,random_state = 10)
data = df.copy()
data.Carbon_fibre = pd.Categorical(data.Carbon_fibre)
data['Carbon_fibre'] = data.Carbon_fibre.cat.codes  
data.Glass_fibre = pd.Categorical(data.Glass_fibre)
data['Glass_fibre'] = data.Glass_fibre.cat.codes  
Xd = data.drop(['SmAvg','G','GiicmAvg','Initial_stiffness','Ultimate_strain','Pseudo_ductile_strain','Ultimate_strength',
             'Yield_strength'],axis = 1)

yd = data.Initial_stiffness

X_train, X_test, y_train, y_test = train_test_split(Xd, yd, test_size = 0.25, random_state = 42)
X_tr = np.asarray(X_train)
y_tr = np.asarray(y_train).astype(float)
X_te = np.asarray(X_test)
y_te = np.asarray(y_test).astype(float)

# to define the variables
design_space = DesignSpace ([
    CategoricalVariable (['XN-90', 'P120J', 'T1000GB', 'C124', 'T800H', 'M60JB', 'C320',
       'M40B', 'P75S', 'K13D', 'T300', 'XN-05', 'FliteStrand_S_ZT',
       'HTA5131', 'GF', 'C100']), #16 choices
     CategoricalVariable (['GF', 'XN-05', 'FliteStrand_S_ZT', 'C124', 'T300', 'T800H', 'C320',
       'P75S', 'C100', 'XN-90', 'HTA5131', 'T1000GB', 'P120J', 'M40B',
       'M60JB']), #15 choices
    FloatVariable (501.5425023,11999.96175),
    FloatVariable (2.849e-05,1.0),
])


In [61]:
design_space = DesignSpace ([
    CategoricalVariable (['P120J', 'XN-90', 'T1000GB', 'C124', 'T800H', 'M60JB', 'C320',
       'M40B', 'P75S', 'K13D', 'T300', 'XN-05', 'FliteStrand_S_ZT',
       'HTA5131', 'GF', 'C100']), #16 choices
     CategoricalVariable (['GF', 'XN-05', 'FliteStrand_S_ZT', 'C124', 'T300', 'T800H', 'C320',
       'P75S', 'C100', 'XN-90', 'HTA5131', 'T1000GB', 'P120J', 'M40B',
       'M60JB']), #15 choices
    FloatVariable (501.5425023,11999.96175),
    FloatVariable (2.849e-05,1.0),
])

In [63]:
np.arange(16)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [64]:
design_space = DesignSpace ([
    CategoricalVariable (np.arange(16)), #16 choices
     CategoricalVariable (['GF', 'XN-05', 'FliteStrand_S_ZT', 'C124', 'T300', 'T800H', 'C320',
       'P75S', 'C100', 'XN-90', 'HTA5131', 'T1000GB', 'P120J', 'M40B',
       'M60JB']), #15 choices
    FloatVariable (501.5425023,11999.96175),
    FloatVariable (2.849e-05,1.0),
])

In [62]:
sm=KRG(design_space = design_space, print_global=False, categorical_kernel=MixIntKernelType.GOWER)
sm.set_training_values(X_tr, y_tr)
sm.train()
y_pred = sm.predict_values(X_te)
rmse = mean_squared_error(y_te, y_pred, squared=False)
rrmse = rmse / y_te.mean()
print(f"RMSE: {rmse:.4f}")
print(f"rRMSE: {rrmse:.4f}")


RMSE: 18381.5144
rRMSE: 0.0881


In [65]:
sm=KRG(design_space = design_space, print_global=False, categorical_kernel=MixIntKernelType.GOWER)
sm.set_training_values(X_tr, y_tr)
sm.train()
y_pred = sm.predict_values(X_te)
rmse = mean_squared_error(y_te, y_pred, squared=False)
rrmse = rmse / y_te.mean()
print(f"RMSE: {rmse:.4f}")
print(f"rRMSE: {rrmse:.4f}")


RMSE: 18381.5144
rRMSE: 0.0881


In [60]:
data['Carbon_fibre'].value_counts().sort_index()

0       4
1     160
2      43
3      59
4     302
5      29
6      29
7      69
8      22
9      74
10     15
11     24
12    170
Name: Carbon_fibre, dtype: int64

In [58]:
df['Carbon_fibre'].value_counts().sort_index()

C100         4
C124       160
C320        43
HTA5131     59
K13D       302
M40B        29
M60JB       29
P120J       69
P75S        22
T1000GB     74
T300        15
T800H       24
XN-90      170
Name: Carbon_fibre, dtype: int64

In [46]:
check = pd.Categorical(df['Carbon_fibre'])

In [49]:
pd.Series(check)

0       K13D
1       K13D
2       C124
3       C320
4       C124
       ...  
995     C124
996     K13D
997    P120J
998    M60JB
999     K13D
Length: 1000, dtype: category
Categories (13, object): ['C100', 'C124', 'C320', 'HTA5131', ..., 'T1000GB', 'T300', 'T800H', 'XN-90']