**Data sorting**

In [245]:
import numpy as np
import pandas as pd
import joblib
import xgboost as xgb

from xgboost import plot_importance, plot_tree

In [246]:
nFeat= 6
nTarget = 1
nData= 575
seed = 165

In [247]:
data_raw = pd.read_csv('Data_raw/230314_Hydrocarbon_575.csv')

In [248]:
df_inY = data_raw.iloc[:,nFeat:]
df_inX = data_raw.iloc[:, :nFeat]
df_inY.iloc[:,[nTarget-1]]
df_inX.iloc[:, :nFeat]

Unnamed: 0,R1 Temperature,R1 Flow,R1 H2/CH4,R2 Temperature,R2 Flow,R2 H2/CH4
0,0.1495,-0.2,0.818182,-0.84,-0.750000,-0.6
1,0.1495,-0.2,0.818182,-0.76,-0.750000,-0.6
2,0.4505,0.6,0.818182,-0.52,-0.416667,-0.6
3,0.4505,0.6,0.818182,-0.04,0.583333,0.6
4,-0.5990,-0.6,-1.000000,-0.20,-0.250000,0.6
...,...,...,...,...,...,...
570,-0.3505,-0.2,-1.000000,0.76,-0.083333,-0.2
571,0.1495,-0.2,0.818182,0.84,-0.250000,0.6
572,-0.5990,-0.6,-1.000000,0.84,-0.416667,0.2
573,0.1495,-0.2,0.818182,0.84,-0.416667,0.2


In [249]:
Feature_name = list(df_inX.iloc[:, :nFeat])
Target_name = list(df_inY.iloc[:,[nTarget-1]])
Feature_Target_name = Feature_name + Target_name

In [250]:
df_inXY = np.array(pd.concat([df_inX.iloc[:, :nFeat],df_inY.iloc[:,[nTarget-1]]],axis=1)) #Normalization된 features와 실제 Target 데이터들을 다시 합쳐주는 과정

In [251]:
np.random.seed(seed)
np.random.shuffle(df_inXY)

In [252]:
df_inXY_Shuffled = pd.DataFrame(df_inXY, columns = list(Feature_Target_name))

In [253]:
data_inX = np.array(df_inXY_Shuffled.iloc[:,:nFeat])
data_inY = np.array(df_inXY_Shuffled.iloc[:, [nFeat]])

In [254]:
train_ratio = 0.8
nTrain = int(nData*train_ratio)
nTest = nData - nTrain

In [255]:
X_train = data_inX[:nTrain]
Y_train = data_inY[:nTrain]
X_test = data_inX[nTrain: , :nFeat] 
Y_test = data_inY[nTrain: , :]

**XGB Regression**

In [256]:
opt_d = 5 # max_depth: 한 트리의 maximum depth
opt_n = 1000 #n_estimators 계산 횟수

predictor = xgb.XGBRegressor(max_depth=opt_d, n_estimators=opt_n, subsample=0.8, verbosity=0)

predictor.fit(X_train, Y_train, eval_metric='mae', eval_set=[(X_test, Y_test)])
Y_test_out = predictor.predict(X_test)
test_err = np.mean(np.abs(Y_test_out - Y_test))

[0]	validation_0-mae:10.64990
[1]	validation_0-mae:7.66858
[2]	validation_0-mae:5.62044
[3]	validation_0-mae:4.25114
[4]	validation_0-mae:3.26967




[5]	validation_0-mae:2.62836
[6]	validation_0-mae:2.20823
[7]	validation_0-mae:1.97116
[8]	validation_0-mae:1.81338
[9]	validation_0-mae:1.60086
[10]	validation_0-mae:1.53015
[11]	validation_0-mae:1.44210
[12]	validation_0-mae:1.35691
[13]	validation_0-mae:1.30024
[14]	validation_0-mae:1.26105
[15]	validation_0-mae:1.20036
[16]	validation_0-mae:1.16267
[17]	validation_0-mae:1.14265
[18]	validation_0-mae:1.13822
[19]	validation_0-mae:1.09622
[20]	validation_0-mae:1.09303
[21]	validation_0-mae:1.07236
[22]	validation_0-mae:1.05924
[23]	validation_0-mae:1.04082
[24]	validation_0-mae:1.00714
[25]	validation_0-mae:0.99232
[26]	validation_0-mae:0.97944
[27]	validation_0-mae:0.97473
[28]	validation_0-mae:0.96747
[29]	validation_0-mae:0.97244
[30]	validation_0-mae:0.96868
[31]	validation_0-mae:0.94745
[32]	validation_0-mae:0.94028
[33]	validation_0-mae:0.94344
[34]	validation_0-mae:0.94321
[35]	validation_0-mae:0.94046
[36]	validation_0-mae:0.92652
[37]	validation_0-mae:0.91410
[38]	validation

In [257]:
joblib.dump(predictor, "XGB_result/predictor.xgb.%d.%d.sav"%(nData, nTarget))

['XGB_result/predictor.xgb.575.10.sav']

In [258]:
from sklearn.metrics import r2_score
r2 = r2_score(Y_test, Y_test_out)
r2_1000 = r2*1000
mae = np.mean(np.abs(Y_test_out - Y_test))
mae_1000= mae*1000

In [259]:
print('nData: {}\tnTarget: {}\topt d:{}\topt n:{}\tmin MAE: {:.4f}\tr2: {:.4f}'.format(nData, nTarget, opt_d, opt_n, mae, r2))

nData: 575	nTarget: 10	opt d:5	opt n:1000	min MAE: 5.5102	r2: 0.9537


In [260]:
feature_important = predictor.get_booster().get_score(importance_type='weight')
keys = list(feature_important.keys())
values = list(feature_important.values())
index = Feature_name

score = pd.DataFrame(list(zip(index, keys, values)), columns=['index','keys', 'values'])

score.to_csv("XGB_result/output.test.xgbscore.nData.%d.nTarget.%d..opt_d.%d.opt_n.%d.mae.%d.r2.%d.csv"%(nData, nTarget, opt_d, opt_n, mae_1000, r2_1000), index=False)

In [261]:
prt=pd.DataFrame()
ndx = range(nTest)
prt["id"] = ndx
prt["ref"] = Y_test
prt["xgb"] = Y_test_out
prt.to_csv("XGB_result/output.test.xgb.nData.%d.nTarget.%d..opt_d.%d.opt_n.%d.mae.%d.r2.%d.csv"%(nData, nTarget, opt_d, opt_n, mae_1000, r2_1000), index=False)

**ABC optimization**

In [None]:
import numpy
import pandas
import joblib

In [None]:
from utils.artificial_bee_colony import ABC

In [None]:
def pnt_func(x):
    pnt = 0
    
    for i in range (0, x.shape[1]):
       
        if x[0, i] < -1.0:   
            pnt -= 1
            
        if x[0, i] > 1.0:
            pnt -= 1
            
    return pnt

In [None]:
lbs = numpy.ones([nFeat])*(-1.0)
ubs = numpy.ones([nFeat])*(1.0)

In [None]:
predictor = joblib.load("XGB_result/predictor.xgb.%d.%d.sav"%(nData, nTarget))

In [108]:
opt = ABC(nFeat, predictor.predict, lbs, ubs, opt_type='max', lim_trial=0.1, size_pop=100, pnt_func=pnt_func)
sol, val = opt.run(100)

0 65.41327
1 65.41327
2 65.41327
3 65.41327
4 66.23917
5 66.23917
6 66.23917
7 66.23917
8 66.23917
9 66.294624
10 67.294624
11 67.294624
12 67.294624
13 67.294624


  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':
  if opt_type is not 'min' and opt_type is not 'max':


KeyboardInterrupt: 

In [None]:
numpy.savetxt("ABC_sol/ABC.sol.%d.%d.csv"%(nData, nTarget), sol.reshape(1, -1), delimiter=',', header= 'R1 Temperature,R1 Flow,R1 H2/CH4,R2 Temperature,R2 Flow,R2 H2/CH4')

**ABC to XGB**

In [25]:
import numpy as np
import pandas as pd
import xgboost as xgb
import joblib
import sys

In [26]:
sol_data = np.array(pd.read_csv("ABC_sol/ABC.sol.%d.%d.csv"%(nData,nTarget)))

In [27]:
X_sol = sol_data[:,0:nFeat]

In [28]:
df_inX
df_inY = data_raw.iloc[:,nFeat:]

In [29]:
df_inXY_ABC = []
for i in range (0, 10):
    df_inXY_ABC.append(np.array(pd.concat([df_inX.iloc[:, :nFeat], df_inY.iloc[:,[i]]],axis=1)))

In [30]:
for i in range (0, 10):  
    np.random.seed(seed)
    np.random.shuffle(df_inXY_ABC[i])
    
data_inX = []
data_inY = []
for i in range (0, 10):
    data_inX.append(np.array(pd.DataFrame(df_inXY_ABC[i]).iloc[:,:nFeat]))
    data_inY.append(np.array(pd.DataFrame(df_inXY_ABC[i]).iloc[:, [nFeat]]))
    
    
X_train = []
Y_train = []
X_test = []
Y_test = []    
for i in range (0, 10):
    X_train.append(data_inX[i][:nTrain])
    Y_train.append(data_inY[i][:nTrain])
    X_test.append(data_inX[i][nTrain: , :nFeat])
    Y_test.append(data_inY[i][nTrain: , :])

In [31]:
opt_d = 5
opt_n = 1000

for i in range (0, 10):
    predictor = xgb.XGBRegressor(max_depth=opt_d, n_estimators=opt_n, subsample=0.8, verbosity=0)
    predictor.fit(X_train[i], Y_train[i], eval_metric='mae', eval_set=[(X_test[i], Y_test[i])])
    joblib.dump(predictor, "ABC_result/predictor.ABC.%d.%d.%d.sav"%(nData, nTarget, i))

[0]	validation_0-mae:14.14035
[1]	validation_0-mae:10.29110
[2]	validation_0-mae:7.88771
[3]	validation_0-mae:6.40227




[4]	validation_0-mae:5.54615
[5]	validation_0-mae:4.98182
[6]	validation_0-mae:4.54215
[7]	validation_0-mae:4.22420
[8]	validation_0-mae:3.96162
[9]	validation_0-mae:3.71611
[10]	validation_0-mae:3.48812
[11]	validation_0-mae:3.39246
[12]	validation_0-mae:3.19635
[13]	validation_0-mae:3.01712
[14]	validation_0-mae:2.82621
[15]	validation_0-mae:2.75585
[16]	validation_0-mae:2.68260
[17]	validation_0-mae:2.59376
[18]	validation_0-mae:2.56670
[19]	validation_0-mae:2.48435
[20]	validation_0-mae:2.40211
[21]	validation_0-mae:2.39584
[22]	validation_0-mae:2.38350
[23]	validation_0-mae:2.34842
[24]	validation_0-mae:2.28842
[25]	validation_0-mae:2.24703
[26]	validation_0-mae:2.24851
[27]	validation_0-mae:2.20639
[28]	validation_0-mae:2.18154
[29]	validation_0-mae:2.16606
[30]	validation_0-mae:2.15058
[31]	validation_0-mae:2.14908
[32]	validation_0-mae:2.11902
[33]	validation_0-mae:2.09969
[34]	validation_0-mae:2.11374
[35]	validation_0-mae:2.10594
[36]	validation_0-mae:2.06707
[37]	validation_

In [32]:
results = []
for i in range (0, 10):
    predictor = joblib.load("ABC_result/predictor.ABC.%d.%d.%d.sav"%(nData, nTarget, i))
    results.append((predictor.predict(X_sol)))

results_predictor = []
for i in range (0, 10):    
    results_predictor.append(np.asarray(results))
    np.savetxt("ABC_result/ABC.results.%d.%d.%d.csv"%(nData,nTarget, i), results_predictor[i].reshape(1, -1), delimiter=',', header='nTarget', comments='')

In [33]:
numpy.savetxt("ABC_result/ABC.result.%d.%d.csv"%(nData, nTarget), results_predictor[i].reshape(1, -1), delimiter=',', header= 'Conversion,C2 selectivity,Aromatics selectivity,Aromatics and Coke selectivity,Coke selectivity,Acetylene/(Ethylene+Ethane),C2 yield,Aromatics yield,Coke yield,Hydrocarbon yield')
