In [1]:
import time, joblib
import pandas as pd
import numpy as np
import lightgbm as lgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [2]:
start_time = time.strftime('%Y%m%d-%H%M%S', time.localtime(time.time()))

# load csv data
raw_data = pd.read_csv('./data/simulation_data.csv')

In [3]:
# extract data
pump_speed = raw_data['pump_speed'].values
venturi_high_input_flow = raw_data['venturi_high_input_F'].values
venturi_high_input_pressure = raw_data['venturi_high_input_P'].values
venturi_low_input_pressure = raw_data['venturi_low_input_P'].values
venturi_out_flow = raw_data['venturi_out_F'].values
venturi_restrictor = raw_data['venturi_restrictor'].values

In [4]:
# create feature data
feature_data = np.vstack([pump_speed, venturi_high_input_flow, venturi_high_input_pressure, venturi_low_input_pressure]).T

train_input, valid_input, train_output, valid_output = train_test_split(feature_data, venturi_out_flow, test_size=0.2, random_state=0)
valid_input, test_input, valid_output, test_output = train_test_split(valid_input, valid_output, test_size=0.5, random_state=0)

In [35]:
objective = 'regression'
depth = 15
leaves = 2**depth
lr = 0.01
n_estimators = 10000
boosting_type = 'gbdt'
max_bin = 255
n_leaf = 31

depth_list = list(range(1, 15, 1))
leaves_list = list(range(2, 100, 1))



In [37]:
est_flow_model = lgb.LGBMRegressor(objective=objective, n_estimators=n_estimators, learning_rate=lr, boosting_type=boosting_type, max_depth=-1, num_leaves=100)

est_flow_model.fit(X=feature_data, y=venturi_out_flow)

pred_output = est_flow_model.predict(feature_data)

print('The mae of prediction is:', mean_absolute_error(venturi_out_flow, pred_output), end='\n\n')


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 79
[LightGBM] [Info] Number of data points in the train set: 726, number of used features: 4
[LightGBM] [Info] Start training from score 662.025990
The mae of prediction is: 78.35942485280952


In [38]:
logging_data = []

pred_output = est_flow_model.predict(test_input, num_iteration=model._best_iteration)
logging_data.append(test_output[target_data_name])
logging_data.append(pred_output)

#output = pd.DataFrame(np.array(logging_data).transpose())
#output.columns = ['FL Ground Truth', 'FL Predict', "FR Ground Truth", "FR Predict", "RL Ground Truth", "RL Predict", "RR Ground Truth", "RR Predict"]

#plot_predict_scatter(output)
#print(calculate_r2_score(output))

for model, score_column_name in zip(model_list, score_column_name_list):
    #lgb.plot_tree(model, figsize=(20,20))
    lgb.create_tree_digraph(model).render('.\\display_graph\\' + score_column_name + '.gv', format='jpg', view=False)

# save model
joblib.dump(model, '.\\models\\' + score_column_name + '.pkl')


KeyboardInterrupt: 