In [1]:
import os, joblib
import pandas as pd
import numpy as np

from utils.Dataset import CraneDataset
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Collect the path of every entry in the local 'data' directory.
# os.listdir() returns names in arbitrary, platform-dependent order, so the
# names are sorted to make the order of the list passed to CraneDataset
# reproducible between runs and machines.
data_file_name_list = sorted(os.listdir('data'))
data_file_path_list = [os.path.join('data', file_name) for file_name in data_file_name_list]

In [3]:
# Build one DataFrame from all data files and ask the dataset object which
# columns are model inputs and which are prediction targets.
dataset_inst = CraneDataset(data_file_path_list)
raw_dataset = dataset_inst.get_dataset()

feature_names = dataset_inst.get_data_feature_names()
target_names = dataset_inst.get_data_target_names()

# Divide every target column by 1000 in one vectorised step
# (presumably N -> kN, going by the "(N)" / "(kN)" column labels in this
# notebook -- TODO confirm).
raw_dataset[target_names] = raw_dataset[target_names].div(1000)

# A row is kept only if every scaled target is at least 1.
all_targets_ge_one = raw_dataset[target_names].ge(1).all(axis=1)

# Disabled time filter, kept for reference:
#raw_dataset = raw_dataset[raw_dataset['Time(sec)']>0]

# Apply the filter and renumber the surviving rows from zero.
raw_dataset = raw_dataset[all_targets_ge_one].reset_index(drop=True)

# Short aliases for the raw signals the engineered features are built from.
boom_deg = raw_dataset['Boom_Angle(deg)']
swing_deg = raw_dataset['Swing_Angle(deg)']
roll_deg = raw_dataset['Roll_Angle(deg)']
pitch_deg = raw_dataset['Pitch_Angle(deg)']
load_ton = raw_dataset['Load(Ton)']

# Terms that appear in more than one feature.
boom_plus_roll = boom_deg + roll_deg
boom_plus_pitch = boom_deg + pitch_deg
swing_rad = np.deg2rad(swing_deg)
cos_boom = np.cos(np.deg2rad(boom_deg))

# Engineered features, keyed by column name. Insertion order defines both the
# column order in raw_dataset and the order of extra_feature_name_list.
# NOTE: roll and pitch are very close to zero for most rows (see the
# describe() output further down), so the ratios that divide by them reach
# extremely large magnitudes.
engineered_features = {
    'boom+roll/load': boom_plus_roll / load_ton,
    'boom+pitch/load': boom_plus_pitch / load_ton,
    'swing/load': swing_deg / load_ton,
    'roll/pitch': roll_deg / pitch_deg,
    'load/roll': load_ton / roll_deg,
    'load/pitch': load_ton / pitch_deg,
    'boom+roll': boom_plus_roll,
    'boom+pitch': boom_plus_pitch,
    # Products of the swing-angle cosine / sine with the boom-angle cosine.
    'boom_x_pos': np.cos(swing_rad) * cos_boom,
    'boom_y_pos': np.sin(swing_rad) * cos_boom,
}

extra_feature_name_list = list(engineered_features)

# Append the new columns to raw_dataset in place.
for feature_name, feature_values in engineered_features.items():
    raw_dataset[feature_name] = feature_values
    
# Row-to-row change of each angle signal, computed over the full frame in row
# order. np.diff returns one element fewer than its input. These arrays are
# not referenced by any other code visible in this notebook.
(diff_boom_angle,
 diff_swing_angle,
 diff_roll_angle,
 diff_pitch_angle) = (
    np.diff(raw_dataset[angle_column])
    for angle_column in ('Boom_Angle(deg)', 'Swing_Angle(deg)', 'Roll_Angle(deg)', 'Pitch_Angle(deg)')
)

# Disabled experiment, kept for reference. It refers to grad_* variables
# (not the diff_* arrays above), which are not defined in the visible cells.
#extra_feature_name_list = ['grad_boom_angle', 'grad_swing_angle', 'grad_roll_angle', 'grad_pitch_angle']
#new_feature_names = feature_names + extra_feature_name_list

#for extra_feature_name, extra_feature  in zip(extra_feature_name_list, [grad_boom_angle, grad_swing_angle, grad_roll_angle, grad_pitch_angle]):
#    raw_dataset[extra_feature_name] = extra_feature

def _to_model_frame(split_rows):
    """Reduce one split of ``raw_dataset`` to the columns used for modelling.

    Drops the bookkeeping columns, then keeps -- in this order -- the first
    four base features, the base feature at index 5 (index 4 is skipped), the
    engineered features and the targets. Rows are renumbered from zero.
    """
    model_columns = feature_names[0:4] + [feature_names[5]] + extra_feature_name_list + target_names
    trimmed = split_rows.drop(columns=['Time(sec)', 'file_idx', 'safe_state', 'dataset_type'])
    return trimmed[model_columns].reset_index(drop=True)


# Rows with dataset_type == 0 form the training split.
train_dataset_1 = raw_dataset[raw_dataset['dataset_type'] == 0]
train_dataset = _to_model_frame(train_dataset_1)

# Rows with dataset_type == 1 form the validation split.
val_dataset_1 = raw_dataset[raw_dataset['dataset_type'] == 1]
val_dataset = _to_model_frame(val_dataset_1)

  0%|          | 0/22 [00:00<?, ?it/s]

In [4]:
# Summary statistics of every training column, used as a sanity check of the
# value ranges after scaling, filtering and feature engineering.
train_dataset.describe()

Unnamed: 0,Boom_Angle(deg),Swing_Angle(deg),Load(Ton),Roll_Angle(deg),Pitch_Angle(deg),boom+roll/load,boom+pitch/load,swing/load,roll/pitch,load/roll,...,Actual_Load_Left_1(N),Actual_Load_Left_2(N),Actual_Load_Left_3(N),Actual_Load_Left_4(N),Actual_Load_Left_5(N),Actual_Load_Right_1(N),Actual_Load_Right_2(N),Actual_Load_Right_3(N),Actual_Load_Right_4(N),Actual_Load_Right_5(N)
count,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,...,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0,76951.0
mean,65.244964,71.48414,75.703889,-9.5e-05,-8.2e-05,0.926743,0.926743,1.049855,11.03717,-6743289.0,...,294.399906,279.314035,260.95976,242.051725,225.310701,228.69935,209.834585,190.629113,173.742344,161.857265
std,10.838993,64.78831,25.525701,0.000188,0.000289,0.232774,0.232773,1.083413,3425.251487,2183465000.0,...,118.62964,78.395474,54.837048,79.415485,121.068184,120.196203,75.108241,42.903806,67.08094,109.747233
min,40.0,-3.3772950000000004e-23,40.0,-0.014762,-0.009508,0.569282,0.569291,-3.070268e-25,-15267.807788,-597539800000.0,...,1.009678,40.44896,22.720991,8.58995,1.018398,1.036816,2.564975,4.721832,7.326577,1.031677
25%,60.0,5.089531,50.0,-0.000183,-0.00029,0.699999,0.7,0.06704352,-0.194376,-1112839.0,...,204.975095,214.508315,211.565825,176.64202,121.08186,121.555835,146.40922,159.88905,121.717605,66.69893
50%,70.0,53.31334,70.0,-5.2e-05,-5.5e-05,0.987527,0.987524,0.7800375,-0.020052,-381957.5,...,317.55644,275.17463,251.87402,234.32111,196.91258,221.07795,207.98654,196.57677,166.37103,136.46546
75%,70.0,135.0,100.0,2e-06,0.000127,1.0,1.000004,1.669641,0.355243,2230936.0,...,393.59353,342.354515,295.611795,305.8116,333.72717,324.85297,265.49148,219.18072,206.399405,237.9409
max,80.0,180.0,120.0,0.009374,0.015054,1.749991,1.749992,4.5,949563.561484,43714000000.0,...,500.14971,500.12921,500.1563,500.20391,500.25152,500.16115,500.1283,500.1313,500.1343,500.1373


In [5]:
# Random forest fitted on all target columns at once (multi-output
# regression). random_state pins the result. n_jobs=-1 asks scikit-learn for
# every available core; passing os.cpu_count() is fragile because it may
# return None, which scikit-learn interprets as a single job.
est_model = RandomForestRegressor(n_estimators=50, max_features='log2', random_state=0, n_jobs=-1)

# train_dataset holds the input columns first and the target columns last, so
# the position of the first target is derived from the frame instead of being
# hard-coded. With the current feature selection this evaluates to 15.
target_start_idx = train_dataset.shape[1] - len(target_names)
est_model.fit(train_dataset.iloc[:, 0:target_start_idx].to_numpy(), train_dataset.iloc[:, target_start_idx:].to_numpy())

In [7]:
# Predict every target for the validation rows; the model only sees the
# input columns (everything before target_start_idx).
val_features = val_dataset.iloc[:, :target_start_idx].to_numpy()
pred_out = est_model.predict(val_features)

# Labels for the prediction columns: Left 1-5 followed by Right 1-5
# (assumed to follow the same order as target_names -- TODO confirm).
pred_target_names = [
    f'Pred_Load_{side}_{position}(kN)'
    for side in ('Left', 'Right')
    for position in range(1, 6)
]

target_pred = pd.DataFrame(pred_out, columns=pred_target_names)

In [8]:
# Labels for the error columns: Left 1-5 followed by Right 1-5.
side_position_pairs = [(side, position) for side in ('Left', 'Right') for position in range(1, 6)]
absolute_error_names = [f'Absolute_Error_{side}_{position}(kN)' for side, position in side_position_pairs]
relative_error_names = [f'Relative_Error_{side}_{position}(%)' for side, position in side_position_pairs]

actual_load = val_dataset[target_names].to_numpy()
pred_load = target_pred.to_numpy()

# Element-wise |actual - predicted|, computed once and reused for both the
# absolute and the relative (percentage of |actual|) error tables.
abs_diff = np.abs(actual_load - pred_load)

absolute_error = pd.DataFrame(abs_diff, columns=absolute_error_names)
relative_error = pd.DataFrame(abs_diff / np.abs(actual_load) * 100, columns=relative_error_names)

# Validation inputs and targets, predictions and both error tables side by side.
result = pd.concat([val_dataset, target_pred, absolute_error, relative_error], axis=1)

result.describe()

Unnamed: 0,Boom_Angle(deg),Swing_Angle(deg),Load(Ton),Roll_Angle(deg),Pitch_Angle(deg),boom+roll/load,boom+pitch/load,swing/load,roll/pitch,load/roll,...,Relative_Error_Left_1(%),Relative_Error_Left_2(%),Relative_Error_Left_3(%),Relative_Error_Left_4(%),Relative_Error_Left_5(%),Relative_Error_Right_1(%),Relative_Error_Right_2(%),Relative_Error_Right_3(%),Relative_Error_Right_4(%),Relative_Error_Right_5(%)
count,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,...,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0,16300.0
mean,66.990055,68.11634,74.288957,-0.000122,-8.9e-05,0.906948,0.906948,0.9139215,-0.34032,5017316.0,...,5.205019,2.78599,2.228661,3.109366,5.326191,16.697539,10.808968,10.10783,12.436948,19.782566
std,4.254646,54.64557,4.949335,0.000122,0.000238,0.096699,0.096698,0.713532,107.36222,294437300.0,...,8.096656,2.866957,2.229069,2.616406,28.144494,58.21255,47.46974,60.996618,84.901928,138.586513
min,40.855858,-8.949622e-26,70.0,-0.000752,-0.000971,0.583644,0.583655,-1.118703e-27,-7777.544921,-1456265000.0,...,3e-06,0.000145,0.000614,0.000305,0.000944,2e-06,0.000739,0.000622,0.000149,0.000106
25%,65.0,18.26267,70.0,-0.000163,-0.000225,0.812499,0.812502,0.248241,-0.202995,-854136.6,...,0.854792,0.864635,0.632006,1.24882,2.347668,1.642745,1.604065,1.966573,2.153236,3.449933
50%,68.825081,54.91546,70.0,-0.0001,-0.000148,0.946443,0.946444,0.7335422,0.057188,-510056.1,...,2.156986,1.921274,1.668674,2.519788,4.423704,4.128005,4.297571,3.415089,5.356768,7.970107
75%,70.0,110.0589,80.0,-2.8e-05,8e-05,0.999999,0.999998,1.436491,0.673562,-264621.0,...,4.760833,3.708745,2.958113,4.455341,6.411984,10.975295,8.728755,7.80118,8.638446,12.089771
max,70.0,180.0,80.0,7.1e-05,0.00042,1.000001,1.000004,2.25,2405.759432,31264500000.0,...,62.800129,44.328735,28.900258,59.495029,2483.969325,2458.565991,1683.729494,2488.578866,4011.613525,7373.63908


In [9]:
# Headline metric: the mean of the per-target mean relative errors (in %),
# i.e. one number summarising all relative-error columns.
result[relative_error_names].mean().mean()

8.848907850709965

In [10]:
# Persist the fitted model to 'rf_model.joblib' in the current working
# directory. joblib files are pickle-based, so only load them from trusted sources.
joblib.dump(est_model, 'rf_model.joblib')

['rf_model.joblib']