In [1]:
import os, joblib
import pandas as pd
import numpy as np

from utils.Dataset import CraneDataset
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Collect the path of every entry in the 'data' directory.
# os.listdir() returns names in arbitrary, platform-dependent order, so the
# listing is sorted to make the file order (and anything downstream that is
# derived from it) reproducible between runs and machines.
data_file_name_list = sorted(os.listdir('data'))
data_file_path_list = [os.path.join('data', file_name) for file_name in data_file_name_list]

In [3]:
# Load all recordings through the project's CraneDataset helper and fetch the
# feature / target column-name lists it exposes.
dataset_inst = CraneDataset(data_file_path_list)
raw_dataset = dataset_inst.get_dataset()

feature_names = dataset_inst.get_data_feature_names()
target_names = dataset_inst.get_data_target_names()

# Scale every target column by 1/1000 (presumably N -> kN, matching the
# "(kN)" suffix used for the prediction columns later on -- confirm).
raw_dataset[target_names] = raw_dataset[target_names] / 1000

# Keep only rows in which *every* scaled target is >= 1. A single combined
# mask replaces one filtering pass per target; the non-inplace reset_index
# hands back a fresh frame, so the column assignments below never write into
# a slice of the original data.
all_targets_valid = (raw_dataset[target_names] >= 1).all(axis=1)
raw_dataset = raw_dataset.loc[all_targets_valid].reset_index(drop=True)

# ---- engineered features ---------------------------------------------------
extra_feature_name_list = ['boom+roll/load', 'boom+pitch/load', 'swing/load', 'roll/pitch', 'load/roll', 'load/pitch', 'boom+roll', 'boom+pitch', 'boom_x_pos', 'boom_y_pos']

boom_angle = raw_dataset['Boom_Angle(deg)']
swing_angle = raw_dataset['Swing_Angle(deg)']
roll_angle = raw_dataset['Roll_Angle(deg)']
pitch_angle = raw_dataset['Pitch_Angle(deg)']
load = raw_dataset['Load(Ton)']

# NOTE(review): the three ratios that divide by roll/pitch blow up when the
# angle is close to zero (the describe() output of the training frame shows
# magnitudes around 1e11 for 'load/roll') and become inf/NaN when it is
# exactly zero. They are kept unchanged so the trained model stays comparable;
# consider clipping or dropping them.
raw_dataset['boom+roll/load'] = (boom_angle + roll_angle) / load
raw_dataset['boom+pitch/load'] = (boom_angle + pitch_angle) / load
raw_dataset['swing/load'] = swing_angle / load
raw_dataset['roll/pitch'] = roll_angle / pitch_angle
raw_dataset['load/roll'] = load / roll_angle
raw_dataset['load/pitch'] = load / pitch_angle
raw_dataset['boom+roll'] = boom_angle + roll_angle
raw_dataset['boom+pitch'] = boom_angle + pitch_angle

# Swing direction (cos/sin) scaled by the cosine of the boom angle.
cos_boom = np.cos(np.deg2rad(boom_angle))
raw_dataset['boom_x_pos'] = np.cos(np.deg2rad(swing_angle)) * cos_boom
raw_dataset['boom_y_pos'] = np.sin(np.deg2rad(swing_angle)) * cos_boom

# ---- train / validation split ----------------------------------------------
# Column layout used by the model: 5 base features, the 10 engineered
# features, then the targets. feature_names[4] is skipped exactly as in the
# original selection (reason not visible in this notebook). Selecting columns
# explicitly also leaves out the bookkeeping columns ('Time(sec)', 'file_idx',
# 'safe_state', 'dataset_type'), so no separate drop() is required.
model_columns = feature_names[0:4] + [feature_names[5]] + extra_feature_name_list + target_names


def _select_split(frame, split_id, columns):
    """Return the rows of `frame` whose 'dataset_type' equals `split_id`.

    Only `columns` are kept (in that order) and the result gets a fresh
    0..n-1 index.
    """
    return frame.loc[frame['dataset_type'] == split_id, columns].reset_index(drop=True)


train_dataset = _select_split(raw_dataset, 0, model_columns)  # dataset_type 0 -> training rows
val_dataset = _select_split(raw_dataset, 1, model_columns)    # dataset_type 1 -> validation rows

  0%|          | 0/22 [00:00<?, ?it/s]

In [4]:
# Summary statistics of the training frame (features followed by targets);
# a quick sanity check of value ranges before fitting.
train_dataset.describe()

Unnamed: 0,Boom_Angle(deg),Swing_Angle(deg),Load(Ton),Roll_Angle(deg),Pitch_Angle(deg),boom+roll/load,boom+pitch/load,swing/load,roll/pitch,load/roll,...,Actual_Load_Left_1(N),Actual_Load_Left_2(N),Actual_Load_Left_3(N),Actual_Load_Left_4(N),Actual_Load_Left_5(N),Actual_Load_Right_1(N),Actual_Load_Right_2(N),Actual_Load_Right_3(N),Actual_Load_Right_4(N),Actual_Load_Right_5(N)
count,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,...,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0,73725.0
mean,65.190813,68.59897,76.197491,-9.7e-05,-8.6e-05,0.912167,0.912167,0.9808857,11.577385,-6672220.0,...,297.826645,281.638267,262.285724,242.467578,224.892958,229.977669,210.349984,190.309469,172.502127,159.603297
std,10.952016,63.48692,24.859762,0.000188,0.000289,0.210261,0.21026,1.029401,3499.592214,2231547000.0,...,114.613509,73.653794,51.523109,78.998789,121.459717,120.625319,75.656516,43.552434,66.653828,109.075876
min,40.0,-3.3772950000000004e-23,40.0,-0.014762,-0.009508,0.583644,0.583655,-3.070268e-25,-15267.807788,-597539800000.0,...,1.009678,40.44896,22.720991,8.602034,1.025118,1.048542,2.564975,2.840951,1.822599,1.025114
25%,60.0,3.936796,50.0,-0.000178,-0.000289,0.7,0.700004,0.05177023,-0.182274,-937583.7,...,215.51685,217.25543,218.42632,173.22321,119.73952,121.38619,148.25128,161.14779,122.58028,66.751591
50%,70.0,48.47766,70.0,-7e-05,-7.6e-05,0.999995,0.999992,0.7347986,-0.020938,-406932.1,...,322.16547,282.39088,257.97588,243.78764,196.68219,224.05374,208.2527,195.08659,162.53933,128.19626
75%,70.0,128.6579,100.0,1e-06,0.000126,0.999999,1.000002,1.540047,0.351399,2134435.0,...,390.89917,338.32182,292.39964,302.71803,330.36431,325.7,268.55165,219.66658,202.92159,236.27447
max,80.0,180.0,120.0,0.009374,0.015054,1.749991,1.749992,4.5,949563.561484,43714000000.0,...,500.14971,500.12921,500.1563,500.20391,500.25152,500.16115,500.1283,500.1313,500.1343,500.1373


In [5]:
# Multi-output random forest: one model predicts all target columns at once.
# n_jobs=-1 is scikit-learn's documented way to use every available processor;
# os.cpu_count() may return None, which would fall back to a single job.
# random_state is fixed, so the fitted forest does not depend on n_jobs.
est_model = RandomForestRegressor(n_estimators=50, max_features='log2', random_state=0, n_jobs=-1)

# The frame is laid out as [features..., targets...]; derive the boundary from
# the number of targets instead of hard-coding it, so it stays correct when
# the feature list changes (currently evaluates to 15).
target_start_idx = train_dataset.shape[1] - len(target_names)
est_model.fit(train_dataset.iloc[:, 0:target_start_idx].to_numpy(), train_dataset.iloc[:, target_start_idx:].to_numpy())

In [6]:
# Run the fitted forest on the validation features (the columns that precede
# the targets) and wrap the raw prediction matrix in a labelled DataFrame.
val_features = val_dataset.iloc[:, 0:target_start_idx].to_numpy()
pred_out = est_model.predict(val_features)

# Left_1..Left_5 followed by Right_1..Right_5.
pred_target_names = [
    f'Pred_Load_{side}_{position}(kN)'
    for side in ('Left', 'Right')
    for position in range(1, 6)
]

target_pred = pd.DataFrame(data=pred_out, columns=pred_target_names)

In [7]:
# Column labels for the per-target error tables: Left_1..Left_5 followed by
# Right_1..Right_5.
absolute_error_names = [
    f'Absolute_Error_{side}_{position}(kN)'
    for side in ('Left', 'Right')
    for position in range(1, 6)
]
relative_error_names = [
    f'Relative_Error_{side}_{position}(%)'
    for side in ('Left', 'Right')
    for position in range(1, 6)
]

actual_load = val_dataset[target_names].to_numpy()
pred_load = target_pred.to_numpy()

# Element-wise |actual - predicted|, and the same quantity as a percentage of
# |actual|.
error_magnitude = np.abs(actual_load - pred_load)
absolute_error = pd.DataFrame(error_magnitude, columns=absolute_error_names)
relative_error = pd.DataFrame((error_magnitude / np.abs(actual_load)) * 100, columns=relative_error_names)

# Side-by-side table: validation inputs/targets, predictions and both errors.
result = pd.concat([val_dataset, target_pred, absolute_error, relative_error], axis=1)

result.describe()

Unnamed: 0,Boom_Angle(deg),Swing_Angle(deg),Load(Ton),Roll_Angle(deg),Pitch_Angle(deg),boom+roll/load,boom+pitch/load,swing/load,roll/pitch,load/roll,...,Relative_Error_Left_1(%),Relative_Error_Left_2(%),Relative_Error_Left_3(%),Relative_Error_Left_4(%),Relative_Error_Left_5(%),Relative_Error_Right_1(%),Relative_Error_Right_2(%),Relative_Error_Right_3(%),Relative_Error_Right_4(%),Relative_Error_Right_5(%)
count,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,...,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0,19526.0
mean,66.906197,79.56637,72.659019,-0.000109,-7.4e-05,0.965252,0.965253,1.196789,-0.50029,2805944.0,...,7.063053,4.304322,4.513606,6.35668,10.447941,15.8922,7.036474,5.085938,6.743403,16.348828
std,4.99942,61.06823,15.693191,0.000133,0.00025,0.229042,0.229041,1.012774,65.181544,241966700.0,...,18.429991,5.795184,7.142827,9.891491,34.531414,67.559742,16.795944,12.02005,11.398053,51.039566
min,40.139985,-8.949622e-26,50.0,-0.000885,-0.000971,0.569282,0.569291,-1.118703e-27,-6347.942247,-2392108000.0,...,6e-06,0.00022,0.000133,4.4e-05,8.2e-05,9e-06,0.000129,3.945806e-07,5.6e-05,2e-05
25%,65.0,23.52123,60.0,-0.000208,-0.00022,0.812497,0.812496,0.3176702,-0.25426,-1228818.0,...,1.062464,0.859383,0.595319,0.977572,1.946654,1.196152,1.204316,1.043062,1.68123,3.172504
50%,70.0,72.72863,80.0,-6.4e-05,-5.7e-05,0.84769,0.847691,0.8930768,0.045601,-433751.0,...,2.330356,2.256324,1.717039,2.21561,3.623969,3.843904,3.239881,3.254938,3.686837,5.982073
75%,70.0,135.0,80.0,-3e-06,6.3e-05,1.166666,1.166664,2.139043,0.63348,-218580.9,...,7.459205,5.57416,4.104186,6.268654,12.30749,16.436172,9.418972,6.591739,8.72572,15.950876
max,70.0,180.0,100.0,7.1e-05,0.00061,1.400001,1.400002,3.0,3319.477208,31264500000.0,...,581.66715,138.589613,164.2399,420.877096,3321.251556,2761.171201,1012.237873,602.1746,394.09962,2344.701444


In [8]:
# Headline metric: average the relative error over the validation rows for
# each target column, then average those per-target means into one number.
per_target_mean_relative_error = result.loc[:, relative_error_names].mean(axis=0)
per_target_mean_relative_error.mean()

8.37924441612351

In [9]:
# Persist the fitted random forest to 'rf_model.joblib' (relative to the
# current working directory) so it can be reloaded with joblib.load().
# NOTE(review): joblib files are pickle-based -- only load them from trusted sources.
joblib.dump(est_model, 'rf_model.joblib')

['rf_model.joblib']