In [None]:
import os.path, time
import onnxruntime as ort
import pandas as pd
import numpy as np

from utils.Dataset import load_logging_data, create_seq_2_seq_dataset_v1, create_seq_2_seq_dataset_v2
from tqdm.notebook import tqdm

In [None]:
# Open an ONNX Runtime inference session for the exported regression model.
onnx_model_path = 'models/reg_model.onnx'
model = ort.InferenceSession(onnx_model_path)

In [None]:
# Load each split and tag its rows so they can be separated again after merging.
train_data = load_logging_data(data_root_path='data/train')
train_data['Data_Type'] = 'train'

val_data = load_logging_data(data_root_path='data/val')
val_data['Data_Type'] = 'validation'

# Stack both splits row-wise into a single frame with a fresh 0..n-1 index.
dataset = pd.concat([train_data, val_data], axis=0, ignore_index=True)

In [None]:
# Raw channel names are picked by column position.
# NOTE(review): this relies on the column order returned by load_logging_data — confirm it is stable.
raw_columns = list(dataset.columns)

angle_name_list = raw_columns[22:24] + raw_columns[26:28]
print(angle_name_list)

pressure_name_list = raw_columns[18:20] + raw_columns[24:26]
print(pressure_name_list)

new_angle_name_list = ['Mast_X(deg)', 'Mast_Y(deg)', 'Base_X(deg)', 'Base_Y(deg)']
new_pressure_name_list = ['P1(bar)', 'P2(bar)', 'P3(bar)', 'P4(bar)']

# The renames are applied in three stages (fixed channels, angles, pressures), in that
# order, so a raw name already renamed by an earlier stage is simply skipped later.
dataset.rename(
    columns={
        'Time  1 - default sample rate': 'Time(sec)',
        'caloutput_drill_depth CH=26': 'Drill_Depth(m)',
        'caloutput_rotate_velocity CH=25': 'Drill_Rotation(RPM)',
    },
    inplace=True,
)
dataset.rename(columns=dict(zip(angle_name_list, new_angle_name_list)), inplace=True)
dataset.rename(columns=dict(zip(pressure_name_list, new_pressure_name_list)), inplace=True)

# Work_Load is the product of the first pressure channel and the drill rotation speed.
first_pressure = dataset[new_pressure_name_list[0]]
dataset['Work_Load'] = first_pressure * dataset['Drill_Rotation(RPM)']

# The logged depth is divided by 10 before use.
dataset['Drill_Depth(m)'] = dataset['Drill_Depth(m)'] / 10

dataset

In [None]:
# Limit every angle channel to the closed range [-3, 3].
dataset[new_angle_name_list] = dataset[new_angle_name_list].clip(lower=-3, upper=3)

# Rescale Work_Load: divide by 4100, then multiply by 100 (evaluated left to right).
dataset['Work_Load'] = dataset['Work_Load'] / 4100 * 100

feature_name_list = new_angle_name_list + [new_pressure_name_list[0]] + ['Drill_Rotation(RPM)']
target_name = 'Work_Load'

# Columns exported to NumPy: all features followed by the target as the last column.
selected_columns = feature_name_list + [target_name]
is_train_row = dataset['Data_Type'] == 'train'
is_val_row = dataset['Data_Type'] == 'validation'

train_dataset = dataset.loc[is_train_row, selected_columns].to_numpy()
val_dataset = dataset.loc[is_val_row, selected_columns].to_numpy()

print(train_dataset.shape, val_dataset.shape)

In [None]:
# Keep only the last column (the Work_Load target) of each split as a 1-D series.
# Note: this overwrites the 2-D arrays, so the cell cannot be re-run without rebuilding them.
train_dataset, val_dataset = train_dataset[:, -1], val_dataset[:, -1]

In [None]:
# Windowing parameters passed to the project's sequence builder.
seq_len = 30
pred_distance = 30
window_kwargs = dict(seq_len=seq_len, pred_distance=pred_distance)

train_feature, train_target = create_seq_2_seq_dataset_v2(train_dataset, **window_kwargs)
val_feature, val_target = create_seq_2_seq_dataset_v2(val_dataset, **window_kwargs)

# Keep only the last column of each target array.
# NOTE(review): presumably the final step of the predicted sequence — confirm against
# create_seq_2_seq_dataset_v2.
train_target = train_target[:, -1]
val_target = val_target[:, -1]

for split_feature, split_target in ((train_feature, train_target), (val_feature, val_target)):
    print(split_feature.shape, split_target.shape)

In [None]:
# Run the ONNX model on the training windows one at a time, recording the wall time of
# every call so the average per-sequence latency can be reported.
train_feature = train_feature.astype(np.float32)
train_target = train_target.astype(np.float32)

time_list = []
train_pred = []

for input_data in tqdm(train_feature):
    # perf_counter() is the high-resolution clock intended for timing short intervals;
    # time.time() can be too coarse (and is adjustable) for sub-millisecond measurements.
    t0 = time.perf_counter()
    # Each window is fed as a single batch of shape (1, N, 1).
    input_data = input_data.reshape(1, -1, 1)
    outputs = model.run(output_names=None, input_feed={'input': input_data})
    # Collapse the model output to a plain Python scalar.
    train_pred.append(np.squeeze(outputs).item())
    time_list.append(time.perf_counter() - t0)

# Convert seconds to milliseconds.
time_arr = np.array(time_list) * 1000
print('Average inference time per data sequence: {:.3f} milliseconds'.format(np.mean(time_arr)))

In [None]:
# Run the ONNX model on the validation windows one at a time, recording the wall time of
# every call. `time_list` / `time_arr` are reset here, so afterwards they hold the
# validation timings only.
val_feature = val_feature.astype(np.float32)
val_target = val_target.astype(np.float32)

time_list = []
val_pred = []

for input_data in tqdm(val_feature):
    # perf_counter() is the high-resolution clock intended for timing short intervals;
    # time.time() can be too coarse (and is adjustable) for sub-millisecond measurements.
    t0 = time.perf_counter()
    # Each window is fed as a single batch of shape (1, N, 1).
    input_data = input_data.reshape(1, -1, 1)
    outputs = model.run(output_names=None, input_feed={'input': input_data})
    # Collapse the model output to a plain Python scalar.
    val_pred.append(np.squeeze(outputs).item())
    time_list.append(time.perf_counter() - t0)

# Convert seconds to milliseconds.
time_arr = np.array(time_list) * 1000
print('Average inference time per data sequence: {:.3f} milliseconds'.format(np.mean(time_arr)))

In [None]:
# Save the per-sequence latencies (ms) currently held in `time_arr`; when the notebook is
# run top to bottom these are the timings from the validation inference loop.
# Create the output folder first so to_csv does not fail on a fresh checkout.
os.makedirs('result', exist_ok=True)
df = pd.DataFrame(time_arr, columns=['onnx_inference_time(ms)'])
df.to_csv('result/onnx_inference_time.csv', index=False)

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error

In [None]:
# Print one line per metric: the train score followed by the validation score.
for metric_fn in (r2_score, mean_absolute_error, mean_absolute_percentage_error):
    print(metric_fn(train_target, train_pred), metric_fn(val_target, val_pred))

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Attach the validation predictions to the validation rows and derive error columns.
# NOTE(review): 'outlet_flowrate(lpm)' is not one of the columns renamed or created
# earlier in this notebook — confirm that val_data actually provides it.
# NOTE(review): dropping the first seq_len-1 rows assumes one prediction per remaining
# row; verify this alignment given pred_distance.
aligned_val = val_data.iloc[seq_len - 1:].reset_index(drop=True)
pred_frame = pd.DataFrame({'pred_outlet_flowrate(lpm)': val_pred})
data = pd.concat([aligned_val, pred_frame], axis=1)

measured = data['outlet_flowrate(lpm)'].to_numpy()
predicted = data['pred_outlet_flowrate(lpm)'].to_numpy()

# Absolute error, and the same error as a percentage of the measured value.
abs_error = np.abs(measured - predicted)
rela_error = (abs_error / measured) * 100

error_frame = pd.DataFrame({'abs_error': abs_error, 'relative_error': rela_error})
val_data_slice = pd.concat([data, error_frame], axis=1)

In [None]:
# Preview the first five rows of the error table.
val_data_slice.head(n=5)

In [None]:
# Relative error against the measured value, coloured by 'venturi_dist(mm)', log-scaled y axis.
ax = sns.scatterplot(
    data=val_data_slice,
    x='outlet_flowrate(lpm)',
    y='relative_error',
    hue='venturi_dist(mm)',
    edgecolor='black',
)
ax.set_xlabel('Outlet flowrate (lpm)')
ax.set_yscale('log')
ax.set_ylabel('Relative error (%)')

In [None]:
def _pump_speed_band(frame, nominal_rpm, half_width=50):
    """Return (rows, snapped_rows) for one pump-speed band.

    `rows` are the rows of `frame` whose 'pump_speed(rpm)' lies strictly between
    nominal_rpm - half_width and nominal_rpm + half_width, re-indexed from 0.
    `snapped_rows` is a copy of `rows` with 'pump_speed(rpm)' overwritten by nominal_rpm.
    """
    speed = frame['pump_speed(rpm)']
    in_band = (speed > nominal_rpm - half_width) & (speed < nominal_rpm + half_width)
    rows = frame[in_band].reset_index(drop=True)
    snapped_rows = rows.copy()
    snapped_rows['pump_speed(rpm)'] = nominal_rpm
    return rows, snapped_rows


# Three bands: (850, 950) -> 900, (1150, 1250) -> 1200, (1450, 1550) -> 1500.
pump_speed_1_df, pump_speed_1_df_fix = _pump_speed_band(val_data_slice, 900)
pump_speed_2_df, pump_speed_2_df_fix = _pump_speed_band(val_data_slice, 1200)
pump_speed_3_df, pump_speed_3_df_fix = _pump_speed_band(val_data_slice, 1500)

# Stack the snapped bands; the per-band indices are kept as-is.
pump_speed_df = pd.concat([pump_speed_1_df_fix, pump_speed_2_df_fix, pump_speed_3_df_fix])

In [None]:
# Summary statistics of both error columns for the 900 rpm band.
error_columns = ['relative_error', 'abs_error']
stats = pump_speed_1_df_fix[error_columns].describe()
stats

In [None]:
# Relative-error distribution per pump-speed band, on a log-scaled y axis.
sns.set_theme(font_scale=1.2)
sns.set_style("whitegrid", {'grid.linestyle': '--'})

ax = sns.boxplot(data=pump_speed_df, x='pump_speed(rpm)', y='relative_error')
ax.set_yscale('log')
ax.set_ylabel('Relative Error(%)')

In [None]:
# Relative-error distribution per 'nozzle_len(mm)' value, on a log-scaled y axis.
sns.set_theme(font_scale=1.2)
sns.set_style("whitegrid", {'grid.linestyle': '--'})

ax = sns.boxplot(data=pump_speed_df, x='nozzle_len(mm)', y='relative_error')
ax.set_yscale('log')
ax.set_ylabel('Relative Error(%)')

In [None]:
# Relative-error distribution per 'nozzle_dia(mm)' value, on a log-scaled y axis.
sns.set_theme(font_scale=1.2)
sns.set_style("whitegrid", {'grid.linestyle': '--'})

ax = sns.boxplot(data=pump_speed_df, x='nozzle_dia(mm)', y='relative_error')
ax.set_yscale('log')
ax.set_ylabel('Relative Error(%)')

In [None]:
# Relative-error distribution per 'venturi_dist(mm)' value, on a log-scaled y axis.
sns.set_theme(font_scale=1.2)
sns.set_style("whitegrid", {'grid.linestyle': '--'})

ax = sns.boxplot(data=pump_speed_df, x='venturi_dist(mm)', y='relative_error')
ax.set_yscale('log')
ax.set_ylabel('Relative Error(%)')

In [None]:
# Scatter of the per-sequence inference latencies from `time_list`, converted to milliseconds.
latency_ms = np.array(time_list) * 1000
sns.scatterplot(latency_ms)

# Optional zoom, left disabled: plt.ylim(0, 5)