In [1]:
import time, joblib, os, math
import seaborn as sns
import numpy as np
import pandas as pd
import dask.array as da
from dask.diagnostics import ProgressBar

import matplotlib.pyplot as plt
import lightgbm as lgb
import tensorflow as tf
from sklearn.conftest import dataset_fetchers

from sklearn.linear_model import LinearRegression
from tensorflow import keras
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error
from utils.dataset import load_dataset_v2, create_lstm_dataset
from tqdm.notebook import tqdm

In [2]:
pbar = ProgressBar()
pbar.register()

In [3]:
file_name_list = os.listdir(os.path.join('data', 'ver_2'))
file_path_list = []

for file_name in file_name_list:
    file_path_list.append(os.path.join('data', 'ver_2', file_name))

start_time = time.strftime('%Y%m%d-%H%M%S', time.localtime(time.time()))

In [4]:
dataset= load_dataset_v2(file_path_list)

loading dataset...:   0%|          | 0/84 [00:00<?, ?it/s]

In [5]:
dataset = dataset[dataset['outlet_flowrate(lpm)'] > 1]
dataset = dataset[::10]
dataset.reset_index(drop=True, inplace=True)

In [6]:
extra_column_name_list = ['pressure_1(bar)', 'main_pressure(bar)']
extra_feature = pd.DataFrame()

for col_name in extra_column_name_list:
    extra_feature['grad_'+col_name] = np.gradient(dataset[col_name].to_numpy())

dataset = pd.concat([dataset, extra_feature], axis=1)

In [7]:
dataset_value = dataset.drop(columns=['reserved', 'time(s)', 'test_case', 'test_case_iter'])
dataset_value = pd.concat([dataset_value, extra_feature], axis=1)

In [8]:
feature_col_name_list = ['pressure_1(bar)', 'main_pressure(bar)', 'grad_pressure_1(bar)', 'grad_main_pressure(bar)']
target_col_name = 'outlet_flowrate(lpm)'

train_data = dataset[dataset['test_case_iter']!=3]
val_data = dataset[dataset['test_case_iter']==3]

train_feature = train_data[feature_col_name_list]
train_feature.reset_index(drop=True, inplace=True)

train_target = pd.DataFrame(train_data[target_col_name])
train_target.reset_index(drop=True, inplace=True)

val_feature = val_data[feature_col_name_list]
val_feature.reset_index(drop=True, inplace=True)

val_target = pd.DataFrame(val_data[target_col_name])
val_target.reset_index(drop=True, inplace=True)

In [18]:
seq_len = 30
pred_distance = 0

train_feature, train_target = create_lstm_dataset(train_data[feature_col_name_list+[target_col_name]].to_numpy(), seq_len=seq_len, pred_distance=pred_distance, target_idx_pos=4)
val_feature, val_target = create_lstm_dataset(val_data[feature_col_name_list+[target_col_name]].to_numpy(), seq_len=seq_len, pred_distance=pred_distance, target_idx_pos=4)

In [10]:
best_lstm_model = keras.models.load_model('lstm_model.keras')

In [11]:
train_pred = best_lstm_model.predict(train_feature, verbose=1)
val_pred = best_lstm_model.predict(val_feature, verbose=1)



In [12]:
print(r2_score(train_target, train_pred), r2_score(val_target, val_pred))

0.9663291715084642 0.9516175626648637


In [13]:
print(mean_absolute_error(train_target, train_pred), mean_absolute_error(val_target, val_pred))

61.0485990305632 74.51838399311947


In [14]:
print(mean_absolute_percentage_error(train_target, train_pred), mean_absolute_percentage_error(val_target, val_pred)) 

0.16179117605413812 0.22302372462913195


In [19]:
train_target_da = da.from_array(train_target, chunks=1000)
train_pred_da = da.from_array(train_pred, chunks=1000)
val_target_da = da.from_array(val_target, chunks=1000)
val_pred_da = da.from_array(val_pred, chunks=1000)

In [22]:
train_error_da = da.absolute(train_target_da - train_pred_da)/train_target_da*100
train_error_da

  **blockwise_kwargs,


Unnamed: 0,Array,Chunk
Bytes,130.87 GiB,7.63 MiB
Shape,"(132531, 132531)","(1000, 1000)"
Count,71022 Tasks,17689 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 130.87 GiB 7.63 MiB Shape (132531, 132531) (1000, 1000) Count 71022 Tasks 17689 Chunks Type float64 numpy.ndarray",132531  132531,

Unnamed: 0,Array,Chunk
Bytes,130.87 GiB,7.63 MiB
Shape,"(132531, 132531)","(1000, 1000)"
Count,71022 Tasks,17689 Chunks
Type,float64,numpy.ndarray


In [None]:
train_error = train_error_da.compute()


[###################                     ] | 47% Completed |  7min  2.0s