# Multioutput Regression Problem

## Input
- **ECG Lead II**: 1d time series data.

## Output
- **SigmaX and Ratio2**

## Models
- **Linear Regression**
- **Support Vector Regressor**
- **Decision Tree**
- **Random Forest**
- **Gradient Boosting Trees**
- **K-NN**
- **Ridge Regression**

In [4]:
import pandas as pd
import numpy as np

# read dataset
df = pd.read_csv(r'D:\xuhu\pyansys-heart\downloads\Strocchi2020\01\Biv_dataset\dataset_40_220.csv')
# df = pd.read_csv(r'C:\Users\xuhu\OneDrive - ANSYS, Inc\Desktop\Temp\ROM_ECG_150_normalized\dataset_150_normalized.csv')
df.head()

Unnamed: 0,LeadII,sigmaX,ratio2
0,[ 3.18085560e-03 1.44712092e-03 1.29275136e-...,1.180131,5.522553
1,[ 1.94027305e-03 8.81275820e-04 9.05450270e-...,0.886647,6.81384
2,[ 4.40902250e-03 1.97902389e-03 1.65635571e-...,1.470678,7.802587
3,[ 2.44415151e-03 1.11904018e-03 1.06973749e-...,1.005849,7.151262
4,[ 6.59697570e-04 2.33656858e-04 4.50549800e-...,0.583702,1.897641


In [5]:
Y = df['LeadII'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
Y = np.stack(Y, axis=0)

X = df[['sigmaX', 'ratio2']].values
print(X.shape, Y.shape)

(129, 2) (129, 799)


In [6]:
import numpy as np

def mean_relative_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / np.clip(np.abs(y_true), 1e-10, None)), axis=0)


## GradientBoostingRegressor


In [13]:
# from sklearn.ensemble import GradientBoostingRegressor

# models = []
# for i in range(y_train.shape[1]):
#     model = GradientBoostingRegressor(n_estimators=100, random_state=42)
#     model.fit(X_train, y_train[:, i])
#     models.append(model)

# y_pred = np.column_stack([m.predict(X_test) for m in models])

# # Calculate MRE
# mre = mean_relative_error(y_test, y_pred)
# print(f"Mean Relative Error: {mre}")


Mean Relative Error for each output: [0.00933872 0.12516024]


## KNN

In [14]:
# from sklearn.neighbors import KNeighborsRegressor
# from sklearn.model_selection import train_test_split
# import numpy as np

# # Assuming X_train, X_test, y_train, and y_test are already defined and properly prepared
# model = KNeighborsRegressor(n_neighbors=5)
# model.fit(X_train, y_train)

# y_pred = model.predict(X_test)

# # Calculate MRE
# mre = mean_relative_error(y_test, y_pred)
# print(f"Mean Relative Error: {mre}")


Mean Relative Error: [0.11084457 0.23736645]


## Ridge Regression

In [7]:
...

Ellipsis

# Training

In [7]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

In [9]:
linear = MultiOutputRegressor(LinearRegression())
svr = MultiOutputRegressor(SVR())
dtree = DecisionTreeRegressor(random_state=42)
rf = RandomForestRegressor(random_state=42)
gbr = MultiOutputRegressor(GradientBoostingRegressor(random_state=42))
knn = KNeighborsRegressor()
ridge = Ridge()

In [10]:
models = [linear, svr, dtree, rf, gbr, knn, ridge]
model_names = ['Linear Regression', 'SVR', 'Decision Tree', 'Random Forest', 'Gradient Boosting', 'K-NN', 'Ridge Regression']

In [11]:
predictions = {}
for model, name in zip(models, model_names):
    model.fit(X_train, y_train)
    predictions[name] = model.predict(X_test) # learning error (not test error)

In [12]:
import matplotlib.pyplot as plt
mre_scores = [mean_relative_error(y_test, predictions[name]) for name in model_names]
print(mre_scores)

[array([3.32942094e-07, 2.14464877e-01, 1.54514228e+01, 5.45070096e-01,
       2.05337929e+00, 5.22743026e-01, 4.45174308e-01, 4.46014449e-01,
       1.20655358e+00, 4.18376251e-01, 5.34075462e-01, 3.10010169e-01,
       2.64813552e-01, 2.56037057e-01, 2.46580613e-01, 2.35618957e-01,
       1.87901640e-01, 4.03718057e-01, 4.78004087e-01, 9.44709833e-01,
       1.05433068e+00, 2.56342048e+00, 2.16731461e+00, 1.04517230e+00,
       1.18102119e+00, 1.71229803e+00, 1.56581054e+00, 7.79974904e-01,
       8.05079128e-01, 7.65992116e-01, 7.09072029e-01, 4.36852632e-01,
       4.05544650e-01, 2.84132209e-01, 3.15600013e-01, 2.74552925e-01,
       2.80347859e-01, 2.29316543e-01, 2.37699974e-01, 1.54589803e-01,
       1.54256340e-01, 1.30227558e-01, 1.38627031e-01, 1.16424554e-01,
       1.33134718e-01, 1.53478063e-01, 1.62611147e-01, 1.54677485e-01,
       1.77464103e-01, 1.73757778e-01, 1.84882637e-01, 1.21751623e-01,
       1.31445483e-01, 1.15049705e-01, 1.28066460e-01, 1.67666040e-01,
     

In [59]:
# mre of each parameters
sigmaX_scores = [score[0] for score in mre_scores]
ratio2_scores = [score[1] for score in mre_scores]

In [61]:
import pandas as pd
import numpy as np

def sample_mean_relative_error(y_true, y_pred):
    mre = np.abs((y_true - y_pred) / np.clip(np.abs(y_true), np.finfo(float).eps, None))
    return mre[0]

all_mre_scores = {}

for model, name in zip(models, model_names):
    model_mres = [sample_mean_relative_error(y_test[i], predictions[name][i]) for i in range(len(y_test))]
    all_mre_scores[name] = model_mres

# Save mre outcomes
df_all_mres = pd.DataFrame(all_mre_scores)
df_all_mres.to_csv(r'D:\xuhu\Temp\AI method\all_models_sample_mres.csv', index=False)

In [32]:
total_mre_scores = (np.array(ratio2_scores) + np.array(sigmaX_scores)) / 2
total_mre_scores

array([0.11812859, 0.12062664, 0.16171073, 0.09641176, 0.06724948,
       0.17410551, 0.22477734])

# Worst-case scenario from ROM learning phase

worst-case: D:\xuhu\Temp\ROM_ECG_TRAIN_1601\roms\w_2501.bin

In [65]:
import struct
import numpy as np

def read_binary(fn , debug = False):

    '''
    Reads a vector in ANSYS ROM Builder compatible binary format and returns the corresponding vector.
    '''

    fr = open(fn, "rb")
    nb = struct.unpack('Q', fr.read(8))[0]
    
    if debug:
        print("Read binary file containing " + str(nb) + " values.")
        
    vec = np.zeros(nb)
    
    for i in range(nb):
        vec[i] = struct.unpack('d', fr.read(8))[0]
        
    fr.close()
    
    return vec

file_name = r'D:\xuhu\Temp\ROM_ECG_TRAIN_1601\roms\w_2501.bin'
# print(full_path)
ROM_result = read_binary(file_name)
ROM_result = np.expand_dims(ROM_result, axis=0)
ROM_result.shape

(1, 799)

In [68]:
models[0]

In [69]:
# models = [linear, svr, dtree, rf, gbr, knn, ridge]

# Predict with linear regression model
model = models[0]
ROM_Y = model.predict(ROM_result)
ROM_Y

array([[0.65231468, 7.98890356]])

In [70]:
models[4]

In [71]:
# models = [linear, svr, dtree, rf, gbr, knn, ridge]

# Predict with gradient boosting regression tree model
model = models[4]
ROM_Y = model.predict(ROM_result)
ROM_Y

array([[0.58117329, 8.77459096]])

In [72]:
# Predict with Ridge regression model
model = models[6]
ROM_Y = model.predict(ROM_result)
ROM_Y

array([[0.74986941, 6.92985561]])