In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from keras import Model, Input, layers, optimizers, losses, metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

data = pd.read_csv('./drive/MyDrive/VagusNerveResearchProject/vns_dataset_threshold_type.csv')

data.loc[data['fibre_type'] == 'AB', 'fibre_type'] = 0.0
data.loc[data['fibre_type'] == 'C', 'fibre_type'] = 1.0

# Encode the categorical feature
def one_hot_encode(labels, num_classes):
    return np.eye(num_classes)[labels]

X_fibre_type = one_hot_encode(data[['fibre_type']].astype('int'), num_classes=2)
X_fibre_type = X_fibre_type.reshape(-1, 2)

# Split the data
X = data.loc[:, ['nerve_a', 'nerve_b', 'activation_level', 'frequency', 'pulse_width']]
X.loc[:, 'fibre_type_AB'] = X_fibre_type[:, 0]
X.loc[:, 'fibre_type_C'] = X_fibre_type[:, 1]
y = data.loc[:, ['amplitude']]

# X.loc[X['fibre_type'] == 'AB', 'fibre_type'] = 0
# X.loc[X['fibre_type'] == 'C', 'fibre_type'] = 1
# X.loc[:, 'fibre_type'] = pd.to_numeric(X['fibre_type'])

In [3]:
X_ab = X[X['fibre_type_AB'] == 1.0]
X_c = X[X['fibre_type_C'] == 1.0]
y_ab = y[X['fibre_type_AB'] == 1.0]
y_c = y[X['fibre_type_C'] == 1.0]

# Normalize the continuous features
X_scaler_ab = StandardScaler()
X_ab_norm = X_scaler_ab.fit_transform(X_ab[['nerve_a', 'nerve_b', 'activation_level', 'frequency', 'pulse_width']])
X_scaler_c = StandardScaler()
X_c_norm = X_scaler_c.fit_transform(X_c[['nerve_a', 'nerve_b', 'activation_level', 'frequency', 'pulse_width']])

y_scaler_ab = StandardScaler()
y_ab_norm = y_scaler_ab.fit_transform(y_ab)
y_scaler_c = StandardScaler()
y_c_norm = y_scaler_c.fit_transform(y_c)

X_ab_norm = np.hstack([X_ab_norm, X_ab[['fibre_type_AB', 'fibre_type_C']].values])
X_c_norm = np.hstack([X_c_norm, X_c[['fibre_type_AB', 'fibre_type_C']].values])

X_norm = np.vstack([X_ab_norm, X_c_norm])
y_norm = np.vstack([y_ab_norm, y_c_norm])

X_train, X_test, y_train, y_test = train_test_split(X_norm, y_norm, test_size=0.2, random_state=42)

In [4]:
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split

# Define parameter grids for each model
param_grids = {
    'Linear Regression': {},
    'Ridge Regression': {'alpha': [0.1, 1.0, 10.0]},
    'Lasso Regression': {'estimator__alpha': [0.01, 0.1, 1.0]},
    'Elastic Net': {'estimator__alpha': [0.01, 0.1, 1.0], 'estimator__l1_ratio': [0.2, 0.5, 0.8]},
    'Support Vector Regression (SVR)': {'estimator__C': [0.1, 1.0, 10.0], 'estimator__epsilon': [0.01, 0.1, 0.2]},
    'Decision Tree Regressor': {'max_depth': [None, 5, 10, 15], 'min_samples_split': [2, 10, 20]},
    'Random Forest Regressor': {'n_estimators': [5, 10, 15], 'max_depth': [None, 5, 10]},
    'Gradient Boosting Regressor': {'estimator__n_estimators': [50, 75, 100], 'estimator__learning_rate': [0.01, 0.1, 0.2]},
    'K-Nearest Neighbors Regressor': {'n_neighbors': [3, 5, 7]}
}

# List of models to evaluate
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': MultiOutputRegressor(Lasso()),
    'Elastic Net': MultiOutputRegressor(ElasticNet()),
    'Support Vector Regression (SVR)': MultiOutputRegressor(SVR(kernel='rbf')),
    'Decision Tree Regressor': DecisionTreeRegressor(),
    'Random Forest Regressor': RandomForestRegressor(),
    'Gradient Boosting Regressor': MultiOutputRegressor(GradientBoostingRegressor()),
    'K-Nearest Neighbors Regressor': KNeighborsRegressor()
}

# Evaluate each model with GridSearchCV
for name, model in models.items():
    print(f'Evaluating {name}...')
    param_grid = param_grids[name]
    if param_grid:
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        print(f'Best parameters for {name}: {grid_search.best_params_}')
    else:
        best_model = model
        best_model.fit(X_train, y_train)

    y_pred = best_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred, multioutput='uniform_average')
    r2 = r2_score(y_test, y_pred, multioutput='uniform_average')
    print(f'{name} MSE: {mse:.4f}')
    print(f'{name} R^2: {r2:.4f}')
    print()


Evaluating Linear Regression...
Linear Regression MSE: 0.6501
Linear Regression R^2: 0.3371

Evaluating Ridge Regression...


  pid = os.fork()


Best parameters for Ridge Regression: {'alpha': 10.0}
Ridge Regression MSE: 0.6501
Ridge Regression R^2: 0.3371

Evaluating Lasso Regression...
Best parameters for Lasso Regression: {'estimator__alpha': 0.01}
Lasso Regression MSE: 0.6505
Lasso Regression R^2: 0.3367

Evaluating Elastic Net...
Best parameters for Elastic Net: {'estimator__alpha': 0.01, 'estimator__l1_ratio': 0.2}
Elastic Net MSE: 0.6500
Elastic Net R^2: 0.3373

Evaluating Support Vector Regression (SVR)...
Best parameters for Support Vector Regression (SVR): {'estimator__C': 10.0, 'estimator__epsilon': 0.2}
Support Vector Regression (SVR) MSE: 0.1766
Support Vector Regression (SVR) R^2: 0.8199

Evaluating Decision Tree Regressor...
Best parameters for Decision Tree Regressor: {'max_depth': 15, 'min_samples_split': 2}
Decision Tree Regressor MSE: 0.0000
Decision Tree Regressor R^2: 1.0000

Evaluating Random Forest Regressor...


  return fit_method(estimator, *args, **kwargs)


Best parameters for Random Forest Regressor: {'max_depth': None, 'n_estimators': 15}
Random Forest Regressor MSE: 0.0000
Random Forest Regressor R^2: 1.0000

Evaluating Gradient Boosting Regressor...
Best parameters for Gradient Boosting Regressor: {'estimator__learning_rate': 0.2, 'estimator__n_estimators': 100}
Gradient Boosting Regressor MSE: 0.0007
Gradient Boosting Regressor R^2: 0.9993

Evaluating K-Nearest Neighbors Regressor...
Best parameters for K-Nearest Neighbors Regressor: {'n_neighbors': 3}
K-Nearest Neighbors Regressor MSE: 0.1009
K-Nearest Neighbors Regressor R^2: 0.8971



In [6]:
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
import pickle

# Define parameter grids for each model
param_grids = {
    'Linear Regression': {},
    'Ridge Regression': {'alpha': [0.1, 1.0, 10.0]},
    'Lasso Regression': {'estimator__alpha': [0.01, 0.1, 1.0]},
    'Elastic Net': {'estimator__alpha': [0.01, 0.1, 1.0], 'estimator__l1_ratio': [0.2, 0.5, 0.8]},
    'Support Vector Regression (SVR)': {'estimator__C': [0.1, 1.0, 10.0], 'estimator__epsilon': [0.01, 0.1, 0.2]},
    # 'Decision Tree Regressor': {'max_depth': [None, 5, 10, 15], 'min_samples_split': [2, 10, 20]},
    # 'Random Forest Regressor': {'n_estimators': [5, 10, 15], 'max_depth': [None, 5, 10]},
    'Gradient Boosting Regressor': {'estimator__n_estimators': [50, 75, 100], 'estimator__learning_rate': [0.01, 0.1, 0.2]},
    'K-Nearest Neighbors Regressor': {'n_neighbors': [3, 5, 7]}
}

# List of models to evaluate
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': MultiOutputRegressor(Lasso()),
    'Elastic Net': MultiOutputRegressor(ElasticNet()),
    'Support Vector Regression (SVR)': MultiOutputRegressor(SVR(kernel='rbf')),
    # 'Decision Tree Regressor': DecisionTreeRegressor(),
    # 'Random Forest Regressor': RandomForestRegressor(),
    'Gradient Boosting Regressor': MultiOutputRegressor(GradientBoostingRegressor()),
    'K-Nearest Neighbors Regressor': KNeighborsRegressor()
}

def get_model_size(model):
    # Serialize the model using pickle
    serialized_model = pickle.dumps(model)

    # Get the size of the serialized model in bytes
    model_size_in_bytes = len(serialized_model)

    # Convert the size to kilobytes (optional)
    model_size_in_kb = model_size_in_bytes / 1024
    return model_size_in_kb

# Evaluate each model with GridSearchCV
for name, model in models.items():
    print(f'Evaluating {name}...')
    param_grid = param_grids[name]
    if param_grid:
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        print(f'Best parameters for {name}: {grid_search.best_params_}')
    else:
        best_model = model
        best_model.fit(X_train, y_train)

    y_pred = best_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred, multioutput='uniform_average')
    r2 = r2_score(y_test, y_pred, multioutput='uniform_average')
    print(f'{name} MSE: {mse:.4f}')
    print(f'{name} R^2: {r2:.4f}')
    print(f'{name} model size: {get_model_size(best_model)}')
    print()


Evaluating Linear Regression...
Linear Regression MSE: 0.6501
Linear Regression R^2: 0.3371
Linear Regression model size: 0.5126953125

Evaluating Ridge Regression...


  pid = os.fork()


Best parameters for Ridge Regression: {'alpha': 10.0}
Ridge Regression MSE: 0.6501
Ridge Regression R^2: 0.3371
Ridge Regression model size: 0.47265625

Evaluating Lasso Regression...
Best parameters for Lasso Regression: {'estimator__alpha': 0.01}
Lasso Regression MSE: 0.6505
Lasso Regression R^2: 0.3367
Lasso Regression model size: 0.732421875

Evaluating Elastic Net...
Best parameters for Elastic Net: {'estimator__alpha': 0.01, 'estimator__l1_ratio': 0.2}
Elastic Net MSE: 0.6500
Elastic Net R^2: 0.3373
Elastic Net model size: 0.7373046875

Evaluating Support Vector Regression (SVR)...
Best parameters for Support Vector Regression (SVR): {'estimator__C': 10.0, 'estimator__epsilon': 0.2}
Support Vector Regression (SVR) MSE: 0.1766
Support Vector Regression (SVR) R^2: 0.8199
Support Vector Regression (SVR) model size: 631.52734375

Evaluating Gradient Boosting Regressor...
Best parameters for Gradient Boosting Regressor: {'estimator__learning_rate': 0.2, 'estimator__n_estimators': 100}