# Regularized Linear Models for the Male-Only Dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn import metrics

In [2]:
# Read the male-only OSA workbook, then expose it under the DataFrame name
# that the rest of the notebook works with.
data = pd.read_excel(io="OSA_extreme_male.xlsx")
df_OSA_male = pd.DataFrame(data=data)

In [3]:
# Use the "Patient" column as the row index so it is not treated as a feature.
df_OSA_male = df_OSA_male.set_index("Patient")

In [4]:
### Picking predictor columns

# Name of the column the models will predict.
target = "IAH"

# Columns that must not be used as predictors: the target itself plus
# "Gender", "OSA" and "BMI".
excluded = {target, "Gender", "OSA", "BMI"}

# Keep every remaining column, preserving the DataFrame's column order.
columns = [c for c in df_OSA_male.columns if c not in excluded]

print('Predictors:', columns)

Predictors: ['Weight', 'Height', 'Age', 'Cervical']


In [5]:
# Creating X and y
# X holds the predictor columns selected above; y is the target column.
# `target` is reused here so the column name is declared in one place only.
X = df_OSA_male[columns]
y = df_OSA_male[target]

In [6]:
# Hold out 20% of the rows for testing; random_state=0 keeps the split
# reproducible between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Show the shape of the training and test predictor frames, in that order.
for split in (X_train, X_test):
    print(split.shape)

(254, 4)
(64, 4)


##  <font color='red'> Linear Regression </font>

In [8]:
# Unregularized linear regression baseline, fitted on the training split only.
lr = LinearRegression()
# Left as a bare expression so the notebook echoes the fitted estimator.
lr.fit(X_train, y_train)

LinearRegression()

### Train set

In [9]:
# Score the linear regression on the training split.
pred_train_lr = lr.predict(X_train)

train_scores_lr = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_train, pred_train_lr))),
    ('MAE:', metrics.mean_absolute_error(y_train, pred_train_lr)),
    ('R-Squared:', r2_score(y_train, pred_train_lr)),
)
for label, score in train_scores_lr:
    print(label, score)

RMSE: 7.545318843892882
MAE: 6.189987346550643
R-Squared: 0.16003667796632393


### Test set

In [10]:
# Score the linear regression on the held-out test split.
pred_test_lr = lr.predict(X_test)

test_scores_lr = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, pred_test_lr))),
    ('MAE:', metrics.mean_absolute_error(y_test, pred_test_lr)),
    ('R-Squared:', r2_score(y_test, pred_test_lr)),
)
for label, score in test_scores_lr:
    print(label, score)

RMSE: 8.097587887846482
MAE: 6.643074174083332
R-Squared: 0.13439580300094622


## <font color='red'>  Ridge Regression </font>

In [11]:
# Ridge regression with penalty strength alpha=0.01, fitted on the training
# split only.
# NOTE(review): no feature-scaling step is visible before this fit, so the
# penalty acts on raw-unit coefficients — confirm this is intended.
rr = Ridge(alpha=0.01)
# Left as a bare expression so the notebook echoes the fitted estimator.
rr.fit(X_train, y_train) 

Ridge(alpha=0.01)

### Train set

In [12]:
# Score the ridge model on the training split.
pred_train_rr = rr.predict(X_train)

train_scores_rr = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_train, pred_train_rr))),
    ('MAE:', metrics.mean_absolute_error(y_train, pred_train_rr)),
    ('R-Squared:', r2_score(y_train, pred_train_rr)),
)
for label, score in train_scores_rr:
    print(label, score)

RMSE: 7.545318843892962
MAE: 6.18998738654825
R-Squared: 0.16003667796630594


### Test set

In [13]:
# Score the ridge model on the held-out test split.
pred_test_rr = rr.predict(X_test)

test_scores_rr = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, pred_test_rr))),
    ('MAE:', metrics.mean_absolute_error(y_test, pred_test_rr)),
    ('R-Squared:', r2_score(y_test, pred_test_rr)),
)
for label, score in test_scores_rr:
    print(label, score)

RMSE: 8.09758795792419
MAE: 6.643074308437751
R-Squared: 0.1343957880188157


## <font color='red'> LASSO Regression </font>

In [14]:
# Lasso regression with penalty strength alpha=0.01, fitted on the training
# split only.
# NOTE(review): no feature-scaling step is visible before this fit, so the
# penalty acts on raw-unit coefficients — confirm this is intended.
model_lasso = Lasso(alpha=0.01)
# Left as a bare expression so the notebook echoes the fitted estimator.
model_lasso.fit(X_train, y_train) 

Lasso(alpha=0.01)

### Train set

In [15]:
# Score the lasso model on the training split.
pred_train_lasso = model_lasso.predict(X_train)

train_scores_lasso = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_train, pred_train_lasso))),
    ('MAE:', metrics.mean_absolute_error(y_train, pred_train_lasso)),
    ('R-Squared:', r2_score(y_train, pred_train_lasso)),
)
for label, score in train_scores_lasso:
    print(label, score)

RMSE: 7.54532011855107
MAE: 6.190018657831649
R-Squared: 0.16003639417017457


### Test set

In [16]:
# Score the lasso model on the held-out test split.
pred_test_lasso = model_lasso.predict(X_test)

test_scores_lasso = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, pred_test_lasso))),
    ('MAE:', metrics.mean_absolute_error(y_test, pred_test_lasso)),
    ('R-Squared:', r2_score(y_test, pred_test_lasso)),
)
for label, score in test_scores_lasso:
    print(label, score)

RMSE: 8.098059977549514
MAE: 6.64383048356563
R-Squared: 0.13429487053910405


## <font color='red'> ElasticNet Regression </font>

In [17]:
# Elastic-net regression with penalty strength alpha=0.01; l1_ratio is not
# passed, so the estimator's default L1/L2 mix is used.
# NOTE(review): no feature-scaling step is visible before this fit, so the
# penalty acts on raw-unit coefficients — confirm this is intended.
model_enet = ElasticNet(alpha = 0.01)
# Left as a bare expression so the notebook echoes the fitted estimator.
model_enet.fit(X_train, y_train) 

ElasticNet(alpha=0.01)

### Train set

In [18]:
# Score the elastic-net model on the training split.
pred_train_enet = model_enet.predict(X_train)

train_scores_enet = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_train, pred_train_enet))),
    ('MAE:', metrics.mean_absolute_error(y_train, pred_train_enet)),
    ('R-Squared:', r2_score(y_train, pred_train_enet)),
)
for label, score in train_scores_enet:
    print(label, score)

RMSE: 7.545319202885577
MAE: 6.190006925569881
R-Squared: 0.16003659803843473


### Test set

In [19]:
# Score the elastic-net model on the held-out test split.
pred_test_enet = model_enet.predict(X_test)

test_scores_enet = (
    ('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, pred_test_enet))),
    ('MAE:', metrics.mean_absolute_error(y_test, pred_test_enet)),
    ('R-Squared:', r2_score(y_test, pred_test_enet)),
)
for label, score in test_scores_enet:
    print(label, score)

RMSE: 8.097834807871303
MAE: 6.643474363232268
R-Squared: 0.1343430123992302
