In [30]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split, cross_val_score

## Load data

In [24]:
df = pd.read_excel('PSG_sleep_index_20220928.xlsx')

In [27]:
X = scale(df.iloc[:, 10:19]) # k to s column
y1 = df[['(A6①)寝つきの良さ']]
y2 = df[['(A6③)眠りの深さ']]
y3 = df[['(A6⑤)全体的な睡眠の質']]

feature_names = df.columns[10:19]

## Lasso Regression

### (A6①)寝つきの良さ

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size = 0.3, random_state=2)

In [41]:
lasso = Lasso(alpha=0.05)
lasso.fit(X_train, y_train)

lasso.coef_

array([-0.12940347, -0.        , -0.08719537,  0.        ,  0.1528083 ,
       -0.        ,  0.        ,  0.03573634, -0.        ])

In [43]:
lasso_selected = SelectFromModel(lasso, prefit=True)
X_test_selected = lasso_selected.transform(X_test)
print(X_test_selected.shape)
print("Selected feature names:", feature_names[lasso_selected.get_support()])

scores = cross_val_score(lasso, X_test_selected, y_test, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(93, 4)
Selected feature names: Index(['Sleep Latency (min)', 'REM Latency (min)', 'TST/SPT (%)', 'Deep (%)'], dtype='object')
R2 score -0.2165


### (A6③)眠りの深さ

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, y2, test_size = 0.3, random_state=2)

In [45]:
lasso = Lasso(alpha=0.05)
lasso.fit(X_train, y_train)

lasso.coef_

array([ 0.        ,  0.        ,  0.06229516, -0.31229707, -0.        ,
        0.        , -0.        , -0.        ,  0.        ])

In [46]:
lasso_selected = SelectFromModel(lasso, prefit=True)
X_test_selected = lasso_selected.transform(X_test)
print(X_test_selected.shape)
print("Selected feature names:", feature_names[lasso_selected.get_support()])

scores = cross_val_score(lasso, X_test_selected, y_test, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(93, 2)
Selected feature names: Index(['REM Latency (min)', 'TST (Total Sleep Time (min))'], dtype='object')
R2 score 0.0097


### (A6⑤)全体的な睡眠の質

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y3, test_size = 0.3, random_state=2)

In [48]:
lasso = Lasso(alpha=0.05)
lasso.fit(X_train, y_train)

lasso.coef_

array([ 0.02784954,  0.01001932,  0.11930802, -0.        , -0.00796792,
        0.        , -0.        , -0.08841962,  0.        ])

In [49]:
lasso_selected = SelectFromModel(lasso, prefit=True)
X_test_selected = lasso_selected.transform(X_test)
print(X_test_selected.shape)
print("Selected feature names:", feature_names[lasso_selected.get_support()])

scores = cross_val_score(lasso, X_test_selected, y_test, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(93, 5)
Selected feature names: Index(['Sleep Latency (min)', 'Deep Latency (min)', 'REM Latency (min)',
       'TST/SPT (%)', 'Deep (%)'],
      dtype='object')
R2 score -0.1352
