In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import Ridge
from sklearn.feature_selection import SelectFromModel

## Load data

In [2]:
df = pd.read_excel('PSG_sleep_index_20220928.xlsx')

In [3]:
X = scale(df.iloc[:, 10:19]) # k to s column
y1 = df[['(A6①)寝つきの良さ']]
y2 = df[['(A6③)眠りの深さ']]
y3 = df[['(A6⑤)全体的な睡眠の質']]

feature_names = df.columns[10:19]

## L2

In [29]:
ridge = Ridge(alpha=1)

# (A6①)寝つきの良さ
ridge.fit(X, y1)

ridge_selected = SelectFromModel(ridge, prefit=True)


X_selected = ridge_selected.transform(X)
print(X_selected.shape)


print("Selected feature names:", feature_names[ridge_selected.get_support()])

scores = cross_val_score(ridge, X_selected, y1, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(309, 6)
Selected feature names: Index(['Sleep Latency (min)', 'REM Latency (min)',
       'TST (Total Sleep Time (min))', 'TST/SPT (%)', 'WASO (%)', 'Deep (%)'],
      dtype='object')
R2 score -0.0240


In [27]:
ridge = Ridge(alpha=1)

# (A6③)眠りの深さ
ridge.fit(X, y2)

ridge_selected = SelectFromModel(ridge, prefit=True)


X_selected = ridge_selected.transform(X)
print(X_selected.shape)


print("Selected feature names:", feature_names[ridge_selected.get_support()])

scores = cross_val_score(ridge, X_selected, y2, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(309, 3)
Selected feature names: Index(['REM Latency (min)', 'TST (Total Sleep Time (min))', 'REM (%)'], dtype='object')
R2 score 0.0767


In [31]:
ridge = Ridge(alpha=1)

# (A6⑤)全体的な睡眠の質
ridge.fit(X, y3)

ridge_selected = SelectFromModel(ridge, prefit=True)


X_selected = ridge_selected.transform(X)
print(X_selected.shape)


print("Selected feature names:", feature_names[ridge_selected.get_support()])

scores = cross_val_score(ridge, X_selected, y3, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(309, 4)
Selected feature names: Index(['Sleep Latency (min)', 'REM Latency (min)',
       'TST (Total Sleep Time (min))', 'Deep (%)'],
      dtype='object')
R2 score -0.0270


## Lasso Regression

### (A6①)寝つきの良さ

In [4]:
lasso = Lasso(alpha=0.01)
lasso.fit(X, y1)

lasso.coef_

array([-0.1853128 , -0.00177361, -0.1436525 , -0.05013226,  0.1434182 ,
       -0.        ,  0.        ,  0.11482809, -0.        ])

In [6]:
lasso_selected = SelectFromModel(lasso, prefit=True)
X_selected = lasso_selected.transform(X)
print(X_selected.shape)
print("Selected feature names:", feature_names[lasso_selected.get_support()])

scores = cross_val_score(lasso, X_selected, y1, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(309, 6)
Selected feature names: Index(['Sleep Latency (min)', 'Deep Latency (min)', 'REM Latency (min)',
       'TST (Total Sleep Time (min))', 'TST/SPT (%)', 'Deep (%)'],
      dtype='object')
R2 score -0.0217


### (A6③)眠りの深さ

In [14]:
lasso = Lasso(alpha=0.01)
lasso.fit(X, y2)

lasso.coef_

array([ 0.0104295 ,  0.        ,  0.10503821, -0.36795879, -0.        ,
        0.        , -0.00666402,  0.        ,  0.06336108])

In [15]:
lasso_selected = SelectFromModel(lasso, prefit=True)
X_selected = lasso_selected.transform(X)
print(X_selected.shape)
print("Selected feature names:", feature_names[lasso_selected.get_support()])

scores = cross_val_score(lasso, X_selected, y2, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(309, 5)
Selected feature names: Index(['Sleep Latency (min)', 'REM Latency (min)',
       'TST (Total Sleep Time (min))', 'Light (%)', 'REM (%)'],
      dtype='object')
R2 score 0.0339


### (A6⑤)全体的な睡眠の質

In [16]:
lasso = Lasso(alpha=0.01)
lasso.fit(X, y3)

lasso.coef_

array([ 0.04643829, -0.        ,  0.16394603, -0.05513845,  0.        ,
       -0.        ,  0.        , -0.08242587,  0.01084813])

In [17]:
lasso_selected = SelectFromModel(lasso, prefit=True)
X_selected = lasso_selected.transform(X)
print(X_selected.shape)
print("Selected feature names:", feature_names[lasso_selected.get_support()])

scores = cross_val_score(lasso, X_selected, y3, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

(309, 5)
Selected feature names: Index(['Sleep Latency (min)', 'REM Latency (min)',
       'TST (Total Sleep Time (min))', 'Deep (%)', 'REM (%)'],
      dtype='object')
R2 score -0.0309


## Desicion Tree

### (A6①)寝つきの良さ

In [42]:
from sklearn.tree import DecisionTreeRegressor

In [52]:
tree = DecisionTreeRegressor().fit(X, y1)
importances = tree.feature_importances_
print(importances)

[0.10456844 0.20569783 0.14805994 0.13270276 0.13455909 0.0413656
 0.06811655 0.09254799 0.07238181]


In [53]:
sfm = SelectFromModel(tree, threshold=0.1)
X_selected = sfm.transform(X)

scores = cross_val_score(tree, X_selected, y1, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

R2 score -0.9511


In [34]:
tree = DecisionTreeClassifier().fit(X, y1)

In [35]:
scores = cross_val_score(tree, X, y1, cv=10) 
average_accuracy = scores.mean()
print("平均Accuracy: ", average_accuracy)

平均Accuracy:  0.39182795698924733


### (A6③)眠りの深さ

In [48]:
tree = DecisionTreeRegressor().fit(X, y2)
importances = tree.feature_importances_
print(importances)

[0.16361032 0.04865681 0.15262386 0.26726076 0.03879323 0.04413227
 0.08762997 0.11743111 0.07986167]


In [49]:
sfm = SelectFromModel(tree, threshold=0.1)
X_selected = sfm.transform(X)

scores = cross_val_score(tree, X_selected, y2, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

R2 score -0.7440


In [36]:
tree = DecisionTreeClassifier().fit(X, y2)

In [37]:
scores = cross_val_score(tree, X, y2, cv=10) 
average_accuracy = scores.mean()
print("平均Accuracy: ", average_accuracy)

平均Accuracy:  0.34301075268817205


### (A6⑤)全体的な睡眠の質

In [50]:
tree = DecisionTreeRegressor().fit(X, y3)
importances = tree.feature_importances_
print(importances)

[0.09173351 0.04547075 0.17709762 0.12634134 0.07223643 0.09787758
 0.12870775 0.12770048 0.13283455]


In [51]:
sfm = SelectFromModel(tree, threshold=0.1)
X_selected = sfm.transform(X)

scores = cross_val_score(tree, X_selected, y3, cv=10)
print("R2 score {:.4f}".format(scores.mean()))

R2 score -1.2226


In [38]:
tree = DecisionTreeClassifier().fit(X, y3)

In [40]:
scores = cross_val_score(tree, X, y3, cv=10) 
average_accuracy = scores.mean()
print("平均Accuracy: ", average_accuracy)

平均Accuracy:  0.34634408602150535
