In [None]:
# This file is intended to be used to predict 2 states. (For states > 2, please use multinomial regression code.)
# Independent Variables: Intensity, lifetime
# Dependent Variable: States

import pandas as pd
# Logistic Regression Library: https://www.statsmodels.org/dev/generated/statsmodels.formula.api.logit.html
import statsmodels.formula.api as smf


# -------------- NREM/REM ---------------------

In [None]:
# Load the NREM/REM dataset from the working directory and render it.
data = pd.read_csv(filepath_or_buffer='formated_data_nremREM.csv')
display(data)

Unnamed: 0,intensity,lifetime,sleep_states
0,121000.0,4.1530,2
1,121000.0,4.1534,2
2,119000.0,4.1349,2
3,123000.0,4.1448,2
4,119000.0,4.1376,2
...,...,...,...
1137,525000.0,4.2813,3
1138,524000.0,4.2535,3
1139,543000.0,4.2713,3
1140,528000.0,4.2765,3


In [None]:
# Recode the sleep-state labels onto {0, 1}, the binary outcome coding the
# logistic regression library requires.
#   sleep state 2 -> 0
#   sleep state 3 -> 1

# Show which label values are present before recoding.
display(data['sleep_states'].unique())

# Edit this mapping if the raw file encodes the two states differently.
data['sleep_states'] = data['sleep_states'].replace({2: 0, 3: 1})

array([2, 3])

# Lifetime predicting sleep states 



In [None]:
# Fit a single-predictor logistic regression: sleep state explained by lifetime.
# Formula syntax is "<dependent variable> ~ <independent variable>"
# (i.e. target_variable ~ features).
log_reg = smf.logit(formula="sleep_states ~ lifetime", data=data).fit()
# Predicted probabilities for the same rows the model was fitted on.
predict = log_reg.predict(data['lifetime'])
# Round every probability to the nearest integer to obtain a hard 0/1 label.
prediction = [round(probability) for probability in predict]

Optimization terminated successfully.
         Current function value: 0.287731
         Iterations 8


In [None]:
# Print the true state labels next to the predicted ones for a visual comparison.
print('Actual values', [*data['sleep_states'].to_numpy()])
print('Predictions :', prediction)

Actual values [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [None]:
# Score the hard predictions against the recoded labels:
# confusion matrix, per-class classification report and overall accuracy.
# Refer: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Confusion matrix of true labels vs. predicted labels.
cm = confusion_matrix(y_true=data['sleep_states'], y_pred=prediction)
print("Confusion Matrix : \n", cm)

# Precision / recall / f1 / support for each class.
cr = classification_report(y_true=data['sleep_states'], y_pred=prediction)
print("Classification Report : \n", cr)

# Fraction of rows whose predicted label equals the true label.
print('Test accuracy = ', accuracy_score(y_true=data['sleep_states'], y_pred=prediction))

Confusion Matrix : 
 [[753  67]
 [102 220]]
Classification Report : 
               precision    recall  f1-score   support

           0       0.88      0.92      0.90       820
           1       0.77      0.68      0.72       322

    accuracy                           0.85      1142
   macro avg       0.82      0.80      0.81      1142
weighted avg       0.85      0.85      0.85      1142

Test accuracy =  0.852014010507881


In [None]:
# Show the fitted model's summary table; its "Pseudo R-squ." entry is
# McFadden's pseudo R-squared.
# When comparing two models fitted on the same data, the model with the higher
# McFadden's value is the better-fitting one.
# NOTE: `log_reg` is reassigned by every regression cell in this notebook, so
# this reports whichever model was fitted most recently.
log_reg.summary()

0,1,2,3
Dep. Variable:,sleep_states,No. Observations:,1142.0
Model:,Logit,Df Residuals:,1140.0
Method:,MLE,Df Model:,1.0
Date:,"Tue, 27 Sep 2022",Pseudo R-squ.:,0.5163
Time:,20:27:30,Log-Likelihood:,-328.59
converged:,True,LL-Null:,-679.26
Covariance Type:,nonrobust,LLR p-value:,1.531e-154

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-249.7650,15.144,-16.493,0.000,-279.446,-220.084
lifetime,59.1460,3.594,16.458,0.000,52.102,66.190


# Intensity predicting sleep states 

In [None]:
# Fit a single-predictor logistic regression: sleep state explained by intensity.
# Formula syntax is "<dependent variable> ~ <independent variable>"
# (i.e. target_variable ~ features).
log_reg = smf.logit(formula="sleep_states ~ intensity", data=data).fit()
# Predicted probabilities for the same rows the model was fitted on.
predict = log_reg.predict(data['intensity'])
# Round every probability to the nearest integer to obtain a hard 0/1 label.
prediction = [round(probability) for probability in predict]

Optimization terminated successfully.
         Current function value: 0.587763
         Iterations 5


In [None]:
# Print the true state labels next to the predicted ones for a visual comparison.
print('Actual values', [*data['sleep_states'].to_numpy()])
print('Predictions :', prediction)

Actual values [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [None]:
# Generating confusion matrix, classification report and accuracy score of the
# intensity-only model.
# Refer: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
# classification_report is imported here too, so this cell no longer relies on
# an earlier cell's import having been executed first.
from sklearn.metrics import (confusion_matrix,
                           accuracy_score, classification_report)

# confusion matrix
cm = confusion_matrix(data['sleep_states'], prediction)
print ("Confusion Matrix : \n", cm)

# classification report
# zero_division='warn' reports 0.0 (and raises a warning) for any metric whose
# denominator is zero, e.g. the precision of a class that is never predicted.
cr = classification_report(data['sleep_states'], prediction, zero_division='warn')
print ("Classification Report : \n", cr)

# accuracy score of the model
# NOTE: labels and predictions both come from `data`, the same frame the model
# was fitted on, so this is in-sample accuracy despite the printed label.
print('Test accuracy = ', accuracy_score(data['sleep_states'], prediction))

Confusion Matrix : 
 [[820   0]
 [322   0]]
Classification Report : 
               precision    recall  f1-score   support

           0       0.72      1.00      0.84       820
           1       0.00      0.00      0.00       322

    accuracy                           0.72      1142
   macro avg       0.36      0.50      0.42      1142
weighted avg       0.52      0.72      0.60      1142

Test accuracy =  0.7180385288966725


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Show the fitted model's summary table; its "Pseudo R-squ." entry is
# McFadden's pseudo R-squared.
# When comparing two models fitted on the same data, the model with the higher
# McFadden's value is the better-fitting one.
# NOTE: `log_reg` is reassigned by every regression cell in this notebook, so
# this reports whichever model was fitted most recently.
log_reg.summary()

0,1,2,3
Dep. Variable:,sleep_states,No. Observations:,1142.0
Model:,Logit,Df Residuals:,1140.0
Method:,MLE,Df Model:,1.0
Date:,"Tue, 27 Sep 2022",Pseudo R-squ.:,0.01182
Time:,20:29:01,Log-Likelihood:,-671.23
converged:,True,LL-Null:,-679.26
Covariance Type:,nonrobust,LLR p-value:,6.123e-05

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.6075,0.184,-8.745,0.000,-1.968,-1.247
intensity,2.388e-06,5.96e-07,4.005,0.000,1.22e-06,3.56e-06


# -------------- Resting/Running ---------------------

In [None]:
# Read the resting/running data file.
# The path is relative to the working directory, matching how the NREM/REM file
# is loaded earlier in this notebook; a hard-coded '/content/' prefix only
# resolves on a machine that actually has that directory.
data_runRest = pd.read_csv('formated_data_restingRunning.csv')

display(data_runRest)

Unnamed: 0,lifetime,intensity,states
0,4.227825,154627.0,-0.5
1,4.228798,154896.0,-0.5
2,4.250315,161254.0,-0.5
3,4.231662,151291.5,-0.5
4,4.224715,149838.0,-0.5
...,...,...,...
659,4.275354,516257.0,0.5
660,4.272212,512707.0,0.5
661,4.274698,514025.5,0.5
662,4.276619,514296.0,0.5


In [None]:
# Recode the state labels onto {0, 1}, the binary outcome coding the logistic
# regression library requires.
#   state -0.5 -> 0 (Resting)
#   state  0.5 -> 1 (Running)

# Show which label values are present before recoding.
display(data_runRest['states'].unique())

# Edit this mapping if the raw file encodes the two states differently.
data_runRest['states'] = data_runRest['states'].replace({-0.5: 0, 0.5: 1})

array([-0.5,  0.5])

# Lifetime predicting resting/running


In [None]:
# Fit a single-predictor logistic regression: resting/running state explained
# by lifetime.
# Formula syntax is "<dependent variable> ~ <independent variable>"
# (i.e. target_variable ~ features).
log_reg = smf.logit(formula="states ~ lifetime", data=data_runRest).fit()
# Predicted probabilities for the same rows the model was fitted on.
predict = log_reg.predict(data_runRest['lifetime'])
# Round every probability to the nearest integer to obtain a hard 0/1 label.
prediction = [round(probability) for probability in predict]

Optimization terminated successfully.
         Current function value: 0.195778
         Iterations 9


In [None]:
# Print the true state labels next to the predicted ones for a visual comparison.
print('Actual values', [*data_runRest['states'].to_numpy()])
print('Predictions :', prediction)

Actual values [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 

In [None]:
# Score the hard predictions against the recoded labels:
# confusion matrix, per-class classification report and overall accuracy.
# Refer: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Confusion matrix of true labels vs. predicted labels.
cm = confusion_matrix(y_true=data_runRest['states'], y_pred=prediction)
print("Confusion Matrix : \n", cm)

# Precision / recall / f1 / support for each class.
cr = classification_report(y_true=data_runRest['states'], y_pred=prediction)
print("Classification Report : \n", cr)

# Fraction of rows whose predicted label equals the true label.
print('Test accuracy = ', accuracy_score(y_true=data_runRest['states'], y_pred=prediction))

Confusion Matrix : 
 [[285  26]
 [ 30 323]]
Classification Report : 
               precision    recall  f1-score   support

         0.0       0.90      0.92      0.91       311
         1.0       0.93      0.92      0.92       353

    accuracy                           0.92       664
   macro avg       0.92      0.92      0.92       664
weighted avg       0.92      0.92      0.92       664

Test accuracy =  0.9156626506024096


In [None]:
# Show the fitted model's summary table; its "Pseudo R-squ." entry is
# McFadden's pseudo R-squared.
# When comparing two models fitted on the same data, the model with the higher
# McFadden's value is the better-fitting one.
# NOTE: `log_reg` is reassigned by every regression cell in this notebook, so
# this reports whichever model was fitted most recently.
log_reg.summary()

0,1,2,3
Dep. Variable:,states,No. Observations:,664.0
Model:,Logit,Df Residuals:,662.0
Method:,MLE,Df Model:,1.0
Date:,"Tue, 27 Sep 2022",Pseudo R-squ.:,0.7167
Time:,20:36:49,Log-Likelihood:,-130.0
converged:,True,LL-Null:,-458.92
Covariance Type:,nonrobust,LLR p-value:,4.3899999999999996e-145

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-599.7140,54.330,-11.038,0.000,-706.199,-493.229
lifetime,141.7293,12.843,11.036,0.000,116.558,166.900


# Intensity predicting resting/running

In [None]:
# Fit a single-predictor logistic regression: resting/running state explained
# by intensity.
# Formula syntax is "<dependent variable> ~ <independent variable>"
# (i.e. target_variable ~ features).
log_reg = smf.logit(formula="states ~ intensity", data=data_runRest).fit()
# Predicted probabilities for the same rows the model was fitted on.
predict = log_reg.predict(data_runRest['intensity'])
# Round every probability to the nearest integer to obtain a hard 0/1 label.
prediction = [round(probability) for probability in predict]

Optimization terminated successfully.
         Current function value: 0.677116
         Iterations 4


In [None]:
# Print the true state labels next to the predicted ones for a visual comparison.
print('Actual values', [*data_runRest['states'].to_numpy()])
print('Predictions :', prediction)

Actual values [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 

In [None]:
# Generating confusion matrix, classification report and accuracy score of the
# intensity-only resting/running model.
# Refer: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
# classification_report is imported here too, so this cell no longer relies on
# an earlier cell's import having been executed first.
from sklearn.metrics import (confusion_matrix,
                           accuracy_score, classification_report)

# confusion matrix
cm = confusion_matrix(data_runRest['states'], prediction)
print ("Confusion Matrix : \n", cm)

# classification report
# zero_division='warn' reports 0.0 (and raises a warning) for any metric whose
# denominator is zero, e.g. the precision of a class that is never predicted.
cr = classification_report(data_runRest['states'], prediction, zero_division='warn')
print ("Classification Report : \n", cr)

# accuracy score of the model
# NOTE: labels and predictions both come from `data_runRest`, the same frame the
# model was fitted on, so this is in-sample accuracy despite the printed label.
print('Test accuracy = ', accuracy_score(data_runRest['states'], prediction))

Confusion Matrix : 
 [[166 145]
 [103 250]]
Classification Report : 
               precision    recall  f1-score   support

         0.0       0.62      0.53      0.57       311
         1.0       0.63      0.71      0.67       353

    accuracy                           0.63       664
   macro avg       0.63      0.62      0.62       664
weighted avg       0.63      0.63      0.62       664

Test accuracy =  0.6265060240963856


In [None]:
# Show the fitted model's summary table; its "Pseudo R-squ." entry is
# McFadden's pseudo R-squared.
# When comparing two models fitted on the same data, the model with the higher
# McFadden's value is the better-fitting one.
# NOTE: `log_reg` is reassigned by every regression cell in this notebook, so
# this reports whichever model was fitted most recently.
log_reg.summary()

0,1,2,3
Dep. Variable:,states,No. Observations:,664.0
Model:,Logit,Df Residuals:,662.0
Method:,MLE,Df Model:,1.0
Date:,"Tue, 27 Sep 2022",Pseudo R-squ.:,0.0203
Time:,20:38:16,Log-Likelihood:,-449.61
converged:,True,LL-Null:,-458.92
Covariance Type:,nonrobust,LLR p-value:,1.587e-05

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.6993,0.209,-3.348,0.001,-1.109,-0.290
intensity,2.404e-06,5.67e-07,4.239,0.000,1.29e-06,3.52e-06
