In [1]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # set the default Seaborn style for graphics

In [2]:
# Importing the dataset

sleepData = pd.read_csv("../data/SaYoPillow.csv")
sleepData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 630 entries, 0 to 629
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   sr      630 non-null    float64
 1   rr      630 non-null    float64
 2   t       630 non-null    float64
 3   lm      630 non-null    float64
 4   bo      630 non-null    float64
 5   rem     630 non-null    float64
 6   sr.1    630 non-null    float64
 7   hr      630 non-null    float64
 8   sl      630 non-null    int64  
dtypes: float64(8), int64(1)
memory usage: 44.4 KB


In [3]:
sleepData.corr()

Unnamed: 0,sr,rr,t,lm,bo,rem,sr.1,hr,sl
sr,1.0,0.976268,-0.902475,0.981078,-0.90314,0.9506,-0.920554,0.976268,0.975322
rr,0.976268,1.0,-0.889237,0.991738,-0.88921,0.935572,-0.891855,1.0,0.963516
t,-0.902475,-0.889237,1.0,-0.896412,0.998108,-0.857299,0.95486,-0.889237,-0.962354
lm,0.981078,0.991738,-0.896412,1.0,-0.898527,0.964703,-0.901102,0.991738,0.971071
bo,-0.90314,-0.88921,0.998108,-0.898527,1.0,-0.862136,0.950189,-0.88921,-0.961092
rem,0.9506,0.935572,-0.857299,0.964703,-0.862136,1.0,-0.893952,0.935572,0.951988
sr.1,-0.920554,-0.891855,0.95486,-0.901102,0.950189,-0.893952,1.0,-0.891855,-0.973036
hr,0.976268,1.0,-0.889237,0.991738,-0.88921,0.935572,-0.891855,1.0,0.963516
sl,0.975322,0.963516,-0.962354,0.971071,-0.961092,0.951988,-0.973036,0.963516,1.0


In [4]:
# Extract Response and Predictors
y = pd.DataFrame(sleepData["sl"])
X = pd.DataFrame(sleepData[["sr", "rr", "t", "lm", "bo", "rem", "sr.1", "hr"]])

In [5]:
# Split the Dataset into Train and Test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

# Check the sample sizes
print("Train Set :", y_train.shape, X_train.shape)
print("Test Set  :", y_test.shape, X_test.shape)

# Import LinearRegression model from Scikit-Learn
from sklearn.linear_model import LinearRegression


# Linear Regression using Train Data
linreg = LinearRegression()         # create the linear regression object
linreg.fit(X_train, y_train)        # train the linear regression model

# Coefficients of the Linear Regression line
print('Intercept of Regression \t: b = ', linreg.intercept_)
print('Coefficients of Regression \t: a = ', linreg.coef_)
print()

# Print the Coefficients against Predictors
pd.DataFrame(list(zip(X_train.columns, linreg.coef_[0])), columns = ["Predictors", "Coefficients"])

Train Set : (472, 1) (472, 8)
Test Set  : (158, 1) (158, 8)
Intercept of Regression 	: b =  [8.65543259e+10]
Coefficients of Regression 	: a =  [[ 9.71472629e-03  2.16385815e+10 -1.63350582e-01 -7.71808624e-02
   4.64706421e-02  3.19004059e-02 -1.11298561e-01 -8.65543258e+09]]



Unnamed: 0,Predictors,Coefficients
0,sr,0.009714726
1,rr,21638580000.0
2,t,-0.1633506
3,lm,-0.07718086
4,bo,0.04647064
5,rem,0.03190041
6,sr.1,-0.1112986
7,hr,-8655433000.0


In [6]:
# Predict the Total values from Predictors
y_train_pred = linreg.predict(X_train)
y_test_pred = linreg.predict(X_test)

# Import mean_squared_error from sklearn
from sklearn.metrics import mean_squared_error

# Check the Goodness of Fit (on Train Data)
print("Goodness of Fit of Model \tTrain Dataset")
print("Explained Variance (R^2) \t:", linreg.score(X_train, y_train))
print("Mean Squared Error (MSE) \t:", mean_squared_error(y_train, y_train_pred))
print()

# Check the Goodness of Fit (on Test Data)
print("Goodness of Fit of Model \tTest Dataset")
print("Explained Variance (R^2) \t:", linreg.score(X_test, y_test))
print("Mean Squared Error (MSE) \t:", mean_squared_error(y_test, y_test_pred))
print()

Goodness of Fit of Model 	Train Dataset
Explained Variance (R^2) 	: 0.9998926071547165
Mean Squared Error (MSE) 	: 0.0002137521771177397

Goodness of Fit of Model 	Test Dataset
Explained Variance (R^2) 	: 0.9998837484681435
Mean Squared Error (MSE) 	: 0.00023425400811188582



In [13]:
predictingFactors = ["sr", "rr", "lm", "rem", "sl"]

for i in predictingFactors:
    print("~~~~~Predicting for {}~~~~~".format(i))
    # Predicting snoring rate (sr) with body temp (t), sleep hours (sr), blood oxygen (bo), heart rate (hr)
    y = pd.DataFrame(sleepData[i])
    X = pd.DataFrame(sleepData[["sr.1", "t", "bo", "hr"]])

    # Split the Dataset into Train and Test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

    # Check the sample sizes
    print("Train Set :", y_train.shape, X_train.shape)
    print("Test Set  :", y_test.shape, X_test.shape)

    # Import LinearRegression model from Scikit-Learn
    from sklearn.linear_model import LinearRegression


    # Linear Regression using Train Data
    linreg = LinearRegression()         # create the linear regression object
    linreg.fit(X_train, y_train)        # train the linear regression model

    # Coefficients of the Linear Regression line
    print('Intercept of Regression \t: b = ', linreg.intercept_)
    print('Coefficients of Regression \t: a = ', linreg.coef_)
    print()

    # Print the Coefficients against Predictors
    pd.DataFrame(list(zip(X_train.columns, linreg.coef_[0])), columns = ["Predictors", "Coefficients"])

    # Predict the Total values from Predictors
    y_train_pred = linreg.predict(X_train)
    y_test_pred = linreg.predict(X_test)

    # Import mean_squared_error from sklearn
    from sklearn.metrics import mean_squared_error

    # Check the Goodness of Fit (on Train Data)
    print("Goodness of Fit of Model \tTrain Dataset")
    print("Explained Variance (R^2) \t:", linreg.score(X_train, y_train))
    print("Mean Squared Error (MSE) \t:", mean_squared_error(y_train, y_train_pred))
    print()

    # Check the Goodness of Fit (on Test Data)
    print("Goodness of Fit of Model \tTest Dataset")
    print("Explained Variance (R^2) \t:", linreg.score(X_test, y_test))
    print("Mean Squared Error (MSE) \t:", mean_squared_error(y_test, y_test_pred))
    print()

~~~~~Predicting for sr~~~~~
Train Set : (472, 1) (472, 4)
Test Set  : (158, 1) (158, 4)
Intercept of Regression 	: b =  [-85.63404042]
Coefficients of Regression 	: a =  [[-1.92671402  3.44151854 -2.77159628  1.50229246]]

Goodness of Fit of Model 	Train Dataset
Explained Variance (R^2) 	: 0.966892961289167
Mean Squared Error (MSE) 	: 12.448447818068825

Goodness of Fit of Model 	Test Dataset
Explained Variance (R^2) 	: 0.9656128279416544
Mean Squared Error (MSE) 	: 12.746946933880063

~~~~~Predicting for rr~~~~~
Train Set : (472, 1) (472, 4)
Test Set  : (158, 1) (158, 4)
Intercept of Regression 	: b =  [-4.]
Coefficients of Regression 	: a =  [[ 2.07930750e-15 -6.66133815e-16 -2.35922393e-16  4.00000000e-01]]

Goodness of Fit of Model 	Train Dataset
Explained Variance (R^2) 	: 1.0
Mean Squared Error (MSE) 	: 2.8505956778494867e-29

Goodness of Fit of Model 	Test Dataset
Explained Variance (R^2) 	: 1.0
Mean Squared Error (MSE) 	: 2.787974237186158e-29

~~~~~Predicting for lm~~~~~
Train