In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns 
import math

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the raw sleep-health CSV inside the Kaggle input mount.
DATA_PATH = "/kaggle/input/sleep-health-and-lifestyle-dataset/Sleep_health_and_lifestyle_dataset.csv"
df = pd.read_csv(DATA_PATH)
df

In [None]:
# Summarise the raw frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) for the raw frame.
df.describe()

In [None]:
# Columns whose value distribution we want to eyeball before encoding.
columns_to_inspect = [
    "Age", "Occupation", "Sleep Duration", "Quality of Sleep",
    "Physical Activity Level", "Stress Level", "Blood Pressure",
    "Heart Rate", "Daily Steps", "BMI Category", "Sleep Disorder",
]
for c in columns_to_inspect:
    header = "---- %s ---" % c
    counts = df[c].value_counts()
    print(header)
    print(counts)

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(15, 10)
# Correlation is only defined for numeric columns. The raw frame still holds
# string columns (e.g. Occupation, Blood Pressure), and recent pandas versions
# raise instead of silently dropping them, so select numeric columns explicitly.
numeric_corr = df.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, cmap='coolwarm', ax=ax, annot=True, linewidths=2)
plt.show()

In [None]:
# Numeric features shown side by side to spot scale differences and outliers.
num_cols = [
    'Age',
    'Sleep Duration',
    'Quality of Sleep',
    'Physical Activity Level',
    'Stress Level',
    'Heart Rate',
]
fig, ax = plt.subplots(figsize=(18, 9))
df[num_cols].boxplot(ax=ax)
plt.show()

In [None]:
# Work on a copy so the raw frame `df` is not modified by the Gender encoding.
df_g = df.copy()

def assignNum(g):
    """Encode a gender label as an integer: 1 for 'Male', 2 for anything else."""
    return 1 if g == 'Male' else 2
# Replace the Gender labels with their integer codes (element-wise).
gender_codes = df['Gender'].map(assignNum)
df_g['Gender'] = gender_codes
df_g.head()

In [None]:
# Start from the gender-encoded frame; Blood Pressure is encoded on this copy.
df_bp = df_g.copy()
def bp(bp):
    """Map a 'systolic/diastolic' blood-pressure string to an ordinal category.

    Categories (cut-offs in mmHg):
        1 - normal:   systolic < 120 and diastolic < 80
        2 - elevated: systolic 120-129 and diastolic < 80
        3 - stage 1:  systolic 130-139 or diastolic 80-89
        4 - stage 2:  systolic >= 140 or diastolic >= 90

    Parameters
    ----------
    bp : str
        Reading formatted as '<systolic>/<diastolic>', e.g. '126/83'.

    Returns
    -------
    int
        Category code from 1 to 4.

    Raises
    ------
    ValueError
        If the text does not contain exactly two '/'-separated integers.
    """
    # Split on the separator rather than slicing fixed positions, so readings
    # with a two-digit systolic value (e.g. '95/60') are parsed correctly.
    systolic_text, diastolic_text = str(bp).split('/')
    s = int(systolic_text)
    d = int(diastolic_text)

    # Check the most severe category first: a reading belongs to the highest
    # category that either number reaches (e.g. 145/85 and 135/95 are stage 2).
    if s >= 140 or d >= 90:
        return 4
    if s >= 130 or d >= 80:
        return 3
    if s >= 120:
        return 2
    return 1
    
# Replace each blood-pressure string with its category code (element-wise).
bp_codes = df['Blood Pressure'].map(bp)
df_bp['Blood Pressure'] = bp_codes
df_bp

In [None]:
# Start from the blood-pressure-encoded frame; BMI Category is encoded on this copy.
df_w = df_bp.copy()
def weight(w):
    """Encode a BMI category label as an integer.

    'Normal Weight' and 'Normal' map to 1, 'Overweight' maps to 2, and every
    other value maps to 3.
    """
    if w in ('Normal Weight', 'Normal'):
        return 1
    if w == 'Overweight':
        return 2
    return 3
    
# Replace the BMI category labels with their integer codes (element-wise).
bmi_codes = df['BMI Category'].map(weight)
df_w['BMI Category'] = bmi_codes
df_w

In [None]:
df_dis = df_w.copy()
# Binary target: 1 when the label contains the letter "a", 0 otherwise.
# Depending on the pandas version, read_csv may parse a literal "None" label
# as missing (NaN). `na=False` makes those rows evaluate to False; without it
# str.contains returns NaN for them and np.where treats NaN as truthy,
# labelling every such row as 1.
has_disorder = df_dis['Sleep Disorder'].str.contains("a", na=False)
df_dis['Sleep Disorder'] = np.where(has_disorder, 1, 0)
df_dis

In [None]:
from scipy import stats
df_copy = df_dis.copy()
# Replace each listed column with its z-score, computed from the raw frame `df`.
for feature in ('Age', 'Quality of Sleep', 'Physical Activity Level',
                'Stress Level', 'Heart Rate'):
    df_copy[feature] = stats.zscore(df[feature])

In [None]:
# Drop the identifier and the columns that are not used as model features.
df_copy2 = df_copy.drop(columns=['Person ID', 'Occupation', 'Sleep Duration', 'Daily Steps'])
df_copy2

In [None]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise via numpy."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [None]:
def cost(X, y, w, b):
    """Mean binary cross-entropy of a logistic-regression model.

    Computes  J = (1/m) * sum_i [ -y_i*log(f_i) - (1 - y_i)*log(1 - f_i) ]
    with f_i = sigmoid(X_i . w + b).

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix.
    y : array-like, m elements
        Binary targets (0 or 1).
    w : array-like, n elements
        Weights; a flat vector or an (n, 1) column are both accepted.
    b : scalar or 1-element array
        Bias term.

    Returns
    -------
    float
        The average loss over the m examples (always >= 0).
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1)
    w = np.asarray(w, dtype=float).reshape(-1)
    z = X @ w + np.asarray(b, dtype=float).reshape(-1)

    # -y*log(sigmoid(z)) - (1-y)*log(1-sigmoid(z)) simplifies algebraically to
    # log(1 + e^z) - y*z. Both terms of the cross-entropy carry a minus sign.
    # logaddexp(0, z) evaluates log(1 + e^z) without overflow and avoids
    # log(0) when the sigmoid saturates.
    losses = np.logaddexp(0.0, z) - y * z
    return float(np.mean(losses))

In [None]:
def gradient(X, y, w, b):
    """Gradient of the mean logistic loss with respect to the bias and weights.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix.
    y : array-like, m elements
        Binary targets (0 or 1).
    w : ndarray, n elements
        Weights; a flat vector or an (n, 1) column are both accepted.
    b : scalar or 1-element array
        Bias term.

    Returns
    -------
    dj_db : float
        Derivative of the cost with respect to b.
    dj_dw : ndarray, same shape as `w`
        Derivative of the cost with respect to each weight.
    """
    X = np.asarray(X, dtype=float)
    w_arr = np.asarray(w, dtype=float)
    m = X.shape[0]

    # Per-example prediction error f_i - y_i.
    z = X @ w_arr.reshape(-1) + np.asarray(b, dtype=float).reshape(-1)
    err = sigmoid(z) - np.asarray(y, dtype=float).reshape(-1)

    # Component j of the weight gradient is (1/m) * sum_i err_i * X[i, j];
    # X.T @ err computes all n components at once, each from its own column.
    dj_dw = (X.T @ err / m).reshape(w_arr.shape)
    dj_db = float(err.sum() / m)
    return dj_db, dj_dw
    

In [None]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """Run batch gradient descent for `num_iters` steps.

    Parameters
    ----------
    X, y : passed through unchanged to `cost_function` and `gradient_function`.
    w_in, b_in : starting weights and bias.
    cost_function : callable(X, y, w, b) returning the current cost.
    gradient_function : callable(X, y, w, b) returning (dj_db, dj_dw).
    alpha : learning rate.
    num_iters : number of update steps.

    Returns
    -------
    (w, b, J_history, w_history)
        Final parameters, the cost recorded after each of the first 100000
        steps, and the weights captured at each progress report.
    """
    history_cap = 100000  # stop recording costs beyond this many steps
    J_history, w_history = [], []
    w, b = w_in, b_in

    for i in range(num_iters):
        # One descent step: move against the gradient, scaled by alpha.
        dj_db, dj_dw = gradient_function(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Record the cost after the update (bounded to limit memory use).
        if i < history_cap:
            J_history.append(cost_function(X, y, w, b))

        # Report progress roughly ten times over the run, plus the final step.
        at_interval = i % math.ceil(num_iters / 10) == 0
        if at_interval or i == (num_iters - 1):
            w_history.append(w)
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f}   ")

    return w, b, J_history, w_history

In [None]:
from sklearn.model_selection import train_test_split
X = df_copy2.drop(columns=['Sleep Disorder'])
y = df_copy2['Sleep Disorder']
# Fix the shuffle seed so the 70/30 split, and therefore every metric computed
# from it, is the same on each Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)
# The hand-written gradient descent below indexes rows positionally, so
# convert the pandas objects to plain numpy arrays.
x_train, y_train, x_test, y_test = x_train.to_numpy(), y_train.to_numpy(), x_test.to_numpy(), y_test.to_numpy()

In [None]:
np.random.seed(1)
# One small random weight per feature column, as an (n_features, 1) column
# vector centred on zero. The count is taken from the training matrix so this
# cell keeps working if the feature set changes.
n_features = x_train.shape[1]
initial_w = 0.01 * (np.random.rand(n_features).reshape(-1,1) - 0.5)
initial_b = -8


# Some gradient descent settings
iterations = 100000
alpha = 0.1

In [None]:
# Fit the hand-written logistic regression on the training split.
w, b, J_history, _ = gradient_descent(
    x_train,
    y_train,
    initial_w,
    initial_b,
    cost_function=cost,
    gradient_function=gradient,
    alpha=alpha,
    num_iters=iterations,
)

In [None]:
def predict(X, w, b):
    """Predict binary labels with a logistic-regression model.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix.
    w : array-like, n elements
        Weights; a flat vector or an (n, 1) column are both accepted.
    b : scalar or 1-element array
        Bias term.

    Returns
    -------
    ndarray, shape (m,)
        1.0 where sigmoid(X . w + b) > 0.5, otherwise 0.0.
    """
    X = np.asarray(X, dtype=float)
    # The bias is part of the model and must be included in the logit.
    z = X @ np.asarray(w, dtype=float).reshape(-1) + np.asarray(b, dtype=float).reshape(-1)
    return np.where(sigmoid(z) > 0.5, 1.0, 0.0)

In [None]:
# Share of training examples whose predicted label matches the true label.
p = predict(x_train, w, b)
train_accuracy = np.mean(p == y_train) * 100
print('Train Accuracy: %f' % train_accuracy)

In [None]:
from sklearn.linear_model import LogisticRegression
# Reference model: scikit-learn's logistic regression, fitted on the same
# training split and used to predict the held-out test split.
logreg = LogisticRegression(random_state=0).fit(x_train, y_train.ravel())
y_pred = logreg.predict(x_test)

In [None]:
from sklearn.metrics import classification_report
# Text report for the test split, with readable names for the two classes.
target_names = ['without disorder', 'with disorder']
report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

In [None]:
from sklearn import metrics

# Confusion matrix comparing the test labels with the scikit-learn predictions.
cnf_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cnf_matrix