<a href="https://colab.research.google.com/github/Yashasvigm14/Sleep-Score-Prediction/blob/main/ML_Approach_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Uploading the dataset
import pandas as pd
from google.colab import files

uploaded = files.upload()

file_name = list(uploaded.keys())[0]
data = pd.read_csv(file_name)

print("Initial Dataset:")
display(data.head())


Saving Fitbit_Sleep_JB_041219_010720.csv to Fitbit_Sleep_JB_041219_010720.csv
Initial Dataset:


Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep,overall_score
0,30/6/20 21:57,1/7/20 5:59,402,79,40,481,32,282,88,71.0
1,29/6/20 21:35,30/6/20 6:02,444,63,36,507,51,332,61,78.0
2,28/6/20 22:01,29/6/20 6:01,420,60,36,480,37,335,48,78.0
3,27/6/20 22:05,28/6/20 9:27,567,115,51,682,83,390,94,75.0
4,26/6/20 21:40,27/6/20 7:35,495,100,35,595,75,335,85,78.0


In [4]:
# Check for null values
print("Null values before cleaning:\n", data.isnull().sum())

# Drop null values
data = data.dropna()

print("Null values after cleaning:\n", data.isnull().sum())


Null values before cleaning:
 Start Time              0
End Time                0
Minutes Asleep          0
Minutes Awake           0
Number of Awakenings    0
Time in Bed             0
Minutes REM Sleep       0
Minutes Light Sleep     0
Minutes Deep Sleep      0
overall_score           1
dtype: int64
Null values after cleaning:
 Start Time              0
End Time                0
Minutes Asleep          0
Minutes Awake           0
Number of Awakenings    0
Time in Bed             0
Minutes REM Sleep       0
Minutes Light Sleep     0
Minutes Deep Sleep      0
overall_score           0
dtype: int64


In [5]:
# Drop unnecessary columns
data = data.drop(columns=['Start Time', 'End Time'], errors='ignore')

print("Data after removing unnecessary columns:")
display(data.head())


Data after removing unnecessary columns:


Unnamed: 0,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep,overall_score
0,402,79,40,481,32,282,88,71.0
1,444,63,36,507,51,332,61,78.0
2,420,60,36,480,37,335,48,78.0
3,567,115,51,682,83,390,94,75.0
4,495,100,35,595,75,335,85,78.0


In [11]:
# Add Sleep Efficiency column
data['Sleep Efficiency'] = data.apply(
    lambda row: (row['Minutes Asleep'] / row['Time in Bed'] * 100) if row['Time in Bed'] > 0 else 0, axis=1
)

# Add Non-REM Sleep column
data['Minutes Non-REM Sleep'] = data['Minutes Light Sleep'] + data['Minutes Deep Sleep']

print("Data with Sleep Efficiency and Non-REM Sleep added:")
display(data[['Minutes Asleep', 'Time in Bed', 'Sleep Efficiency', 'Minutes Non-REM Sleep']].head())


Data with Sleep Efficiency and Non-REM Sleep added:


Unnamed: 0,Minutes Asleep,Time in Bed,Sleep Efficiency,Minutes Non-REM Sleep
0,402,481,83.575884,370
1,444,507,87.573964,393
2,420,480,87.5,383
3,567,682,83.13783,484
4,495,595,83.193277,420


In [12]:
# Reaaranging columns
columns = [col for col in data.columns if col != 'overall_score'] + ['overall_score']
data = data[columns]

print("Data after rearranging columns:")
display(data.head())


Data after rearranging columns:


Unnamed: 0,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed,Minutes REM Sleep,Minutes Light Sleep,Minutes Deep Sleep,Sleep Efficiency,Minutes Non-REM Sleep,overall_score
0,402,79,40,481,32,282,88,83.575884,370,71.0
1,444,63,36,507,51,332,61,87.573964,393,78.0
2,420,60,36,480,37,335,48,87.5,383,78.0
3,567,115,51,682,83,390,94,83.13783,484,75.0
4,495,100,35,595,75,335,85,83.193277,420,78.0


In [13]:
# Saving the cleaned dataset and downloading
data.to_csv('updated_dataset.csv', index=False)

files.download('updated_dataset.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
from sklearn.model_selection import train_test_split

# Load the updated dataset
data = pd.read_csv('updated_dataset.csv')

X = data.drop(columns=['overall_score'])
y = data['overall_score']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training Features Shape:", X_train.shape)
print("Test Features Shape:", X_test.shape)


Training Features Shape: (167, 9)
Test Features Shape: (42, 9)


In [15]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Random Forest MAE:", mae)
print("Random Forest R² Score:", r2)


Random Forest MAE: 2.5535714285714284
Random Forest R² Score: 0.7425010097631881


In [16]:
def preprocess_input(input_data, training_columns, default_values):

    import numpy as np

    input_df = pd.DataFrame([input_data])

    # Calculate 'Minutes Non-REM Sleep' if not provided
    if 'Minutes Non-REM Sleep' not in input_df.columns or pd.isnull(input_df['Minutes Non-REM Sleep']).all():
        input_df['Minutes Non-REM Sleep'] = input_df.get('Minutes Light Sleep', default_values['Minutes Light Sleep']) + \
                                            input_df.get('Minutes Deep Sleep', default_values['Minutes Deep Sleep'])

    for col in training_columns:
        if col in input_df.columns:

            default_value = default_values.get(col, 0)
            input_df[col] = input_df[col].apply(lambda x: default_value if pd.isnull(x) else x).astype(type(default_value))
        else:

            default_value = default_values.get(col, 0)
            input_df[col] = np.array([default_value], dtype=type(default_value))

    input_df = input_df[training_columns]

    return input_df


In [17]:
# Giving default values
default_values = {
    "Minutes Asleep": 420,
    "Minutes Awake": 60,
    "Number of Awakenings": 10,
    "Time in Bed": 480,
    "Minutes REM Sleep": 80,
    "Minutes Light Sleep": 200,
    "Minutes Deep Sleep": 100,
    "Minutes Non-REM Sleep": 300,
    "Sleep Efficiency": 85
}

training_columns = X.columns.tolist()

# Input data
input_data = {
    "Minutes Asleep": 480,
    "Minutes Awake": None,
    "Number of Awakenings": 30,
    "Time in Bed": 530,
    "Minutes REM Sleep": 90,
    "Minutes Light Sleep": None,
    "Minutes Deep Sleep": 120,
    "Minutes Non-REM Sleep": None,
    "Sleep Efficiency": None
}

# Preprocess the input data
processed_input = preprocess_input(input_data, training_columns, default_values)

# Predict using Random Forest
predicted_score = model.predict(processed_input)
print(f"Random Forest Predicted Sleep Score: {predicted_score[0]}")


Random Forest Predicted Sleep Score: 79.59


**DECISION TREE**

In [18]:
from sklearn.tree import DecisionTreeRegressor

dt_model = DecisionTreeRegressor(random_state=42, max_depth=5)  # Adjust max_depth as needed
dt_model.fit(X_train, y_train)

y_pred_dt = dt_model.predict(X_test)

mae_dt = mean_absolute_error(y_test, y_pred_dt)
r2_dt = r2_score(y_test, y_pred_dt)

print("Decision Tree MAE:", mae_dt)
print("Decision Tree R² Score:", r2_dt)


Decision Tree MAE: 2.8196010860484546
Decision Tree R² Score: 0.6769220025137086


In [19]:
# Preprocess the input data
processed_input_dt = preprocess_input(input_data, X.columns, default_values)

# Predict using Decision Tree
predicted_score_dt = dt_model.predict(processed_input_dt)
print(f"Decision Tree Predicted Sleep Score: {predicted_score_dt[0]}")


Decision Tree Predicted Sleep Score: 86.0


**KNN**


In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor

# Normalize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the KNN model
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

y_pred_knn = knn_model.predict(X_test_scaled)

mae_knn = mean_absolute_error(y_test, y_pred_knn)
r2_knn = r2_score(y_test, y_pred_knn)

print("KNN MAE:", mae_knn)
print("KNN R² Score:", r2_knn)


KNN MAE: 2.6142857142857143
KNN R² Score: 0.7088887459641631


In [21]:
# Normalize the input data
processed_input_knn = scaler.transform(preprocess_input(input_data, X.columns, default_values))

# Predict using KNN
predicted_score_knn = knn_model.predict(processed_input_knn)
print(f"KNN Predicted Sleep Score: {predicted_score_knn[0]}")


KNN Predicted Sleep Score: 77.8
