In [4]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [5]:
# **1. Load Dataset**
data = pd.read_csv('Sleep_Efficiency.csv')

In [6]:
# **2. Periksa Data Input**
print("Ringkasan Data:")
print(data.info())  # Struktur dataset
print("\nStatistik Deskriptif:")
print(data.describe())  # Statistik deskriptif

Ringkasan Data:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 452 entries, 0 to 451
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      452 non-null    int64  
 1   Age                     452 non-null    int64  
 2   Gender                  452 non-null    object 
 3   Bedtime                 452 non-null    object 
 4   Wakeup time             452 non-null    object 
 5   Sleep duration          452 non-null    float64
 6   Sleep efficiency        452 non-null    float64
 7   REM sleep percentage    452 non-null    int64  
 8   Deep sleep percentage   452 non-null    int64  
 9   Light sleep percentage  452 non-null    int64  
 10  Awakenings              432 non-null    float64
 11  Caffeine consumption    427 non-null    float64
 12  Alcohol consumption     438 non-null    float64
 13  Smoking status          452 non-null    object 
 14  Exercise frequency      44

In [7]:
# Periksa apakah ada nilai kosong (NaN)
print("\nJumlah Nilai Kosong per Kolom:")
print(data.isnull().sum())


Jumlah Nilai Kosong per Kolom:
ID                         0
Age                        0
Gender                     0
Bedtime                    0
Wakeup time                0
Sleep duration             0
Sleep efficiency           0
REM sleep percentage       0
Deep sleep percentage      0
Light sleep percentage     0
Awakenings                20
Caffeine consumption      25
Alcohol consumption       14
Smoking status             0
Exercise frequency         6
dtype: int64


In [8]:
# Isi nilai kosong dengan nilai default (opsional, tergantung konteks data)
data.fillna(0, inplace=True)
# Periksa apakah ada nilai kosong (NaN)
print("\nJumlah Nilai Kosong per Kolom:")
print(data.isnull().sum())


Jumlah Nilai Kosong per Kolom:
ID                        0
Age                       0
Gender                    0
Bedtime                   0
Wakeup time               0
Sleep duration            0
Sleep efficiency          0
REM sleep percentage      0
Deep sleep percentage     0
Light sleep percentage    0
Awakenings                0
Caffeine consumption      0
Alcohol consumption       0
Smoking status            0
Exercise frequency        0
dtype: int64


In [9]:
#Outlier
median = data['Sleep duration'].median()
data['Sleep duration'] = data['Sleep duration'].apply(lambda x: median if x < 5.5 or x > 9.5 else x)


In [10]:
# Convert Bedtime and Wakeup time to datetime
data['Bedtime'] = pd.to_datetime(data['Bedtime'])
data['Wakeup time'] = pd.to_datetime(data['Wakeup time'])

# Extract relevant features
data['Bedtime_Hour'] = data['Bedtime'].dt.hour
data['Wakeup_Hour'] = data['Wakeup time'].dt.hour

# Define chronotype based on bedtime and wakeup time
def determine_chronotype(row):
    if row['Bedtime_Hour'] <= 22 and row['Wakeup_Hour'] <= 6:
        return 'Lion'
    elif 22 <= row['Bedtime_Hour'] <= 23 and 6 <= row['Wakeup_Hour'] <= 8:
        return 'Bear'
    elif row['Bedtime_Hour'] > 23 and row['Wakeup_Hour'] > 8:
        return 'Wolf'
    else:
        return 'Dolphin'

data['Chronotype'] = data.apply(determine_chronotype, axis=1)

# Prepare features (X) and labels (y)
X = data[['Bedtime_Hour', 'Wakeup_Hour']]
y = data['Chronotype']

# Encode Chronotype labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [11]:
# Build TensorFlow model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(shape=(X_train_scaled.shape[1],)),  # Use `shape` instead of `input_shape`
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(4, activation='softmax')  # 4 output classes
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [12]:
# Train the model
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=8, validation_split=0.2)


Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.0610 - loss: 1.3753 - val_accuracy: 0.6438 - val_loss: 1.2401
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7906 - loss: 1.2139 - val_accuracy: 0.9315 - val_loss: 1.0861
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8917 - loss: 1.0777 - val_accuracy: 0.8904 - val_loss: 0.9265
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8851 - loss: 0.9183 - val_accuracy: 0.8904 - val_loss: 0.7204
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8403 - loss: 0.7408 - val_accuracy: 0.8904 - val_loss: 0.5217
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8732 - loss: 0.5559 - val_accuracy: 0.9315 - val_loss: 0.3863
Epoch 7/50
[1m36/36[0m [32m━━━━━━━━━

In [13]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Test Accuracy: {test_accuracy:.2f}')


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9507 - loss: 0.0839 
Test Accuracy: 0.96


In [14]:
# Prediction Function with Feature Names
def predict_chronotype(new_data):
    # Konversi input ke DataFrame dengan nama kolom sesuai data asli
    feature_columns = ['Bedtime_Hour', 'Wakeup_Hour']
    new_data_df = pd.DataFrame([new_data], columns=feature_columns)

    # Normalize the new data
    new_data_scaled = scaler.transform(new_data_df)

    # Predict using the model
    prediction = model.predict(new_data_scaled)

    # Decode the predicted label
    predicted_class = label_encoder.inverse_transform([prediction.argmax()])
    return predicted_class[0]

# Example Prediction
example_input = [22, 6]  # Bedtime hour, Wakeup hour, etc.
print("Predicted Chronotype:", predict_chronotype(example_input))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
Predicted Chronotype: Bear


In [15]:
model.save("turuku_chronotype_model.h5")  # Saves the model in HDF5 format

