In [97]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [98]:
# Load a saved model.
# (Choose which fold to test, 1 to 5; this cell loads the fold-3 checkpoint file.)
model = load_model('best_modelCekIni_fold_3.keras')

def load_and_preprocess_test_data(test_folder, num_files=5):
    """Read the numbered test CSV files stored in ``test_folder``.

    Looks for ``test_1.csv`` through ``test_<num_files>.csv`` and, for every
    file that exists, keeps only the ``timestamp`` and ``activity`` columns.

    Args:
        test_folder: Directory that holds the ``test_<i>.csv`` files.
        num_files: Highest file number to look for (defaults to 5).

    Returns:
        A list of 2-D arrays (one per file found, in ascending file-number
        order) whose columns are ``timestamp`` and ``activity``.

    Warns:
        UserWarning: for every expected file that is missing. The file is
        still skipped, but the returned list is then shorter than
        ``num_files``, so any labels matched to it by position no longer
        line up.
    """
    import warnings

    test_data = []
    for i in range(1, num_files + 1):
        file_path = os.path.join(test_folder, f'test_{i}.csv')
        if not os.path.exists(file_path):
            # Keep the original best-effort behaviour (skip), but make it visible.
            warnings.warn(
                f"Test file not found, skipping: {file_path}",
                stacklevel=2,
            )
            continue
        df = pd.read_csv(file_path)
        test_data.append(df[['timestamp', 'activity']].values)
    return test_data

In [99]:
def preprocess_data(data):
    """Replace the timestamp of every sample with its hour of day.

    Args:
        data: Iterable of 2-D arrays with columns ``timestamp`` and
            ``activity`` (in that order).

    Returns:
        A list with one 2-D array per input sequence; the columns are
        ``hour`` (taken from the parsed timestamp) and ``activity``.
    """
    def _hour_and_activity(sequence):
        # Parse the timestamps once and derive the hour from the parsed values.
        frame = pd.DataFrame(sequence, columns=['timestamp', 'activity'])
        parsed = pd.to_datetime(frame['timestamp'])
        frame = frame.assign(timestamp=parsed, hour=parsed.dt.hour)
        return frame[['hour', 'activity']].values

    return [_hour_and_activity(sequence) for sequence in data]

In [100]:
def prepare_data(data, time_steps=65407, num_features=2):
    """Standardise each sequence and force it to exactly ``time_steps`` rows.

    The scaler is re-fitted on every sequence, so each one is standardised
    with its own statistics. Sequences longer than ``time_steps`` are cut
    after scaling; shorter ones are extended at the end with rows of zeros.

    Args:
        data: Iterable of 2-D numeric arrays, one per sequence.
        time_steps: Number of rows every returned sequence must have.
            NOTE(review): presumably the length the model was trained on; confirm.
        num_features: Width of the zero rows used for padding.

    Returns:
        The result of ``np.array`` over the list of fixed-length sequences.
    """
    scaler = StandardScaler()
    fixed_length = []
    for sequence in data:
        standardized = scaler.fit_transform(sequence)
        n_rows = len(standardized)

        if n_rows > time_steps:
            # Too long: keep only the first ``time_steps`` rows.
            standardized = standardized[:time_steps]
        elif n_rows < time_steps:
            # Too short: append zero rows up to the required length.
            filler = np.zeros((time_steps - n_rows, num_features))
            standardized = np.vstack([standardized, filler])

        fixed_length.append(standardized)

    return np.array(fixed_length)

In [101]:
# Load test data
test_data_folder = 'test_data'
# Read the (timestamp, activity) arrays from the test CSV files in that folder.
raw_test_data = load_and_preprocess_test_data(test_data_folder)
# Swap each timestamp for its hour of day.
processed_test_data = preprocess_data(raw_test_data)
# Standardise every sequence and pad/truncate it to prepare_data's default length.
X_test = prepare_data(processed_test_data)

In [102]:
# Expected class for each test sequence (1 = depressed, 0 = non-depressed).
# NOTE(review): presumably ordered like test_1.csv ... test_5.csv, and not
# referenced by any cell visible here; confirm before using it for scoring.
true_labels = [1, 1, 0, 1, 0]

In [103]:
# Prediction process
# Pair every raw model output with its class name; 0.5 is the decision threshold.
predictions = [
    (score, 'depressed' if score >= 0.5 else 'non-depressed')
    for score in map(np.squeeze, model.predict(X_test))
]
# Numeric labels derived from the class names: 1 = depressed, 0 = non-depressed.
predicted_labels = [1 if label == 'depressed' else 0 for _, label in predictions]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


In [104]:
# Print each predicted class together with the raw model output.
# Outputs of 0.5 or more are labelled depressed (1); lower ones non-depressed (0).
for prob, prediction in predictions:
    print(f"{prediction} - {prob:.4f}")


depressed - 1.0000
depressed - 0.9992
non-depressed - 0.0000
depressed - 0.9998
non-depressed - 0.0000


In [105]:
# Print each predicted class with the model's confidence in that class:
# the raw output for 'depressed', or one minus the raw output for 'non-depressed'.
# This is how sure the model is, not a measure of whether it is correct.
for prob, prediction in predictions:
    confidence = prob if prediction == 'depressed' else 1 - prob
    print(f"{prediction} - Confidence: {confidence:.4f}")

depressed - Confidence: 1.0000
depressed - Confidence: 0.9992
non-depressed - Confidence: 1.0000
depressed - Confidence: 0.9998
non-depressed - Confidence: 1.0000
