In [25]:
%pip install pandas





[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [26]:
%pip install tensorflow

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [27]:
%pip install scikit-learn




[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip





In [28]:
%pip install imblearn

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [29]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.sequence import pad_sequences
from imblearn.over_sampling import SMOTE

In [30]:
# Function to load the data
def load_data(folder):
    data = []
    labels = []
    for label, subfolder in enumerate(['control', 'condition']):
        path = os.path.join(folder, subfolder)
        for file in os.listdir(path):
            if file.endswith(".csv"):
                df = pd.read_csv(os.path.join(path, file))
                data.append(df[['timestamp', 'activity']].values)
                labels.append(label)
    return data, labels

In [31]:
# Function to preprocess the data
def preprocess_data(data):
    processed_data = []
    for sequence in data:
        df = pd.DataFrame(sequence, columns=['timestamp', 'activity'])
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['hour'] = df['timestamp'].dt.hour
        processed_data.append(df[['hour', 'activity']].values)
    return processed_data

In [32]:
# Load and preprocess the data
data, labels = load_data('data')
processed_data = preprocess_data(data)

In [33]:
# Standardize the data
scaler = StandardScaler()
for i in range(len(processed_data)):
    processed_data[i] = scaler.fit_transform(processed_data[i])

In [34]:
def add_noise(data, noise_factor=0.1):
    noisy_data = []
    for sequence in data:
        noise = np.random.normal(loc=0.0, scale=noise_factor, size=sequence.shape)
        noisy_sequence = sequence + noise
        noisy_data.append(noisy_sequence)
    return noisy_data

# Apply noise to the processed data
noisy_data = add_noise(processed_data)

In [35]:
# Pad sequences
max_len = max(len(seq) for seq in processed_data)
X = pad_sequences(processed_data, maxlen=max_len, dtype='float32', padding='post', truncating='post')
y = np.array(labels)

In [36]:
# Reshape X for SMOTE
n_samples, time_steps, n_features = X.shape
X_reshaped = X.reshape((n_samples, time_steps * n_features))

# Apply SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_reshaped, y)

# Reshape X back to original form
X_resampled = X_resampled.reshape((X_resampled.shape[0], time_steps, n_features))

In [37]:
# Define the model creation function
def create_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(LSTM(50, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [38]:
# Perform k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []
fold = 1

for train_index, test_index in kf.split(X_resampled):
    X_train, X_test = X_resampled[train_index], X_resampled[test_index]
    y_train, y_test = y_resampled[train_index], y_resampled[test_index]
    
    input_shape = (X_train.shape[1], X_train.shape[2])
    model = create_model(input_shape)
    
    # Callbacks for early stopping and saving the best model
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
    model_checkpoint = ModelCheckpoint(f'best_modelKfold_fold_{fold}.keras', monitor='val_loss', save_best_only=True, verbose=1)
    
    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint],
        verbose=1
    )
    
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f'Fold {fold} - Test Accuracy: {accuracy:.4f}')
    accuracies.append(accuracy)
    fold += 1

print(f'Mean Test Accuracy: {np.mean(accuracies):.4f}')
print("")
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Accuracy: {accuracy:.4f}')
# Model Evaluation Section
# Assuming you have the true labels and predictions ready
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to binary predictions if needed

# Insert the new code here
from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate precision, recall, and F1 score
precision = precision_score(y_test, y_pred, average='binary')  # Use 'micro', 'macro', 'weighted' for multi-class
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')

# Print the scores
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129s/step - accuracy: 0.5406 - loss: 1.5580  
Epoch 1: val_loss improved from inf to 15.30238, saving model to best_modelKfold_fold_1.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 134s/step - accuracy: 0.5437 - loss: 1.8472 - val_accuracy: 0.0000e+00 - val_loss: 15.3024
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98s/step - accuracy: 0.6906 - loss: 5.4384   
Epoch 2: val_loss improved from 15.30238 to 0.00079, saving model to best_modelKfold_fold_1.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m299s[0m 101s/step - accuracy: 0.7021 - loss: 5.2386 - val_accuracy: 1.0000 - val_loss: 7.9279e-04
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112s/step - accuracy: 0.6781 - loss: 3.1578  
Epoch 3: val_loss did not improve from 0.00079
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 116s/step - accuracy: 0.6854 - 

KeyboardInterrupt: 