<a href="https://colab.research.google.com/github/Ucchwas/Polysomnographic-Database-Respiration-and-ECG-features/blob/main/Sleep_Stages.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install scikeras[tensorflow]

Collecting scikeras[tensorflow]
  Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Collecting keras>=3.2.0 (from scikeras[tensorflow])
  Downloading keras-3.4.1-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scikit-learn>=1.4.2 (from scikeras[tensorflow])
  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow>=2.16.1 (from scikeras[tensorflow])
  Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (601.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m601.3/601.3 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting namex (from keras>=3.2.0->scikeras[tensorflow])
  Downloading namex-0.0.8-py3-none-any.whl (5.8 kB)
Coll

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import resample
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, Nadam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
import numpy as np
from scikeras.wrappers import KerasClassifier

# Load the dataset
df = pd.read_csv('/content/drive/MyDrive/Datasets/PSG/SLPDB_Top_79_Features_PCA.csv')
# Extract features and labels
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Resample the dataset to handle imbalance manually
df_resampled = pd.concat([pd.DataFrame(X), pd.Series(y, name='label')], axis=1)
majority_class = df_resampled['label'].value_counts().idxmax()
df_majority = df_resampled[df_resampled['label'] == majority_class]

df_minority_resampled = pd.DataFrame()
for class_label in df_resampled['label'].unique():
    if class_label != majority_class:
        df_minority = df_resampled[df_resampled['label'] == class_label]
        df_minority_resampled = pd.concat([df_minority_resampled, resample(df_minority, replace=True, n_samples=len(df_majority), random_state=42)])

df_resampled = pd.concat([df_majority, df_minority_resampled])
X_resampled = df_resampled.iloc[:, :-1].values
y_resampled = df_resampled.iloc[:, -1].values

# Standardize features
scaler = StandardScaler()
X_resampled = scaler.fit_transform(X_resampled)

# Convert labels to one-hot encoding for final training and evaluation
y_resampled_one_hot = to_categorical(y_resampled, num_classes=4)

# Define the model creation function
def create_model(optimizer='adam', input_shape=(25,)):  # Assuming 25 features
    model = Sequential()
    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(0.01), input_shape=input_shape))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(0.01)))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu', kernel_initializer='he_uniform', kernel_regularizer=l2(0.01)))
    model.add(Dropout(0.5))
    model.add(Dense(4, activation='softmax'))

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Define the model
model = KerasClassifier(model=create_model, verbose=0, input_shape=(X_resampled.shape[1],))

# Define the grid search parameters
param_grid = {
    'batch_size': [32, 64],
    'epochs': [50, 100],
    'optimizer': ['adam', 'nadam']
}

# Implement GridSearchCV with KFold
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=KFold(n_splits=3), verbose=1)
grid_result = grid.fit(X_resampled, y_resampled_one_hot)

# Summarize results
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")

best_model = grid_result.best_estimator_.model_

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled_one_hot, test_size=0.2, train_size=0.8, random_state=42)

# Train the model with the best parameters
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
best_model.fit(X_train, y_train, epochs=grid_result.best_params_['epochs'], batch_size=grid_result.best_params_['batch_size'], validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model
_, accuracy = best_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

y_pred = np.argmax(best_model.predict(X_test), axis=1)
y_test_original = np.argmax(y_test, axis=1)

print("Classification Report:")
print(classification_report(y_test_original, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test_original, y_pred))

Fitting 3 folds for each of 8 candidates, totalling 24 fits


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

Best: 0.4204679299187291 using {'batch_size': 32, 'epochs': 100, 'optimizer': 'adam'}
Epoch 1/100
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9304 - loss: 0.5141 - val_accuracy: 0.9651 - val_loss: 0.4188
Epoch 2/100
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9384 - loss: 0.5120 - val_accuracy: 0.9486 - val_loss: 0.4652
Epoch 3/100
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9371 - loss: 0.5062 - val_accuracy: 0.9525 - val_loss: 0.4634
Epoch 4/100
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9310 - loss: 0.5235 - val_accuracy: 0.9380 - val_loss: 0.4917
Epoch 5/100
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9393 - loss: 0.5007 - val_accuracy: 0.9390 - val_loss: 0.4801
Epoch 6/100
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 

In [None]:
# !pip install wfdb
# !pip install tsfel
# !pip install biosppy
# import wfdb
# import numpy as np
# import matplotlib.pyplot as plt
# from biosppy.signals import ecg
# from biosppy.signals import resp
# import tsfel
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.preprocessing import LabelEncoder
# from sklearn.metrics import classification_report, confusion_matrix
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Dropout
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.callbacks import EarlyStopping
# from tensorflow.keras.utils import to_categorical
# from imblearn.over_sampling import RandomOverSampler
# from tensorflow import keras

In [None]:
# import pandas as pd

# # Load the dataset
# df = pd.read_csv('/content/drive/MyDrive/Datasets/PSG/SLPDB_Top_22_Features_PCA.csv')

# # Mapping of numerical labels to their corresponding labels
# label_mapping = {0: 'W', 1: 'H', 2: 'OA', 3: 'CA'}

# # Replace numerical labels with their corresponding labels
# df['label'] = df['label'].map(label_mapping)

# # Calculate the number of occurrences of each label
# label_counts = df['label'].value_counts()

# # Print the number of occurrences for each specified label
# for label in ['W', 'H', 'OA', 'CA']:
#     count = label_counts.get(label, 0)
#     print(f"Number of {label}: {count}")

In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.preprocessing import StandardScaler, LabelEncoder
# from sklearn.model_selection import train_test_split
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.callbacks import EarlyStopping
# from tensorflow.keras import layers, models, optimizers, regularizers
# from imblearn.over_sampling import SMOTE
# from sklearn.metrics import classification_report, confusion_matrix

# def ANN(X_train, y_train, X_test, y_test):
#     model = models.Sequential([
#         layers.Dense(128, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=regularizers.l2(0.01)),
#         layers.Dropout(0.3),
#         layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
#         layers.Dropout(0.3),
#         layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
#         layers.Dense(4, activation='softmax')
#     ])

#     # Compile the model with a lower learning rate
#     optimizer = optimizers.Adam(learning_rate=0.0005)
#     model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

#     # Use early stopping to prevent overfitting
#     early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

#     # Train the model with adjusted class weights
#     model.fit(X_train, y_train, epochs=500, batch_size=32, validation_split=0.2,
#               callbacks=[early_stopping], class_weight={0: 1.8, 1: 1.5, 2: 1.8, 3: 1.0})

#     # Evaluate the model on the test set
#     _, accuracy = model.evaluate(X_test, y_test)
#     print(f"Test Accuracy: {accuracy}")

#     # Make predictions on the test set
#     y_pred = np.argmax(model.predict(X_test), axis=1)
#     y_test_original = np.argmax(y_test, axis=1)

#     # Print classification report and confusion matrix
#     print("Classification Report:")
#     print(classification_report(y_test_original, y_pred))

#     print("Confusion Matrix:")
#     print(confusion_matrix(y_test_original, y_pred))

#     return model, y_pred

# # Load the dataset
# df = pd.read_csv('/content/drive/MyDrive/Datasets/PSG/SLPDB_Top_22_Features_PCA.csv')

# # Convert all feature columns to numeric, handling errors by coercing to NaN
# for col in df.columns:
#     df[col] = pd.to_numeric(df[col], errors='coerce')

# # Drop rows with NaN values if any
# df.dropna(inplace=True)

# # Check the shape of the dataframe after dropping NaN values
# print("Shape after dropping NaNs:", df.shape)

# # Separate features and labels
# X = df.drop(columns=['label'])
# y = df['label']

# # Encode labels to numerical values
# label_encoder = LabelEncoder()
# y = label_encoder.fit_transform(y)

# # Resample the dataset using SMOTE to handle imbalance
# smote = SMOTE(sampling_strategy='auto', random_state=42)
# X_resampled, y_resampled = smote.fit_resample(X, y)

# # Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# # Standardize features
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # Convert labels to one-hot encoding
# y_train = to_categorical(y_train, num_classes=4)
# y_test = to_categorical(y_test, num_classes=4)

# # Train the model
# model, y_preds = ANN(X_train, y_train, X_test, y_test)