In [None]:
# Colab-only step: attach Google Drive to the runtime's filesystem so the
# dataset path used by the following cells resolves.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# Location of the input recording (Feather file) on the mounted Drive.
edf_path = "/content/drive/MyDrive/diplomski/one_df_trta_SYNC.feather"

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

import pyarrow.feather as feather

In [None]:
# Read the Feather file as an Arrow table first, then materialise it as a
# pandas DataFrame. Both objects stay referenced afterwards.
table_data = feather.read_table(source=edf_path)
edf = table_data.to_pandas()


In [None]:
# Columns of the recording that are used as model inputs, grouped here by
# their name prefix. The order is significant: it defines the feature order
# of the matrix handed to the scaler and the classifier.
psg_columns = [
    # EOG
    'EOG LOC-A2', 'EOG ROC-A2',
    # EEG
    'EEG F3-A2', 'EEG F4-A1', 'EEG A1-A2',
    'EEG C3-A2', 'EEG C4-A1',
    'EEG O1-A2', 'EEG O2-A1',
    # EMG
    'EMG Chin',
    # Flow / Effort
    'Flow Patient-0', 'Flow Patient-1',
    'Effort THO', 'Effort ABD',
    # Remaining channels
    'Snore_x', 'ECG I', 'SpO2', 'PulseRate',
    'Leg 1', 'Leg 2',
]


In [None]:
# Downsample by plain decimation: keep every DOWNSAMPLE_STEP-th row and all
# columns. The kept rows retain their original index labels.
# (An averaging variant, groupby(index // 5).mean(), was tried earlier and is
# not used.)
DOWNSAMPLE_STEP = 3
downsampled_data = edf.iloc[::DOWNSAMPLE_STEP]

In [None]:
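# NOTE: disabled experiment, kept for reference only. It computed per-epoch
# summary features (mean / std / skew / kurtosis for every PSG column) and a
# majority-vote 'Stage' label per epoch. The cells below do not use it; they
# train directly on the downsampled per-sample values.
# If this is revived, collect the feature dicts in a list and build the
# DataFrame once instead of calling pd.concat inside the loop.
# features = pd.DataFrame()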

# epoch_size = 3000

# for i in range(0, len(downsampled_data) - epoch_size, epoch_size):
#     epoch = downsampled_data.iloc[i:i + epoch_size]
#     feature_vector = {}

#     for col in psg_columns:
#         try:
#             feature_vector[f'{col}_mean'] = epoch[col].mean()
#             feature_vector[f'{col}_std'] = epoch[col].std()
#             feature_vector[f'{col}_skew'] = skew(epoch[col])
#             feature_vector[f'{col}_kurt'] = kurtosis(epoch[col])
#         except FloatingPointError:
#             print(f"Skipping epoch {i} for column {col} due to numerical issues.")

#     sleep_phase = epoch['Stage'].value_counts().idxmax()
#     feature_vector['Stage'] = sleep_phase

#     feature_row = pd.DataFrame([feature_vector])

#     features = pd.concat([features, feature_row], ignore_index=True)

In [None]:
# Feature matrix: the selected PSG columns of every downsampled row.
X = downsampled_data.loc[:, psg_columns]
# Target vector: the 'Stage' column of the same rows.
y = downsampled_data.loc[:, 'Stage']

In [None]:
# Split first, scale second.
#
# The scaler must learn its mean/std from the training rows only. Fitting it
# on the full dataset (as was done before) lets statistics of the held-out
# rows influence the preprocessing, which is a form of train/test leakage.
#
# train_test_split picks rows from the number of samples and random_state
# only, so the same rows end up in train/test as when the split was applied
# to the already-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# The full-dataset `X_scaled` array is intentionally no longer created: it
# was only an intermediate for the split and would cost an extra full copy.
#
# NOTE(review): the split is a random row-wise shuffle. If the rows are
# consecutive samples of one recording (TODO confirm), neighbouring and
# near-identical samples land in both sets and the test score will be
# optimistic; consider holding out contiguous blocks instead.

In [None]:
# Hyper-parameters of the gradient-boosted classifier, collected in one place
# so they can be inspected or logged alongside the results.
xgb_params = dict(
    # learning task
    objective='multi:softprob',
    eval_metric='mlogloss',
    # boosting schedule
    n_estimators=800,  # Number of boosting rounds
    learning_rate=0.1,
    # tree shape and row/column subsampling
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    # reproducibility and logging; verbosity=2 emits INFO-level messages
    # during fitting (one "tree pruning end" line per tree)
    random_state=42,
    verbosity=2,
)

clf = XGBClassifier(**xgb_params)

In [None]:
# Train on the training partition. This is the long-running cell of the
# notebook; the fitted estimator is returned and shown as the cell output.
clf.fit(X=X_train, y=y_train)

[17:05:23] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 112 extra nodes, 0 pruned nodes, max_depth=6
[17:05:27] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 110 extra nodes, 0 pruned nodes, max_depth=6
[17:05:30] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 126 extra nodes, 0 pruned nodes, max_depth=6
[17:05:34] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 118 extra nodes, 0 pruned nodes, max_depth=6
[17:05:38] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 106 extra nodes, 0 pruned nodes, max_depth=6
[17:05:41] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 120 extra nodes, 0 pruned nodes, max_depth=6
[17:05:45] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 100 extra nodes, 0 pruned nodes, max_depth=6
[17:05:48] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 126 extra nodes, 0 pruned nodes, max_depth=6
[17:05:52] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 116 extra nodes, 0 pruned no

Exception ignored on calling ctypes callback function: <function _log_callback at 0x7a56e22735b0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 160, in _log_callback
    def _log_callback(msg: bytes) -> None:
KeyboardInterrupt: 


[20:09:17] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 118 extra nodes, 0 pruned nodes, max_depth=6
[20:09:20] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 104 extra nodes, 0 pruned nodes, max_depth=6
[20:09:24] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 120 extra nodes, 0 pruned nodes, max_depth=6
[20:09:27] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 94 extra nodes, 0 pruned nodes, max_depth=6
[20:09:31] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 124 extra nodes, 0 pruned nodes, max_depth=6
[20:09:35] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 78 extra nodes, 0 pruned nodes, max_depth=6
[20:09:39] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 126 extra nodes, 0 pruned nodes, max_depth=6
[20:09:42] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 110 extra nodes, 0 pruned nodes, max_depth=6
[20:09:46] INFO: ../src/tree/updater_prune.cc:98: tree pruning end, 122 extra nodes, 0 pruned node

In [None]:
# Evaluate the fitted classifier on the held-out partition.
y_pred = clf.predict(X_test)

# Overall fraction of correctly classified test rows.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", test_accuracy)

# Per-class confusion counts (rows: true label, columns: predicted label).
print("Confusion Matrix:")
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)

# Precision / recall / F1 per class.
print("Classification Report:")
test_report = classification_report(y_test, y_pred)
print(test_report)

In [None]:
import pickle

# Persist the trained classifier.
# Both paths below are relative, so the files are written to the current
# working directory.
# NOTE(review): on Colab that directory is presumably not on the mounted
# Drive; copy the files there if they must outlive the runtime.
filename = 'xgboost_model.pkl'
with open(filename, 'wb') as file:
    pickle.dump(clf, file)

# The classifier was trained on StandardScaler-transformed features, so the
# fitted scaler is required to prepare any new data for prediction. Save it
# next to the model; the model file itself is unchanged, so existing loaders
# keep working.
scaler_filename = 'xgboost_scaler.pkl'
with open(scaler_filename, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# NOTE(review): pickles are tied to the library versions that wrote them and
# must only be loaded from trusted sources. For long-term storage of the
# booster, XGBoost's own `save_model` format may be the better choice.
