In [2]:
#Preprocessing
import numpy as np
import mne

# Load your EEG data
file_path = r'D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4001E0-PSG.edf'
raw_data = mne.io.read_raw_edf(file_path, preload=True)

# Display basic info about the raw data
print(raw_data.info)

# Set the EEG reference (average reference is common)
raw_data.set_eeg_reference('average', projection=True)

# Resample to increase the Nyquist frequency (e.g., 200 Hz)
raw_data.resample(sfreq=200)

# Apply a band-pass filter to remove noise (e.g., 0.5-50 Hz)
raw_data.filter(l_freq=0.5, h_freq=50)

# Downsample back to 100 Hz if needed
raw_data.resample(sfreq=100)

# Further preprocessing steps (if needed)
# For example, you could extract epochs, perform ICA, etc.

# Save the processed data if required
processed_file_path = r'D:\final p\processed_data.fif'
raw_data.save(processed_file_path, overwrite=True)



Extracting EDF parameters from D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 7949999  =      0.000 ... 79499.990 secs...


  raw_data = mne.io.read_raw_edf(file_path, preload=True)
  raw_data = mne.io.read_raw_edf(file_path, preload=True)
  raw_data = mne.io.read_raw_edf(file_path, preload=True)


<Info | 8 non-empty values
 bads: []
 ch_names: EEG Fpz-Cz, EEG Pz-Oz, EOG horizontal, Resp oro-nasal, EMG ...
 chs: 7 EEG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 50.0 Hz
 meas_date: 1989-04-24 16:13:00 UTC
 nchan: 7
 projs: []
 sfreq: 100.0 Hz
 subject_info: 4 items (dict)
>
EEG channel type selected for re-referencing
Adding average EEG reference projection.
1 projection items deactivated
Average reference projection was added, but has not been applied yet. Use the apply_proj method to apply it.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 50.00 Hz
- Upper transition bandwidth: 12.

  raw_data.save(processed_file_path, overwrite=True)


Closing D:\final p\processed_data.fif
[done]


In [5]:
pip install hurst

Collecting hurst
  Downloading hurst-0.0.5-py3-none-any.whl.metadata (3.6 kB)
Downloading hurst-0.0.5-py3-none-any.whl (5.9 kB)
Installing collected packages: hurst
Successfully installed hurst-0.0.5
Note: you may need to restart the kernel to use updated packages.


In [6]:
#Feature Extraction
import numpy as np
import mne
from hurst import compute_Hc
from tqdm import tqdm
from scipy.signal import welch
from scipy.stats import entropy

# Load your EEG data
file_path = r'D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4001E0-PSG.edf'
raw_data = mne.io.read_raw_edf(file_path, preload=True)

# Set the EEG reference (average reference is common)
raw_data.set_eeg_reference('average', projection=True)

# Apply a band-pass filter to remove noise (e.g., 0.5-49.5 Hz)
raw_data.filter(l_freq=0.5, h_freq=49.5)

# Resample data if necessary (e.g., downsample to 100 Hz)
raw_data.resample(sfreq=100)

# Extract the data from the raw object
eeg_data = raw_data.get_data()

# Define frequency bands (delta, theta, alpha, beta)
band_list = [(0.5, 4), (4, 7), (7, 12), (12, 30)]
Fs = 100  # Sampling frequency

# Functions for feature calculation
def compute_psd(signal, band, Fs):
    f, Pxx = welch(signal, Fs)
    band_power = np.trapz(Pxx[(f >= band[0]) & (f <= band[1])], f[(f >= band[0]) & (f <= band[1])])
    return band_power

def petrosian_fd(signal):
    N = len(signal)
    diff = np.diff(signal)
    N_delta = np.sum(diff[:-1] * diff[1:] < 0)
    return np.log10(N) / (np.log10(N) + np.log10(N / (N + 0.4 * N_delta)))

def hjorth_params(signal):
    first_diff = np.diff(signal)
    second_diff = np.diff(first_diff)
    var_zero = np.var(signal)
    var_d1 = np.var(first_diff)
    var_d2 = np.var(second_diff)
    activity = var_zero
    mobility = np.sqrt(var_d1 / var_zero)
    complexity = np.sqrt(var_d2 / var_d1) / mobility
    return activity, mobility, complexity

def detrended_fluctuation(signal):
    N = len(signal)
    L = np.floor(np.logspace(1, np.log10(N / 4), num=20)).astype(int)
    F = np.zeros(len(L))
    for i, l in enumerate(L):
        rms = []
        for j in range(0, N, l):
            if j + l < N:
                c = np.polyfit(range(l), signal[j:j+l], 1)
                fit = np.polyval(c, range(l))
                rms.append(np.sqrt(np.mean((signal[j:j+l] - fit) ** 2)))
        F[i] = np.sqrt(np.mean(np.array(rms) ** 2))
    alpha = np.polyfit(np.log10(L), np.log10(F), 1)[0]
    return alpha

# Initialize lists to store features
PSD = []
PFD = []
hjorths = []
hursts = []
DFA = []

# Loop over each EEG channel
for channel_data in tqdm(eeg_data):
    # Power Spectral Density (PSD) for each band
    psd_values = [compute_psd(channel_data, band, Fs) for band in band_list]
    PSD.append(psd_values)

    # Petrosian Fractal Dimension (PFD)
    pfd_value = petrosian_fd(channel_data)
    PFD.append(pfd_value)

    # Hjorth Parameters (Activity, Mobility, Complexity)
    hjorth_activity, hjorth_mobility, hjorth_complexity = hjorth_params(channel_data)
    hjorths.append([hjorth_activity, hjorth_mobility, hjorth_complexity])

    # Hurst Exponent
    hurst_value, _, _ = compute_Hc(channel_data, kind='change', min_window=100)
    hursts.append(hurst_value)

    # Detrended Fluctuation Analysis (DFA)
    dfa_value = detrended_fluctuation(channel_data)
    DFA.append(dfa_value)

# Convert feature lists to numpy arrays for further processing or saving
PSD = np.array(PSD)
PFD = np.array(PFD)
hjorths = np.array(hjorths)
hursts = np.array(hursts)
DFA = np.array(DFA)

# Save the features for later use
np.save(r'D:\final p\PSD.npy', PSD)
np.save(r'D:\final p\PFD.npy', PFD)
np.save(r'D:\final p\hjorths.npy', hjorths)
np.save(r'D:\final p\hursts.npy', hursts)
np.save(r'D:\final p\DFA.npy', DFA)

print("Feature extraction completed and saved successfully.")



Extracting EDF parameters from D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 7949999  =      0.000 ... 79499.990 secs...


  raw_data = mne.io.read_raw_edf(file_path, preload=True)
  raw_data = mne.io.read_raw_edf(file_path, preload=True)
  raw_data = mne.io.read_raw_edf(file_path, preload=True)


EEG channel type selected for re-referencing
Adding average EEG reference projection.
1 projection items deactivated
Average reference projection was added, but has not been applied yet. Use the apply_proj method to apply it.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 49.50 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 49.75 Hz)
- Filter length: 661 samples (6.610 s)

Sampling frequency of the instance is already 100.0, returning unmodified.


100%|███████████████████████████████████████████████████████████████████████████████████| 7/7 [14:41<00:00, 125.98s/it]

Feature extraction completed and saved successfully.





In [10]:
pip install pyeeg


Note: you may need to restart the kernel to use updated packages.


In [12]:
import numpy as np
import mne
from hurst import compute_Hc
from scipy.signal import welch
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [21]:
 #Load and Combine Features
import numpy as np

# Load the saved features
PSD = np.load(r'D:\final p\PSD.npy')
PFD = np.load(r'D:\final p\PFD.npy')
hjorths = np.load(r'D:\final p\hjorths.npy')
hursts = np.load(r'D:\final p\hursts.npy')
DFA = np.load(r'D:\final p\DFA.npy')

# Combine all features into a single feature matrix
# Assuming you have the same number of samples for each feature
features = np.concatenate([PSD, PFD[:, np.newaxis], hjorths, hursts[:, np.newaxis], DFA[:, np.newaxis]], axis=1)

print("Features shape:", features.shape)



Features shape: (7, 10)


In [23]:
#Load and Prepare Labels
# Assuming the number of samples matches the number of feature vectors
num_samples = features.shape[0]
labels = np.random.randint(0, 2, size=num_samples)  # Binary labels (0 or 1)

# Save the synthetic labels for future use
np.save(r'D:\final p\labels.npy', labels)
print("Synthetic labels created and saved.")



Synthetic labels created and saved.


In [24]:
# Split Data into Training and Testing Sets
from sklearn.model_selection import train_test_split

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


Training set shape: (4, 10)
Testing set shape: (3, 10)


In [27]:
# Train a Machine Learning Model
#Reduce the number of folds:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# Use cv=2 or cv=3 based on the available number of samples
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=2)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)





Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}


In [28]:
# Use Leave-One-Out Cross-Validation (LOO-CV):
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=loo)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)


Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 200}


In [29]:
#Consider Stratified K-Fold Cross-Validation (for Imbalanced Datasets):
from sklearn.model_selection import StratifiedKFold

strat_kfold = StratifiedKFold(n_splits=2)
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=strat_kfold)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)


Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}


In [30]:
#Save the Trained Model
import joblib

# Save the trained model
joblib.dump(model, r'D:\final p\random_forest_model.pkl')

['D:\\final p\\random_forest_model.pkl']

In [32]:
#Load and Use the Model
# Create dummy data for testing (with the same number of features as used in training)
n_features = X_train.shape[1]  # Get the number of features from training data
new_data = np.random.rand(1, n_features)  # Replace with actual data when available

# Make predictions
new_predictions = model.predict(new_data)

print("Predictions:", new_predictions)



Predictions: [1]


In [34]:
import numpy as np
import joblib

# Load the model
model = joblib.load(r'D:\final p\random_forest_model.pkl')

# Create dummy data (ensure it has the correct number of features)
n_features = X_train.shape[1]  # Number of features used in training
new_data = np.random.rand(1, n_features)  # Create a random sample with the correct shape

# Make predictions
new_predictions = model.predict(new_data)

print("Predictions:", new_predictions)



Predictions: [0]


In [37]:
# Example of loading training data to inspect features (if available)
X_train_df = pd.read_csv(r'sleep.csv')
print("Training data features:", X_train_df.columns)


Training data features: Index(['# EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'Resp oro-nasal',
       'EMG submental', 'Temp rectal', 'Event marker'],
      dtype='object')


In [38]:
# Example of adding missing columns
required_columns = ['feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5', 'feature_6', 'feature_7', 'feature_8', 'feature_9', 'feature_10']
for col in required_columns:
    if col not in new_data_df.columns:
        new_data_df[col] = 0  # Or another default value
new_data_df = new_data_df[required_columns]  # Reorder columns to match training data

# Convert DataFrame to numpy array
new_data = new_data_df.values

# Make predictions
new_predictions = model.predict(new_data)
print("Predictions:", new_predictions)


Predictions: [0 0 0 ... 0 0 0]


In [41]:
import numpy as np
import mne

def preprocess_eeg(file_path, output_path, resample_freq=100):
    """
    Preprocess EEG data from an EDF file and save the processed data.

    Parameters:
    - file_path: Path to the input EDF file.
    - output_path: Path to save the processed data.
    - resample_freq: Frequency to resample the data to. Default is 100 Hz.
    """
    # Load the raw EEG data
    raw_data = mne.io.read_raw_edf(file_path, preload=True)

    # Display basic info about the raw data
    print(raw_data.info)

    # Set the EEG reference (average reference is common)
    raw_data.set_eeg_reference('average', projection=True)

    # Resample to increase the Nyquist frequency (e.g., 200 Hz)
    raw_data.resample(sfreq=200)

    # Apply a band-pass filter to remove noise (e.g., 0.5-50 Hz)
    raw_data.filter(l_freq=0.5, h_freq=50)

    # Downsample back to the desired frequency (e.g., 100 Hz)
    raw_data.resample(sfreq=resample_freq)

    # Further preprocessing steps (if needed)
    # For example, you could extract epochs, perform ICA, etc.
    # Example: Extracting epochs (if you need to)
    # events, _ = mne.find_events(raw_data, stim_channel='STI 014')
    # epochs = mne.Epochs(raw_data, events, event_id=None, tmin=-1, tmax=2, baseline=(None, 0))

    # Save the processed data
    raw_data.save(output_path, overwrite=True)
    print(f"Processed data saved to {output_path}")

# Example usage
file_path = r'sleep-edf-database-expanded-1.0.0/sleep-cassette/SC4002E0-PSG.edf'
output_path = r'D:\final p\processed_data1.fif'
preprocess_eeg(file_path, output_path)


Extracting EDF parameters from D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4002E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 8489999  =      0.000 ... 84899.990 secs...


  raw_data = mne.io.read_raw_edf(file_path, preload=True)
  raw_data = mne.io.read_raw_edf(file_path, preload=True)
  raw_data = mne.io.read_raw_edf(file_path, preload=True)


<Info | 8 non-empty values
 bads: []
 ch_names: EEG Fpz-Cz, EEG Pz-Oz, EOG horizontal, Resp oro-nasal, EMG ...
 chs: 7 EEG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 50.0 Hz
 meas_date: 1989-04-25 14:50:00 UTC
 nchan: 7
 projs: []
 sfreq: 100.0 Hz
 subject_info: 4 items (dict)
>
EEG channel type selected for re-referencing
Adding average EEG reference projection.
1 projection items deactivated
Average reference projection was added, but has not been applied yet. Use the apply_proj method to apply it.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 50.00 Hz
- Upper transition bandwidth: 12.

  raw_data.save(output_path, overwrite=True)


Closing D:\final p\processed_data1.fif
[done]
Processed data saved to D:\final p\processed_data1.fif


In [42]:
import numpy as np
import mne

def preprocess_eeg(raw_data, resample_freq=100):
    """
    Preprocess EEG data.

    Parameters:
    - raw_data: MNE Raw object with the EEG data.
    - resample_freq: Frequency to resample the data to. Default is 100 Hz.

    Returns:
    - Processed MNE Raw object.
    """
    # Set the EEG reference (average reference is common)
    raw_data.set_eeg_reference('average', projection=True)

    # Resample to increase the Nyquist frequency (e.g., 200 Hz)
    raw_data.resample(sfreq=200)

    # Apply a band-pass filter to remove noise (e.g., 0.5-50 Hz)
    raw_data.filter(l_freq=0.5, h_freq=50)

    # Downsample back to the desired frequency (e.g., 100 Hz)
    raw_data.resample(sfreq=resample_freq)
    
    return raw_data

# Process training data
def process_training_data(train_file_path, output_file_path):
    raw_data = mne.io.read_raw_edf(train_file_path, preload=True)
    processed_data = preprocess_eeg(raw_data)
    processed_data.save(output_file_path, overwrite=True)
    print(f"Training data saved to {output_file_path}")

# Process new data
def process_new_data(new_data_file_path, output_file_path):
    raw_data = mne.io.read_raw_edf(new_data_file_path, preload=True)
    processed_data = preprocess_eeg(raw_data)
    processed_data.save(output_file_path, overwrite=True)
    print(f"New data saved to {output_file_path}")

# Example usage
train_file_path = r'D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4001E0-PSG.edf'
train_output_path = r'D:\final p\processed_training_data.fif'
process_training_data(train_file_path, train_output_path)

new_file_path = r'sleep-edf-database-expanded-1.0.0/sleep-cassette/SC4011E0-PSG.edf'
new_output_path = r'D:\final p\processed_new_data.fif'
process_new_data(new_file_path, new_output_path)


Extracting EDF parameters from D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 7949999  =      0.000 ... 79499.990 secs...


  raw_data = mne.io.read_raw_edf(train_file_path, preload=True)
  raw_data = mne.io.read_raw_edf(train_file_path, preload=True)
  raw_data = mne.io.read_raw_edf(train_file_path, preload=True)


EEG channel type selected for re-referencing
Adding average EEG reference projection.
1 projection items deactivated
Average reference projection was added, but has not been applied yet. Use the apply_proj method to apply it.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 50.00 Hz
- Upper transition bandwidth: 12.50 Hz (-6 dB cutoff frequency: 56.25 Hz)
- Filter length: 1321 samples (6.605 s)

Writing D:\final p\processed_training_data.fif


  processed_data.save(output_file_path, overwrite=True)


Closing D:\final p\processed_training_data.fif
[done]
Training data saved to D:\final p\processed_training_data.fif
Extracting EDF parameters from D:\final p\sleep-edf-database-expanded-1.0.0\sleep-cassette\SC4011E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 8405999  =      0.000 ... 84059.990 secs...


  raw_data = mne.io.read_raw_edf(new_data_file_path, preload=True)
  raw_data = mne.io.read_raw_edf(new_data_file_path, preload=True)
  raw_data = mne.io.read_raw_edf(new_data_file_path, preload=True)


EEG channel type selected for re-referencing
Adding average EEG reference projection.
1 projection items deactivated
Average reference projection was added, but has not been applied yet. Use the apply_proj method to apply it.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 50 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 50.00 Hz
- Upper transition bandwidth: 12.50 Hz (-6 dB cutoff frequency: 56.25 Hz)
- Filter length: 1321 samples (6.605 s)

Writing D:\final p\processed_new_data.fif


  processed_data.save(output_file_path, overwrite=True)


Closing D:\final p\processed_new_data.fif
[done]
New data saved to D:\final p\processed_new_data.fif
