## Load EEG data 24h

In [1]:

import pyedflib
file_name = "by captain borat/raw/EEG_0_per_hour_2024-03-20 17_12_18.edf"
try:
    # The context manager closes the EDF reader even when one of the reads
    # below raises, instead of relying on a private `_close()` call that is
    # only reached on the success path.
    with pyedflib.EdfReader(file_name) as f:
        n = f.signals_in_file               # number of signals stored in the file
        signal_labels = f.getSignalLabels()
        n_samples = f.getNSamples()[0]      # sample count of the first signal
        data = f.readSignal(0)              # first signal; used by the cells below
    print(f"Archivo: {file_name}")
    print(f"Número de señales: {n}")
    print("Etiquetas de señal:", signal_labels)
    print(f"Muestras en la primera señal: {n_samples}")
except Exception as e:
    # Best-effort report; note that `data` stays undefined when loading fails.
    print(f"Error al leer el archivo EDF: {e}")


Archivo: by captain borat/raw/EEG_0_per_hour_2024-03-20 17_12_18.edf
Número de señales: 1
Etiquetas de señal: ['Temp']
Muestras en la primera señal: 44153856


## DMD and LSTM models for clustering

In [16]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import detrend, butter, filtfilt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from pydmd import DMD


In [8]:
# Sampling rate and the bounds of the excerpt, in seconds
fs = 512
start_time, end_time = 40000, 60000

# The same bounds expressed as sample offsets into `data`
start_idx = fs * start_time
end_idx = fs * end_time

# Cut the excerpt out of the full recording
split_data = data[start_idx:end_idx]

# Seconds elapsed since the start of the excerpt, one entry per sample
t_split = np.arange(len(split_data)) / fs


In [9]:

# PART 1: Preprocess the EEG signal
def preprocess_eeg(signal, fs, lowcut=0.5, highcut=100.0, order=3):
    """Band-pass filter, detrend and z-score a 1-D signal.

    Parameters
    ----------
    signal : array-like, 1-D
        Samples to process.
    fs : float
        Sampling frequency in Hz.
    lowcut, highcut : float, optional
        Pass-band edges in Hz. The defaults (0.5 and 100.0) are the values
        this function has always applied; the previous inline comment
        claimed 0.5-30 Hz, which did not match the code.
    order : int, optional
        Order passed to the Butterworth design (default 3).

    Returns
    -------
    numpy.ndarray
        1-D array with the same number of samples as `signal`, standardized
        with `StandardScaler`.

    Raises
    ------
    ValueError
        If the band edges do not satisfy 0 < lowcut < highcut < fs / 2.
    """
    # Local import keeps this cell independent of the notebook's import cell.
    from scipy.signal import sosfiltfilt

    nyq = 0.5 * fs
    if not 0 < lowcut < highcut < nyq:
        raise ValueError(
            f"Band edges must satisfy 0 < lowcut < highcut < fs/2 "
            f"(got lowcut={lowcut}, highcut={highcut}, fs={fs})"
        )

    # Second-order sections avoid the coefficient round-off problems of the
    # (b, a) form, which matter for a band-pass whose lower edge is a tiny
    # fraction of the Nyquist frequency.
    sos = butter(order, [lowcut / nyq, highcut / nyq], btype='band', output='sos')
    # Forward-backward filtering: zero phase shift.
    filtered = sosfiltfilt(sos, signal)

    # Detrend to remove linear trends
    detrended = detrend(filtered)

    # Normalize (StandardScaler expects a 2-D column, hence the reshape)
    normalized = StandardScaler().fit_transform(detrended.reshape(-1, 1)).flatten()

    return normalized


In [10]:

# PART 2: Create windowed segments for analysis
def create_windows(signal, window_size=30*fs, overlap=0.5):
    """Cut a 1-D signal into fixed-length, overlapping windows.

    Parameters
    ----------
    signal : sequence or numpy.ndarray, 1-D
        Samples to segment.
    window_size : int, optional
        Window length in samples. The default is evaluated once, when this
        cell runs, from the notebook-level `fs`.
    overlap : float, optional
        Fraction of each window shared with the next one, in [0, 1).

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_windows, window_size); row i holds
        `signal[i*step : i*step + window_size]`. Trailing samples that do not
        fill a whole window are dropped. The result has zero rows when the
        signal is shorter than one window.

    Raises
    ------
    ValueError
        If `overlap` is outside [0, 1).
    """
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in [0, 1), got {overlap}")

    # Clamp to 1 so a small window with a high overlap cannot yield a zero
    # step (which would divide by zero below).
    step = max(1, int(window_size * (1 - overlap)))

    # A signal shorter than one window would otherwise give a negative
    # window count and make np.zeros raise.
    if len(signal) < window_size:
        return np.zeros((0, window_size))

    n_windows = (len(signal) - window_size) // step + 1
    windows = np.zeros((n_windows, window_size))

    for i in range(n_windows):
        start = i * step
        windows[i] = signal[start:start+window_size]

    return windows


In [11]:

# PART 3: Apply DMD to extract features
def extract_dmd_features(windows, rank=20, embedding_dim=40):
    """Extract DMD features from signal windows.

    Each window is delay-embedded into a Hankel matrix with `embedding_dim`
    rows, a DMD with `svd_rank=rank` is fitted to it, and the modes are
    ordered by decreasing amplitude magnitude.

    Parameters
    ----------
    windows : numpy.ndarray, shape (n_windows, window_length)
        One window per row.
    rank : int, optional
        Value passed as `svd_rank` to DMD and number of feature slots kept
        per quantity.
    embedding_dim : int, optional
        Number of delayed copies (rows of the Hankel matrix).

    Returns
    -------
    numpy.ndarray, shape (n_windows, 4 * rank)
        Per window, concatenated in this order: real parts of the
        eigenvalues, imaginary parts of the eigenvalues, amplitude
        magnitudes, absolute frequencies. When the decomposition yields
        fewer than `rank` modes the remaining slots are left at zero.

    Raises
    ------
    ValueError
        If there is at least one window and it is too short to give two
        delay-embedded snapshots.
    """
    n_windows, window_len = windows.shape
    n_cols = window_len - embedding_dim + 1
    if n_windows and n_cols < 2:
        raise ValueError(
            f"Windows of length {window_len} are too short for "
            f"embedding_dim={embedding_dim}"
        )

    # Prepare feature arrays
    eigenvalues_real = np.zeros((n_windows, rank))
    eigenvalues_imag = np.zeros((n_windows, rank))
    amplitudes = np.zeros((n_windows, rank))
    frequencies = np.zeros((n_windows, rank))

    if n_windows:
        # hankel[j, k] = window[j + k]; every window has the same length, so
        # the index grid is built once instead of once per window.
        hankel_idx = np.arange(embedding_dim)[:, None] + np.arange(n_cols)[None, :]

    for i in range(n_windows):
        # Create Hankel matrix (time-delay embedding)
        hankel = np.asarray(windows[i], dtype=float)[hankel_idx]

        # Apply DMD
        dmd = DMD(svd_rank=rank)
        dmd.fit(hankel)

        # Sort by amplitude (largest first), keeping at most `rank` modes
        idx = np.argsort(np.abs(dmd.amplitudes))[::-1][:rank]
        n_modes = idx.size
        sorted_eigs = dmd.eigs[idx]

        # Store features; writing into [:n_modes] avoids a shape mismatch
        # when fewer than `rank` modes are available.
        eigenvalues_real[i, :n_modes] = np.real(sorted_eigs)
        eigenvalues_imag[i, :n_modes] = np.imag(sorted_eigs)
        amplitudes[i, :n_modes] = np.abs(dmd.amplitudes[idx])
        frequencies[i, :n_modes] = np.abs(dmd.frequency[idx])

    # Combine features
    features = np.hstack((eigenvalues_real, eigenvalues_imag, amplitudes, frequencies))
    return features


In [12]:

# PART 4: Unsupervised clustering for sleep stages
def cluster_sleep_stages(features, n_clusters):
    """Group feature vectors with K-means and plot them in a t-SNE embedding.

    The clustering runs on the full feature matrix; the 2-D t-SNE projection
    is used only to draw the scatter plot. Both estimators are seeded with
    `random_state=42`.

    Parameters
    ----------
    features : array-like, shape (n_samples, n_features)
    n_clusters : int
        Number of K-means clusters.

    Returns
    -------
    tuple
        `(labels, features_2d)`: the K-means label of every row and the
        2-D t-SNE coordinates of every row.
    """
    # Two-dimensional projection, for display only
    embedding = TSNE(n_components=2, random_state=42).fit_transform(features)

    # Cluster assignment computed on the original feature space
    assignments = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(features)

    # One scatter series per cluster so each gets its own legend entry
    plt.figure(figsize=(10, 8))
    for cluster_id in range(n_clusters):
        members = assignments == cluster_id
        plt.scatter(
            embedding[members, 0],
            embedding[members, 1],
            label=f'Cluster {cluster_id}',
        )
    plt.legend()
    plt.title('Sleep Stage Clusters')
    plt.xlabel('t-SNE dimension 1')
    plt.ylabel('t-SNE dimension 2')
    plt.show()

    return assignments, embedding


In [13]:

# PART 5: Build deep learning model for micro-arousal detection
def build_micro_arousal_model(input_shape, learning_rate=0.001):
    """Assemble and compile a stacked-LSTM binary classifier.

    Architecture: LSTM(64, returning sequences) -> Dropout(0.2) -> LSTM(32)
    -> Dropout(0.2) -> Dense(16, relu) -> Dense(1, sigmoid).

    Parameters
    ----------
    input_shape : tuple
        Forwarded as `input_shape` to the first LSTM layer.
    learning_rate : float, optional
        Learning rate of the Adam optimizer.

    Returns
    -------
    Sequential
        Model compiled with binary cross-entropy loss and an accuracy metric.
    """
    model = Sequential()

    # Recurrent stack: the first layer emits the whole sequence so the
    # second LSTM can consume it.
    model.add(LSTM(64, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(32))
    model.add(Dropout(0.2))

    # Dense head ending in a single sigmoid unit
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model


In [17]:

# PART 6: Detect transition points for micro-arousals
def detect_transitions(labels, threshold=0.5):
    """Return the positions where consecutive labels differ.

    Parameters
    ----------
    labels : sequence
        Label of each window, in order.
    threshold : float, optional
        Accepted for interface compatibility; not used by the computation.

    Returns
    -------
    list of int
        Every index i >= 1 for which `labels[i] != labels[i-1]`.
    """
    # Pair each label with its successor; the index reported is the
    # successor's position.
    return [
        position
        for position, (previous, current) in enumerate(zip(labels, labels[1:]), start=1)
        if current != previous
    ]


In [15]:

# PART 7: Full analysis pipeline
def analyze_sleep_eeg(signal, fs):
    """Run preprocessing, windowing, DMD features, clustering and plotting.

    Pipeline: `preprocess_eeg` -> `create_windows` (windows of `1 * fs`
    samples, overlap 0.6) -> `extract_dmd_features` (rank 20, embedding
    dimension 40) -> `cluster_sleep_stages` (4 clusters) ->
    `detect_transitions`. A three-panel summary figure is then shown.

    Parameters
    ----------
    signal : array-like, 1-D
        Raw signal.
    fs : int
        Sampling frequency in Hz.

    Returns
    -------
    dict
        Keys `'features'`, `'labels'`, `'transitions'` and
        `'preprocess_eeg'` (the preprocessed signal).
    """
    # Stages 1-5: signal -> windows -> features -> labels -> label changes
    cleaned = preprocess_eeg(signal, fs)
    segments = create_windows(cleaned, 1 * fs, overlap=0.6)
    features = extract_dmd_features(segments, rank=20, embedding_dim=40)
    labels, features_2d = cluster_sleep_stages(features, n_clusters=4)
    transitions = detect_transitions(labels)

    # Stage 6: summary figure, three stacked panels
    _, (ax_hypno, ax_signal, ax_embed) = plt.subplots(3, 1, figsize=(15, 10))

    # Panel 1: label per window, with a red line at every label change
    ax_hypno.plot(labels, drawstyle='steps-post')
    ax_hypno.set_title('Sleep Stage Hypnogram')
    ax_hypno.set_ylabel('Stage')
    ax_hypno.set_xlabel('Window Index')
    for boundary in transitions:
        ax_hypno.axvline(x=boundary, color='r', alpha=0.3)

    # Panel 2: first 1000 samples of the preprocessed signal
    seconds = np.arange(len(cleaned)) / fs
    ax_signal.plot(seconds[:1000], cleaned[:1000])
    ax_signal.set_title('EEG Signal Segment')
    ax_signal.set_xlabel('Time (s)')
    ax_signal.set_ylabel('Amplitude')

    # Panel 3: t-SNE embedding coloured by label, joined in window order
    ax_embed.scatter(features_2d[:, 0], features_2d[:, 1], c=labels, cmap='viridis')
    ax_embed.plot(features_2d[:, 0], features_2d[:, 1], 'k-', alpha=0.3)
    ax_embed.set_title('DMD Feature Space (t-SNE)')
    ax_embed.set_xlabel('t-SNE dimension 1')
    ax_embed.set_ylabel('t-SNE dimension 2')

    plt.tight_layout()
    plt.show()

    summary = {
        'features': features,
        'labels': labels,
        'transitions': transitions,
        'preprocess_eeg': cleaned,
    }
    return summary


In [None]:
# Run the full pipeline on the recording held in `data`, sampled at 512 Hz
fs = 512
results = analyze_sleep_eeg(data, fs=fs)

# Access specific keys from the results dictionary if needed
patterns = results['features']
sleep_results = results['labels']
# Stored under its own name: binding the array to `preprocess_eeg` would
# replace the function of that name and break any re-run of the pipeline.
preprocessed_eeg = results['preprocess_eeg']


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Define the cluster to highlight
cluster_n = 1
fs = 512  # Sampling frequency (Hz)

# Windowing that produced `sleep_results`: analyze_sleep_eeg uses windows of
# 1 * fs samples with overlap 0.6. The label-index <-> time mapping below has
# to use the same values (keep them in sync if the pipeline changes).
window_size = 1 * fs
step = int(window_size * (1 - 0.6))  # samples between consecutive window starts

# Segment to display: 20 minutes starting 5 hours before the end of the recording.
# (The `_30min` suffix of the variables below is historical; the names are
# kept so that any cell relying on them keeps working.)
segment_minutes = 20
mins_samples = segment_minutes * 60 * fs
h1 = 60 * 60 * fs
segment_start = max(0, len(data) - 5 * h1)
segment_stop = min(len(data), segment_start + mins_samples)
data_30min = data[segment_start:segment_stop]

# Labels of the windows lying entirely inside that segment.
# Window w covers samples [w * step, w * step + window_size).
first_window = -(-segment_start // step)  # ceiling division
stop_window = min(len(sleep_results), (segment_stop - window_size) // step + 1)
stop_window = max(stop_window, first_window)
mins_windows = stop_window - first_window
sleep_results_30min = sleep_results[first_window:stop_window]
if mins_windows == 0:
    raise ValueError("No label windows fall inside the selected segment")

# First three windows of the segment assigned to cluster n
occurrences = [i for i, label in enumerate(sleep_results_30min) if label == cluster_n][:3]

# Time axes, both in seconds from the start of the segment
t_signal = np.arange(len(data_30min)) / fs
t_hypno = (np.arange(first_window, stop_window) * step - segment_start) / fs  # window onsets

# Value ranges, computed once (used for the highlight boxes and the y-axis)
signal_min = float(np.min(data_30min))
signal_max = float(np.max(data_30min))
hypno_min = float(np.min(sleep_results_30min)) - 0.5
hypno_max = float(np.max(sleep_results_30min)) + 0.5

# Highlight spans: 5 s on each side of every selected window onset, clipped
# to the segment. The same spans are drawn on both panels. Dedicated names
# are used so the notebook-level start_time / end_time / start_idx / end_idx
# are not overwritten by this cell.
highlight_margin = 5
segment_duration = len(data_30min) / fs
highlight_spans = [
    (
        max(0.0, float(t_hypno[occ]) - highlight_margin),
        min(segment_duration, float(t_hypno[occ]) + highlight_margin),
    )
    for occ in occurrences
]

# Create subplot figure
fig = make_subplots(rows=2, cols=1,
                   shared_xaxes=True,
                   vertical_spacing=0.1,
                   subplot_titles=("EEG Signal with Highlighted Cluster Occurrences",
                                  "Hypnogram with Highlighted Cluster Occurrences"))

# Add EEG signal trace
fig.add_trace(
    go.Scatter(x=t_signal, y=data_30min, name="EEG Signal", line=dict(color='blue')),
    row=1, col=1
)

# Add highlighted regions for cluster occurrences in EEG plot
for k, (span_start, span_end) in enumerate(highlight_spans):
    fig.add_trace(
        go.Scatter(
            x=[span_start, span_start, span_end, span_end],
            y=[signal_min, signal_max, signal_max, signal_min],
            fill="toself",
            fillcolor="rgba(255, 0, 0, 0.3)",
            line=dict(width=0),
            showlegend=(k == 0),  # a single legend entry for all highlights
            name=f"Cluster {cluster_n}"
        ),
        row=1, col=1
    )

# Add hypnogram trace
fig.add_trace(
    go.Scatter(
        x=t_hypno,
        y=sleep_results_30min,
        mode='lines+markers',
        line=dict(shape='hv', color='black'),
        name="Hypnogram"
    ),
    row=2, col=1
)

# Add highlighted regions for cluster occurrences in hypnogram
for span_start, span_end in highlight_spans:
    fig.add_trace(
        go.Scatter(
            x=[span_start, span_start, span_end, span_end],
            y=[hypno_min, hypno_max, hypno_max, hypno_min],
            fill="toself",
            fillcolor="rgba(255, 0, 0, 0.3)",
            line=dict(width=0),
            showlegend=False,
            name=f"Cluster {cluster_n}"
        ),
        row=2, col=1
    )

# Update layout
fig.update_layout(
    height=800,
    width=1200,
    title_text=f"EEG Signal and Hypnogram Analysis ({segment_minutes}-Minute Segment)",
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

# Configure axes
fig.update_xaxes(title_text="Time (s)", row=2, col=1)
fig.update_yaxes(title_text="Amplitude", row=1, col=1)
fig.update_yaxes(
    title_text="Sleep Stage",
    row=2,
    col=1,
    fixedrange=True,  # This prevents y-axis zooming for hypnogram
    range=[hypno_min, hypno_max]  # Set fixed y-axis range
)

# Show the figure
fig.show()

## Activity data comparison

In [3]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the activity data
activity_file = "by captain borat/raw/Activity at 16 Hz_2024-03-20 17_12_18.edf"
fs=512
activity_fs = 16  # Sampling frequency of activity data
try:
    # The context manager closes the reader even when readSignal raises,
    # instead of relying on a private `_close()` reached only on success.
    with pyedflib.EdfReader(activity_file) as f_activity:
        activity_data = f_activity.readSignal(0)
except Exception as e:
    print(f"Error loading activity data: {e}")
else:
    # Plotting runs only after a successful load and sits outside the
    # try-block, so a plotting failure is no longer reported as a
    # loading error.

    # Trim the activity data to 24 hours (24 * 60 * 60 seconds)
    activity_24h_samples = 24 * 60 * 60 * activity_fs
    activity_data_24h = activity_data[:activity_24h_samples]

    # Trim the EEG data to 24 hours (24 * 60 * 60 seconds)
    eeg_24h_samples = 24 * 60 * 60 * fs
    eeg_data_24h = data[:eeg_24h_samples]

    # Create time arrays for plotting
    t_activity = np.arange(len(activity_data_24h)) / activity_fs
    t_eeg = np.arange(len(eeg_data_24h)) / fs

    # Plot the data
    plt.figure(figsize=(15, 6))

    # Plot activity data
    plt.subplot(2, 1, 1)
    plt.plot(t_activity, activity_data_24h, label="Activity Data (16 Hz)", color='orange')
    plt.title("Activity Data (24 Hours)")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.legend()

    # Plot EEG data
    plt.subplot(2, 1, 2)
    plt.plot(t_eeg, eeg_data_24h, label="EEG Data (512 Hz)", color='blue')
    plt.title("EEG Data (24 Hours)")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Excerpt bounds, in seconds from the start of the recording
start_time, end_time = 40000, 60000

# The same bounds expressed as sample offsets into `data`
start_idx = fs * start_time
end_idx = fs * end_time

# Cut the excerpt and build its time axis (seconds from the excerpt start)
split_data = data[start_idx:end_idx]
t_split = np.arange(len(split_data)) / fs

# Draw the excerpt
plt.figure(figsize=(12, 6))
plt.plot(t_split, split_data, color='green', label="EEG Data (Split)")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title(f"EEG Data from {start_time}s to {end_time}s")
plt.legend()
plt.show()