<a href="https://colab.research.google.com/github/evanjames19/LC_Pupil_Tracking-Cortical-State/blob/main/Classification_Analysis_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from itertools import combinations
from scipy.stats import sem

# Define file paths
# CSV table that is read and split per animal further down in this cell.
classify_table_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/control_new2.csv'

# Define save paths
# Directory that receives all_data.npy.
save_path = '/content/drive/My Drive/autoencoder/NEW_DATA/PRO'

# Ensure save paths exist
# exist_ok=True makes re-running the cell harmless.
os.makedirs(save_path, exist_ok=True)

# Function to get pupil and EEG band data for a sample window (default 1:1999)
def get_pupil_eeg_data(data, start=1, stop=1999):
    """Extract the raw pupil trace and the six EEG band-power traces.

    Columns are selected by name prefix (``RawPupil``, ``DeltaPwr``,
    ``ThetaPwr``, ``AlphaPwr``, ``BetaPwr``, ``LowGammaPwr``,
    ``HighGammaPwr``) in the order they appear in ``data``; the same
    positional window ``start:stop`` is then cut from every group.

    Args:
        data: DataFrame holding one row per trial and the prefixed columns.
        start: First column position (inclusive) kept from each group.
        stop: Column position (exclusive) at which the window ends.

    Returns:
        Tuple ``(raw_pupil, eeg_bands)``: ``raw_pupil`` is 2-D
        (rows x window samples); ``eeg_bands`` stacks the six bands on a new
        last axis in the order Delta, Theta, Alpha, Beta, LowGamma, HighGamma.
    """
    band_prefixes = ('DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr')

    def window(prefix):
        # All columns of one signal, restricted to the requested samples.
        columns = [col for col in data.columns if col.startswith(prefix)]
        return data[columns].values[:, start:stop]

    raw_pupil = window('RawPupil')
    eeg_bands = np.stack([window(prefix) for prefix in band_prefixes], axis=-1)

    return raw_pupil, eeg_bands

# Function to process and save data
def process_and_save_data(data, save_path):
    """Split the table by animal, label each trial, and save the result.

    Args:
        data: DataFrame with an ``Animal`` column, a ``StimulationFrequency``
            column, and the signal columns read by ``get_pupil_eeg_data``.
        save_path: Directory in which ``all_data.npy`` is written.

    Returns:
        Dict keyed ``'animal_<id>'``; each value holds ``'pupil'``, ``'eeg'``
        and ``'label'`` arrays for that animal.
    """
    per_animal = {}

    for animal_id in data['Animal'].unique():
        rows = data[data['Animal'] == animal_id]
        pupil_traces, band_traces = get_pupil_eeg_data(rows)

        # Label 1 = non-zero stimulation frequency, label 0 = frequency of 0.
        is_stim = rows['StimulationFrequency'].values != 0

        per_animal[f'animal_{animal_id}'] = {
            'pupil': pupil_traces,
            'eeg': band_traces,
            'label': is_stim.astype(int),
        }

    # The dict is written as a single .npy file; readers in this notebook
    # load it back with allow_pickle=True and .item().
    out_file = os.path.join(save_path, 'all_data.npy')
    np.save(out_file, per_animal)

    return per_animal

# Process and save data
all_data = process_and_save_data(pd.read_csv(classify_table_path), save_path)

print("Data processed and saved successfully.")

# Load processed data
# Read the file back from disk; .item() unwraps the saved object into the dict.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# # Define the sets of animals
# set1_animals = ['animal_24124', 'animal_33107', 'animal_3336', 'animal_3358', 'animal_3368']
# set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']
# Active animal selection; entries are keys of all_data ('animal_<id>').
set2_animals = ['animal_33118','animal_24124', 'animal_33135', 'animal_33119']


In [None]:
import numpy as np
import pandas as pd
import os
from scipy.io import savemat

# Define file paths
# CSV table that is read and split per animal further down in this cell.
classify_table_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/classify_table.csv'

# Define save paths
# Directory that receives all_data.npy and the per-animal session-id .mat files.
save_path = '/content/drive/My Drive/autoencoder/NEW_DATA/PRO'

# Ensure save paths exist
# exist_ok=True makes re-running the cell harmless.
os.makedirs(save_path, exist_ok=True)

# Function to get pupil and EEG band data for a sample window (default 850:1850)
def get_pupil_eeg_data(data, start=850, stop=1850):
    """Extract the raw pupil trace and the six EEG band-power traces.

    Columns are selected by name prefix (``RawPupil``, ``DeltaPwr``,
    ``ThetaPwr``, ``AlphaPwr``, ``BetaPwr``, ``LowGammaPwr``,
    ``HighGammaPwr``) in the order they appear in ``data``; the same
    positional window ``start:stop`` is then cut from every group.

    Args:
        data: DataFrame holding one row per trial and the prefixed columns.
        start: First column position (inclusive) kept from each group.
        stop: Column position (exclusive) at which the window ends.

    Returns:
        Tuple ``(raw_pupil, eeg_bands)``: ``raw_pupil`` is 2-D
        (rows x window samples); ``eeg_bands`` stacks the six bands on a new
        last axis in the order Delta, Theta, Alpha, Beta, LowGamma, HighGamma.
    """
    band_prefixes = ('DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr')

    def window(prefix):
        # All columns of one signal, restricted to the requested samples.
        columns = [col for col in data.columns if col.startswith(prefix)]
        return data[columns].values[:, start:stop]

    raw_pupil = window('RawPupil')
    eeg_bands = np.stack([window(prefix) for prefix in band_prefixes], axis=-1)

    return raw_pupil, eeg_bands

# Function to process and save data
def process_and_save_data(data, save_path):
    """Split the table by animal, label each trial, and save the result.

    Args:
        data: DataFrame with ``Animal``, ``StimulationFrequency`` and ``Date``
            columns plus the signal columns read by ``get_pupil_eeg_data``.
        save_path: Directory in which ``all_data.npy`` is written.

    Returns:
        Dict keyed ``'animal_<id>'``; each value holds ``'pupil'``, ``'eeg'``,
        ``'label'`` and ``'session_id'`` arrays for that animal.
    """
    per_animal = {}

    for animal_id in data['Animal'].unique():
        rows = data[data['Animal'] == animal_id]
        pupil_traces, band_traces = get_pupil_eeg_data(rows)

        # Label 1 = non-zero stimulation frequency, label 0 = frequency of 0.
        is_stim = rows['StimulationFrequency'].values != 0

        per_animal[f'animal_{animal_id}'] = {
            'pupil': pupil_traces,
            'eeg': band_traces,
            'label': is_stim.astype(int),
            # The Date column is what this notebook uses as the session id.
            'session_id': rows['Date'].values,
        }

    # The dict is written as a single .npy file; readers in this notebook
    # load it back with allow_pickle=True and .item().
    out_file = os.path.join(save_path, 'all_data.npy')
    np.save(out_file, per_animal)

    return per_animal

# Process and save data
all_data = process_and_save_data(pd.read_csv(classify_table_path), save_path)

print("Data processed and saved successfully.")

# Load processed data
# Read the file back from disk; .item() unwraps the saved object into the dict.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the sets of animals
set1_animals = ['animal_24124', 'animal_33107', 'animal_3336', 'animal_3358', 'animal_3368']
set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']

# Extract and save session IDs for Set 2 animals
for animal in set2_animals:
    # .get returns None for an animal that is not in the table; such animals
    # are skipped without any message.
    animal_data = all_data.get(animal)
    if animal_data:
        session_ids = animal_data['session_id']
        mat_dict = {'session_ids': session_ids}
        savemat(os.path.join(save_path, f'{animal}_session_ids.mat'), mat_dict)
        print(f'Session IDs for {animal} saved successfully.')

# NOTE: printed unconditionally, even if some animals above were skipped.
print("All session IDs for Set 2 animals saved as .mat files successfully.")


In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from scipy.stats import sem, zscore

# Load processed data
# save_path is not set in this cell; it must already exist from an earlier cell.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the set of animals
# set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']
set2_animals = [  'animal_33119', 'animal_24124', 'animal_33118']

# Initialize lists to store the raw pupil data and EEG band data for spon and stim
# Each list collects one array per animal; they are concatenated later.
pupil_spon = []
pupil_stim = []
eeg_spon = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}
eeg_stim = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}

# Iterate over each animal in set2
for animal_key in set2_animals:
    animal_data = all_data[animal_key]
    labels = animal_data['label']

    # Separate the raw pupil data based on the labels
    # label 0 -> spontaneous list, label 1 -> stimulation list
    pupil_spon.append(animal_data['pupil'][labels == 0])
    pupil_stim.append(animal_data['pupil'][labels == 1])

    # Separate and z-score the EEG band data based on the labels
    # axis=None: one mean/std over all selected trials and samples of this
    # animal, band and condition (spon and stim are normalised separately).
    for i, band in enumerate(['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']):
        eeg_spon[band].append(zscore(animal_data['eeg'][labels == 0, :, i], axis=None))
        eeg_stim[band].append(zscore(animal_data['eeg'][labels == 1, :, i], axis=None))
# Function to calculate mean and SEM
def calculate_mean_sem(data):
    """Pool a list of arrays along axis 0 and return ``(mean, SEM)`` over it.

    Args:
        data: Sequence of arrays that share every dimension except the first.

    Returns:
        Tuple of the mean and the standard error of the mean, both computed
        across the pooled first axis.
    """
    pooled = np.concatenate(data, axis=0)
    return np.mean(pooled, axis=0), sem(pooled, axis=0)

# Calculate mean and SEM for raw pupil data
# Statistics are taken across all pooled trials of all listed animals.
pupil_spon_mean, pupil_spon_sem = calculate_mean_sem(pupil_spon)
pupil_stim_mean, pupil_stim_sem = calculate_mean_sem(pupil_stim)

# Plot Raw Pupil data
# Solid line = mean trace, shaded band = mean +/- SEM; blue = spon, red = stim.
plt.figure(figsize=(10, 5))
plt.plot(pupil_spon_mean, label='Spon', color='blue')
plt.fill_between(range(len(pupil_spon_mean)), pupil_spon_mean - pupil_spon_sem, pupil_spon_mean + pupil_spon_sem, color='blue', alpha=0.3)
plt.plot(pupil_stim_mean, label='Stim', color='red')
plt.fill_between(range(len(pupil_stim_mean)), pupil_stim_mean - pupil_stim_sem, pupil_stim_mean + pupil_stim_sem, color='red', alpha=0.3)
plt.xlabel('Time')
plt.ylabel('Raw Pupil Size')
plt.title('Average Raw Pupil Size with SEM')
plt.legend()
plt.show()

# Calculate mean and SEM for each EEG band
# One figure per band, same layout as the pupil figure above.
for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']:
    eeg_spon_mean, eeg_spon_sem = calculate_mean_sem(eeg_spon[band])
    eeg_stim_mean, eeg_stim_sem = calculate_mean_sem(eeg_stim[band])

    # Plot EEG band data
    plt.figure(figsize=(10, 5))
    plt.plot(eeg_spon_mean, label='Spon', color='blue')
    plt.fill_between(range(len(eeg_spon_mean)), eeg_spon_mean - eeg_spon_sem, eeg_spon_mean + eeg_spon_sem, color='blue', alpha=0.3)
    plt.plot(eeg_stim_mean, label='Stim', color='red')
    plt.fill_between(range(len(eeg_stim_mean)), eeg_stim_mean - eeg_stim_sem, eeg_stim_mean + eeg_stim_sem, color='red', alpha=0.3)
    plt.xlabel('Time')
    plt.ylabel(f'{band} Power (z-scored)')
    plt.title(f'Average {band} Power with SEM (z-scored)')
    plt.legend()
    plt.show()


In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.stats import sem, zscore

# Load processed data
# This cell reads from the 'update' directory and rebinds save_path for any
# cell that runs after it.
save_path = '/content/drive/My Drive/autoencoder/NEW_DATA/update'
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the set of animals
# set2_animals contribute only their label-1 trials, other_animals only their
# label-0 trials (see the loop below).
set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']
other_animals = ['animal_33117', 'animal_24116', 'animal_24124', 'animal_3335', 'animal_33119']

# Initialize lists to store the raw pupil data and EEG band data for spon and stim
pupil_spon = []
pupil_stim = []
eeg_spon = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}
eeg_stim = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}

# Iterate over all animals to separate spon and stim data
# Animals that appear in neither list are ignored.
for animal_key in all_data.keys():
    animal_data = all_data[animal_key]
    labels = animal_data['label']

    if animal_key in set2_animals:
        # Append stimulation data
        pupil_stim.append(animal_data['pupil'][labels == 1])
        # axis=None: a single mean/std over all selected trials and samples.
        for i, band in enumerate(['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']):
            eeg_stim[band].append(zscore(animal_data['eeg'][labels == 1, :, i], axis=None))
    elif animal_key in other_animals:
        # Append spontaneous data
        pupil_spon.append(animal_data['pupil'][labels == 0])
        for i, band in enumerate(['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']):
            eeg_spon[band].append(zscore(animal_data['eeg'][labels == 0, :, i], axis=None))

# Function to calculate mean and SEM
def calculate_mean_sem(data):
    """Pool a list of arrays along axis 0 and return ``(mean, SEM)`` over it.

    Args:
        data: Sequence of arrays that share every dimension except the first.

    Returns:
        Tuple of the mean and the standard error of the mean, both computed
        across the pooled first axis.
    """
    pooled = np.concatenate(data, axis=0)
    return np.mean(pooled, axis=0), sem(pooled, axis=0)

# Calculate mean and SEM for raw pupil data
# Statistics are taken across all pooled trials of the contributing animals.
pupil_spon_mean, pupil_spon_sem = calculate_mean_sem(pupil_spon)
pupil_stim_mean, pupil_stim_sem = calculate_mean_sem(pupil_stim)

# Plot Raw Pupil data
# Solid line = mean trace, shaded band = mean +/- SEM; blue = spon, red = stim.
plt.figure(figsize=(10, 5))
plt.plot(pupil_spon_mean, label='Spon', color='blue')
plt.fill_between(range(len(pupil_spon_mean)), pupil_spon_mean - pupil_spon_sem, pupil_spon_mean + pupil_spon_sem, color='blue', alpha=0.3)
plt.plot(pupil_stim_mean, label='Stim', color='red')
plt.fill_between(range(len(pupil_stim_mean)), pupil_stim_mean - pupil_stim_sem, pupil_stim_mean + pupil_stim_sem, color='red', alpha=0.3)
plt.xlabel('Time')
plt.ylabel('Raw Pupil Size')
plt.title('Average Raw Pupil Size with SEM')
plt.legend()
plt.show()

# Calculate mean and SEM for each EEG band
# One figure per band, same layout as the pupil figure above.
for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']:
    eeg_spon_mean, eeg_spon_sem = calculate_mean_sem(eeg_spon[band])
    eeg_stim_mean, eeg_stim_sem = calculate_mean_sem(eeg_stim[band])

    # Plot EEG band data
    plt.figure(figsize=(10, 5))
    plt.plot(eeg_spon_mean, label='Spon', color='blue')
    plt.fill_between(range(len(eeg_spon_mean)), eeg_spon_mean - eeg_spon_sem, eeg_spon_mean + eeg_spon_sem, color='blue', alpha=0.3)
    plt.plot(eeg_stim_mean, label='Stim', color='red')
    plt.fill_between(range(len(eeg_stim_mean)), eeg_stim_mean - eeg_stim_sem, eeg_stim_mean + eeg_stim_sem, color='red', alpha=0.3)
    plt.xlabel('Time')
    plt.ylabel(f'{band} Power (z-scored)')
    plt.title(f'Average {band} Power with SEM (z-scored)')
    plt.legend()
    plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import os

# Load processed data
save_path = '/content/drive/My Drive/autoencoder/NEW_DATA/update'
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the set of animals
set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']

# Iterate over each animal in set2
# One independent t-SNE embedding and one figure per animal.
for animal_key in set2_animals:
    animal_data = all_data[animal_key]
    labels = animal_data['label']

    # Combine spon and stim pupil data
    # Rows are reordered so all label-0 trials come first; pupil_labels is
    # rebuilt in that same order so it stays aligned with pupil_data.
    pupil_data = np.concatenate((animal_data['pupil'][labels == 0], animal_data['pupil'][labels == 1]), axis=0)
    pupil_labels = np.concatenate((np.zeros(np.sum(labels == 0)), np.ones(np.sum(labels == 1))), axis=0)

    # Perform t-SNE on the combined pupil data
    # Fixed random_state so repeated runs use the same seed.
    tsne = TSNE(n_components=2, random_state=42)
    tsne_result = tsne.fit_transform(pupil_data)

    # Create a scatter plot of the t-SNE results
    # Points are coloured by label through the 'bwr' colormap.
    plt.figure()
    scatter = plt.scatter(tsne_result[:, 0], tsne_result[:, 1], c=pupil_labels, cmap='bwr', alpha=0.7)
    plt.colorbar(scatter, ticks=[0, 1], label='Type of Dilation')
    plt.xlabel('t-SNE Dimension 1')
    plt.ylabel('t-SNE Dimension 2')
    plt.title(f't-SNE of Pupil Data - {animal_key}')
    plt.grid(True)
    plt.legend(handles=scatter.legend_elements()[0], labels=['Spontaneous', 'Stimulus-induced'])
    plt.show()

In [None]:
import numpy as np
import os
import scipy.io
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Load processed data
save_path = '/content/drive/My Drive/autoencoder/NEW_DATA/update'
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the animal
animal_key = 'animal_3734'

# Get the data for the specific animal
animal_data = all_data[animal_key]
labels = animal_data['label']

# Combine spon and stim pupil data
# Rows are reordered so all label-0 trials come first; pupil_labels is rebuilt
# in that same order so it stays aligned with pupil_data.
pupil_data = np.concatenate((animal_data['pupil'][labels == 0], animal_data['pupil'][labels == 1]), axis=0)
pupil_labels = np.concatenate((np.zeros(np.sum(labels == 0)), np.ones(np.sum(labels == 1))), axis=0)

# Perform t-SNE on the combined pupil data
# Fixed random_state so repeated runs use the same seed.
tsne = TSNE(n_components=2, random_state=42)
tsne_result = tsne.fit_transform(pupil_data)

# Save the t-SNE results to a .mat file
tsne_data = {
    'tsne_result': tsne_result,
    'pupil_labels': pupil_labels
}
# NOTE(review): the file name hard-codes the animal id instead of using
# animal_key; update both together if the animal above is changed.
scipy.io.savemat(os.path.join(save_path, 'tsne_pupil_data_animal_3734.mat'), tsne_data)

# Create a scatter plot of the t-SNE results
# Points are coloured by label through the 'bwr' colormap.
plt.figure()
scatter = plt.scatter(tsne_result[:, 0], tsne_result[:, 1], c=pupil_labels, cmap='bwr', alpha=0.7)
plt.colorbar(scatter, ticks=[0, 1], label='Type of Dilation')
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.title(f't-SNE of Pupil Data - {animal_key}')
plt.grid(True)
plt.legend(handles=scatter.legend_elements()[0], labels=['Spontaneous', 'Stimulus-induced'])
plt.show()


In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.callbacks import Callback, EarlyStopping
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from scipy.stats import zscore
from itertools import combinations
from scipy.io import savemat
import json

# Original convolutional model function
def build_classification_model(input_shape):
    """Build the 1-D convolutional binary classifier and a feature-tap model.

    Args:
        input_shape: Shape of one sample, passed to the Keras ``Input`` layer.

    Returns:
        Tuple ``(model, intermediate_layer_model)``. ``model`` is compiled
        with Adam, binary cross-entropy and the accuracy metric.
        ``intermediate_layer_model`` shares the same layers and outputs the
        three convolution activations followed by the 128-unit dense
        activation.
    """
    layers = tf.keras.layers
    inputs = layers.Input(shape=input_shape)

    # Three conv stages (64 -> 128 -> 256 filters); the first two are each
    # followed by a pooling step of size 2.
    conv1_output = layers.Conv1D(64, 3, activation='relu', padding='same')(inputs)
    pooled1 = layers.MaxPooling1D(2)(conv1_output)
    conv2_output = layers.Conv1D(128, 3, activation='relu', padding='same')(pooled1)
    pooled2 = layers.MaxPooling1D(2)(conv2_output)
    conv3_output = layers.Conv1D(256, 3, activation='relu', padding='same')(pooled2)

    # Global average pooling feeds the dense head.
    features = layers.GlobalAveragePooling1D()(conv3_output)
    dense_output = layers.Dense(128, activation='relu')(features)
    outputs = layers.Dense(1, activation='sigmoid')(dense_output)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    intermediate_layer_model = tf.keras.Model(
        inputs=inputs,
        outputs=[conv1_output, conv2_output, conv3_output, dense_output],
    )

    return model, intermediate_layer_model

# New RNN model function
def build_rnn_classification_model(input_shape):
    """Build the two-layer LSTM binary classifier and a feature-tap model.

    Args:
        input_shape: Shape of one sample, passed to the Keras ``Input`` layer.

    Returns:
        Tuple ``(model, intermediate_layer_model)``. ``model`` is compiled
        with Adam, binary cross-entropy and the accuracy metric.
        ``intermediate_layer_model`` shares the same layers and outputs the
        first LSTM's full sequence followed by the 128-unit dense activation.
    """
    layers = tf.keras.layers
    inputs = layers.Input(shape=input_shape)

    # First LSTM returns the whole sequence; the second returns only its
    # final output.
    lstm_output = layers.LSTM(128, return_sequences=True)(inputs)
    sequence_summary = layers.LSTM(64)(lstm_output)
    dense_output = layers.Dense(128, activation='relu')(sequence_summary)
    outputs = layers.Dense(1, activation='sigmoid')(dense_output)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    intermediate_layer_model = tf.keras.Model(
        inputs=inputs,
        outputs=[lstm_output, dense_output],
    )

    return model, intermediate_layer_model

# New RNN model function using GRU layers
def build_gru_classification_model(input_shape):
    """Build the two-layer GRU binary classifier and a feature-tap model.

    Args:
        input_shape: Shape of one sample, passed to the Keras ``Input`` layer.

    Returns:
        Tuple ``(model, intermediate_layer_model)``. ``model`` is compiled
        with Adam, binary cross-entropy and the accuracy metric.
        ``intermediate_layer_model`` shares the same layers and outputs the
        first GRU's full sequence followed by the 128-unit dense activation.
    """
    layers = tf.keras.layers
    inputs = layers.Input(shape=input_shape)

    # First GRU returns the whole sequence; the second returns only its
    # final output.
    gru_output = layers.GRU(128, return_sequences=True)(inputs)
    sequence_summary = layers.GRU(64)(gru_output)
    dense_output = layers.Dense(128, activation='relu')(sequence_summary)
    outputs = layers.Dense(1, activation='sigmoid')(dense_output)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    intermediate_layer_model = tf.keras.Model(
        inputs=inputs,
        outputs=[gru_output, dense_output],
    )

    return model, intermediate_layer_model


# def extract_and_save_latent_features(model, intermediate_model, data, save_path, prefix):
#     # latent_features = intermediate_model.predict(data)
#     # lstm_features, dense_features = latent_features

#     # # Save the features as arrays in shape (nTrials x # latent points)
#     # np.save(os.path.join(save_path, f'{prefix}_lstm_features.npy'), lstm_features)
#     # np.save(os.path.join(save_path, f'{prefix}_dense_features.npy'), dense_features)

#     return lstm_features, dense_features


# Custom callback to print accuracy at the end of each epoch
class PrintAccuracyCallback(Callback):
    """Keras callback that prints train and validation accuracy per epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print the accuracies found in ``logs`` for the finished epoch.

        Args:
            epoch: Zero-based epoch index (printed one-based).
            logs: Metric dict supplied by Keras; may be None, and may lack
                ``val_accuracy`` when training runs without validation data.
        """
        logs = logs or {}

        def fmt(value):
            # A missing metric is shown as 'n/a' instead of raising inside
            # the numeric format spec.
            return 'n/a' if value is None else f'{value:.4f}'

        train_acc = fmt(logs.get('accuracy'))
        val_acc = fmt(logs.get('val_accuracy'))
        print(f'Epoch {epoch + 1}, Train Accuracy: {train_acc}, Val Accuracy: {val_acc}')

def train_with_combinations(data, labels, animal_ids, k, m):
    """Pretrain CNN classifiers on animal combinations and keep the best one.

    For every combination of ``k - 1`` animals drawn from ``animal_ids``, the
    trials of those animals are split into ``m`` folds (KFold, unshuffled) and
    a fresh model is trained per fold with early stopping on that fold's
    validation accuracy.

    Args:
        data: Trial array indexed by trial on axis 0; axis 1 is the length
            handed to the model builder.
        labels: Binary label per trial.
        animal_ids: Animal identifier per trial, aligned with ``data``.
        k: Combinations are formed from ``k - 1`` animals.
        m: Number of KFold splits per combination.

    Returns:
        Tuple ``(best_model, intermediate_model, history_best,
        average_accuracy, all_accuracies, all_preds, all_true_labels)``.
        ``best_model`` and ``intermediate_model`` belong to the fold with the
        highest validation accuracy; ``average_accuracy`` is the mean of
        ``all_accuracies`` (0 if nothing was trained); ``all_preds`` holds the
        raw ``model.predict`` outputs for every validation fold and
        ``all_true_labels`` the matching labels.
    """
    unique_animals = np.unique(animal_ids)
    best_model = None
    best_intermediate_model = None
    max_val_accuracy = 0
    history_best = None
    all_preds = []
    all_true_labels = []
    all_accuracies = []

    for train_animals in combinations(unique_animals, k-1):
        train_indices = np.where(np.isin(animal_ids, train_animals))[0]
        X_train, y_train = data[train_indices], labels[train_indices]
        kf = KFold(n_splits=m)

        for fold_idx, (train_index, test_index) in enumerate(kf.split(X_train)):
            print(f"Pretraining step on {train_animals} ... fold {fold_idx}")
            X_train_fold, X_val_fold = X_train[train_index], X_train[test_index]
            y_train_fold, y_val_fold = y_train[train_index], y_train[test_index]
            model, intermediate_model = build_classification_model((X_train_fold.shape[1], 1))
            # Alternative architecture:
            # model, intermediate_model = build_rnn_classification_model((X_train_fold.shape[1], X_train_fold.shape[2]))
            early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
            print_accuracy_callback = PrintAccuracyCallback()
            history = model.fit(X_train_fold, y_train_fold, batch_size=32, epochs=100, validation_data=(X_val_fold, y_val_fold), callbacks=[early_stopping, print_accuracy_callback], verbose=1, shuffle=True)
            val_accuracy = max(history.history['val_accuracy'])
            all_accuracies.append(val_accuracy)
            # `best_model is None` guarantees a model is returned even when
            # every fold scores 0.
            if best_model is None or val_accuracy > max_val_accuracy:
                max_val_accuracy = val_accuracy
                best_model = model
                # Keep the feature-tap model that shares layers with the best
                # model, not whichever one was built last.
                best_intermediate_model = intermediate_model
                history_best = history.history
            preds = model.predict(X_val_fold)
            all_preds.extend(preds)
            all_true_labels.extend(y_val_fold)

    average_accuracy = sum(all_accuracies) / len(all_accuracies) if all_accuracies else 0
    print("Best model-0 found! Proceeding to fine-tuning ...")
    return best_model, best_intermediate_model, history_best, average_accuracy, all_accuracies, all_preds, all_true_labels

def fine_tune_and_test(animal, best_model, intermediate_model, X_train, y_train, m, save_path):
    """Fine-tune the pretrained model on one animal with K-fold validation.

    Every fold starts from the pretrained weights that ``best_model`` holds
    on entry, so a fold's validation trials have never been used to fit the
    weights that are evaluated on them. Without that reset the model would
    keep training across folds and later folds would validate on trials seen
    in earlier folds.

    Args:
        animal: Identifier of the animal (used in progress messages only).
        best_model: Compiled Keras model carrying the pretrained weights. It
            is modified in place and ends up with the weights of the fold
            that reached the highest validation accuracy.
        intermediate_model: Unused; kept for interface compatibility.
        X_train: Trials of the animal, indexed by trial on axis 0.
        y_train: Binary label per trial.
        m: Number of KFold splits (unshuffled).
        save_path: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(max_val_accuracy, best_fold_history, all_preds,
        all_true_labels)``: the best per-fold validation accuracy, the Keras
        history dict of that fold, the raw ``predict`` outputs on every
        validation fold, and the matching true labels.
    """
    kf = KFold(n_splits=m)
    pretrained_weights = best_model.get_weights()
    best_fold_weights = None
    max_val_accuracy = 0
    best_fold_history = None
    all_preds = []
    all_true_labels = []

    for fold_idx, (train_index, test_index) in enumerate(kf.split(X_train)):
        print(f"Fine-tuning step on {animal} ... fold {fold_idx}")
        # Restart from the pretrained weights. NOTE: optimizer state is not
        # reset and carries over between folds.
        best_model.set_weights(pretrained_weights)
        X_train_fold, X_val_fold = X_train[train_index], X_train[test_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[test_index]
        early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
        print_accuracy_callback = PrintAccuracyCallback()
        history = best_model.fit(X_train_fold, y_train_fold, batch_size=32, epochs=100, validation_data=(X_val_fold, y_val_fold), callbacks=[early_stopping, print_accuracy_callback], verbose=1, shuffle=True)
        val_accuracy = max(history.history['val_accuracy'])
        if best_fold_history is None or val_accuracy > max_val_accuracy:
            max_val_accuracy = val_accuracy
            best_fold_history = history.history
            best_fold_weights = best_model.get_weights()
        preds = best_model.predict(X_val_fold)
        all_preds.extend(preds)
        all_true_labels.extend(y_val_fold)

    # The caller reads best_model.get_weights() after this function returns,
    # so leave the best fold's fine-tuned weights in place.
    if best_fold_weights is not None:
        best_model.set_weights(best_fold_weights)

    print("Best model-x found!")
    return max_val_accuracy, best_fold_history, all_preds, all_true_labels


def cross_validate_animals(data, labels, animal_ids, k, m0, m1, save_path):
    """Leave-one-animal-out pipeline: pretrain on the others, fine-tune on one.

    For each animal in turn, ``train_with_combinations`` is run on the trials
    of all other animals and the resulting best model is passed to
    ``fine_tune_and_test`` together with the trials of the held-out animal.

    Args:
        data: Trial array indexed by trial on axis 0.
        labels: Binary label per trial.
        animal_ids: Animal identifier per trial, aligned with ``data``.
        k: Forwarded to ``train_with_combinations``.
        m0: Number of folds used in pretraining.
        m1: Number of folds used in fine-tuning.
        save_path: Forwarded to ``fine_tune_and_test``.

    Returns:
        Dict keyed ``f'animal_{animal}'`` with histories, accuracies, final
        model weights (as nested lists), predictions and true labels of both
        stages.
    """
    results = {}
    for animal in np.unique(animal_ids):
        print(f"Processing animal {animal} as the test animal...")
        other_idx = np.where(animal_ids != animal)[0]
        own_idx = np.where(animal_ids == animal)[0]

        # Stage 1: pretraining on every animal except the held-out one.
        pretrain_out = train_with_combinations(
            data[other_idx], labels[other_idx], animal_ids[other_idx], k, m0
        )
        (best_model, intermediate_model, history_best, avg_accuracy_step1,
         all_accuracies, y_preds_pretrain, true_labels_pretrain) = pretrain_out

        # Stage 2: fine-tuning on the held-out animal's own trials.
        finetune_out = fine_tune_and_test(
            animal, best_model, intermediate_model,
            data[own_idx], labels[own_idx], m1, save_path
        )
        val_accuracy_step2, best_fold_history, y_preds_finetune, true_labels_finetune = finetune_out

        # Weights are converted to plain lists before being stored.
        weights_as_lists = [w.tolist() for w in best_model.get_weights()]

        results[f'animal_{animal}'] = {
            'History_BestMdl0': history_best,
            'AverageAccuracy_BestMdl0': avg_accuracy_step1,
            'AllAccuracies_BestMdl0': all_accuracies,
            'History_BestMdlx': best_fold_history,
            'AverageAccuracy_BestMdlx': val_accuracy_step2,
            'Weights_BestMdlx': weights_as_lists,
            'Y_Preds_BestMdl0': y_preds_pretrain,
            'True_Labels_BestMdl0': true_labels_pretrain,
            'Y_Preds_BestMdlx': y_preds_finetune,
            'True_Labels_BestMdlx': true_labels_finetune,
        }
    return results


# Function to train and cross-validate on each animal
def cross_validate_animal_individual(data, labels, animal_ids, m):
    """Train and evaluate a fresh CNN per fold, separately for every animal.

    Args:
        data: Trial array indexed by trial on axis 0.
        labels: Binary label per trial.
        animal_ids: Animal identifier per trial, aligned with ``data``.
        m: Number of KFold splits (unshuffled) per animal.

    Returns:
        Dict keyed ``f'animal_{animal}'``; each value has ``'accuracy'`` (one
        score per fold), ``'y_preds'`` (thresholded predictions) and
        ``'true_labels'`` collected over all folds.
    """
    results = {}
    for animal in np.unique(animal_ids):
        print(f"Processing animal {animal}...")
        rows = np.where(animal_ids == animal)[0]
        X, y = data[rows], labels[rows]

        fold_accuracies, fold_preds, fold_truth = [], [], []
        splitter = KFold(n_splits=m)

        for fold_number, (fit_idx, eval_idx) in enumerate(splitter.split(X), start=1):
            print(f"Training fold {fold_number} for animal {animal}...")
            X_train, y_train = X[fit_idx], y[fit_idx]
            X_test, y_test = X[eval_idx], y[eval_idx]

            model, _ = build_classification_model((X_train.shape[1], 1))
            # The held-out fold serves both as the early-stopping validation
            # set and as the set the accuracy is reported on.
            callbacks = [
                EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
                PrintAccuracyCallback(),
            ]
            model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_test, y_test), callbacks=callbacks, verbose=1, shuffle=True)

            # Threshold the sigmoid output at 0.5 to get class predictions.
            y_pred = (model.predict(X_test) > 0.5).astype(int)
            accuracy = accuracy_score(y_test, y_pred)
            print(f"Fold {fold_number}, Accuracy: {accuracy:.4f}")

            fold_accuracies.append(accuracy)
            fold_preds.extend(y_pred)
            fold_truth.extend(y_test)

        results[f'animal_{animal}'] = {
            'accuracy': fold_accuracies,
            'y_preds': fold_preds,
            'true_labels': fold_truth,
        }
    return results


# Function to z-score each animal's trials for each frequency band separately
def zscore_eeg_data(data, animal_ids):
    """Z-score every animal's trials separately for each band.

    For each (animal, band) pair a single mean and standard deviation are
    computed over all of that animal's trials and samples (``axis=None``).

    Args:
        data: 3-D array indexed as (trial, sample, band).
        animal_ids: 1-D array with the animal identifier of every trial.

    Returns:
        Array of the same shape as ``data`` holding the z-scores. Floating
        input keeps its dtype; any other dtype is returned as float64 so the
        z-scores are not truncated.
    """
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
    zscored_data = np.empty_like(data, dtype=out_dtype)
    for animal in np.unique(animal_ids):
        # The trial selection does not depend on the band: look it up once.
        animal_indices = np.where(animal_ids == animal)[0]
        for band in range(data.shape[2]):
            zscored_data[animal_indices, :, band] = zscore(data[animal_indices, :, band], axis=None)
    return zscored_data

In [None]:
# Run per-animal cross-validation for every EEG band and save the results
# Imported here so the cell does not rely on another cell having bound `scipy`.
from scipy.io import savemat

m = 5 # number of folds used in cross-validation

# Define the bands
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']

# save_path and set2_animals must already be defined by an earlier cell.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Loop over each band
for i, band in enumerate(bands):
    # i:i+1 keeps the band axis (size 1) so the data stays 3-D.
    data = np.concatenate([all_data[animal]['eeg'][..., i:i+1] for animal in set2_animals])
    labels = np.concatenate([all_data[animal]['label'] for animal in set2_animals])
    # One animal key per trial, aligned with data and labels.
    animal_ids = np.concatenate([[animal] * len(all_data[animal]['label']) for animal in set2_animals])

    # Z-score the data for each animal and each frequency band separately
    data = zscore_eeg_data(data, animal_ids)

    # Perform cross-validation on each animal
    results = cross_validate_animal_individual(data, labels, animal_ids, m)

    # Save results (one .mat file per band, named by band index)
    savemat(os.path.join(save_path, f'resultsall_{i}.mat'), {f'resultsall_{i}': results})

    print(f"Cross-validation for {band} band completed and results saved successfully.")

In [None]:

# Run pretrain/fine-tune cross-validation for every EEG band and save results
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.io` is loaded.
import scipy.io

set2_animals = ['animal_33118', 'animal_33135','animal_24124']

# Number of total animals; derived from the list so the two cannot drift apart.
k = len(set2_animals)
m0 = 3 # number of folds used in pretraining
m1 = 3 # number of folds used in fine-tuning

# Define the bands
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']

# save_path must already be defined by an earlier cell.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Loop over each band
for i, band in enumerate(bands):
    # i:i+1 keeps the band axis (size 1) so the data stays 3-D.
    data = np.concatenate([all_data[animal]['eeg'][..., i:i+1] for animal in set2_animals])
    labels = np.concatenate([all_data[animal]['label'] for animal in set2_animals])
    # One animal key per trial, aligned with data and labels.
    animal_ids = np.concatenate([[animal] * len(all_data[animal]['label']) for animal in set2_animals])

    # Z-score the data for each animal and each frequency band separately
    data = zscore_eeg_data(data, animal_ids)

    # Perform cross-validation
    results = cross_validate_animals(data, labels, animal_ids, k, m0, m1, save_path)

    # One .mat file per band, named by band index.
    scipy.io.savemat(os.path.join(save_path, f'results_PROP__3_{i}.mat'), {f'results_PROP__3_{i}': results})

    print(f"Cross-validation for {band} band completed and results saved successfully.")

In [None]:

# Stack all bands of every animal in set2_animals (all_data and set2_animals
# come from earlier cells), with labels and one animal key per trial.
data = np.concatenate([all_data[animal]['eeg'] for animal in set2_animals])
labels = np.concatenate([all_data[animal]['label'] for animal in set2_animals])
animal_ids = np.concatenate([[animal] * len(all_data[animal]['label']) for animal in set2_animals])
# Normalise per animal and per band before plotting.
data = zscore_eeg_data(data, animal_ids)

import matplotlib.pyplot as plt

# Function to calculate and plot the average EEG band trace based on labels
def plot_average_eeg_band_trace(data, labels, animal_ids, bands):
    """Plot the across-animal mean and SEM trace of each band, split by label.

    Each animal contributes one mean trace per band and label (average over
    its own trials); the plotted mean and SEM are then taken across animals.

    Args:
        data: 3-D array indexed as (trial, sample, band).
        labels: Label per trial; 0 is plotted as spontaneous, 1 as stimulated.
        animal_ids: Animal identifier per trial, aligned with ``data``.
        bands: Band names, in the order of the last axis of ``data``.
    """
    per_animal_means = {band: {0: [], 1: []} for band in bands}

    for animal in np.unique(animal_ids):
        rows = np.where(animal_ids == animal)[0]
        animal_data, animal_labels = data[rows], labels[rows]
        for band_idx, band in enumerate(bands):
            band_data = animal_data[:, :, band_idx]
            for label_value in (0, 1):
                trials = band_data[animal_labels == label_value]
                per_animal_means[band][label_value].append(np.mean(trials, axis=0))

    conditions = ((0, 'Spontaneous'), (1, 'Stimulated'))
    for band in bands:
        # Compute all statistics first, then draw.
        stats = []
        for label_value, condition in conditions:
            traces = per_animal_means[band][label_value]
            stats.append((condition, np.mean(traces, axis=0), sem(traces, axis=0)))

        plt.figure(figsize=(10, 6))
        for condition, mean_trace, sem_trace in stats:
            plt.plot(mean_trace, label=f'{band} {condition} Mean')
            plt.fill_between(range(len(mean_trace)), mean_trace - sem_trace, mean_trace + sem_trace, alpha=0.3, label=f'{band} {condition} SEM')
        plt.title(f'Average {band} Band Trace Across All Animals')
        plt.xlabel('Time')
        plt.ylabel('Z-scored Power')
        plt.legend()
        plt.grid(True)
        plt.show()

# Define the EEG bands
# Order must match the last axis of `data`.
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']

# Plot the average EEG band trace based on labels
plot_average_eeg_band_trace(data, labels, animal_ids, bands)


In [None]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.callbacks import Callback, EarlyStopping
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from scipy.stats import zscore
from scipy.io import savemat

# Function to build the classification model
def build_classification_model(input_shape):
    """Build and compile the 1-D CNN used for binary trial classification.

    Architecture: three Conv1D layers (64, 128, 256 filters, kernel size 3,
    ReLU, 'same' padding) with max-pooling by 2 after the first two, followed
    by global average pooling, a 128-unit ReLU dense layer and a single
    sigmoid output unit.

    Args:
        input_shape: shape of one sample, passed to ``Input(shape=...)``.

    Returns:
        A ``tf.keras.Model`` compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    layers = tf.keras.layers

    inputs = layers.Input(shape=input_shape)
    x = layers.Conv1D(64, 3, activation='relu', padding='same')(inputs)
    x = layers.MaxPooling1D(2)(x)
    x = layers.Conv1D(128, 3, activation='relu', padding='same')(x)
    x = layers.MaxPooling1D(2)(x)
    x = layers.Conv1D(256, 3, activation='relu', padding='same')(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Custom callback to print accuracy at the end of each epoch
class PrintAccuracyCallback(Callback):
    """Keras callback that prints train and validation accuracy after each epoch."""

    @staticmethod
    def _format_metric(value):
        """Format a metric with four decimals, or 'n/a' when it is missing."""
        return 'n/a' if value is None else f'{value:.4f}'

    def on_epoch_end(self, epoch, logs=None):
        """Print the accuracies found in ``logs`` for the epoch that just ended.

        Args:
            epoch: zero-based epoch index (printed one-based).
            logs: metric dict supplied by Keras; may be None, and
                'val_accuracy' is absent when no validation data is used.
        """
        # Guard against logs=None and missing keys: formatting None with
        # ':.4f' would raise TypeError and abort training.
        logs = logs or {}
        train_acc = self._format_metric(logs.get('accuracy'))
        val_acc = self._format_metric(logs.get('val_accuracy'))
        print(f'Epoch {epoch + 1}, Train Accuracy: {train_acc}, Val Accuracy: {val_acc}')

# Function to z-score each animal's trials for each frequency band separately
def zscore_eeg_data(data, animal_ids):
    """Z-score EEG data separately for every (animal, band) pair.

    For each animal and each band, all values of that animal's trials (every
    trial and time point pooled, ``axis=None``) are standardised together.

    Args:
        data: array indexed as [trial, time, band].
        animal_ids: one animal identifier per trial, aligned with the first
            axis of ``data``.

    Returns:
        Array with the same shape as ``data``. Floating-point inputs keep
        their dtype; any other dtype is returned as float64 so the z-scores
        are not truncated.

    Raises:
        ValueError: if ``animal_ids`` does not have one entry per trial.
    """
    data = np.asarray(data)
    animal_ids = np.asarray(animal_ids)
    if animal_ids.shape[0] != data.shape[0]:
        raise ValueError(
            f"animal_ids has {animal_ids.shape[0]} entries but data has "
            f"{data.shape[0]} trials"
        )

    # np.empty_like(data) would inherit an integer dtype and silently
    # truncate the z-scores, so always allocate a floating-point output.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
    zscored_data = np.empty(data.shape, dtype=out_dtype)

    for animal in np.unique(animal_ids):
        animal_indices = np.where(animal_ids == animal)[0]
        for band in range(data.shape[2]):
            zscored_data[animal_indices, :, band] = zscore(data[animal_indices, :, band], axis=None)
    return zscored_data

# Function to train and cross-validate on all data
def cross_validate_all_data(data, labels, m):
    """Run m-fold cross-validation of the CNN classifier over all trials.

    A fresh model is built for every fold and trained for up to 100 epochs
    with early stopping on validation accuracy (patience 10, best weights
    restored). The held-out fold is used as the validation set.

    Args:
        data: sample array, indexed by trial on the first axis.
        labels: per-trial labels aligned with ``data``.
        m: number of folds.

    Returns:
        Tuple ``(average_accuracy, all_accuracies, all_preds, all_true_labels)``:
        the mean of the per-fold best validation accuracies, the list of those
        accuracies, and the held-out predictions and labels pooled over folds.
    """
    splitter = KFold(n_splits=m, shuffle=True, random_state=42)
    fold_scores = []
    pooled_preds = []
    pooled_targets = []

    for fold_no, (train_rows, val_rows) in enumerate(splitter.split(data), start=1):
        print(f"Training fold {fold_no}...")

        # Shuffle the training rows once more before fitting.
        order = np.random.permutation(len(train_rows))
        shuffled_rows = train_rows[order]
        X_train, y_train = data[shuffled_rows], labels[shuffled_rows]
        X_val, y_val = data[val_rows], labels[val_rows]

        model = build_classification_model((X_train.shape[1], X_train.shape[2]))
        fit_callbacks = [
            EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True),
            PrintAccuracyCallback(),
        ]
        history = model.fit(
            X_train,
            y_train,
            batch_size=32,
            epochs=100,
            validation_data=(X_val, y_val),
            callbacks=fit_callbacks,
            verbose=1,
            shuffle=True,
        )

        # The fold score is the best validation accuracy seen during training.
        fold_scores.append(max(history.history['val_accuracy']))

        pooled_preds.extend(model.predict(X_val))
        pooled_targets.extend(y_val)

    average_accuracy = np.mean(fold_scores)
    print(f"Cross-validation completed. Average Validation Accuracy: {average_accuracy:.4f}")
    return average_accuracy, fold_scores, pooled_preds, pooled_targets

# Main code to load data, z-score, and perform cross-validation.
# all_data.npy holds a pickled dict keyed by animal name; each entry is read
# below through its 'label' and 'eeg' keys.
# NOTE: allow_pickle=True unpickles arbitrary objects - only load files you trust.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the set of animals
# set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']
# other_animals = ['animal_33117', 'animal_24116', 'animal_24124', 'animal_3335', 'animal_33119']

# This first set2_animals assignment is overwritten a few lines below;
# other_animals is not used by the active code of this cell.
set2_animals = [ 'animal_33105', 'animal_31117']
other_animals = [ 'animal_24124', 'animal_3335']

# Effective selection: only these animals are used in the rest of the cell.
set2_animals = ['animal_24124', 'animal_33118', 'animal_33135', 'animal_33119']
# set2_animals = [ 'animal_33118', 'animal_33119', 'animal_33135']


# Initialize lists to store the raw pupil data and EEG band data for spon and stim
# (one list per band; each list gets one array per selected animal)
eeg_spon = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}
eeg_stim = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}

# Iterate over all animals to separate spon and stim data
for animal_key in all_data.keys():
    animal_data = all_data[animal_key]
    labels = animal_data['label']

    if animal_key in set2_animals:
        # Append stimulation (label 1) and spontaneous (label 0) data, each
        # z-scored over all of its values at once (axis=None), so the two
        # conditions are standardised independently of each other.
        for i, band in enumerate(['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']):
            eeg_stim[band].append(zscore(animal_data['eeg'][labels == 1, :, i], axis=None))
            eeg_spon[band].append(zscore(animal_data['eeg'][labels == 0, :, i], axis=None))

# Define the bands
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']

# Loop over each band: gather the selected animals' trials, z-score them per
# animal, cross-validate the classifier and save the results to a .mat file.
for i, band in enumerate(bands):
    print(f"Processing {band} band...")

    combined_data = []
    combined_labels = []
    animal_ids = []

    for animal_key in all_data.keys():
        animal_data = all_data[animal_key]
        labels = animal_data['label']
        eeg_data = animal_data['eeg'][..., i:i+1]  # Select the current band data

        if animal_key in set2_animals:
            stim_trials = eeg_data[labels == 1]
            spon_trials = eeg_data[labels == 0]
            combined_data.append(stim_trials)
            combined_labels.append(np.ones(len(stim_trials)))
            combined_data.append(spon_trials)
            combined_labels.append(np.zeros(len(spon_trials)))
            # Record one id per appended trial. Previously ids were added for
            # every animal in all_data and then truncated, which mislabelled
            # rows whenever all_data contained animals outside set2_animals
            # and therefore z-scored the wrong groups of trials together.
            animal_ids.extend([animal_key] * (len(stim_trials) + len(spon_trials)))

    combined_data = np.concatenate(combined_data, axis=0)
    combined_labels = np.concatenate(combined_labels, axis=0)
    animal_ids = np.array(animal_ids)

    # Print shapes for debugging
    print(f"Shape of combined_data: {combined_data.shape}")
    print(f"Shape of combined_labels: {combined_labels.shape}")
    print(f"Shape of animal_ids: {animal_ids.shape}")

    # Z-score the data for each animal and each frequency band separately
    combined_data = zscore_eeg_data(combined_data, animal_ids)

    # Perform cross-validation
    m = 5  # Number of folds
    average_accuracy, all_accuracies, all_preds, all_true_labels = cross_validate_all_data(combined_data, combined_labels, m)

    # Save results
    results = {
        'AverageAccuracy': average_accuracy,
        'AllAccuracies': all_accuracies,
        'Y_Preds': all_preds,
        'True_Labels': all_true_labels
    }

    savemat(os.path.join(save_path, f'resultclon_{band}_band.mat'), results)
    print(f"Cross-validation for {band} band completed and results saved successfully.")

# # # Initialize lists to store the raw pupil data and EEG band data for spon and stim
# eeg_spon = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}
# eeg_stim = {band: [] for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']}

# # Iterate over all animals to separate spon and stim data
# for animal_key in all_data.keys():
#     animal_data = all_data[animal_key]
#     labels = animal_data['label']

#     if animal_key in set2_animals:
#         # Append stimulation data
#         for i, band in enumerate(['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']):
#             eeg_stim[band].append(zscore(animal_data['eeg'][labels == 1, :, i], axis=None))
#             # eeg_spon[band].append(zscore(animal_data['eeg'][labels == 0, :, i], axis=None))

#     elif animal_key in other_animals:
#         # Append spontaneous data
#         for i, band in enumerate(['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']):
#             eeg_spon[band].append(zscore(animal_data['eeg'][labels == 0, :, i], axis=None))

# # Define the bands
# bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']

# # Loop over each band
# for i, band in enumerate(bands):
#     print(f"Processing {band} band...")

#     combined_data = []
#     combined_labels = []
#     animal_ids = []

#     for animal_key in all_data.keys():
#         animal_data = all_data[animal_key]
#         labels = animal_data['label']
#         eeg_data = animal_data['eeg'][..., i:i+1]  # Select the current band data

#         if animal_key in set2_animals:
#             combined_data.append(eeg_data[labels == 1])
#             combined_labels.append(np.ones(np.sum(labels == 1)))
#             # combined_data.append(eeg_data[labels == 0])
#             # combined_labels.append(np.zeros(np.sum(labels == 0)))

#         elif animal_key in other_animals:
#             combined_data.append(eeg_data[labels == 0])
#             combined_labels.append(np.zeros(np.sum(labels == 0)))

#         animal_ids.extend([animal_key] * len(labels))

#     combined_data = np.concatenate(combined_data, axis=0)
#     combined_labels = np.concatenate(combined_labels, axis=0)
#     animal_ids = np.array(animal_ids[:combined_data.shape[0]])

#     # Print shapes for debugging
#     print(f"Shape of combined_data: {combined_data.shape}")
#     print(f"Shape of combined_labels: {combined_labels.shape}")
#     print(f"Shape of animal_ids: {animal_ids.shape}")

#     # Z-score the data for each animal and each frequency band separately
#     combined_data = zscore_eeg_data(combined_data, animal_ids)

#     # Perform cross-validation
#     m = 5  # Number of folds
#     average_accuracy, all_accuracies, all_preds, all_true_labels = cross_validate_all_data(combined_data, combined_labels, m)

#     # Save results
#     results = {
#         'AverageAccuracy': average_accuracy,
#         'AllAccuracies': all_accuracies,
#         'Y_Preds': all_preds,
#         'True_Labels': all_true_labels
#     }

#     savemat(os.path.join(save_path, f'results_includes_combo1_{band}_band.mat'), results)
#     print(f"Cross-validation for {band} band completed and results saved successfully.")


In [None]:
# Save eeg pupil data as matfile

import numpy as np
import scipy.io as sio

# Reload the per-animal dictionary (pickled dict keyed by animal name).
# NOTE: allow_pickle=True unpickles arbitrary objects - only load files you trust.
# NOTE(review): `os` and `save_path` are not defined in this cell; they must
# already exist from an earlier cell.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the set of animals
# The first set2_animals assignment is overwritten by the third line below, and
# other_animals is only referenced by the commented-out branch further down.
set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']
other_animals = ['animal_33117', 'animal_24116', 'animal_24124', 'animal_3335', 'animal_33119']
set2_animals = ['animal_33119', 'animal_33118', 'animal_33135']

# Initialize lists to store the raw pupil data and EEG band data for spon and stim
pupil_spon = []
pupil_stim = []
eeg_spon = []
eeg_stim = []

# Iterate over all animals to separate spon and stim data
for animal_key in all_data.keys():
    animal_data = all_data[animal_key]
    labels = animal_data['label']
    pupil_data = animal_data['pupil']
    eeg_data = animal_data['eeg']

    if animal_key in set2_animals:
        # Append both conditions of the selected animals:
        # label 1 -> stim lists, label 0 -> spon lists
        pupil_stim.append(pupil_data[labels == 1])
        eeg_stim.append(eeg_data[labels == 1])
        pupil_spon.append(pupil_data[labels == 0])
        eeg_spon.append(eeg_data[labels == 0])
    # elif animal_key in other_animals:
    #     # Append spontaneous data
    #     pupil_spon.append(pupil_data[labels == 0])
    #     eeg_spon.append(eeg_data[labels == 0])

# Combine data for each set (stack the per-animal arrays along the trial axis;
# np.concatenate raises ValueError if no animal matched and a list is empty)
pupil_spon = np.concatenate(pupil_spon, axis=0)
pupil_stim = np.concatenate(pupil_stim, axis=0)
eeg_spon = np.concatenate(eeg_spon, axis=0)
eeg_stim = np.concatenate(eeg_stim, axis=0)

# Save to .mat file
sio.savemat(os.path.join(save_path, 'pupil_eeg_data_control_dose.mat'), {
    'pupil_spon': pupil_spon,
    'pupil_stim': pupil_stim,
    'eeg_spon': eeg_spon,
    'eeg_stim': eeg_stim
})

print("Pupil and EEG data saved successfully.")

In [None]:
# Reload the per-animal dictionary (pickled dict keyed by animal name).
# NOTE: allow_pickle=True unpickles arbitrary objects - only load files you trust.
all_data = np.load(os.path.join(save_path, 'all_data.npy'), allow_pickle=True).item()

# Define the set of animals
# other_animals is every key of all_data that is not listed in set2_animals.
set2_animals = ['animal_3398', 'animal_33105', 'animal_31117', 'animal_31108', 'animal_3734']
other_animals = [animal for animal in all_data.keys() if animal not in set2_animals]

# Function to calculate mean and SEM
def calculate_mean_sem(data):
    """Return the mean and standard error of ``data`` along its first axis.

    The standard error is the population standard deviation (NumPy's default
    ``ddof=0``) divided by the square root of the number of rows.

    Args:
        data: array whose first axis indexes observations (e.g. trials).

    Returns:
        Tuple ``(mean, sem)``, both reduced over axis 0.
    """
    n_obs = data.shape[0]
    centre = np.mean(data, axis=0)
    spread = np.std(data, axis=0)
    return centre, spread / np.sqrt(n_obs)

# Iterate over other_animals to plot the average EEG for spontaneous data.
# One figure is produced per (animal, band) pair.
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'LowGamma', 'HighGamma']

for animal_key in other_animals:
    animal_data = all_data[animal_key]
    labels = animal_data['label']
    # Keep only the trials labelled 0 (spontaneous)
    eeg_spon = animal_data['eeg'][labels == 0]

    # Plot the average EEG for each band
    for i, band in enumerate(bands):
        # Band i of every spontaneous trial; mean and SEM are taken across trials
        eeg_band_data = eeg_spon[:, :, i]
        mean_data, sem_data = calculate_mean_sem(eeg_band_data)

        plt.figure()
        plt.plot(mean_data, label=f'{animal_key} - {band}')
        plt.fill_between(range(len(mean_data)), mean_data - sem_data, mean_data + sem_data, alpha=0.3)
        plt.xlabel('Time')
        # NOTE(review): the axis label says "z-scored", but no z-scoring is
        # applied in this cell - confirm the stored 'eeg' data is already z-scored.
        plt.ylabel(f'{band} Power (z-scored)')
        plt.title(f'Average {band} Power with SEM - {animal_key}')
        plt.legend()
        plt.grid(True)
        plt.show()

In [None]:
!pip install umap-learn

In [None]:
# Raw UMAP Visualization

import numpy as np
import pandas as pd
import os
import umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Define file paths: one CSV table per group, all under the mounted Drive folder.
# Judging by the names these are the propranolol, phentolamine, control and
# clonidine tables (see the group names used for the plots below).
prop_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/prop_table_aug.csv'
phent_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/phent_tablenew.csv'
control_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/control_NEW.csv'
clon_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/clon_table.csv'

# Define function to load and extract data
def load_and_extract_data(file_path):
    """Read one group's CSV table and split its EEG band traces by condition.

    For every animal (column 'Animal') the trials are labelled as stimulated
    when 'StimulationFrequency' is non-zero and as spontaneous otherwise. For
    each band, the columns whose name starts with the band prefix are taken
    and restricted to column positions 1..1999 of that selection.

    Args:
        file_path: path of the CSV file to read.

    Returns:
        Dict mapping band prefix -> {'stim': array, 'spon': array}, where each
        array stacks the trials of all animals along axis 0.
    """
    table = pd.read_csv(file_path)
    band_prefixes = ['DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr']
    collected = {prefix: {'stim': [], 'spon': []} for prefix in band_prefixes}

    for subject in table['Animal'].unique():
        subject_rows = table[table['Animal'] == subject]
        is_stim = (subject_rows['StimulationFrequency'].values != 0).astype(int)

        for prefix in band_prefixes:
            wanted = [name for name in subject_rows.columns if name.startswith(prefix)]
            traces = subject_rows[wanted].values[:, 1:2000]
            collected[prefix]['stim'].append(traces[is_stim == 1])
            collected[prefix]['spon'].append(traces[is_stim == 0])

    # Stack the per-animal pieces of every band and condition.
    return {
        prefix: {
            condition: np.concatenate(pieces, axis=0)
            for condition, pieces in by_condition.items()
        }
        for prefix, by_condition in collected.items()
    }

# Load and extract data from each group.
# Each result maps band prefix -> {'stim': array, 'spon': array}.
prop_data = load_and_extract_data(prop_file_path)
phent_data = load_and_extract_data(phent_file_path)
control_data = load_and_extract_data(control_file_path)
clon_data = load_and_extract_data(clon_file_path)

# Perform UMAP and plot results
def plot_umap_for_band_and_label(eeg_data, label, band, group_names, title):
    """Embed one band/condition of all four groups with UMAP and scatter-plot it.

    Args:
        eeg_data: dict with keys 'prop', 'phent', 'clon' and 'control', each
            mapping band -> {'stim': array, 'spon': array}.
        label: condition key to plot ('stim' or 'spon').
        band: band key to plot.
        group_names: display names; positions 0-3 are paired with the keys
            'prop', 'phent', 'clon', 'control' in that order.
        title: text inserted into the figure title.
    """
    source_keys = ('prop', 'phent', 'clon', 'control')
    blocks = [eeg_data[key][band][label] for key in source_keys]

    # Stack the trials of all groups and remember which group each row came from.
    combined = np.concatenate(blocks, axis=0)
    row_groups = np.concatenate(
        [np.full(block.shape[0], group_names[pos]) for pos, block in enumerate(blocks)]
    )

    # Standardise every feature, then project to two dimensions.
    standardised = StandardScaler().fit_transform(combined)
    projector = umap.UMAP(n_neighbors=75, min_dist=0.1, n_components=2, random_state=42)
    points = projector.fit_transform(standardised)

    plt.figure(figsize=(10, 8))
    for display_name in group_names:
        selected = row_groups == display_name
        plt.scatter(points[selected, 0], points[selected, 1], label=display_name, alpha=0.6)

    plt.title(f'UMAP - {title} {band} Band')
    plt.xlabel('UMAP 1')
    plt.ylabel('UMAP 2')
    plt.legend()
    plt.show()

# Define group names (display labels; the order must match the
# 'prop', 'phent', 'clon', 'control' order used inside the plotting function)
group_names = ['Propranolol', 'Phentolamine','Clonidine', 'Control']

# Prepare combined data structure
eeg_data = {
    'prop': prop_data,
    'phent': phent_data,
    'clon': clon_data,
    'control': control_data
}

# Plot UMAP for each frequency band and condition
# (6 bands x 2 conditions = 12 figures, each with its own UMAP fit)
for band in ['DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr']:
    for label, condition in zip(['stim', 'spon'], ['Stimulated', 'Spontaneous']):
        plot_umap_for_band_and_label(eeg_data, label, band, group_names, condition)


In [None]:
# Define function to calculate the moving average
def moving_average(data, window_size):
    """Smooth a 1-D sequence with an unweighted moving average.

    Only fully overlapping windows are kept ('valid' convolution), so for an
    input of length n >= window_size the result has n - window_size + 1 values.

    Args:
        data: 1-D sequence of numbers.
        window_size: number of consecutive samples averaged per output value.

    Returns:
        1-D NumPy array of window means.
    """
    kernel = np.ones(window_size) / window_size
    smoothed = np.convolve(data, kernel, mode='valid')
    return smoothed

# Define function to load and extract data with moving average applied
def load_and_extract_data(file_path, window_size=10):
    """Read one group's CSV table and return smoothed band traces per condition.

    Trials are labelled as stimulated when 'StimulationFrequency' is non-zero
    and as spontaneous otherwise. For each band, the columns whose name starts
    with the band prefix are restricted to positions 800..1799 of that
    selection, and every trial is smoothed with ``moving_average``.

    Args:
        file_path: path of the CSV file to read.
        window_size: moving-average window passed to ``moving_average``.

    Returns:
        Dict mapping band prefix -> {'stim': array, 'spon': array}, where each
        array stacks the smoothed trials of all animals along axis 0.
    """
    table = pd.read_csv(file_path)
    band_prefixes = ['DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr']
    collected = {prefix: {'stim': [], 'spon': []} for prefix in band_prefixes}

    for subject in table['Animal'].unique():
        subject_rows = table[table['Animal'] == subject]
        is_stim = (subject_rows['StimulationFrequency'].values != 0).astype(int)

        for prefix in band_prefixes:
            wanted = [name for name in subject_rows.columns if name.startswith(prefix)]
            window = subject_rows[wanted].values[:, 800:1800]
            # Smooth every trial (row) independently.
            smooth = np.apply_along_axis(moving_average, 1, window, window_size)
            collected[prefix]['stim'].append(smooth[is_stim == 1])
            collected[prefix]['spon'].append(smooth[is_stim == 0])

    # Stack the per-animal pieces of every band and condition.
    return {
        prefix: {
            condition: np.concatenate(pieces, axis=0)
            for condition, pieces in by_condition.items()
        }
        for prefix, by_condition in collected.items()
    }

# Load and extract data from each group with smoothing
# (default window_size=10; this overwrites the unsmoothed *_data variables
# created by the previous cell)
prop_data = load_and_extract_data(prop_file_path)
phent_data = load_and_extract_data(phent_file_path)
control_data = load_and_extract_data(control_file_path)
clon_data = load_and_extract_data(clon_file_path)

# Define a function to compute global UMAP embeddings to determine axis limits
def compute_global_limits(eeg_data, group_names):
    """Fit a UMAP per band and condition and return the overall axis ranges.

    For every band (taken from the first group in ``eeg_data``) and each of the
    'stim' and 'spon' conditions, the trials of all requested groups are
    stacked, standardised and embedded in 2-D. The limits are the minimum and
    maximum coordinates over all of these embeddings.

    Args:
        eeg_data: dict mapping group -> band -> {'stim': array, 'spon': array}.
        group_names: groups to include; names missing from ``eeg_data`` are
            skipped.

    Returns:
        Tuple ``(xlim, ylim)`` where each is a ``(min, max)`` pair.
    """
    present = [name for name in group_names if name in eeg_data]
    band_keys = eeg_data[list(eeg_data.keys())[0]].keys()  # Access keys dynamically

    embeddings = []
    for band_key in band_keys:
        for condition in ('stim', 'spon'):
            stacked = np.concatenate(
                [eeg_data[name][band_key][condition] for name in present], axis=0
            )
            standardised = StandardScaler().fit_transform(stacked)
            projector = umap.UMAP(n_neighbors=75, min_dist=0.1, n_components=2, random_state=42)
            embeddings.append(projector.fit_transform(standardised))

    every_point = np.concatenate(embeddings, axis=0)
    x_values = every_point[:, 0]
    y_values = every_point[:, 1]
    return (x_values.min(), x_values.max()), (y_values.min(), y_values.max())

# Calculate global limits for consistent axis ranges.
# This fits one UMAP per band and condition (6 x 2 fits) on the smoothed data.
# NOTE(review): the resulting xlim/ylim are not passed to plot_umap_for_band
# in this cell, so the plots below do not use them - confirm whether they should.
xlim, ylim = compute_global_limits({
    'prop': prop_data,
    'phent': phent_data,
    'clon': clon_data,
    'control': control_data
}, ['prop', 'phent', 'clon', 'control'])

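# Perform UMAP on the pooled stim and spon trials of one band and plot the result
# (note: this function does not apply the global xlim/ylim computed above)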
def plot_umap_for_band(eeg_data, band, group_names, title):
    """Embed the stim and spon trials of one band together and scatter-plot them.

    All requested groups' 'stim' trials followed by their 'spon' trials are
    stacked, standardised and projected to 2-D with UMAP. Groups are
    distinguished by colour and conditions by marker shape.

    Args:
        eeg_data: dict mapping group -> band -> {'stim': array, 'spon': array}.
        band: band key to plot.
        group_names: groups to include; names missing from ``eeg_data`` are
            skipped. Every included name must be one of 'prop', 'phent',
            'clon', 'control' (the keys of the colour table).
        title: text inserted into the figure title.
    """
    conditions = ('stim', 'spon')
    present = [name for name in group_names if name in eeg_data]

    # Build the sample matrix and a parallel "<group>_<condition>" tag per row,
    # with all stim blocks first and all spon blocks after them.
    pieces = []
    tags = []
    for condition in conditions:
        for name in present:
            block = eeg_data[name][band][condition]
            pieces.append(block)
            tags.append(np.full(block.shape[0], f'{name}_{condition}'))
    stacked = np.concatenate(pieces, axis=0)
    row_tags = np.concatenate(tags, axis=0)

    # Standardise every feature, then project to two dimensions.
    standardised = StandardScaler().fit_transform(stacked)
    projector = umap.UMAP(n_neighbors=75, min_dist=0.1, n_components=2, random_state=42)
    points = projector.fit_transform(standardised)

    plt.figure(figsize=(10, 8))
    marker_for = {'stim': '^', 'spon': 'o'}  # marker shape encodes the condition
    colour_for = {'prop': 'red', 'phent': 'blue', 'clon': 'green', 'control': 'black'}  # colour encodes the group

    for name in present:
        for condition in conditions:
            selected = row_tags == f'{name}_{condition}'
            plt.scatter(
                points[selected, 0],
                points[selected, 1],
                label=f'{name} {condition}',
                alpha=0.6,
                marker=marker_for[condition],
                color=colour_for[name],
            )

    plt.title(f'UMAP - {title} {band} Band')
    plt.xlabel('UMAP 1')
    plt.ylabel('UMAP 2')

    plt.legend()
    plt.show()

# Define group names (these are the dictionary keys, not display labels)
group_names = ['prop', 'phent', 'clon', 'control']

# Plot UMAP for each frequency band
# (one figure per band, with stim and spon trials embedded together)
for band in ['DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr']:
    plot_umap_for_band({
        'prop': prop_data,
        'phent': phent_data,
        'clon': clon_data,
        'control': control_data
    }, band, group_names, 'Combined Stim and Spon')

In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
import umap
import matplotlib.pyplot as plt
from scipy.stats import zscore

# Define file paths: one CSV table per group, all under the mounted Drive folder
# (same four files as used by the raw-UMAP cell above)
prop_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/prop_table_aug.csv'
phent_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/phent_tablenew.csv'
control_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/control_NEW.csv'
clon_file_path = '/content/drive/My Drive/autoencoder/NEW_DATA/CLASSIFY/clon_table.csv'

# Function to load and extract data
def load_and_extract_data(file_path):
    """Read one group's CSV table and split its EEG band traces by condition.

    Trials are labelled as stimulated when 'StimulationFrequency' is non-zero
    and as spontaneous otherwise. For each band, the columns whose name starts
    with the band prefix are taken and restricted to positions 850..1849 of
    that selection.

    Args:
        file_path: path of the CSV file to read.

    Returns:
        Dict mapping band prefix -> {'stim': array, 'spon': array}, where each
        array stacks the trials of all animals along axis 0.
    """
    frame = pd.read_csv(file_path)
    prefixes = ['DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr']
    gathered = {prefix: {'stim': [], 'spon': []} for prefix in prefixes}

    for animal_name in frame['Animal'].unique():
        rows = frame[frame['Animal'] == animal_name]
        stim_flag = (rows['StimulationFrequency'].values != 0).astype(int)

        for prefix in prefixes:
            band_columns = [column for column in rows.columns if column.startswith(prefix)]
            segment = rows[band_columns].values[:, 850:1850]
            for condition, flag in (('stim', 1), ('spon', 0)):
                gathered[prefix][condition].append(segment[stim_flag == flag])

    # Replace every list of per-animal pieces with one stacked array.
    for prefix in prefixes:
        for condition in ('stim', 'spon'):
            gathered[prefix][condition] = np.concatenate(gathered[prefix][condition], axis=0)

    return gathered

# Load and extract data from each group.
# Each result maps band prefix -> {'stim': array, 'spon': array}, built from
# column positions 850..1849 of each band's columns.
prop_data = load_and_extract_data(prop_file_path)
phent_data = load_and_extract_data(phent_file_path)
control_data = load_and_extract_data(control_file_path)
clon_data = load_and_extract_data(clon_file_path)

# Function to build the classification model with the latent layer output
def build_classification_model(input_shape):
    """Build and compile a 1-D CNN whose last conv layer is named 'latent_layer'.

    Two Conv1D + MaxPooling1D stages (64 and 128 filters) are followed by a
    256-filter Conv1D called 'latent_layer', global average pooling and a
    single sigmoid output unit.

    Args:
        input_shape: shape of one sample, passed to ``Input(shape=...)``.

    Returns:
        A ``tf.keras.Model`` compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    layers = tf.keras.layers

    net_input = layers.Input(shape=input_shape)
    hidden = net_input
    for n_filters in (64, 128):
        hidden = layers.Conv1D(n_filters, 3, activation='relu', padding='same')(hidden)
        hidden = layers.MaxPooling1D(2)(hidden)

    # Named so the activations can be looked up later with get_layer().
    latent = layers.Conv1D(256, 3, activation='relu', padding='same', name='latent_layer')(hidden)
    pooled = layers.GlobalAveragePooling1D()(latent)
    probability = layers.Dense(1, activation='sigmoid')(pooled)

    classifier = tf.keras.Model(inputs=net_input, outputs=probability)
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return classifier

# Function to train the model and extract latent layer outputs
def train_and_extract_latent(eeg_data, band):
    """Train a stim-vs-spon classifier per group and return latent activations.

    For each of the groups 'prop', 'phent', 'control' and 'clon', a CNN is
    trained in 5-fold cross-validation on that group's trials of ``band``.
    After each fold the 'latent_layer' activations are computed for ALL trials
    of the group (training and validation alike), flattened, and finally
    averaged across the five folds.

    Args:
        eeg_data: dict mapping group -> band -> {'stim': array, 'spon': array}.
        band: band key to use.

    Returns:
        Dict mapping group -> {'stim': array, 'spon': array} with one flattened,
        fold-averaged latent vector per trial.
    """
    latent_outputs = {'prop': {'stim': None, 'spon': None},
                      'phent': {'stim': None, 'spon': None},
                      'control': {'stim': None, 'spon': None},
                      'clon': {'stim': None, 'spon': None}}

    group_names = ['prop', 'phent', 'control', 'clon']

    for group in group_names:
        X_stim = eeg_data[group][band]['stim']
        X_spon = eeg_data[group][band]['spon']

        # Combine stim and spon data (stim rows first, label 1; spon rows after, label 0)
        X = np.concatenate((X_stim, X_spon), axis=0)
        y = np.concatenate((np.ones(X_stim.shape[0]), np.zeros(X_spon.shape[0])), axis=0)

        # Add a trailing channel axis when trials are 2-D (trials, time)
        if X.ndim == 2:
            X = X[..., np.newaxis]

        # KFold cross-validation to train the model
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        latent_vectors_stim = []
        latent_vectors_spon = []

        for train_index, test_index in kf.split(X):
            X_train, X_val = X[train_index], X[test_index]
            y_train, y_val = y[train_index], y[test_index]

            # Standardize data: statistics are fitted on the training fold only
            # (per channel, pooled over trials and time) and reused elsewhere.
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
            X_val = scaler.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)

            # Build and train model
            model = build_classification_model((X_train.shape[1], X_train.shape[2]))
            early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
            model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0, shuffle=True)

            # Extract latent layer output. The network was trained on
            # standardized inputs, so the full data set must go through the
            # same scaler before prediction (previously the raw X was used).
            latent_model = tf.keras.Model(inputs=model.input, outputs=model.get_layer('latent_layer').output)
            X_scaled = scaler.transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
            latent_predictions = latent_model.predict(X_scaled)

            # Flatten the latent outputs to 2D for UMAP
            latent_predictions_flat = latent_predictions.reshape(latent_predictions.shape[0], -1)

            # Separate stim and spon latent vectors (stim rows come first in X)
            latent_vectors_stim.append(latent_predictions_flat[:X_stim.shape[0]])
            latent_vectors_spon.append(latent_predictions_flat[X_stim.shape[0]:])

        # Average latent vectors across folds
        latent_outputs[group]['stim'] = np.mean(latent_vectors_stim, axis=0)
        latent_outputs[group]['spon'] = np.mean(latent_vectors_spon, axis=0)

    return latent_outputs

# Perform UMAP and plot results
def plot_umap_for_latent(latent_data, group_names, band, condition, xlim, ylim):
    """Embed the latent vectors of one condition with UMAP and scatter-plot them.

    Args:
        latent_data: dict mapping group -> {'stim': array or None,
            'spon': array or None}.
        group_names: groups to include; groups whose entry for ``condition``
            is None are skipped.
        band: band name, used only in the figure title.
        condition: 'stim' or 'spon'.
        xlim: x-axis limits passed to ``plt.xlim``.
        ylim: y-axis limits passed to ``plt.ylim``.
    """
    available = [name for name in group_names if latent_data[name][condition] is not None]

    # Stack all groups and keep a parallel array naming each row's group.
    stacked = np.concatenate([latent_data[name][condition] for name in available], axis=0)
    row_groups = np.concatenate(
        [np.full(latent_data[name][condition].shape[0], name) for name in available]
    )

    # Standardise every feature, then project to two dimensions.
    standardised = StandardScaler().fit_transform(stacked)
    projector = umap.UMAP(n_neighbors=50, min_dist=0.2, n_components=2, random_state=42)
    points = projector.fit_transform(standardised)

    plt.figure(figsize=(10, 8))
    for name in available:
        selected = row_groups == name
        plt.scatter(points[selected, 0], points[selected, 1], label=name, alpha=0.6)

    plt.title(f'UMAP - {condition.capitalize()} {band} Band (Latent Space)')
    plt.xlabel('UMAP 1')
    plt.ylabel('UMAP 2')
    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.legend()
    plt.show()

# Define group names
group_names = ['prop', 'phent', 'control', 'clon']

# Prepare combined data structure
eeg_data = {
    'prop': prop_data,
    'phent': phent_data,
    'control': control_data,
    'clon': clon_data
}

# Calculate global limits for UMAP plots.
# The latent outputs of every band are cached so the plotting loop below can
# reuse them. Previously the networks were trained a second time for plotting,
# which doubled the run time and meant the axis limits were derived from
# different (non-deterministically trained) models than the plotted points.
# NOTE: the cache keeps the latent vectors of all six bands in memory at once.
all_embeddings = []
latent_by_band = {}
for band in ['DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr']:
    latent_outputs = train_and_extract_latent(eeg_data, band)
    latent_by_band[band] = latent_outputs
    for condition in ['stim', 'spon']:
        # Combine data from all groups
        combined_data = np.concatenate(
            [latent_outputs[group][condition] for group in group_names if latent_outputs[group][condition] is not None], axis=0
        )
        scaler = StandardScaler()
        scaled_data = scaler.fit_transform(combined_data)
        reducer = umap.UMAP(n_neighbors=50, min_dist=0.2, n_components=2, random_state=42)
        embedding = reducer.fit_transform(scaled_data)
        all_embeddings.append(embedding)

# Determine limits based on all embeddings
all_embeddings_combined = np.concatenate(all_embeddings, axis=0)
xlim = (all_embeddings_combined[:, 0].min(), all_embeddings_combined[:, 0].max())
ylim = (all_embeddings_combined[:, 1].min(), all_embeddings_combined[:, 1].max())

# Perform UMAP on latent layer outputs for each frequency band and condition,
# using the same latent vectors the limits were computed from
for band in ['DeltaPwr', 'ThetaPwr', 'AlphaPwr', 'BetaPwr', 'LowGammaPwr', 'HighGammaPwr']:
    print(f"Processing {band} band...")
    latent_outputs = latent_by_band[band]
    for condition in ['stim', 'spon']:
        plot_umap_for_latent(latent_outputs, group_names, band, condition, xlim, ylim)