In [None]:
import os
import mne
import numpy as np
import pickle
import glob

In [None]:
def save_data(data, output_file):
    """Pickle ``data`` into ``output_file`` using pickle protocol 4.

    Args:
        data: Any picklable object.
        output_file: Destination path, opened in binary write mode (an
            existing file is overwritten).
    """
    with open(output_file, 'wb') as sink:
        pickle.Pickler(sink, protocol=4).dump(data)

In [None]:
# Preprocessed epochs file explored interactively in the cells below (file name carries the tag "P1").
fif_file = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed/preprocessed_P1-epo.fif"
# Directory intended to receive the pickled outputs.
output_dir = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed_npy"
def read_and_crop_epochs(fif_file):
    """Read an epochs file with MNE and crop it to the 0 s - 1.0 s window.

    Args:
        fif_file: Path of the epochs file passed to ``mne.read_epochs``.

    Returns:
        Whatever ``crop(tmin=0, tmax=1.0)`` returns for the preloaded epochs.
    """
    loaded = mne.read_epochs(fif_file, preload=True)
    return loaded.crop(tmin=0, tmax=1.0)

# Load the cropped epochs, then reorder them by ascending event code (events column 2).
epochs = read_and_crop_epochs(fif_file)
sorted_indices = epochs.events[:, 2].argsort()
epochs = epochs[sorted_indices]

In [None]:

# Print how many epochs were loaded, then show the events array as the cell output.
print(len(epochs.events))
epochs.events

In [None]:
import pandas as pd
# Concept index table from the THINGS metadata folder; read without a header row,
# so its columns get integer labels and are accessed by position later on.
csv_file_path = '/home/ldy/Workspace/THINGS/osfstorage/THINGS/Metadata/Concept-specific/image_concept_index.csv'
image_concept_df = pd.read_csv(csv_file_path, header=None)
print(image_concept_df)


# Number of rows in the table (shown as the cell output).
image_concept_df.shape[0]

In [None]:
def filter_valid_epochs(epochs, exclude_event_id=999999):
    """Return the epochs whose event code differs from ``exclude_event_id``.

    Args:
        epochs: Object exposing an ``events`` array (event code in column 2)
            and supporting boolean-mask indexing.
        exclude_event_id: Event code to drop. Defaults to 999999.

    Returns:
        ``epochs`` indexed with the keep-mask.
    """
    event_codes = epochs.events[:, 2]
    keep_mask = event_codes != exclude_event_id
    return epochs[keep_mask]

# Drop the epochs carrying the helper's default excluded event code (999999).
valid_epochs = filter_valid_epochs(epochs)
valid_epochs.info  # bare expression mid-cell: evaluated, but only the last expression is displayed
valid_epochs.events.shape

In [None]:
def identify_zs_event_ids(epochs, num_repetitions=12):
    """Return the event codes that occur exactly ``num_repetitions`` times.

    Args:
        epochs: Object exposing an ``events`` array with the event code in column 2.
        num_repetitions: Required number of occurrences. Defaults to 12.

    Returns:
        Sorted array of the matching unique event codes (may be empty).
    """
    ids, occurrences = np.unique(epochs.events[:, 2], return_counts=True)
    return ids[occurrences == num_repetitions]

# Event codes that occur exactly 12 times (the helper's default) among the valid epochs.
zs_event_ids = identify_zs_event_ids(valid_epochs)
# Verify the zero-shot event IDs
print("Zero-shot Event IDs:", zs_event_ids)

# Number of zero-shot event IDs found (shown as the cell output).
len(zs_event_ids)

In [None]:
# Separate and process datasets
# Training portion: every valid epoch whose event code is NOT one of the zero-shot IDs.
is_zs_epoch = np.isin(valid_epochs.events[:, 2], zs_event_ids)
training_epochs = valid_epochs[~is_zs_epoch]
# Report the size of the training portion (once with a label, once bare).
n_training_events = len(training_epochs.events)
print("Number of events in the training set:", n_training_events)
print(n_training_events)

In [None]:
# Extract event IDs from the filtered training epochs
# Distinct event codes that remain in the training portion.
training_event_ids = np.unique(training_epochs.events[:, 2])

# Sanity check: event codes present in both the training and the zero-shot sets.
overlap_ids = np.intersect1d(training_event_ids, zs_event_ids)

print("Overlapping Event IDs:", overlap_ids)

In [None]:
# Zero-shot test portion: the valid epochs whose event code is one of zs_event_ids.
zs_test_epochs = valid_epochs[np.isin(valid_epochs.events[:, 2], zs_event_ids)]
zs_test_epochs.events  # bare expression mid-cell; not displayed
# Number of test epochs (shown as the cell output).
len(zs_test_epochs.events)
# zs_test_epochs.events

In [None]:
# Sizes of the two portions: training first, zero-shot test second.
print(len(training_epochs.events))
print(len(zs_test_epochs.events))

In [None]:
# Event codes (last events column) of both portions; only the second expression is displayed.
training_epochs.events[:, -1]
zs_test_epochs.events[:, -1]

In [None]:
import numpy as np
# Per-epoch event codes of each portion (last column of the events array).
training_event_ids = training_epochs.events[:, -1]
test_event_ids = zs_test_epochs.events[:, -1]

# For each distinct test event code, count the training epochs carrying the same code.
# Iterating over the unique codes gives the same keys and values as iterating over
# every test epoch, without recomputing each count once per repetition.
# NOTE(review): key order matches the original only if test_event_ids is already
# sorted (the epochs are sorted by event code when loaded above) — confirm if order matters.
counts = {test_id: np.sum(training_event_ids == test_id) for test_id in np.unique(test_event_ids)}
counts

In [None]:
# Map each zero-shot event ID to its image category index.
# The event ID is used as a 1-based row number into image_concept_df (hence "- 1"),
# and the value is read from column 0 of that row.
# NOTE(review): presumably event IDs follow the row order of the concept CSV — verify.

zs_event_to_category_map = {}

for event_id in zs_event_ids:
    zs_event_to_category_map[event_id] = image_concept_df.iloc[event_id - 1, 0]

# Print the mapping
print("Event ID to Image Category Index Mapping:")
for event_id, category_index in zs_event_to_category_map.items():
    print(f"Event ID {event_id}: Image Category Index {category_index}")

In [None]:
# List to hold all the categories in the test set
# Category index of every zero-shot event ID, in the order of zs_event_ids.
# IDs that are missing from the mapping are skipped.
test_set_categories = [
    zs_event_to_category_map[zs_id]
    for zs_id in zs_event_ids
    if zs_id in zs_event_to_category_map
]

# Show the collected categories; the count is the cell output.
print("Categories in the test set:", test_set_categories)
len(test_set_categories)
# test_set_categories

In [None]:
from collections import Counter

# Count the occurrences of each category ID in the training set
# Count the occurrences of each category index among the zero-shot (test) categories.
category_counts = Counter(test_set_categories)

# The counter is built from test_set_categories, so the output is labelled as the test set.
print("Counts of each category ID in the test set:")
for category_id, count in category_counts.items():
    print(f"Category ID {category_id}: Count {count}")

In [None]:
# Map every distinct training event ID to its image category index.
# The event ID is used as a 1-based row number into image_concept_df (hence "- 1"),
# and the value is read from column 0 of that row.
# The IDs are taken directly from training_epochs (unique values only), so this cell
# does not depend on which earlier cell last rebound `training_event_ids` and does
# not repeat the lookup for every epoch.

event_to_category_map = {}

for event_id in np.unique(training_epochs.events[:, 2]):
    event_to_category_map[event_id] = image_concept_df.iloc[event_id - 1, 0]

# Print the mapping
print("Event ID to Image Category Index Mapping:")
for event_id, category_index in event_to_category_map.items():
    print(f"Event ID {event_id}: Image Category Index {category_index}")

In [None]:
# Build the per-epoch category list for the training portion.

# Event code of every training epoch (one entry per epoch, repeats included).
training_event_ids = training_epochs.events[:, 2]

# Translate each code through event_to_category_map; codes absent from the map are skipped.
train_set_categories = [
    event_to_category_map[code]
    for code in training_event_ids
    if code in event_to_category_map
]

# Show the categories and how many entries were collected.
print("Categories in the training set:", train_set_categories)
print("Total number of category entries in the training set:", len(train_set_categories))

In [None]:
from collections import Counter

# Count the occurrences of each category ID in the training set
# Tally how many training entries fall into each category index.
category_counts = Counter(train_set_categories)

# One line per category, in the counter's insertion order.
print("Counts of each category ID in the training set:")
for category_id in category_counts:
    print(f"Category ID {category_id}: Count {category_counts[category_id]}")

In [None]:
# Count, for each test-set category, how many training-set category entries equal it.
# train_set_categories is a plain Python list: `list == x` is only evaluated
# element-wise when x is a NumPy scalar, and collapses to a single False (count 0)
# for a plain int. Converting once to an array makes the comparison element-wise
# in every case and avoids re-converting the list for each test category.
train_categories_arr = np.asarray(train_set_categories)
counts = {test_id: np.sum(train_categories_arr == test_id) for test_id in test_set_categories}
# Calculate the total number of elements in 'counts'
total_elements = sum(counts.values())

# Print the total number of elements
print("Total number of elements represented in 'counts':", total_elements)

counts

In [None]:
# Training-set category entries whose category does not occur in the test set.
# The test categories are put in a set so each membership test is O(1) instead of
# a scan over the whole test list for every training entry.
test_category_set = set(test_set_categories)
train_set_categories_filtered = [item for item in train_set_categories if item not in test_category_set]

# Show the remaining entries; their count is the cell output.
print("Filtered train_set_categories:", train_set_categories_filtered)
len(train_set_categories_filtered)

In [None]:
# Boolean mask (one entry per training-set category entry): True where the
# category does not occur in the test set. A set gives O(1) membership tests.
test_category_set = set(test_set_categories)
keep_epochs_mask = [category not in test_category_set for category in train_set_categories]
# Apply the mask to filter out epochs from training_epochs
training_epochs_filtered = training_epochs[keep_epochs_mask]

# Confirm the filtering
print("Original training set size:", len(training_epochs))
print("Filtered training set size:", len(training_epochs_filtered))

In [None]:
def reshape_meg_data(epochs, num_concepts, num_imgs, repetitions):
    """Return the epochs' data array reshaped to five dimensions.

    The first axis of ``epochs.get_data()`` is split into
    ``(num_concepts, num_imgs, repetitions)``; the sizes of axes 1 and 2 are
    carried over unchanged.

    Args:
        epochs: Object whose ``get_data()`` returns an array with at least
            three axes.
        num_concepts: Size of the first output axis.
        num_imgs: Size of the second output axis.
        repetitions: Size of the third output axis.

    Returns:
        Array of shape ``(num_concepts, num_imgs, repetitions, d1, d2)`` where
        ``d1`` and ``d2`` are the sizes of axes 1 and 2 of the input data.
    """
    raw = epochs.get_data()
    target_shape = (num_concepts, num_imgs, repetitions) + (raw.shape[1], raw.shape[2])
    return raw.reshape(target_shape)


# Reshape the filtered training epochs to (1654, 12, 1, <axis 1>, <axis 2>); the reshape
# only succeeds if exactly 1654 * 12 * 1 epochs remain after filtering.
training_data = reshape_meg_data(training_epochs_filtered, num_concepts=1654, num_imgs=12, repetitions=1)
training_data.shape

In [None]:
# Reshape the zero-shot test epochs to (200, 1, 12, <axis 1>, <axis 2>); the reshape
# only succeeds if the test portion holds exactly 200 * 1 * 12 epochs.
zs_test_data = reshape_meg_data(zs_test_epochs, num_concepts=200, num_imgs=1, repetitions=12)
zs_test_data.shape

In [None]:
# # Save data
# if not os.path.isdir(output_dir):
#     os.makedirs(output_dir)
# save_data({'meg_data': training_data, 'ch_names': training_epochs_filtered.ch_names, 'times': training_epochs_filtered.times},
#             os.path.join(output_dir, 'preprocessed_meg_training.pkl'))
# save_data({'meg_data': zs_test_data, 'ch_names': zs_test_epochs.ch_names, 'times': zs_test_epochs.times},
#             os.path.join(output_dir, 'preprocessed_meg_zs_test.pkl'))

In [None]:
import numpy as np
import os

def process_and_save_meg_data(fif_file, output_dir, concept_index=None):
    """Split one epochs file into training / zero-shot test arrays and pickle both.

    Steps: read and crop the epochs, sort them by event code, drop the excluded
    event code, identify the zero-shot event IDs, remove from the training
    portion every epoch whose category also occurs among the zero-shot
    categories, reshape both portions and write them to ``output_dir``.

    The category mask is computed from the epochs of *this* file. It used to be
    taken from the notebook-level ``train_set_categories`` /
    ``test_set_categories`` lists, which describe whichever file was explored
    interactively and are therefore stale when other files are processed.

    Args:
        fif_file: Path of the epochs file to process.
        output_dir: Directory receiving ``preprocessed_meg_training.pkl`` and
            ``preprocessed_meg_zs_test.pkl``; created if missing.
        concept_index: Optional 1-D sequence where entry ``k`` is the category
            index of event ID ``k + 1``. Defaults to column 0 of the
            notebook-level ``image_concept_df``.

    Raises:
        ValueError: From the reshape if the number of remaining epochs does not
            match 1654 * 12 * 1 (training) or 200 * 1 * 12 (test).
    """
    if concept_index is None:
        # Same lookup table the exploratory cells use: row (event_id - 1), column 0.
        concept_index = image_concept_df.iloc[:, 0].to_numpy()
    concept_index = np.asarray(concept_index)

    epochs = read_and_crop_epochs(fif_file)

    sorted_indices = np.argsort(epochs.events[:, 2])
    epochs = epochs[sorted_indices]

    valid_epochs = filter_valid_epochs(epochs)
    zs_event_ids = identify_zs_event_ids(valid_epochs)

    is_zs_epoch = np.isin(valid_epochs.events[:, 2], zs_event_ids)
    training_epochs = valid_epochs[~is_zs_epoch]
    zs_test_epochs = valid_epochs[is_zs_epoch]

    # Category of every training epoch and of every zero-shot event ID
    # (event IDs are 1-based row numbers into the concept table).
    train_categories = concept_index[training_epochs.events[:, 2] - 1]
    test_categories = concept_index[zs_event_ids - 1]

    # Keep only training epochs whose category never appears in the test portion.
    keep_epochs_mask = ~np.isin(train_categories, test_categories)
    training_epochs_filtered = training_epochs[keep_epochs_mask]

    training_data = reshape_meg_data(training_epochs_filtered, num_concepts=1654, num_imgs=12, repetitions=1)
    zs_test_data = reshape_meg_data(zs_test_epochs, num_concepts=200, num_imgs=1, repetitions=12)

    # Save data
    os.makedirs(output_dir, exist_ok=True)
    save_data({'meg_data': training_data, 'ch_names': training_epochs_filtered.ch_names, 'times': training_epochs_filtered.times},
              os.path.join(output_dir, 'preprocessed_meg_training.pkl'))
    save_data({'meg_data': zs_test_data, 'ch_names': zs_test_epochs.ch_names, 'times': zs_test_epochs.times},
              os.path.join(output_dir, 'preprocessed_meg_zs_test.pkl'))

# fif_file = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed/preprocessed_P1-epo.fif"
# output_dir = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed_npy"
# process_and_save_meg_data(fif_file, output_dir)

In [None]:
# def process_and_save_meg_data(fif_file, output_dir):
#     epochs = read_and_crop_epochs(fif_file)    
#     valid_epochs = filter_valid_epochs(epochs)    
#     zs_event_ids = identify_zs_event_ids(valid_epochs)
    
#     training_epochs = valid_epochs[~np.isin(valid_epochs.events[:, 2], zs_event_ids)]    
#     zs_test_epochs = valid_epochs[np.isin(valid_epochs.events[:, 2], zs_event_ids)]
    
#     keep_epochs_mask = [category not in test_set_categories for category in train_set_categories]    
#     training_epochs_filtered = training_epochs[keep_epochs_mask]


#     training_data = reshape_meg_data(training_epochs_filtered, num_concepts=1654, num_imgs=12, repetitions=1)
#     zs_test_data = reshape_meg_data(zs_test_epochs, num_concepts=200, num_imgs=1, repetitions=12)

#     # Save data
#     if not os.path.isdir(output_dir):
#         os.makedirs(output_dir)
#     save_data({'meg_data': training_data, 'ch_names': training_epochs_filtered.ch_names, 'times': training_epochs_filtered.times},
#                 os.path.join(output_dir, 'preprocessed_meg_training.pkl'))
#     save_data({'meg_data': zs_test_data, 'ch_names': zs_test_epochs.ch_names, 'times': zs_test_epochs.times},
#                 os.path.join(output_dir, 'preprocessed_meg_zs_test.pkl'))

# # fif_file = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed/preprocessed_P1-epo.fif"
# # output_dir = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed_npy"
# # process_and_save_meg_data(fif_file, output_dir)

In [None]:
def process_directory(input_dir, output_dir):
    """Run ``process_and_save_meg_data`` on every ``*epo.fif`` file below ``input_dir``.

    The participant tag is taken from the file name: the text between the first
    ``_`` and the following ``-`` (e.g. ``P1`` in ``preprocessed_P1-epo.fif``).
    Its numeric part names the output folder ``sub-NN`` inside ``output_dir``.

    Args:
        input_dir: Directory searched recursively for epochs files.
        output_dir: Root directory under which per-subject folders are used.
    """
    pattern = os.path.join(input_dir, '**/*epo.fif')
    for epochs_path in glob.glob(pattern, recursive=True):
        participant_tag = os.path.basename(epochs_path).split('_')[1].split('-')[0]
        participant_number = int(participant_tag[1:])
        subject_output_dir = os.path.join(output_dir, f"sub-{participant_number:02d}")
        process_and_save_meg_data(epochs_path, subject_output_dir)

In [None]:
# Process every "*epo.fif" file found under in_dir; each subject's pickles are
# written to a "sub-NN" folder beneath output_dir.
in_dir = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed/"
output_dir = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed_npy"
process_directory(in_dir, output_dir)

In [None]:
import os
import shutil
import csv
import pandas as pd
import mne
import numpy  as np
# Locations used by the image-copy cells below: the CSV of image paths, the root the
# source paths are joined onto, and the destination roots for training / test images.
csv_img_file_path = "/home/ldy/Workspace/THINGS/osfstorage/THINGS/Metadata/Image-specific/image_paths.csv"
origin_img_dir = "/home/ldy/Workspace/THINGS/osfstorage/THINGS/Images/"
training_images_dir = "/home/ldy/Workspace/THINGS/osfstorage/THINGS/images_set/training_images"
test_images_dir = "/home/ldy/Workspace/THINGS/osfstorage/THINGS/images_set/test_images"

In [None]:
# Table of image paths, read without a header row; printed for inspection.
image_df = pd.read_csv(csv_img_file_path, header=None)
print(image_df)

In [None]:
# Inspect the events of the filtered training epochs built in the exploratory cells above.
training_epochs_filtered.events

In [None]:
import pandas as pd
# Re-read the concept index table (same file path as the one loaded near the top of
# the notebook) and print it.
concept_csv_file_path = '/home/ldy/Workspace/THINGS/osfstorage/THINGS/Metadata/Concept-specific/image_concept_index.csv'
image_concept_df = pd.read_csv(concept_csv_file_path, header=None)
print(image_concept_df)

In [None]:
# Number of epochs in the filtered training set.
len(training_epochs_filtered.events[:, -1])

In [None]:
import pandas as pd
import os
import shutil
concept_csv_file_path = '/home/ldy/Workspace/THINGS/osfstorage/THINGS/Metadata/Concept-specific/image_concept_index.csv'
image_concept_df = pd.read_csv(concept_csv_file_path, header=None)

# Event IDs present in each split, stored as sets so the per-image lookup is O(1)
# instead of a scan over the events array for every row of image_df.
training_event_id_set = set(training_epochs_filtered.events[:, -1].tolist())
test_event_id_set = set(zs_test_epochs.events[:, -1].tolist())

# Copy every image that belongs to the training or test split into the matching
# destination tree, prefixing its second path component with the zero-padded category index.
for index, row in image_df.iterrows():
    source_image_path = row[0]

    # Derive the event ID and category from this row. Previously the loop read
    # `event_id` and `category_index`, which are leftovers of earlier loops and
    # never change here, so every image received the same prefix and split decision.
    # assumes row k of image_paths.csv corresponds to event ID k + 1 and to row k of
    # image_concept_index.csv (the same 1-based convention as the
    # `image_concept_df.iloc[event_id-1, 0]` lookups earlier) — TODO confirm
    image_event_id = index + 1
    image_category_index = image_concept_df.iloc[index, 0]

    path_parts = source_image_path.split('/')
    if len(path_parts) > 2:
        formatted_index = str(image_category_index).zfill(5)
        path_parts[1] = f"{formatted_index}_{path_parts[1]}"
    image_path = '/'.join(path_parts)

    if image_event_id in training_event_id_set:
        target_dir = training_images_dir
    elif image_event_id in test_event_id_set:
        target_dir = test_images_dir
    else:
        # Image is in neither split: nothing to copy.
        continue

    src_file = os.path.join(origin_img_dir, source_image_path)
    dest_file = os.path.join(target_dir, image_path)
    os.makedirs(os.path.dirname(dest_file), exist_ok=True)
    shutil.copy(src_file, dest_file)

In [None]:
# for index, row in image_df.iterrows():
#     image_path = row[0]

#     if event_id in training_epochs_filtered.events[:, -1]:
#         target_dir = training_images_dir
#     elif event_id in zs_test_epochs.events[:, -1]:
#         target_dir = test_images_dir
#     else:

#     src_file = os.path.join(origin_img_dir, image_path)
#     dest_file = os.path.join(target_dir, image_path)

#     os.makedirs(os.path.dirname(dest_file), exist_ok=True)
#     shutil.copy(src_file, dest_file)

In [None]:
import os

# NOTE: rebinds training_images_dir to the "images" sub-folder of the training image root set earlier.
training_images_dir = "/home/ldy/Workspace/THINGS/osfstorage/THINGS/images_set/training_images/images/"

def count_images(directory):
    """Count the immediate sub-directories of ``directory`` and the files inside them.

    Only regular files located directly in a sub-directory are counted; files
    directly in ``directory`` and anything nested deeper are ignored.

    Args:
        directory: Path of the directory to scan.

    Returns:
        Tuple ``(total_dirs, total_images)``.
    """
    subdirs = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    ]
    total_images = sum(
        1
        for subdir in subdirs
        for name in os.listdir(subdir)
        if os.path.isfile(os.path.join(subdir, name))
    )
    return len(subdirs), total_images

# Count the sub-directories of the training image directory and the files inside them
# (the result is stored but not displayed).
num_dirs, num_images = count_images(training_images_dir)

In [None]:
import pickle

# Load the training pickle stored under sub-01 and unpack its three entries.
# NOTE(review): pickle.load can execute arbitrary code; only open files produced by this pipeline.
path = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed_npy/sub-01/preprocessed_meg_training.pkl"

with open(path, 'rb') as file:
    data = pickle.load(file)

meg_data = data['meg_data']
ch_names = data['ch_names']
times = data['times']
# Shape of the loaded array (shown as the cell output).
meg_data.shape

In [None]:
# Load the zero-shot test pickle stored under sub-01. This rebinds path, data, meg_data,
# ch_names and times, replacing the values taken from the training pickle.
# NOTE(review): pickle.load can execute arbitrary code; only open files produced by this pipeline.
path = "/home/ldy/Workspace/THINGS_MEG/ds004212/derivatives/preprocessed_npy/sub-01/preprocessed_meg_zs_test.pkl"
with open(path, 'rb') as file:
    data = pickle.load(file)

meg_data = data['meg_data']
ch_names = data['ch_names']
times = data['times']
# Shape of the loaded array (shown as the cell output).
meg_data.shape

In [None]:
# Standard deviation over all values of whichever meg_data array was loaded last.
print(np.std(meg_data))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# meg_data is indexed here with five axes: index 0 is taken on the three leading axes,
# leaving a 2-D slice that the loop below reads as (channel, time).
first_sample = meg_data[0, 0, 0, :, :]  # 2-D slice at position [0, 0, 0]

# Plotting
plt.figure(figsize=(15, 8))  # Adjust the size as needed

# You may not want to plot all channels if there are many, so adjust this as needed
for i, ch_name in enumerate(ch_names):
    plt.plot(times, first_sample[i, :], label=ch_name)

plt.xlabel('Time (s)')  # Assuming 'times' is in seconds
plt.ylabel('MEG Signal')  # Adjust label as appropriate
plt.title('MEG Waveform of the First Sample')
plt.legend()  # Comment out if there are too many channels
plt.show()