In [1]:
# import necessary modules
import glob
import os
import mne
import numpy as np
import pandas as pd
from scipy import signal, stats
import multiprocessing as mp
import os 

In [2]:
def read_files(file):
    """Load an EDF recording, leaving out a fixed set of channels.

       Parameters
       ----------
       file : EDF file to read; handed unchanged to mne.io.read_raw_edf

       Output
       ----------
       data : RawEDF without the channels named in the exclusion list

    """

    # channel names skipped at read time; some entries are spelling/case
    # variants of the same label (e.g. "Photic", "photic", "phoic")
    dropped_channels = [
        "Trigger Event", "Patient Event",
        "ECG1", "ECG2",
        "AUX1", "AUX4", "AUX5", "AUX6", "AUX7", "AUX8", "AUX3",
        "PG1", "PG2",
        "A1", "A2",
        "EOG1", "EOG2",
        "EKG1", "EKG2",
        "AUX2",
        "Photic", "phoic", "photic", "aux1",
    ]

    return mne.io.read_raw_edf(file, exclude=dropped_channels)

In [3]:
def hyperventilation(data):
    """Identify beginning and end of hyperventilation from EEG data.

       Parameters
       ----------
       data : object exposing ``annotations`` with parallel ``description``,
              ``onset`` and ``duration`` sequences (e.g. a RawEDF instance)

       Output
       ----------
       [start, end] : when exactly one start marker and exactly one end
                      marker are found; ``end`` is the end marker's onset
                      plus its duration
       str          : a "manual check needed" message when more than one
                      start marker or more than one end marker is found
       np.nan       : otherwise (no markers, or only one side of the pair)
       """

    # labels to look for
    start_labels = ["HV Begin", "Hyperventilation begins", "Begin HV"]
    end_labels = ["HV End", "End HV", "end HV here as some fragments noted"]

    annotations = data.annotations

    # collect every candidate time so repeated markers can be detected
    starts = []
    ends = []
    for description, onset, duration in zip(annotations.description,
                                            annotations.onset,
                                            annotations.duration):
        if description in start_labels:
            starts.append(onset)
        if description in end_labels:
            ends.append(onset + duration)

    # unambiguous case: a single start paired with a single end
    if len(starts) == 1 and len(ends) == 1:
        return [starts[0], ends[0]]

    # any repeated marker (two, three, ...) makes the segment ambiguous;
    # testing only for exactly two would let larger counts be reported
    # as "no hyperventilation"
    if len(starts) > 1 or len(ends) > 1:
        return "Possibly bad file; manual check needed."

    # null value when no complete hyperventilation segment is identified
    return np.nan

In [4]:
def photic_stimulation(data):
    """Locate the time span covered by photic stimulation annotations.

       Parameters
       ----------
       data : RawEDF instance

       Output
       ----------
       [start, end] : onset of the first annotation containing "Hz" and
                      onset plus duration of the last one, when at least
                      two such annotations exist
       np.nan       : when fewer than two such annotations exist
       """

    annots = data.annotations

    # positions of all annotations whose text mentions a frequency
    hz_positions = [idx for idx, text in enumerate(annots.description)
                    if "Hz" in text]

    # zero or one match is treated as "no stimulation"
    if len(hz_positions) <= 1:
        return np.nan

    first, last = hz_positions[0], hz_positions[-1]
    return [annots.onset[first], annots.onset[last] + annots.duration[last]]

In [5]:
# load the metadata spreadsheet and pull out the columns used below:
# file names, scan ids and the per-file "Empty" flag
df = pd.read_excel("/project/6019337/cosmin14/processed_metadata_2.xlsx")
filenames, scanid, empty = df["Filename"], df["ScanID"], df["Empty"]

FileNotFoundError: [Errno 2] No such file or directory: '/project/6019337/cosmin14/processed_metadata_2.xlsx'

In [None]:
# per-file results, appended to in file order by the processing loop
hyper_list, photic_list = [], []

In [None]:
# walk through every file listed in the metadata, in order
for i in range(len(filenames)):
    # progress indicator
    print(i)

    # files flagged as empty get a placeholder in both result lists
    if empty[i]:
        hyper_list.append("EMPTY File")
        photic_list.append("EMPTY File")
        continue

    # otherwise read the recording and extract both time spans from it
    a = read_files(filenames[i])
    hyper_list.append(hyperventilation(a))
    photic_list.append(photic_stimulation(a))

In [None]:
# gather the per-file results into one table keyed by scan id
df2 = pd.DataFrame(
    {
        "ScanID": scanid,
        "Photic stimulation": photic_list,
        "Hyperventilation": hyper_list,
    }
)

In [35]:
# write the results table to disk as CSV
df2.to_csv(path_or_buf="/project/6019337/cosmin14/hyper_photic.csv")