<a href="https://colab.research.google.com/github/bdandersen-berkeley/mids/blob/master/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MIDS W207 Group 9 - Final Project

In [5]:
import re
from os.path import basename

import numpy as np
from scipy.io import loadmat

# Regular expression representing the EEG data file naming conventions:
#   group 1 -- subject (e.g. "Dog_4"), group 3 -- data class, group 4 -- segment number
CLIP_FILENAME_REGEX = r'((Patient|Dog)_\d+)_(\w+)_segment_(\d+)\.mat$'
clip_filename_pattern = re.compile(CLIP_FILENAME_REGEX)


def _clip_field_text(clip, key, number_spec=None):
    '''
    Returns a printable representation of one field of a loaded EEG clip

    loadmat returns even scalar Matlab variables as 2-D arrays, so the field is flattened and
    converted to plain Python values before formatting.  This avoids relying on NumPy's
    deprecated implicit conversion of a multi-dimensional array to a scalar, and prevents a
    TypeError when a field holds more than one element.

    Arguments:
    ----------

    clip -- Dictionary of clip variables, as returned by loadmat (required).
    key -- Name of the variable to represent (required).
    number_spec -- printf-style specification applied to each element of the field.  If None,
    a single-element field is returned as a bare scalar and any other field is returned as is.

    Returns "unknown" if the field is absent or empty.
    '''

    if key not in clip:
        return "unknown"

    elements = np.ravel(clip[key]).tolist()
    if not elements:
        return "unknown"

    if number_spec is not None:
        return ", ".join(number_spec % element for element in elements)

    return elements[0] if len(elements) == 1 else clip[key]


def clip_summary(mat_clip_pathname):
    '''
    Prints summary of data maintained in EEG clip file

    The specified EEG clip file is expected to be formatted as a Matlab data file.  Attempts to
    load the data from a file of any other format will throw an exception.

    A filename that does not follow the EEG data naming conventions is reported, and the
    subject, data class and segment are then summarized as "unknown".  Any variable missing
    from the clip file is likewise summarized as "unknown".

    Arguments:
    ----------

    mat_clip_pathname -- Pathname of an EEG clip file whose data to summarize (required).

    Raises AssertionError if mat_clip_pathname is None (the check is skipped when Python runs
    with assertions disabled, i.e. with -O).
    '''

    assert mat_clip_pathname is not None, "Parameter mat_clip_pathname is not specified"

    # Check whether the EEG clip filename conforms to naming conventions
    clip_filename = basename(mat_clip_pathname)
    clip_filename_compiled = clip_filename_pattern.match(clip_filename)
    if clip_filename_compiled is None:
        print("Clip filename does not follow EEG data naming conventions")

    # Attempt to load the EEG clip data, anticipating that it is in Matlab format
    clip = loadmat(mat_clip_pathname)

    unknown = "unknown"
    column_spec = "{0:18}  {1}"
    data_column_spec = "  {0:16}  {1}"

    # Details encoded in the filename
    if clip_filename_compiled is None:
        subject = data_class = segment = unknown
    else:
        subject, _, data_class, segment = clip_filename_compiled.groups()

    # Details derived from the sample matrix itself
    if "data" in clip:
        data = clip["data"]
        shape = data.shape
        minimum = "%.4f" % data.min()
        maximum = "%.4f" % data.max()
        electrodes = data.shape[0]
    else:
        shape = minimum = maximum = electrodes = unknown

    # Print the summary
    print(column_spec.format("Filename:", clip_filename))
    print(column_spec.format("Subject:", subject))
    print(column_spec.format("Data class:", data_class))
    print(column_spec.format("Segment:", segment))
    print("Data:")
    print(data_column_spec.format("Shape:", shape))
    print(data_column_spec.format("Min (volts):", minimum))
    print(data_column_spec.format("Max (volts):", maximum))
    print(data_column_spec.format("Duration (sec):", _clip_field_text(clip, "data_length_sec", "%.4f")))
    print(column_spec.format("Latency (sec):", _clip_field_text(clip, "latency", "%.4f")))
    print(column_spec.format("Samples:", _clip_field_text(clip, "sampling_frequency")))
    print(column_spec.format("Electrodes:", electrodes))

# Print the summary of one clip from the data set.  The call below that is commented out
# summarizes an alternative clip file instead.
# clip_summary("sample_clip.mat")
clip_summary("Dog_4_interictal_segment_1012.mat")

Filename:           Dog_4_interictal_segment_1012.mat
Subject:            Dog_4
Data class:         interictal
Segment:            1012
Data:
  Shape:            (16, 400)
  Min (volts):      -339.1075
  Max (volts):      233.9075
  Duration (sec):   unknown
Latency (sec):      unknown
Samples:            unknown
Electrodes:         16
