In [5]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
import pickle

In [6]:
# Display settings: show up to 100 rows and never truncate the column list.
pd.options.display.max_rows = 100
pd.options.display.max_columns = None

**Download in terminal**

`$ kg download -u 'username' -p 'password' -c seizure-prediction -f Dog_4.tar.gz`

**Data**
* 16 channels
* each preictal channel has 239,766 data points, 234 short of a full 10 min (240,000 points)

In [7]:
# Segment file names (without extension), zero-padded to four digits:
# numbers 1..97 for preictal and 1..804 for interictal.
preictal_files = ['Dog_4_preictal_segment_{:04d}'.format(n) for n in range(1, 98)]
interictal_files = ['Dog_4_interictal_segment_{:04d}'.format(n) for n in range(1, 805)]

In [8]:
def create_df(ictal_type, filenames, data_dir='../Dog_4'):
    """Load every segment file of one class into a single dataframe.

    Parameters
    ----------
    ictal_type : str
        'preictal' or 'interictal'. Used as the sub-directory name under
        ``data_dir`` and passed through to ``mat_to_df``.
    filenames : list of str
        Segment file names such as 'Dog_4_preictal_segment_0001'.
    data_dir : str, optional
        Directory containing the ``ictal_type`` sub-directories. Defaults
        to '../Dog_4', the location that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The frames returned by ``mat_to_df`` for each file, concatenated
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``filenames`` is empty (``pd.concat`` has nothing to combine).

    Notes
    -----
    The segment number handed to ``mat_to_df`` is parsed from the numeric
    suffix of each file name, so the list may be a subset or in any order.
    If a name has no numeric suffix, the 1-based position in ``filenames``
    is used instead (the previous behaviour).
    """
    frames = []
    for position, filename in enumerate(filenames, 1):
        # Tolerate an explicit '.mat' extension when extracting the number.
        stem = filename[:-4] if filename.endswith('.mat') else filename
        suffix = stem.rsplit('_', 1)[-1]
        num = int(suffix) if suffix.isdigit() else position

        mat = loadmat('{}/{}/{}'.format(data_dir, ictal_type, filename))
        frames.append(mat_to_df(mat, ictal_type, num))

    # ignore_index=True discards the per-file indices and renumbers the rows.
    return pd.concat(frames, ignore_index=True)

In [9]:
def mat_to_df(mat, ictal_type, num, segment_len=6000):
    """Split one loaded segment file into fixed-length rows.

    Parameters
    ----------
    mat : dict
        Result of ``scipy.io.loadmat`` for one segment file. The channel
        data is read from ``mat['<ictal_type>_segment_<num>'][0][0][0]``
        and is expected to be 2-D: one row per channel.
    ictal_type : str
        'preictal' or 'interictal'; part of the key into ``mat`` and stored
        in the 'target' column.
    num : int
        Segment number; part of the key into ``mat`` and stored in the
        'sequence' column.
    segment_len : int, optional
        Number of samples per output row. The default of 6000 matches the
        15 s windows this notebook was written for.

    Returns
    -------
    pandas.DataFrame
        One row per complete window, with columns 'target', 'sequence' and
        'ch_01' .. 'ch_16'. Each channel cell holds an array of
        ``segment_len`` samples. Only the first 16 channels are kept.

    Raises
    ------
    ValueError
        If ``segment_len`` is not positive.

    Notes
    -----
    The number of windows and the leading offset are derived from the
    recording length. Samples that do not fill a whole window are dropped
    from the *start* of the recording. For a 239,766-sample recording this
    gives 39 windows after skipping the first 5,766 samples.
    """
    if segment_len <= 0:
        raise ValueError('segment_len must be a positive integer')

    data = np.asarray(mat['{}_segment_{}'.format(ictal_type, num)][0][0][0])
    columns = ['ch_%02d' % i for i in range(1, 17)]  # zero-padded so names sort in channel order

    n_samples = data.shape[1]
    n_segments = n_samples // segment_len
    # Skip the partial window at the beginning; done once, not per window.
    offset = n_samples - n_segments * segment_len
    trimmed = data[:, offset:]

    data_list = []
    for i in range(n_segments):
        start = i * segment_len
        data_dict = {'target': ictal_type, 'sequence': num}
        for channel, column in zip(trimmed, columns):
            data_dict[column] = channel[start:start + segment_len]
        data_list.append(data_dict)
    return pd.DataFrame(data_list)

In [10]:
# Build one frame per class and stack them with a fresh index.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and produces the same result.
preictals = create_df('preictal', preictal_files)
df = pd.concat(
    [preictals, create_df('interictal', interictal_files)],
    ignore_index=True,
)

In [11]:
# Persist the combined frame to disk in the current working directory.
pd.to_pickle(df, 'prelim_df.pkl')