In [1]:
import pandas as pd
import numpy as np 
import scipy
import matplotlib.pyplot as plt
%matplotlib inline

from scipy.signal import lfilter, butter, welch

import os
import glob

from tqdm import tqdm_notebook

# Upload the data 

In [2]:
path = r'C:\Users\ecath\Desktop\Research\Raw Data\LD dataset'

data_list = []
for filename in glob.glob(os.path.join(path, '*.csv')):
    data = pd.read_csv(filename, header=0)
    data.rename(columns=lambda x: x.replace(' ', '_'), inplace=True)
    data.rename(columns=lambda x: x.replace('_O', '_opt'), inplace=True)
    data.rename(columns=lambda x: x.replace('.', '_opt_'), inplace=True)
    data.rename(columns=lambda x: x.replace('Bsk', filename + '_Bsk'), inplace=True)
    data.rename(columns=lambda x: x.replace(path, ''), inplace=True)
    data.rename(columns=lambda x: x.replace('.csv', ''), inplace=True)
    data.rename(columns=lambda x: x.replace(' ', '_'), inplace=True)
#     data.rename(columns=lambda x: x.replace('_Electrode_KH', ''), inplace=True)
#     data.rename(columns=lambda x: x.replace('_csv_annot', ''), inplace=True)
#     data.rename(columns=lambda x: x.replace('_MISSING_H8', ''), inplace=True)
    data.rename(columns=lambda x: x.replace('_annot', ''), inplace=True)
#     data.rename(columns=lambda x: x.replace('_OM_KH', ''), inplace=True)
    data.rename(columns=lambda x: x.replace('\\', ''), inplace=True)
    print(filename, data.shape)
    data_list.append(data)



C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\1.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\10_Epi.csv_annot.csv (8191, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\11_Endo.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\12_Epi.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\15_Endo.csv_annot.csv (8191, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\16_Epi.csv_annot.csv (8191, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\17_Endo.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\18_Epi.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\22.csv_annot.csv (16667, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\23.csv_annot.csv (16383, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\24.csv_annot.csv (18311, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\25.csv_annot.csv (1

# Division by classes 

In [36]:
def del_empty_cols(data_list):
    for df in data_list:
        for col in df.columns:
            if col.startswith('Unnamed:_'):
                df.drop(col, inplace=True, axis=1)
                
del_empty_cols(data_list)

In [37]:
"""
Splitting spreadspeet to the classes by label 

Parameters: 
-----------
data_list: list
    List with pd.DataFrames

Returns: 
-------
drivers, nondrivers, noises: list, list, list
    Lists with pd.DataFrames, one for each of three classes respectively
"""

def class_splitting(data_list):
    
    drivers = []
    nondrivers = []
    noises = []

    for df in data_list:
        driver = pd.DataFrame()
        nondriver = pd.DataFrame()
        noise = pd.DataFrame()
        for col in df.columns[1::3]:
            curr_index = list(df.columns).index(col)
            prev_index = curr_index - 1
            next_index = curr_index + 1
            prev_col = df.iloc[:,prev_index]
            next_col = df.iloc[:,next_index]
            if df[col][0] == 1:
                driver[df.columns[prev_index]] = prev_col
                driver[df.columns[next_index]] = next_col
            elif df[col][0] == 0:
                nondriver[df.columns[prev_index]] = prev_col
                nondriver[df.columns[next_index]] = next_col
#             elif df[col][0] == -1:
#                 noise[df.columns[prev_index]] = prev_col
#                 noise[df.columns[next_index]] = next_col
            else:
                noise[df.columns[prev_index]] = prev_col
                noise[df.columns[next_index]] = next_col
        drivers.append(driver)
        nondrivers.append(nondriver)
        noises.append(noise)
        
    return(drivers, nondrivers, noises)

drivers, nondrivers, noises = class_splitting(data_list)

In [38]:
"""
Function performs bandpass filter for the digital signal 

Parameters: 
-----------
data: array_like
    An N-dimensional input array
lowcut: float
    Low cutoff frequency
highcut: float
    High cutoff frequency
fs: float
    Sampling rate of the signal
order: int
    The order of the filter.

Returns: 
-------
filtered_signal: array
    The output of the digital filter
"""

def butter_bandpass(data, lowcut, highcut, fs, order=2):
    nyq = 0.5 * fs
    
    high = highcut / nyq
    low = lowcut / nyq
    
    b, a = butter(order, [low, high], btype='bandpass', analog=False)
    filtered_signal = lfilter(b, a, data)
    
    return filtered_signal



"""
Fill NaN with zeros

Parameters: 
-----------
df: DataFrame
    DataFrame with NaNs
    
Returns: 
-------
df: Dataframe
    All NaNs filled by zeros
"""

def del_nul_and_nan(df):
    
    df.fillna(value=0, axis=1, inplace=True)
    
################# add to drop off zero columns ######################    
#     for col in df.columns:
#         if df[col].sum() == 0:
#             df.drop([col], axis = 1, inplace = True)  
################# add to drop off zero columns ######################    
    
    return df


"""


Parameters: 
-----------
df: pd.DataFrame
    An N-dimensional input DataFrame
N: int
    Number of sample points
lowcut: float
    Low cutoff frequency
highcut: float
    High cutoff frequency
fs: float
    Sampling rate of the signal

Returns: 
-------
fft_out: pd.DataFrame
    An output Dataframe with spectrum and frequencies 
"""

def spec_and_freq_for_single_df(df, N, fs, lowcut, highcut):
    
    fft_out = pd.DataFrame()
    
    for col in df.columns:
        filtered_yf = butter_bandpass(df[col], lowcut, highcut, fs, order=2)
        
        
        fft_yf = np.fft.fft(filtered_yf) #spectrum
        fft_xf = np.fft.fftfreq(N, 1/fs) #frequencies
        
        fft_20_index = np.argwhere((fft_xf<25) & (fft_xf>0))        
        fft_yf_20 = fft_yf[fft_20_index] #cutting on 20Hz
        fft_xf_20 = fft_xf[fft_20_index] #cutting on 20Hz
        
        fft_yf_20 = pd.DataFrame(np.abs(fft_yf_20) / N, columns=[col + '_yf'])
        fft_xf_20 = pd.DataFrame(fft_xf_20, columns=[col + '_xf']) 

        fft_out = pd.concat([fft_out, fft_yf_20, fft_xf_20], axis=1)
        
    return(fft_out)


"""

Parameters: 
-----------
class_data_list: list
    List with DataFrames 
    
Returns: 
-------
all_fft_el, all_fft_om: list, list
    Lists with spectrum and frequencies DataFrames
"""

def full_spec_and_freq(class_data_list):
    
    all_fft_el = pd.DataFrame()
    all_fft_om = pd.DataFrame()

    for k, df in enumerate(class_data_list):

        electrode_signal = df[df.columns[::2]]
        optical_signal = df[df.columns[1::2]]

        electrode_signal = del_nul_and_nan(electrode_signal)
        optical_signal = del_nul_and_nan(optical_signal)        

        N = df.shape[0] # Number of sample points
        Fs_el = 1017.25 # sampling rate
        Fs_om = 1000.0

        fft_el = spec_and_freq_for_single_df(electrode_signal, N, Fs_el, lowcut=5, highcut=20)
        fft_om = spec_and_freq_for_single_df(optical_signal, N, Fs_om, lowcut=5, highcut=20)
        
        all_fft_el = pd.concat([all_fft_el, fft_el], axis=1)
        all_fft_om = pd.concat([all_fft_om, fft_om], axis=1)
        
    return(all_fft_el, all_fft_om)

In [39]:
"""

Parameters: 
-----------
class_data_list: list
    List with DataFrames 
label: int, {0,1}
    Target label
signal_type: string, {electrode, optical}
    Electrode or optical signal type
    
Returns: 
-------
all_fft: DataFrame
    Concatanation of DataFrames and adding of target value
"""

def add_target(class_data_list, label, signal_type):
    
    if signal_type == 'electrode':

        all_fft_el, _ = full_spec_and_freq(class_data_list)

        all_fft_el.fillna(value=0, axis=1, inplace=True)

        target = np.full(shape=(1, all_fft_el.shape[1]), fill_value = label)
        target = pd.DataFrame(target, columns=all_fft_el.columns, index=['target'])

        all_fft_electrode = pd.concat([all_fft_el, target], axis=0)

        return(all_fft_electrode)
    
    
    if signal_type == 'optical':
        
        _, all_fft_om = full_spec_and_freq(class_data_list)

        all_fft_om.fillna(value=0, axis=1, inplace=True)

        target = np.full(shape=(1, all_fft_om.shape[1]), fill_value = label)
        target = pd.DataFrame(target, columns=all_fft_om.columns, index=['target'])
        all_fft_optical = pd.concat([all_fft_om, target], axis=0)

        
        return(all_fft_optical)

# Concat dataframes with spectrum for each class and download them

In [40]:
"""
Function that save 

Parameters: 
-----------
signal_type: string, {electrode, optical}
    Electrode or optical signal type
path: string
    Path where to save file in csv format
name: string
    Name of the file
"""

def spectrum(signal_type, path, name):
    
    if not os.path.exists(path):
        os.mkdir(path)

    driver_spectrum = add_target(drivers, 1, signal_type=signal_type)
    nondriver_spectrum = add_target(nondrivers, 0, signal_type=signal_type)
    noise_spectrum = add_target(noises, -1, signal_type=signal_type)


    full_spectrum = pd.concat([driver_spectrum, nondriver_spectrum, noise_spectrum], axis=1)
    full_spectrum.to_csv(path + name, sep=',', index=True)

In [41]:
path = r'C:\Users\ecath\Desktop\Research\Raw Data\LD dataset spectrum'
spectrum('electrode', path, '\Spectrum of electrode LD.csv')
spectrum('optical', path, '\Spectrum of optical LD.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)


In [42]:
df = pd.read_csv(r'C:\Users\ecath\Desktop\Research\Raw Data\LD dataset spectrum\Spectrum of electrode LD.csv',
                 index_col=0)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)
