In [1]:
import pandas as pd
import numpy as np 
import scipy
import matplotlib.pyplot as plt
%matplotlib inline

from scipy.signal import lfilter, butter, welch

import os
import glob

from tqdm import tqdm_notebook

# Upload the data 

In [15]:
"""
Uploading data and renaming columns
Parameters: 
-----------
path: string
    path to the folder with dataframes 

Returns: 
-------
data_list
    List with pd.DataFrames 
"""

def upload_data(path):

    data_list = []
    files_list = os.path.join(path, '*.csv')
    for filename in glob.glob(files_list):
        data = pd.read_csv(filename, header=0)
        data.rename(columns=lambda x: x.replace(' ', '_'), inplace=True)
        data.rename(columns=lambda x: x.replace('_O', '_opt'), inplace=True)
        data.rename(columns=lambda x: x.replace('.', '_opt_'), inplace=True)
        data.rename(columns=lambda x: x.replace('Bsk', filename + '_Bsk'), inplace=True)
        data.rename(columns=lambda x: x.replace(path, ''), inplace=True)
        data.rename(columns=lambda x: x.replace('.csv_annot.csv', ''), inplace=True)
        data.rename(columns=lambda x: x.replace('\\', ''), inplace=True)
        print(filename, data.shape)
        data_list.append(data)
    return(data_list)
path = r'C:\Users\ecath\Desktop\Research\Raw Data\LD dataset'
data_list = upload_data(path)

C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\1.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\10_Epi.csv_annot.csv (8191, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\11_Endo.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\12_Epi.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\15_Endo.csv_annot.csv (8191, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\16_Epi.csv_annot.csv (8191, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\17_Endo.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\18_Epi.csv_annot.csv (10173, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\22.csv_annot.csv (16667, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\23.csv_annot.csv (16383, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\24.csv_annot.csv (18311, 192)
C:\Users\ecath\Desktop\Research\Raw Data\LD dataset\25.csv_annot.csv (1

# Division by classes 

In [17]:
"""
Splitting spreadspeet to the classes by label 

Parameters: 
-----------
data_list: list
    List with pd.DataFrames

Returns: 
-------
drivers, nondrivers, noises: list, list, list
    Lists with pd.DataFrames, one for each of three classes respectively
"""

def class_splitting(data_list):
    
    drivers = []
    nondrivers = []
    noises = []

    for df in data_list:
        driver = pd.DataFrame()
        nondriver = pd.DataFrame()
        noise = pd.DataFrame()
        for col in df.columns[1::3]:
            curr_index = list(df.columns).index(col)
            prev_index = curr_index - 1
            next_index = curr_index + 1
            prev_col = df.iloc[:,prev_index]
            next_col = df.iloc[:,next_index]
            if df[col][0] == 1:
                driver[df.columns[prev_index]] = prev_col
                driver[df.columns[next_index]] = next_col
            elif df[col][0] == 0:
                nondriver[df.columns[prev_index]] = prev_col
                nondriver[df.columns[next_index]] = next_col
            else:
                noise[df.columns[prev_index]] = prev_col
                noise[df.columns[next_index]] = next_col
        drivers.append(driver)
        nondrivers.append(nondriver)
        noises.append(noise)
        
    return(drivers, nondrivers, noises)

drivers, nondrivers, noises = class_splitting(data_list)

In [4]:
# # make spreadsheet with all siganls 

# target_d = pd.DataFrame(np.ones((1, drivers.shape[1])), columns=drivers.columns, index=['target'])
# drivers = pd.concat([drivers, target_d], axis=0 )

# target_nd = pd.DataFrame(np.zeros((1, nondrivers.shape[1])), columns=nondrivers.columns, index=['target'])
# nondrivers = pd.concat([nondrivers, target_nd], axis=0)

# signals = pd.concat([drivers, nondrivers], axis=1)
# signals.fillna(0, inplace=True)
# signals.to_csv(r'C:\Users\ecath\Desktop\Research\Raw Data\Raw signals.csv', sep=',', index=True)

# Butterworth bandpass [1Hz, 20Hz]

In [19]:
"""
Function performs bandpass filter for the digital signal 

Parameters: 
-----------
data: array_like
    An N-dimensional input array
lowcut: float
    Low cutoff frequency
highcut: float
    High cutoff frequency
fs: float
    Sampling rate of the signal
order: int
    The order of the filter.

Returns: 
-------
filtered_signal: array
    The output of the digital filter
"""

def butter_bandpass(data, lowcut, highcut, fs, order=2):
    nyq = 0.5 * fs
    
    high = highcut / nyq
    low = lowcut / nyq
    
    b, a = butter(order, [low, high], btype='bandpass', analog=False)
    filtered_signal = lfilter(b, a, data)
    
    return filtered_signal


"""
Fill NaN with zeros

Parameters: 
-----------
df: DataFrame
    DataFrame with NaNs
    
Returns: 
-------
df: Dataframe
    All NaNs filled by zeros
"""

def del_nul_and_nan(df):
    
    df.fillna(value=0, axis=1, inplace=True)
    
################# add to drop off zero columns ######################    
#     for col in df.columns:
#         if df[col].sum() == 0:
#             df.drop([col], axis = 1, inplace = True)  
################# add to drop off zero columns ######################    
    
    return df

# Fourier transform

In [20]:
"""


Parameters: 
-----------
df: pd.DataFrame
    An N-dimensional input DataFrame
N: int
    Number of sample points
lowcut: float
    Low cutoff frequency
highcut: float
    High cutoff frequency
fs: float
    Sampling rate of the signal

Returns: 
-------
fft_out: pd.DataFrame
    An output Dataframe with spectrum and frequencies 
"""

def spec_and_freq_for_single_df_welch(df, N, fs, lowcut, highcut):
    
    welch_out = pd.DataFrame()
    
    for col in df.columns:
        filtered_yf = butter_bandpass(df[col], lowcut, highcut, fs, order=2)
        
        welch_xf, welch_yf = welch(filtered_yf, fs=fs, nperseg=len(filtered_yf) / 2, noverlap = len(filtered_yf) / 2.5) #spectrum
        
        welch_20_index = np.argwhere((welch_xf<25) & (welch_xf>0))        
        welch_yf_20 = welch_yf[welch_20_index] #cutting on 20Hz
        welch_xf_20 = welch_xf[welch_20_index] #cutting on 20Hz
        
        welch_yf_20 = pd.DataFrame(welch_yf_20, columns=[col + '_yf'])
        welch_xf_20 = pd.DataFrame(welch_xf_20, columns=[col + '_xf']) 

        welch_out = pd.concat([welch_out, welch_yf_20, welch_xf_20], axis=1)
        
    return(welch_out)

In [21]:
"""

Parameters: 
-----------
class_data_list: list
    List with DataFrames 
    
Returns: 
-------
all_fft_mem, all_fft_niom: list, list
    Dataframes with spectrum and frequencies DataFrames
"""

def full_spec_and_freq(class_data_list):
    
    all_fft_mem = pd.DataFrame()
    all_fft_niom = pd.DataFrame()

    for k, df in enumerate(class_data_list):

        electrode_signal = df[df.columns[::2]]
        optical_signal = df[df.columns[1::2]]

        electrode_signal = del_nul_and_nan(electrode_signal)
        optical_signal = del_nul_and_nan(optical_signal)        
        
        N = df.shape[0] # Number of sample points
        Fs_el = 1017.25 # sampling rate
        Fs_om = 1000.0
        lowcut = 4
        highcut = 20

        fft_mem = spec_and_freq_for_single_df_welch(electrode_signal, N, Fs_el, lowcut=lowcut, highcut=highcut)
        fft_niom = spec_and_freq_for_single_df_welch(optical_signal, N, Fs_om, lowcut=lowcut, highcut=highcut)

        all_fft_mem = pd.concat([all_fft_mem, fft_mem], axis=1)
        all_fft_niom = pd.concat([all_fft_niom, fft_niom], axis=1)
        
    return(all_fft_mem, all_fft_niom)

In [22]:
"""

Parameters: 
-----------
class_data_list: list
    List with DataFrames 
label: int, {0,1}
    Target label
    
Returns: 
-------
all_fft_mem, all_fft_niom: DataFrame
   Dataframes with added target value
"""

def add_target(class_data_list, label):
    
    all_fft_mem, all_fft_niom = full_spec_and_freq(class_data_list)

    all_fft_mem.fillna(value=0, axis=1, inplace=True)
    all_fft_niom.fillna(value=0, axis=1, inplace=True)


    target_mem = np.full(shape=(1, all_fft_mem.shape[1]), fill_value = label)
    target_mem = pd.DataFrame(target_mem, columns=all_fft_mem.columns, index=['target'])

    target_niom = np.full(shape=(1, all_fft_niom.shape[1]), fill_value = label)
    target_niom = pd.DataFrame(target_niom, columns=all_fft_niom.columns, index=['target'])

    all_fft_mem = pd.concat([all_fft_mem, target_mem], axis=0)
    all_fft_niom = pd.concat([all_fft_niom, target_niom], axis=0)
    return(all_fft_mem, all_fft_niom)

In [23]:
"""
Function that save the Welch spectrumn in .csv format 

Parameters: 
-----------
path: string
    Path where to save file in csv format
name_mem: string
    Name of the file of MEM spectrum 
name_niom: string
    Name of the file of NIOM spectrum 
    
"""

def spectrum(path, name_mem, name_niom):
    
    if not os.path.exists(path):
        os.mkdir(path)

    driver_spectrum_mem, driver_spectrum_niom  = add_target(drivers, 1)
    nondriver_spectrum_mem, nondriver_spectrum_niom = add_target(nondrivers, 0)
    noise_spectrum_mem, noise_spectrum_niom = add_target(noises, -1)


    full_spectrum_mem = pd.concat([driver_spectrum_mem, nondriver_spectrum_mem, noise_spectrum_mem], axis=1)
    full_spectrum_niom = pd.concat([driver_spectrum_niom, nondriver_spectrum_niom, noise_spectrum_niom], axis=1)
    
    full_spectrum_mem.to_csv(path + name_mem, sep=',', index=True)
    full_spectrum_niom.to_csv(path + name_niom, sep=',', index=True)
    
    return full_spectrum_mem, full_spectrum_niom

In [24]:
path = r'C:\Users\ecath\Desktop\Research\Raw Data\LD dataset spectrum'
mem, niom = spectrum(path, '\Spectrum of electrode LD Welch.csv', '\Spectrum of optical LD Welch.csv')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
