# Loading datasets

In [1]:
def load_all_datasets(path):
    """Load every EDF recording found directly inside ``path``.

    Parameters
    ----------
    path : str
        Directory that is scanned (non-recursively) for files whose name
        ends with ``.edf``.

    Returns
    -------
    list
        One preloaded raw object per matching file, in the order in which
        ``os.listdir`` reported the files.
    """
    import os
    from mne.io import read_raw_edf

    extension = '.edf'
    edf_names = [name for name in os.listdir(path=path) if name.endswith(extension)]

    # os.listdir yields bare file names, so they have to be joined with the
    # directory again; otherwise loading only works when cwd == path.
    return [
        read_raw_edf(os.path.join(path, name), preload=True)
        for name in edf_names
    ]

# Referencing scheme, montage, filtering, annotating, segmenting

In [2]:

def preliminary_steps(raw_datasets):
    """Re-reference, prune, filter and annotate each raw recording.

    For every recording the following steps are applied, in this order:

    1. re-reference to the ``CQ_CMS`` / ``CQ_DRL`` channels,
    2. drop every channel that is not one of the 14 electrodes of interest,
    3. attach the ``standard_1020`` montage,
    4. high-pass filter at 0.16 Hz (slow drifts),
    5. notch filter at 50 and 100 Hz (powerline noise),
    6. annotate five ``"Push"`` and five ``"Relax"`` segments of 7 s each.

    Steps 1-3 modify the objects in ``raw_datasets`` in place; the filtering
    and annotation are done on a copy, and that copy is what is returned.

    Parameters
    ----------
    raw_datasets : iterable
        Raw recordings (e.g. as returned by ``load_all_datasets``).

    Returns
    -------
    list
        The filtered and annotated copies, one per input recording.
    """
    import mne

    include_channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
                        'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
    notch_freqs = (50, 100)

    # Annotation timing, in seconds.
    # total time -> 60s + 5 x (20s) = 160 s
    start_time = 60
    delay = 1
    dur = 20
    len_event = 7
    n_trials = 5
    tot_time = start_time + delay

    # One Push per 20 s block; the matching Relax starts 10 s later.
    start_push = [tot_time + trial * dur for trial in range(n_trials)]
    start_relax = [onset + 10 for onset in start_push]

    pre_processed_datasets = []
    for dataset in raw_datasets:
        # re-referencing the data to 'CQ_CMS', 'CQ_DRL'
        dataset.set_eeg_reference(ref_channels=['CQ_CMS', 'CQ_DRL'])

        # Keep only the electrodes of interest. The names are collected first
        # and dropped in a single call, so the channel list is never modified
        # while it is being iterated.
        unwanted = [name for name in dataset.ch_names if name not in include_channels]
        if unwanted:
            dataset.drop_channels(unwanted)

        dataset.set_montage(mne.channels.make_standard_montage("standard_1020"))

        # Filter a copy so the caller's object keeps its unfiltered samples:
        # high-pass against slow drifts, then notch against powerline noise.
        dataset = dataset.copy().filter(l_freq=0.16, h_freq=None)
        dataset.notch_filter(freqs=notch_freqs)

        # Multiple annotations can be created at the same time.
        push_annotations = mne.Annotations(
            onset=start_push,
            duration=[len_event] * n_trials,
            description=["Push"] * n_trials,
            orig_time=dataset.info['meas_date'],
        )
        relax_annotations = mne.Annotations(
            onset=start_relax,
            duration=[len_event] * n_trials,
            description=["Relax"] * n_trials,
            orig_time=dataset.info['meas_date'],
        )
        dataset.set_annotations(push_annotations + relax_annotations)

        # Events are derived from these annotations later, when epochs are built.
        pre_processed_datasets.append(dataset)

    return pre_processed_datasets
        

In [3]:
def create_epochs(pre_processed_datasets):
    """Cut every annotated recording into epochs and concatenate them.

    Each recording is epoched around its own ``"Push"`` / ``"Relax"``
    annotations, from 0.5 s to 9.5 s after the annotation onset, with
    ``baseline=(0.5, 0.5)`` and without rejecting by annotation.

    Parameters
    ----------
    pre_processed_datasets : sequence
        Annotated raw recordings (e.g. from ``preliminary_steps``).

    Returns
    -------
    Epochs
        All epochs of all recordings, in input order.

    Raises
    ------
    IndexError
        If ``pre_processed_datasets`` is empty.
    """
    import mne

    if not pre_processed_datasets:
        raise IndexError("pre_processed_datasets is empty; nothing to epoch")

    delay = 0.5
    baseline = (0.5, 0.5)
    event_dict = {"Push": 1, "Relax": 2}

    epochs_per_recording = []
    for dataset in pre_processed_datasets:
        # Events are taken from *this* recording (sample indices depend on its
        # own sampling rate / first sample), with a fixed description -> code
        # mapping so the codes agree across recordings.
        events, _ = mne.events_from_annotations(dataset, event_id=event_dict)
        epochs_per_recording.append(
            mne.Epochs(
                dataset,
                events=events,
                event_id=event_dict,
                baseline=baseline,
                tmin=0.5,
                tmax=(10 - delay),
                preload=True,
                reject_by_annotation=False,
            )
        )

    # A single concatenation: no recording is epoched twice, so nothing has to
    # be dropped afterwards.
    return mne.concatenate_epochs(epochs_per_recording)
    

# Standard data cleaning approach (ICA)

In [4]:
def apply_ica(cleaned_epochs):
    """Fit a 14-component FastICA on the epochs and plot its diagnostics.

    Plots the source time courses, the component topographies and then the
    property figure of each of the 14 components. Nothing is returned.

    Parameters
    ----------
    cleaned_epochs : Epochs
        Epochs the ICA is fitted on and plotted against.
    """
    import numpy as np
    from mne.preprocessing import ICA

    n_components = 14
    decomposition = ICA(n_components=n_components, random_state=97, method="fastica")
    decomposition.fit(cleaned_epochs)

    # overview figures first ...
    decomposition.plot_sources(cleaned_epochs, show_scrollbars=False)
    decomposition.plot_components()

    # ... then one detailed figure per component
    for component_id in np.arange(0, n_components):
        decomposition.plot_properties(cleaned_epochs, picks=component_id)

# Automated data cleaning approach (Autoreject)

In [5]:
def clean_epochs(epochs_all):
    """Repair / reject epochs with AutoReject and plot per-channel thresholds.

    Parameters
    ----------
    epochs_all : Epochs
        Epochs to clean.

    Returns
    -------
    cleaned_epochs : Epochs
        Output of ``AutoReject.transform`` on ``epochs_all``.
    threshes : dict
        Rejection thresholds returned by ``autoreject.compute_thresholds``.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    import mne
    from autoreject import AutoReject, compute_thresholds

    # candidate parameters that autoreject searches over:
    # n_interpolates are the rho values, consensus_percs the kappa values
    n_interpolates = np.array([1, 4, 32])
    consensus_percs = np.linspace(0, 1.0, 11)

    # EEG channels only, leaving out channels marked as bad
    picks = mne.pick_types(epochs_all.info, meg=False, eeg=True, exclude='bads')

    ar = AutoReject(n_interpolates, consensus_percs, picks=picks,
                    thresh_method='random_search', random_state=42)

    # the estimator has to be fitted before it can report or repair anything
    ar.fit(epochs_all)

    reject_log = ar.get_reject_log(epochs_all)
    reject_log.plot()
    reject_log.plot_epochs(epochs_all)

    # repairing epochs where possible
    cleaned_epochs = ar.transform(epochs_all)

    # get a dictionary of rejection thresholds
    threshes = compute_thresholds(epochs_all, picks=picks, method='random_search',
                                  random_state=42, augment=False,
                                  verbose='progressbar')

    # Labels and heights both come from the threshold dict, so they stay
    # aligned even when some channels were excluded from ``picks``.
    plt.bar(list(threshes.keys()), list(threshes.values()), color='maroon')
    plt.xlabel("Channel name")
    plt.ylabel("Threshold value [V]")
    plt.title("Different threshold values across the sensors")
    plt.show()

    return cleaned_epochs, threshes
    
    

# Feature extraction

In [8]:
def create_time_frequency_matrices(low_band, high_band, num, n_epochs, channels, n_samples, epochs):
    """Compute Morlet power and ITC for every epoch individually.

    Parameters
    ----------
    low_band, high_band : float
        Lowest and highest frequency of interest; ``num`` frequencies are
        spaced logarithmically between them.
    num : int
        Number of frequencies.
    n_epochs : int
        Number of epochs to process (``epochs[0]`` .. ``epochs[n_epochs - 1]``).
    channels : int
        Size of the channel axis of the output arrays.
    n_samples : int
        Size of the time axis of the output arrays; it has to match the
        length produced by ``tfr_morlet`` with ``decim=3``.
    epochs : Epochs
        Source epochs.

    Returns
    -------
    freqs : ndarray
        The frequencies of interest.
    power_data, itc_data : ndarray
        Arrays of shape ``(n_epochs, channels, num, n_samples)``.
    instances : ndarray
        ``np.arange(n_epochs)``, the epoch indices that were processed.
    """
    import numpy as np
    import mne

    # frequencies of interest and the matching wavelet lengths
    freqs = np.logspace(*np.log10([low_band, high_band]), num=num)
    n_cycles = freqs / 2.

    # pre-allocated result containers
    output_shape = (n_epochs, channels, num, n_samples)
    power_data = np.zeros(shape=output_shape)
    itc_data = np.zeros(shape=output_shape)

    instances = np.arange(0, n_epochs, 1)
    for epoch_index in instances:
        single_power, single_itc = mne.time_frequency.tfr_morlet(
            epochs[epoch_index],
            freqs=freqs,
            n_cycles=n_cycles,
            use_fft=True,
            return_itc=True,
            decim=3,
            n_jobs=1,
        )
        power_data[epoch_index] = single_power.data
        itc_data[epoch_index] = single_itc.data

    return freqs, power_data, itc_data, instances

# Feature selection

In [14]:
def engineer_channels(frequency_vectors_class1, frequency_vectors_class2, epochs_object, n_epochs):
    """Keep the six channels whose two class averages differ the most.

    Both inputs are averaged over axis 2 (frequencies) and axis 0
    (instances). The absolute class difference per channel is averaged inside
    ten consecutive 76-sample intervals, and the interval averages are
    combined into one score per channel. The six highest-scoring channels are
    then extracted with ``my_code.select_channels``.

    Parameters
    ----------
    frequency_vectors_class1, frequency_vectors_class2 : ndarray
        4-D arrays indexed (instance, channel, frequency, timepoint) with 769
        timepoints.
    epochs_object : Epochs
        Only ``ch_names`` is read; its length is the number of channels scored.
    n_epochs : int
        Forwarded unchanged to ``my_code.select_channels``.

    Returns
    -------
    class1_channel_engineered, class2_channel_engineered
        Results of ``my_code.select_channels`` for each class.
    channel_indices : list of int
        Indices of the selected channels, ordered from the lowest to the
        highest score among the selected ones.
    frequency_channel_differences_indexing : list of float
        Score of every channel.
    """
    import numpy as np
    import my_code

    n_times = 769
    n_intervals = 10
    interval_len = 76
    n_selected = 6

    # mean over the frequencies, then over all instances -> (channel, time)
    class1_mean = frequency_vectors_class1.mean(axis=2).mean(axis=0)
    class2_mean = frequency_vectors_class2.mean(axis=2).mean(axis=0)

    n_channels = len(epochs_object.ch_names)
    abs_difference = np.abs(class1_mean - class2_mean)[:n_channels, :n_times]

    # average absolute difference inside each of the ten intervals
    interval_means = (
        abs_difference[:, :n_intervals * interval_len]
        .reshape(-1, n_intervals, interval_len)
        .mean(axis=2)
    )

    # NOTE: the divisor is n_intervals + 1, not n_intervals. This keeps the
    # returned scores on the scale this function has always reported; it is a
    # constant factor and therefore has no influence on the ranking.
    channel_scores = interval_means.sum(axis=1) / (n_intervals + 1)

    # A stable argsort gives distinct indices even when scores are tied.
    ranking = np.argsort(channel_scores, kind="stable")
    channel_indices = [int(index) for index in ranking[-n_selected:]]

    # creating frequency input vectors with engineered channels
    class1_channel_engineered = my_code.select_channels(channel_indices, frequency_vectors_class1, n_epochs, n_times)
    class2_channel_engineered = my_code.select_channels(channel_indices, frequency_vectors_class2, n_epochs, n_times)

    return class1_channel_engineered, class2_channel_engineered, channel_indices, channel_scores.tolist()

In [15]:
def engineer_time(frequency_vectors_class1, frequency_vectors_class2, epochs_object):
    """Crop both classes to the time window where they differ the most.

    Both inputs are averaged over axis 2 (frequencies) and axis 0
    (instances). The absolute class difference is averaged inside ten
    consecutive 76-sample intervals and then across channels. The interval
    with the largest average is extended by 230 samples on each side, giving
    a 536-sample window (about 70% of the 769 timepoints). When that window
    would leave the range ``[0, 760)`` it is shifted back inside instead of
    being shortened, so the output always has 536 timepoints.

    Parameters
    ----------
    frequency_vectors_class1, frequency_vectors_class2 : ndarray
        4-D arrays indexed (instance, channel, frequency, timepoint) with at
        least 760 timepoints (769 in this pipeline).
    epochs_object : Epochs
        Only ``ch_names`` is read; its length is the number of channels used.

    Returns
    -------
    class1_channel_engineered, class2_channel_engineered : ndarray
        The inputs cropped along the last axis to the selected window.
    max_time_difference_index : int
        Index (0-9) of the interval with the largest class difference.
    averaged_frequency_differences_per_channel_in_intervals : ndarray
        Per-channel, per-interval mean absolute difference,
        shape ``(n_channels, 10)``.
    """
    import numpy as np

    n_intervals = 10
    interval_len = 76
    context = 230                              # samples added on each side
    usable_end = n_intervals * interval_len    # 760
    window_len = interval_len + 2 * context    # 536

    # mean over the frequencies, then over all instances -> (channel, time)
    class1_mean = frequency_vectors_class1.mean(axis=2).mean(axis=0)
    class2_mean = frequency_vectors_class2.mean(axis=2).mean(axis=0)

    n_channels = len(epochs_object.ch_names)
    abs_difference = np.abs(class1_mean - class2_mean)[:n_channels]

    # average absolute difference inside each of the ten intervals
    interval_means = (
        abs_difference[:, :usable_end]
        .reshape(-1, n_intervals, interval_len)
        .mean(axis=2)
    )

    # collapse the channels and locate the most discriminative interval
    collapsed = interval_means.mean(axis=0)
    max_time_difference_index = int(np.argmax(collapsed))

    # Centre the window on that interval where possible; otherwise anchor it
    # at the start (early intervals) or at sample 760 (late intervals).
    interval_start = max_time_difference_index * interval_len
    window_start = min(max(interval_start - context, 0), usable_end - window_len)
    window_stop = window_start + window_len

    class1_channel_engineered = frequency_vectors_class1[:, :, :, window_start:window_stop]
    class2_channel_engineered = frequency_vectors_class2[:, :, :, window_start:window_stop]

    return class1_channel_engineered, class2_channel_engineered, max_time_difference_index, interval_means

In [16]:
def engineer_channels_time(frequency_vectors_class1, frequency_vectors_class2, epochs_object, n_epochs):
    """Select the six most discriminative channels, then crop in time.

    Both inputs are averaged over axis 2 (frequencies) and axis 0
    (instances). The absolute class difference per channel is averaged inside
    ten consecutive 76-sample intervals. The six channels with the largest
    overall difference are extracted with ``my_code.select_channels``. Among
    those channels the interval with the largest average difference is
    located and extended by 230 samples on each side, giving a 536-sample
    window. A window that would leave the 769 available timepoints is shifted
    back inside rather than being cut short.

    Parameters
    ----------
    frequency_vectors_class1, frequency_vectors_class2 : ndarray
        4-D arrays indexed (instance, channel, frequency, timepoint) with 769
        timepoints.
    epochs_object : Epochs
        Only ``ch_names`` is read; its length is the number of channels scored.
    n_epochs : int
        Forwarded unchanged to ``my_code.select_channels``.

    Returns
    -------
    class1_channel_time_engineered, class2_channel_time_engineered
        Channel-selected data cropped along the last axis to the window.
    max_time_difference_index : int
        Index (0-9) of the interval with the largest class difference.
    channel_indices : list of int
        Indices of the selected channels, ordered from the lowest to the
        highest score among the selected ones.
    """
    import numpy as np
    import my_code

    n_times = 769
    n_intervals = 10
    interval_len = 76
    n_selected = 6
    context = 230                              # samples added on each side
    window_len = interval_len + 2 * context    # 536

    # mean over the frequencies, then over all instances -> (channel, time)
    class1_mean = frequency_vectors_class1.mean(axis=2).mean(axis=0)
    class2_mean = frequency_vectors_class2.mean(axis=2).mean(axis=0)

    n_channels = len(epochs_object.ch_names)
    abs_difference = np.abs(class1_mean - class2_mean)[:n_channels, :n_times]

    # average absolute difference inside each of the ten intervals
    interval_means = (
        abs_difference[:, :n_intervals * interval_len]
        .reshape(-1, n_intervals, interval_len)
        .mean(axis=2)
    )

    # One score per channel. A stable argsort gives distinct indices even
    # when scores are tied.
    channel_scores = interval_means.mean(axis=1)
    ranking = np.argsort(channel_scores, kind="stable")
    channel_indices = [int(index) for index in ranking[-n_selected:]]

    # most discriminative interval, judged on the selected channels only
    collapsed = interval_means[channel_indices, :].mean(axis=0)
    max_time_difference_index = int(np.argmax(collapsed))

    # select channels from original frequency vectors
    class1_selected = my_code.select_channels(channel_indices, frequency_vectors_class1, n_epochs, n_times)
    class2_selected = my_code.select_channels(channel_indices, frequency_vectors_class2, n_epochs, n_times)

    # Centre the window on the interval where possible; clamp it so that it
    # always spans window_len samples inside [0, n_times).
    interval_start = max_time_difference_index * interval_len
    window_start = min(max(interval_start - context, 0), n_times - window_len)
    window_stop = window_start + window_len

    class1_channel_time_engineered = class1_selected[:, :, :, window_start:window_stop]
    class2_channel_time_engineered = class2_selected[:, :, :, window_start:window_stop]

    return class1_channel_time_engineered, class2_channel_time_engineered, max_time_difference_index, channel_indices

# Classification

In [17]:
def run_lda(class1, class2, n_epochs):
    """Cross-validate a standardised LDA classifier on two classes of trials.

    Each class array is averaged over axis 2 and flattened to
    ``(n_epochs, -1)``. Class 1 is labelled 1 and class 2 is labelled 0. The
    classifier is scored with 10 shuffle splits (20% test size,
    ``random_state=42``) and the mean accuracy is printed.

    Parameters
    ----------
    class1, class2 : ndarray
        Trial data of each class, with ``n_epochs`` trials along axis 0.
    n_epochs : int
        Number of trials per class.

    Returns
    -------
    ndarray
        The accuracy of each of the 10 splits.
    """
    import numpy as np
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.model_selection import ShuffleSplit, cross_val_score
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    # average over the frequency axis, then one flat feature row per trial
    per_class = [
        trials.mean(axis=2).reshape(n_epochs, -1)
        for trials in (class1, class2)
    ]
    input_data = np.concatenate(per_class, axis=0)

    # class 1 -> 1, class 2 -> 0
    labels = np.concatenate(
        (np.ones(shape=(n_epochs)), np.zeros(shape=(n_epochs))), axis=0
    )

    # scaling lives inside the pipeline, so it is refitted on every split
    classifier = Pipeline([
        ('standardscaler', StandardScaler()),
        ('LDA', LinearDiscriminantAnalysis()),
    ])
    splitter = ShuffleSplit(10, test_size=0.2, random_state=42)
    scores = cross_val_score(classifier, input_data, labels, cv=splitter, n_jobs=1)

    print("LDA Classification accuracy:", np.mean(scores))

    return scores



In [18]:
def lda_for_ready_input(input_data, labels):
    """Cross-validate a standardised LDA classifier on prepared features.

    The classifier is scored with 10 shuffle splits (20% test size,
    ``random_state=42``) and the mean accuracy is printed.

    Parameters
    ----------
    input_data : array-like, shape (n_samples, n_features)
        Feature matrix.
    labels : array-like, shape (n_samples,)
        Class label of every row of ``input_data``.

    Returns
    -------
    ndarray
        The accuracy of each of the 10 splits.
    """
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.model_selection import ShuffleSplit, cross_val_score
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    # The scaler is a pipeline step, so its statistics are estimated on the
    # training part of each split only; held-out rows no longer influence the
    # scaling.
    clf = Pipeline([
        ('standardscaler', StandardScaler()),
        ('LDA', LinearDiscriminantAnalysis()),
    ])
    cv = ShuffleSplit(10, test_size=0.2, random_state=42)
    scores = cross_val_score(clf, input_data, labels, cv=cv, n_jobs=1)

    print("LDA Classification accuracy:", scores.mean())
    return scores

In [19]:
def run_svm(class1, class2, n_epochs):
    """Cross-validate a standardised SVM classifier on two classes of trials.

    Each class array is averaged over axis 2, the classes are stacked and
    every trial is flattened to one feature row. Class 1 is labelled 1 and
    class 2 is labelled 0. The classifier is scored with 10 shuffle splits
    (20% test size, ``random_state=42``); mean and standard deviation of the
    accuracy are printed.

    Parameters
    ----------
    class1, class2 : ndarray
        Trial data of each class, with ``n_epochs`` trials along axis 0.
    n_epochs : int
        Number of trials per class.

    Returns
    -------
    ndarray
        The accuracy of each of the 10 splits.
    """
    import numpy as np
    from sklearn.model_selection import ShuffleSplit, cross_val_score
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    # average over frequencies in the range
    class1 = class1.mean(axis=2)
    class2 = class2.mean(axis=2)

    # one input matrix for both classes, one flat feature row per trial
    input_data = np.concatenate((class1, class2), axis=0)
    input_data = input_data.reshape(len(input_data), -1)

    # class 1 -> 1, class 2 -> 0
    push = np.ones(shape=(n_epochs))
    relax = np.zeros(shape=(n_epochs))
    labels = np.concatenate((push, relax), axis=0)

    # Degree-1 polynomial kernel. Scaling is a pipeline step so that it is
    # fitted on the training part of each split only.
    clf = Pipeline([
        ('standardscaler', StandardScaler()),
        ('SVC', SVC(C=1, kernel='poly', degree=1)),
    ])

    # model_selection.ShuffleSplit takes the number of splits, not the number
    # of samples, as its first argument.
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

    scores_full = cross_val_score(clf, input_data, labels, cv=cv, n_jobs=1)

    print("SVM Classification score: %s (std. %s)" % (np.mean(scores_full), np.std(scores_full)))

    return scores_full



In [20]:
def svm_for_ready_input(input_data, n_epochs):
    """Cross-validate a standardised SVM classifier on prepared features.

    The first ``n_epochs`` rows of ``input_data`` are labelled 1 and the next
    ``n_epochs`` rows are labelled 0. The classifier is scored with 10 shuffle
    splits (20% test size, ``random_state=42``); mean and standard deviation
    of the accuracy are printed.

    Parameters
    ----------
    input_data : array-like, shape (2 * n_epochs, n_features)
        Feature matrix, class 1 rows first.
    n_epochs : int
        Number of rows per class.

    Returns
    -------
    ndarray
        The accuracy of each of the 10 splits.
    """
    import numpy as np
    from sklearn.model_selection import ShuffleSplit, cross_val_score
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    # first half -> 1, second half -> 0
    push = np.ones(shape=(n_epochs))
    relax = np.zeros(shape=(n_epochs))
    labels = np.concatenate((push, relax), axis=0)

    # Degree-1 polynomial kernel. Scaling is a pipeline step so that it is
    # fitted on the training part of each split only.
    clf = Pipeline([
        ('standardscaler', StandardScaler()),
        ('SVC', SVC(C=1, kernel='poly', degree=1)),
    ])

    # model_selection.ShuffleSplit takes the number of splits, not the number
    # of samples, as its first argument.
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

    scores_full = cross_val_score(clf, input_data, labels, cv=cv, n_jobs=1)

    print("SVM Classification score: %s (std. %s)" % (np.mean(scores_full), np.std(scores_full)))

    return scores_full

In [23]:
def prepare_raw_for_DL_input(epoch_object, n_epochs, train_size, validation_size, test_size):
    """Turn raw epochs into shuffled train / validation / test arrays.

    The ``"Push"`` and ``"Relax"`` epochs are flattened to one feature row per
    epoch, standardised per feature over all rows, and shuffled together with
    their labels (Push -> 0, Relax -> 1). The first ``train_size`` rows form
    the training pool; its first ``validation_size`` rows become the
    validation set and the rest of the pool becomes the training set. The
    last ``test_size`` rows form the test set.

    Parameters
    ----------
    epoch_object : Epochs
        Must be indexable by ``"Push"`` and ``"Relax"`` and hold ``n_epochs``
        epochs of each.
    n_epochs : int
        Number of epochs per class.
    train_size : int
        Size of the training pool (training + validation rows).
    validation_size : int
        Number of rows taken from the start of the pool for validation.
    test_size : int
        Number of rows taken from the end of the data for testing.

    Returns
    -------
    train_samples : ndarray, shape (train_size - validation_size, 1, n_features)
    validation_samples : ndarray, shape (validation_size, 1, n_features)
    train_labels : ndarray
    validation_labels : ndarray
    test_samples : ndarray, shape (test_size, n_features)
    test_labels : ndarray
    """
    import numpy as np
    from sklearn.preprocessing import StandardScaler
    from sklearn.utils import shuffle

    # extract the data of both classes and flatten every epoch
    push_data = epoch_object["Push"].get_data()
    relax_data = epoch_object["Relax"].get_data()
    features = np.concatenate((push_data, relax_data), axis=0)
    features = features.reshape(n_epochs * 2, -1)

    # Push -> 0, Relax -> 1
    labels = np.concatenate((np.zeros(n_epochs), np.ones(n_epochs)))

    # normalizing features (statistics are taken over all rows)
    features = StandardScaler().fit_transform(features)

    # shuffle rows and labels with the same permutation
    features, labels = shuffle(features, labels)

    # the feature width is read from the data instead of being hard-coded
    n_rows, n_features = features.shape

    # training pool, split into validation (head) and training (remainder);
    # the two parts do not overlap
    pool_samples = features[:train_size]
    pool_labels = labels[:train_size]
    validation_samples = pool_samples[:validation_size]
    validation_labels = pool_labels[:validation_size]
    train_samples = pool_samples[validation_size:]
    train_labels = pool_labels[validation_size:]

    # the test set is taken from the end so it does not reuse the head rows
    test_samples = features[n_rows - test_size:]
    test_labels = labels[n_rows - test_size:]

    # reshaping the data to be suitable for LSTM input
    train_samples = train_samples.reshape(-1, 1, n_features)
    validation_samples = validation_samples.reshape(-1, 1, n_features)

    return train_samples, validation_samples, train_labels, validation_labels, test_samples, test_labels

In [24]:
def prepare_frequencies_for_DL_input(class1data, class2data, n_epochs, train_size, validation_size, test_size):
    """Turn per-class time-frequency data into shuffled train / test arrays.

    Each class array is averaged over axis 2 and flattened to one feature row
    per epoch. Rows are standardised per feature over all rows and shuffled
    together with their labels (class 1 -> 0, class 2 -> 1). The first
    ``train_size`` rows form the training set, and the validation set is the
    first ``validation_size`` rows *of that training set* (the two overlap).
    The last ``test_size`` rows form the test set.

    Parameters
    ----------
    class1data, class2data : ndarray
        Per-class arrays with ``n_epochs`` epochs along axis 0, e.g. of shape
        (115, 14, 20, 769).
    n_epochs : int
        Number of epochs per class.
    train_size, validation_size, test_size : int
        Number of rows in the respective partition.

    Returns
    -------
    train_samples : ndarray, shape (train_size, 1, n_features)
    validation_samples : ndarray, shape (validation_size, n_features)
    train_labels : ndarray
    validation_labels : ndarray
    test_samples : ndarray, shape (test_size, 1, n_features)
    test_labels : ndarray
    """
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import StandardScaler
    from sklearn.utils import shuffle

    # collapse the frequency axis and flatten every epoch to a single row
    class1_rows = class1data.mean(axis=2).reshape(n_epochs, -1)
    class2_rows = class2data.mean(axis=2).reshape(n_epochs, -1)
    n_features = class2_rows.shape[1]

    # stack both classes and normalize the feature values
    stacked = np.concatenate((class1_rows, class2_rows), axis=0)
    stacked = StandardScaler().fit_transform(stacked)

    # class 1 -> 0, class 2 -> 1, stored as an extra trailing column so that
    # rows and labels are shuffled together
    label_column = np.concatenate((np.zeros(n_epochs), np.ones(n_epochs)))
    label_column = label_column.reshape(n_epochs * 2, 1)
    table = pd.DataFrame(data=np.concatenate((stacked, label_column), axis=1))
    table = shuffle(table)

    # split the shuffled table back into labels and features
    labels = np.array(table[n_features].tolist())
    features = table.iloc[:, 0:n_features].to_numpy()

    # train-test partition
    train_samples = features[:train_size]
    train_labels = labels[:train_size]
    validation_samples = train_samples[:validation_size]
    validation_labels = train_labels[:validation_size]
    test_samples = features[-test_size:]
    test_labels = labels[-test_size:]

    # reshaping the data to be suitable for LSTM input
    train_samples = train_samples.reshape(train_size, 1, n_features)
    test_samples = test_samples.reshape(test_size, 1, n_features)

    return train_samples, validation_samples, train_labels, validation_labels, test_samples, test_labels
    
    
    

In [25]:
def run_LSTM(train_samples, validation_samples, train_labels, validation_labels, test_samples, test_labels):
    """Cross-validate a stacked-LSTM binary classifier and plot its loss curves.

    The training data is cut into K=3 contiguous folds.  For every fold a new
    model is built and trained for ``num_epochs`` epochs on the other folds
    while the held-out fold serves as validation data.  Afterwards the
    fold-averaged validation/training losses are plotted and the
    epoch-averaged validation and training accuracies are printed.

    Parameters
    ----------
    train_samples : array indexed as ``[sample, timestep, feature]``.
        The model is built for one timestep and ``train_samples.shape[2]``
        features.
    train_labels : targets aligned with ``train_samples`` (the model ends in a
        sigmoid unit trained with binary cross-entropy).
    validation_samples, validation_labels, test_samples, test_labels :
        Accepted so that all runners share one signature; not used here.

    Returns
    -------
    tuple of four lists, each holding one per-epoch list per fold, in the
    order: validation losses, validation accuracies, training accuracies,
    training losses.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import Sequential, layers

    K = 3
    num_epochs = 20
    # Samples beyond K * num_val_samples never act as validation data; they
    # stay in the training part of every fold.
    num_val_samples = len(train_samples) // K

    def build_model():
        """Return a freshly compiled three-layer LSTM classifier."""
        # Reset Keras' global state so each fold starts from a clean slate.
        tf.keras.backend.clear_session()

        model = Sequential()
        model.add(layers.LSTM(128, input_shape=(1, train_samples.shape[2]), return_sequences=True))
        model.add(layers.BatchNormalization())
        model.add(layers.LSTM(64, return_sequences=True))
        model.add(layers.BatchNormalization())
        model.add(layers.LSTM(32, return_sequences=True))
        model.add(layers.BatchNormalization())
        model.add(layers.Dense(1, activation='sigmoid'))

        opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
        model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
        model.summary()
        return model

    class CustomCallback(tf.keras.callbacks.Callback):
        """Print a one-character spinner that advances once per epoch."""

        def on_epoch_begin(self, epoch, logs=None):
            c = ['\b|', '\b/', '\b-', '\b\\']
            print(c[epoch % 4], end='')

        def on_epoch_end(self, epoch, logs=None):
            print('\b', end='')

    def average_over_folds(histories):
        """Average the per-fold histories epoch by epoch."""
        # Use num_epochs rather than a literal so changing it cannot desync
        # the averaging from the training loop.
        return [np.mean([fold[epoch] for fold in histories]) for epoch in range(num_epochs)]

    def plot_loss(start, data):
        plt.plot(range(start + 1, len(data) + 1), data[start:])
        plt.xlabel('Epochs')
        plt.ylabel('Validation Loss')
        plt.show()

    def plot_losses(start, data1, data2):
        plt.plot(range(start + 1, len(data1) + 1), data1[start:], color="b", label="Averaged Validation loss")
        plt.plot(range(start + 1, len(data2) + 1), data2[start:], color="r", label="Averaged Training loss")
        plt.xlabel('Epochs')
        plt.ylabel('Validation & Training Loss')
        plt.legend()
        plt.show()

    all_loss_histories = []
    all_acc_histories = []
    all_training_accuracies_histories = []
    all_training_losses_histories = []

    for i in range(K):
        print('processing fold', i)

        # Validation data: partition i.
        a, b = i * num_val_samples, (i + 1) * num_val_samples
        val_data = train_samples[a:b]
        val_targets = train_labels[a:b]

        # Training data: everything outside partition i.
        partial_train_data = np.concatenate([train_samples[:a], train_samples[b:]], axis=0)
        partial_train_targets = np.concatenate([train_labels[:a], train_labels[b:]], axis=0)

        modelsmall = build_model()

        # verbose=0 silences Keras; the callback shows progress instead.
        history = modelsmall.fit(partial_train_data, partial_train_targets,
                                 validation_data=(val_data, val_targets),
                                 epochs=num_epochs, batch_size=16, verbose=0,
                                 callbacks=[CustomCallback()])

        all_loss_histories.append(history.history['val_loss'])
        all_acc_histories.append(history.history["val_accuracy"])
        all_training_losses_histories.append(history.history['loss'])
        all_training_accuracies_histories.append(history.history["accuracy"])

    average_loss_history_small = average_over_folds(all_loss_histories)
    average_training_loss_history_small = average_over_folds(all_training_losses_histories)

    plot_loss(0, average_loss_history_small)
    plot_losses(0, average_loss_history_small, average_training_loss_history_small)

    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    average_acc_history_small = np.array(average_over_folds(all_acc_histories), float)
    average_training_accuracy_history_small = np.array(
        average_over_folds(all_training_accuracies_histories), float)

    print(average_acc_history_small.mean())
    print(average_training_accuracy_history_small.mean())
    return all_loss_histories, all_acc_histories, all_training_accuracies_histories, all_training_losses_histories

In [27]:
def run_CONV1D(train_samples, validation_samples, train_labels, validation_labels, test_samples, test_labels):
    """Cross-validate a 1-D convolutional binary classifier and plot its loss curves.

    The training data is cut into K=3 contiguous folds.  For every fold a new
    model is built and trained for ``num_epochs`` epochs on the other folds
    while the held-out fold serves as validation data.  Afterwards the
    fold-averaged validation/training losses are plotted and the
    epoch-averaged validation and training accuracies are printed.

    Parameters
    ----------
    train_samples : array indexed as ``[sample, step, feature]``.
        The model is built for one step and ``train_samples.shape[2]``
        features.
    train_labels : targets aligned with ``train_samples`` (the model ends in a
        sigmoid unit trained with binary cross-entropy).
    validation_samples, validation_labels, test_samples, test_labels :
        Accepted so that all runners share one signature; not used here.

    Returns
    -------
    tuple of four lists, each holding one per-epoch list per fold, in the
    order: validation losses, validation accuracies, training accuracies,
    training losses.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import Sequential, layers

    K = 3
    num_epochs = 20
    # Samples beyond K * num_val_samples never act as validation data; they
    # stay in the training part of every fold.
    num_val_samples = len(train_samples) // K

    def build_model():
        """Return a freshly compiled Conv1D classifier."""
        # Reset Keras' global state so each fold starts from a clean slate.
        tf.keras.backend.clear_session()

        model = Sequential()
        model.add(layers.Conv1D(512, 1, activation='relu', input_shape=(1, train_samples.shape[2])))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling1D(1))
        model.add(layers.BatchNormalization())
        model.add(layers.Conv1D(512, 1, activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling1D(1))
        model.add(layers.BatchNormalization())
        model.add(layers.Conv1D(128, 1, activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.GlobalMaxPooling1D())
        model.add(layers.BatchNormalization())
        model.add(layers.Dense(1, activation="sigmoid"))

        opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
        model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
        model.summary()
        return model

    class CustomCallback(tf.keras.callbacks.Callback):
        """Print a one-character spinner that advances once per epoch."""

        def on_epoch_begin(self, epoch, logs=None):
            c = ['\b|', '\b/', '\b-', '\b\\']
            print(c[epoch % 4], end='')

        def on_epoch_end(self, epoch, logs=None):
            print('\b', end='')

    def average_over_folds(histories):
        """Average the per-fold histories epoch by epoch."""
        # Use num_epochs rather than a literal so changing it cannot desync
        # the averaging from the training loop.
        return [np.mean([fold[epoch] for fold in histories]) for epoch in range(num_epochs)]

    def plot_loss(start, data):
        plt.plot(range(start + 1, len(data) + 1), data[start:])
        plt.xlabel('Epochs')
        plt.ylabel('Validation Loss')
        plt.show()

    def plot_losses(start, data1, data2):
        plt.plot(range(start + 1, len(data1) + 1), data1[start:], color="b", label="Averaged Validation loss")
        plt.plot(range(start + 1, len(data2) + 1), data2[start:], color="r", label="Averaged Training loss")
        plt.xlabel('Epochs')
        plt.ylabel('Validation & Training Loss')
        plt.legend()
        plt.show()

    all_loss_histories = []
    all_acc_histories = []
    all_training_accuracies_histories = []
    all_training_losses_histories = []

    for i in range(K):
        print('processing fold', i)

        # Validation data: partition i.
        a, b = i * num_val_samples, (i + 1) * num_val_samples
        val_data = train_samples[a:b]
        val_targets = train_labels[a:b]

        # Training data: everything outside partition i.
        partial_train_data = np.concatenate([train_samples[:a], train_samples[b:]], axis=0)
        partial_train_targets = np.concatenate([train_labels[:a], train_labels[b:]], axis=0)

        modelsmall = build_model()

        # verbose=0 silences Keras; the callback shows progress instead.
        history = modelsmall.fit(partial_train_data, partial_train_targets,
                                 validation_data=(val_data, val_targets),
                                 epochs=num_epochs, batch_size=16, verbose=0,
                                 callbacks=[CustomCallback()])

        all_loss_histories.append(history.history['val_loss'])
        all_acc_histories.append(history.history["val_accuracy"])
        all_training_losses_histories.append(history.history['loss'])
        all_training_accuracies_histories.append(history.history["accuracy"])

    average_loss_history_small = average_over_folds(all_loss_histories)
    average_training_loss_history_small = average_over_folds(all_training_losses_histories)

    plot_loss(0, average_loss_history_small)
    plot_losses(0, average_loss_history_small, average_training_loss_history_small)

    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    average_acc_history_small = np.array(average_over_folds(all_acc_histories), float)
    average_training_accuracy_history_small = np.array(
        average_over_folds(all_training_accuracies_histories), float)

    print(average_acc_history_small.mean())
    print(average_training_accuracy_history_small.mean())

    return all_loss_histories, all_acc_histories, all_training_accuracies_histories, all_training_losses_histories

In [29]:
def run_CONV2D(n_epochs, train_size, validation_size, test_size, image_dir=None):
    """Cross-validate a 2-D CNN on spectrogram images and plot its loss curves.

    Images whose file name ends in ``p.png`` are loaded as class 0 and those
    ending in ``r.png`` as class 1.  Every image is rescaled to 150x150,
    divided by 255, and standardised per colour channel.  The shuffled data
    is then used for K=3 fold cross-validation; the fold-averaged losses are
    plotted and the epoch-averaged accuracies are printed.

    Parameters
    ----------
    n_epochs : number of images expected per class (size of the per-class
        image buffers); the data set has ``2 * n_epochs`` samples.
    train_size : number of shuffled samples that form the training pool.
    validation_size : number of samples at the head of the training pool that
        are excluded from cross-validation.
    test_size : accepted for signature compatibility; not used here because
        this function performs no test evaluation.
    image_dir : directory containing the PNG files.  Defaults to the original
        hard-coded thesis data folder.

    Returns
    -------
    tuple of four lists, each holding one per-epoch list per fold, in the
    order: validation losses, validation accuracies, training accuracies,
    training losses.

    Raises
    ------
    OSError
        If an image file that was listed cannot be decoded by OpenCV.
    """
    import os

    import cv2
    import matplotlib.pyplot as plt
    import numpy as np
    import seaborn as sns
    import tensorflow as tf
    from sklearn.preprocessing import StandardScaler
    from tensorflow.keras import layers, models

    sns.set(color_codes=True)

    if image_dir is None:
        image_dir = "C:\\Users\\domin\\Desktop\\Thesis Coding\\Datafiles from Giuseppe"

    def load_images(suffix):
        """Load every image in image_dir whose name ends with *suffix*."""
        names = [name for name in os.listdir(path=image_dir) if name.endswith(suffix)]
        images = np.zeros((n_epochs, 150, 150, 3))
        for position, name in enumerate(names):
            # imread needs the full path; a bare file name only resolves when
            # the working directory happens to be image_dir.
            full_path = os.path.join(image_dir, name)
            image = cv2.imread(full_path)
            if image is None:
                raise OSError("could not read image: " + full_path)
            # cv2.resize rescales the picture; ndarray.resize would merely
            # truncate or pad the raw pixel buffer.
            images[position] = cv2.resize(image, (150, 150))
        return images

    list_of_images_push = load_images('p.png')
    list_of_images_relax = load_images('r.png')

    # Push images first (label 0), relax images second (label 1).
    features = np.concatenate((list_of_images_push, list_of_images_relax), axis=0)
    labels = np.concatenate((np.zeros(n_epochs), np.ones(n_epochs)))

    # Flatten width x height so each image is a (pixels, channels) table, and
    # bring the 8-bit channel values into [0, 1].
    features = features.reshape(n_epochs * 2, 150 * 150, 3).astype("float32")
    features /= 255

    # Shuffle samples and labels with the same permutation.
    indices = np.arange(features.shape[0])
    np.random.shuffle(indices)
    features = features[indices]
    labels = labels[indices]

    # Standardise each image per colour channel.  Iterate over the actual
    # sample count instead of a hard-coded 230.
    for i in range(features.shape[0]):
        features[i] = StandardScaler().fit_transform(features[i])

    features = features.reshape((n_epochs * 2, 150, 150, 3))

    # Training pool = first train_size samples minus the validation head.
    # (Slicing with [-validation_size:] kept only the LAST validation_size
    # samples and discarded most of the training data.)
    train_samples = features[:train_size][validation_size:]
    train_labels = labels[:train_size][validation_size:]

    def build_model():
        """Return a freshly compiled two-block Conv2D classifier."""
        model = models.Sequential()
        model.add(layers.Conv2D(32, (3, 3), activation='relu',
                                input_shape=(150, 150, 3)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.BatchNormalization())
        model.add(layers.Conv2D(64, (3, 3), activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.BatchNormalization())
        model.add(layers.Flatten())
        model.add(layers.Dense(512, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
        model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
        model.summary()
        return model

    class CustomCallback(tf.keras.callbacks.Callback):
        """Print a one-character spinner that advances once per epoch."""

        def on_epoch_begin(self, epoch, logs=None):
            c = ['\b|', '\b/', '\b-', '\b\\']
            print(c[epoch % 4], end='')

        def on_epoch_end(self, epoch, logs=None):
            print('\b', end='')

    K = 3
    num_epochs = 20
    # Samples beyond K * num_val_samples never act as validation data; they
    # stay in the training part of every fold.
    num_val_samples = len(train_samples) // K

    def average_over_folds(histories):
        """Average the per-fold histories epoch by epoch."""
        return [np.mean([fold[epoch] for fold in histories]) for epoch in range(num_epochs)]

    def plot_loss(start, data):
        plt.plot(range(start + 1, len(data) + 1), data[start:])
        plt.xlabel('Epochs')
        plt.ylabel('Validation Loss')
        plt.show()

    def plot_losses(start, data1, data2):
        plt.plot(range(start + 1, len(data1) + 1), data1[start:], color="b", label="Averaged Validation loss")
        plt.plot(range(start + 1, len(data2) + 1), data2[start:], color="r", label="Averaged Training loss")
        plt.xlabel('Epochs')
        plt.ylabel('Validation & Training Loss')
        plt.legend()
        plt.show()

    all_loss_histories = []
    all_acc_histories = []
    all_training_accuracies_histories = []
    all_training_losses_histories = []

    for i in range(K):
        print('processing fold', i)

        # Validation data: partition i.
        a, b = i * num_val_samples, (i + 1) * num_val_samples
        val_data = train_samples[a:b]
        val_targets = train_labels[a:b]

        # Training data: everything outside partition i.
        partial_train_data = np.concatenate([train_samples[:a], train_samples[b:]], axis=0)
        partial_train_targets = np.concatenate([train_labels[:a], train_labels[b:]], axis=0)

        modelsmall = build_model()

        # verbose=0 silences Keras; the callback shows progress instead.
        history = modelsmall.fit(partial_train_data, partial_train_targets,
                                 validation_data=(val_data, val_targets),
                                 epochs=num_epochs, batch_size=16, verbose=0,
                                 callbacks=[CustomCallback()])

        all_loss_histories.append(history.history['val_loss'])
        all_acc_histories.append(history.history["val_accuracy"])
        all_training_losses_histories.append(history.history['loss'])
        all_training_accuracies_histories.append(history.history["accuracy"])

    average_loss_history_small = average_over_folds(all_loss_histories)
    average_training_loss_history_small = average_over_folds(all_training_losses_histories)

    plot_loss(0, average_loss_history_small)
    plot_losses(0, average_loss_history_small, average_training_loss_history_small)

    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    average_acc_history_small = np.array(average_over_folds(all_acc_histories), float)
    average_training_accuracy_history_small = np.array(
        average_over_folds(all_training_accuracies_histories), float)

    print(average_acc_history_small.mean())
    print(average_training_accuracy_history_small.mean())

    return all_loss_histories, all_acc_histories, all_training_accuracies_histories, all_training_losses_histories

# Testing

In [1]:
def test_CONV1D(train_samples, validation_samples, train_labels, validation_labels, test_samples, test_labels):
    """Cross-validate the Conv1D classifier briefly, then score it on the test set.

    The training data is cut into K=3 contiguous folds; for every fold a new
    model is built and trained for ``num_epochs`` (2) epochs.  The
    epoch-averaged validation and training accuracies are printed, and the
    model trained in the LAST fold is evaluated on the test data.

    Parameters
    ----------
    train_samples : array indexed as ``[sample, step, feature]``.
        The model is built for one step and ``train_samples.shape[2]``
        features.
    train_labels : targets aligned with ``train_samples``.
    test_samples, test_labels : data passed to ``model.evaluate``.
    validation_samples, validation_labels :
        Accepted so that all runners share one signature; not used here.

    Returns
    -------
    tuple ``(validation losses, validation accuracies, training accuracies,
    training losses, results)`` where the first four are lists with one
    per-epoch list per fold and ``results`` is the value returned by
    ``model.evaluate`` (``results[1]`` is printed as the test accuracy).
    """
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import Sequential, layers

    K = 3
    num_epochs = 2
    # Samples beyond K * num_val_samples never act as validation data; they
    # stay in the training part of every fold.
    num_val_samples = len(train_samples) // K

    def build_model():
        """Return a freshly compiled Conv1D classifier."""
        # Reset Keras' global state so each fold starts from a clean slate.
        tf.keras.backend.clear_session()

        model = Sequential()
        model.add(layers.Conv1D(512, 1, activation='relu', input_shape=(1, train_samples.shape[2])))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling1D(1))
        model.add(layers.BatchNormalization())
        model.add(layers.Conv1D(512, 1, activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling1D(1))
        model.add(layers.BatchNormalization())
        model.add(layers.Conv1D(128, 1, activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.GlobalMaxPooling1D())
        model.add(layers.BatchNormalization())
        model.add(layers.Dense(1, activation="sigmoid"))

        opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
        model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
        model.summary()
        return model

    class CustomCallback(tf.keras.callbacks.Callback):
        """Print a one-character spinner that advances once per epoch."""

        def on_epoch_begin(self, epoch, logs=None):
            c = ['\b|', '\b/', '\b-', '\b\\']
            print(c[epoch % 4], end='')

        def on_epoch_end(self, epoch, logs=None):
            print('\b', end='')

    def average_over_folds(histories):
        """Average the per-fold histories epoch by epoch."""
        return [np.mean([fold[epoch] for fold in histories]) for epoch in range(num_epochs)]

    all_loss_histories = []
    all_acc_histories = []
    all_training_accuracies_histories = []
    all_training_losses_histories = []

    for i in range(K):
        print('processing fold', i)

        # Validation data: partition i.
        a, b = i * num_val_samples, (i + 1) * num_val_samples
        val_data = train_samples[a:b]
        val_targets = train_labels[a:b]

        # Training data: everything outside partition i.
        partial_train_data = np.concatenate([train_samples[:a], train_samples[b:]], axis=0)
        partial_train_targets = np.concatenate([train_labels[:a], train_labels[b:]], axis=0)

        modelsmall = build_model()

        # verbose=0 silences Keras; the callback shows progress instead.
        history = modelsmall.fit(partial_train_data, partial_train_targets,
                                 validation_data=(val_data, val_targets),
                                 epochs=num_epochs, batch_size=16, verbose=0,
                                 callbacks=[CustomCallback()])

        all_loss_histories.append(history.history['val_loss'])
        all_acc_histories.append(history.history["val_accuracy"])
        all_training_losses_histories.append(history.history['loss'])
        all_training_accuracies_histories.append(history.history["accuracy"])

    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    average_acc_history_small = np.array(average_over_folds(all_acc_histories), float)
    average_training_accuracy_history_small = np.array(
        average_over_folds(all_training_accuracies_histories), float)

    print(average_acc_history_small.mean())
    print(average_training_accuracy_history_small.mean())

    # NOTE: modelsmall is the model from the final fold, i.e. it was trained
    # on the training data minus that fold's validation partition.
    results = modelsmall.evaluate(test_samples, test_labels)
    print("Test accuracy of this model was", results[1])
    return all_loss_histories, all_acc_histories, all_training_accuracies_histories, all_training_losses_histories, results

In [None]:
def test_CONV2D(n_epochs, train_size, validation_size, test_size, image_dir=None):
    """Cross-validate the 2-D CNN briefly, then score it on held-out images.

    Images whose file name ends in ``p.png`` are loaded as class 0 and those
    ending in ``r.png`` as class 1.  Every image is rescaled to 150x150,
    divided by 255, and standardised per colour channel.  The first
    ``train_size`` shuffled samples are used for K=3 fold cross-validation
    with ``num_epochs`` (2) epochs per fold; the model trained in the LAST
    fold is then evaluated on the final ``test_size`` samples.

    Parameters
    ----------
    n_epochs : number of images expected per class (size of the per-class
        image buffers); the data set has ``2 * n_epochs`` samples.
    train_size : number of shuffled samples that form the training pool.
    validation_size : accepted for signature compatibility; not used here.
    test_size : number of samples at the end of the shuffled data reserved
        for the test evaluation.  The caller must keep
        ``train_size + test_size <= 2 * n_epochs`` to avoid overlap.
    image_dir : directory containing the PNG files.  Defaults to the original
        hard-coded thesis data folder.

    Returns
    -------
    tuple ``(validation losses, validation accuracies, training accuracies,
    training losses, results)`` where the first four are lists with one
    per-epoch list per fold and ``results`` is the value returned by
    ``model.evaluate`` (``results[1]`` is printed as the test accuracy).

    Raises
    ------
    OSError
        If an image file that was listed cannot be decoded by OpenCV.
    """
    import os

    import cv2
    import numpy as np
    import seaborn as sns
    import tensorflow as tf
    from sklearn.preprocessing import StandardScaler
    from tensorflow.keras import layers, models

    sns.set(color_codes=True)

    if image_dir is None:
        image_dir = "C:\\Users\\domin\\Desktop\\Thesis Coding\\Datafiles from Giuseppe"

    def load_images(suffix):
        """Load every image in image_dir whose name ends with *suffix*."""
        names = [name for name in os.listdir(path=image_dir) if name.endswith(suffix)]
        images = np.zeros((n_epochs, 150, 150, 3))
        for position, name in enumerate(names):
            # imread needs the full path; a bare file name only resolves when
            # the working directory happens to be image_dir.
            full_path = os.path.join(image_dir, name)
            image = cv2.imread(full_path)
            if image is None:
                raise OSError("could not read image: " + full_path)
            # cv2.resize rescales the picture; ndarray.resize would merely
            # truncate or pad the raw pixel buffer.
            images[position] = cv2.resize(image, (150, 150))
        return images

    list_of_images_push = load_images('p.png')
    list_of_images_relax = load_images('r.png')

    # Push images first (label 0), relax images second (label 1).
    features = np.concatenate((list_of_images_push, list_of_images_relax), axis=0)
    labels = np.concatenate((np.zeros(n_epochs), np.ones(n_epochs)))

    # Flatten width x height so each image is a (pixels, channels) table, and
    # bring the 8-bit channel values into [0, 1].
    features = features.reshape(n_epochs * 2, 150 * 150, 3).astype("float32")
    features /= 255

    # Shuffle samples and labels with the same permutation.
    indices = np.arange(features.shape[0])
    np.random.shuffle(indices)
    features = features[indices]
    labels = labels[indices]

    # Standardise each image per colour channel.  Iterate over the actual
    # sample count instead of a hard-coded 230.
    for i in range(features.shape[0]):
        features[i] = StandardScaler().fit_transform(features[i])

    features = features.reshape((n_epochs * 2, 150, 150, 3))

    # Train/test partition.
    train_samples = features[:train_size]
    train_labels = labels[:train_size]

    # Take the LAST test_size samples.  Slicing with [test_size:] returned
    # everything after index test_size, which overlaps the training pool and
    # leaks training data into the test score.
    test_start = features.shape[0] - test_size
    test_samples = features[test_start:]
    test_labels = labels[test_start:]

    def build_model():
        """Return a freshly compiled two-block Conv2D classifier."""
        model = models.Sequential()
        model.add(layers.Conv2D(32, (3, 3), activation='relu',
                                input_shape=(150, 150, 3)))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.BatchNormalization())
        model.add(layers.Conv2D(64, (3, 3), activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.BatchNormalization())
        model.add(layers.Flatten())
        model.add(layers.Dense(512, activation='relu'))
        model.add(layers.Dense(1, activation='sigmoid'))
        model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
        model.summary()
        return model

    class CustomCallback(tf.keras.callbacks.Callback):
        """Print a one-character spinner that advances once per epoch."""

        def on_epoch_begin(self, epoch, logs=None):
            c = ['\b|', '\b/', '\b-', '\b\\']
            print(c[epoch % 4], end='')

        def on_epoch_end(self, epoch, logs=None):
            print('\b', end='')

    K = 3
    num_epochs = 2
    # Samples beyond K * num_val_samples never act as validation data; they
    # stay in the training part of every fold.
    num_val_samples = len(train_samples) // K

    def average_over_folds(histories):
        """Average the per-fold histories epoch by epoch."""
        return [np.mean([fold[epoch] for fold in histories]) for epoch in range(num_epochs)]

    all_loss_histories = []
    all_acc_histories = []
    all_training_accuracies_histories = []
    all_training_losses_histories = []

    for i in range(K):
        print('processing fold', i)

        # Validation data: partition i.
        a, b = i * num_val_samples, (i + 1) * num_val_samples
        val_data = train_samples[a:b]
        val_targets = train_labels[a:b]

        # Training data: everything outside partition i.
        partial_train_data = np.concatenate([train_samples[:a], train_samples[b:]], axis=0)
        partial_train_targets = np.concatenate([train_labels[:a], train_labels[b:]], axis=0)

        modelsmall = build_model()

        # verbose=0 silences Keras; the callback shows progress instead.
        history = modelsmall.fit(partial_train_data, partial_train_targets,
                                 validation_data=(val_data, val_targets),
                                 epochs=num_epochs, batch_size=16, verbose=0,
                                 callbacks=[CustomCallback()])

        all_loss_histories.append(history.history['val_loss'])
        all_acc_histories.append(history.history["val_accuracy"])
        all_training_losses_histories.append(history.history['loss'])
        all_training_accuracies_histories.append(history.history["accuracy"])

    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    average_acc_history_small = np.array(average_over_folds(all_acc_histories), float)
    average_training_accuracy_history_small = np.array(
        average_over_folds(all_training_accuracies_histories), float)

    print(average_acc_history_small.mean())
    print(average_training_accuracy_history_small.mean())

    # NOTE: modelsmall is the model from the final fold, i.e. it was trained
    # on the training pool minus that fold's validation partition.
    results = modelsmall.evaluate(test_samples, test_labels)
    print("Test accuracy of this model was", results[1])
    return all_loss_histories, all_acc_histories, all_training_accuracies_histories, all_training_losses_histories, results

    

    
