# Import libraries

In [1]:
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

# Feature extraction

In [2]:
def Mean_pressure_over_channel_and_time(data):
    """
    Compute, for every sample, the mean pressure taken over
    all channels and all frames.

    Parameters
    -----------
    data : tensor-like of shape (number_of_samples, time_length, 8, 8)

    Returns
    -------
    list holding one mean pressure value per sample
    """
    per_sample_mean = []
    for sample in data:
        # Total pressure of every frame (all channels added together).
        frame_totals = [np.sum(frame, axis=None) for frame in sample]
        # 64 channels per frame (8 x 8 grid) times the number of frames.
        reading_count = 64 * len(frame_totals)
        per_sample_mean.append(np.sum(frame_totals, axis=None) / reading_count)
    return per_sample_mean

In [3]:
# Mount Google Drive at '/content/drive' using the google.colab helper
# (presumably this cell is meant to run inside a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
def Max_pressure_over_channel_and_time(data):
    """
    Compute, for every sample, the largest pressure value found
    in any channel of any frame.

    Parameters
    -----------
    data : tensor-like of shape (number_of_samples, time_length, 8, 8)

    Returns
    -------
    list holding one maximum pressure value per sample
    """
    per_sample_peak = []
    for sample in data:
        # Highest channel value inside each frame ...
        frame_peaks = [np.max(frame, axis=None) for frame in sample]
        # ... and the highest of those over the whole period.
        per_sample_peak.append(np.max(frame_peaks, axis=None))
    return per_sample_peak

In [5]:
def Pressure_variability(data, number_of_sensors):
    """
    Compute, for every sample, the sum over all frames of each
    frame's standard error of the mean.

    The standard error of one frame is the standard deviation of
    its channel values divided by sqrt(number_of_sensors).

    Parameters
    -----------
    data : tensor-like of shape (number_of_samples, time_length, 8, 8)
    number_of_sensors : number of sensors

    Returns
    -------
    list holding one summed standard error value per sample
    """
    root_sensor_count = np.sqrt(number_of_sensors)
    return [
        sum(np.sqrt(np.var(frame)) / root_sensor_count for frame in sample)
        for sample in data
    ]

In [6]:
def Each_row_column_mean_value(data):
    """
    Compute, for every sample, the mean pressure of each row and
    of each column of the sensor grid over time.

    Parameters
    -----------
    data : tensor-like of shape (number_of_samples, time_length, 8, 8)

    Returns
    -------
    (row means, column means): two lists with one array per sample
    """
    row_means = []
    column_means = []
    for sample in data:
        # Per-frame totals along each row (axis 1) and each column (axis 0).
        row_totals = [np.sum(frame, axis=1) for frame in sample]
        column_totals = [np.sum(frame, axis=0) for frame in sample]
        # Each row/column holds 8 channels, observed once per frame.
        row_means.append(np.sum(row_totals, axis=0) / (len(row_totals) * 8))
        column_means.append(np.sum(column_totals, axis=0) / (len(column_totals) * 8))
    return row_means, column_means

In [7]:
def Mean_contact_area_contact_area_with_max_pressure(data):
    """
    Compute the mean contact area and the contact area with max
    pressure for every sample.
    -----------
    The contact area of a frame is the number of channels whose value
    is above 50% of that frame's maximum, divided by 64 (the total
    number of channels).
    Mean contact area is the mean of the per-frame contact areas
    over time.
    -----------
    Contact area with max pressure is the contact area of the frame
    that has the highest mean pressure over the channels.
    Parameters
    -----------
    data : tensor-like of shape (number_of_samples, time_length, 8, 8)
    Returns
    -------
    mean contact area, contact area with max pressure
    (two lists with one value per sample)
    """
    mean_contact_area = []
    contact_area_with_max_pressure = []

    for sample in data:
        frame_areas = []
        frame_means = []
        for frame in sample:
            half_of_peak = np.max(frame) * 0.5
            active_rows = np.where(frame > half_of_peak)[0]
            frame_areas.append(len(active_rows) / 64)
            frame_means.append(np.sum(frame, axis=None) / 64)

        mean_contact_area.append(np.sum(frame_areas, axis=None) / len(frame_areas))
        # Pick the area of the frame whose mean pressure is the largest.
        strongest_frame = np.argmax(frame_means)
        contact_area_with_max_pressure.append(frame_areas[strongest_frame])

    return mean_contact_area, contact_area_with_max_pressure

In [8]:
def Temporal_peak_count(data):
    """
    Compute the temporal peak count and the number of positive
    crossings of the threshold for every sample.
    -----------
    Temporal peak count is the count of the number of frames
    whose mean value was larger than that of both neighbouring frames
    (the previous and the next one). The first and the last frame have
    only one neighbour and are never counted.
    -----------
    Number of positive crossing the threshold is, summed over all
    frames, the count of channels whose value is above the threshold.
    The threshold of a frame is the mean value of all its channels.
    Parameters
    -----------
    data : tensor-like of shape (number_of_samples, time_length, 8, 8)
    Returns
    -------
    temporal peak count, number of positive crossing the threshold
    (two lists with one value per sample)
    """
    # Mean pressure of every frame (64 channels per 8 x 8 frame).
    avg_pressure = [[np.sum(frame) / 64 for frame in sample] for sample in data]

    count_sample = []
    for frame_means in avg_pressure:
        count = 0
        # Only interior frames have two neighbours. Compare frame t with
        # t - 1 and t + 1; comparing with t - 2 would skip the real left
        # neighbour and wrap around to the last frame for the first test.
        for t in range(1, len(frame_means) - 1):
            if frame_means[t] > frame_means[t - 1] and frame_means[t] > frame_means[t + 1]:
                count += 1
        count_sample.append(count)

    numbers_per_sample = []
    for sample, frame_means in zip(data, avg_pressure):
        above_threshold = [
            np.count_nonzero(frame > threshold)
            for frame, threshold in zip(sample, frame_means)
        ]
        numbers_per_sample.append(np.sum(above_threshold))

    return count_sample, numbers_per_sample

In [9]:
def Center_of_pressure_and_displacement_of_pressure(data):
    """
    Compute the mean centre of pressure (COP) and the summed absolute
    displacement of the COP for every sample.
    -----------
    The COP of a frame is the pressure-weighted average of the row
    (respectively column) positions 1..8, rounded with np.around.
    Mean of COP is the mean of the per-frame COP over time, in the
    row and column directions.
    -----------
    Summed absolute difference (ABS) of COP is the total displacement
    of the COP between consecutive frames over time.
    Parameters
    -----------
    data : tensor-like of shape (number_of_samples, time_length, 8, 8)
    Returns
    -------
    mean row COP, mean column COP,
    summed ABS of row COP, summed ABS of column COP
    (four lists with one value per sample)
    """
    positions = list(range(1, 9))

    mean_row_cop = []
    mean_column_cop = []
    row_travel = []
    column_travel = []

    for sample in data:
        row_cops = []
        column_cops = []
        for frame in sample:
            row_weights = np.sum(frame, axis=1)
            column_weights = np.sum(frame, axis=0)
            row_cops.append(np.around(np.average(positions, weights=row_weights)))
            column_cops.append(np.around(np.average(positions, weights=column_weights)))

        mean_row_cop.append(np.sum(row_cops) / len(row_cops))
        mean_column_cop.append(np.sum(column_cops) / len(column_cops))

        # Step sizes between each frame and the one that follows it.
        row_steps = [np.absolute(nxt - cur) for cur, nxt in zip(row_cops, row_cops[1:])]
        column_steps = [np.absolute(nxt - cur) for cur, nxt in zip(column_cops, column_cops[1:])]
        row_travel.append(np.sum(row_steps))
        column_travel.append(np.sum(column_steps))

    return mean_row_cop, mean_column_cop, row_travel, column_travel

# Save as csv file

In [10]:
def concat_features():
    """
    Extract features from the raw data and save them as csv files
    in a folder.

    NOTE: the implementation that follows this docstring is entirely
    commented out, so calling this function currently does nothing
    and returns None.
    """
#     conditions = ['stationary','holding']
#     times = ['2s','1s','0.5s']
#     gestures = ['pat','stroke','grab','poke','scratch','notouch']
#     for condition in conditions:
#         for time in times:
#             for gesture in gestures:
#                 if condition == 'stationary':
#                     pickle_path = '/home/motionlab/Desktop/weitai/pickle/' + condition + ' test'
#                     pickle_file_name = 'raw_data_' + gesture + '_' + time + '_s1s25_s.pck1'
#                     csv_path = '/home/motionlab/Desktop/weitai/svm_rf_feature_s1s25/' + condition
#                     csv_name = gesture + '_' + 's' + '_' + time + '_s1s25' + '.csv'
#                     print(pickle_file_name + ' is load')
#                     file = open(os.path.join(pickle_path,pickle_file_name),'rb')
#                     data = pickle.load(file)
#                     file.close()
#                 else:
#                     pickle_path = '/home/motionlab/Desktop/weitai/pickle/' + condition + ' test'
#                     pickle_file_name = 'raw_data_' + gesture + '_' + time + '_s1s25_h.pck1'
#                     csv_path = '/home/motionlab/Desktop/weitai/svm_rf_feature_s1s25/' + condition
#                     csv_name = gesture + '_' + 'h' + '_' + time + '_s1s25' +'.csv'
#                     print(pickle_file_name + ' is load')
#                     file = open(os.path.join(pickle_path,pickle_file_name),'rb')
#                     data = pickle.load(file)
#                     file.close()
#                 row = ['first row mean pressure','second row mean pressure',
#                        'third row mean pressure','forth row mean pressure',
#                        'fifth row mean pressure','sixth row mean pressure',
#                        'seventh row mean pressure','eighth row mean pressure']
#                 column = ['first column mean pressure',
#                           'second column column pressure',
#                           'third column mean pressure',
#                           'forth column mean pressure',
#                           'fifth column mean pressure',
#                           'sixth column mean pressure',
#                           'seventh column mean pressure',
#                           'eighth column mean pressure']
#                 a = Each_row_column_mean_value(data)
#                 b = Mean_contact_area_contact_area_with_max_pressure(data)
#                 c = Temporal_peak_count(data)
#                 d = Center_of_pressure_and_displacement_of_pressure(data)
#                 f1 = pd.DataFrame(Mean_pressure_over_channel_and_time(data),
#                                   columns = ['Mean_pressure'])
#                 f2 = pd.DataFrame(Max_pressure_over_channel_and_time(data),
#                                   columns = ['Max_pressure'])
#                 f3 = pd.DataFrame(Pressure_variability(data, 64),
#                                   columns = ['Pressure_variability'])
#                 f4  =pd.DataFrame(a[0],
#                                   columns = row)
#                 f5  =pd.DataFrame(a[1],
#                                     columns = column)
#                 f6 = pd.DataFrame(b[0],
#                                   columns = ['Mean_contact_area'])
#                 f7 = pd.DataFrame(b[1],
#                                   columns = ['Contact_area_with_max_pressure'])
#                 f8 = pd.DataFrame(c[0],
#                                   columns = ['Temporal_peak_count'])
#                 f9 = pd.DataFrame(c[1],
#                                   columns = ['Number_of_posstive_crossing_of_the_threshold'])
#                 f10 = pd.DataFrame(d[0],
#                                    columns = ['Row_center_of_pressure'])
#                 f11 = pd.DataFrame(d[1],
#                                    columns = ['Column_center_of_pressure'])
#                 f12 = pd.DataFrame(d[2],
#                                    columns = ['Abs_row_displacement_of_center_of_pressure'])
#                 f13 = pd.DataFrame(d[3],
#                                    columns = ['Abs_column_displacement_of_center_of_pressure'])
#                 concat_data = pd.concat([f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13], axis = 1)
#                 csv_file = open(os.path.join(csv_path,csv_name),'w')
#                 concat_data.to_csv(csv_file)
#                 csv_file.close()

In [None]:
# Run the feature export. This is a no-op while the body of
# concat_features() stays commented out.
concat_features()

In [None]:
def concat_random_features_stationary():
    """
    Extract features from the raw data of the stationary condition
    and save them as csv files in the various cross-validation folders.

    NOTE: the implementation that follows this docstring is entirely
    commented out, so calling this function currently does nothing
    and returns None.
    """
#     gestures = ['pat','stroke','grab','poke','scratch','notouch']
#     times = ['2s','1s','0.5s']
#     conditions = ['train','test']
#     for folder in range(1,11):
#         csv_folder_path = '/home/motionlab/Desktop/weitai/different subject train test/stationary_s1s25/' + 'cv_' + str(folder)
#         os.chdir(csv_folder_path)
#         file = 'feature_cv_' + str(folder)
#         os.mkdir(file)
#     for folder in range(1,11):
#         for gesture in gestures:
#             for time in times:
#                 for condition in conditions:
#                     pickle_folder_path = '/home/motionlab/Desktop/weitai/different subject train test/stationary_s1s25/' + 'cv_' + str(folder)
#                     pickle_file_name = 'raw_data_' + condition + '_' + gesture + '_' + time + '_' + 's1s25' + '_s_' + 'cv_' + str(folder) + '.pck1'     
#                     csv_path = '/home/motionlab/Desktop/weitai/different subject train test/stationary_s1s25/' + 'cv_' + str(folder) + '/' + 'feature_cv_' +  str(folder)
#                     csv_name = condition + '_' + gesture + '_' + 's' + '_' + time + '_s1s25_' + 'cv_' + str(folder) + '.csv'
#                     file = open(os.path.join(pickle_folder_path,pickle_file_name), 'rb')
#                     data = pickle.load(file)
#                     file.close()
#                     print(pickle_file_name + ' is load')
#                     row = ['first row mean pressure','second row mean pressure',
#                            'third row mean pressure','forth row mean pressure',
#                            'fifth row mean pressure','sixth row mean pressure',
#                            'seventh row mean pressure','eighth row mean pressure']
#                     column = ['first column mean pressure',
#                               'second column column pressure',
#                               'third column mean pressure',
#                               'forth column mean pressure',
#                               'fifth column mean pressure',
#                               'sixth column mean pressure',
#                               'seventh column mean pressure',
#                               'eighth column mean pressure']
#                     a = Each_row_column_mean_value(data)
#                     b = Mean_contact_area_contact_area_with_max_pressure(data)
#                     c = Temporal_peak_count(data)
#                     d = Center_of_pressure_and_displacement_of_pressure(data)
#                     f1 = pd.DataFrame(Mean_pressure_over_channel_and_time(data),
#                                       columns = ['Mean_pressure'])
#                     f2 = pd.DataFrame(Max_pressure_over_channel_and_time(data),
#                                       columns = ['Max_pressure'])
#                     f3 = pd.DataFrame(Pressure_variability(data, 64),
#                                       columns = ['Pressure_variability'])
#                     f4  =pd.DataFrame(a[0],columns = row)
#                     f5  =pd.DataFrame(a[1],columns = column)
#                     f6 = pd.DataFrame(b[0],
#                                       columns = ['Mean_contact_area'])
#                     f7 = pd.DataFrame(b[1],
#                                       columns = ['Contact_area_with_max_pressure'])
#                     f8 = pd.DataFrame(c[0],
#                                       columns = ['Temporal_peak_count'])
#                     f9 = pd.DataFrame(c[1],
#                                       columns = ['Number_of_posstive_crossing_of_the_threshold'])
#                     f10 = pd.DataFrame(d[0],
#                                        columns = ['Row_center_of_pressure'])
#                     f11 = pd.DataFrame(d[1],
#                                        columns = ['Column_center_of_pressure'])
#                     f12 = pd.DataFrame(d[2],
#                                        columns = ['Abs_row_displacement_of_center_of_pressure'])
#                     f13 = pd.DataFrame(d[3],
#                                        columns = ['Abs_column_displacement_of_center_of_pressure'])
#                     concat_data = pd.concat([f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13], axis = 1)
#                     csv_file = open(os.path.join(csv_path,csv_name),'w')
#                     concat_data.to_csv(csv_file)
#                     csv_file.close()


In [None]:
# Run the stationary feature export. This is a no-op while the body of
# concat_random_features_stationary() stays commented out.
concat_random_features_stationary()

In [None]:
def concat_random_features_holding():
    """
    Extract features from the raw data of the holding condition
    and save them as csv files in the various cross-validation folders.

    NOTE: the implementation that follows this docstring is entirely
    commented out, so calling this function currently does nothing
    and returns None.
    """
#     gestures = ['pat','stroke','grab','poke','scratch','notouch']
#     times = ['2s','1s','0.5s']
#     conditions = ['train','test']
#     for folder in range(1,11):
#         csv_folder_path = '/home/motionlab/Desktop/weitai/different subject train test/holding_s1s25/' + 'cv_' + str(folder)
#         os.chdir(csv_folder_path)
#         file = 'feature_cv_' + str(folder)
#         os.mkdir(file)
#     for folder in range(1,11):
#         for gesture in gestures:
#             for time in times:
#                 for condition in conditions:
#                     pickle_folder_path = '/home/motionlab/Desktop/weitai/different subject train test/holding_s1s25/' + 'cv_' + str(folder)
#                     pickle_file_name = 'raw_data_' + condition + '_' + gesture + '_' + time + '_' + 's1s25' + '_h_' + 'cv_' + str(folder) + '.pck1'     
#                     csv_path = '/home/motionlab/Desktop/weitai/different subject train test/holding_s1s25/' + 'cv_' + str(folder) + '/' + 'feature_cv_' +  str(folder)
#                     csv_name = condition + '_' + gesture + '_' + 'h' + '_' + time + '_s1s25_' + 'cv_' + str(folder) + '.csv'
#                     file = open(os.path.join(pickle_folder_path,pickle_file_name), 'rb')
#                     data = pickle.load(file)
#                     file.close()
#                     print(pickle_file_name + ' is load')
#                     row = ['first row mean pressure','second row mean pressure',
#                            'third row mean pressure','forth row mean pressure',
#                            'fifth row mean pressure','sixth row mean pressure',
#                            'seventh row mean pressure','eighth row mean pressure']
#                     column = ['first column mean pressure',
#                               'second column column pressure',
#                               'third column mean pressure',
#                               'forth column mean pressure',
#                               'fifth column mean pressure',
#                               'sixth column mean pressure',
#                               'seventh column mean pressure',
#                               'eighth column mean pressure']
#                     a = Each_row_column_mean_value(data)
#                     b = Mean_contact_area_contact_area_with_max_pressure(data)
#                     c = Temporal_peak_count(data)
#                     d = Center_of_pressure_and_displacement_of_pressure(data)
#                     f1 = pd.DataFrame(Mean_pressure_over_channel_and_time(data),
#                                       columns = ['Mean_pressure'])
#                     f2 = pd.DataFrame(Max_pressure_over_channel_and_time(data),
#                                       columns = ['Max_pressure'])
#                     f3 = pd.DataFrame(Pressure_variability(data, 64), columns = ['Pressure_variability'])
#                     f4  =pd.DataFrame(a[0],columns = row)
#                     f5  =pd.DataFrame(a[1],columns = column)
#                     f6 = pd.DataFrame(b[0],
#                                       columns = ['Mean_contact_area'])
#                     f7 = pd.DataFrame(b[1],
#                                       columns = ['Contact_area_with_max_pressure'])
#                     f8 = pd.DataFrame(c[0],
#                                       columns = ['Temporal_peak_count'])
#                     f9 = pd.DataFrame(c[1],
#                                       columns = ['Number_of_posstive_crossing_of_the_threshold'])
#                     f10 = pd.DataFrame(d[0],
#                                        columns = ['Row_center_of_pressure'])
#                     f11 = pd.DataFrame(d[1],
#                                        columns = ['Column_center_of_pressure'])
#                     f12 = pd.DataFrame(d[2],
#                                        columns = ['Abs_row_displacement_of_center_of_pressure'])
#                     f13 = pd.DataFrame(d[3],
#                                        columns = ['Abs_column_displacement_of_center_of_pressure'])
#                     concat_data = pd.concat([f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13], axis = 1)
#                     csv_file = open(os.path.join(csv_path,csv_name),'w')
#                     concat_data.to_csv(csv_file)
#                     csv_file.close()


In [None]:
# Run the holding feature export. This is a no-op while the body of
# concat_random_features_holding() stays commented out.
concat_random_features_holding()