In [21]:
from feature_functions import *

In [2]:
# Collect the recordings to process as two groups; both names are iterated
# later in this notebook and each element is passed to calculate() as a file path.
# NOTE(review): fileClassification is presumably supplied by the star import
# from feature_functions - confirm.
controlFiles, patientFiles = fileClassification()

In [10]:
def data_preprocessing(filename):
    '''
        Load one recording, split it into its static and dynamic parts and
        derive the per-part signals used for feature extraction.

        Parameter: filename - path of a ';'-separated csv recording without a header row
        Returns: one flat tuple of 28 items, in this order:
            static_x, static_y, dynamic_x, dynamic_y,
            static_velocity, static_acceleration, static_jerk, static_curvature,
            static_pressure, static_pressure_rising, static_pressure_main, static_pressure_falling,
            dynamic_velocity, dynamic_acceleration, dynamic_jerk, dynamic_curvature,
            dynamic_pressure, dynamic_pressure_rising, dynamic_pressure_main, dynamic_pressure_falling,
            static_altitude, dynamic_altitude,
            static_time, dynamic_time,
            static_risingIndex, static_fallingIndex, dynamic_risingIndex, dynamic_fallingIndex
    '''
    # read_csv's delimiter keyword is `sep`; `split` is not an accepted
    # argument and makes the call fail with a TypeError.
    df = pd.read_csv(filename, sep=";", header=None)

    # split into static and dynamic dataframes
    # NOTE(review): the previous call passed no argument and the loaded frame
    # was never used; handing it to the splitter is assumed to be the intended
    # signature - confirm against feature_functions.
    static_df, dynamic_df = staticDynamicSplit(df)

    # column layout as consumed below: 0 = x, 1 = y, 3 = pressure, 4 = altitude, 5 = time
    static_time = static_df[5]
    dynamic_time = dynamic_df[5]

    static_x = static_df[0]
    static_y = static_df[1]
    static_curve = np.array([static_x, static_y])

    dynamic_x = dynamic_df[0]
    dynamic_y = dynamic_df[1]
    dynamic_curve = np.array([dynamic_x, dynamic_y])

    # smoothCurveFeature yields five values: velocity, acceleration, jerk,
    # curvature and one more that is not used here.  The kinematic signals and
    # the curvature are taken from two separate calls with a different second
    # argument (presumably a smoothing setting - confirm in feature_functions).
    static_velocity, static_acceleration, static_jerk, _, _ = smoothCurveFeature(static_curve, 10000)
    dynamic_velocity, dynamic_acceleration, dynamic_jerk, _, _ = smoothCurveFeature(dynamic_curve, 10000)

    _, _, _, static_curvature, _ = smoothCurveFeature(static_curve, 100000)
    _, _, _, dynamic_curvature, _ = smoothCurveFeature(dynamic_curve, 100000)

    static_pressure = static_df[3]
    static_risingIndex, static_fallingIndex = mainSignalThreshold(static_pressure)

    dynamic_pressure = dynamic_df[3]
    dynamic_risingIndex, dynamic_fallingIndex = mainSignalThreshold(dynamic_pressure)

    # Cut each pressure signal into rising / main / falling phases.  The falling
    # phase uses an open-ended slice: `[index:-1]` would drop the final sample.
    static_pressure_rising = static_pressure[:static_risingIndex]
    static_pressure_main = static_pressure[static_risingIndex:static_fallingIndex]
    static_pressure_falling = static_pressure[static_fallingIndex:]

    dynamic_pressure_rising = dynamic_pressure[:dynamic_risingIndex]
    dynamic_pressure_main = dynamic_pressure[dynamic_risingIndex:dynamic_fallingIndex]
    dynamic_pressure_falling = dynamic_pressure[dynamic_fallingIndex:]

    static_altitude = static_df[4]
    dynamic_altitude = dynamic_df[4]

    # The parentheses are required: without them only the first line of names
    # is returned and the remaining lines are unreachable expressions.  The
    # undefined name `time` is replaced by the two time columns computed above.
    return (
        static_x, static_y, dynamic_x, dynamic_y,
        static_velocity, static_acceleration, static_jerk, static_curvature,
        static_pressure, static_pressure_rising, static_pressure_main, static_pressure_falling,
        dynamic_velocity, dynamic_acceleration, dynamic_jerk, dynamic_curvature,
        dynamic_pressure, dynamic_pressure_rising, dynamic_pressure_main, dynamic_pressure_falling,
        static_altitude, dynamic_altitude,
        static_time, dynamic_time,
        static_risingIndex, static_fallingIndex, dynamic_risingIndex, dynamic_fallingIndex,
    )

In [None]:
def _pressure_edge_features(time, pressure, rising_index, falling_index):
    '''
        Duration and pressure change of the rising and of the falling phase.

        Parameter: time, pressure - samples of one part of the recording
                   rising_index, falling_index - sample positions where the main
                   pressure phase starts and ends
        Returns: rising duration, rising range, falling duration, falling range
    '''
    # Work on plain arrays so every lookup is positional; the same indices are
    # used as slice positions when the pressure signal is cut into phases.
    time = np.asarray(time)
    pressure = np.asarray(pressure)
    last = len(time) - 1

    rising_duration = time[rising_index] - time[0]
    rising_range = pressure[rising_index] - pressure[0]
    falling_duration = time[last] - time[falling_index]
    falling_range = pressure[falling_index] - pressure[last]
    return rising_duration, rising_range, falling_duration, falling_range


def calculate(filename):
    '''
        Compute the handwriting features of one recording.

        Parameter: filename - file path of patient csv
        Returns: static_x_entropy, static_y_entropy

        NOTE(review): every other feature below is computed but not returned
        yet; the return value is left as it was so existing callers keep working.
    '''
    # retrieve all preprocessed data
    # The parentheses are required: without them only the last line is an
    # assignment and the lines before it are bare expressions that raise
    # NameError.  Both time columns are unpacked because the features below
    # need them; this expects data_preprocessing to deliver these 28 values.
    (
        static_x, static_y, dynamic_x, dynamic_y,
        static_velocity, static_acceleration, static_jerk, static_curvature,
        static_pressure, static_pressure_rising, static_pressure_main, static_pressure_falling,
        dynamic_velocity, dynamic_acceleration, dynamic_jerk, dynamic_curvature,
        dynamic_pressure, dynamic_pressure_rising, dynamic_pressure_main, dynamic_pressure_falling,
        static_altitude, dynamic_altitude,
        static_time, dynamic_time,
        static_risingIndex, static_fallingIndex, dynamic_risingIndex, dynamic_fallingIndex,
    ) = data_preprocessing(filename)

    # entropy:
    static_x_entropy = entropyCalc(static_x)
    static_y_entropy = entropyCalc(static_y)
    dynamic_x_entropy = entropyCalc(dynamic_x)
    dynamic_y_entropy = entropyCalc(dynamic_y)

    # kinematic features
    # velocity features (np.max instead of the builtin, matching np.mean/np.std)
    static_velocity_mean = np.mean(static_velocity)
    dynamic_velocity_mean = np.mean(dynamic_velocity)
    static_velocity_std = np.std(static_velocity)
    dynamic_velocity_std = np.std(dynamic_velocity)
    static_velocity_max = np.max(static_velocity)
    dynamic_velocity_max = np.max(dynamic_velocity)
    static_velocity_inversion_rate = rateOfInversions(static_velocity, static_time)
    dynamic_velocity_inversion_rate = rateOfInversions(dynamic_velocity, dynamic_time)

    # acceleration features
    static_acceleration_mean = np.mean(static_acceleration)
    dynamic_acceleration_mean = np.mean(dynamic_acceleration)
    static_acceleration_std = np.std(static_acceleration)
    dynamic_acceleration_std = np.std(dynamic_acceleration)
    static_acceleration_max = np.max(static_acceleration)
    dynamic_acceleration_max = np.max(dynamic_acceleration)
    static_acceleration_inversion_rate = rateOfInversions(static_acceleration, static_time)
    dynamic_acceleration_inversion_rate = rateOfInversions(dynamic_acceleration, dynamic_time)

    # jerk features
    static_jerk_mean = np.mean(static_jerk)
    dynamic_jerk_mean = np.mean(dynamic_jerk)
    static_jerk_std = np.std(static_jerk)
    dynamic_jerk_std = np.std(dynamic_jerk)
    static_jerk_max = np.max(static_jerk)
    dynamic_jerk_max = np.max(dynamic_jerk)
    static_jerk_inversion_rate = rateOfInversions(static_jerk, static_time)
    dynamic_jerk_inversion_rate = rateOfInversions(dynamic_jerk, dynamic_time)

    # curvature rate of inversion
    static_curv_inversion_rate = rateOfInversions(static_curvature, static_time)
    dynamic_curv_inversion_rate = rateOfInversions(dynamic_curvature, dynamic_time)

    # fourier transform pressure
    static_pressure_low_freq, static_pressure_high_freq = fourierFreqCalc(static_pressure_main, static_time)
    dynamic_pressure_low_freq, dynamic_pressure_high_freq = fourierFreqCalc(dynamic_pressure_main, dynamic_time)

    # fourier transform altitude
    static_altitude_low_freq, static_altitude_high_freq = fourierFreqCalc(static_altitude, static_time)
    dynamic_altitude_low_freq, dynamic_altitude_high_freq = fourierFreqCalc(dynamic_altitude, dynamic_time)

    # pressure regression
    (static_pressure_reg_main_r2, static_pressure_reg_main_x0,
     static_pressure_reg_main_x1, static_pressure_reg_main_sumresid) = regression(static_pressure_main)
    (dynamic_pressure_reg_main_r2, dynamic_pressure_reg_main_x0,
     dynamic_pressure_reg_main_x1, dynamic_pressure_reg_main_sumresid) = regression(dynamic_pressure_main)

    # curvature regression
    (static_curv_reg_r2, static_curv_reg_x0,
     static_curv_reg_x1, static_curv_reg_sumresid) = regression(static_curvature)
    (dynamic_curv_reg_r2, dynamic_curv_reg_x0,
     dynamic_curv_reg_x1, dynamic_curv_reg_sumresid) = regression(dynamic_curvature)

    # velocity regression
    (static_velocity_reg_r2, static_velocity_reg_x0,
     static_velocity_reg_x1, static_velocity_reg_sumresid) = regression(static_velocity)
    (dynamic_velocity_reg_r2, dynamic_velocity_reg_x0,
     dynamic_velocity_reg_x1, dynamic_velocity_reg_sumresid) = regression(dynamic_velocity)

    # pressure rising & falling duration/range
    # (fixes the `static-time` / `fallign` typos and adds the dynamic part so it
    # mirrors the static one like every other feature group above)
    (static_pressure_rising_duration, static_pressure_rising_range,
     static_pressure_falling_duration, static_pressure_falling_range) = _pressure_edge_features(
        static_time, static_pressure, static_risingIndex, static_fallingIndex)
    (dynamic_pressure_rising_duration, dynamic_pressure_rising_range,
     dynamic_pressure_falling_duration, dynamic_pressure_falling_range) = _pressure_edge_features(
        dynamic_time, dynamic_pressure, dynamic_risingIndex, dynamic_fallingIndex)

    return static_x_entropy, static_y_entropy

In [None]:
# CALCULATE ALL STATIC FEATURES AND PUT INTO DATAFRAME

# One accumulator list per output column.
static_subject_id = []
static_mean_vel = []
static_max_vel = []
static_vel_std = []
static_vel_niv = []  # rate
static_nvv = []
static_mean_accel = []
static_max_accel = []
static_std_accel = []
static_nia = []  # rate
static_mean_jerk = []
static_max_jerk = []
static_std_jerk = []
static_nij = []  # rate
static_duration = []
static_nic = []
static_mean_pressure = []
static_max_pressure = []
static_std_pressure = []
static_rate_ip = []
static_x_entropy = []
static_y_entropy = []
static_x_skewness = []
static_y_skewness = []
static_x_kurtosis = []
static_y_kurtosis = []
static_pressure_high_freq = []
static_pressure_low_freq = []
static_altitude_high_freq = []
static_altitude_low_freq = []
static_pressure_regression = []
static_curvature_regression = []
static_velocity_regression = []

# The column mapping has to be a dict literal ({...}); `key: value` pairs are
# not valid inside a list.  The three regression columns now point at the
# lists declared above instead of undefined names.
# NOTE(review): the frame is built before the loops below, and those loops
# discard what calculate() returns, so `df` has these columns but no rows.
df = pd.DataFrame({
    'Static Subject ID': static_subject_id,
    'Mean Velocity': static_mean_vel,
    'Velocity Std': static_vel_std,
    'Max Velocity': static_max_vel,
    'NVV': static_nvv,
    'NIV': static_vel_niv,
    'Mean Acceleration': static_mean_accel,
    'Accleration Std': static_std_accel,
    'Max Acceleration': static_max_accel,
    'NIA': static_nia,
    'Mean Jerk': static_mean_jerk,
    'Jerk Std': static_std_jerk,
    'Max Jerk': static_max_jerk,
    'NIJ': static_nij,
    'Duration': static_duration,
    'NIC': static_nic,
    'Mean Pressure': static_mean_pressure,
    'Max Pressure': static_max_pressure,
    'Pressure Std': static_std_pressure,
    'Rate of Inversion in Pressure': static_rate_ip,
    'X Entropy': static_x_entropy,
    'Y Entropy': static_y_entropy,
    'X Skewness': static_x_skewness,
    'Y Skewness': static_y_skewness,
    'X Kurtosis': static_x_kurtosis,
    'Y Kurtosis': static_y_kurtosis,
    'Pressure High Freq': static_pressure_high_freq,
    'Pressure Low Freq': static_pressure_low_freq,
    'Altitude High Freq': static_altitude_high_freq,
    'Altitude Low Freq': static_altitude_low_freq,
    'Pressure Regression R^2': static_pressure_regression,
    'Curvature Regression R^2': static_curvature_regression,
    'Velocity Regression R^2': static_velocity_regression,
})

for filename in controlFiles:
    calculate(filename)

# `for(x in xs):` is not Python syntax; a for statement takes no parentheses
# around `target in iterable`.
for filename in patientFiles:
    calculate(filename)



In [None]:
# CALCULATE ALL DYNAMIC FEATURES AND PUT INTO DATAFRAME

# One accumulator list per output column.
dynamic_subject_id = []
dynamic_mean_vel = []
dynamic_max_vel = []
dynamic_vel_std = []
dynamic_vel_niv = []  # rate
dynamic_nvv = []
dynamic_mean_accel = []
dynamic_max_accel = []
dynamic_std_accel = []
dynamic_nia = []  # rate
dynamic_mean_jerk = []
dynamic_max_jerk = []
dynamic_std_jerk = []
dynamic_nij = []  # rate
dynamic_duration = []
dynamic_nic = []
dynamic_mean_pressure = []
dynamic_max_pressure = []
dynamic_std_pressure = []
dynamic_rate_ip = []
dynamic_x_entropy = []
dynamic_y_entropy = []
dynamic_x_skewness = []
dynamic_y_skewness = []
dynamic_x_kurtosis = []
dynamic_y_kurtosis = []
dynamic_pressure_high_freq = []
dynamic_pressure_low_freq = []
dynamic_altitude_high_freq = []
dynamic_altitude_low_freq = []
dynamic_pressure_regression = []
dynamic_curvature_regression = []
dynamic_velocity_regression = []

# The column mapping has to be a dict literal ({...}); `key: value` pairs are
# not valid inside a list.  The three regression columns now point at the
# lists declared above instead of undefined names.
# NOTE(review): the frame is built before the loops below, and those loops
# discard what calculate() returns, so `df` has these columns but no rows.
df = pd.DataFrame({
    'dynamic Subject ID': dynamic_subject_id,
    'Mean Velocity': dynamic_mean_vel,
    'Velocity Std': dynamic_vel_std,
    'Max Velocity': dynamic_max_vel,
    'NVV': dynamic_nvv,
    'NIV': dynamic_vel_niv,
    'Mean Acceleration': dynamic_mean_accel,
    'Accleration Std': dynamic_std_accel,
    'Max Acceleration': dynamic_max_accel,
    'NIA': dynamic_nia,
    'Mean Jerk': dynamic_mean_jerk,
    'Jerk Std': dynamic_std_jerk,
    'Max Jerk': dynamic_max_jerk,
    'NIJ': dynamic_nij,
    'Duration': dynamic_duration,
    'NIC': dynamic_nic,
    'Mean Pressure': dynamic_mean_pressure,
    'Max Pressure': dynamic_max_pressure,
    'Pressure Std': dynamic_std_pressure,
    'Rate of Inversion in Pressure': dynamic_rate_ip,
    'X Entropy': dynamic_x_entropy,
    'Y Entropy': dynamic_y_entropy,
    'X Skewness': dynamic_x_skewness,
    'Y Skewness': dynamic_y_skewness,
    'X Kurtosis': dynamic_x_kurtosis,
    'Y Kurtosis': dynamic_y_kurtosis,
    'Pressure High Freq': dynamic_pressure_high_freq,
    'Pressure Low Freq': dynamic_pressure_low_freq,
    'Altitude High Freq': dynamic_altitude_high_freq,
    'Altitude Low Freq': dynamic_altitude_low_freq,
    'Pressure Regression R^2': dynamic_pressure_regression,
    'Curvature Regression R^2': dynamic_curvature_regression,
    'Velocity Regression R^2': dynamic_velocity_regression,
})

for filename in controlFiles:
    calculate(filename)

# `for(x in xs):` is not Python syntax; a for statement takes no parentheses
# around `target in iterable`.
for filename in patientFiles:
    calculate(filename)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=54ea7590-1f30-42ee-b9c1-c929b94e3f2f' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>