In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install tsfresh

In [15]:
#import all libraries
import os
import pandas as pd
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute

In [4]:
# Directory paths for fall and ADL sequences
fall_dir = '/content/drive/MyDrive/Capstone/AccelerometerData/Fall'
adl_dir = '/content/drive/MyDrive/Capstone/AccelerometerData/ADL'

# Function to load CSV files and return a list of dataframes
def load_csv_data(directory,classification):
    data = []
    for file_name in os.listdir(directory):
        if file_name.endswith('.csv'):
            file_path = os.path.join(directory, file_name)
            # Specify column names explicitly
            df = pd.read_csv(file_path, header=None, names=['time', 'SV_total', 'Ax', 'Ay', 'Az'])
            df['time'] = pd.to_datetime(df['time'], unit='ms')
            df['classification'] = classification
            data.append(df)
    return data

# Load CSV files for fall and ADL sequences
fall_data = load_csv_data(fall_dir,0)
adl_data = load_csv_data(adl_dir,1)

combined_data = fall_data + adl_data

In [5]:
def add_sequence_id(df, sequence_id):
    df['sequence_id'] = sequence_id
    return df

time_series = [add_sequence_id(df, sequence_id=i) for i, df in enumerate(combined_data)]


In [None]:
time_series

In [None]:
def extract_tsfresh_features(ts_df):
    print(ts_df.head())
    features = extract_features(ts_df, column_id="sequence_id", column_sort="time")
    return features

features = pd.concat([extract_tsfresh_features(df) for df in time_series])

In [None]:
features

In [17]:
y = pd.Series([df['classification'][0] for df in time_series])
impute(features)
features_filtered = select_features(features, y)

In [19]:
features_filtered

Unnamed: 0,classification__mean_n_absolute_max__number_of_maxima_7,"classification__cwt_coefficients__coeff_2__w_2__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_2__w_5__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_2__w_10__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_3__w_2__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_3__w_5__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_3__w_10__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_4__w_2__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_4__w_5__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_4__w_10__widths_(2, 5, 10, 20)",...,"Az__fft_aggregated__aggtype_""kurtosis""","Az__fft_coefficient__attr_""real""__coeff_95","SV_total__fft_coefficient__attr_""abs""__coeff_80","Ay__fft_coefficient__attr_""abs""__coeff_16","SV_total__change_quantiles__f_agg_""mean""__isabs_False__qh_0.4__ql_0.0","Ay__fft_coefficient__attr_""abs""__coeff_38","Ax__fft_coefficient__attr_""abs""__coeff_92",SV_total__count_below_mean,Az__median,Ay__quantile__q_0.9
0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,8.269579,-1.447145,4.485553,7.395637,-0.013888,4.828358,2.102241,141.0,0.182515,1.123779
1,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,7.335085,-0.143353,1.396028,4.321444,-0.010073,1.223421,1.153246,104.0,0.269842,0.956986
2,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,6.641739,-0.143353,1.396028,4.875203,-0.022572,0.402328,1.153246,98.0,0.662506,0.819522
3,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,5.111684,-0.659080,5.577839,5.689109,-0.003950,1.743277,4.316382,225.0,0.165319,1.052738
4,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,13.039737,-9.194754,13.517261,5.433309,-0.023599,5.228768,8.096039,145.0,0.294775,1.065389
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,7.195812,-2.968675,14.176168,16.253792,0.000214,2.558777,12.554145,1047.0,0.025391,0.995117
65,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,8.071858,0.643480,19.652374,18.434103,0.000068,5.490542,9.439369,951.0,0.044922,0.960449
66,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,4.645562,-1.317875,5.312049,9.300432,-0.004563,1.488959,2.835157,255.0,-0.358397,1.095775
67,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,4.523999,-0.668786,1.333366,3.862977,-0.001630,1.814015,0.914139,157.0,-0.415878,1.085212


In [20]:
from sklearn.feature_selection import SelectKBest, mutual_info_classif

# Calculate mutual information scores
mi_scores = mutual_info_classif(features_filtered, y)

# Select top 500 features based on mutual information scores
k_best = SelectKBest(mutual_info_classif, k=500)
selected_features = k_best.fit_transform(features_filtered, y)

# Get the indices of the selected features
selected_indices = k_best.get_support(indices=True)

# Filter the original features dataframe to keep only the selected features
selected_features_df = features_filtered.iloc[:, selected_indices]


In [21]:
selected_features_df

Unnamed: 0,classification__mean_n_absolute_max__number_of_maxima_7,"classification__cwt_coefficients__coeff_2__w_2__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_2__w_5__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_2__w_10__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_3__w_2__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_3__w_5__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_3__w_10__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_4__w_2__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_4__w_5__widths_(2, 5, 10, 20)","classification__cwt_coefficients__coeff_4__w_10__widths_(2, 5, 10, 20)",...,"SV_total__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""var""","SV_total__agg_linear_trend__attr_""slope""__chunk_len_10__f_agg_""var""",Az__skewness,"Ax__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""var""","Az__agg_linear_trend__attr_""intercept""__chunk_len_5__f_agg_""var""","SV_total__agg_linear_trend__attr_""slope""__chunk_len_10__f_agg_""max""","classification__fft_coefficient__attr_""angle""__coeff_70","classification__fft_coefficient__attr_""angle""__coeff_44",Az__time_reversal_asymmetry_statistic__lag_1,SV_total__lempel_ziv_complexity__bins_3
0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,0.006715,0.018707,4.845216,0.005165,-0.003645,0.042582,0.000000,0.000000,0.116340,0.103627
1,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,0.013051,0.031584,5.575011,0.002687,0.043456,0.084055,0.000000,0.000000,0.374337,0.138462
2,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,-0.008226,-0.015660,5.728065,-0.001124,0.428037,-0.012551,0.000000,0.000000,0.311670,0.153846
3,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,0.000648,0.001496,1.087121,0.000706,-0.001582,0.004801,0.000000,0.000000,0.000850,0.104762
4,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,...,0.030532,0.065547,9.128598,0.027573,-0.052154,0.078907,0.000000,0.000000,-0.305197,0.100000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,-0.000021,-0.000108,-1.169632,-0.000026,0.003244,-0.001154,122.875648,144.093264,-0.000030,0.071891
65,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,-0.000013,-0.000067,-0.353551,-0.000016,0.002591,-0.000768,-179.883193,66.930565,-0.000037,0.079169
66,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,0.000579,0.001207,-1.523931,0.000172,0.007530,0.003175,50.417690,165.405405,0.005727,0.073710
67,1.0,0.759824,0.719559,0.538397,0.601882,0.976286,0.787617,0.328326,1.13112,1.013992,...,0.000233,0.000380,-0.769503,0.000070,0.007837,-0.001031,0.000000,0.000000,-0.003934,0.107937
