In [1]:
import pandas as pd
import numpy as np
import os
from glob import glob
import joblib

In [2]:
def compute_jerk(arr, dt=0.011):
    """Return the first difference of ``arr`` divided by the sample interval ``dt``.

    Parameters
    ----------
    arr : array-like
        Sampled signal. Differences are taken along the last axis.
    dt : float, optional
        Spacing between consecutive samples. Defaults to 0.011
        (presumably seconds per sample -- confirm against the recorder).

    Returns
    -------
    numpy.ndarray
        Finite-difference rate of change; one element shorter than ``arr``.
    """
    return np.diff(arr) / dt

In [3]:
from scipy.signal import lfilter
import numpy as np

# Fixed coefficients of a 4th-order IIR filter (five taps each), labelled
# "Butterworth" by the author. `b` is the numerator (feed-forward) polynomial and
# `a` the denominator (feedback) polynomial, in the order scipy.signal.lfilter takes.
# NOTE(review): the values look like scipy.signal.butter(4, 0.2) rounded -- confirm.
b = [0.00482434, 0.01929737, 0.02894606, 0.01929737, 0.00482434]
a = [1.0, -2.36951, 2.31399, -1.05467, 0.18738]

def butterworth_filter(signal):
    """Run ``signal`` through the fixed IIR filter defined by ``b`` and ``a``.

    The filter is applied once, forward in time, via ``scipy.signal.lfilter``
    (so the output is not zero-phase). The result has the same length as the input.
    """
    filtered = lfilter(b, a, signal)
    return filtered


In [4]:
from scipy.stats import entropy
def compute_sma(x, y, z):
    """Signal magnitude area: the mean over samples of ``|x| + |y| + |z|``.

    ``x``, ``y`` and ``z`` are added element-wise, so they must be broadcastable
    against each other (equal-length arrays in the usual case).
    """
    abs_sum = np.abs(x) + np.abs(y) + np.abs(z)
    return np.mean(abs_sum)

# def compute_entropy(signal, bins=30):
#     hist, bin_edges = np.histogram(signal, bins=bins, density=True)
#     hist = hist[hist > 0]  # Remove zero entries for valid entropy
#     return entropy(hist)

def compute_entropy(signal):
    """Shannon entropy (natural log) of the empirical distribution of values in ``signal``.

    Samples are grouped by exact equality -- there is no binning -- so a signal
    whose samples are all distinct yields ``log(len(signal))`` whatever its shape,
    and a constant signal yields 0.
    """
    _, occurrences = np.unique(signal, return_counts=True)
    return entropy(occurrences / len(signal))


In [8]:
def extract_features(df):
    """Compute the 33 summary features for one recording.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'X', 'Y' and 'Z'.

    Returns
    -------
    dict
        Feature name -> scalar, in a fixed insertion order: per-axis statistics,
        then jerk statistics (per axis and combined magnitude), then SMA and the
        per-axis entropies. For the X axis only rms and std are produced (no
        min/max/range), both for the raw signal and for its jerk.

    Notes
    -----
    The raw samples are used as-is; smoothing with ``butterworth_filter`` is
    currently not applied.
    """
    x, y, z = (df[axis].values for axis in ('X', 'Y', 'Z'))

    def _rms(values):
        # Root mean square of the samples.
        return np.sqrt(np.mean(values**2))

    def _span(values):
        # Peak-to-peak extent (max minus min).
        return np.max(values) - np.min(values)

    # Finite-difference derivative of each axis and its Euclidean magnitude.
    x_jerk, y_jerk, z_jerk = compute_jerk(x), compute_jerk(y), compute_jerk(z)
    total_jerk = np.sqrt(x_jerk**2 + y_jerk**2 + z_jerk**2)

    # Key order defines the column order of the feature table built from these
    # dicts, so entries must not be reordered.
    return {
        # Raw-signal statistics
        'X_rms': _rms(x),
        'X_std': np.std(x),
        'Y_min': np.min(y),
        'Y_max': np.max(y),
        'Y_range': _span(y),
        'Y_rms': _rms(y),
        'Y_std': np.std(y),
        'Z_min': np.min(z),
        'Z_max': np.max(z),
        'Z_range': _span(z),
        'Z_rms': _rms(z),
        'Z_std': np.std(z),
        # Jerk statistics
        'X_jerk_rms': _rms(x_jerk),
        'X_jerk_std': np.std(x_jerk),
        'Y_jerk_min': np.min(y_jerk),
        'Y_jerk_max': np.max(y_jerk),
        'Y_jerk_range': _span(y_jerk),
        'Y_jerk_rms': _rms(y_jerk),
        'Y_jerk_std': np.std(y_jerk),
        'Z_jerk_min': np.min(z_jerk),
        'Z_jerk_max': np.max(z_jerk),
        'Z_jerk_range': _span(z_jerk),
        'Z_jerk_rms': _rms(z_jerk),
        'Z_jerk_std': np.std(z_jerk),
        'total_jerk_min': np.min(total_jerk),
        'total_jerk_max': np.max(total_jerk),
        'total_jerk_range': _span(total_jerk),
        'total_jerk_rms': _rms(total_jerk),
        'total_jerk_std': np.std(total_jerk),
        # Signal magnitude area and per-axis entropy
        'SMA': compute_sma(x, y, z),
        'X_entropy': compute_entropy(x),
        'Y_entropy': compute_entropy(y),
        'Z_entropy': compute_entropy(z),
    }

In [9]:
# Directory scanned for input CSV files -- replace with your own path.
csv_dir = "/home/ette/Desktop/Gesture_Classification_Accelerometer/Notebooks_8Gestures/Small_Model/Atique/"

# Collect every *.csv directly inside csv_dir; sorting keeps the row order of the
# feature table deterministic across runs.
csv_pattern = os.path.join(csv_dir, "*.csv")
csv_files = sorted(glob(csv_pattern))

In [10]:
# One feature dict per CSV file, in the same order as csv_files.
all_features = [extract_features(pd.read_csv(csv_path)) for csv_path in csv_files]

In [11]:
# Stack the per-file feature dicts into a table: one row per CSV file, one column per feature.
features_df = pd.DataFrame(all_features)
print(features_df.shape)  # (number of files, 33 features); the recorded run printed (25, 33)

(25, 33)


In [12]:
# Inspect the feature names and their order; the normalization step below subtracts
# `mean` and divides by `scale` positionally, so this order has to match those arrays.
features_df.columns

Index(['X_rms', 'X_std', 'Y_min', 'Y_max', 'Y_range', 'Y_rms', 'Y_std',
       'Z_min', 'Z_max', 'Z_range', 'Z_rms', 'Z_std', 'X_jerk_rms',
       'X_jerk_std', 'Y_jerk_min', 'Y_jerk_max', 'Y_jerk_range', 'Y_jerk_rms',
       'Y_jerk_std', 'Z_jerk_min', 'Z_jerk_max', 'Z_jerk_range', 'Z_jerk_rms',
       'Z_jerk_std', 'total_jerk_min', 'total_jerk_max', 'total_jerk_range',
       'total_jerk_rms', 'total_jerk_std', 'SMA', 'X_entropy', 'Y_entropy',
       'Z_entropy'],
      dtype='object')

In [13]:
# Per-feature centering (`mean`) and scaling (`scale`) constants used to z-score the
# feature table. Each array has 33 entries, one per column of `features_df`, in the
# same order as those columns.
# NOTE(review): presumably these are the mean_/scale_ of the scaler fitted on the
# training data -- confirm, and confirm they were produced with the same feature order.
# The last three entries (the X/Y/Z entropy features) are identical within each array.
mean = np.array([  0.32293313,   0.23458425,  -0.97149161,   0.13949324,   1.11098485,
                   0.46144171,   0.32423791,  -0.2176104 ,   1.19794635,   1.41555674,
                   0.63065017,   0.31084388,   5.99067673,   6.01622849, -15.12637371,
                  14.21260802,  29.33898173,   5.96695335,   5.99235316, -22.2283154 ,
                  23.90334617,  46.13166158,   9.19402778,   9.23604226, -22.60907426,
                  22.17540496,  44.78447922,   8.40983338,   8.45044401, 1.1986844,
   4.41702167,   4.41702167,   4.41702167])

scale = np.array([ 0.1147093 ,  0.1204033 ,  0.74855733,  0.26006534,  0.93148539,  0.28785013,
                  0.27129799,  0.74273534,  0.58587301,  1.21793086,  0.19353642,  0.27698543,
                  3.53352765,  3.55364682, 16.43394694, 14.79571618, 29.98799448,  6.46485982,
                  6.50596541, 19.59113847, 23.22586334, 41.0584831 ,  8.61429846,  8.6692176 ,
                  20.64856919, 18.98960513, 37.57782951,  7.04367089,  7.08829518, 0.34040449,
  0.32462139,  0.32462139,  0.32462139])

In [14]:
# Feature table as a plain array: one row per CSV file, one column per feature.
df_scaled_values = features_df.to_numpy()

# Z-score every column with the fixed constants. `mean` and `scale` broadcast across
# rows, so alignment is purely positional: column i uses mean[i] and scale[i].
standardized_values = (df_scaled_values - mean) / scale

# Re-attach the feature names so the standardized table is readable.
df_standardized = pd.DataFrame(data=standardized_values, columns=features_df.columns)

In [15]:
# Display the standardized feature table.
# NOTE(review): in the recorded output several columns sit far from zero on every row
# (Z_min ~ 11-13, Z_max ~ 14.7, SMA ~ 29-30). That may mean these recordings differ in
# units or sensor range from the data the mean/scale constants came from -- confirm
# before trusting the predictions.
df_standardized

Unnamed: 0,X_rms,X_std,Y_min,Y_max,Y_range,Y_rms,Y_std,Z_min,Z_max,Z_range,...,Z_jerk_std,total_jerk_min,total_jerk_max,total_jerk_range,total_jerk_rms,total_jerk_std,SMA,X_entropy,Y_entropy,Z_entropy
0,-0.006463,0.359006,1.351184,9.82695,1.657794,3.610277,1.532653,12.98791,14.690989,-0.853518,...,-0.985014,1.097328,0.335192,-0.433583,-0.484429,-0.627632,29.525778,4.559565,4.559565,4.547832
1,4.213912,4.153497,0.405111,10.17721,2.515864,3.408545,2.133199,12.741624,14.688624,-0.704462,...,-0.962274,1.098265,0.224324,-0.490123,-0.274841,-0.510395,30.248443,4.312992,4.312992,4.312992
2,3.315314,3.230759,1.662944,10.463958,1.585107,3.521979,1.190612,12.922068,14.6873,-0.815139,...,-0.969896,1.098855,0.201396,-0.502034,-0.457463,-0.644239,30.25858,4.004684,4.004684,4.004684
3,1.443695,1.458174,0.793435,8.351733,1.694138,2.97452,1.923904,13.023063,14.693725,-0.873639,...,-0.994219,1.098333,0.45708,-0.37254,-0.531828,-0.687678,29.40992,4.153052,4.139665,4.153052
4,1.621993,2.167789,-0.294053,6.990116,2.187906,3.226477,2.5961,13.131992,14.681597,-0.945902,...,-1.011384,1.096668,-0.096861,-0.651554,-0.47583,-0.642137,29.832347,4.30381,4.30381,4.291062
5,6.431258,6.257491,-6.37134,7.666195,7.260473,5.551837,6.366456,10.902973,14.691049,0.417976,...,-0.752252,1.098804,0.281745,-0.461402,0.322357,-0.116788,30.558809,3.593339,3.593339,3.577284
6,3.849034,4.177285,-6.493929,7.072936,7.193353,4.778698,5.506073,10.815628,14.690826,0.471135,...,-0.733566,1.101084,0.748514,-0.226778,0.277956,-0.027224,29.165724,4.0148,4.0148,3.972796
7,4.50957,4.979448,-6.288972,6.526597,6.876112,4.77497,5.550012,10.772005,14.69055,0.497604,...,-0.759822,1.09505,0.234185,-0.483374,0.053115,-0.170985,30.176043,4.491106,4.491106,4.419131
8,5.24741,5.273343,-6.158236,8.048292,7.195899,5.207221,6.028804,11.136981,14.692301,0.275872,...,-0.771727,1.096756,0.672912,-0.262605,0.289646,-0.030194,30.258326,3.922532,3.922532,3.893677
9,3.223457,3.803379,-6.079393,8.504468,7.259901,4.492534,5.195855,11.043628,14.659786,0.317161,...,-0.782152,1.0955,0.354227,-0.422959,0.090751,-0.136539,29.873616,4.764189,4.764189,4.742233


In [16]:
# Load the trained classifier from the working directory.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -- only load
# model files that come from a trusted source.
model = joblib.load('xgboost_model.pkl')

In [17]:
# Cast the standardized features to float32 before handing them to the model.
X = df_standardized.values.astype(np.float32)

# Run the loaded model on every row. In the recorded run the result is a 1-D array of
# integer class labels, one per row -- not per-class probabilities.
Y = model.predict(X)

In [18]:
# Show the predicted class label for each input file (rows follow the order of csv_files).
Y

array([7, 0, 0, 7, 0, 0, 0, 0, 0, 0, 7, 0, 1, 7, 0, 0, 0, 0, 0, 6, 6, 1,
       1, 0, 1])