<a href="https://www.kaggle.com/code/llkh0a/ensemble-training?scriptVersionId=244414570" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Ensemble Modeling for BFRB Sensor Data Classification

This notebook demonstrates an ensemble approach for classifying body-focused repetitive behaviors (BFRBs) using sensor data from the CMI competition. The following models are used in the ensemble:

- **LightGBM**
- **XGBoost**
- **CatBoost**

## Feature Selection

Features are constructed as follows:
- For each sequence, statistical features (mean, std, min, max, skew, kurtosis) are extracted from all numeric sensor columns (IMU, thermopile, ToF, etc.).
- Demographic features from the `train_demographics.csv` file (such as age, sex, handedness, height, etc.) are merged and included as input features for the models.
- The final feature set for each sequence includes all aggregated sensor statistics and all demographic columns except for the subject identifier.

for model submission checkout this notebook: https://www.kaggle.com/code/llkh0a/ensemble-inference/

# Data Loading & Exploration
Load the sensor and demographic data, and explore their structure.

In [5]:
import numpy as np
import pandas as pd

# Read data
train = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv')
train_demo = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/train_demographics.csv')

print('Train shape:', train.shape)
print('Train demographics shape:', train_demo.shape)
print('Train columns:', train.columns.tolist())
print(train.head())

Train shape: (574945, 341)
Train demographics shape: (81, 8)
Train columns: ['row_id', 'sequence_type', 'sequence_id', 'sequence_counter', 'subject', 'orientation', 'behavior', 'phase', 'gesture', 'acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z', 'thm_1', 'thm_2', 'thm_3', 'thm_4', 'thm_5', 'tof_1_v0', 'tof_1_v1', 'tof_1_v2', 'tof_1_v3', 'tof_1_v4', 'tof_1_v5', 'tof_1_v6', 'tof_1_v7', 'tof_1_v8', 'tof_1_v9', 'tof_1_v10', 'tof_1_v11', 'tof_1_v12', 'tof_1_v13', 'tof_1_v14', 'tof_1_v15', 'tof_1_v16', 'tof_1_v17', 'tof_1_v18', 'tof_1_v19', 'tof_1_v20', 'tof_1_v21', 'tof_1_v22', 'tof_1_v23', 'tof_1_v24', 'tof_1_v25', 'tof_1_v26', 'tof_1_v27', 'tof_1_v28', 'tof_1_v29', 'tof_1_v30', 'tof_1_v31', 'tof_1_v32', 'tof_1_v33', 'tof_1_v34', 'tof_1_v35', 'tof_1_v36', 'tof_1_v37', 'tof_1_v38', 'tof_1_v39', 'tof_1_v40', 'tof_1_v41', 'tof_1_v42', 'tof_1_v43', 'tof_1_v44', 'tof_1_v45', 'tof_1_v46', 'tof_1_v47', 'tof_1_v48', 'tof_1_v49', 'tof_1_v50', 'tof_1_v51', 'tof_1_v52', 'tof_1_v53', 'to

In [6]:
train.columns

Index(['row_id', 'sequence_type', 'sequence_id', 'sequence_counter', 'subject',
       'orientation', 'behavior', 'phase', 'gesture', 'acc_x',
       ...
       'tof_5_v54', 'tof_5_v55', 'tof_5_v56', 'tof_5_v57', 'tof_5_v58',
       'tof_5_v59', 'tof_5_v60', 'tof_5_v61', 'tof_5_v62', 'tof_5_v63'],
      dtype='object', length=341)

# Data Cleaning
Check and handle missing values and outliers in sensor data.

In [7]:
# Check missing values
missing = train.isnull().sum()
print('Missing values per column:')
print(missing[missing > 0])

# Replace -1 in ToF columns with NaN for easier statistics
tof_cols = [col for col in train.columns if col.startswith('tof_')]
train[tof_cols] = train[tof_cols].replace(-1, np.nan)

Missing values per column:
rot_w         3692
rot_x         3692
rot_y         3692
rot_z         3692
thm_1         6987
             ...  
tof_5_v59    30142
tof_5_v60    30142
tof_5_v61    30142
tof_5_v62    30142
tof_5_v63    30142
Length: 329, dtype: int64


# Feature Engineering
Extract statistical features for each sequence and merge with demographics.

In [8]:
def extract_features(df):
    feats = []
    # Only use numeric columns for aggregation
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    # Remove columns that should not be aggregated
    exclude_cols = ['row_id', 'sequence_id', 'sequence_counter', 'subject']
    numeric_cols = [c for c in numeric_cols if c not in exclude_cols]
    for seq_id, group in df.groupby('sequence_id'):
        feat = {'sequence_id': seq_id}
        for col in numeric_cols:
            feat[col + '_mean'] = group[col].mean()
            feat[col + '_std'] = group[col].std()
            feat[col + '_min'] = group[col].min()
            feat[col + '_max'] = group[col].max()
            feat[col + '_skew'] = group[col].skew()
            feat[col + '_kurt'] = group[col].kurt()
            feat[col + '_missing'] = group[col].isnull().sum()
        # New feature: total missing values in sequence
        feat['total_missing'] = group.isnull().sum().sum()
        # New feature: duration of sequence
        feat['duration'] = group['sequence_counter'].max() - group['sequence_counter'].min()
        # New feature: IMU signal energy
        for axis in ['acc_x', 'acc_y', 'acc_z']:
            if axis in group.columns:
                feat[axis + '_energy'] = (group[axis] ** 2).sum()
        feat['subject'] = group['subject'].iloc[0]
        feats.append(feat)
    return pd.DataFrame(feats)

X = extract_features(train)
# Merge demographic features (excluding subject key)
demographic_features = [col for col in train_demo.columns if col != 'subject']
X = X.merge(train_demo, on='subject', how='left')
# Fill any remaining missing values in features with column mean (or 0 as fallback)
X = X.fillna(X.mean(numeric_only=True)).fillna(0)
# Add demographic features to training set
feature_cols = [col for col in X.columns if col not in ['sequence_id', 'subject']]

# Target Mapping
Map all gestures not in the target list to 'non_target' for proper evaluation.

In [9]:
target_gestures = [
    'Above ear - pull hair',
    'Cheek - pinch skin',
    'Eyebrow - pull hair',
    'Eyelash - pull hair',
    'Forehead - pull hairline',
    'Forehead - scratch',
    'Neck - pinch skin',
    'Neck - scratch',
]
non_target_gestures = [
    'Write name on leg',
    'Wave hello',
    'Glasses on/off',
    'Text on phone',
    'Write name in air',
    'Feel around in tray and pull out an object',
    'Scratch knee/leg skin',
    'Pull air toward your face',
    'Drink from bottle/cup',
    'Pinch knee/leg skin'
]

# Map all gestures not in target_gestures to 'non_target'
train['gesture_mapped'] = train['gesture'].apply(lambda x: x if x in target_gestures else 'non_target')
y = train.groupby('sequence_id')['gesture_mapped'].first().values

  train['gesture_mapped'] = train['gesture'].apply(lambda x: x if x in target_gestures else 'non_target')


In [10]:
import joblib
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_enc = le.fit_transform(y)
joblib.dump(le, 'label_encoder.joblib')
# Print label map for reference
label_map = {i: label for i, label in enumerate(le.classes_)}
print('Label map:', label_map)

Label map: {0: 'Above ear - pull hair', 1: 'Cheek - pinch skin', 2: 'Eyebrow - pull hair', 3: 'Eyelash - pull hair', 4: 'Forehead - pull hairline', 5: 'Forehead - scratch', 6: 'Neck - pinch skin', 7: 'Neck - scratch', 8: 'non_target'}


# Model Training, Hyperparameter Tuning, and Ensemble

This section covers hyperparameter tuning for base models, followed by ensemble training and evaluation using the competition metric.

In [11]:
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import make_scorer, accuracy_score, f1_score
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import VotingClassifier
import joblib
import numpy as np

# Helper for binary F1
def to_binary(y, target_gestures, label_encoder):
    y_labels = label_encoder.inverse_transform(y)
    return [1 if gesture in target_gestures else 0 for gesture in y_labels]

# Custom scorer for competition metric
def competition_metric(y_true, y_pred):
    binary_f1 = f1_score(to_binary(y_true, target_gestures, le), to_binary(y_pred, target_gestures, le))
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return (binary_f1 + macro_f1) / 2

competition_scorer = make_scorer(competition_metric, greater_is_better=True)

# Split data
X_train, X_val, y_train, y_val = train_test_split(
    X[feature_cols], y_enc, test_size=0.2, random_state=42, stratify=y_enc)


In [12]:

# Hyperparameter tuning for LightGBM
lgbm_params = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'num_leaves': [15, 31, 63],
    'max_depth': [3, 5, 7, -1],
}
lgbm_search = RandomizedSearchCV(
    LGBMClassifier(),
    param_distributions=lgbm_params,
    n_iter=10,
    scoring=competition_scorer,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1
)
lgbm_search.fit(X_train, y_train)
print('Best LightGBM params:', lgbm_search.best_params_)
print('Best LightGBM score:', lgbm_search.best_score_)
lgbm_best = lgbm_search.best_estimator_


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.471922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 537681
[LightGBM] [Info] Number of data points in the train set: 4346, number of used features: 2333
[LightGBM] [Info] Start training from score -2.548066
[LightGBM] [Info] Start training from score -2.548066
[LightGBM] [Info] Start training from score -2.548066
[LightGBM] [Info] Start training from score -2.542200
[LightGBM] [Info] Start training from score -2.545129
[LightGBM] [Info] Start training from score -2.545129
[LightGBM] [Info] Start training from score -2.545129
[LightGBM] [Info] Start training from score -2.545129
[LightGBM] [Info] Start training from score -0.986830
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 537681
[LightGBM] [Info] Number of data points in the train set: 4346, num

Exception ignored on calling ctypes callback function: <function _log_callback at 0x7f908651bc40>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
    
KeyboardInterrupt: 
Exception ignored on calling ctypes callback function: <function _log_callback at 0x79b09ab1bc40>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
    
KeyboardInterrupt: 



No further splits with positive gain, best gain: -inf

Exception ignored on calling ctypes callback function: <function _log_callback at 0x7ce2fc33fc40>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/lightgbm/basic.py", line 287, in _log_callback
    def _log_callback(msg: bytes) -> None:
    
KeyboardInterrupt: 


KeyboardInterrupt: 

In [13]:

# Hyperparameter tuning for XGBoost
xgb_params = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 5, 7],
    'subsample': [0.7, 0.85, 1.0],
    'colsample_bytree': [0.7, 0.85, 1.0],
}
xgb_search = RandomizedSearchCV(
    XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
    param_distributions=xgb_params,
    n_iter=10,
    scoring=competition_scorer,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1
)
xgb_search.fit(X_train, y_train)
print('Best XGBoost params:', xgb_search.best_params_)
print('Best XGBoost score:', xgb_search.best_score_)
xgb_best = xgb_search.best_estimator_


Fitting 3 folds for each of 10 candidates, totalling 30 fits


KeyboardInterrupt: 

In [14]:

# Hyperparameter tuning for CatBoost
cat_params = {
    'iterations': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'depth': [3, 5, 7],
    'l2_leaf_reg': [1, 3, 5, 7],
}
cat_search = RandomizedSearchCV(
    CatBoostClassifier(verbose=0),
    param_distributions=cat_params,
    n_iter=10,
    scoring=competition_scorer,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1
)
cat_search.fit(X_train, y_train)
print('Best CatBoost params:', cat_search.best_params_)
print('Best CatBoost score:', cat_search.best_score_)
cat_best = cat_search.best_estimator_

Fitting 3 folds for each of 10 candidates, totalling 30 fits


KeyboardInterrupt: 

In [None]:


# Ensemble with best estimators
ensemble = VotingClassifier(estimators=[
    ('lgbm', lgbm_best),
    ('xgb', xgb_best),
    ('cat', cat_best)
], voting='soft')

ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_val)
print('Validation accuracy:', accuracy_score(y_val, y_pred))

# Save the ensemble model to a file
joblib.dump(ensemble, 'ensemble_model.joblib')
print('Model saved as ensemble_model.joblib')

In [None]:
from sklearn.metrics import f1_score
# Calculate competition metrics
y_true = y_val
binary_f1 = f1_score(to_binary(y_true, target_gestures, le), to_binary(y_pred, target_gestures, le))
macro_f1 = f1_score(y_true, y_pred, average='macro')
print('Binary F1 score:', binary_f1)
print('Macro F1 score:', macro_f1)
competition_metric_score = competition_metric(y_val, y_pred)

print('Competition metric score on validation set:', competition_metric_score)