# **Model Training and Feature Engineering**

In [1]:
#### TEMPORARY
# Makes the project's `src` package importable from this notebook. Remove once
# the project is installed into the environment (e.g. an editable install).
import os
import sys

# The checkout location can be overridden through the CMI_PROJECT_ROOT
# environment variable; the default is the original machine-specific path so
# existing setups keep working unchanged.
_PROJECT_ROOT = os.environ.get(
    'CMI_PROJECT_ROOT', '/home/bac/code/kaggle/kaggle-cmi-detect-behavior/'
)

# Guarded so that re-running this cell does not stack duplicate entries.
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)

In [3]:
import os
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, balanced_accuracy_score

# --- Notebook-wide settings ---
# Silence every warning category for the whole session (this also hides
# deprecation notices from the libraries imported above).
warnings.filterwarnings(action='ignore')

# Never truncate columns when a DataFrame is rendered.
pd.set_option('display.max_columns', None)

# Plot appearance: seaborn grid style first, then the default figure size.
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

In [4]:
from src.config import PROJECT_PATH, DATA_PATH, USE_WANDB, WANDB_PROJECT, WANDB_ENTITY
from src.tracking import ExperimentTracker

In [5]:
def display_all(df):
    """Render `df` with temporarily widened limits (100 rows, 1000 columns).

    The pandas display options are only changed for the duration of the call
    and are restored afterwards. Returns None; the frame is shown through the
    notebook's `display` function.
    """
    widened_limits = ("display.max_rows", 100, "display.max_columns", 1000)
    with pd.option_context(*widened_limits):
        display(df)

In [6]:
# Build the experiment tracker from the values exported by src.config.
# Collecting the settings first keeps the constructor call to a single line.
tracker_settings = dict(
    project_path=PROJECT_PATH,
    use_wandb=USE_WANDB,
    wandb_project_name=WANDB_PROJECT,
    wandb_entity=WANDB_ENTITY,
)
tracker = ExperimentTracker(**tracker_settings)

In [8]:
# Load data
# Resolve the data directory once; expanduser handles a leading "~" in DATA_PATH.
data_dir = os.path.expanduser(DATA_PATH)
train_sensor = pd.read_csv(os.path.join(data_dir, 'train.csv'))
train_demos = pd.read_csv(os.path.join(data_dir, 'train_demographics.csv'))

# Merge
# Left join keeps every sensor row. validate='many_to_one' makes pandas raise a
# MergeError if a subject appears more than once in the demographics table,
# which would otherwise silently duplicate sensor rows and skew every
# per-sequence aggregate computed later.
train_df = pd.merge(
    train_sensor,
    train_demos,
    on='subject',
    how='left',
    validate='many_to_one',
)
display_all(train_df.head())

Unnamed: 0,row_id,sequence_type,sequence_id,sequence_counter,subject,orientation,behavior,phase,gesture,acc_x,acc_y,acc_z,rot_w,rot_x,rot_y,rot_z,thm_1,thm_2,thm_3,thm_4,thm_5,tof_1_v0,tof_1_v1,tof_1_v2,tof_1_v3,tof_1_v4,tof_1_v5,tof_1_v6,tof_1_v7,tof_1_v8,tof_1_v9,tof_1_v10,tof_1_v11,tof_1_v12,tof_1_v13,tof_1_v14,tof_1_v15,tof_1_v16,tof_1_v17,tof_1_v18,tof_1_v19,tof_1_v20,tof_1_v21,tof_1_v22,tof_1_v23,tof_1_v24,tof_1_v25,tof_1_v26,tof_1_v27,tof_1_v28,tof_1_v29,tof_1_v30,tof_1_v31,tof_1_v32,tof_1_v33,tof_1_v34,tof_1_v35,tof_1_v36,tof_1_v37,tof_1_v38,tof_1_v39,tof_1_v40,tof_1_v41,tof_1_v42,tof_1_v43,tof_1_v44,tof_1_v45,tof_1_v46,tof_1_v47,tof_1_v48,tof_1_v49,tof_1_v50,tof_1_v51,tof_1_v52,tof_1_v53,tof_1_v54,tof_1_v55,tof_1_v56,tof_1_v57,tof_1_v58,tof_1_v59,tof_1_v60,tof_1_v61,tof_1_v62,tof_1_v63,tof_2_v0,tof_2_v1,tof_2_v2,tof_2_v3,tof_2_v4,tof_2_v5,tof_2_v6,tof_2_v7,tof_2_v8,tof_2_v9,tof_2_v10,tof_2_v11,tof_2_v12,tof_2_v13,tof_2_v14,tof_2_v15,tof_2_v16,tof_2_v17,tof_2_v18,tof_2_v19,tof_2_v20,tof_2_v21,tof_2_v22,tof_2_v23,tof_2_v24,tof_2_v25,tof_2_v26,tof_2_v27,tof_2_v28,tof_2_v29,tof_2_v30,tof_2_v31,tof_2_v32,tof_2_v33,tof_2_v34,tof_2_v35,tof_2_v36,tof_2_v37,tof_2_v38,tof_2_v39,tof_2_v40,tof_2_v41,tof_2_v42,tof_2_v43,tof_2_v44,tof_2_v45,tof_2_v46,tof_2_v47,tof_2_v48,tof_2_v49,tof_2_v50,tof_2_v51,tof_2_v52,tof_2_v53,tof_2_v54,tof_2_v55,tof_2_v56,tof_2_v57,tof_2_v58,tof_2_v59,tof_2_v60,tof_2_v61,tof_2_v62,tof_2_v63,tof_3_v0,tof_3_v1,tof_3_v2,tof_3_v3,tof_3_v4,tof_3_v5,tof_3_v6,tof_3_v7,tof_3_v8,tof_3_v9,tof_3_v10,tof_3_v11,tof_3_v12,tof_3_v13,tof_3_v14,tof_3_v15,tof_3_v16,tof_3_v17,tof_3_v18,tof_3_v19,tof_3_v20,tof_3_v21,tof_3_v22,tof_3_v23,tof_3_v24,tof_3_v25,tof_3_v26,tof_3_v27,tof_3_v28,tof_3_v29,tof_3_v30,tof_3_v31,tof_3_v32,tof_3_v33,tof_3_v34,tof_3_v35,tof_3_v36,tof_3_v37,tof_3_v38,tof_3_v39,tof_3_v40,tof_3_v41,tof_3_v42,tof_3_v43,tof_3_v44,tof_3_v45,tof_3_v46,tof_3_v47,tof_3_v48,tof_3_v49,tof_3_v50,tof_3_v51,tof_3_v52,tof_3_v53,tof_3_v54,tof_3_v55,tof_3_v56,tof_
3_v57,tof_3_v58,tof_3_v59,tof_3_v60,tof_3_v61,tof_3_v62,tof_3_v63,tof_4_v0,tof_4_v1,tof_4_v2,tof_4_v3,tof_4_v4,tof_4_v5,tof_4_v6,tof_4_v7,tof_4_v8,tof_4_v9,tof_4_v10,tof_4_v11,tof_4_v12,tof_4_v13,tof_4_v14,tof_4_v15,tof_4_v16,tof_4_v17,tof_4_v18,tof_4_v19,tof_4_v20,tof_4_v21,tof_4_v22,tof_4_v23,tof_4_v24,tof_4_v25,tof_4_v26,tof_4_v27,tof_4_v28,tof_4_v29,tof_4_v30,tof_4_v31,tof_4_v32,tof_4_v33,tof_4_v34,tof_4_v35,tof_4_v36,tof_4_v37,tof_4_v38,tof_4_v39,tof_4_v40,tof_4_v41,tof_4_v42,tof_4_v43,tof_4_v44,tof_4_v45,tof_4_v46,tof_4_v47,tof_4_v48,tof_4_v49,tof_4_v50,tof_4_v51,tof_4_v52,tof_4_v53,tof_4_v54,tof_4_v55,tof_4_v56,tof_4_v57,tof_4_v58,tof_4_v59,tof_4_v60,tof_4_v61,tof_4_v62,tof_4_v63,tof_5_v0,tof_5_v1,tof_5_v2,tof_5_v3,tof_5_v4,tof_5_v5,tof_5_v6,tof_5_v7,tof_5_v8,tof_5_v9,tof_5_v10,tof_5_v11,tof_5_v12,tof_5_v13,tof_5_v14,tof_5_v15,tof_5_v16,tof_5_v17,tof_5_v18,tof_5_v19,tof_5_v20,tof_5_v21,tof_5_v22,tof_5_v23,tof_5_v24,tof_5_v25,tof_5_v26,tof_5_v27,tof_5_v28,tof_5_v29,tof_5_v30,tof_5_v31,tof_5_v32,tof_5_v33,tof_5_v34,tof_5_v35,tof_5_v36,tof_5_v37,tof_5_v38,tof_5_v39,tof_5_v40,tof_5_v41,tof_5_v42,tof_5_v43,tof_5_v44,tof_5_v45,tof_5_v46,tof_5_v47,tof_5_v48,tof_5_v49,tof_5_v50,tof_5_v51,tof_5_v52,tof_5_v53,tof_5_v54,tof_5_v55,tof_5_v56,tof_5_v57,tof_5_v58,tof_5_v59,tof_5_v60,tof_5_v61,tof_5_v62,tof_5_v63,adult_child,age,sex,handedness,height_cm,shoulder_to_wrist_cm,elbow_to_wrist_cm
0,SEQ_000007_000000,Target,SEQ_000007,0,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,6.683594,6.214844,3.355469,0.134399,-0.355164,-0.447327,-0.809753,28.943842,31.822186,29.553024,28.592863,28.310535,131.0,134.0,132.0,135.0,98.0,74.0,64.0,60.0,-1.0,-1.0,152.0,153.0,141.0,89.0,68.0,63.0,-1.0,-1.0,-1.0,-1.0,169.0,118.0,86.0,73.0,-1.0,-1.0,-1.0,-1.0,-1.0,147.0,110.0,87.0,126.0,-1.0,-1.0,-1.0,-1.0,-1.0,137.0,108.0,115.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,128.0,110.0,129.0,140.0,-1.0,126.0,131.0,-1.0,-1.0,-1.0,108.0,122.0,139.0,113.0,121.0,-1.0,118.0,96.0,-1.0,-1.0,-1.0,-1.0,165.0,124.0,100.0,102.0,119.0,-1.0,-1.0,115.0,130.0,-1.0,124.0,107.0,117.0,132.0,136.0,116.0,120.0,-1.0,141.0,118.0,115.0,122.0,145.0,128.0,130.0,137.0,131.0,-1.0,116.0,117.0,130.0,115.0,116.0,117.0,108.0,-1.0,-1.0,119.0,118.0,110.0,93.0,90.0,90.0,-1.0,-1.0,-1.0,116.0,103.0,87.0,82.0,81.0,-1.0,-1.0,-1.0,115.0,91.0,84.0,80.0,85.0,58.0,55.0,59.0,59.0,63.0,96.0,93.0,-1.0,57.0,59.0,58.0,64.0,72.0,103.0,98.0,-1.0,55.0,57.0,62.0,63.0,88.0,103.0,105.0,108.0,56.0,59.0,58.0,77.0,94.0,106.0,-1.0,113.0,57.0,58.0,66.0,78.0,93.0,-1.0,-1.0,-1.0,59.0,67.0,69.0,82.0,104.0,-1.0,-1.0,-1.0,63.0,70.0,79.0,96.0,-1.0,-1.0,-1.0,-1.0,79.0,83.0,-1.0,-1.0,-1.0,-1.0,102.0,100.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,74.0,130.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,69.0,134.0,137.0,136.0,145.0,131.0,126.0,83.0,60.0,-1.0,138.0,135.0,148.0,121.0,109.0,69.0,51.0,-1.0,143.0,139.0,148.0,113.0,91.0,67.0,52.0,-1.0,-1.0,-1.0,-1.0,101.0,81.0,62.0,54.0,-1.0,-1.0,-1.0,-1.0,124.0,78.0,68.0,55.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,66.0,60.0,128.0,130.0,147.0,165.0,-1.0,-1.0,-1.0,122.0,121.0,140.0,164.0,-1.0,-1.0,-1.0,140.0,119.0,135.0,156.0,166.0,-1.0,-1.0,155.0,137.0,112.0,148.0,163.0,164.0,153.0,133.0,131.0,121.0,118.0,134.0,134.0,128.0,121.0,119.0,121.0,129.0,-1.0,113.0,124.0,122.0,131.0,-1.0,-1.0,-1.0,-1.0,120.0,127.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0
,12,1,1,163.0,52,24.0
1,SEQ_000007_000001,Target,SEQ_000007,1,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,6.949219,6.214844,3.125,0.143494,-0.340271,-0.42865,-0.824524,29.340816,31.874645,29.79174,28.663383,28.406172,130.0,138.0,131.0,135.0,101.0,76.0,66.0,61.0,-1.0,-1.0,156.0,155.0,141.0,93.0,74.0,64.0,-1.0,-1.0,-1.0,-1.0,165.0,116.0,86.0,75.0,130.0,-1.0,-1.0,-1.0,-1.0,142.0,114.0,91.0,127.0,-1.0,-1.0,-1.0,-1.0,-1.0,145.0,114.0,114.0,-1.0,-1.0,-1.0,135.0,-1.0,-1.0,132.0,110.0,121.0,138.0,142.0,123.0,131.0,-1.0,-1.0,-1.0,106.0,120.0,139.0,119.0,124.0,131.0,117.0,109.0,-1.0,-1.0,-1.0,-1.0,165.0,134.0,108.0,106.0,123.0,-1.0,-1.0,121.0,147.0,-1.0,131.0,114.0,114.0,138.0,145.0,121.0,141.0,144.0,138.0,-1.0,120.0,124.0,147.0,115.0,141.0,135.0,125.0,-1.0,-1.0,122.0,122.0,117.0,103.0,108.0,108.0,-1.0,-1.0,-1.0,129.0,108.0,100.0,92.0,93.0,-1.0,-1.0,-1.0,116.0,99.0,93.0,90.0,91.0,-1.0,-1.0,-1.0,113.0,101.0,94.0,88.0,95.0,75.0,67.0,68.0,71.0,74.0,102.0,99.0,-1.0,64.0,68.0,67.0,72.0,88.0,112.0,103.0,-1.0,65.0,68.0,69.0,75.0,105.0,111.0,109.0,-1.0,66.0,71.0,72.0,81.0,109.0,116.0,121.0,118.0,61.0,67.0,75.0,93.0,116.0,128.0,130.0,121.0,62.0,72.0,80.0,92.0,115.0,-1.0,-1.0,-1.0,67.0,73.0,82.0,98.0,-1.0,-1.0,-1.0,-1.0,77.0,82.0,110.0,-1.0,-1.0,-1.0,112.0,105.0,134.0,-1.0,-1.0,-1.0,-1.0,-1.0,91.0,82.0,132.0,145.0,148.0,157.0,143.0,-1.0,117.0,66.0,142.0,142.0,149.0,147.0,136.0,109.0,80.0,60.0,142.0,142.0,143.0,135.0,126.0,92.0,73.0,61.0,-1.0,147.0,148.0,137.0,109.0,82.0,71.0,60.0,-1.0,-1.0,-1.0,-1.0,101.0,83.0,69.0,62.0,-1.0,-1.0,-1.0,-1.0,109.0,84.0,76.0,64.0,-1.0,-1.0,-1.0,-1.0,-1.0,93.0,72.0,74.0,126.0,137.0,157.0,174.0,-1.0,-1.0,140.0,130.0,124.0,143.0,168.0,-1.0,-1.0,-1.0,142.0,122.0,138.0,157.0,-1.0,-1.0,-1.0,155.0,133.0,117.0,145.0,170.0,163.0,157.0,139.0,127.0,126.0,121.0,136.0,142.0,133.0,127.0,123.0,127.0,134.0,-1.0,116.0,122.0,123.0,126.0,-1.0,-1.0,-1.0,-1.0,122.0,129.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-
1.0,-1.0,-1.0,0,12,1,1,163.0,52,24.0
2,SEQ_000007_000002,Target,SEQ_000007,2,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,5.722656,5.410156,5.421875,0.219055,-0.274231,-0.356934,-0.865662,30.339359,30.935045,30.090014,28.796087,28.529778,137.0,136.0,147.0,109.0,90.0,81.0,74.0,74.0,-1.0,164.0,165.0,146.0,106.0,94.0,77.0,77.0,-1.0,-1.0,-1.0,180.0,140.0,118.0,103.0,92.0,-1.0,-1.0,-1.0,-1.0,-1.0,155.0,119.0,122.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,148.0,130.0,123.0,158.0,-1.0,141.0,147.0,-1.0,157.0,141.0,113.0,131.0,152.0,147.0,146.0,157.0,144.0,127.0,-1.0,115.0,127.0,129.0,119.0,112.0,117.0,120.0,119.0,-1.0,-1.0,125.0,154.0,165.0,145.0,122.0,111.0,126.0,143.0,157.0,143.0,166.0,149.0,137.0,116.0,123.0,149.0,148.0,136.0,141.0,143.0,139.0,-1.0,134.0,133.0,142.0,123.0,118.0,116.0,128.0,-1.0,-1.0,-1.0,143.0,123.0,109.0,112.0,117.0,-1.0,-1.0,-1.0,-1.0,119.0,127.0,114.0,128.0,-1.0,-1.0,-1.0,138.0,136.0,129.0,125.0,132.0,-1.0,-1.0,-1.0,-1.0,-1.0,159.0,150.0,-1.0,73.0,94.0,109.0,117.0,126.0,-1.0,-1.0,-1.0,94.0,119.0,101.0,123.0,139.0,-1.0,-1.0,-1.0,98.0,97.0,101.0,128.0,144.0,-1.0,-1.0,-1.0,88.0,107.0,101.0,154.0,141.0,-1.0,-1.0,-1.0,84.0,89.0,107.0,140.0,146.0,149.0,147.0,131.0,76.0,100.0,108.0,141.0,-1.0,158.0,143.0,117.0,77.0,89.0,105.0,133.0,-1.0,-1.0,141.0,108.0,79.0,99.0,114.0,-1.0,-1.0,-1.0,130.0,118.0,139.0,149.0,167.0,162.0,-1.0,107.0,92.0,81.0,135.0,140.0,148.0,151.0,155.0,111.0,82.0,94.0,132.0,139.0,147.0,138.0,120.0,97.0,78.0,85.0,140.0,146.0,136.0,131.0,98.0,86.0,75.0,80.0,149.0,156.0,147.0,113.0,97.0,84.0,81.0,71.0,-1.0,-1.0,174.0,117.0,96.0,89.0,80.0,78.0,-1.0,-1.0,-1.0,145.0,104.0,92.0,88.0,76.0,-1.0,-1.0,-1.0,-1.0,-1.0,117.0,98.0,105.0,92.0,110.0,157.0,180.0,-1.0,128.0,123.0,126.0,142.0,165.0,185.0,-1.0,-1.0,-1.0,145.0,139.0,138.0,164.0,-1.0,-1.0,-1.0,-1.0,145.0,120.0,151.0,165.0,-1.0,-1.0,-1.0,151.0,138.0,127.0,151.0,187.0,-1.0,156.0,136.0,135.0,134.0,-1.0,133.0,142.0,131.0,130.0,132.0,136.0,-1.0,-1.0,112.0,121.0,123.0,125.0,-1.0,-
1.0,-1.0,-1.0,112.0,119.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0,12,1,1,163.0,52,24.0
3,SEQ_000007_000003,Target,SEQ_000007,3,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,6.601562,3.53125,6.457031,0.297546,-0.26416,-0.238159,-0.885986,30.54373,27.044001,29.310717,29.018711,27.40201,143.0,147.0,170.0,127.0,109.0,98.0,95.0,95.0,-1.0,177.0,189.0,177.0,136.0,121.0,107.0,104.0,-1.0,-1.0,-1.0,202.0,171.0,160.0,141.0,135.0,-1.0,-1.0,-1.0,-1.0,-1.0,197.0,168.0,150.0,131.0,-1.0,-1.0,-1.0,170.0,179.0,174.0,164.0,125.0,140.0,161.0,175.0,154.0,174.0,160.0,159.0,-1.0,126.0,143.0,167.0,149.0,137.0,130.0,131.0,-1.0,-1.0,-1.0,141.0,137.0,129.0,115.0,124.0,108.0,123.0,146.0,166.0,152.0,168.0,158.0,161.0,123.0,133.0,138.0,155.0,163.0,151.0,132.0,151.0,-1.0,216.0,-1.0,-1.0,175.0,157.0,146.0,140.0,-1.0,-1.0,-1.0,-1.0,-1.0,173.0,153.0,164.0,-1.0,-1.0,-1.0,-1.0,246.0,189.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,225.0,-1.0,-1.0,-1.0,-1.0,-1.0,243.0,-1.0,220.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,119.0,132.0,164.0,-1.0,-1.0,-1.0,-1.0,-1.0,117.0,156.0,176.0,-1.0,-1.0,-1.0,-1.0,-1.0,144.0,151.0,187.0,-1.0,-1.0,-1.0,-1.0,-1.0,126.0,162.0,184.0,-1.0,-1.0,-1.0,-1.0,-1.0,128.0,158.0,180.0,189.0,202.0,171.0,168.0,164.0,117.0,153.0,183.0,197.0,192.0,164.0,156.0,160.0,113.0,142.0,192.0,197.0,192.0,157.0,149.0,146.0,114.0,158.0,-1.0,173.0,159.0,156.0,147.0,-1.0,139.0,146.0,153.0,181.0,161.0,113.0,112.0,100.0,131.0,153.0,148.0,162.0,138.0,120.0,106.0,116.0,141.0,157.0,159.0,153.0,128.0,111.0,111.0,115.0,149.0,173.0,162.0,141.0,120.0,117.0,108.0,120.0,181.0,178.0,210.0,137.0,143.0,112.0,126.0,112.0,-1.0,-1.0,209.0,202.0,144.0,163.0,133.0,155.0,-1.0,-1.0,-1.0,-1.0,-1.0,168.0,179.0,155.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,164.0,175.0,105.0,132.0,171.0,-1.0,157.0,167.0,149.0,131.0,149.0,189.0,203.0,-1.0,-1.0,164.0,133.0,-1.0,162.0,181.0,-1.0,-1.0,-1.0,152.0,134.0,-1.0,148.0,187.0,-1.0,-1.0,149.0,142.0,135.0,-1.0,159.0,181.0,150.0,135.0,129.0,139.0,-1.0,-1.0,141.0,136.0,120.0,122.0,132.0,-1.0,-1.0,-1.0,107.0,112.0,1
15.0,140.0,-1.0,-1.0,-1.0,-1.0,101.0,111.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0,12,1,1,163.0,52,24.0
4,SEQ_000007_000004,Target,SEQ_000007,4,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,5.566406,0.277344,9.632812,0.333557,-0.218628,-0.063538,-0.914856,29.317265,25.270855,26.808746,29.408604,27.357603,178.0,191.0,183.0,157.0,146.0,139.0,143.0,148.0,-1.0,-1.0,236.0,238.0,208.0,200.0,185.0,190.0,-1.0,-1.0,-1.0,210.0,246.0,225.0,228.0,202.0,149.0,206.0,219.0,219.0,225.0,218.0,214.0,-1.0,162.0,177.0,206.0,219.0,207.0,182.0,225.0,-1.0,-1.0,-1.0,-1.0,233.0,195.0,204.0,190.0,-1.0,-1.0,-1.0,-1.0,-1.0,209.0,210.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,138.0,155.0,173.0,188.0,180.0,176.0,211.0,235.0,-1.0,-1.0,-1.0,-1.0,210.0,210.0,223.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,161.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,165.0,207.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,178.0,221.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,184.0,216.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,162.0,212.0,237.0,-1.0,-1.0,-1.0,-1.0,-1.0,152.0,198.0,221.0,213.0,204.0,211.0,235.0,-1.0,143.0,205.0,213.0,189.0,191.0,194.0,198.0,-1.0,139.0,138.0,159.0,145.0,120.0,121.0,118.0,116.0,149.0,143.0,152.0,136.0,127.0,138.0,125.0,125.0,163.0,161.0,148.0,135.0,127.0,137.0,153.0,129.0,184.0,197.0,155.0,146.0,140.0,149.0,154.0,164.0,-1.0,229.0,200.0,176.0,169.0,166.0,169.0,171.0,-1.0,-1.0,-1.0,-1.0,219.0,208.0,202.0,-1.0,-1.0,-1.0,202.0,-1.0,224.0,211.0,-1.0,-1.0,146.0,179.0,-1.0,191.0,192.0,194.0,-1.0,-1.0,127.0,185.0,-1.0,199.0,187.0,186.0,-1.0,-1.0,143.0,-1.0,-1.0,216.0,205.0,-1.0,-1.0,-1.0,197.0,-1.0,-1.0,219.0,192.0,-1.0,-1.0,-1.0,204.0,-1.0,-1.0,212.0,181.0,-1.0,-1.0,-1.0,184.0,-1.0,179.0,162.0,-1.0,-1.0,-1.0,-1.0,169.0,171.0,145.0,140.0,-1.0,-1.0,-1.0,-1.0,132.0,125.0,131.0,-1.0,-1.0,-1.0,-1.0,-1.0,101.0,109.0,125.0,-1
.0,-1.0,-1.0,-1.0,-1.0,0,12,1,1,163.0,52,24.0


In [9]:
# Summarise the merged frame: index range, column count, dtype breakdown and memory use.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 574945 entries, 0 to 574944
Columns: 348 entries, row_id to elbow_to_wrist_cm
dtypes: float64(334), int64(6), object(8)
memory usage: 1.5+ GB


### Feature Engineering and Training - Wave 0 - Baseline

In [10]:
def create_baseline_features(df):
    """
    Create Wave 0 features: Simple sequence level aggregations.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format sensor readings (several rows per ``sequence_id``). Must
        contain ``sequence_id``, ``subject``, ``gesture``, the IMU columns
        (``acc_x/y/z``, ``rot_w/x/y/z``), ``thm_1`` .. ``thm_5`` and the 320
        time-of-flight columns ``tof_{1..5}_v{0..63}``. The frame passed in is
        not modified.

    Returns
    -------
    pd.DataFrame
        One row per ``sequence_id`` (the index) holding ``subject``,
        ``gesture``, the flattened ``<column>_<stat>`` aggregates and
        ``gesture_encoded``.
    """
    imu_cols = ['acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z']
    thm_cols = [f'thm_{i}' for i in range(1, 6)]
    tof_cols = [f'tof_{s}_v{p}' for s in range(1, 6) for p in range(64)]

    # Aggregation plan; dict order defines the column order of the result.
    aggs = {col: ['mean', 'std', 'min', 'max', 'median', 'skew'] for col in imu_cols}
    # Add Thermopile aggregations
    for col in thm_cols:
        aggs[col] = ['mean', 'std', 'min', 'max']
    # Add time of flight aggregations (simple mean and std for baseline)
    aggs['tof_mean'] = ['mean', 'std']

    # Per-row mean over all TOF pixels. -1 is replaced with NaN so those pixels
    # are excluded from the mean instead of dragging it down. This is computed
    # on a derived frame, so the caller's TOF columns keep their -1 values.
    tof_mean = df[tof_cols].replace(-1, np.nan).mean(axis=1)

    # Narrow working frame: only the columns that are actually aggregated.
    # Selecting a column list yields a new frame, so adding `tof_mean` here
    # never touches `df`, and the group-by does not scan the ~320 raw TOF columns.
    work_df = df[['sequence_id', 'subject', 'gesture', *imu_cols, *thm_cols]].assign(
        tof_mean=tof_mean
    )
    grouped = work_df.groupby('sequence_id')

    # Group by sequence and aggregate and flatten multi index cols
    agg_df = grouped.agg(aggs)
    agg_df.columns = ['_'.join(col).strip() for col in agg_df.columns.values]

    # Get sequence level metadata (target, subject, etc.)
    meta_df = grouped[['subject', 'gesture']].first()

    # Combine aggregated features with metadata (both are indexed by sequence_id)
    final_df = pd.concat([meta_df, agg_df], axis=1)

    # Encode gesture target. Codes follow the sorted order of the gestures
    # present in this frame, so they are only comparable between frames that
    # contain the same set of gestures.
    final_df['gesture_encoded'] = final_df['gesture'].astype('category').cat.codes

    print(f"Feature engineering complete. Shape of features: {final_df.shape}")
    return final_df

In [11]:
# Create features
# Collapse the row-level training frame into one feature row per sequence_id,
# then preview the first rows with widened display limits.
features_df = create_baseline_features(train_df)
display_all(features_df.head())

Feature engineering complete. Shape of features: (8151, 67)


Unnamed: 0_level_0,subject,gesture,acc_x_mean,acc_x_std,acc_x_min,acc_x_max,acc_x_median,acc_x_skew,acc_y_mean,acc_y_std,acc_y_min,acc_y_max,acc_y_median,acc_y_skew,acc_z_mean,acc_z_std,acc_z_min,acc_z_max,acc_z_median,acc_z_skew,rot_w_mean,rot_w_std,rot_w_min,rot_w_max,rot_w_median,rot_w_skew,rot_x_mean,rot_x_std,rot_x_min,rot_x_max,rot_x_median,rot_x_skew,rot_y_mean,rot_y_std,rot_y_min,rot_y_max,rot_y_median,rot_y_skew,rot_z_mean,rot_z_std,rot_z_min,rot_z_max,rot_z_median,rot_z_skew,thm_1_mean,thm_1_std,thm_1_min,thm_1_max,thm_2_mean,thm_2_std,thm_2_min,thm_2_max,thm_3_mean,thm_3_std,thm_3_min,thm_3_max,thm_4_mean,thm_4_std,thm_4_min,thm_4_max,thm_5_mean,thm_5_std,thm_5_min,thm_5_max,tof_mean_mean,tof_mean_std,gesture_encoded
sequence_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1
SEQ_000007,SUBJ_059520,Cheek - pinch skin,6.153098,1.334155,3.613281,9.015625,6.488281,-0.545319,3.91557,3.048287,-2.019531,6.519531,5.488281,-1.18487,5.577782,2.337517,1.09375,9.792969,4.964844,0.586111,0.263574,0.069033,0.134399,0.379272,0.254578,0.307637,-0.280817,0.056597,-0.442871,-0.204163,-0.275757,-0.846226,-0.33147,0.17505,-0.478027,0.005066,-0.414978,1.258707,-0.837994,0.040723,-0.914856,-0.757935,-0.825012,-0.477119,28.630612,0.582076,27.69651,30.54373,29.57187,2.576799,24.558798,32.010178,28.576605,1.260533,25.90749,30.090014,29.177937,0.278147,28.592863,29.76148,27.957446,0.877846,26.047148,29.428299,105.199184,53.223485,1
SEQ_000008,SUBJ_020948,Forehead - pull hairline,3.400506,1.087142,1.734375,5.90625,3.4375,0.146452,5.311179,3.268073,-0.222656,8.667969,7.0,-0.830467,6.581629,2.475402,1.722656,11.074219,5.839844,0.186346,0.243493,0.064414,0.157593,0.34198,0.226562,0.32673,-0.117145,0.049384,-0.263306,-0.050537,-0.097382,-0.937947,-0.342327,0.190164,-0.508606,-0.031555,-0.442169,0.830673,-0.875143,0.042626,-0.937805,-0.814697,-0.860046,-0.510329,30.464309,2.709212,25.985313,32.870808,29.678206,3.88508,23.907709,33.100945,29.179852,3.074828,24.414917,32.316135,30.501325,0.976249,28.755495,31.613327,25.824221,1.16594,24.181562,28.054575,150.73884,46.11008,6
SEQ_000013,SUBJ_040282,Cheek - pinch skin,-7.058962,1.295184,-9.25,-3.347656,-7.144531,0.518519,2.346182,2.564639,-3.273438,4.683594,3.382812,-1.445762,-6.068544,1.330784,-10.945312,-3.515625,-5.851562,-1.039566,0.392208,0.150629,0.061157,0.540771,0.439514,-1.359968,0.340804,0.182002,0.140991,0.726501,0.258362,1.350803,0.800506,0.090017,0.580505,0.881653,0.838135,-1.593112,0.002644,0.164305,-0.406799,0.129761,0.066101,-1.682267,24.522526,0.449773,24.181389,25.634346,24.367174,0.620555,23.933413,26.175961,24.892424,0.294962,24.406981,25.512794,24.93084,0.572871,24.419798,26.452927,24.733322,0.475044,24.16798,26.051331,195.963626,45.069032,1
SEQ_000016,SUBJ_052342,Write name on leg,5.524654,1.074108,3.4375,9.378906,5.390625,0.747648,-4.408491,0.598318,-5.71875,-2.960938,-4.492188,0.505319,-3.162077,6.139752,-8.078125,8.355469,-6.667969,0.964846,0.361083,0.041568,0.277527,0.459045,0.352234,0.525728,-0.728107,0.207529,-0.893677,-0.384827,-0.857361,0.843064,-0.223281,0.156706,-0.368713,0.035889,-0.315857,0.845564,-0.363684,0.301057,-0.817688,-0.082275,-0.190979,-0.794437,31.651703,4.006846,25.413513,36.053188,31.601259,4.495657,25.018881,36.705894,29.320353,3.274493,24.128819,33.617542,32.790761,3.253195,27.227589,35.665222,30.860562,3.310154,26.312038,35.801083,40.090805,46.833388,17
SEQ_000018,SUBJ_032165,Forehead - pull hairline,5.363715,1.627637,1.964844,6.832031,6.101562,-1.397824,4.109737,3.525304,-3.164062,6.71875,6.007812,-1.347944,5.937066,2.104544,4.148438,9.933594,4.761719,1.040372,0.859159,0.034238,0.828247,0.925049,0.846283,1.03008,0.177468,0.178091,-0.184204,0.305542,0.270111,-1.342766,-0.352176,0.149264,-0.457458,-0.022644,-0.422699,1.463709,-0.216601,0.073268,-0.367676,-0.159668,-0.18103,-1.283574,28.90361,1.144503,26.533083,30.267483,29.438643,1.658719,25.795074,31.035217,27.058073,0.951421,25.12772,28.468761,27.841705,0.431424,26.827133,28.400864,31.014364,1.394629,28.282324,32.180752,136.302421,33.106553,6


In [12]:
# --- Experiment identity and CV settings ---
EXPERIMENT_NAME = "Baseline-Wave0-LGBM-2"
MODEL_NAME = "LightGBM"
FEATURE_WAVE = "Wave-0"
N_SPLITS = 5
SEED = 42

# --- Data for CV: target, grouping key and feature matrix ---
# Subjects are the grouping key for the grouped CV split; identifier and
# target columns are removed from the features.
non_feature_cols = ['subject', 'gesture', 'gesture_encoded']
y = features_df['gesture_encoded']
groups = features_df['subject']
X = features_df.drop(columns=non_feature_cols)

# --- Model Params ---
# n_estimators is an upper bound when the training loop uses early stopping.
params = dict(
    objective='multiclass',
    num_class=y.nunique(),
    metric='multi_logloss',
    n_estimators=1000,
    learning_rate=0.05,
    feature_fraction=0.8,
    bagging_fraction=0.8,
    bagging_freq=1,
    lambda_l1=0.1,
    lambda_l2=0.1,
    num_leaves=31,
    verbose=-1,
    n_jobs=-1,
    seed=SEED,
    boosting_type='gbdt',
)

In [13]:
# Cross Validation Setup
# Grouped by subject so no subject appears in both train and validation of a
# fold; stratified on the encoded gesture to keep class proportions similar.
cv = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# Out-of-fold containers: one probability column per encoded class.
oof_preds = np.zeros((len(features_df), y.nunique()))
oof_true = np.zeros(len(features_df))
fold_scores = []

# Training loop
for fold, (train_idx, val_idx) in enumerate(cv.split(X, y, groups)):
    print(f"--- Fold {fold+1}/{N_SPLITS} ---")

    # Split data
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    # Init and train model
    # NOTE(review): early stopping monitors the same fold that is scored
    # below, so the fold scores are slightly optimistic.
    model = lgb.LGBMClassifier(**params)
    model.fit(X_train, y_train,
              eval_set=[(X_val, y_val)],
              eval_metric='multi_logloss',
              callbacks=[lgb.early_stopping(100, verbose=False)])

    # Predict on validation set
    val_preds = model.predict_proba(X_val)

    # predict_proba columns follow model.classes_, i.e. the labels seen in
    # this fold's training data. Map through classes_ instead of assuming the
    # column index equals the encoded label: if a fold's training split lacks
    # a class, the plain index would be shifted and the OOF write would fail
    # on a shape mismatch. With all classes present this is identical to
    # using the column index directly.
    fold_classes = model.classes_
    oof_preds[np.ix_(val_idx, fold_classes)] = val_preds
    oof_true[val_idx] = y_val

    # Eval fold performance using balanced_accuracy_score since it is robust to class imbalance
    val_labels = fold_classes[np.argmax(val_preds, axis=1)]
    fold_score = balanced_accuracy_score(y_val, val_labels)
    fold_scores.append(fold_score)
    print(f"Fold {fold+1} Balanced Accuracy: {fold_score:.5f}")

# Final score and logging
mean_cv_score = np.mean(fold_scores)
print("\n--- CV Summary ---")
print(f"Mean Balanced Accuracy: {mean_cv_score:.5f}")
print(f"Std Dev.: {np.std(fold_scores):.5f}")

# Log the experiment
tracker.log_experiment(
    experiment_name=EXPERIMENT_NAME,
    model_name=MODEL_NAME,
    feature_wave=FEATURE_WAVE,
    cv_score=mean_cv_score,
    params=params,
    notes="Inital baseline model with sequence-level statistical aggregates."
)

--- Fold 1/5 ---
Fold 1 Balanced Accuracy: 0.60245
--- Fold 2/5 ---
Fold 2 Balanced Accuracy: 0.52807
--- Fold 3/5 ---
Fold 3 Balanced Accuracy: 0.54332
--- Fold 4/5 ---
Fold 4 Balanced Accuracy: 0.48495
--- Fold 5/5 ---
Fold 5 Balanced Accuracy: 0.52315

--- CV Summary ---
Mean Balanced Accuracy: 0.53639
Std Dev.: 0.03821
Experiment 'Baseline-Wave0-LGBM-2' logged to ~/code/kaggle/kaggle-cmi-detect-behavior/experiment_log.csv


[34m[1mwandb[0m: Currently logged in as: [33mb-a-chaudhry[0m ([33mb-a-chaudhry-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
cv_score,▁

0,1
cv_score,0.53639
feature_wave,Wave-0
model_name,LightGBM
notes,Inital baseline mode...


Experiment 'Baseline-Wave0-LGBM-2' logged to W&B


#### Analysis:

1. With 18 classes, a random classifier would achieve a balanced accuracy of approximately 1/18 (~5.6%). The current score of ~54% is nearly 10 times better and is a strong confirmation that even the simplest `Wave-0` features contain a strong predictive signal.
2. The standard deviation of 0.0382 across the folds is relatively low, indicating that the model's performance is consistent and not wildly dependent on a specific subset of subjects.
3. The benchmark score of 0.53639 is now the number to beat!