The training process is documented in: https://www.kaggle.com/code/llkh0a/ensemble-training/

In [1]:
# List the contents of the training notebook's output dataset
%ls /kaggle/input/ensemble-training/

[0m[01;34mcatboost_info[0m/  ensemble_model.joblib  __notebook__.ipynb  __results__.html
custom.css      label_encoder.joblib   __output__.json


In [2]:
import os
import polars as pl
import pandas as pd
import numpy as np
import joblib

# Locations of the serialized artifacts inside the Kaggle input directory
model_path = '/kaggle/input/ensemble-training/ensemble_model.joblib'
le_path = '/kaggle/input/ensemble-training/label_encoder.joblib'
# Alternative model location, kept for reference:
# model_path = '/kaggle/input/ensemble-cmo/other/default/1/ensemble_model.joblib'

# Deserialize the ensemble model (joblib.load unpickles, so only load trusted files)
model = joblib.load(model_path)

# The label encoder is optional: when label_encoder.joblib is absent the name is
# bound to None and predict() returns the raw model output as a string
label_encoder = joblib.load(le_path) if os.path.exists(le_path) else None

In [3]:
# Make /kaggle/working the current directory for the remaining cells
%cd /kaggle/working

/kaggle/working


In [4]:
# Sanity check: shows the loaded encoder, or None when label_encoder.joblib was missing
print(label_encoder)

LabelEncoder()


# Feature Extraction and Inference API
This cell defines the feature extraction function and the predict function, following the competition's API format.

In [5]:
def extract_features(sequence: "pl.DataFrame | pd.DataFrame",
                     demographics: "pl.DataFrame | pd.DataFrame") -> pd.DataFrame:
    """Summarise one sensor sequence as a single-row feature frame.

    All rows of ``sequence`` are aggregated together, so the caller must pass
    exactly one sequence. The order in which features are inserted determines
    the column order of the returned frame.

    Args:
        sequence: Rows of a single sequence. Polars frames (anything exposing
            ``to_pandas``) are converted; pandas frames are used as-is.
        demographics: Demographics for the sequence's subject. Only the first
            row is read. Polars or pandas.

    Returns:
        A one-row pandas DataFrame containing, in order: per-numeric-column
        statistics (mean, std, min, max, skew, kurt, missing count), the total
        missing count, the ``sequence_counter`` span, the accelerometer
        energies, and every demographic column except ``subject``. Missing
        values are replaced with 0.
    """
    # Accept both polars and pandas inputs
    df = sequence.to_pandas() if hasattr(sequence, 'to_pandas') else sequence
    demo = demographics.to_pandas() if hasattr(demographics, 'to_pandas') else demographics

    # Identifier / bookkeeping columns carry no signal and are not summarised
    exclude_cols = {'row_id', 'sequence_id', 'sequence_counter', 'subject'}
    numeric_cols = [
        c for c in df.select_dtypes(include=[np.number]).columns.tolist()
        if c not in exclude_cols
    ]

    feats = {}
    # Statistical features, one group of seven per numeric column
    for col in numeric_cols:
        series = df[col]
        feats[col + '_mean'] = series.mean()
        feats[col + '_std'] = series.std()
        feats[col + '_min'] = series.min()
        feats[col + '_max'] = series.max()
        feats[col + '_skew'] = series.skew()
        feats[col + '_kurt'] = series.kurt()
        feats[col + '_missing'] = series.isnull().sum()

    # Total missing values in the sequence (all columns, not only numeric ones)
    feats['total_missing'] = df.isnull().sum().sum()

    # Duration of the sequence, measured in sequence_counter steps
    if 'sequence_counter' in df.columns:
        feats['duration'] = df['sequence_counter'].max() - df['sequence_counter'].min()
    else:
        feats['duration'] = 0

    # IMU signal energy (sum of squares) for each accelerometer axis present
    for axis in ['acc_x', 'acc_y', 'acc_z']:
        if axis in df.columns:
            feats[axis + '_energy'] = (df[axis] ** 2).sum()

    # Demographic features (excluding subject). An empty demographics frame
    # would make .iloc[0] raise IndexError, so fall back to 0 in that case,
    # consistent with the NaN -> 0 policy below.
    has_demo_row = len(demo) > 0
    for col in demo.columns:
        if col != 'subject':
            feats[col] = demo[col].iloc[0] if has_demo_row else 0

    # Replace NaN/None (e.g. std of a single row, empty columns) with 0
    feats = {key: (0 if pd.isnull(value) else value) for key, value in feats.items()}
    return pd.DataFrame([feats])

def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Predict the gesture label for one sequence.

    Builds the feature row with ``extract_features``, takes the first
    prediction of the module-level ``model`` and, when a label encoder was
    loaded, maps it back to its original label.

    Returns:
        The decoded label as a string. When no label encoder is available, or
        decoding fails, the raw model output converted to a string.
    """
    features = extract_features(sequence, demographics)
    raw_label = model.predict(features)[0]

    # No encoder was loaded: the raw model output is all we can return
    if label_encoder is None:
        return str(raw_label)

    # Decoding fails if the encoder is of the wrong type or does not know the
    # predicted class; report the problem and fall back to the raw output
    try:
        return str(label_encoder.inverse_transform([raw_label])[0])
    except Exception as err:
        print('Label decode error:', err)
        return str(raw_label)

# Example usage
This cell demonstrates how to use the predict function with sample data.

In [6]:
# List the files provided with the competition dataset
%ls /kaggle/input/cmi-detect-behavior-with-sensor-data/

[0m[01;34mkaggle_evaluation[0m/  test_demographics.csv  train_demographics.csv
test.csv            train.csv


In [7]:
# Sensor rows of the bundled test sequences
test_seq_df = pd.read_csv(
    '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv'
)
# Per-subject demographics for the test subjects
test_demo_df = pd.read_csv(
    '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv'
)

In [8]:
# Preview the first rows of the test demographics table
test_demo_df.head()

Unnamed: 0,subject,adult_child,age,sex,handedness,height_cm,shoulder_to_wrist_cm,elbow_to_wrist_cm
0,SUBJ_016452,1,25,1,1,165.0,52,23.0
1,SUBJ_055840,0,13,0,1,177.0,52,27.0


In [9]:
# Example: run the pipeline on a single sequence.
# predict() aggregates every row it receives into one feature vector and reads
# only the first demographics row, so it must be given one sequence together
# with that subject's demographics rather than the whole test set.
# Assumes test.csv carries 'sequence_id' and 'subject' columns, as the exclude
# list in extract_features implies.
example_sequence_id = test_seq_df['sequence_id'].iloc[0]
example_seq_df = test_seq_df[test_seq_df['sequence_id'] == example_sequence_id]
example_subject = example_seq_df['subject'].iloc[0]
example_demo_df = test_demo_df[test_demo_df['subject'] == example_subject]
result = predict(example_seq_df, example_demo_df)
print('Predicted gesture:', result)

Predicted gesture: Neck - scratch


# Competition Submission: Inference Server Integration
Set up the inference server for competition submission. The server will use the trained ensemble model and the saved label encoder to respond to inference requests.

In [10]:
import kaggle_evaluation.cmi_inference_server
import polars as pl
import os



In [11]:
# Wrap predict() in the competition's inference server
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # KAGGLE_IS_COMPETITION_RERUN is unset or empty: exercise the server
    # through the local gateway using the bundled test files
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )
else:
    # KAGGLE_IS_COMPETITION_RERUN is set: serve inference requests
    inference_server.serve()