# CMI BFRB Detection - IMU Improved Model Inference

Single cell implementation for Kaggle submission

In [None]:
# All-in-one inference cell for CMI BFRB Detection
import os
import sys
import warnings
import pickle
import numpy as np
import pandas as pd
import polars as pl
warnings.filterwarnings('ignore')

# Configuration
MODEL_PATH = '/kaggle/input/cmi-imu-improved-models/'  # Update with your dataset

# Gesture names in class-index order: the tuple position is the integer
# class id that GESTURE_MAPPER assigns to the name.
_GESTURE_NAMES = (
    'Above ear - pull hair',
    'Cheek - pinch skin',
    'Eyebrow - pull hair',
    'Eyelash - pull hair',
    'Forehead - pull hairline',
    'Forehead - scratch',
    'Neck - pinch skin',
    'Neck - scratch',
    'Drink from bottle/cup',
    'Feel around in tray and pull out an object',
    'Glasses on/off',
    'Pinch knee/leg skin',
    'Pull air toward your face',
    'Scratch knee/leg skin',
    'Text on phone',
    'Wave hello',
    'Write name in air',
    'Write name on leg',
)

# name -> class id
GESTURE_MAPPER = {name: idx for idx, name in enumerate(_GESTURE_NAMES)}
# class id -> name
REVERSE_GESTURE_MAPPER = dict(enumerate(_GESTURE_NAMES))

# Load models
def load_model_bundle(model_dir):
    """Load the pickled model bundle stored in ``model_dir``.

    Two on-disk layouts are supported:

    * a single pickle holding a dict (with a ``'models'`` entry and the
      feature list under ``'feature_columns'`` or ``'feature_names'``);
    * a pickle holding only the models, with the feature list stored next to
      it in ``feature_columns.pkl``.

    Args:
        model_dir: directory that contains the pickle file(s).

    Returns:
        dict with a ``'models'`` entry and, when it could be found, a
        ``'feature_columns'`` / ``'feature_names'`` entry.

    Raises:
        FileNotFoundError: if none of the known model files exists.
    """
    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
    # Only point MODEL_PATH at datasets you trust.
    for fname in ('imu_improved_model.pkl', 'model_data.pkl', 'lightgbm_models.pkl'):
        fpath = os.path.join(model_dir, fname)
        if not os.path.exists(fpath):
            continue
        with open(fpath, 'rb') as f:
            loaded = pickle.load(f)
        print(f'Loaded from {fname}')

        # A non-dict payload is the "individual files" layout: the pickle is
        # the model collection itself, not a bundle.
        bundle = loaded if isinstance(loaded, dict) else {'models': loaded}

        # Pick up the separately stored feature list when the bundle has none.
        if 'feature_columns' not in bundle and 'feature_names' not in bundle:
            cols_path = os.path.join(model_dir, 'feature_columns.pkl')
            if os.path.exists(cols_path):
                with open(cols_path, 'rb') as f:
                    bundle['feature_columns'] = pickle.load(f)
        return bundle

    raise FileNotFoundError(f'No model file found in {model_dir}')


print('Loading models...')
try:
    model_data = load_model_bundle(MODEL_PATH)
    models = model_data.get('models', [])
    # Normalise to a plain list so truthiness checks (``not feature_cols``)
    # work even if the pickle held an array or an Index.
    feature_cols = list(model_data.get('feature_columns', model_data.get('feature_names', [])))
    print(f'Loaded {len(models)} models, {len(feature_cols)} features')
except Exception as e:
    # Best effort: keep the notebook running; predict() falls back to a
    # constant answer when no models are available.
    print(f'Model loading failed: {e}')
    models = []
    feature_cols = []

# Feature extraction function
def extract_features(seq_df, demo_df=None):
    """Build a one-row feature frame describing a single sensor sequence.

    Args:
        seq_df: pandas DataFrame holding one sequence. Each of the IMU columns
            ``acc_x``, ``acc_y``, ``acc_z``, ``rot_w``, ``rot_x``, ``rot_y``,
            ``rot_z`` that is present is summarised; absent columns are
            skipped (no features are emitted for them). NaNs are treated as 0.
        demo_df: optional pandas DataFrame; its first row supplies the
            demographic features. Fields missing from the row, or a
            missing/empty frame, fall back to fixed defaults.

    Returns:
        pandas DataFrame with exactly one row and one column per feature.
        A zero-length sequence yields 0 for every IMU statistic instead of
        raising.
    """
    imu_columns = ['acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z']
    demo_defaults = {
        'age': 30,
        'adult_child': 1,
        'sex': 0,
        'handedness': 1,
        'height_cm': 170,
        'shoulder_to_wrist_cm': 50,
        'elbow_to_wrist_cm': 30,
    }

    features = {'sequence_length': len(seq_df)}

    # Demographics: first row when available, defaults otherwise. A plain dict
    # stands in for the row so the same .get() lookup covers both cases.
    has_demo = demo_df is not None and len(demo_df) > 0
    demo = demo_df.iloc[0] if has_demo else {}
    for name, default in demo_defaults.items():
        features[name] = demo.get(name, default)

    # Per-channel summary statistics.
    for col in imu_columns:
        if col not in seq_df.columns:
            continue

        data = seq_df[col].fillna(0).values
        if len(data) == 0:
            # np.min / np.max / np.percentile raise on empty input; treat an
            # empty channel as a single zero sample so every statistic is 0.
            data = np.zeros(1)

        features[f'{col}_mean'] = np.mean(data)
        features[f'{col}_std'] = np.std(data)
        features[f'{col}_min'] = np.min(data)
        features[f'{col}_max'] = np.max(data)
        features[f'{col}_median'] = np.median(data)
        features[f'{col}_q25'] = np.percentile(data, 25)
        features[f'{col}_q75'] = np.percentile(data, 75)
        features[f'{col}_iqr'] = features[f'{col}_q75'] - features[f'{col}_q25']
        features[f'{col}_range'] = features[f'{col}_max'] - features[f'{col}_min']
        features[f'{col}_var'] = np.var(data)
        features[f'{col}_first'] = data[0]
        features[f'{col}_last'] = data[-1]
        features[f'{col}_delta'] = features[f'{col}_last'] - features[f'{col}_first']

        # Thirds of the sequence; the last third absorbs the remainder.
        if len(data) >= 9:
            seg_size = len(data) // 3
            for i in range(3):
                start = i * seg_size
                end = (i + 1) * seg_size if i < 2 else len(data)
                seg = data[start:end]
                features[f'{col}_seg{i+1}_mean'] = np.mean(seg)
                features[f'{col}_seg{i+1}_std'] = np.std(seg)
        else:
            # Too short to split meaningfully: reuse the whole-sequence stats.
            for i in range(1, 4):
                features[f'{col}_seg{i}_mean'] = features[f'{col}_mean']
                features[f'{col}_seg{i}_std'] = features[f'{col}_std']

    # Acceleration magnitude, only when all three axes are present.
    if all(c in seq_df.columns for c in ['acc_x', 'acc_y', 'acc_z']):
        mag = np.sqrt(seq_df['acc_x'].fillna(0)**2 + seq_df['acc_y'].fillna(0)**2 + seq_df['acc_z'].fillna(0)**2)
        if len(mag) == 0:
            # Same convention as above: an empty sequence contributes zeros
            # rather than NaN.
            mag = pd.Series([0.0])
        features['acc_magnitude_mean'] = np.mean(mag)
        features['acc_magnitude_std'] = np.std(mag)
        features['acc_magnitude_max'] = np.max(mag)
        features['acc_magnitude_min'] = np.min(mag)

    return pd.DataFrame([features])

# Prediction function
def _predict_label(model, X_pred):
    """Return one model's predicted class label for the single row in X_pred.

    Models exposing ``predict_proba`` vote with the arg-max class index.
    For other models the first element of ``predict`` is used; if that
    element is itself a vector of per-class scores it is collapsed with
    arg-max so the vote stays hashable, otherwise the scalar is returned
    as-is.
    """
    if hasattr(model, 'predict_proba'):
        return np.argmax(model.predict_proba(X_pred)[0])

    raw = np.asarray(model.predict(X_pred)[0])
    if raw.size > 1:
        # Per-class scores rather than a label (NOTE(review): this is what a
        # native LightGBM Booster is documented to return for multiclass
        # objectives - confirm against the models actually shipped).
        return np.argmax(raw)
    return raw.item()


def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Predict the gesture name for one sequence by majority vote.

    Args:
        sequence: sensor readings of one sequence (polars DataFrame; a
            pandas DataFrame is accepted too).
        demographics: demographics of the subject (polars or pandas
            DataFrame).

    Returns:
        A gesture name from ``GESTURE_MAPPER``. ``'Text on phone'`` is
        returned whenever no models/features are loaded, no model produces a
        vote, the winning label is unknown, or any unexpected error occurs,
        so the caller always receives a valid label.
    """
    from collections import Counter

    fallback = 'Text on phone'
    try:
        # Convert to pandas
        seq_df = sequence.to_pandas() if isinstance(sequence, pl.DataFrame) else sequence
        demo_df = demographics.to_pandas() if isinstance(demographics, pl.DataFrame) else demographics

        if not models or not feature_cols:
            return fallback

        features = extract_features(seq_df, demo_df)

        # Align to the training columns; features that could not be computed
        # for this sequence are filled with 0.
        X_pred = features.reindex(columns=feature_cols, fill_value=0)

        # Collect one vote per model; a failing model is skipped so the
        # remaining ones can still decide.
        votes = []
        for model in models:
            try:
                votes.append(_predict_label(model, X_pred))
            except Exception:
                continue

        if not votes:
            return fallback

        final_pred = Counter(votes).most_common(1)[0][0]
        return REVERSE_GESTURE_MAPPER.get(final_pred, fallback)

    except Exception:
        # Deliberate best effort: always hand back a valid label rather than
        # letting an exception escape to the caller.
        return fallback

# Smoke test: run predict() once on random IMU data and check that the
# returned label is one of the known gesture names.
print('Testing prediction...')
_imu_channels = ('acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z')
test_seq = pl.DataFrame({channel: np.random.randn(100) for channel in _imu_channels})
test_demo = pl.DataFrame({
    'age': [25],
    'adult_child': [1],
    'sex': [0],
    'handedness': [1],
})
result = predict(test_seq, test_demo)
print(f'Test result: {result}')
prediction_ok = isinstance(result, str) and result in GESTURE_MAPPER
assert prediction_ok, "Invalid prediction"
print('Test passed!')

# Initialize CMI inference server.
# The dataset folder is put on sys.path first so that the kaggle_evaluation
# import below can be resolved (presumably the package lives in that folder).
sys.path.append('/kaggle/input/cmi-detect-behavior-with-sensor-data')
import kaggle_evaluation.cmi_inference_server

print('Initializing inference server...')
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)
print('Ready for inference')

# Run inference: serve() when KAGGLE_IS_COMPETITION_RERUN is set, otherwise
# replay the local test files through the gateway.
print('Starting inference...')
is_competition_rerun = os.getenv('KAGGLE_IS_COMPETITION_RERUN')
if not is_competition_rerun:
    print('Local testing mode...')
    local_data_paths = (
        '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
        '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
    )
    try:
        inference_server.run_local_gateway(data_paths=local_data_paths)
        print('Inference completed!')
    except Exception as e:
        # A local gateway failure is reported but does not abort the notebook.
        print(f'Inference error (may be normal): {e}')
else:
    print('Competition environment - serving predictions...')
    inference_server.serve()

print('Process completed.')