
# IMU Classification (Base) — **Simple & Robust Mapping**

**Assumption:** CSV labels are only `normal` and `collision`.**Goal:** Keep mapping trivial, avoid `classes_` entirely.

We infer which column of `predict_proba` corresponds to **`collision`** by choosing the column
that yields a **higher ROC-AUC** against `y_test_bin = (y_test == "collision")`.
This removes any dependence on internal class encoding and avoids brittle assertions.


In [1]:
from extract_features import process_dataset, load_and_process_sample
from visualization import signal_viewer
from imu_pipeline import IMUPipeline
from pathlib import Path
import pandas as pd
import joblib
import os

In [2]:
signal_viewer(
    data_dir=Path('data/raw/train'),
    labels_csv=Path('data/train.csv')
)

VBox(children=(Dropdown(description='Sample ID:', layout=Layout(width='50%'), options=('00104b76-d512-43d6-b2e…

# ❓ Questions to Reflect On
What do you observe when comparing the model’s predictions on the new data to its known performance?

Is there anything in the data that might explain differences in behavior?

Can you identify patterns or trends related to when the model succeeds or fails?

Are there signals or features that seem to affect the model’s reliability?

What could be done in the short term to handle the current situation?

What are potential long-term steps to improve model performance in similar scenarios?

What would you want to explore further if given more time or data?

What assumptions did the model rely on during training — and are they still valid?

In [3]:

# --- Setup
import os, json, warnings
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support,
    roc_auc_score, average_precision_score, confusion_matrix
)
import joblib

warnings.filterwarnings("ignore")

DATA_DIR = Path("data")
MANUAL_ANN = DATA_DIR / "manual_annotation"
MODEL_PATH = Path("models/imu_pipeline.pkl")

TRAIN_CSV = DATA_DIR / "train.csv"
TEST_CSV  = DATA_DIR / "test.csv"
INF_CSV   = DATA_DIR / "inference.csv"
INF_LABELS_CSV = MANUAL_ANN / "inference_labels.csv"

assert TRAIN_CSV.exists(), f"Missing {TRAIN_CSV}"
assert TEST_CSV.exists(),  f"Missing {TEST_CSV}"
assert MODEL_PATH.exists(), f"Missing {MODEL_PATH}"


In [4]:

# --- (Optional) feature generation for inference
if not INF_CSV.exists():
    try:
        from extract_features import process_dataset
        print("Generating inference features ...")
        process_dataset("inference")
        print("Done. Created:", INF_CSV)
    except Exception as e:
        print("Couldn't generate inference features automatically. Reason:", e)
        print("Please run: from extract_features import process_dataset; process_dataset('inference')")


In [5]:

# --- Load data
train = pd.read_csv(TRAIN_CSV)
test  = pd.read_csv(TEST_CSV)

inf_labels = pd.read_csv(INF_LABELS_CSV) if INF_LABELS_CSV.exists() else None
inf = pd.read_csv(INF_CSV) if INF_CSV.exists() else None

train.head(2), test.head(2)


(     x_mean     x_std     x_max     x_min   x_range    x_skew  x_kurtosis  \
 0  0.057289  0.140447  0.265758 -0.214983  0.480742 -0.388161   -1.292653   
 1  0.058397  0.147305  0.315987 -0.237875  0.553862 -0.371543   -1.247677   
 
    x_n_peaks  x_energy    y_mean  ...  temperature  humidity  altitude  \
 0         17  2.300756  0.014241  ...           20        46       409   
 1         12  2.510906  0.015394  ...           18        34       632   
 
    session_id  firmware_version  calibration_status  battery_level  \
 0     S607704            v1.2.3              recent             93   
 1     S513749            v1.3.0              recent             94   
 
    gps_accuracy  network_type  device_model  
 0      6.111424            5g       model_c  
 1      5.448867          wifi       model_c  
 
 [2 rows x 61 columns],
      x_mean     x_std     x_max     x_min   x_range    x_skew  x_kurtosis  \
 0  0.055139  0.149892  0.277077 -0.204022  0.481099 -0.298338   -1.362385   

In [6]:

# --- Utility: split features/labels
def split_xy(df, label_col="label"):
    X = df.drop(columns=[c for c in df.columns if c == label_col], errors="ignore")
    y = df[label_col] if label_col in df.columns else None
    return X, y

X_train, y_train = split_xy(train, "label")
X_test,  y_test  = split_xy(test,  "label")
print(X_train.shape, y_train.shape if y_train is not None else None)
print(X_test.shape,  y_test.shape if y_test is not None else None)


(800, 60) (800,)
(200, 60) (200,)


In [7]:

# --- Load model and pick which column is 'collision' by maximizing ROC-AUC on test
model = joblib.load(MODEL_PATH)

def pick_collision_index(model, X, y):
    proba = model.predict_proba(X)
    if proba.ndim != 2 or proba.shape[1] != 2:
        raise ValueError(f"Expected binary predict_proba with 2 columns, got shape={proba.shape}")
    y_bin = (y == "collision").astype(int)
    aucs = [roc_auc_score(y_bin, proba[:, i]) for i in range(2)]
    pos_idx = int(np.argmax(aucs))
    print(f"AUC(proba[:,0])={aucs[0]:.3f}, AUC(proba[:,1])={aucs[1]:.3f}  -> using column {pos_idx} as P(collision)")
    return pos_idx

POS_IDX = pick_collision_index(model, X_test, y_test)


AUC(proba[:,0])=0.928, AUC(proba[:,1])=0.072  -> using column 0 as P(collision)


In [8]:

# --- Evaluate on TEST set (simple & consistent)
y_test_bin = (y_test == "collision").astype(int)      # {0,1}

y_prob_test = model.predict_proba(X_test)[:, POS_IDX] # P(collision)
y_hat_test  = (y_prob_test >= 0.5).astype(int)        # {0,1}

acc = accuracy_score(y_test_bin, y_hat_test)
prec, rec, f1, _ = precision_recall_fscore_support(y_test_bin, y_hat_test, average="binary", zero_division=0)
roc = roc_auc_score(y_test_bin, y_prob_test)
pr  = average_precision_score(y_test_bin, y_prob_test)
cm  = confusion_matrix(y_test_bin, y_hat_test, labels=[0,1])

print("=== TEST metrics (threshold=0.5) ===")
print(f"Accuracy={acc:.3f}  Precision={prec:.3f}  Recall={rec:.3f}  F1={f1:.3f}")
print(f"ROC-AUC={roc:.3f}   PR-AUC={pr:.3f}")
print("Confusion Matrix (rows=true, cols=pred) [0,1]:\n", cm)


=== TEST metrics (threshold=0.5) ===
Accuracy=0.945  Precision=1.000  Recall=0.890  F1=0.942
ROC-AUC=0.928   PR-AUC=0.956
Confusion Matrix (rows=true, cols=pred) [0,1]:
 [[100   0]
 [ 11  89]]


In [9]:

# --- Evaluate on INFERENCE set (if labels are available)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, average_precision_score

if inf is None:
    print("No inference.csv; skipping inference evaluation.")
else:
    # Merge labels if present
    if inf_labels is not None:
        key_candidates = [c for c in ["id","uuid","file","filename","clip_id","sample_id"]
                          if c in inf.columns and c in inf_labels.columns]
        if key_candidates:
            key = key_candidates[0]
            merged = inf.merge(inf_labels, on=key, how="inner", suffixes=("", "_y"))
            label_col = "label" if "label" in merged.columns else ("y" if "y" in merged.columns else None)
            X_inf, y_inf = split_xy(merged, label_col=label_col)
        else:
            label_col = "label" if "label" in inf.columns else None
            X_inf, y_inf = split_xy(inf, label_col=label_col)
    else:
        label_col = "label" if "label" in inf.columns else None
        X_inf, y_inf = split_xy(inf, label_col=label_col)

    y_prob_inf = model.predict_proba(X_inf)[:, POS_IDX]   # P(collision)
    y_hat_inf  = (y_prob_inf >= 0.5).astype(int)

    if y_inf is not None:
        y_inf_bin = (y_inf == "collision").astype(int)
        metrics_inf = {
            "accuracy": float(accuracy_score(y_inf_bin, y_hat_inf)),
            "precision": float(precision_recall_fscore_support(y_inf_bin, y_hat_inf, average="binary", zero_division=0)[0]),
            "recall": float(precision_recall_fscore_support(y_inf_bin, y_hat_inf, average="binary", zero_division=0)[1]),
            "f1": float(precision_recall_fscore_support(y_inf_bin, y_hat_inf, average="binary", zero_division=0)[2]),
            "roc_auc": float(roc_auc_score(y_inf_bin, y_prob_inf)),
            "pr_auc": float(average_precision_score(y_inf_bin, y_prob_inf)),
        }
        print("=== INFERENCE metrics (threshold=0.5) ===")
        print(metrics_inf)
    else:
        print("Inference labels missing; computed probabilities only.")


=== INFERENCE metrics (threshold=0.5) ===
{'accuracy': 0.6448202959830867, 'precision': 0.5885416666666666, 'recall': 0.9576271186440678, 'f1': 0.7290322580645161, 'roc_auc': 0.7610670099406421, 'pr_auc': 0.7615169345222946}


In [10]:

# --- Minimal monitoring snapshot (repo-local outputs/)
snapshot = {
    "dataset": "Nexar IMU",
    "test": {
        "accuracy": float(acc),
        "precision": float(prec),
        "recall": float(rec),
        "f1": float(f1),
        "roc_auc": float(roc),
        "pr_auc": float(pr),
        "threshold": 0.5,
    },
}

out_path = Path("outputs") / "metrics_snapshot.json"
out_path.parent.mkdir(parents=True, exist_ok=True)

with open(out_path, "w") as f:
    json.dump(snapshot, f, indent=2)

print("Wrote:", out_path.resolve())


Wrote: /home/almog/projects/sensor-analysis-assignment/outputs/metrics_snapshot.json
