In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.ensemble import IsolationForest
from sklearn.metrics import (
	auc,
	classification_report,
	confusion_matrix,
	f1_score,
	precision_recall_curve,
	precision_score,
	recall_score,
	roc_auc_score,
	roc_curve,
)
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM

# Global plot styling for every figure in the notebook.
# NOTE(review): the seaborn palette is set after the Matplotlib style sheet is applied;
# keep this order unless the two calls are confirmed to be independent.
plt.style.use("seaborn-v0_8-darkgrid")
sns.set_palette("husl")

# Activity mapping: numeric activityID -> human-readable activity name.
# Used to label the printed per-activity tables and the per-activity plots.
activity_map = {
	1: "lying",
	2: "sitting",
	3: "standing",
	4: "walking",
	5: "running",
	6: "cycling",
	7: "Nordic walking",
	12: "ascending stairs",
	13: "descending stairs",
	16: "vacuum cleaning",
	17: "ironing",
	24: "rope jumping",  # listed here, but the recorded run shows no samples with this ID
}

In [2]:
# Load the pre-split PAMAP2 sample (x_*) and label (y_*) CSV files.
X_train = pd.read_csv("../data/PAMAP2/x_train_data.csv")
y_train = pd.read_csv("../data/PAMAP2/y_train_data.csv")
X_test = pd.read_csv("../data/PAMAP2/x_test_data.csv")
y_test = pd.read_csv("../data/PAMAP2/y_test_data.csv")

# Re-attach the labels and merge both splits into one frame; the notebook builds
# its own subject-based split later, after windowing.
# NOTE: from here on X_train / X_test are raw per-sample frames that ALSO contain
# the activityID label column, so they must not be passed to a model directly.
X_train["activityID"] = y_train.values
X_test["activityID"] = y_test.values
df_total = pd.concat([X_train, X_test], ignore_index=True)

# Every column that is not bookkeeping (time, subject, label) is treated as a sensor channel.
NON_SENSOR_COLS = ["timestamp", "subject", "activityID"]
SENSOR_COLS = [col for col in df_total.columns if col not in NON_SENSOR_COLS]

print("\nDataset loaded successfully!")
print(f"  Total samples: {len(df_total):,}")
print(f"  Total features: {len(SENSOR_COLS)}")
# .tolist() converts NumPy scalars to plain ints so the list prints as [1, 2, ...]
# rather than [np.int64(1), np.int64(2), ...].
print(f"  Activities: {sorted(df_total['activityID'].unique().tolist())}")


Dataset loaded successfully!
  Total samples: 1,893,200
  Total features: 30
  Activities: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(12), np.int64(13), np.int64(16), np.int64(17)]


In [3]:
SAMPLING_RATE_HZ = 100  # taken from the original "1 second at 100Hz" note — TODO confirm against the dataset
window_size = 100  # samples per window
overlap = 50  # samples shared by two consecutive windows (50% overlap)
step = window_size - overlap
PROGRESS_EVERY = 1000  # print a progress line every N candidate windows

print("\nWindow Configuration:")
# The duration is derived from window_size so the printout stays correct if the size changes.
print(f"  Window size: {window_size} samples ({window_size / SAMPLING_RATE_HZ:.1f}s)")
print(f"  Overlap: {overlap} samples ({overlap / window_size * 100:.0f}%)")
print(f"  Step size: {step} samples")


def _window_features(values, prefix):
	"""Return the nine time-domain statistics of one sensor channel in one window.

	Args:
		values: 1-D NumPy array holding the channel's samples for the window.
		prefix: column name used as the prefix of every feature key.

	Returns:
		dict mapping "<prefix>_<stat>" to the statistic, in the order
		mean, std, min, max, range, skew, kurtosis, energy, rms.
	"""
	lowest = np.min(values)
	highest = np.max(values)
	squared = values**2
	return {
		f"{prefix}_mean": np.mean(values),
		f"{prefix}_std": np.std(values),
		f"{prefix}_min": lowest,
		f"{prefix}_max": highest,
		f"{prefix}_range": highest - lowest,
		# NOTE(review): SciPy emits a RuntimeWarning for (near-)constant windows (it shows up
		# in the recorded output); the skew/kurtosis values for such windows may be unreliable —
		# verify they are finite before feeding them to an estimator.
		f"{prefix}_skew": stats.skew(values),
		f"{prefix}_kurtosis": stats.kurtosis(values),
		f"{prefix}_energy": np.sum(squared),
		f"{prefix}_rms": np.sqrt(np.mean(squared)),
	}


# Sort by subject and timestamp so that consecutive rows belong to one recording.
df_sorted = df_total.sort_values(["subject", "timestamp"]).reset_index(drop=True)

# Extract the columns as NumPy arrays once; slicing arrays per window is far cheaper
# than building a DataFrame slice for each of the tens of thousands of windows.
sensor_arrays = {col: df_sorted[col].to_numpy() for col in SENSOR_COLS}
activity_ids = df_sorted["activityID"].to_numpy()
subject_ids = df_sorted["subject"].to_numpy()

# Create windows with features
windows_data = []
window_labels = []
window_subjects = []

print("\nExtracting features from windows...")

window_starts = range(0, len(df_sorted) - window_size + 1, step)
for window_no, start_idx in enumerate(window_starts):
	# Counting windows (instead of testing start_idx against a fixed multiple)
	# keeps the progress output regular for any step size.
	if window_no % PROGRESS_EVERY == 0:
		print(f"  Processing window {window_no + 1}...")

	stop_idx = start_idx + window_size
	window_activities = activity_ids[start_idx:stop_idx]
	window_subject_ids = subject_ids[start_idx:stop_idx]

	# Keep only windows covering a single activity of a single subject; windows that
	# straddle a transition are discarded. (Assumes labels/subjects have no missing values.)
	if (window_activities != window_activities[0]).any():
		continue
	if (window_subject_ids != window_subject_ids[0]).any():
		continue

	# Time-domain features for every sensor channel
	window_features = {}
	for col, column_values in sensor_arrays.items():
		window_features.update(_window_features(column_values[start_idx:stop_idx], col))

	windows_data.append(window_features)
	window_labels.append(window_activities[0])
	window_subjects.append(window_subject_ids[0])

windows_df = pd.DataFrame(windows_data)
windows_df["activityID"] = window_labels
windows_df["subject"] = window_subjects

print("\nWindowing complete!")
print(f"  Total windows: {len(windows_df):,}")
# The two non-feature columns are activityID and subject.
print(f"  Features per window: {len(windows_df.columns) - 2}")
print("\nWindow distribution by activity:")
for activity_id, count in windows_df["activityID"].value_counts().sort_index().items():
	print(
		f"  {activity_id:2d} - {activity_map.get(activity_id, 'Unknown'):20s}: {count:6,} windows"
	)


Window Configuration:
  Window size: 100 samples (1.0s)
  Overlap: 50 samples (50%)
  Step size: 50 samples


  window_features[f"{col}_skew"] = stats.skew(values)
  window_features[f"{col}_kurtosis"] = stats.kurtosis(values)



Extracting features from windows...
  Processing window 1...
  Processing window 1001...
  Processing window 2001...
  Processing window 3001...
  Processing window 4001...
  Processing window 5001...
  Processing window 6001...
  Processing window 7001...
  Processing window 8001...
  Processing window 9001...
  Processing window 10001...
  Processing window 11001...
  Processing window 12001...
  Processing window 13001...
  Processing window 14001...
  Processing window 15001...
  Processing window 16001...
  Processing window 17001...
  Processing window 18001...
  Processing window 19001...
  Processing window 20001...
  Processing window 21001...
  Processing window 22001...
  Processing window 23001...
  Processing window 24001...
  Processing window 25001...
  Processing window 26001...
  Processing window 27001...
  Processing window 28001...
  Processing window 29001...
  Processing window 30001...
  Processing window 31001...
  Processing window 32001...
  Processing window

In [4]:
# Known activities (train on these)
# Activities the detectors are trained on (the "normal" class).
known_activities = [1, 2, 3, 4, 6, 7, 17]
# Activities withheld from training; used to test novelty detection.
unseen_activities = [5, 12, 13, 16, 24]

# Subject-wise split: subjects 1-6 for training, 7-8 for testing.
train_subjects = [1, 2, 3, 4, 5, 6]
test_subjects = [7, 8]

# Report how many windows each activity contributes, group by group.
activity_groups = (
	("\nKNOWN Activities (Normal class):", known_activities),
	("\nUNSEEN Activities (Novelty class):", unseen_activities),
)
for heading, group_ids in activity_groups:
	print(heading)
	for activity_id in group_ids:
		count = windows_df["activityID"].eq(activity_id).sum()
		print(f"  {activity_id:2d} - {activity_map[activity_id]:20s}: {count:6,} windows")

print(f"\nTrain subjects: {train_subjects}")
print(f"Test subjects: {test_subjects}")


KNOWN Activities (Normal class):
   1 - lying               :  3,834 windows
   2 - sitting             :  3,686 windows
   3 - standing            :  3,781 windows
   4 - walking             :  4,759 windows
   6 - cycling             :  3,278 windows
   7 - Nordic walking      :  3,750 windows
  17 - ironing             :  4,757 windows

UNSEEN Activities (Novelty class):
   5 - running             :  1,951 windows
  12 - ascending stairs    :  2,314 windows
  13 - descending stairs   :  2,062 windows
  16 - vacuum cleaning     :  3,491 windows
  24 - rope jumping        :      0 windows

Train subjects: [1, 2, 3, 4, 5, 6]
Test subjects: [7, 8]


In [5]:
# Training set: known activities from train subjects
# Row selectors shared by the three subsets below.
is_known_activity = windows_df["activityID"].isin(known_activities)
is_unseen_activity = windows_df["activityID"].isin(unseen_activities)
is_train_subject = windows_df["subject"].isin(train_subjects)
is_test_subject = windows_df["subject"].isin(test_subjects)
non_feature_cols = ["activityID", "subject"]

# Training set: known activities from train subjects
train_mask = is_known_activity & is_train_subject
X_train_known = windows_df.loc[train_mask].drop(columns=non_feature_cols)
y_train_known = windows_df.loc[train_mask, "activityID"]

# Test set: known activities from test subjects (should classify as normal)
test_known_mask = is_known_activity & is_test_subject
X_test_known = windows_df.loc[test_known_mask].drop(columns=non_feature_cols)
y_test_known = windows_df.loc[test_known_mask, "activityID"]

# Test set: unseen activities from test subjects (should classify as novelty)
test_unseen_mask = is_unseen_activity & is_test_subject
X_test_unseen = windows_df.loc[test_unseen_mask].drop(columns=non_feature_cols)
y_test_unseen = windows_df.loc[test_unseen_mask, "activityID"]

# Combined test set: known windows first, then unseen ones, with a fresh 0..n-1 index.
X_test_combined = pd.concat([X_test_known, X_test_unseen], ignore_index=True)
y_test_combined = pd.concat([y_test_known, y_test_unseen], ignore_index=True)
# Binary ground truth: 0 = known activity (normal), 1 = unseen activity (novelty).
y_test_binary = np.where(y_test_combined.isin(known_activities), 0, 1)

n_normal = (y_test_binary == 0).sum()
n_novel = (y_test_binary == 1).sum()
print("\nDataset sizes:")
print(f"  Training (known activities): {len(X_train_known):,} windows")
print(f"  Test (known activities): {len(X_test_known):,} windows")
print(f"  Test (unseen activities): {len(X_test_unseen):,} windows")
print(f"  Total test set: {len(X_test_combined):,} windows")
print(f"    - Normal (0): {n_normal:,}")
print(f"    - Novelty (1): {n_novel:,}")


Dataset sizes:
  Training (known activities): 20,484 windows
  Test (known activities): 7,361 windows
  Test (unseen activities): 2,308 windows
  Total test set: 9,669 windows
    - Normal (0): 7,361
    - Novelty (1): 2,308


In [None]:
# Fit every detector on the window features of KNOWN activities from the training
# subjects only (X_train_known), so that anything unlike them is flagged at test time.
# X_train must NOT be used here: it is the raw per-sample frame, which still carries
# the timestamp, subject and activityID label columns.
# NOTE(review): the features are not standardized; One-Class SVM (RBF kernel) and LOF
# are distance-based, so scaling is worth evaluating — behaviour left unchanged here.

# Model 1: One-Class SVM
print("\n[1/3] Training One-Class SVM...")
ocsvm = OneClassSVM(
	kernel="rbf",
	gamma="auto",
	nu=0.1,  # Expected fraction of outliers
	verbose=False,
)
ocsvm.fit(X_train_known)
print("  ✓ One-Class SVM trained successfully")

# Model 2: Isolation Forest
print("\n[2/3] Training Isolation Forest...")
iforest = IsolationForest(
	n_estimators=100,
	contamination=0.1,  # Expected proportion of outliers
	max_samples="auto",
	random_state=42,  # fixed seed so the forest is reproducible
	verbose=0,
)
iforest.fit(X_train_known)
print("  ✓ Isolation Forest trained successfully")

# Model 3: Local Outlier Factor
print("\n[3/3] Training Local Outlier Factor...")
lof = LocalOutlierFactor(
	n_neighbors=20,
	contamination=0.1,
	novelty=True,  # Important: enables predict for new data
)
lof.fit(X_train_known)
print("  ✓ Local Outlier Factor trained successfully")


[1/3] Training One-Class SVM...


## Get decision scores (higher = more normal, lower = more anomalous)
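> Note: the three estimators return `decision_function` scores on different scales, so the scores must be normalized to a common range before they can be combined.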

In [None]:
# Score the combined test windows (X_test_combined), i.e. exactly the rows that
# y_test_binary labels. The raw X_test frame has a different number of rows and
# different columns, so scores computed on it cannot be compared with y_test_binary.
# The detectors must have been fitted on the same window-feature columns.
# For all three models predict() returns 1 for inliers and -1 for outliers, which is
# remapped below to the notebook's convention 0 = normal, 1 = novelty.

# One-Class SVM decision scores
ocsvm_scores = ocsvm.decision_function(X_test_combined)
ocsvm_pred = ocsvm.predict(X_test_combined)
ocsvm_pred_binary = np.where(ocsvm_pred == 1, 0, 1)  # 1=normal->0, -1=anomaly->1

# Isolation Forest scores
iforest_scores = iforest.decision_function(X_test_combined)
iforest_pred = iforest.predict(X_test_combined)
iforest_pred_binary = np.where(iforest_pred == 1, 0, 1)  # 1=normal->0, -1=anomaly->1

# LOF scores
lof_scores = lof.decision_function(X_test_combined)
lof_pred = lof.predict(X_test_combined)
lof_pred_binary = np.where(lof_pred == 1, 0, 1)  # 1=normal->0, -1=anomaly->1

print("\nDecision scores computed:")
print(f"  One-Class SVM: range [{ocsvm_scores.min():.3f}, {ocsvm_scores.max():.3f}]")
print(
	f"  Isolation Forest: range [{iforest_scores.min():.3f}, {iforest_scores.max():.3f}]"
)
print(f"  LOF: range [{lof_scores.min():.3f}, {lof_scores.max():.3f}]")

In [None]:
def _to_novelty_scale(raw_scores):
	"""Min-max scale decision scores to [0, 1] and invert them (higher = more anomalous)."""
	lowest = raw_scores.min()
	highest = raw_scores.max()
	return 1 - (raw_scores - lowest) / (highest - lowest)


# Strategy 1: Majority Voting — a window is novel when at least two of the three models say so.
vote_count = ocsvm_pred_binary + iforest_pred_binary + lof_pred_binary
ensemble_voting = (vote_count >= 2).astype(int)

# Strategy 2: Average of normalized scores
# Each model's scores are first mapped to [0, 1] where higher = more anomalous.
ocsvm_scores_norm = _to_novelty_scale(ocsvm_scores)
iforest_scores_norm = _to_novelty_scale(iforest_scores)
lof_scores_norm = _to_novelty_scale(lof_scores)

ensemble_avg_scores = (ocsvm_scores_norm + iforest_scores_norm + lof_scores_norm) / 3

# Strategy 3: Weighted Average (can tune weights)
weights = {"ocsvm": 0.3, "iforest": 0.4, "lof": 0.3}
ensemble_weighted_scores = (
	weights["ocsvm"] * ocsvm_scores_norm
	+ weights["iforest"] * iforest_scores_norm
	+ weights["lof"] * lof_scores_norm
)

print("\nEnsemble strategies created:")
for strategy_line in (
	"  [1] Majority Voting",
	"  [2] Average Score",
	"  [3] Weighted Average Score",
):
	print(strategy_line)

In [None]:
def evaluate_model(y_true, y_pred, scores, model_name):
	"""Print a metric report for one detector and return the metrics.

	Args:
		y_true: ground-truth binary labels (this notebook passes 0 = normal, 1 = novelty).
		y_pred: hard binary predictions, used for precision, recall, F1 and the confusion matrix.
		scores: continuous scores, used for ROC-AUC and the precision-recall curve.
		model_name: heading printed above the report.

	Returns:
		dict with the keys "precision", "recall", "f1", "roc_auc", "pr_auc" and "cm"
		(the confusion matrix returned by scikit-learn).
	"""
	print(f"\n{model_name}")
	print("-" * 60)

	# Metrics that depend on the hard predictions
	precision = precision_score(y_true, y_pred)
	recall = recall_score(y_true, y_pred)
	f1 = f1_score(y_true, y_pred)
	for metric_label, metric_value in (
		("Precision", precision),
		("Recall", recall),
		("F1-Score", f1),
	):
		print(f"  {metric_label}: {metric_value:.4f}")

	# Ranking metric over the continuous scores
	roc_auc = roc_auc_score(y_true, scores)
	print(f"  ROC-AUC: {roc_auc:.4f}")

	# Area under the precision-recall curve (recall on the x axis)
	pr_precision, pr_recall, _ = precision_recall_curve(y_true, scores)
	pr_auc = auc(pr_recall, pr_precision)
	print(f"  PR-AUC: {pr_auc:.4f}")

	# Confusion matrix: rows are actual classes, columns are predicted classes
	cm = confusion_matrix(y_true, y_pred)
	tn, fp = cm[0, 0], cm[0, 1]
	fn, tp = cm[1, 0], cm[1, 1]
	print("\n  Confusion Matrix:")
	print(f"    TN: {tn:6,}  |  FP: {fp:6,}")
	print(f"    FN: {fn:6,}  |  TP: {tp:6,}")

	return dict(
		precision=precision,
		recall=recall,
		f1=f1,
		roc_auc=roc_auc,
		pr_auc=pr_auc,
		cm=cm,
	)

In [None]:
# One row per model: (result key, hard predictions, novelty scores, report heading).
# The two score-based ensembles have no native predictions, so their scores are
# thresholded at 0.5; majority voting is scored with the averaged ensemble scores.
evaluation_plan = [
	("ocsvm", ocsvm_pred_binary, ocsvm_scores_norm, "ONE-CLASS SVM"),
	("iforest", iforest_pred_binary, iforest_scores_norm, "ISOLATION FOREST"),
	("lof", lof_pred_binary, lof_scores_norm, "LOCAL OUTLIER FACTOR"),
	("voting", ensemble_voting, ensemble_avg_scores, "ENSEMBLE: MAJORITY VOTING"),
	(
		"avg",
		(ensemble_avg_scores > 0.5).astype(int),
		ensemble_avg_scores,
		"ENSEMBLE: AVERAGE SCORE",
	),
	(
		"weighted",
		(ensemble_weighted_scores > 0.5).astype(int),
		ensemble_weighted_scores,
		"ENSEMBLE: WEIGHTED AVERAGE",
	),
]

# Evaluate in the order listed above; each call also prints its own report.
results = {
	key: evaluate_model(y_test_binary, predictions, novelty_scores, heading)
	for key, predictions, novelty_scores, heading in evaluation_plan
}

In [None]:
# Display names, result keys and bar colours, index-aligned across the three lists.
models = ["One-Class SVM", "Isolation Forest", "LOF", "Voting", "Avg Score", "Weighted"]
model_keys = ["ocsvm", "iforest", "lof", "voting", "avg", "weighted"]
colors_models = ["#3498db", "#e74c3c", "#2ecc71", "#f39c12", "#9b59b6", "#1abc9c"]

# Metric values per model, in model_keys order.
precisions = [results[key]["precision"] for key in model_keys]
recalls = [results[key]["recall"] for key in model_keys]
f1_scores = [results[key]["f1"] for key in model_keys]
roc_aucs = [results[key]["roc_auc"] for key in model_keys]

# One bar chart per metric, drawn by a single loop instead of four copy-pasted sections.
# Panels fill the 2x2 grid row by row: Precision, Recall, F1-Score, ROC-AUC.
metric_panels = [
	("Precision", precisions),
	("Recall", recalls),
	("F1-Score", f1_scores),
	("ROC-AUC", roc_aucs),
]

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
for ax, (metric_label, metric_values) in zip(axes.flat, metric_panels):
	bars = ax.bar(
		range(len(models)),
		metric_values,
		color=colors_models,
		edgecolor="black",
		alpha=0.8,
	)
	ax.set_xticks(range(len(models)))
	ax.set_xticklabels(models, rotation=45, ha="right")
	ax.set_ylabel(metric_label, fontsize=12)
	ax.set_title(f"{metric_label} Comparison", fontsize=14, fontweight="bold")
	ax.set_ylim([0, 1])
	ax.grid(axis="y", alpha=0.3)
	# Write each bar's value just above it.
	ax.bar_label(bars, fmt="%.3f", fontsize=10)

plt.tight_layout()
plt.show()

# ROC & PR curves (roc_curve comes from the notebook's top-level sklearn.metrics import)
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Normalized novelty scores per model (higher = more anomalous).
all_scores = {
	"One-Class SVM": ocsvm_scores_norm,
	"Isolation Forest": iforest_scores_norm,
	"LOF": lof_scores_norm,
	"Ensemble (Avg)": ensemble_avg_scores,
	"Ensemble (Weighted)": ensemble_weighted_scores,
}

# ROC Curves
ax = axes[0]
for name, scores in all_scores.items():
	fpr, tpr, _ = roc_curve(y_test_binary, scores)
	roc_auc = auc(fpr, tpr)
	ax.plot(fpr, tpr, linewidth=2, label=f"{name} (AUC={roc_auc:.3f})")

ax.plot([0, 1], [0, 1], "k--", linewidth=1.5, label="Random Classifier")
ax.set_xlabel("False Positive Rate", fontsize=12)
ax.set_ylabel("True Positive Rate", fontsize=12)
ax.set_title("ROC Curves", fontsize=14, fontweight="bold")
ax.legend(loc="lower right", fontsize=10)
ax.grid(alpha=0.3)

# Precision-Recall Curves
ax = axes[1]
for name, scores in all_scores.items():
	prec, rec, _ = precision_recall_curve(y_test_binary, scores)
	pr_auc = auc(rec, prec)
	ax.plot(rec, prec, linewidth=2, label=f"{name} (AUC={pr_auc:.3f})")

# Share of novelty windows in the test set, drawn as the reference line.
baseline = y_test_binary.sum() / len(y_test_binary)
ax.axhline(
	y=baseline,
	color="k",
	linestyle="--",
	linewidth=1.5,
	label=f"Baseline ({baseline:.3f})",
)
ax.set_xlabel("Recall", fontsize=12)
ax.set_ylabel("Precision", fontsize=12)
ax.set_title("Precision-Recall Curves", fontsize=14, fontweight="bold")
ax.legend(loc="lower left", fontsize=10)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# One heatmap per evaluated model, laid out on a 2x3 grid.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

heatmap_titles = {
	"ocsvm": "One-Class SVM",
	"iforest": "Isolation Forest",
	"lof": "Local Outlier Factor",
	"voting": "Ensemble: Voting",
	"avg": "Ensemble: Average",
	"weighted": "Ensemble: Weighted",
}
cms = [(results[key]["cm"], title) for key, title in heatmap_titles.items()]
class_names = ["Normal", "Novelty"]

for ax, (cm, title) in zip(axes, cms):
	# Rows are the actual class, columns the predicted class.
	sns.heatmap(
		cm,
		annot=True,
		fmt="d",
		cmap="Blues",
		ax=ax,
		cbar_kws={"label": "Count"},
		xticklabels=class_names,
		yticklabels=class_names,
	)
	ax.set_xlabel("Predicted", fontsize=11)
	ax.set_ylabel("Actual", fontsize=11)
	ax.set_title(title, fontsize=12, fontweight="bold")

plt.tight_layout()
plt.show()

In [None]:
unique_test_activities = sorted(y_test_combined.unique())

# Weighted-ensemble novelty scores grouped by the true activity of each test window.
activity_scores = {
	activity_id: ensemble_weighted_scores[(y_test_combined == activity_id).to_numpy()]
	for activity_id in unique_test_activities
}

# Visualize score distributions per activity
fig, ax = plt.subplots(figsize=(16, 8))

positions = list(range(len(unique_test_activities)))
data = [activity_scores[activity_id] for activity_id in unique_test_activities]
labels = []
for activity_id in unique_test_activities:
	activity_name = activity_map.get(activity_id, f"ID{activity_id}")
	group_tag = "KNOWN" if activity_id in known_activities else "UNSEEN"
	labels.append(f"{activity_id}: {activity_name} ({group_tag})")

bp = ax.boxplot(
	data,
	positions=positions,
	patch_artist=True,
	showfliers=True,
	widths=0.6,
)
# Tick labels are set through the Axes API rather than boxplot's `labels=` keyword,
# which Matplotlib 3.9 renamed to `tick_labels`; this form works on old and new versions.
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=45, ha="right")

# Color boxes: blue for known, red for unseen
for patch, activity_id in zip(bp["boxes"], unique_test_activities):
	patch.set_facecolor("#3498db" if activity_id in known_activities else "#e74c3c")
	patch.set_alpha(0.7)

# 0.5 is the cut-off the notebook applies to the ensemble scores.
ax.axhline(
	y=0.5,
	color="black",
	linestyle="--",
	linewidth=2,
	label="Decision Threshold (0.5)",
	alpha=0.7,
)
ax.set_xlabel("Activity", fontsize=12)
ax.set_ylabel("Novelty Score (Ensemble)", fontsize=12)
ax.set_title("Novelty Score Distribution by Activity", fontsize=14, fontweight="bold")
ax.legend(loc="upper right", fontsize=11)
ax.grid(axis="y", alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Text table: summary statistics of the weighted-ensemble score for every test activity.
table_rule = "-" * 80
print("\nPer-Activity Novelty Scores (Ensemble Weighted):")
print(table_rule)
print(f"{'Activity':<30} {'Type':<10} {'Mean':>8} {'Std':>8} {'Min':>8} {'Max':>8}")
print(table_rule)

for activity_id in unique_test_activities:
	scores = activity_scores[activity_id]
	activity_name = activity_map.get(activity_id, f"ID{activity_id}")
	activity_type = "UNSEEN" if activity_id not in known_activities else "KNOWN"
	score_mean, score_std = np.mean(scores), np.std(scores)
	score_min, score_max = np.min(scores), np.max(scores)

	print(
		f"{activity_id:2d} - {activity_name:<25} {activity_type:<10} "
		f"{score_mean:8.4f} {score_std:8.4f} {score_min:8.4f} {score_max:8.4f}"
	)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: ensemble score distribution for known vs unseen test windows
ax = axes[0]
known_mask = y_test_binary == 0
unseen_mask = y_test_binary == 1

for group_mask, legend_label, face_color in (
	(known_mask, "Known Activities", "#3498db"),
	(unseen_mask, "Unseen Activities", "#e74c3c"),
):
	ax.hist(
		ensemble_weighted_scores[group_mask],
		bins=50,
		alpha=0.6,
		label=legend_label,
		color=face_color,
		edgecolor="black",
	)
ax.axvline(x=0.5, color="black", linestyle="--", linewidth=2, label="Threshold (0.5)")
ax.set_xlabel("Novelty Score (Ensemble)", fontsize=12)
ax.set_ylabel("Frequency", fontsize=12)
ax.set_title("Score Distribution: Known vs Unseen", fontsize=14, fontweight="bold")
ax.legend(fontsize=11)
ax.grid(alpha=0.3)

# Right panel: normalized score distribution of each individual model
ax = axes[1]
for model_scores, legend_label, face_color in (
	(ocsvm_scores_norm, "One-Class SVM", "#3498db"),
	(iforest_scores_norm, "Isolation Forest", "#e74c3c"),
	(lof_scores_norm, "LOF", "#2ecc71"),
):
	ax.hist(
		model_scores,
		bins=50,
		alpha=0.4,
		label=legend_label,
		color=face_color,
		edgecolor="black",
	)
ax.axvline(x=0.5, color="black", linestyle="--", linewidth=2, label="Threshold")
ax.set_xlabel("Normalized Novelty Score", fontsize=12)
ax.set_ylabel("Frequency", fontsize=12)
ax.set_title("Score Distribution by Model", fontsize=14, fontweight="bold")
ax.legend(fontsize=11)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# --- 1. Model with the highest F1-score ---
print("\n1. BEST PERFORMING MODEL:")
best_model_names = {
	"ocsvm": "One-Class SVM",
	"iforest": "Isolation Forest",
	"lof": "Local Outlier Factor",
	"voting": "Ensemble: Majority Voting",
	"avg": "Ensemble: Average Score",
	"weighted": "Ensemble: Weighted Average",
}
best_model_key = max(model_keys, key=lambda k: results[k]["f1"])
best_model = best_model_names[best_model_key]
best_metrics = results[best_model_key]
print(f"   {best_model}")
for metric_label, metric_key in (
	("F1-Score", "f1"),
	("ROC-AUC", "roc_auc"),
	("Precision", "precision"),
	("Recall", "recall"),
):
	print(f"   {metric_label}: {best_metrics[metric_key]:.4f}")

# --- 2. Side-by-side table of all models ---
print("\n2. MODEL COMPARISON:")
print(f"   {'Model':<30} {'F1':>8} {'ROC-AUC':>8} {'Precision':>10} {'Recall':>8}")
print(f"   {'-' * 70}")
for key, name in zip(model_keys, models):
	row = results[key]
	print(
		f"   {name:<30} {row['f1']:8.4f} {row['roc_auc']:8.4f} "
		f"{row['precision']:10.4f} {row['recall']:8.4f}"
	)

# --- 3. Weighted ensemble F1 versus the mean F1 of the three base models ---
print("\n3. ENSEMBLE BENEFITS:")
base_model_f1 = [results[key]["f1"] for key in ("ocsvm", "iforest", "lof")]
ensemble_improvement = results["weighted"]["f1"] - np.mean(base_model_f1)
print(f"   F1-Score improvement over individual models: {ensemble_improvement:+.4f}")

# --- 4. Unseen activities whose mean ensemble score exceeds the 0.5 threshold ---
print("\n4. ACTIVITIES SUCCESSFULLY DETECTED AS NOVEL:")
# Mean score per unseen activity; activities absent from the test set are skipped.
unseen_mean_scores = {}
for activity_id in unseen_activities:
	scores = activity_scores.get(activity_id)
	if scores is not None:
		unseen_mean_scores[activity_id] = np.mean(scores)

correctly_detected = []
for activity_id, mean_score in unseen_mean_scores.items():
	if mean_score > 0.5:
		correctly_detected.append(activity_id)
		print(
			f"   ✓ {activity_id} - {activity_map[activity_id]}: avg score = {mean_score:.4f}"
		)

# --- 5. Unseen activities whose mean score stays at or below the threshold ---
print("\n5. CHALLENGING ACTIVITIES (potential misclassification):")
for activity_id, mean_score in unseen_mean_scores.items():
	if mean_score <= 0.5:
		print(
			f"   ✗ {activity_id} - {activity_map[activity_id]}: avg score = {mean_score:.4f}"
		)
		print("      → May be similar to known activities, consider refinement")

# --- 6. Follow-up suggestions ---
print("\n6. RECOMMENDATIONS:")
recommendations = (
	"Current ensemble uses equal/near-equal weights",
	"Consider grid search to optimize individual model hyperparameters",
	"Try different weight combinations for ensemble",
	"Experiment with different window sizes (0.5s, 2s, 3s)",
	"Add frequency-domain features for better separation",
	"Consider deep learning approaches (Autoencoders, VAE) for complex patterns",
	"Implement online/streaming novelty detection for real-time applications",
)
for recommendation in recommendations:
	print(f"   • {recommendation}")

closing_rule = "=" * 80
print("\n" + closing_rule)
print("ENSEMBLE NOVELTY DETECTION COMPLETE!")
print(closing_rule)