In [None]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report



In [2]:
import numpy as np

# NTU-60 skeleton parent list (used for bone vectors).
# PARENTS[j] is the 0-based index of the joint that joint j hangs off;
# extract_rich() forms bone j as X[..., j] - X[..., PARENTS[j]].
# Entry 0 points at itself, i.e. joint 0 is the root and gets no bone.
# The table has 25 entries, one per joint.
# NOTE(review): topology not cross-checked against the official NTU joint map — confirm.
PARENTS = [0, 0, 1, 2,
           1, 4, 5, 6,
           1, 8, 9,10,
           0,12,13,14,
           0,16,17,18,
           1, 7, 7,11,11]

def extract_basic(data):
    """Per-channel mean and std pooled over time, joints and persons.

    `data` is expected as a 5-D array (N, C, T, V, M). The result has shape
    (N, 2*C): the C channel means followed by the C channel standard
    deviations, i.e. (N, 6) for 3-channel input.
    """
    # The unpack doubles as a rank check: non-5-D input raises ValueError here.
    n_samples, n_channels, n_frames, n_joints, n_persons = data.shape
    pooled_axes = (2, 3, 4)
    channel_mean = data.mean(axis=pooled_axes)
    channel_std = data.std(axis=pooled_axes)
    return np.hstack((channel_mean, channel_std))

def extract_rich(data, parents=None):
    """Pooled statistics of position, velocity, acceleration and bone vectors.

    The person axis is averaged away first, giving X of shape (N, C, T, V).
    Four signals are derived from X:

    * position     - X itself,
    * velocity     - first difference of X along the time axis,
    * acceleration - first difference of the velocity along the time axis,
    * bone vectors - each joint minus its parent joint (joint 0 stays zero).

    Every signal is reduced to a per-channel mean and std over (time, joints),
    i.e. 2*C numbers per signal.

    Parameters
    ----------
    data : np.ndarray, shape (N, C, T, V, M)
        Skeleton sequences.
    parents : sequence of int, optional
        ``parents[j]`` is the index of the parent of joint ``j``. Defaults to
        the module-level ``PARENTS`` table. Needs at least V entries; entry 0
        is ignored because joint 0 is treated as the root.

    Returns
    -------
    np.ndarray, shape (N, 8*C)
        Columns are ``[pos mean, pos std, vel mean, vel std, acc mean,
        acc std, bone mean, bone std]`` - (N, 24) for 3-channel data.

    Raises
    ------
    ValueError
        If ``data`` is not 5-D or has fewer than 3 frames (the acceleration
        signal would be empty and pool to NaN).
    IndexError
        If ``parents`` has fewer than V entries or refers to a joint >= V.
    """
    if data.ndim != 5:
        raise ValueError(
            f"expected 5-D input (N, C, T, V, M), got shape {data.shape}")
    N, C, T, V, M = data.shape
    if T < 3:
        raise ValueError(
            f"need at least 3 frames for velocity/acceleration, got T={T}")

    if parents is None:
        parents = PARENTS
    parent_idx = np.asarray(parents, dtype=np.intp)
    if parent_idx.ndim != 1 or parent_idx.shape[0] < V:
        raise IndexError(
            f"parent table has {parent_idx.size} entries but data has {V} joints")
    parent_idx = parent_idx[:V]

    X = data.mean(axis=4)         # (N, C, T, V)

    vel = np.diff(X, axis=2)      # (N, C, T-1, V)
    acc = np.diff(vel, axis=2)    # (N, C, T-2, V)

    # Bone vectors for joints 1..V-1 in one indexed subtraction; the root
    # (joint 0) keeps its zero vector.
    bone = np.zeros_like(X)
    bone[..., 1:] = X[..., 1:] - X[..., parent_idx[1:]]

    def pool(Y):
        # (N, C, T', V) -> (N, 2*C): channel means followed by channel stds
        m = Y.mean(axis=(2, 3))
        s = Y.std(axis=(2, 3))
        return np.concatenate([m, s], axis=1)

    return np.concatenate([pool(X), pool(vel), pool(acc), pool(bone)], axis=1)

def extract_extended(data, joint_pairs):
    """extract_rich features plus joint-to-joint distance statistics.

    For every ``(i, j)`` in ``joint_pairs`` the Euclidean distance between
    joints ``i`` and ``j`` is computed per frame (on person-averaged
    coordinates) and summarised by its mean and std over time.

    Parameters
    ----------
    data : np.ndarray, shape (N, C, T, V, M)
        Skeleton sequences.
    joint_pairs : iterable of (int, int)
        Joint index pairs. May be empty, in which case only the
        extract_rich features are returned.

    Returns
    -------
    np.ndarray, shape (N, R + 2*len(joint_pairs))
        The R columns produced by ``extract_rich(data)`` followed by
        ``mean_0, std_0, mean_1, std_1, ...`` for the pairs in order.
    """
    N, C, T, V, M = data.shape
    X = data.mean(axis=4)  # (N, C, T, V)

    d_cols = []
    for (i, j) in joint_pairs:
        # per-frame distance: norm over the channel axis
        d = np.linalg.norm(X[:, :, :, i] - X[:, :, :, j], axis=1)  # (N, T)
        d_cols.append(d.mean(axis=1))
        d_cols.append(d.std(axis=1))

    if d_cols:
        d_feats = np.stack(d_cols, axis=1)  # (N, 2*len(pairs))
    else:
        # np.stack rejects an empty list; use a zero-width block so the
        # concatenation below still works and yields the rich features only.
        d_feats = np.empty((N, 0), dtype=X.dtype)

    return np.concatenate([extract_rich(data), d_feats], axis=1)


In [3]:
import pickle

# adjust these paths
# (absolute, machine-specific locations of the skeleton array and its label pickle)
DATA_PATH  = r'D:\UniGe\2\CV\Babel Project\Normalized Data\train_ntu_sk_60_pre.npy'
LABEL_PATH = r'D:\UniGe\2\CV\Babel Project\BABEL\action_recognition\data\release\train_label_60.pkl'

# Skeleton tensor; the extract_* helpers unpack its shape as (N, C, T, V, M).
data = np.load(DATA_PATH)         # (45473, 3, 150, 25, 1)
# SECURITY: pickle.load can execute arbitrary code embedded in the file —
# only open label files that come from a trusted source.
with open(LABEL_PATH,'rb') as f:
    # NOTE(review): the payload is indexed as [1][0]; presumably element 1 holds
    # the label arrays — confirm against the BABEL release format.
    labels = pickle.load(f)[1][0]  # flat array of shape (45473,)


In [4]:
# 1) Prepare feature sets:
X_basic    = extract_basic   (data)
X_rich     = extract_rich    (data)
# pick some pairs (i,j) manually, e.g. head (3)→neck (2), wrist(6)→elbow(5)...
pairs      = [(3,2),(6,5),(9,8),(12,10),(15,13)]
X_ext      = extract_extended(data, pairs)

# 2) Train/test split
# Split the row indices ONCE and reuse them for every feature matrix and for
# the labels. This gives the same partition as calling train_test_split on
# each matrix with random_state=42, but the alignment between Xb/Xr/Xe and
# y no longer depends on repeating the seed in three separate calls.
labels_arr = np.asarray(labels)
n_samples = len(labels_arr)
if not (len(X_basic) == len(X_rich) == len(X_ext) == n_samples):
    raise ValueError("feature matrices and labels disagree on the number of samples")

idx_tr, idx_te = train_test_split(np.arange(n_samples), test_size=0.2, random_state=42)

Xb_tr, Xb_te = X_basic[idx_tr],    X_basic[idx_te]
Xr_tr, Xr_te = X_rich[idx_tr],     X_rich[idx_te]
Xe_tr, Xe_te = X_ext[idx_tr],      X_ext[idx_te]
y_tr,  y_te  = labels_arr[idx_tr], labels_arr[idx_te]

In [None]:
# 3) Define models
# RandomForest and GradientBoosting draw random numbers while fitting; they are
# seeded (42, same as the train/test split) so that a fresh Restart & Run All
# reproduces the reported accuracies.
models = [
  ('LogReg',   LogisticRegression(max_iter=1000, class_weight='balanced')),
  ('KNN',      KNeighborsClassifier(n_neighbors=5)),
  ('RandomF',  RandomForestClassifier(n_estimators=200, class_weight='balanced',
                                      random_state=42)),
  ('GBoost',   GradientBoostingClassifier(n_estimators=100, random_state=42)),
  ('SVM',      SVC(kernel='rbf', C=1.0, class_weight='balanced'))
]

In [6]:
# 4) Evaluate each on Basic→Rich→Ext
# Each classifier is fitted in turn on the Basic, Rich and Ext training
# features and scored on the matching held-out features; y_tr / y_te are
# shared across the three feature sets.
for name, clf in models:
    for tag, X_tr, X_te in (('Basic', Xb_tr, Xb_te),
                            ('Rich',  Xr_tr, Xr_te),
                            ('Ext',   Xe_tr, Xe_te)):
        # fit() returns the estimator itself, so fit and predict can be chained
        p = clf.fit(X_tr, y_tr).predict(X_te)
        acc = accuracy_score(y_te, p)
        print(f"{name:8s} | {tag:5s} → acc: {acc:.3f}")
    print('-'*40)

LogReg   | Basic → acc: 0.174
LogReg   | Rich  → acc: 0.234
LogReg   | Ext   → acc: 0.247
----------------------------------------
KNN      | Basic → acc: 0.291
KNN      | Rich  → acc: 0.328
KNN      | Ext   → acc: 0.344
----------------------------------------
RandomF  | Basic → acc: 0.269
RandomF  | Rich  → acc: 0.306
RandomF  | Ext   → acc: 0.326
----------------------------------------
GBoost   | Basic → acc: 0.284
GBoost   | Rich  → acc: 0.326
GBoost   | Ext   → acc: 0.342
----------------------------------------
SVM      | Basic → acc: 0.248
SVM      | Rich  → acc: 0.282
SVM      | Ext   → acc: 0.293
----------------------------------------


In [None]:
from sklearn.decomposition      import PCA
from sklearn.neighbors         import KNeighborsClassifier
from sklearn.model_selection   import GridSearchCV
from sklearn.pipeline          import Pipeline

# 1) PCA to 30 dims
# PCA is a pipeline step so that GridSearchCV refits it on the training part
# of every fold. Fitting it once on all of X_ext beforehand lets the
# validation rows shape the projection they are then scored in (leakage).
pca = PCA(n_components=30, random_state=42)
knn = KNeighborsClassifier()
pca_knn = Pipeline([('pca', pca), ('knn', knn)])

# 2) Grid-search k & weights
# Parameters of a pipeline step are addressed as <step>__<parameter>.
param_grid = {
    'knn__n_neighbors': [3,5,7,9],
    'knn__weights':    ['uniform','distance'],
    'knn__metric':     ['euclidean','cosine']
}
gs  = GridSearchCV(pca_knn, param_grid, cv=3, n_jobs=-1, verbose=2)
gs.fit(X_ext, labels)

# GridSearchCV works on clones, so rebind `pca` to the fitted step of the
# refit best pipeline and keep `Xp` available as the 30-dim projection.
pca = gs.best_estimator_.named_steps['pca']
Xp = pca.transform(X_ext)

print("Best params:", gs.best_params_)
print("CV score:  ", gs.best_score_)


Fitting 3 folds for each of 16 candidates, totalling 48 fits
