In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import os
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, f1_score, ConfusionMatrixDisplay

data = pd.read_csv('dataset/KU-HAR/3.Time_domain_subsamples/KU-HAR_time_domain_subsamples_subset.csv', header=None)
activity_labels = [
    'Stand',
    'Sit',
    'Talk-sit',
    'Talk-stand',
    'Stand-sit',
    'Lay',
    'Lay-stand',
    'Pick',
    'Jump',
    'Push-up',
    'Sit-up',
    'Walk',
    'Walk-backward',
    'Walk-circle',
    'Run',
    'Stair-up',
    'Stair-down',
    'Table-tennis'
]
#print(data.head())
sensor_data = data.iloc[:, :1800].to_numpy()
labels = data.iloc[:,1800].to_numpy()
#(sensor_data.shape[0])

In [13]:
sensor_index = [
    (0, 300),    #acc_x
    (300, 600),   #acc_y
    (600, 900),   #acc_z
    (900, 1200),  #gyro_x
    (1200, 1500), #gyro_y
    (1500, 1800)  #gyro_z
]

feature_list = []

for i in range(sensor_data.shape[0]):
    segment_row = sensor_data[i]
    segment_features = []
    for j in range(len(sensor_index)):
        start, end = sensor_index[j]
        data = segment_row[start:end]

        segment_features.append(np.mean(data))
        segment_features.append(np.min(data))
        segment_features.append(np.max(data))
        segment_features.append(np.std(data))
        segment_features.append(np.median(data))
        segment_features.append(stats.median_abs_deviation(data)) # mad
        segment_features.append(np.sqrt(np.mean(data**2))) # rms
        segment_features.append(np.sum(data**2)) # enerji
        
        hist, _ = np.histogram(data, bins=10, density=True)
        segment_features.append(stats.entropy(hist))  # entropi

    feature_list.append(segment_features)

feature_extraction = pd.DataFrame(feature_list)
feature_names = ['mean', 'min', 'max', 'std', 'median', 'mad', 'rms', 'energy', 'entropy']
axis_name = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

column_names = [f'{axis}_{feat}' for axis in axis_name for feat in feature_names]
feature_extraction.columns = column_names
y_labels = labels

# print(feature_extraction.shape)
# print(feature_extraction.head())
#print(y_labels)

In [None]:
x_train, x_test, y_train, Y_Tesst = train_test_split(feature_extraction, y_labels, test_size=0.3, random_state=42,stratify =y_labels)

scaler = joblib.load('models/kuhar/standard_scaler.joblib')
pca = joblib.load('models/kuhar/pca_transformer.joblib')

#StandardScaler (SVM, KNN, MLP)
x_test_scaled=scaler.transform(x_test)

#PCA
x_test_pca=pca.transform(x_test)
x_test_scaled_pca = pca.transform(x_test_scaled)

In [15]:
def evaluate_model(model, X_test, y_test, name):
    y_pred = model.predict(X_test)
    print(f"\n Model: {name}")
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}\n")
    print(classification_report(y_test, y_pred, target_names=activity_labels, zero_division=0))
    print("Confusion Matrix:")
    figure, axes = plt.subplots(figsize=(15,15))
    ConfusionMatrixDisplay.from_estimator(
        model,
        X_test,
        y_test,
        display_labels=activity_labels,
        cmap='Greens',  
        xticks_rotation='vertical', 
        ax=axes 
    )
    plt.title(name)
    plt.show()

model_dir = 'models/kuhar'
for file in sorted(os.listdir(model_dir)):
    if file.endswith('.joblib'):
        model = joblib.load(os.path.join(model_dir, file))
        file_lower = file.lower()
        if 'with_pca' in file_lower:
            if any(x in file_lower for x in ['svm', 'knn', 'mlp']):
                X_test = x_test_scaled_pca
            else:
                X_test = x_test_pca
        elif 'hp_optimization' in file_lower:
            if any(x in file_lower for x in ['svm', 'knn', 'mlp']):
                X_test = x_test_scaled_pca
            else:
                X_test = x_test_pca
        else:  # without pca
            if any(x in file_lower for x in ['svm', 'knn', 'mlp']):
                X_test = x_test_scaled
            else:
                X_test = x_test

        evaluate_model(model, X_test, Y_Tesst, name=file)

ValueError: X has 8 features, but KNeighborsClassifier is expecting 9 features as input.