In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import os
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, f1_score, ConfusionMatrixDisplay

cols = ['user', 'activity', 'timestamp', 'x-accel', 'y-accel', 'z-accel']
raw_data = pd.read_csv('dataset/wisdm/WISDM_ar_v1.1/WISDM_ar_v1.1_raw.txt', header=None,names=cols, sep=",",on_bad_lines='skip')
raw_data['z-accel'] = raw_data['z-accel'].str.replace(';', '').astype(float)
raw_data.interpolate(method='linear', limit_direction ='both', inplace=True) # eksik veri tamamlandı

#print(raw_data.head())

#Aktivite ve Kodları:
# 0: Downstairs
# 1: Jogging
# 2: Sitting
# 3: Standing
# 4: Upstairs
# 5: Walking
raw_data['actvity_code'] = raw_data['activity'].astype('category').cat.codes
work_data = raw_data[['user','actvity_code','timestamp', 'x-accel', 'y-accel', 'z-accel']]



  raw_data.interpolate(method='linear', limit_direction ='both', inplace=True) # eksik veri tamamlandı


In [7]:
sample_frequency = 20
seconds=3
window_size = seconds * sample_frequency #20Hz 3 seconds 20*3=60
feature_count = 3 #x,y,z
step = window_size # for non-overlap segments

segments = []
labels = []

data_for_segmentation = work_data[['x-accel', 'y-accel', 'z-accel']].values
labels_for_segmentation = work_data['actvity_code'].values

for user in work_data['user'].unique():
    for act_code in work_data['actvity_code'].unique():
        s_index = work_data.index[(work_data['user'] == user) & (work_data['actvity_code'] == act_code)].tolist()
        for i in range(0, len(s_index) - window_size, step):
            window_start = s_index[i]
            window_end = window_start + window_size
            
            segment = data_for_segmentation[window_start : window_end]
            label = stats.mode(labels_for_segmentation[window_start : window_end], keepdims=True).mode[0]
            
            segments.append(segment)
            labels.append(label)

reshaped_segments = np.asarray(segments, dtype=np.float32)
labels = np.asarray(labels)
#print(reshaped_segments)

In [8]:
feature_list = []

for i in range(reshaped_segments.shape[0]): #bütün segmentler, 18bin tane yklşk
    segment = reshaped_segments[i]
    segment_features = []
    
    for j in range(segment.shape[1]): #3 ekseni dön, xyz
        data = segment[:, j]
        segment_features.append(np.mean(data))
        segment_features.append(np.min(data))
        segment_features.append(np.max(data))
        segment_features.append(np.std(data))
        segment_features.append(np.median(data))
        segment_features.append(stats.median_abs_deviation(data)) #mad
        segment_features.append(np.sqrt(np.mean(data**2))) #rsm
        segment_features.append(np.sum(data**2)) #enerji - jog/walk yüksek enerji, sit/stand düşük enerji
        
        hist, _ = np.histogram(data, bins=10, density=True)
        segment_features.append(stats.entropy(hist))  #entropi

    feature_list.append(segment_features)
    
#print(len(feature_list))
#print(feature_list[0])
#print(feature_list[1])
feature_extraction = pd.DataFrame(feature_list)

feature_names = ['mean', 'min', 'max', 'std', 'median', 'mad', 'rms', 'energy', 'entropy']
axis_name = ['x', 'y', 'z']
column_names = [f'{axis}_{feat}' for axis in axis_name for feat in feature_names ]
feature_extraction.columns = column_names
y_labels = labels.ravel()

#print(feature_extraction.head())

In [9]:
x_train, x_test, y_train, y_test = train_test_split(feature_extraction, y_labels, test_size=0.3, random_state=42,stratify =y_labels)

scaler = joblib.load('models/wisdm/standard_scaler.joblib')
pca = joblib.load('models/wisdm/pca_transformer.joblib')

#StandardScaler (SVM, KNN, MLP)
x_test_scaled=scaler.transform(x_test)

#PCA
x_test_pca=pca.transform(x_test)
x_text_scaled_pca = pca.transform(x_test_scaled)

activity_labels = list(raw_data['activity'].astype('category').cat.categories)





In [10]:
def evaluate_model(model, X_test, y_test, name):
    print(f"\n Model: {name}")
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}\n")
    print(classification_report(y_test, y_pred, target_names=activity_labels, zero_division=0))
    print("Confusion Matrix:")
    figure, axes = plt.subplots(figsize=(15,15))
    ConfusionMatrixDisplay.from_predictions(
        y_test,
        y_pred,
        display_labels=activity_labels,
        cmap='Greens',  
        xticks_rotation='horizontal', 
        ax=axes 
    )
    plt.title(name)
    plt.show()

model_dir = 'models/kuhar'
for file in sorted(os.listdir(model_dir)):
    if file.endswith('.joblib'):
        model = joblib.load(os.path.join(model_dir, file))
        file_lower = file.lower()
        if 'with_pca' in file_lower:
            if any(x in file_lower for x in ['svm', 'knn', 'mlp']):
                X_test = x_text_scaled_pca
            else:
                X_test = x_test_pca
        elif 'hp_optimization' in file_lower:
            if any(x in file_lower for x in ['svm', 'knn', 'mlp']):
                X_test = x_text_scaled_pca
            else:
                X_test = x_test_pca
        else:  # without pca
            if any(x in file_lower for x in ['svm', 'knn', 'mlp']):
                X_test = x_test_scaled
            else:
                X_test = x_test

        evaluate_model(model, X_test, y_test, name=file)


 Model: KNN_With_PCA_kuhar.joblib


ValueError: X has 10 features, but KNeighborsClassifier is expecting 9 features as input.