# k-NN Classification: Real vs Fake Mouse Activity
This notebook loads processed feature data for real and fake mouse activity, trains a k-Nearest Neighbors classifier, and evaluates its performance.

In [21]:
import pandas as pd
import glob
import os
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [23]:
# Klasör yolları
REAL_DIR = r"C:\\store\\git\\km-stat-activity\\data\\real"
FAKE_DIR = r"C:\\store\\git\\km-stat-activity\\processed\\fake"

In [25]:
# Öznitelik isimleri
FEATURE_COLUMNS = [
    'x_direction_changes', 'y_direction_changes',
    'min_x', 'min_y', 'max_x', 'max_y', 'bbox_area',
    'avg_speed', 'avg_acceleration',
    'mouse_idle_ratio', 'movement_entropy', 'linearity'
]

In [27]:
def load_data_from_folder(folder_path, label):
    pattern = os.path.join(folder_path, "**", "*processed.csv")
    all_files = glob.glob(pattern, recursive=True)
    
    if not all_files:
        print(f"[UYARI] Hiç dosya bulunamadı: {pattern}")
        return pd.DataFrame(columns=FEATURE_COLUMNS + ['label'])

    df_list = []
    for file in all_files:
        try:
            df = pd.read_csv(file, usecols=FEATURE_COLUMNS)
            df['label'] = label
            df_list.append(df)
        except Exception as e:
            print(f"[HATA] Dosya okunamadı: {file} → {e}")
    
    return pd.concat(df_list, ignore_index=True)


In [29]:
# Verileri oku ve birleştir
df_real = load_data_from_folder(REAL_DIR, label=0)
df_fake = load_data_from_folder(FAKE_DIR, label=1)

df_all = pd.concat([df_real, df_fake], ignore_index=True).sample(frac=1, random_state=42).reset_index(drop=True)

X = df_all[FEATURE_COLUMNS]
y = df_all['label']

In [31]:
# Eğitim ve test verilerini ayır
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)

In [33]:
# k-NN eğitimi
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

ValueError: could not convert string to float: '(449, 205, 1499, 907)'

In [None]:
# Değerlendirme metrikleri
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1]) if (cm[0, 0] + cm[0, 1]) != 0 else 0

print("Confusion Matrix:\n", cm)
print(f"Accuracy   : {accuracy:.4f}")
print(f"Precision  : {precision:.4f}")
print(f"Recall     : {recall:.4f}")
print(f"Specificity: {specificity:.4f}")