In [1]:
# Parameters
# input_path: CSV that is read into the DataFrame further down.
# output_path: CSV that the DataFrame with the computed feature columns is written to.
# NOTE(review): both are absolute, machine-specific Windows paths — presumably injected
# for this particular run; confirm before reusing the notebook on another machine.
input_path = "C:\\store\\git\\km-stat-activity\\data\\real\\km_stat_nature_20250421.csv"
output_path = "C:\\store\\git\\km-stat-activity\\data\\real\\km_stat_nature_20250421_features.csv"


In [2]:
# Parameters
# Give each run parameter a None default, but only when nothing earlier in the
# notebook has already bound that name; an existing value is left untouched.
if "input_path" not in globals():
    input_path = None

if "output_path" not in globals():
    output_path = None


In [3]:
import os
import pandas as pd
import numpy as np
import math
import ast
from pathlib import Path

def safe_parse(val):
    """Parse a list-like literal (typically a CSV cell) into a Python list.

    Args:
        val: Either the textual form of a Python literal such as ``"[1, 2, 3]"``,
            or a value that has already been parsed (``list`` or ``tuple``).

    Returns:
        list: The parsed list. Non-list iterables produced by the literal
        (e.g. a tuple) are converted with ``list()``. An empty list is returned
        when the value cannot be parsed or does not evaluate to an iterable.
    """
    # ast.literal_eval only accepts strings (or AST nodes) and raises ValueError
    # for anything else, so already-parsed input is passed through here. This
    # keeps the function idempotent: applying it twice to the same column must
    # not turn every row into [].
    if isinstance(val, list):
        return val
    if isinstance(val, tuple):
        return list(val)

    try:
        result = ast.literal_eval(val)
        # list() raises TypeError for scalars such as "5"; handled below.
        return result if isinstance(result, list) else list(result)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only the failures literal_eval/list() are expected to raise on bad
        # input; KeyboardInterrupt and friends are no longer swallowed.
        return []


def compute_direction_changes(coord):
    """Count how often the sign of the step between consecutive samples changes.

    Each pair of adjacent steps is compared by sign (-1, 0 or +1). A zero-length
    step has sign 0, so moving into or out of a standstill is counted as a
    change as well.

    Args:
        coord: Sequence of numeric positions along one axis.

    Returns:
        The number of adjacent step pairs whose signs differ; 0 when fewer than
        three samples are given.
    """
    if not coord or len(coord) < 3:
        return 0

    # Step between every pair of neighbouring samples.
    steps = [nxt - cur for cur, nxt in zip(coord, coord[1:])]

    return sum(
        np.sign(later) != np.sign(earlier)
        for earlier, later in zip(steps, steps[1:])
    )

def compute_bounding_box(x, y):
    """Return the axis-aligned bounding box of a point sequence and its area.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates, same length as ``x``.

    Returns:
        tuple: ``((min_x, min_y, max_x, max_y), area)``. When either sequence is
        empty or the lengths differ, ``((0, 0, 0, 0), 0)`` is returned.
    """
    usable = bool(x) and bool(y) and len(x) == len(y)
    if not usable:
        return (0, 0, 0, 0), 0

    x_lo, x_hi = min(x), max(x)
    y_lo, y_hi = min(y), max(y)

    width = x_hi - x_lo
    height = y_hi - y_lo
    return (x_lo, y_lo, x_hi, y_hi), width * height

def compute_avg_speed(x, y):
    """Return the mean Euclidean distance between consecutive points.

    No timestamps are involved: the value is the average step length per sample.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates, same length as ``x``.

    Returns:
        The mean step length, or ``0.0`` when either sequence is empty, the
        lengths differ, or fewer than two points are given.
    """
    if not (x and y) or len(x) != len(y) or len(x) < 2:
        return 0.0

    step_lengths = []
    for idx in range(1, len(x)):
        dx = x[idx] - x[idx - 1]
        dy = y[idx] - y[idx - 1]
        step_lengths.append(math.hypot(dx, dy))

    return np.mean(step_lengths)


def compute_avg_acceleration(speeds):
    """Return the change between each speed value and the one before it.

    Args:
        speeds: Sequence (or pandas Series) of speed values, or ``None``.

    Returns:
        list: One entry per input value holding ``speeds[i] - speeds[i - 1]``.
        Entries with no defined difference (including the first) are ``0``.
        An empty list is returned for ``None`` or empty input.
    """
    if speeds is None:
        return []
    if len(speeds) == 0:
        return []

    deltas = pd.Series(speeds).diff()
    # The first element has no predecessor, so diff() yields NaN there.
    deltas = deltas.fillna(0)
    return deltas.tolist()

def compute_idle_ratio(x):
    """Return the fraction of consecutive sample pairs with no change in value.

    Args:
        x: Sequence of numeric positions along one axis.

    Returns:
        The share of steps whose difference is exactly zero, or ``1.0`` when
        fewer than two samples are given.
    """
    if not x or len(x) < 2:
        return 1.0

    steps = np.diff(x)
    unchanged = (steps == 0).sum()
    return unchanged / len(steps)

def compute_entropy(x):
    """Return the Shannon entropy (in bits) of the step values of a sequence.

    The steps are the differences between consecutive samples; the entropy is
    taken over the empirical distribution of the distinct step values.

    Args:
        x: Sequence of numeric positions along one axis.

    Returns:
        The entropy in bits, or ``0.0`` when fewer than two samples are given.
    """
    if not x or len(x) < 2:
        return 0.0

    _, occurrences = np.unique(np.diff(x), return_counts=True)
    share = occurrences / occurrences.sum()
    weighted_bits = share * np.log2(share)
    return -weighted_bits.sum()

def compute_linearity(x, y):
    """Return the ratio of straight-line distance to travelled path length.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates, same length as ``x``.

    Returns:
        Distance between the first and last point divided by the summed length
        of all steps. ``0.0`` is returned when either sequence is empty, the
        lengths differ, fewer than two points are given, or the path length is
        not positive.
    """
    if not (x and y) or len(x) != len(y) or len(x) < 2:
        return 0.0

    dx, dy = np.diff(x), np.diff(y)
    travelled = np.sum(np.hypot(dx, dy))
    if travelled > 0:
        end_to_end = math.hypot(x[-1] - x[0], y[-1] - y[0])
        return end_to_end / travelled
    return 0.0



In [4]:
# Read the data. The converters turn the list-valued columns into Python lists
# while the file is being loaded, so this is the only parsing step they need.
df = pd.read_csv(
    input_path,
    converters={
        "x": safe_parse,
        "y": safe_parse,
        "key_codes": safe_parse,
        "key_counts": safe_parse,
    },
    parse_dates=["start_date_time", "end_date_time"],
)

# "x" and "y" are deliberately NOT passed through safe_parse a second time:
# they already hold lists, and ast.literal_eval only accepts strings, so a
# second pass is at best redundant and can discard the parsed coordinates.

# Feature computation
df["x_direction_changes"] = df["x"].apply(compute_direction_changes)
df["y_direction_changes"] = df["y"].apply(compute_direction_changes)

# compute_bounding_box returns (box, area); split the pair into two columns.
bbox_results = df.apply(lambda row: compute_bounding_box(row["x"], row["y"]), axis=1)
df["bounding_box"] = bbox_results.apply(lambda result: result[0])
df["bbox_area"] = bbox_results.apply(lambda result: result[1])

df["avg_speed"] = df.apply(lambda row: compute_avg_speed(row["x"], row["y"]), axis=1)
# Row-to-row difference of avg_speed (0 for the first row).
df["avg_acceleration"] = compute_avg_acceleration(df["avg_speed"])

# NOTE(review): idle ratio and entropy are derived from the x series only, so
# purely vertical movement is not reflected in them — confirm this is intended.
df["mouse_idle_ratio"] = df["x"].apply(compute_idle_ratio)
df["movement_entropy"] = df["x"].apply(compute_entropy)
df["linearity"] = df.apply(lambda row: compute_linearity(row["x"], row["y"]), axis=1)



In [5]:
# DataFrame.to_csv returns the CSV text instead of writing a file when the
# path is None, which would make the success message below untrue. Fail
# loudly rather than report a save that never happened.
if output_path is None:
    raise ValueError("output_path is not set; nothing would be written to disk")

df.to_csv(output_path, index=False)
print(f"✔️ Saved to {output_path}")


✔️ Saved to C:\store\git\km-stat-activity\data\real\km_stat_nature_20250421_features.csv
