# Compute Mouse Movement Linearity

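Set `input_path` below to either a single CSV file path or a list of Parquet file paths, and set `output_path` to the destination CSV file.
Then run all cells to compute the `linearity` metric and save a new CSV. If the file at `output_path` already exists, it is loaded instead of `input_path`.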

In [1]:
# Parameters
# NOTE(review): these are absolute, machine-specific Windows paths; they look
# like values supplied per run rather than hand-written defaults — confirm.
mode = "fake"
input_path = [
    "C:\\store\\git\\km-stat-activity\\parquet_dataset\\date=2025-04-25\\profile_guid=7bf57bc1-d089-43d1-b8c4-ac81ad519915\\part.33.parquet",
    "C:\\store\\git\\km-stat-activity\\parquet_dataset\\date=2025-04-25\\profile_guid=7bf57bc1-d089-43d1-b8c4-ac81ad519915\\part.34.parquet",
]
output_path = (
    "C:\\store\\git\\km-stat-activity\\processed\\fake\\profile_guid=7bf57bc1-d089-43d1-b8c4-ac81ad519915\\2025-04-25-processed.csv"
)


In [2]:
# Parameter fallbacks: every parameter that an earlier cell did not define is
# bound to None here, so the cells below can always reference all three names.
# A parameter that already exists keeps its current value.
mode = globals().get("mode")
input_path = globals().get("input_path")
output_path = globals().get("output_path")



In [3]:
import os
import pandas as pd
from pathlib import Path
import ast
import numpy as np

# 1. Data loading function
def load_df(input_path, output_path):
    """Load the working DataFrame from the first usable source.

    Sources are tried in this order:

    1. ``output_path`` — if it is set and the file exists, it is read as CSV
       (a previously written result is reused instead of the raw input).
    2. ``input_path`` as a list/tuple of paths — each one is read as a Parquet
       file with the pyarrow engine and the frames are concatenated with a
       fresh index.
    3. ``input_path`` as any other truthy value — read as a single CSV file.

    Args:
        input_path: A CSV path, a list/tuple of Parquet paths, or None.
        output_path: Path of the result CSV, or None.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        ValueError: If no source is usable, including an empty list of
            Parquet paths.
    """
    if output_path and os.path.exists(output_path):
        return pd.read_csv(output_path)

    if isinstance(input_path, (list, tuple)):
        if not input_path:
            # pd.concat([]) would fail with an unrelated "No objects to
            # concatenate" message; report the real problem instead.
            raise ValueError("input_path veya output_path geçerli değil")
        frames = [pd.read_parquet(Path(p), engine="pyarrow") for p in input_path]
        return pd.concat(frames, ignore_index=True)

    if input_path:
        return pd.read_csv(input_path)

    raise ValueError("input_path veya output_path geçerli değil")

df = load_df(input_path, output_path)

# 2. Drop columns whose name ends in a dot followed by digits (".1", ".2", ...)
has_numeric_suffix = df.columns.str.contains(r'\.\d+$')
df = df.loc[:, ~has_numeric_suffix]

# 3. Normalize list-valued cells
def safe_parse_list(s):
    """Convert one cell value into a Python list, falling back to ``[]``.

    Args:
        s: A list (returned unchanged), a tuple or NumPy array with at least
            one dimension (converted to a list), or a string holding a Python
            literal such as ``"[1, 2, 3]"``.

    Returns:
        The parsed list, or ``[]`` when the value cannot be interpreted as an
        iterable literal (e.g. ``None``, NaN, malformed text, a bare number).
    """
    if isinstance(s, list):
        return s
    if isinstance(s, tuple):
        return list(s)
    if isinstance(s, np.ndarray) and s.ndim >= 1:
        # ast.literal_eval rejects arrays, which would otherwise discard
        # array-valued cells by turning them into [].
        return s.tolist()
    try:
        return list(ast.literal_eval(s))
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only parse/conversion failures are swallowed; KeyboardInterrupt and
        # SystemExit propagate, unlike with a bare ``except:``.
        return []

# Parse both coordinate columns cell by cell into Python lists
for _coord in ("x", "y"):
    df[_coord] = df[_coord].apply(safe_parse_list)

# 4. Compute linearity
def compute_linearity(x, y):
    """Return the ratio of straight-line distance to travelled path length.

    The path length is the sum of Euclidean distances between consecutive
    ``(x[i], y[i])`` points; the straight-line distance is measured from the
    first point to the last.

    Args:
        x: Sequence (list, tuple or NumPy array) of x coordinates.
        y: Sequence of y coordinates, expected to have the same length as ``x``.

    Returns:
        ``straight / path`` as a float. ``0.0`` is returned when the ratio is
        undefined: either input is None, fewer than two points, ``x`` and ``y``
        differ in length, or the path length is zero.
    """
    if x is None or y is None:
        return 0.0
    n_points = len(x)
    # A length mismatch means the samples cannot be paired. Without this check
    # NumPy either raises or silently broadcasts a length-1 diff, producing a
    # meaningless ratio.
    if n_points < 2 or len(y) != n_points:
        return 0.0

    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)

    path_dist = np.hypot(np.diff(xs), np.diff(ys)).sum()
    if path_dist == 0:
        return 0.0
    straight_dist = np.hypot(xs[-1] - xs[0], ys[-1] - ys[0])
    return float(straight_dist / path_dist)

# Pair the coordinate lists row by row. Unlike a row-wise df.apply(axis=1),
# this also yields a plain (empty) column when the frame has no rows.
df["linearity"] = [compute_linearity(xs, ys) for xs, ys in zip(df["x"], df["y"])]

# 5. Write the result
df_result = df
if output_path:
    # Create the destination folder first so the write does not fail on a
    # directory that does not exist yet.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    df_result.to_csv(output_path, index=False)
    print(f"✔️ Saved CSV with full df including linearity: {output_path}")
else:
    # to_csv(None) only returns the CSV text; nothing would be written, so do
    # not report a successful save.
    print("⚠️ output_path is not set; CSV was not written.")


✔️ Saved CSV with full df including linearity: C:\store\git\km-stat-activity\processed\fake\profile_guid=7bf57bc1-d089-43d1-b8c4-ac81ad519915\2025-04-25-processed.csv
