# Compute Average Mouse Speed (`math.hypot`) and Save to CSV
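This notebook loads the mouse-movement data (the processed CSV at `output_path` if it already exists, otherwise the Parquet file(s) or CSV given by `input_path`), computes the average distance in pixels between consecutive `(x, y)` samples using Python's `math.hypot`, adds it as a new column `avg_speed`, and writes the result to the CSV at `output_path`, overwriting it if it exists. No timestamps are used in the calculation, so `avg_speed` is measured in pixels per sample; it corresponds to pixels/second only if the samples are recorded at a fixed one-second interval.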

In [1]:
# Parameters
# Concrete values for this run. A later cell mentions Papermill, so these were
# presumably injected by it rather than typed by hand — TODO confirm.
# mode: label for the run; it is not read by any code visible in this notebook.
mode = "fake"
# input_path: a list of source files; the loader reads list entries as Parquet.
input_path = ["C:\\store\\git\\km-stat-activity\\parquet_dataset\\date=2025-04-24\\profile_guid=ca730baf-3749-401e-b908-a151e68eb9ab\\part.23.parquet"]
# output_path: CSV file that the processed DataFrame is written to. If it already
# exists, the loader reads it instead of input_path.
output_path = "C:\\store\\git\\km-stat-activity\\processed\\fake\\profile_guid=ca730baf-3749-401e-b908-a151e68eb9ab\\2025-04-24-processed.csv"


In [2]:
# Parameters
# Give every run parameter a value: keep whatever an earlier cell already
# assigned, and fall back to None when the name has not been defined yet.
mode = globals().get("mode")
input_path = globals().get("input_path")
output_path = globals().get("output_path")



In [3]:
import os
import pandas as pd
import numpy as np
import math
from pathlib import Path
import ast

# 0. Parameter check (when run through Papermill these may already be set above).
# Keep an existing value; otherwise define the name as None so later code can
# test it without hitting a NameError.
input_path = globals().get("input_path")
output_path = globals().get("output_path")

# 1. Veri Yükleme fonksiyonu
def load_df(input_path, output_path):
    """Load the working DataFrame from the first usable source.

    Sources are tried in this order:

    1. ``output_path`` - if it is truthy and exists on disk, it is read as CSV.
    2. ``input_path`` as a ``list`` - every entry is read as a Parquet file
       (pyarrow engine) and the frames are concatenated with a fresh index.
    3. ``input_path`` as any other truthy value - it is read as CSV.

    Args:
        input_path: A list of Parquet paths, a single CSV path, or a falsy value.
        output_path: Path of a previously written CSV, or a falsy value.

    Returns:
        pandas.DataFrame: The loaded data.

    Raises:
        ValueError: If neither argument yields a usable source.
    """
    # An already-written output file wins over the raw input.
    if output_path and os.path.exists(output_path):
        return pd.read_csv(output_path)

    if isinstance(input_path, list):
        parquet_files = list(map(Path, input_path))
        frames = [pd.read_parquet(pq_file, engine="pyarrow") for pq_file in parquet_files]
        return pd.concat(frames, ignore_index=True)

    if input_path:
        return pd.read_csv(input_path)

    raise ValueError("input_path veya output_path geçerli değil")

# 2. Liste parse fonksiyonu (stringleri listeye çevir)
def safe_parse_list(s):
    """Coerce a cell value into a Python list, falling back to ``[]``.

    Accepted inputs:

    * ``list`` - returned unchanged.
    * ``numpy.ndarray`` - converted with ``tolist()``. List-typed Parquet
      columns are materialised by pandas/pyarrow as object columns holding
      ndarrays; without this branch every such cell would be turned into ``[]``.
    * ``tuple`` - converted to a list.
    * ``str`` - parsed with ``ast.literal_eval``; the result is used only if
      it is a list (or a tuple, which is converted to a list).

    Anything else (``None``, NaN, malformed text, literals that are not
    sequences) yields an empty list.

    Args:
        s: The raw cell value.

    Returns:
        list: The parsed values, or ``[]`` when ``s`` cannot be interpreted.
    """
    if isinstance(s, list):
        return s
    if isinstance(s, np.ndarray):
        return s.tolist()
    if isinstance(s, tuple):
        return list(s)
    try:
        parsed = ast.literal_eval(s)
    except Exception:
        # Deliberate best-effort: unparseable cells count as "no samples".
        return []
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, tuple):
        return list(parsed)
    return []


# 3. Ortalama hız hesapla
def compute_avg_speed(x, y):
    """Return the mean Euclidean distance between consecutive (x, y) samples.

    Each step length is ``math.hypot(dx, dy)`` between sample ``i`` and
    sample ``i + 1``. No time information is used, so the result is a
    distance per sample step, not a distance per unit of time.

    Args:
        x: Sequence of x coordinates (list, tuple or numpy array).
        y: Sequence of y coordinates (list, tuple or numpy array).

    Returns:
        float: The mean step length, or ``0.0`` when either input is
        ``None``, is not a sized sequence (e.g. NaN), or fewer than two
        paired samples are available. If the sequences differ in length,
        only the leading samples present in both are used.
    """
    if x is None or y is None:
        return 0.0
    try:
        # len() instead of truthiness: `not array` raises for numpy arrays
        # with more than one element.
        n_samples = min(len(x), len(y))
    except TypeError:
        # Scalars such as NaN (a missing cell) have no length.
        return 0.0
    if n_samples < 2:
        return 0.0
    distances = [
        math.hypot(x[i + 1] - x[i], y[i + 1] - y[i])
        for i in range(n_samples - 1)
    ]
    return float(np.mean(distances))

# 4. Fail fast when there is nowhere to write: DataFrame.to_csv(None) only
#    returns the CSV text, so the run would report success without saving.
if not output_path:
    raise ValueError("output_path is required to save the processed CSV")

# 5. Load the DataFrame. load_df prefers an existing CSV at output_path, so on
#    a re-run `df` already holds the previously saved rows.
df = load_df(input_path, output_path)

# 6. Remove repeated columns whose names end in ".1", ".2", ...
#    .copy() gives an independent frame so the column assignments below do not
#    write into a slice of the loaded data.
df = df.loc[:, ~df.columns.str.contains(r'\.\d+$')].copy()

# 7. Parse x and y: object-typed columns may hold list literals stored as text.
for coord_col in ("x", "y"):
    if df[coord_col].dtype == "object":
        df[coord_col] = df[coord_col].apply(safe_parse_list)

# 8. Compute the average speed per row. A plain comprehension also works for
#    an empty frame, where df.apply(..., axis=1) would not return a Series.
df["avg_speed"] = [compute_avg_speed(xs, ys) for xs, ys in zip(df["x"], df["y"])]

# 9. This df is the result. It already is the merged result: when output_path
#    existed, it was loaded from that file, so re-reading the CSV to merge
#    avg_speed into it is unnecessary.
df_result = df
df_merged = df_result

# 10. Save (overwrite the CSV, do NOT concat). Create the target folder first
#     so a first run into a new directory does not fail.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df_merged.to_csv(output_path, index=False)

print(f"✔️ Saved CSV with full df including avg_speed: {output_path}")


✔️ Saved CSV with full df including avg_speed: C:\store\git\km-stat-activity\processed\fake\profile_guid=ca730baf-3749-401e-b908-a151e68eb9ab\2025-04-24-processed.csv
