# Compute Mouse Average Acceleration and Save to New CSV
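This notebook loads the mouse-activity data that contains the average speed column (`avg_speed`) — from the input parquet/CSV file(s), or from a previously written output CSV if one already exists — computes the average acceleration (the change in `avg_speed` between consecutive rows), adds it as a new column `avg_acceleration`, and writes the result to a new CSV copy.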

In [1]:
# Parameters
# Run-specific values for this notebook (presumably injected by a notebook
# runner such as papermill — TODO confirm). The paths are absolute,
# machine-specific Windows paths.
# Run mode label; not referenced by the other cells visible in this notebook.
mode = "fake"
# List of input parquet file(s) to load and concatenate.
input_path = ["C:\\store\\git\\km-stat-activity\\parquet_dataset\\date=2025-04-22\\profile_guid=2badd898-49c0-4cf9-8ab4-8c195e3dcf7c\\part.10.parquet"]
# Destination CSV; if it already exists it is read back instead of input_path.
output_path = "C:\\store\\git\\km-stat-activity\\processed\\fake\\profile_guid=2badd898-49c0-4cf9-8ab4-8c195e3dcf7c\\2025-04-22-processed.csv"


In [2]:
# Parameter fallbacks
# Any parameter that no earlier cell has defined is bound to None so that the
# cells below can reference it without hitting a NameError. Values that are
# already defined are left untouched.
globals().setdefault("mode", None)
globals().setdefault("input_path", None)
globals().setdefault("output_path", None)




In [3]:
import os
import pandas as pd
from pathlib import Path

# 1. Data loading function
def _read_table(path, fallback):
    """Read one file, picking the pandas reader from the file extension.

    ``.csv`` is read with ``pd.read_csv`` and ``.parquet`` with
    ``pd.read_parquet`` (pyarrow engine). Any other extension uses
    ``fallback``, which must be ``"csv"`` or ``"parquet"``.
    """
    suffix = os.path.splitext(str(path))[1].lower()
    kind = {".csv": "csv", ".parquet": "parquet"}.get(suffix, fallback)
    if kind == "parquet":
        return pd.read_parquet(path, engine="pyarrow")
    return pd.read_csv(path)


def load_df(input_path, output_path):
    """Load the working DataFrame.

    Parameters
    ----------
    input_path : str, path-like, list or tuple of those, or None
        Source file(s). A list/tuple is read file by file and concatenated
        with a fresh index; members default to parquet unless they end in
        ``.csv``. A single path defaults to CSV unless it ends in
        ``.parquet``.
    output_path : str, path-like, or None
        If this file already exists it is read as CSV and returned, and
        ``input_path`` is ignored.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no existing output file is found and ``input_path`` is empty
        (None, "", or an empty list/tuple).
    """
    # An existing output file takes precedence over the raw input.
    if output_path and os.path.exists(output_path):
        return pd.read_csv(output_path)

    if not input_path:
        # Message (Turkish): "input_path or output_path is not valid".
        # An empty list lands here too, instead of failing inside pd.concat.
        raise ValueError("input_path veya output_path geçerli değil")

    if isinstance(input_path, (list, tuple)):
        frames = [_read_table(Path(p), "parquet") for p in input_path]
        return pd.concat(frames, ignore_index=True)

    # Single source: passed through unchanged so the pandas reader sees exactly
    # what the caller supplied.
    return _read_table(input_path, "csv")

# 2. Load
# Fail early when there is nowhere to write: DataFrame.to_csv(None) only
# returns the CSV text, so the "Saved" message below would be false.
if not output_path:
    raise ValueError("output_path is required to save the processed CSV.")

df = load_df(input_path, output_path)

# 3. Drop duplicated columns left over from earlier notebooks (names ending in
#    .1, .2, ...). The .copy() makes the filtered frame independent, so the
#    column assignment in step 4 is not performed on a slice of another frame.
df = df.loc[:, ~df.columns.str.contains(r'\.\d+$')].copy()

# 4. Compute the acceleration from avg_speed
if "avg_speed" not in df.columns or not df["avg_speed"].notna().any():
    raise ValueError("avg_speed column is missing or entirely empty in input data.")

if "avg_acceleration" not in df.columns:
    # Row-to-row difference of avg_speed; NaN differences (the first row and
    # rows next to a missing speed) are replaced with 0.
    # NOTE(review): the difference is not divided by elapsed time — this
    # presumably assumes evenly spaced rows; confirm.
    df["avg_acceleration"] = df["avg_speed"].diff().fillna(0)

# 5. Full data output
df_result = df

# 6. No merge with the previous file content: when the output file exists,
#    load_df has already loaded it, so df_result already contains it. The
#    former column-wise concat re-read that file and its result was never
#    written.

# 7. Write the CSV, creating the destination folder if necessary
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df_result.to_csv(output_path, index=False)
print(f"✔️ Saved CSV with full df including avg_acceleration: {output_path}")


✔️ Saved CSV with full df including avg_acceleration: C:\store\git\km-stat-activity\processed\fake\profile_guid=2badd898-49c0-4cf9-8ab4-8c195e3dcf7c\2025-04-22-processed.csv
