# Filter 60s Intervals with File Finder
1. List CSV files in your data directory.
2. Set `input_path` to the full path of the file you picked from the list.
3. Run filtering to keep only 60-second intervals, display dropped rows, and save a new CSV.

In [None]:
import os
from pathlib import Path

# 1) Set your data directory here:
data_dir = Path(r"C:\store\git\km-stat-activity\data\real")
print("Working directory:", data_dir)

# Collect the CSV files once, sorted by path so the listing is stable between
# runs (glob order is filesystem-dependent). A missing directory yields an
# empty list instead of being silently indistinguishable from "no files".
csv_files = sorted(data_dir.glob("*.csv")) if data_dir.is_dir() else []

print("CSV files found:")
for f in csv_files:
    print(" -", f.name)

# An empty listing almost always means data_dir is wrong; say so explicitly
# rather than leaving the user with a blank list.
if not data_dir.is_dir():
    print(" (directory does not exist - check data_dir)")
elif not csv_files:
    print(" (none)")

In [None]:
import pandas as pd

# 2) After checking the list above, set the exact file name:
from pathlib import Path
input_path = Path(r"C:\store\git\km-stat-activity\data\real\YOUR_FILE_NAME.csv")

# Fail early with an actionable message if the placeholder path was not
# replaced (same exception type pd.read_csv would raise for a missing file).
if not input_path.is_file():
    raise FileNotFoundError(
        f"Input CSV not found: {input_path} - set input_path to one of the files listed above."
    )

# 3) Read and filter durations
# NOTE(review): pd.eval evaluates each "x"/"y" cell as an expression. Only run
# this on CSV files you trust. If these columns hold plain literals (e.g. lists
# of numbers), ast.literal_eval is likely a safer converter - confirm the
# column format before switching.
df = pd.read_csv(
    input_path,
    parse_dates=["start_date_time", "end_date_time"],
    converters={"x": pd.eval, "y": pd.eval}
)
# Interval length in seconds; rows with an unparseable/missing timestamp get
# NaN here and therefore fall into the "dropped" set below.
df["duration_s"] = (df["end_date_time"] - df["start_date_time"]).dt.total_seconds()

# Evaluate the 60-second test once so the dropped and kept sets are exact
# complements of each other.
is_60s = df["duration_s"] == 60

# 4) Display dropped rows
dropped = df[~is_60s]
if not dropped.empty:
    print(f"Dropped {len(dropped)} rows:")
    display(dropped)
else:
    print("No rows dropped; all intervals are exactly 60 seconds.")

# 5) Keep only 60-second intervals (the helper column is not written out)
df_clean = df[is_60s].drop(columns="duration_s")

# 6) Save to new CSV next to the input, e.g. foo.csv -> foo_60s_intervals.csv
output_path = input_path.with_name(input_path.stem + "_60s_intervals" + input_path.suffix)
# utf-8-sig writes a BOM at the start of the file
df_clean.to_csv(output_path, index=False, encoding="utf-8-sig")
print("✅ Cleaned CSV saved to:", output_path)