In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, FloatRangeSlider, Checkbox, FloatSlider, Layout

CSV_INPUT = "/Users/xiaodong/Desktop/UOX-data/UOX1/ICFuox1min15p.csv"
# The filtered CSV is written into the same folder as CSV_INPUT.
CSV_FILTERED_OUTPUT_BASENAME = "filtered_centers.csv"

# Load the input table; a missing file degrades to an empty DataFrame so the
# rest of the notebook can still run and report "no data".
try:
    df = pd.read_csv(CSV_INPUT)
except FileNotFoundError:
    print(f"ERROR: The file {CSV_INPUT} was not found. Please check the path.")
    df = pd.DataFrame()
else:
    print(f"Loaded {len(df)} rows from {CSV_INPUT}")

# Slider bounds: the observed data extent when rows are available, otherwise a
# fixed 0..1000 placeholder range.
if df.empty:
    x_min_default, x_max_default = 0, 1000
    y_min_default, y_max_default = 0, 1000
else:
    x_min_default, x_max_default = df['center_x'].min(), df['center_x'].max()
    y_min_default, y_max_default = df['center_y'].min(), df['center_y'].max()

def filter_and_plot(x_range, y_range, remove_outliers, outlier_std):
    """
    Filter the module-level DataFrame ``df`` and report, save and plot the result.

    Rows are kept when both 'center_x' and 'center_y' fall inside the given
    inclusive ranges. Optionally, rows whose coordinates lie too far from the
    mean of the range-filtered data are dropped as outliers.

    Side effects: prints summary statistics, overwrites the filtered CSV
    (``CSV_FILTERED_OUTPUT_BASENAME`` in the folder of ``CSV_INPUT``) on every
    call, and shows one scatter plot plus one histogram per coordinate.

    Parameters
    ----------
    x_range : tuple(float, float)
        The (min, max) range for 'center_x', both ends inclusive.
    y_range : tuple(float, float)
        The (min, max) range for 'center_y', both ends inclusive.
    remove_outliers : bool
        Whether to remove outliers from the range-filtered data.
    outlier_std : float
        The threshold in standard deviations for outlier removal.

    Returns
    -------
    None
        Returns early, without saving or plotting, when ``df`` is empty.
    """
    if df.empty:
        print("No data loaded. Exiting.")
        return

    # Unpack the slider values
    x_min, x_max = x_range
    y_min, y_max = y_range

    # Keep rows inside the selected window (between() is inclusive on both ends)
    in_window = (
        df['center_x'].between(x_min, x_max)
        & df['center_y'].between(y_min, y_max)
    )
    filtered = df[in_window]

    # Optionally remove outliers based on standard deviation threshold
    if remove_outliers:
        # Deviations and spreads are all measured on the range-filtered data
        # before any outlier row is dropped.
        x_dev = (filtered['center_x'] - filtered['center_x'].mean()).abs()
        y_dev = (filtered['center_y'] - filtered['center_y'].mean()).abs()
        x_std = filtered['center_x'].std()
        y_std = filtered['center_y'].std()

        # The sample std is NaN when fewer than two rows remain. Comparing
        # against NaN is always False, which would discard a lone valid row,
        # so an undefined spread is treated as zero spread instead.
        if pd.isna(x_std):
            x_std = 0.0
        if pd.isna(y_std):
            y_std = 0.0

        # A row survives only if BOTH coordinates are within the threshold
        filtered = filtered[
            (x_dev <= outlier_std * x_std) &
            (y_dev <= outlier_std * y_std)
        ]
        print(f"Outliers removed using threshold: {outlier_std} standard deviations.")

    # Print out descriptive statistics
    print("=== Filtered Data Statistics ===")
    print(f"Number of rows: {len(filtered)}")
    for col in ['center_x', 'center_y']:
        mean_val = filtered[col].mean()
        median_val = filtered[col].median()
        std_val = filtered[col].std()
        print(f"{col} => mean: {mean_val:.3f}, median: {median_val:.3f}, std: {std_val:.3f}")

    # Save the filtered data to a new CSV file in the same folder as the input
    output_folder = os.path.dirname(CSV_INPUT)
    output_filename = os.path.join(output_folder, CSV_FILTERED_OUTPUT_BASENAME)
    filtered.to_csv(output_filename, index=False)
    print(f"\nFiltered CSV saved to: {output_filename}\n")

    # Plot the scatter of the filtered centers
    plt.figure(figsize=(8, 6))
    plt.scatter(filtered['center_x'], filtered['center_y'], marker='o')
    plt.xlabel('Center X')
    plt.ylabel('Center Y')
    plt.title('Scatter Plot of Center Coordinates')
    plt.grid(True)
    plt.show()

    # One histogram figure per coordinate, center_x first
    for col, label in (('center_x', 'Center X'), ('center_y', 'Center Y')):
        plt.figure(figsize=(8, 6))
        plt.hist(filtered[col], bins=30, edgecolor='black')
        plt.xlabel(label)
        plt.ylabel('Frequency')
        plt.title(f'Histogram of {label}')
        plt.grid(True)
        plt.show()

# Build the interactive controls and bind them to filter_and_plot. Each key of
# the mapping is the name of the filter_and_plot parameter that the widget
# drives; the range sliders span the default bounds computed from the data.
_ = interact(
    filter_and_plot,
    **{
        'x_range': FloatRangeSlider(
            value=(x_min_default, x_max_default),
            min=x_min_default,
            max=x_max_default,
            step=0.1,
            description='X range',
            layout=Layout(width='800px'),
        ),
        'y_range': FloatRangeSlider(
            value=(y_min_default, y_max_default),
            min=y_min_default,
            max=y_max_default,
            step=0.1,
            description='Y range',
            layout=Layout(width='800px'),
        ),
        # Outlier removal is off by default
        'remove_outliers': Checkbox(value=False, description='Remove Outliers'),
        # Threshold in standard deviations, adjustable from 1.0 to 5.0
        'outlier_std': FloatSlider(
            value=3.0,
            min=1.0,
            max=5.0,
            step=0.1,
            description='Outlier Std',
        ),
    },
)


In [None]:

# Re-read the CSV written by the filtering step; it sits in the same folder as
# the input file.
filtered_csv_path = os.path.join(
    os.path.dirname(CSV_INPUT), CSV_FILTERED_OUTPUT_BASENAME
)
try:
    df_filtered = pd.read_csv(filtered_csv_path)
except FileNotFoundError:
    print("ERROR: filtered_centers.csv not found. Have you run the filtering above?")
    df_filtered = pd.DataFrame()
else:
    print(f"Loaded filtered data ({len(df_filtered)} rows) from {filtered_csv_path}")

# Plot each coordinate against the frame number, side by side:
# center_x in the left panel, center_y in the right panel.
if not df_filtered.empty:
    plt.figure(figsize=(12, 5))

    for _panel, (_column, _label) in enumerate(
        (("center_x", "Center X"), ("center_y", "Center Y")), start=1
    ):
        plt.subplot(1, 2, _panel)
        plt.plot(df_filtered["frame_number"], df_filtered[_column], marker='o')
        plt.title(f"{_label} vs Frame Number")
        plt.xlabel("Frame Number")
        plt.ylabel(_label)
        plt.grid(True)

    plt.tight_layout()
    plt.show()
