In [None]:
import pandas as pd
import numpy as np

Decimating background data:
Loading background_data.csv...
  Original: 47,273,698 samples, 15.76 min
  Decimated: 4,727,369 samples
  Reduction: 90.0%
  New sampling rate: 5,000 Hz
  Saved to: background_data_decimated.csv

Decimating EDC fluctuation data:
Loading edc_full_data.csv...
  Original: 101,535,470 samples, 33.85 min
  Decimated: 10,153,547 samples
  Reduction: 90.0%
  New sampling rate: 5,000 Hz
  Saved to: edc_full_data_decimated.csv

File sizes:
  Background: 219 MB
  EDC: 465 MB
  Total: 684 MB
  Target: <1024 MB ✓


In [None]:
def decimate_dataset(input_csv, output_csv, decimation_factor=10, original_rate_hz=50_000):
    """
    Load a CSV and reduce its size by averaging every N consecutive rows.

    The input must contain the columns 'time_s', 'Isd' and 'Vsd'. Rows are
    grouped into consecutive, non-overlapping chunks of ``decimation_factor``
    rows and each chunk is replaced by its mean (the timestamp of a chunk is
    therefore the mean of its timestamps). Trailing rows that do not fill a
    complete chunk are dropped. The result is written to ``output_csv``
    without the index column and is also returned.

    Parameters:
    -----------
    input_csv : str
        Path of the CSV file to read.
    output_csv : str
        Path the decimated CSV is written to.
    decimation_factor : int
        Average every N points (e.g., 10 reduces 50kHz to 5kHz)
    original_rate_hz : float
        Sampling rate of the input in Hz. Only used for the
        "New sampling rate" progress message.

    Returns:
    --------
    pandas.DataFrame
        Decimated data with columns 'time_s', 'Isd' and 'Vsd'.

    Raises:
    -------
    ValueError
        If ``decimation_factor`` is not a positive integer.
    """
    # Reject bad factors up front instead of failing later with an obscure
    # ZeroDivisionError / slicing error after the (slow) CSV load.
    if not isinstance(decimation_factor, (int, np.integer)) or decimation_factor < 1:
        raise ValueError(
            f"decimation_factor must be a positive integer, got {decimation_factor!r}"
        )

    print(f"Loading {input_csv}...")
    df = pd.read_csv(input_csv)

    original_size = len(df)
    # The last timestamp is reported as the recording duration; an empty
    # file has no last row, so report zero instead of raising IndexError.
    original_duration = df['time_s'].iloc[-1] if original_size else 0.0

    print(f"  Original: {original_size:,} samples, {original_duration/60:.2f} min")

    # Only whole chunks can be reshaped; the incomplete tail is discarded.
    n_complete_chunks = original_size // decimation_factor
    trimmed_length = n_complete_chunks * decimation_factor

    def _block_mean(column):
        # Slice the underlying array directly (no DataFrame copy), then view
        # it as (n_chunks, decimation_factor) and average each row.
        values = df[column].to_numpy(dtype=float)[:trimmed_length]
        return values.reshape(-1, decimation_factor).mean(axis=1)

    # Create decimated dataframe
    df_decimated = pd.DataFrame(
        {column: _block_mean(column) for column in ('time_s', 'Isd', 'Vsd')}
    )

    # Save
    df_decimated.to_csv(output_csv, index=False)

    new_size = len(df_decimated)
    # Guard the ratio so an empty input reports 0% rather than dividing by zero.
    reduction = (1 - new_size/original_size) * 100 if original_size else 0.0

    print(f"  Decimated: {new_size:,} samples")
    print(f"  Reduction: {reduction:.1f}%")
    print(f"  New sampling rate: {original_rate_hz/decimation_factor:,.0f} Hz")
    print(f"  Saved to: {output_csv}\n")

    return df_decimated

# Background recording: roughly 2 GB of raw CSV, expected to shrink to ~200 MB.
print("Decimating background data:")
background_decimated = decimate_dataset(
    input_csv='background_data.csv',
    output_csv='background_data_decimated.csv',
    decimation_factor=10,
)

# Full EDC recording: roughly 4 GB of raw CSV, expected to shrink to ~400 MB.
print("Decimating EDC fluctuation data:")
edc_decimated = decimate_dataset(
    input_csv='edc_full_data.csv',
    output_csv='edc_full_data_decimated.csv',
    decimation_factor=10,
)

In [None]:
# Check total size
import os

# os.path.getsize returns bytes; dividing by 1024**2 converts to MiB.
bg_size = os.path.getsize('background_data_decimated.csv') / 1024**2
edc_size = os.path.getsize('edc_full_data_decimated.csv') / 1024**2
total_size = bg_size + edc_size

print("File sizes:")
print(f"  Background: {bg_size:.0f} MB")
print(f"  EDC: {edc_size:.0f} MB")
print(f"  Total: {total_size:.0f} MB")
if total_size < 1024:
    print("  Target: <1024 MB ✓")
else:
    # The files were written with factor 10; assuming size scales inversely
    # with the factor, the target needs factor > 10 * total / 1024.
    # int(...) + 1 is the smallest integer strictly above that bound, whereas
    # plain truncation would suggest a factor that still misses the target.
    needed_factor = int(10 * total_size / 1024) + 1
    print(f"  Target: <1024 MB ✗ (need factor={needed_factor})")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

# Read both decimated recordings (background first, then EDC) into memory.
background_df, edc_df = (
    pd.read_csv(csv_path)
    for csv_path in ('background_data_decimated.csv', 'edc_full_data_decimated.csv')
)

def plot_time_window(dataset_name, start_time, window_duration):
    """
    Plot the Isd current over a time window of the selected dataset.

    The window covers ``start_time <= time_s <= start_time + window_duration``
    (both ends inclusive). A text box in the top-left corner reports the mean,
    standard deviation and sample count of Isd inside the window, or a short
    notice when the window contains no samples.

    Parameters:
    -----------
    dataset_name : str
        'Background' or 'EDC Fluctuation'
    start_time : float
        Start time in seconds
    window_duration : float
        Duration to plot in seconds

    Raises:
    -------
    ValueError
        If ``dataset_name`` is not one of the two known dataset names.
    """
    # Map each selectable name to its module-level dataframe and line colour.
    # An explicit lookup keeps a misspelled name from silently plotting EDC data.
    datasets = {
        'Background': (background_df, 'navy'),
        'EDC Fluctuation': (edc_df, 'darkred'),
    }
    if dataset_name not in datasets:
        raise ValueError(
            f"Unknown dataset {dataset_name!r}; expected one of {list(datasets)}"
        )
    df, color = datasets[dataset_name]

    # Extract window
    end_time = start_time + window_duration
    mask = (df['time_s'] >= start_time) & (df['time_s'] <= end_time)
    window = df.loc[mask]

    # Plot
    fig, ax = plt.subplots(figsize=(14, 5))
    ax.plot(window['time_s'].values, window['Isd'].values,
            linewidth=1, color=color, alpha=0.8)
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Current Isd (A)')
    ax.set_title(f'{dataset_name}: {start_time:.1f}s to {end_time:.1f}s')
    ax.grid(alpha=0.3)

    # Add statistics
    n_samples = len(window)
    if n_samples == 0:
        # Mean/std of an empty selection are NaN; say what happened instead.
        stats_text = 'No samples in this window'
    else:
        isd = window['Isd']
        stats_text = f'Mean: {isd.mean():.6f} A\n'
        stats_text += f'Std: {isd.std():.6f} A\n'
        stats_text += f'Samples: {n_samples:,}'
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes,
            verticalalignment='top', bbox=dict(boxstyle='round',
            facecolor='wheat', alpha=0.5))

    plt.tight_layout()
    plt.show()

# Create interactive widgets
dataset_widget = widgets.Dropdown(
    options=['Background', 'EDC Fluctuation'],
    value='Background',
    description='Dataset:'
)


def _recording_end(dataset_name):
    """Return the largest time_s value (seconds) of the named dataset, or 0.0 if it has none."""
    # Same selection rule as plot_time_window: 'Background', otherwise EDC.
    df = background_df if dataset_name == 'Background' else edc_df
    # max(0.0, nan) evaluates to 0.0, so an empty column yields a valid bound.
    return max(0.0, float(df['time_s'].max()))


# The slider range comes from the data rather than a fixed number of seconds:
# a fixed cap hides every sample recorded after it, and the two recordings
# have different lengths.
start_time_widget = widgets.FloatSlider(
    value=0,
    min=0,
    max=_recording_end(dataset_widget.value),
    step=1,
    description='Start (s):',
    continuous_update=False
)


def _sync_start_range(change):
    """Resize the start-time slider to the newly selected dataset's length."""
    start_time_widget.max = _recording_end(change['new'])


# Registered before the interactive plot is built so the slider range is
# already updated when the plot callback runs for the new dataset.
dataset_widget.observe(_sync_start_range, names='value')

duration_widget = widgets.Dropdown(
    options=[1, 5, 10, 30, 60],
    value=10,
    description='Duration (s):'
)

# Create interactive plot
interactive_plot = widgets.interactive(
    plot_time_window,
    dataset_name=dataset_widget,
    start_time=start_time_widget,
    window_duration=duration_widget
)

# Display
display(interactive_plot)

interactive(children=(Dropdown(description='Dataset:', options=('Background', 'EDC Fluctuation'), value='Backg…