# Interactive Spectral Deconvolution Explorer

This notebook provides an interactive tool to explore spectral deconvolution results from PDC enzyme assays.

**Features:**
- View NADH concentration vs time
- Interactively explore spectral fits at each timepoint using a slider
- Visualize raw data, fitted spectrum, and individual components (NADH, pyruvate)
- Inspect fit quality (R²) in real time

## Setup

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display
from pathlib import Path
import sys

# Put the directory two levels above the current working directory at the
# front of sys.path (only if it is not already listed), so that the `pda`
# imports that follow can be resolved.
parent_path = str(Path.cwd().parent.parent)
if sys.path.count(parent_path) == 0:
    sys.path[:0] = [parent_path]

from pda.data_io import load_kinetic_data
from pda.spectral import calculate_concentrations
from pda.timecourse import process_pdc_timecourse

print("✓ Setup complete")

## Load Data

In [None]:
# Assay metadata is read from a Google Sheet published as CSV
metadata_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRVpwYqImFkaUigsWgrO9MRtWjYWwps82EExnomLqNr_hOUNViKF_fFyAhJfIqe3hDq0IEG76W4v_fO/pub?output=csv"
metadata_df = pd.read_csv(metadata_url)

# NADH / pyruvate standards table (reported below as wavelength points)
standards_df = pd.read_csv("../spectra_data/NADH_Pyruvate_Standards.csv")

# Keep only PDC forward assays whose 'Ignore' cell is empty
is_pdc_fwd = metadata_df['Assay'] == 'PDC_fwd'
not_ignored = metadata_df['Ignore'].isna()
pdc_assays = metadata_df[is_pdc_fwd & not_ignored].copy()

# Derive each assay's CSV filename from its '.KD' filename, record whether
# that CSV is present under ../assay_data, and drop assays without one
assay_data_path = Path("../assay_data")
pdc_assays['csv_filename'] = pdc_assays['Filename'].str.replace('.KD', '.csv', regex=False)
pdc_assays['csv_exists'] = pdc_assays['csv_filename'].apply(
    lambda name: assay_data_path.joinpath(name).exists()
)
has_csv = pdc_assays['csv_exists']
pdc_assays = pdc_assays[has_csv].copy()

print(f"✓ Loaded {len(pdc_assays)} PDC assays")
print(f"✓ Loaded {len(standards_df)} wavelength points from standards")

## Select and Process Dataset

Specify the dataset you want to explore by setting the `dataset_name` variable below, using the format `"<csv filename> - <cuvette>"` (for example `"0108 1600MM PYR -1.csv - CELL_1"`).

In [None]:
# Specify dataset to explore (format: "filename.csv - CUVETTE")
dataset_name = "0108 1600MM PYR -1.csv - CELL_1"

# Split on the LAST " - " so the cuvette is always the final component,
# and fail with a readable message instead of a bare IndexError when the
# separator is missing.
parts = dataset_name.rsplit(' - ', 1)
if len(parts) != 2:
    raise ValueError(
        f"dataset_name {dataset_name!r} must have the form 'filename.csv - CUVETTE'"
    )
csv_filename, cuvette = parts

# Look up the assay metadata row for this file / cuvette combination
matching_assays = pdc_assays[
    (pdc_assays['csv_filename'] == csv_filename) &
    (pdc_assays['Cuvette'] == cuvette)
]
if matching_assays.empty:
    # Without this check, .iloc[0] below would raise an uninformative
    # "single positional indexer is out-of-bounds" IndexError.
    raise ValueError(
        f"No PDC assay matches {dataset_name!r}: no row of pdc_assays has "
        f"csv_filename={csv_filename!r} and Cuvette={cuvette!r} "
        f"(pdc_assays only lists assays whose CSV exists under {assay_data_path})"
    )
# If several rows match, the first one is used
assay = matching_assays.iloc[0]

# Load the spectra recorded for this cuvette from the assay's CSV file
csv_path = assay_data_path / assay['csv_filename']
spectral_df = load_kinetic_data(str(csv_path), sample_filter=assay['Cuvette'])

# Process timecourse with fit_intercept=True (chosen by the original author
# for smooth results); plotting and verbose output are switched off here
print(f"Processing: {dataset_name}")
results = process_pdc_timecourse(
    spectral_df=spectral_df,
    standards_df=standards_df,
    assay_start_time=assay['Start_time_s'],
    blank_time=assay['Blank_time_s'],
    initial_pyruvate_mM=assay['Pyruvate_mM'],
    method='constrained',
    wavelength_range=(320, 420),
    absorbance_max=2,
    fit_intercept=True,
    plot=False,
    verbose=False
)

# Short summary of the processed timecourse
print(f"✓ Processed {len(results)} time points")
print(f"  NADH range: {results['NADH_mM'].min():.4f} to {results['NADH_mM'].max():.4f} mM")
print(f"  Mean R²: {results['R_squared'].mean():.4f}")

## Interactive Explorer

Use the slider to navigate through timepoints and see:
- **Top panel**: NADH concentration vs time (vertical line shows selected timepoint)
- **Bottom panel**: Spectral deconvolution at the selected timepoint
  - Gray points: Raw absorbance data
  - Black line: Total fitted spectrum
  - Blue dashed: NADH component
  - Red dashed: Pyruvate component

In [None]:
# Every column of spectral_df except the bookkeeping ones is treated as a
# spectral (per-wavelength) column
non_spectral = ('sample', 'Time_s', 'filename')
spectral_cols = [c for c in spectral_df.columns if c not in non_spectral]

# Two stacked panels of equal height: timecourse on top, spectrum below
panel_titles = (
    'NADH Concentration vs Time',
    'Spectral Deconvolution at Selected Timepoint',
)
fig = make_subplots(
    rows=2,
    cols=1,
    row_heights=[0.5, 0.5],
    subplot_titles=panel_titles,
    vertical_spacing=0.12,
)

# Trace 0 (top panel): NADH concentration over time
nadh_series = results['NADH_mM']
timecourse_trace = go.Scatter(
    x=results['Time_Relative_s'],
    y=nadh_series,
    mode='lines',
    name='NADH',
    line={'color': 'blue', 'width': 2},
    showlegend=False,
)
fig.add_trace(timecourse_trace, row=1, col=1)

# Trace 1 (top panel): dashed vertical marker for the selected timepoint,
# spanning the full NADH range and starting at x = 0
selection_marker = go.Scatter(
    x=[0, 0],
    y=[nadh_series.min(), nadh_series.max()],
    mode='lines',
    name='Selected Time',
    line={'color': 'red', 'width': 2, 'dash': 'dash'},
    showlegend=False,
)
fig.add_trace(selection_marker, row=1, col=1)

# Traces 2-5 (bottom panel): empty placeholders that the slider callback
# fills in; their order here fixes their indices in fig.data
placeholder_specs = [
    {'mode': 'markers', 'name': 'Raw Data',
     'marker': {'size': 4, 'color': 'gray'}},
    {'mode': 'lines', 'name': 'Total Fit',
     'line': {'color': 'black', 'width': 2}},
    {'mode': 'lines', 'name': 'NADH Component',
     'line': {'color': 'blue', 'width': 2, 'dash': 'dash'}},
    {'mode': 'lines', 'name': 'Pyruvate Component',
     'line': {'color': 'red', 'width': 2, 'dash': 'dash'}},
]
for spec in placeholder_specs:
    fig.add_trace(go.Scatter(x=[], y=[], **spec), row=2, col=1)

# Axis titles, keyed by subplot row
axis_titles = {
    1: ("Time from Assay Start (s)", "NADH Concentration (mM)"),
    2: ("Wavelength (nm)", "Absorbance"),
}
for panel_row, (x_title, y_title) in axis_titles.items():
    fig.update_xaxes(title_text=x_title, row=panel_row, col=1)
    fig.update_yaxes(title_text=y_title, row=panel_row, col=1)

# Overall layout; the legend is anchored at y=0.48 on the right-hand side
fig.update_layout(
    height=800,
    template='plotly_white',
    showlegend=True,
    legend={'yanchor': "top", 'y': 0.48, 'xanchor': "right", 'x': 0.99},
)

# Wrap the figure in a FigureWidget so its traces can be updated in place
fig_widget = go.FigureWidget(fig)

# Integer slider over the row positions of `results`; it only fires when
# the handle is released (continuous_update=False)
slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(results) - 1,
    step=1,
    description='Timepoint:',
    continuous_update=False,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='80%'),
)

# Text label, initialised with the time of the first row of `results`
info_label = widgets.Label(value=f"Time: {results.iloc[0]['Time_Relative_s']:.1f} s")

print("✓ Interactive plot created")

In [None]:
# Define update function for slider
def update_plot(change):
    """Redraw the explorer for the timepoint currently selected on ``slider``.

    Refreshes ``info_label``, moves the vertical marker on the top panel,
    and re-runs ``calculate_concentrations`` on the raw spectrum recorded
    closest in time to the selected row of ``results`` in order to refill
    the four bottom-panel traces (``fig_widget.data[2]`` .. ``[5]``).

    Args:
        change: Change notification passed by the widget observer. It is
            not inspected; the row position is always read from
            ``slider.value``.
    """
    selected_row = results.iloc[slider.value]
    selected_time_rel = selected_row['Time_Relative_s']

    # Summary line shown above the slider
    info_label.value = (
        f"Time: {selected_time_rel:.1f} s | "
        f"NADH: {selected_row['NADH_mM']:.4f} mM | "
        f"R²: {selected_row['R_squared']:.4f}"
    )

    # All trace changes are made inside one batch_update context
    with fig_widget.batch_update():
        # Move the vertical marker to the selected relative time
        fig_widget.data[1].x = [selected_time_rel] * 2

        # Raw spectrum whose absolute time is nearest to the selected row
        time_offsets = (spectral_df['Time_s'] - selected_row['Time_s']).abs()
        raw_spectrum = spectral_df.loc[time_offsets.idxmin()]

        # Collect (wavelength, absorbance) pairs inside 320-420 nm; columns
        # whose name does not parse as a number are skipped
        in_window = []
        for column in spectral_cols:
            try:
                nm = float(column)
            except ValueError:
                continue
            if 320 <= nm <= 420:
                in_window.append((nm, raw_spectrum[column]))

        spectrum_df_temp = pd.DataFrame({
            'Wavelength': np.array([nm for nm, _ in in_window]),
            'Absorbance': np.array([value for _, value in in_window]),
        })

        # Deconvolve this single spectrum, passing the row's pyruvate
        # concentration as fixed_pyr
        deconv_result = calculate_concentrations(
            spectrum_df=spectrum_df_temp,
            standards_df=standards_df,
            wavelength_range=(320, 420),
            absorbance_max=2,
            fit_intercept=True,
            fixed_pyr=selected_row['Pyruvate_mM'],
            plot=False,
        )

        # Fill the bottom-panel traces; arrays are converted to plain lists
        # before being handed to the widget
        trace_sources = (
            (2, 'raw_absorbance'),
            (3, 'fitted_absorbance'),
            (4, 'nadh_component'),
            (5, 'pyr_component'),
        )
        for trace_index, result_key in trace_sources:
            fig_widget.data[trace_index].x = deconv_result['wavelengths'].tolist()
            fig_widget.data[trace_index].y = deconv_result[result_key].tolist()

# Re-run update_plot whenever the slider's value changes
slider.observe(update_plot, names='value')

# Populate the plot once before it is displayed
update_plot({'new': 0})

# Stack heading, info label, slider and figure vertically and show them
explorer_children = [
    widgets.HTML("<h3>Interactive Spectral Deconvolution Explorer</h3>"),
    info_label,
    slider,
    fig_widget,
]
display(widgets.VBox(explorer_children))

print("✓ Interactive explorer ready! Use the slider to explore timepoints.")

In [None]:
# Raw spectrum viewer: one figure with an (initially empty) trace for each
# of the two timepoints being compared
fig_spectrum = go.Figure()
for trace_name, trace_color in (('Timepoint 1', 'blue'), ('Timepoint 2', 'red')):
    fig_spectrum.add_trace(
        go.Scatter(
            x=[],
            y=[],
            mode='lines+markers',
            name=trace_name,
            line={'color': trace_color, 'width': 2},
            marker={'size': 4},
        )
    )

# Titles, theme and hover behaviour
fig_spectrum.update_layout(
    title='Raw Absorbance Spectra Comparison',
    xaxis_title='Wavelength (nm)',
    yaxis_title='Absorbance',
    template='plotly_white',
    height=500,
    hovermode='x unified',
    showlegend=True,
)

# FigureWidget wrapper so the traces can be updated in place
fig_spectrum_widget = go.FigureWidget(fig_spectrum)

# Time span covered by the results. time_range and num_ticks (one tick per
# 10 s) are computed here but not referenced again in the visible cells.
relative_times = results['Time_Relative_s']
time_min = relative_times.min()
time_max = relative_times.max()
time_range = time_max - time_min
num_ticks = int(time_range / 10) + 1


def _make_time_slider(initial_time, label):
    """Build a 0.5 s-step slider over [time_min, time_max] starting at initial_time."""
    # A fresh Layout is created on every call so the sliders do not share one
    return widgets.FloatSlider(
        value=initial_time,
        min=time_min,
        max=time_max,
        step=0.5,
        description=label,
        continuous_update=False,
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='80%'),
        readout_format='.1f',
    )


# First slider starts at the first row of `results`
time_slider_1 = _make_time_slider(results.iloc[0]['Time_Relative_s'], 'Time 1 (s):')

# Second slider starts at the middle row of `results`
mid_idx = len(results) // 2
time_slider_2 = _make_time_slider(results.iloc[mid_idx]['Time_Relative_s'], 'Time 2 (s):')

# Placeholder labels, overwritten by the slider callbacks
spectrum_info_label_1 = widgets.Label(value="Timepoint 1: ...")
spectrum_info_label_2 = widgets.Label(value="Timepoint 2: ...")

# Helper function to get spectrum at a given time
def get_spectrum_at_time(target_time_rel):
    """Return the raw 320-420 nm spectrum recorded closest to a relative time.

    The row of ``results`` whose ``Time_Relative_s`` is nearest to
    ``target_time_rel`` is selected first; the row of ``spectral_df`` whose
    ``Time_s`` is nearest to that result's ``Time_s`` then supplies the
    absorbance values.

    Args:
        target_time_rel: Requested time, on the ``Time_Relative_s`` scale
            of ``results``.

    Returns:
        tuple: ``(wavelengths, absorbances, actual_time_rel, nadh)`` where
        ``wavelengths`` and ``absorbances`` are equal-length lists,
        ``actual_time_rel`` is the ``Time_Relative_s`` of the selected
        result row and ``nadh`` is that row's ``NADH_mM`` value.
    """
    # Use positional argmin() together with .iloc. The previous code fed the
    # index *label* returned by idxmin() into .iloc, which selects the wrong
    # row (or raises IndexError) unless `results` has a default 0..n-1 index.
    result_pos = (results['Time_Relative_s'] - target_time_rel).abs().argmin()
    selected_row = results.iloc[result_pos]
    actual_time_rel = selected_row['Time_Relative_s']

    # Raw spectrum nearest in absolute time; positional lookup also keeps
    # this a single row if spectral_df carries repeated index labels
    spectrum_pos = (spectral_df['Time_s'] - selected_row['Time_s']).abs().argmin()
    spectrum_row = spectral_df.iloc[spectrum_pos]

    # Keep columns whose name parses as a wavelength within 320-420 nm;
    # names that are not numeric are skipped
    wavelengths = []
    absorbances = []
    for col in spectral_cols:
        try:
            wavelength = float(col)
        except ValueError:
            continue
        if 320 <= wavelength <= 420:
            wavelengths.append(wavelength)
            absorbances.append(spectrum_row[col])

    return wavelengths, absorbances, actual_time_rel, selected_row['NADH_mM']

# Update function for first spectrum
def update_spectrum_1(change):
    """Refresh the first comparison trace and its label from ``time_slider_1``.

    Args:
        change: Change notification passed by the widget observer. It is
            not inspected; the time is read from ``time_slider_1.value``.
    """
    spectrum = get_spectrum_at_time(time_slider_1.value)
    wavelengths, absorbances = spectrum[0], spectrum[1]
    actual_time_rel, nadh = spectrum[2], spectrum[3]

    # Label shows the time actually matched, not the requested slider value
    spectrum_info_label_1.value = f"Time 1: {actual_time_rel:.1f} s | NADH: {nadh:.4f} mM"

    # Trace 0 of the comparison figure belongs to timepoint 1
    with fig_spectrum_widget.batch_update():
        first_trace = fig_spectrum_widget.data[0]
        first_trace.x = wavelengths
        first_trace.y = absorbances

# Update function for second spectrum
def update_spectrum_2(change):
    """Refresh the second comparison trace and its label from ``time_slider_2``.

    Args:
        change: Change notification passed by the widget observer. It is
            not inspected; the time is read from ``time_slider_2.value``.
    """
    spectrum = get_spectrum_at_time(time_slider_2.value)
    wavelengths, absorbances = spectrum[0], spectrum[1]
    actual_time_rel, nadh = spectrum[2], spectrum[3]

    # Label shows the time actually matched, not the requested slider value
    spectrum_info_label_2.value = f"Time 2: {actual_time_rel:.1f} s | NADH: {nadh:.4f} mM"

    # Trace 1 of the comparison figure belongs to timepoint 2
    with fig_spectrum_widget.batch_update():
        second_trace = fig_spectrum_widget.data[1]
        second_trace.x = wavelengths
        second_trace.y = absorbances

# Each slider redraws its own trace when its value changes
time_slider_1.observe(update_spectrum_1, names='value')
time_slider_2.observe(update_spectrum_2, names='value')

# Draw both spectra once so the figure is populated before it is displayed
update_spectrum_1({'new': time_slider_1.value})
update_spectrum_2({'new': time_slider_2.value})

# Heading, description, the two info labels side by side, both sliders and
# the figure, stacked vertically
viewer_children = [
    widgets.HTML("<h3>Raw Spectrum Comparison</h3>"),
    widgets.HTML("<p>Compare raw absorbance spectra at two different timepoints</p>"),
    widgets.HBox([spectrum_info_label_1, spectrum_info_label_2]),
    time_slider_1,
    time_slider_2,
    fig_spectrum_widget,
]
display(widgets.VBox(viewer_children))

print("✓ Raw spectrum viewer ready! Use the sliders to compare spectra at different times.")

## Raw Spectrum Viewer

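Explore how the raw absorbance spectrum evolves over time by comparing two timepoints side by side. Each of the two sliders in the viewer (created in the code cell above) selects a time relative to assay start, and the plot shows the unprocessed spectral data (320–420 nm) recorded closest to that time.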