<a href="https://colab.research.google.com/github/eoinleen/AKTA-Prime_final/blob/main/EM_AKTA_ana_SEC_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
SEC Chromatogram Visualization for Publication
---------------------------------------------
This script creates publication-quality SEC chromatogram figures with:
- Dual wavelength plotting (A280 and A260)
- Fraction markers with selectable range
- Standard markers as triangles above the top plot
- Stacked multiple chromatograms
- Customizable axis limits and appearance
- Arial 11 font for labels

INSTRUCTIONS:
1. Upload your exported data files (.csv) when prompted; columns must be whitespace- or tab-separated
2. Modify the configuration section below to customize your plot
3. Run the entire script to generate and save the figure
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from matplotlib.ticker import MultipleLocator, AutoMinorLocator
import re
from google.colab import files
import io

# =====================================================================
# CONFIGURATION - MODIFY THESE PARAMETERS AS NEEDED
# =====================================================================

# --- Traces ----------------------------------------------------------

# Draw the A260 trace alongside A280 (use False to hide A260)
SHOW_A260 = True

# Line colours for the two absorbance traces
A280_COLOR = 'blue'
A260_COLOR = 'red'

# --- Layout ----------------------------------------------------------

# Overall figure size in inches: (width, height)
FIGURE_SIZE = (10, 8)

# Which sample is drawn in the top panel.
# Accepted values: 'first_on_top', 'last_on_top', or a list of indices
PLOT_ORDER = 'first_on_top'

# One [x_min, x_max, y_min, y_max] entry per panel, top panel first.
# Append further entries when plotting more than two panels.
AXIS_LIMITS = [
    [5, 25, -20, 1500],
    [5, 25, -20, 1000],
]

# --- Annotations -----------------------------------------------------

# First and last fraction to mark (use None to mark every fraction)
FRACTION_RANGE = {'start': '3.A.1', 'end': '9.H.12'}

# Calibration standards drawn as triangles above the top panel
STANDARDS = [
    {'position': 8.15, 'label': 'Void'},
    {'position': 12.3, 'label': '440 kDa'},
    {'position': 14.8, 'label': '158 kDa'},
    {'position': 16.5, 'label': '44 kDa'},
    {'position': 18.2, 'label': '17 kDa'},
]

# =====================================================================
# DATA PARSING FUNCTIONS
# =====================================================================

def parse_sec_data(file_content):
    """
    Parse SEC data from whitespace-delimited text with two header rows.

    The first line is read as column names and the second as column units.
    Columns are located as follows:
    - elution volume: the first column whose unit is 'ml'
    - A280 / A260: columns whose unit is 'mAU' and whose name contains
      '280' / '260' (if several match, the last one wins)
    - fraction ID: the column immediately after the one named 'Fraction'

    If no 'ml' column is found, column 6 is used (column 0 when there are
    fewer than 7 columns). If no A280 column is found, column 1 is used
    (column 0 when there is only one column).

    Data rows that are too short, or whose volume/A280 cells are not
    numbers, are skipped. Input with fewer than two lines (e.g. an empty
    file) yields empty arrays instead of raising.

    Parameters:
    -----------
    file_content : str
        Full text of the exported data file

    Returns a dictionary with:
    - ml: numpy array of elution volumes
    - a280: numpy array of absorbance at 280nm
    - a260: numpy array of absorbance at 260nm (NaN where missing/unparsable)
    - fractions: list of {'ml': float, 'id': str} dictionaries
    """
    lines = file_content.strip().splitlines()

    # Both header rows are required to locate the columns
    if len(lines) < 2:
        return {
            'ml': np.array([], dtype=float),
            'a280': np.array([], dtype=float),
            'a260': np.array([], dtype=float),
            'fractions': []
        }

    # Parse header to identify columns
    header_row1 = lines[0].split()
    header_row2 = lines[1].split()

    ml_index = None
    a280_index = None
    a260_index = None
    fraction_id_index = None

    for i, (h1, h2) in enumerate(zip(header_row1, header_row2)):
        if h2 == 'ml' and ml_index is None:
            # Use first ml column for x-axis
            ml_index = i
        elif h2 == 'mAU' and '280' in h1:
            a280_index = i
        elif h2 == 'mAU' and '260' in h1:
            a260_index = i
        elif h1 == 'Fraction':
            # The fraction ID is taken from the column after 'Fraction'
            if i + 1 < len(header_row1):
                fraction_id_index = i + 1

    # Positional fallbacks when the headers did not identify the columns
    if ml_index is None:
        ml_index = 6 if len(header_row1) > 6 else 0
    if a280_index is None:
        a280_index = 1 if len(header_row1) > 1 else 0

    last_required_index = max(ml_index, a280_index)
    fraction_id_re = re.compile(r'\d+\.[A-Z]+\.\d+')  # e.g. "4.E.1"

    ml_values = []
    a280_values = []
    a260_values = []
    fractions = []

    for line in lines[2:]:
        values = line.split()
        if len(values) <= last_required_index:
            continue  # Row too short to hold both required columns

        try:
            ml = float(values[ml_index])
            a280 = float(values[a280_index])
        except ValueError:
            continue  # Non-numeric row (e.g. trailing text)

        # A260 is optional: keep the row and record NaN when it is absent
        a260 = np.nan
        if a260_index is not None and a260_index < len(values):
            try:
                a260 = float(values[a260_index])
            except ValueError:
                pass

        ml_values.append(ml)
        a280_values.append(a280)
        a260_values.append(a260)

        # NOTE(review): the fraction is positioned at this row's elution
        # volume, not at a value read from the 'Fraction' column itself.
        # Confirm this matches the layout of the exported files.
        if fraction_id_index is not None and fraction_id_index < len(values):
            fraction_id = values[fraction_id_index]
            if fraction_id_re.match(fraction_id):
                fractions.append({
                    'ml': ml,
                    'id': fraction_id
                })

    return {
        'ml': np.array(ml_values, dtype=float),
        'a280': np.array(a280_values, dtype=float),
        'a260': np.array(a260_values, dtype=float),
        'fractions': fractions
    }

def is_fraction_in_range(fraction_id, start_fraction, end_fraction):
    """
    Tell whether fraction_id lies between start_fraction and end_fraction.

    IDs are expected to look like "4.E.1" (number, capital letters, number).
    Each ID is turned into an (int, str, int) tuple and the tuples are
    compared in order, both bounds inclusive. The check is permissive:
    True is returned when either bound is missing/empty or when any of the
    three IDs does not start with the expected pattern.
    """
    # An incomplete range means "no filtering"
    if not start_fraction or not end_fraction:
        return True

    id_pattern = r'(\d+)\.([A-Z]+)\.(\d+)'
    parsed = [
        re.match(id_pattern, text)
        for text in (fraction_id, start_fraction, end_fraction)
    ]

    # Anything unparsable is kept rather than dropped
    if any(found is None for found in parsed):
        return True

    def as_key(found):
        leading, letters, trailing = found.groups()
        return (int(leading), letters, int(trailing))

    current, lower, upper = (as_key(found) for found in parsed)
    return lower <= current <= upper

# =====================================================================
# PLOTTING FUNCTIONS
# =====================================================================

# Vertical placement, in points above the top edge of the axes, of the
# standard markers and their labels, plus the title padding needed so the
# title of the top plot clears both.
_STANDARD_MARKER_OFFSET_PT = 8
_STANDARD_LABEL_OFFSET_PT = 14
_STANDARD_TITLE_PAD_PT = 32

def _resolve_plot_order(plot_order, n_datasets):
    """Translate the 'plot_order' setting into a list of data set indices."""
    if plot_order == 'last_on_top':
        return list(range(n_datasets - 1, -1, -1))

    if isinstance(plot_order, (list, tuple)):
        order = list(plot_order)
        if not order:
            raise ValueError("plot_order list is empty; nothing to plot")
        for idx in order:
            if not isinstance(idx, (int, np.integer)) or not -n_datasets <= idx < n_datasets:
                raise ValueError(
                    f"plot_order entry {idx!r} is not a valid index "
                    f"for {n_datasets} data set(s)"
                )
        return order

    # 'first_on_top' and any unrecognised value: keep the given order
    return list(range(n_datasets))

def _draw_standard_markers(fig, ax, standards, font_props):
    """Draw standard triangles and labels just above the top edge of ax."""
    from matplotlib.transforms import offset_copy

    # x is in data units, y in axes fraction (1.0 is the top edge); the
    # offsets then lift marker and label by a fixed number of points, so
    # the placement does not depend on the y-axis limits.
    top_edge = ax.get_xaxis_transform()
    marker_transform = offset_copy(top_edge, fig=fig,
                                   y=_STANDARD_MARKER_OFFSET_PT, units='points')
    label_transform = offset_copy(top_edge, fig=fig,
                                  y=_STANDARD_LABEL_OFFSET_PT, units='points')

    x_low, x_high = sorted(ax.get_xlim())
    for standard in standards:
        position = standard['position']
        # Clipping is disabled below so the markers can sit outside the
        # axes; standards outside the visible x-range are skipped instead.
        if not x_low <= position <= x_high:
            continue
        ax.plot([position], [1.0], '^', color='green', markersize=8,
                transform=marker_transform, clip_on=False)
        ax.text(position, 1.0, standard['label'], ha='center', va='bottom',
                fontproperties=font_props, transform=label_transform,
                clip_on=False)

def create_publication_figure(sec_data_list, labels, config):
    """
    Create a publication-quality SEC figure with all requested features.

    One subplot is drawn per entry of the resolved plot order, stacked
    vertically. Each subplot shows the A280 trace, optionally the A260
    trace, 'x' markers at y=0 for the selected fractions, a grid and minor
    ticks. Standards are drawn as green triangles with labels above the top
    subplot; their position is fixed relative to the top edge of the axes,
    so they stay in place when the y-limits change. Standards outside the
    x-limits in effect when the figure is created are not drawn.

    Note: this function sets plt.rcParams['font.family'] and
    plt.rcParams['font.size'] globally. If Arial is not installed,
    matplotlib falls back to its default font.

    Parameters:
    -----------
    sec_data_list : list of dict
        List of SEC data dictionaries from parse_sec_data()
    labels : list of str
        Labels for each data set (used as subplot titles)
    config : dict
        Configuration dictionary with the keys 'show_a260', 'plot_order',
        'fraction_range', 'standards', 'axis_limits', 'figure_size',
        'a280_color' and 'a260_color'. 'axis_limits' is indexed by subplot
        position (top subplot first), not by data set.

    Returns:
    --------
    fig : matplotlib Figure
        The created figure
    axes : list of matplotlib Axes
        List of subplot axes, top subplot first

    Raises:
    -------
    ValueError
        If sec_data_list is empty or 'plot_order' is an empty list or
        contains an invalid index
    """
    n_datasets = len(sec_data_list)
    if n_datasets == 0:
        raise ValueError("sec_data_list is empty; at least one data set is required")

    # Set font properties for all text elements
    font_props = fm.FontProperties(family='Arial', size=11)
    plt.rcParams['font.family'] = 'Arial'
    plt.rcParams['font.size'] = 11

    plot_order = _resolve_plot_order(config['plot_order'], n_datasets)

    # squeeze=False always yields a 2-D array, so one and many subplots
    # are handled the same way
    fig, axes_grid = plt.subplots(len(plot_order), 1,
                                  figsize=config['figure_size'],
                                  sharex=False, squeeze=False)
    axes = list(axes_grid[:, 0])

    fraction_range = config['fraction_range']
    standards = config['standards']
    axis_limits = config['axis_limits']
    bottom_row = len(plot_order) - 1

    for i, (ax, idx) in enumerate(zip(axes, plot_order)):
        data = sec_data_list[idx]
        with_standards = i == 0 and bool(standards)

        # Plot A280 trace
        ax.plot(data['ml'], data['a280'], '-', color=config['a280_color'],
                linewidth=1.5, label='A280')

        # Plot A260 trace if requested and at least one value is not NaN
        if (config['show_a260'] and len(data['a260']) > 0
                and not np.isnan(data['a260']).all()):
            ax.plot(data['ml'], data['a260'], '-', color=config['a260_color'],
                    linewidth=1.5, label='A260', alpha=0.7)

        # Set axis labels (x label only on the bottom plot)
        ax.set_ylabel('Absorbance (mAU)', fontproperties=font_props)
        if i == bottom_row:
            ax.set_xlabel('Elution Volume (ml)', fontproperties=font_props)

        # Lift the title of the top plot so it clears the standard labels
        ax.set_title(labels[idx], fontproperties=font_props,
                     pad=_STANDARD_TITLE_PAD_PT if with_standards else None)

        ax.legend(prop=font_props, frameon=True, framealpha=0.7)

        # Plot fraction markers if available
        if data['fractions']:
            if fraction_range is None:
                fractions_to_plot = list(data['fractions'])
            else:
                fractions_to_plot = [
                    frac for frac in data['fractions']
                    if is_fraction_in_range(frac['id'],
                                            fraction_range.get('start'),
                                            fraction_range.get('end'))
                ]

            # Markers sit on the baseline (y=0)
            ax.plot([frac['ml'] for frac in fractions_to_plot],
                    [0] * len(fractions_to_plot), 'kx', markersize=6)

            # Add fraction labels - uncomment to enable
            # Tip: Keep commented out if you have many fractions to avoid clutter
            # for n, frac in enumerate(fractions_to_plot):
            #     # Only label every 5th fraction to avoid overcrowding
            #     if n % 5 == 0:
            #         ax.text(frac['ml'], -50, frac['id'], rotation=90,
            #                 fontsize=8, ha='center', va='top')

        # Apply the limits before drawing the standards, which are filtered
        # against the final x-range
        if i < len(axis_limits):
            x_min, x_max, y_min, y_max = axis_limits[i]
            ax.set_xlim(x_min, x_max)
            ax.set_ylim(y_min, y_max)

        if with_standards:
            _draw_standard_markers(fig, ax, standards, font_props)

        ax.grid(True, linestyle='--', alpha=0.5)

        # Add minor ticks
        ax.xaxis.set_minor_locator(AutoMinorLocator(2))
        ax.yaxis.set_minor_locator(AutoMinorLocator(2))

    # Lay out this figure explicitly rather than pyplot's current figure
    fig.tight_layout()

    return fig, axes

def toggle_a260_visibility(fig, axes, visible=True):
    """
    Show or hide every A260 trace of an existing figure.

    Lines are identified by their label ('A260'), so the figure does not
    have to be rebuilt. A redraw of the figure canvas is requested
    afterwards.

    Parameters:
    -----------
    fig : matplotlib Figure
        Figure that owns the axes
    axes : list of matplotlib Axes
        Subplot axes to search for A260 lines
    visible : bool
        True to show the A260 traces, False to hide them
    """
    a260_traces = (
        trace
        for subplot in axes
        for trace in subplot.get_lines()
        if trace.get_label() == 'A260'
    )
    for trace in a260_traces:
        trace.set_visible(visible)

    # Ask the canvas to refresh
    fig.canvas.draw_idle()

def update_axis_limits(axes, axis_limits):
    """
    Update axis limits for each subplot.

    Limits are matched to axes by position; axes without a matching entry
    keep their current limits. The layout is then refreshed on the
    figure(s) that own the given axes, rather than on pyplot's current
    figure, which may be a different figure by the time this is called.

    Parameters:
    -----------
    axes : list of matplotlib Axes
        List of subplot axes
    axis_limits : list of lists
        List of [x_min, x_max, y_min, y_max] for each subplot
    """
    for ax, (x_min, x_max, y_min, y_max) in zip(axes, axis_limits):
        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)

    # Re-run the layout once per distinct owning figure
    owning_figures = []
    for ax in axes:
        figure = ax.figure
        if all(figure is not seen for seen in owning_figures):
            owning_figures.append(figure)
    for figure in owning_figures:
        figure.tight_layout()

# =====================================================================
# INTERACTIVE FUNCTIONS FOR GOOGLE COLAB
# =====================================================================

def _decode_upload(raw_bytes):
    """Decode uploaded bytes to text, honouring a BOM and falling back to Latin-1."""
    import codecs

    if raw_bytes.startswith(codecs.BOM_UTF8):
        # 'utf-8-sig' drops the BOM so it cannot end up in the first header name
        return raw_bytes.decode('utf-8-sig')
    if raw_bytes.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
        return raw_bytes.decode('utf-16')
    try:
        return raw_bytes.decode('utf-8')
    except UnicodeDecodeError:
        # Latin-1 maps every byte, so a legacy 8-bit file still loads
        return raw_bytes.decode('latin-1')

def run_interactive_analysis():
    """
    Run interactive analysis in Google Colab.

    Prompts for a file upload, parses every uploaded file with
    parse_sec_data(), builds the figure from the module-level configuration
    constants, displays it, and saves and downloads it as a 300 dpi PNG and
    as a PDF.

    Returns:
    --------
    fig : matplotlib Figure
        The created figure
    axes : list of matplotlib Axes
        List of subplot axes
    sec_data_list : list of dict
        Parsed data, one dictionary per uploaded file

    Raises:
    -------
    ValueError
        If no file was uploaded
    """
    import os

    print("Please upload your SEC data file(s)...")
    uploaded = files.upload()

    if not uploaded:
        raise ValueError("No files were uploaded; nothing to plot.")

    # Process uploaded files
    sec_data_list = []
    labels = []

    for filename, content in uploaded.items():
        sec_data = parse_sec_data(_decode_upload(content))
        sec_data_list.append(sec_data)
        # Strip only the final extension so dots inside the name survive
        labels.append(os.path.splitext(filename)[0])
        if len(sec_data['ml']) == 0:
            print(f"Warning: no data rows could be parsed from {filename}")
        print(f"Processed: {filename}")

    # Prepare configuration
    config = {
        'show_a260': SHOW_A260,
        'plot_order': PLOT_ORDER,
        'fraction_range': FRACTION_RANGE,
        'standards': STANDARDS,
        'axis_limits': AXIS_LIMITS,
        'figure_size': FIGURE_SIZE,
        'a280_color': A280_COLOR,
        'a260_color': A260_COLOR
    }

    # Create the figure
    fig, axes = create_publication_figure(sec_data_list, labels, config)

    # Display the figure
    plt.show()

    # Save as high-resolution PNG
    output_filename = "SEC_publication_figure.png"
    fig.savefig(output_filename, dpi=300, bbox_inches='tight')
    print(f"Figure saved as {output_filename}")
    files.download(output_filename)

    # Save as PDF for publication
    output_pdf = "SEC_publication_figure.pdf"
    fig.savefig(output_pdf, format='pdf', bbox_inches='tight')
    print(f"Figure saved as {output_pdf} (vector format for publication)")
    files.download(output_pdf)

    return fig, axes, sec_data_list

# =====================================================================
# EXAMPLE USAGE
# =====================================================================

# Example of how to use this script:
"""
# Run the analysis with default configuration
fig, axes, data_list = run_interactive_analysis()

# CUSTOMIZATION EXAMPLES:

# To hide A260 traces after creating the plot:
toggle_a260_visibility(fig, axes, visible=False)
plt.draw()

# To change axis limits after creating the plot:
new_limits = [
    [7, 22, -20, 1000],  # New limits for first plot
    [7, 22, -20, 800]    # New limits for second plot
]
update_axis_limits(axes, new_limits)
plt.draw()

# To save the modified figure:
fig.savefig("modified_SEC_figure.png", dpi=300, bbox_inches='tight')
files.download("modified_SEC_figure.png")
"""

# =====================================================================
# MAIN CODE - RUN THE ANALYSIS
# =====================================================================

# Run the full upload -> plot -> download workflow when this file is executed
# directly. The results are bound to the module-level names fig, axes and
# data_list, which the snippets under EXAMPLE USAGE refer to.
if __name__ == "__main__":
    fig, axes, data_list = run_interactive_analysis()