In [1]:
import pandas as pd

In [2]:
# Location of the tab-separated text file loaded below.
# NOTE(review): this is an absolute, machine-specific path; the cell will only
# run where this exact file exists — consider a configurable data directory.
timepoint_file = "/Users/noot/Documents/IMC/data/241218_IMC_Alun/IMC_241218_Alun_ROI_D7_M2_03_26.txt"

# read_table parses tab-delimited text by default, so no separator is passed.
imc_timepoint_df = pd.read_table(timepoint_file)

In [6]:
# Display the column labels of the loaded table; the channel names used in
# later cells must match these labels exactly.
imc_timepoint_df.columns

Index(['Start_push', 'End_push', 'Pushes_duration', 'X', 'Y', 'Z',
       '80ArAr(ArAr80Di)', 'CD45(Y89Di)', '130Ba(Ba130Di)', '131Xe(Xe131Di)',
       'Ly6G(Pr141Di)', 'CD11b(Nd143Di)', 'CD140a(Nd148Di)', 'CD140b(Eu151Di)',
       'CD31(Sm154Di)', 'CD34(Er166Di)', 'CD206(Tm169Di)', 'CD44(Yb171Di)',
       '190BCKG(BCKG190Di)', 'DNA1(Ir191Di)', 'DNA2(Ir193Di)'],
      dtype='object')

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from itertools import combinations
import math

# Pairwise 2D histograms for a fixed list of protein channels.
#
# Column names have the form "label(tag)"; only the part before "(" is used
# for subplot titles and axis labels.
protein_channels = [
    'CD45(Y89Di)', 'Ly6G(Pr141Di)', 'CD11b(Nd143Di)', 'CD140a(Nd148Di)',
    'CD140b(Eu151Di)', 'CD31(Sm154Di)', 'CD34(Er166Di)', 'CD206(Tm169Di)',
    'CD44(Yb171Di)'
]

# Keep only the requested channels that exist in the dataframe, and say
# explicitly which ones were skipped instead of dropping them silently.
valid_protein_channels = [col for col in protein_channels if col in imc_timepoint_df.columns]
missing_protein_channels = [col for col in protein_channels if col not in imc_timepoint_df.columns]
if missing_protein_channels:
    print(f"WARNING: channels not found in dataframe and skipped: {missing_protein_channels}")
print(f"Selected protein channels: {valid_protein_channels}")
print(f"Number of channels: {len(valid_protein_channels)}")

# Every unordered pair of valid channels gets its own subplot.
channel_pairs = list(combinations(valid_protein_channels, 2))
num_pairs = len(channel_pairs)
print(f"Number of channel pairs: {num_pairs}")

# With fewer than two valid channels there is nothing to plot, and the grid
# would be created with rows=0, which make_subplots rejects with a message
# about 'rows' rather than about the real cause. Fail early and clearly.
if num_pairs == 0:
    raise ValueError(
        "Need at least two protein channels present in imc_timepoint_df to plot "
        f"channel pairs; found {len(valid_protein_channels)}."
    )

# Grid dimensions: fixed column count, as many rows as needed.
cols = 3  # You can adjust this
rows = math.ceil(num_pairs / cols)

# One titled subplot per pair, in the same order as channel_pairs.
fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles=[
        f"{ch_x.split('(')[0]} vs {ch_y.split('(')[0]}" for ch_x, ch_y in channel_pairs
    ],
)

# Note: No downsampling - using the full dataset as requested.
# This might be slower but will show all data points.

for idx, (channel1, channel2) in enumerate(channel_pairs):
    # Fill the grid row by row; plotly subplot indices are 1-based.
    row = idx // cols + 1
    col = idx % cols + 1

    # Short display labels without the parenthesised suffix.
    ch1_label = channel1.split('(')[0]
    ch2_label = channel2.split('(')[0]

    fig.add_trace(
        go.Histogram2d(
            x=imc_timepoint_df[channel1],
            y=imc_timepoint_df[channel2],
            colorscale='Viridis',
            nbinsx=50,
            nbinsy=50,
            showscale=False  # no per-subplot colorbar
        ),
        row=row, col=col
    )

    # Small axis titles so they fit the compact subplots.
    fig.update_xaxes(title_text=ch1_label, title_font=dict(size=8), row=row, col=col)
    fig.update_yaxes(title_text=ch2_label, title_font=dict(size=8), row=row, col=col)

# Size the figure proportionally to the grid (250 px per row and per column).
fig.update_layout(
    title="Protein Channel Pair Relationships (Full Dataset)",
    height=250 * rows,
    width=250 * cols,
    showlegend=False,
    margin=dict(l=40, r=20, t=60, b=20)
)

# Display the figure
fig.show()

# Function to plot specific channel pairs on demand
def plot_channel_pair(channel1, channel2, bins=100, df=None):
    """Show a 2D histogram of one channel against another.

    Parameters
    ----------
    channel1 : str
        Column name plotted on the x-axis.
    channel2 : str
        Column name plotted on the y-axis.
    bins : int, default 100
        Forwarded to plotly as both ``nbinsx`` and ``nbinsy``.
    df : pandas.DataFrame, optional
        Table to read the two columns from. When omitted, the module-level
        ``imc_timepoint_df`` is used, which is the original behaviour.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``; nothing is returned.

    Raises
    ------
    KeyError
        If either channel is not a column of the table.
    """
    # Fall back to the notebook-level dataframe so existing calls keep working.
    data = imc_timepoint_df if df is None else df

    # Check both names up front so the error names the missing column(s)
    # before any plotting work is done.
    missing = [name for name in (channel1, channel2) if name not in data.columns]
    if missing:
        raise KeyError(f"Channel column(s) not found in dataframe: {missing}")

    # Display labels: the part of each column name before "(".
    ch1_label = channel1.split('(')[0]
    ch2_label = channel2.split('(')[0]

    # Full dataset is used; no sampling is applied.
    fig = go.Figure(go.Histogram2d(
        x=data[channel1],
        y=data[channel2],
        colorscale='Viridis',
        nbinsx=bins,
        nbinsy=bins
    ))

    fig.update_layout(
        title=f"{ch1_label} vs {ch2_label}",
        xaxis_title=ch1_label,
        yaxis_title=ch2_label,
        width=600,
        height=600
    )

    fig.show()
    
# Example usage with real column names:
# plot_channel_pair('CD45(Y89Di)', 'CD11b(Nd143Di)', bins=50)