#### This particular notebook includes the code to output a volcano plot using the DESeq2 results from the Dataset 1 Xenium and CosMx data.

#### Required input files:
* DESeq2 results table (for both Xenium and CosMx) -- Can be calculated using the DESeq Rmd notebook

Environment: Please create and activate the conda environment provided in `default_env.yaml` before running this notebook.

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import squidpy as sq

import gzip
import anndata

import os

from scipy.stats import pearsonr
from adjustText import adjust_text

## Xenium and CosMx DESeq Volcano Plot



In [None]:
### Load in dfs and view

# Locations of the per-platform DESeq2 result spreadsheets
xenium_results_path = "/path/DESeq_Results/Xenium_HCvsUCPRE_DESeq_table_with_sigificance.xlsx"
cosmx_results_path = "/path/DESeq_Results/Cosmx_HCvsUCPRE_DESeq_table_with_sigificance.xlsx"

# Read the Xenium table and preview its first five rows
Xenium_DESeq = pd.read_excel(xenium_results_path)
print(Xenium_DESeq.head(n=5))

# Read the CosMx table and preview its first five rows
CosMx_DESeq = pd.read_excel(cosmx_results_path)
print(CosMx_DESeq.head(n=5))

In [None]:
### Format dfs for merge

# Headers under which the gene identifier column may arrive. 'Unnamed: 0' is the
# name pandas gives an unlabeled first column; 'Gene ' covers a header with a
# stray trailing space. Both are normalized to 'Gene' BEFORE column selection,
# because a rename applied after selecting 'Gene' could never match 'Gene '.
GENE_COLUMN_ALIASES = {'Unnamed: 0': 'Gene', 'Gene ': 'Gene'}

# DESeq2 result columns carried into the merge (each gets a platform prefix).
DESEQ_VALUE_COLUMNS = ['padj', 'log2FoldChange', 'Significance']


def format_deseq_for_merge(deseq_table, platform):
    """Return the merge-ready subset of a DESeq2 results table.

    Parameters
    ----------
    deseq_table : pandas.DataFrame
        Table containing a 'Gene' column plus the columns listed in
        DESEQ_VALUE_COLUMNS.
    platform : str
        Prefix added to every value column (e.g. 'Xenium' -> 'Xenium_padj').

    Returns
    -------
    pandas.DataFrame
        New frame with 'Gene' and the platform-prefixed value columns.
        The input frame is not modified.

    Raises
    ------
    KeyError
        If 'Gene' or any of the value columns is missing from the table.
    """
    subset = deseq_table[['Gene'] + DESEQ_VALUE_COLUMNS]
    prefixed_names = {column: f'{platform}_{column}' for column in DESEQ_VALUE_COLUMNS}
    return subset.rename(columns=prefixed_names)


# Rebind (rather than mutate in place) so the raw frames still expose a 'Gene'
# column to later cells, and re-running this cell stays idempotent.
Xenium_DESeq = Xenium_DESeq.rename(columns=GENE_COLUMN_ALIASES)
Xenium_DESeq2 = format_deseq_for_merge(Xenium_DESeq, 'Xenium')

print(Xenium_DESeq2.head(5))

CosMx_DESeq = CosMx_DESeq.rename(columns=GENE_COLUMN_ALIASES)
CosMx_DESeq2 = format_deseq_for_merge(CosMx_DESeq, 'CosMx')

print(CosMx_DESeq2.head(5))

In [None]:
### Merge dfs based on Gene column

# Inner join on 'Gene': rows are kept only for genes found in both tables
merged_DESeq = Xenium_DESeq2.merge(CosMx_DESeq2, on='Gene', how='inner')

In [None]:
# Display the merged Xenium/CosMx DESeq2 table
merged_DESeq

In [None]:
### Set up for scatterplot

# Classify a gene by which platform(s) flagged it as significant
def assign_color(row):
    """Return the significance category for one row of the merged table.

    Reads row['Xenium_Significance'] and row['CosMx_Significance'] and
    compares them against the strings 'yes' and 'no'.

    Returns
    -------
    str
        'Both'    -- both flags are 'yes'
        'Xenium'  -- Xenium is 'yes' and CosMx is 'no'
        'CosMx'   -- Xenium is 'no' and CosMx is 'yes'
        'Neither' -- any other combination (including values that are
                     neither 'yes' nor 'no')
    """
    xenium_flag = row['Xenium_Significance']
    xenium_yes = xenium_flag == 'yes'

    # Anything other than an explicit 'yes'/'no' for Xenium can only be 'Neither'
    if not xenium_yes and not (xenium_flag == 'no'):
        return 'Neither'

    cosmx_flag = row['CosMx_Significance']
    if cosmx_flag == 'yes':
        return 'Both' if xenium_yes else 'CosMx'
    if xenium_yes and cosmx_flag == 'no':
        return 'Xenium'
    return 'Neither'
    
# Apply assign_color row-wise (axis=1) and store the returned category
# ('Both' / 'Xenium' / 'CosMx' / 'Neither') in a new 'Significance' column.
# Note: this adds the column to merged_DESeq in place.
merged_DESeq['Significance'] = merged_DESeq.apply(assign_color, axis=1)

In [None]:
# View the merged table, which now carries the combined 'Significance' column
merged_DESeq

In [None]:
# Print the magnitude of the most negative log2 fold change on each platform
# (presumably a check against the plot's axis limits -- confirm)
for lfc_column in ('Xenium_log2FoldChange', 'CosMx_log2FoldChange'):
    print(abs(merged_DESeq[lfc_column].min()))

In [None]:
## Remove 'Neither' values
# Boolean mask selecting every row whose category is anything but 'Neither'
not_neither = merged_DESeq['Significance'].ne('Neither')
merged_DESeq_edited = merged_DESeq[not_neither]

merged_DESeq_edited

In [None]:
### Plot scatterplot
# Scatter of Xenium vs CosMx log2 fold changes for genes that are significant on
# at least one platform, colored by which platform(s) called them significant.

fig, ax = plt.subplots(figsize=(7, 7))
color_palette = {'Both': 'green', 'Xenium': 'cornflowerblue', 'CosMx': 'darkorange'}
scatter_plot = sns.scatterplot(
    data=merged_DESeq_edited,
    x='Xenium_log2FoldChange',
    y='CosMx_log2FoldChange',
    hue='Significance',
    palette=color_palette,
    edgecolor='none',
    ax=ax,
)

# Calculate Pearson's correlation -- with the un-edited df so 'Neither' genes are included.
# Rows with a missing fold change on either platform are dropped first; otherwise a
# single NaN would make the correlation itself NaN.
lfc_pairs = merged_DESeq[['Xenium_log2FoldChange', 'CosMx_log2FoldChange']].dropna()
corr, p_value = pearsonr(lfc_pairs['Xenium_log2FoldChange'], lfc_pairs['CosMx_log2FoldChange'])

# Set ticks at all integers from -2 to 4
tick_positions = list(range(-2, 5))
scatter_plot.set_xticks(tick_positions)
scatter_plot.set_yticks(tick_positions)

# Only every other tick (-2, 0, 2, 4) gets a text label; the rest stay blank
labeled_ticks = {-2, 0, 2, 4}
xtick_labels = [str(tick) if tick in labeled_ticks else '' for tick in tick_positions]
ytick_labels = [str(tick) if tick in labeled_ticks else '' for tick in tick_positions]

# Apply these labels to the ticks
scatter_plot.set_xticklabels(xtick_labels, fontsize=12)
scatter_plot.set_yticklabels(ytick_labels, fontsize=12)

# Set x-axis and y-axis limits (points outside this window are not shown)
scatter_plot.set_xlim(-2.1, 4.1)
scatter_plot.set_ylim(-2.1, 4.1)

# Axis labels and title
scatter_plot.set_xlabel('Xenium Log2FC', fontsize=12)
scatter_plot.set_ylabel('CosMx Log2FC', fontsize=12)
scatter_plot.set_title('DESeq2 Results', fontsize=12)

# Desired order of the legend entries
desired_order = ['Both', 'Xenium', 'CosMx']

# Map each legend label seaborn produced to its handle
handles, labels = scatter_plot.get_legend_handles_labels()
label_handle_dict = dict(zip(labels, handles))

# Reorder, skipping any category that has no points in the data (a plain lookup
# would raise KeyError for an absent category)
legend_labels = [label for label in desired_order if label in label_handle_dict]
ordered_handles = [label_handle_dict[label] for label in legend_labels]

# Replace the automatic legend with the reordered one
scatter_plot.legend(ordered_handles, legend_labels, title='Significance', loc='best', fontsize=12, title_fontsize=12)

# Label genes whose |log2FC| exceeds 1 on either platform
exceeds_threshold = (
    merged_DESeq_edited['Xenium_log2FoldChange'].abs().gt(1)
    | merged_DESeq_edited['CosMx_log2FoldChange'].abs().gt(1)
)
genes_to_label = merged_DESeq_edited[exceeds_threshold]

# List to hold text instances for adjust_text
texts = [
    scatter_plot.text(x_lfc, y_lfc, gene,
                      horizontalalignment='left', fontsize=11, color='black')
    for gene, x_lfc, y_lfc in zip(
        genes_to_label['Gene'],
        genes_to_label['Xenium_log2FoldChange'],
        genes_to_label['CosMx_log2FoldChange'],
    )
]

# Use adjust_text to optimize label positions (nothing to adjust when no gene qualifies)
if texts:
    adjust_text(texts, arrowprops=dict(arrowstyle="->", color='black'))

# Add lines at 0
scatter_plot.axvline(x=0, color='gray', linestyle='--')
scatter_plot.axhline(y=0, color='gray', linestyle='--')

# Display Pearson's correlation coefficient and p-value on the plot.
# Scientific notation for the p-value: fixed-point '.2f' renders small values as 0.00.
scatter_plot.text(-1.8, 3.6, f'Pearson R = {corr:.2f}\np value = {p_value:.2e}', fontsize=11)

# Disable the grid
scatter_plot.grid(False)

# Adjust the layout
fig.tight_layout()  # Adjust layout to make room for the legend if necessary

# Save plot
#plt.savefig('/path/DESeq2_XeniumCosMx_CombinedVolcanoPlot.pdf')

plt.show()