In [1]:
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 22 16:46:25 2024

@author: maialen

Script to generate an interactive plot that shows the top 5 cell types obtained
from a spatial deconvolution analysis

Input:
    => CSV file containing normalized weights [barcodes as rownames, cell types as colnames]
    => CSV file containing spatial coordinates [barcodes as rownames, x and y coordinates as columns]

"""
# <! ------------------------------------------------------------------------!>
# <!                       IMPORTS                                           !>
# <! ------------------------------------------------------------------------!>
import pandas as pd
from math import pi
from bokeh.plotting import figure, output_file, save,output_notebook, show, curdoc

from bokeh.transform import cumsum
from bokeh.models import ColumnDataSource, HoverTool,Range1d
from bokeh.palettes import Category20
from bokeh.models import CustomJS, TapTool

# Fill color for each cluster id; the position in the tuple is the cluster id (0-14).
clusters_colordict = dict(enumerate((
    "#CCCCCC",
    "#FF6600",
    "#00FFCC",
    "#F0E442",
    "#0066FF",
    "#FF00FF",
    "#00FF00",
    "#FF6666",
    "#FFCC00",
    "#00FFFF",
    "#FF0066",
    "#CCFF00",
    "#0000FF",
    "#FFCCCC",
    "#CC00FF",
)))

# Define color dictionary
# colordict = {
#     "AC.like": "#CCCCCC",
#     "AC.like.Prolif": "#FF6600",
#     "Astrocyte": "#00FFCC",
#     "B.cell": "#F0E442",
#     "CD4.INF": "#0066FF",
#     "CD4.rest": "#FF00FF",
#     "CD8.cytotoxic": "#00FF00",
#     "CD8.EM": "#FF6666",
#     "CD8.NK.sig": "#FFCC00",
#     "cDC1": "#00FFFF",
#     "cDC2": "#FF0066",
#     "DC1": "#CCFF00",
#     "DC2": "#0000FF",
#     "DC3": "#FFCCCC",
#     "Endo.arterial": "#CC00FF",
#     "Endo.capilar": "#66FF00",
#     "Mast": "#FF00CC",
#     "MES.like.hypoxia.independent": "#00CCFF",
#     "MES.like.hypoxia.MHC": "#003399",
#     "Mono.anti.infl": "#FF3366",
#     "Mono.hypoxia": "#00FF66",
#     "Mono.naive": "#FF9999",
#     "Neuron": "#6600FF",
#     "NK": "#FFE6E6",
#     "NPC.like.neural": "#0072B2",
#     "NPC.like.OPC": "#FF0000",
#     "NPC.like.Prolif": "#999900",
#     "Oligodendrocyte": "#666666",
#     "OPC": "#CCFF99",
#     "OPC.like": "#000000",
#     "OPC.like.Prolif": "#990000",
#     "pDC": "#993300",
#     "Pericyte": "#996600",
#     "Perivascular.fibroblast": "#999999",
#     "Plasma.B": "#669900",
#     "Prolif.T": "#339900",
#     "Reg.T": "#CC79A7",
#     "RG": "#009933",
#     "Scavenging.endothelial": "#990099",
#     "Scavenging.pericyte": "#009900",
#     "SMC": "#330099",
#     "SMC.COL": "#CC9999",
#     "SMC.prolif": "#009999",
#     "Stress.sig": "#990066",
#     "TAM.BDM.anti.infl": "#990033",
#     "TAM.BDM.hypoxia.MES": "#CC3333",
#     "TAM.BDM.INF": "#CC6666",
#     "TAM.BDM.MHC": "#660099",
#     "TAM.MG.aging.sig": "#CCCC99",
#     "TAM.MG.pro.infl.I": "#56B4E9",
#     "TAM.MG.pro.infl.II": "#333333",
#     "TAM.MG.prolif": "#99CC99",
#     "Tip.like": "#99CC66",
#     "VLMC": "#99CC33",
#     "MES.like.hypoxia.independent" : "#990033",
#     "malignantcell" : "#99CB66"
# }


# Fill color for each deconvolved cell type. Several entries reuse the color of
# a finer-grained label from the commented-out palette above; the source label
# is named next to each of them. Every cell type gets a distinct color so that
# pie wedges and legend entries stay distinguishable.
colordict = {
    "malignantcell": "#99CB66",
    "macrophage": "#FF3366",  # same color as "Mono.anti.infl"
    "muralcell": "#996600",  # same color as "Pericyte"
    "dendriticcell": "#00FFFF",  # same color as "cDC1"
    "microglialcell": "#CCCC99",  # same color as "TAM.MG.aging.sig"
    "monocyte": "#FF9999",  # same color as "Mono.naive"
    "oligodendrocyte": "#666666",
    # Was "#00FFCC", which is the astrocyte color; "Endo.capilar" is "#66FF00".
    "endothelialcell": "#66FF00",  # same color as "Endo.capilar"
    "matureTcell": "#0066FF",  # same color as "CD4.INF"
    "oligodendrocyteprecursorcell": "#CCFF99",  # same color as "OPC"
    "mastcell": "#FF00CC",
    "Bcell": "#F0E442",
    "plasmacell": "#669900",
    "naturalkillercell": "#FFE6E6",  # same color as "NK"
    "astrocyte": "#00FFCC",
    "radialglialcell": "#009933",  # same color as "RG"
    "neuron": "#6600FF"
}

In [None]:
# from google.colab import drive

# drive.mount('/content/drive')

In [46]:

    # <! ------------------------------------------------------------------------!>
    # <!                           DATA PREPARATION                              !>
    # <! ------------------------------------------------------------------------!>


def process_data(norm_weights_filepath, st_coords_filepath, data_clustered, image_path, n_largest_cell_types, scale_factor):
    """Merge deconvolution weights, spot coordinates and cluster labels per barcode.

    Parameters
    ----------
    norm_weights_filepath : path to a tab-separated table of deconvolution
        weights (barcodes as index, one column per cell type).
    st_coords_filepath : path to a header-less CSV whose first column is the
        barcode, followed by in_tissue, array_row, array_col,
        pxl_row_in_fullres and pxl_col_in_fullres.
    data_clustered : path to a CSV holding the barcode in its "Unnamed: 0"
        column and the cluster label in its "BayesSpace" column.
    image_path : path to the tissue image; only its pixel size is read.
    n_largest_cell_types : number of top-weighted cell types kept per spot.
    scale_factor : multiplier applied to both full-resolution pixel columns.

    Returns
    -------
    (reduced_df, image_display_infos)
        reduced_df has one row per barcode present in BOTH the weights and the
        coordinates tables, with the scaled pixel coordinates, "Cluster",
        "in_tissue", and for each rank i the columns "Deconv_cell{i}" (cell
        type name) and "Deconv_cell{i}_norm_value" (weight rescaled so the kept
        weights of a spot sum to 1).
        image_display_infos is a dict with image_path, x0, y0, im_w and im_h.

    Raises
    ------
    ValueError
        If more cell types are requested than the weights table contains.
    """
    # Imported locally, as before, so this cell does not rely on a later cell.
    from PIL import Image

    # Read the spatial deconvolution result (tab-separated).
    norm_weights_df = pd.read_csv(norm_weights_filepath, sep='\t')
    norm_weights_df.index.name = None

    if n_largest_cell_types > norm_weights_df.shape[1]:
        raise ValueError(
            f"n_largest_cell_types={n_largest_cell_types} exceeds the "
            f"{norm_weights_df.shape[1]} cell types available in {norm_weights_filepath}"
        )

    # Read the spatial coordinates and rescale the pixel positions.
    st_coords_df = pd.read_csv(st_coords_filepath, header=None).set_index(0)
    st_coords_df.index.name = None
    st_coords_df.columns = ["in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres"]
    st_coords_df["pxl_row_in_fullres"] = st_coords_df["pxl_row_in_fullres"] * scale_factor
    st_coords_df["pxl_col_in_fullres"] = st_coords_df["pxl_col_in_fullres"] * scale_factor

    # Only the image dimensions are needed; the context manager releases the
    # file handle as soon as they have been read.
    with Image.open(image_path) as im:
        im_w, im_h = im.size
    image_display_infos = {
        "image_path": image_path,
        "x0": 0,
        "y0": 0,
        "im_w": im_w,
        "im_h": im_h,
    }

    # Keep only barcodes present in both tables.
    merged_df = pd.concat([st_coords_df, norm_weights_df], axis=1, join='inner')

    # Attach the cluster label of each barcode (aligned on the index).
    data_with_clusters = pd.read_csv(data_clustered)
    merged_df["Cluster"] = data_with_clusters.set_index("Unnamed: 0")["BayesSpace"]

    # Showing every cell type on hover is impractical, so keep the N largest
    # weights per spot. The ranking is done on the merged barcodes only:
    # ranking every barcode of the weights file would fail for barcodes that
    # have no coordinates.
    ranked_weights = [
        spot_weights.nlargest(n_largest_cell_types)
        for _, spot_weights in norm_weights_df.loc[merged_df.index].iterrows()
    ]
    for rank in range(n_largest_cell_types):
        merged_df[f"Deconv_cell{rank + 1}"] = [top.index[rank] for top in ranked_weights]
        merged_df[f"Deconv_cell{rank + 1}_value"] = [top.iloc[rank] for top in ranked_weights]

    # Rescale the kept weights so that they sum to 1 per spot (a full pie).
    # The per-spot total does not depend on the rank, so compute it once.
    deconv_weight_columns = [f"Deconv_cell{rank + 1}_value" for rank in range(n_largest_cell_types)]
    total = merged_df[deconv_weight_columns].sum(axis=1)
    for rank in range(n_largest_cell_types):
        merged_df[f"Deconv_cell{rank + 1}_norm_value"] = merged_df[f"Deconv_cell{rank + 1}_value"] / total

    # Slim the frame down to the columns used by the visualizations.
    columns_of_interest = (
        ['pxl_row_in_fullres', 'pxl_col_in_fullres', 'Cluster', "in_tissue"]
        + [f"Deconv_cell{rank + 1}_norm_value" for rank in range(n_largest_cell_types)]
        + [f"Deconv_cell{rank + 1}" for rank in range(n_largest_cell_types)]
    )
    reduced_df = merged_df.loc[:, columns_of_interest]
    return reduced_df, image_display_infos

In [47]:
# <! ------------------------------------------------------------------------!>
# <!                       BOKEH VISUALIZATION                               !>
# <! ------------------------------------------------------------------------!>
from bokeh.events import ButtonClick
from bokeh.models import BoxAnnotation, Label, Plot, Rect, Text, Button, CustomJS, Div,Slider, PanTool
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.layouts import column, row, gridplot,Spacer
from PIL import Image
import numpy as np
def vis_with_separate_clusters_view(reduced_df, image_display_infos, nb_spots_samples, output , show_legend = False, show_figure = False, n_largest_cell_types = None):
        """Save an HTML page with two switchable views drawn over the tissue image.

        View 1 ("Clustering results") draws one marker per in-tissue spot,
        colored by cluster. View 2 ("Deconvolution results") draws one pie per
        spot whose wedges are the normalized weights of its top cell types;
        pies are hidden initially and toggled per cluster from the legend.
        Buttons switch between the views, a slider sets the image alpha and a
        download button exports the plotted table as CSV.

        Parameters
        ----------
        reduced_df : DataFrame with the columns "in_tissue",
            "pxl_row_in_fullres", "pxl_col_in_fullres", "Cluster" and, for each
            rank i, "Deconv_cell{i}" and "Deconv_cell{i}_norm_value".
        image_display_infos : dict providing "image_path", "x0", "y0", "im_w"
            and "im_h".
        nb_spots_samples : maximum number of in-tissue spots to draw.
        output : path of the HTML file to write.
        show_legend : accepted for interface compatibility; not read, both
            legends are always made visible.
        show_figure : when true, also open the page with ``show``.
        n_largest_cell_types : number of ranked cell types to draw. When None
            (default) it is inferred from the "Deconv_cell{i}_norm_value"
            columns of ``reduced_df`` instead of being read from a notebook
            global.
        """
        if n_largest_cell_types is None:
            n_largest_cell_types = sum(
                1 for name in reduced_df.columns
                if str(name).startswith("Deconv_cell") and str(name).endswith("_norm_value")
            )
        ranks = range(1, n_largest_cell_types + 1)

        im_w = image_display_infos.get("im_w")
        im_h = image_display_infos.get("im_h")

        # Keep only spots lying on the tissue, capped at nb_spots_samples rows.
        test_df = reduced_df[reduced_df["in_tissue"] == 1].head(nb_spots_samples).copy()

        def spot_label(spot):
            # Tooltip line giving the spot position.
            return f"<span style='color: red;'> Spot</span> : (x = {spot['pxl_col_in_fullres']:.2f}, y = {-spot['pxl_row_in_fullres']:.2f})"

        def deconv_label(spot):
            # One color swatch + name + percentage per ranked cell type,
            # followed by the spot position.
            parts = [
                f"<div style='display:flex;align-items:center;'>"
                f"<div style='width:10px;height:10px;background-color:{colordict.get(spot[f'Deconv_cell{i}'], '#000000')};margin-right:5px;'></div>"
                f"<span style='color: blue;'>{spot[f'Deconv_cell{i}']}</span>: {spot[f'Deconv_cell{i}_norm_value']*100:.2f}%"
                f"</div>"
                for i in ranks
            ]
            return '<br>'.join(parts + [spot_label(spot)])

        # NOTE(review): spot coordinates are divided by 100 while the image
        # spans im_w x im_h data units and the pie radius below is 80 --
        # confirm these scales are meant to match.
        data = {
            'x': [px_col / 100 for px_col in test_df["pxl_col_in_fullres"].tolist()],
            'y': [-px_row / 100 for px_row in test_df["pxl_row_in_fullres"].tolist()],
            'tooltip_data': [spot_label(spot) for _, spot in test_df.iterrows()],
            'Cluster': test_df['Cluster'].tolist(),
        }
        cluster_df = pd.DataFrame(data)

        # ---- View 1: clusters -------------------------------------------------
        p = figure(width = im_w, height = im_h,
                    title = "Clustering results",
                    x_axis_label = 'x',
                    y_axis_label = 'y',
                   output_backend="webgl"
                    )
        # Values are pulled into locals first: reusing double quotes inside an
        # f-string is a syntax error before Python 3.12.
        print(f"width = {im_w} height = {im_h}")

        # The image is shared by both figures through one data source so that a
        # single slider drives the alpha of both.
        image_source = ColumnDataSource(data=dict(
            url=[image_display_infos.get("image_path")],
            x=[image_display_infos.get("x0")],
            y=[image_display_infos.get("y0")],
            w=[im_w],
            h=[im_h],
            alpha=[1.0]  # Initial alpha value
        ))
        p.image_url(url='url', x='x', y='y', w='w', h='h', alpha='alpha', source=image_source)

        slider = Slider(start=0, end=1, value=1, step=.1, title="Image Transparency")
        callback = CustomJS(args=dict(image_source=image_source), code="""
            var alpha = cb_obj.value;
            image_source.data['alpha'] = [alpha];
            image_source.change.emit();
        """)
        slider.js_on_change('value', callback)

        # One scatter renderer (and legend entry) per cluster.
        for cluster, group in cluster_df.groupby('Cluster'):
            p.scatter(x='x', y='y', size=15,
                      marker="circle",
                      fill_color=clusters_colordict.get(cluster, '#000000'),
                      line_width=0,
                      source=ColumnDataSource(group),
                      legend_label=f"Cluster {cluster}")

        # ---- View 2: deconvolution pies ---------------------------------------
        data["tooltip_data"] = [deconv_label(spot) for _, spot in test_df.iterrows()]
        for i in ranks:
            data[f'DeconvCell{i}'] = test_df[f'Deconv_cell{i}'].tolist()
            data[f'DeconvCell{i}_w'] = test_df[f'Deconv_cell{i}_norm_value'].tolist()
        df = pd.DataFrame(data)

        plot = figure(width=im_w, height=im_h,
                   title="Deconvolution results",
                   x_axis_label='x',
                   y_axis_label='y',
                   output_backend="webgl",
                  )
        plot.image_url(url='url', x='x', y='y', w='w', h='h', alpha='alpha', source=image_source)

        # Collect the wedges of all spots of a cluster into one data source so
        # that each cluster is a single renderer (instead of one renderer per
        # wedge per spot). Legend entries keep their first-appearance order.
        wedges_by_label = {}
        for _, spot in df.iterrows():
            batch = wedges_by_label.setdefault(f"Cluster {spot['Cluster']}", {
                'x': [], 'y': [], 'start_angle': [], 'end_angle': [],
                'fill_color': [], 'tooltip_data': [],
            })
            start_angle = 0
            for i in ranks:
                end_angle = start_angle + spot[f'DeconvCell{i}_w'] * 2 * pi
                batch['x'].append(spot['x'])
                batch['y'].append(spot['y'])
                batch['start_angle'].append(start_angle)
                batch['end_angle'].append(end_angle)
                # Unknown cell types fall back to black, as in the tooltip.
                batch['fill_color'].append(colordict.get(spot[f'DeconvCell{i}'], '#000000'))
                batch['tooltip_data'].append(spot['tooltip_data'])
                start_angle = end_angle
        for label, batch in wedges_by_label.items():
            plot.wedge(x='x', y='y', radius=80,
                       start_angle='start_angle', end_angle='end_angle',
                       line_width=0, fill_color='fill_color',
                       legend_label=label, source=ColumnDataSource(batch), visible=False)

        text1 = """
            <div style="
                background-color: #f0f0f0;
                border: 2px solid #3c3c3c;
                border-radius: 10px;
                padding: 15px;
                margin: 10px 0;
                font-family: 'Helvetica', 'Arial', sans-serif;
                font-size: 14px;
                line-height: 1.5;
                color: #333333;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            ">
                <h3 style="
                    margin-top: 0;
                    color: #2c3e50;
                    font-size: 18px;
                    border-bottom: 1px solid #bdc3c7;
                    padding-bottom: 8px;
                ">Information sur la visualisation</h3>
                <p style="margin-bottom: 0;">
                    Cette vue montre les clusters. Chaque point représente un spot,
                    et les couleurs indiquent les différents clusters.
                </p>
            </div>
            """
        # A stray trailing double quote was removed from the paragraph below.
        text2 = """
            <div style="
                background-color: #f0f0f0;
                border: 2px solid #3c3c3c;
                border-radius: 10px;
                padding: 15px;
                margin: 10px 0;
                font-family: 'Helvetica', 'Arial', sans-serif;
                font-size: 14px;
                line-height: 1.5;
                color: #333333;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            ">
                <h3 style="
                    margin-top: 0;
                    color: #2c3e50;
                    font-size: 18px;
                    border-bottom: 1px solid #bdc3c7;
                    padding-bottom: 8px;
                ">Information sur la visualisation</h3>
                <p style="margin-bottom: 0;">
                    Cette vue montre la déconvolution par cluster. Les graphiques représentent
                    la distribution des cellules dans chaque spot.
                </p>
            </div>
            """
        # Information panel whose text follows the active view.
        info_box = Div(
            text= text1,
            width=1000,
            height=120
        )

        # View-switching buttons: toggle figure visibility and swap the text.
        show_all_button = Button(label="Show Clusters", width=100)
        show_all_button.js_on_click(CustomJS(args=dict(p=p, plot=plot, info_box=info_box, text1 = text1), code="""
            p.visible = true;
            plot.visible = false;
            info_box.text = text1;
        """))
        spacer = Spacer(width=50)
        button = Button(label="Show deconvolution by Cluster", width=120)
        button.js_on_click(CustomJS(args=dict(p=p, plot=plot, info_box=info_box, text2 = text2), code="""
            p.visible = false;
            plot.visible = true;
            info_box.text = text2;
        """))
        spacer1 = Spacer(width=100)
        spacer2 = Spacer(width=100)

        # The plotted table (without the HTML tooltips) is embedded as CSV text
        # and offered for download from the browser.
        csv_source = ColumnDataSource({'data': [df.drop(columns=['tooltip_data']).to_csv(index=False)]})
        download_button = Button(label="Download raw data", width=100)
        download_button.js_on_click(CustomJS(args=dict(source=csv_source), code="""
            const data = source.data['data'][0];
            const blob = new Blob([data], { type: 'text/csv;charset=utf-8;' });
            const url = URL.createObjectURL(blob);
            const link = document.createElement('a');
            link.href = url;
            link.download = "raw_data.csv";
            link.click();
            URL.revokeObjectURL(url);
        """))
        tap_tool = TapTool()
        p.add_tools(tap_tool)
        hover = HoverTool(tooltips="""
            <div style="width:220px">
                @tooltip_data
            </div>
        """)
        p.add_tools(hover)
        plot.add_tools(hover)

        # Move both legends to the left of their figure.
        leg = p.legend[0]
        p.add_layout(leg,'left')
        leg_plot = plot.legend[0]
        leg_plot.glyph_width = 0
        plot.add_layout(leg_plot,'left')

        # No local variable is named `row` in this function, so the
        # module-level bokeh.layouts `row` / `column` are used directly.
        buttons_row = row(show_all_button, spacer, button, spacer1, download_button, spacer2, slider)
        layout = column(buttons_row, info_box, p, plot)

        p.legend.location = "top_right"
        p.legend.click_policy = "hide"
        p.legend.visible = True
        plot.legend.location = "top_right"
        plot.legend.click_policy = "hide"
        plot.visible = False
        plot.legend.visible = True

        if show_figure:
            show(layout)
        output_file(output, mode='inline')
        save(layout)

In [48]:
# Inputs for sample 2. These names stay at module level because later cells
# read `processed_data`, `nb_spots_samples` and `n_largest_cell_types`.
norm_weights_filepath = "res_rctd_cluster/proportions_rctd_sample2"
st_coords_filepath = "tissue_positions_list_248.csv"
data_clustered = "seurat_metadata_UKF248_T_ST.csv"
n_largest_cell_types = 5
scale_factor = 0.24414062

# process_data returns (reduced spot table, image display info).
processed_data = process_data(
    norm_weights_filepath,
    st_coords_filepath,
    data_clustered,
    "original_tissue_images/tissue_hires_image_248.png",
    n_largest_cell_types,
    scale_factor=scale_factor,
)

# Plot every spot of the reduced table.
nb_spots_samples = processed_data[0].shape[0]
output_html = "visium_plot_sample2.html"
print(processed_data[1])
vis_with_separate_clusters_view(
    reduced_df=processed_data[0],
    image_display_infos=processed_data[1],
    nb_spots_samples=nb_spots_samples,
    output=output_html,
)

{'image_path': 'original_tissue_images/tissue_hires_image_248.png', 'x0': 0, 'y0': 0, 'im_w': 1907, 'im_h': 2000}
width = 1907 height = 2000


In [27]:
# Scratch cell comparing table dimensions. In a notebook only the value of the
# last expression of a cell is displayed, so the first two `.shape` lines
# produce no output.
processed_data[0].shape
# Deconvolution proportions (tab-separated); the file name suggests sample 3.
deconv = pd.read_csv("res_rctd_cluster/proportions_rctd_sample3", sep = '\t')
deconv.shape

# Metadata table read from CSV; its shape is this cell's displayed result.
res_clust = pd.read_csv("seurat_metadata_UKF255_T_ST.csv")
res_clust.shape

(1937, 11)

In [101]:
# <! ------------------------------------------------------------------------!>
# <!                       BOKEH VISUALIZATION                               !>
# <! ------------------------------------------------------------------------!>
from bokeh.models import  Wedge

def vis_with_proportions(reduced_df, nb_spots_samples, output, show_legend = False, width =1000, height = 1000 , show_fig = False, n_largest_cell_types = None):
        """Save an HTML page drawing one pie of top cell-type weights per spot.

        Parameters
        ----------
        reduced_df : DataFrame with, for each rank i, the columns
            "Deconv_cell{i}" and "Deconv_cell{i}_norm_value", plus the spot
            position either as "x" / "y" or as "pxl_row_in_fullres" /
            "pxl_col_in_fullres" (the names emitted by ``process_data``).
        nb_spots_samples : number of leading rows of ``reduced_df`` to draw.
        output : path of the HTML file to write.
        show_legend : whether the cell-type legend is visible.
        width, height : figure size in pixels.
        show_fig : when true, also open the page with ``show``.
        n_largest_cell_types : number of ranked cell types to draw. When None
            (default) it is inferred from the "Deconv_cell{i}_norm_value"
            columns of ``reduced_df`` instead of being read from a notebook
            global.

        Returns
        -------
        The Bokeh layout (download button, information box, figure).
        """
        # Local import kept from the original so the function does not depend
        # on which cell last bound the name `column`.
        from bokeh.layouts import column

        if n_largest_cell_types is None:
            n_largest_cell_types = sum(
                1 for name in reduced_df.columns
                if str(name).startswith("Deconv_cell") and str(name).endswith("_norm_value")
            )
        ranks = range(1, n_largest_cell_types + 1)

        # Accept both the historical "x"/"y" columns and the column names
        # produced by process_data ("x" played the role of the pixel row).
        row_key = 'x' if 'x' in reduced_df.columns else 'pxl_row_in_fullres'
        col_key = 'y' if 'y' in reduced_df.columns else 'pxl_col_in_fullres'

        # First nb_spots_samples spots. The previous `iloc[1:n]` slice silently
        # dropped the first spot.
        test_df = reduced_df.head(nb_spots_samples).copy()

        def spot_tooltip(spot):
            # One color swatch + name + percentage per ranked cell type,
            # followed by the spot position (formatted, not a raw tuple repr).
            parts = [
                f"<div style='display:flex;align-items:center;'>"
                f"<div style='width:10px;height:10px;background-color:{colordict.get(spot[f'Deconv_cell{i}'], '#000000')};margin-right:5px;'></div>"
                f"<span style='color: blue;'>{spot[f'Deconv_cell{i}']}</span>: {spot[f'Deconv_cell{i}_norm_value']*100:.2f}%"
                f"</div>"
                for i in ranks
            ]
            position = f"<span style='color: red;'> Spot</span> ({spot[row_key]:.2f}, {spot[col_key]:.2f})"
            return '<br>'.join(parts + [position])

        # Plot coordinates: horizontal axis from the pixel column, vertical
        # axis from the negated pixel row, both divided by 100.
        data = {
            'x': [value / 100 for value in test_df[col_key].tolist()],
            'y': [-value / 100 for value in test_df[row_key].tolist()],
            'x_full': test_df[col_key].tolist(),
            'y_full': [-value for value in test_df[row_key].tolist()],
            'tooltip_data': [spot_tooltip(spot) for _, spot in test_df.iterrows()],
        }
        for i in ranks:
            data[f'DeconvCell{i}'] = test_df[f'Deconv_cell{i}'].tolist()
            data[f'DeconvCell{i}_w'] = test_df[f'Deconv_cell{i}_norm_value'].tolist()
        df = pd.DataFrame(data)

        p = figure(width=width, height=height,
                   title="Deconvolution results",
                   x_axis_label='x',
                   y_axis_label='y',
                   output_backend="webgl",
                  )

        # Collect all wedges of a cell type into one data source so that each
        # cell type is a single renderer (instead of one renderer per wedge per
        # spot). Legend entries keep their first-appearance order.
        wedges_by_type = {}
        for _, spot in df.iterrows():
            start_angle = 0
            for i in ranks:
                end_angle = start_angle + spot[f'DeconvCell{i}_w'] * 2 * pi
                batch = wedges_by_type.setdefault(spot[f'DeconvCell{i}'], {
                    'x': [], 'y': [], 'start_angle': [], 'end_angle': [], 'tooltip_data': [],
                })
                batch['x'].append(spot['x'])
                batch['y'].append(spot['y'])
                batch['start_angle'].append(start_angle)
                batch['end_angle'].append(end_angle)
                batch['tooltip_data'].append(spot['tooltip_data'])
                start_angle = end_angle
        for cell_type, batch in wedges_by_type.items():
            p.wedge(x='x', y='y', radius=0.05,
                    start_angle='start_angle', end_angle='end_angle',
                    line_color="white",
                    # Unknown cell types fall back to black, as in the tooltip.
                    fill_color=colordict.get(cell_type, '#000000'),
                    legend_label=f"{cell_type}", source=ColumnDataSource(batch))

        leg = p.legend[0]
        p.add_layout(leg, 'left')
        p.legend.visible= show_legend
        hover = HoverTool(tooltips="""
            <div style="width:200px">
                <h3>Proportions:</h3>
                @tooltip_data
            </div>
        """)
        p.add_tools(hover)

        # The plotted table (without the HTML tooltips) is embedded as CSV text
        # and offered for download from the browser.
        csv_source = ColumnDataSource({'data': [df.drop(columns=['tooltip_data']).to_csv(index=False)]})
        download_button = Button(label="Download raw data", width=100)
        download_button.js_on_click(CustomJS(args=dict(source=csv_source), code="""
            const data = source.data['data'][0];
            const blob = new Blob([data], { type: 'text/csv;charset=utf-8;' });
            const url = URL.createObjectURL(blob);
            const link = document.createElement('a');
            link.href = url;
            link.download = "raw_data.csv";
            link.click();
            URL.revokeObjectURL(url);
        """))

        # User-facing text: the missing verb was added and a stray trailing
        # double quote removed.
        text = """
            <div style="
                background-color: #f0f0f0;
                border: 2px solid #3c3c3c;
                border-radius: 10px;
                padding: 15px;
                margin: 10px 0;
                font-family: 'Helvetica', 'Arial', sans-serif;
                font-size: 14px;
                line-height: 1.5;
                color: #333333;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            ">
                <h3 style="
                    margin-top: 0;
                    color: #2c3e50;
                    font-size: 18px;
                    border-bottom: 1px solid #bdc3c7;
                    padding-bottom: 8px;
                ">Information sur la visualisation</h3>
                <p style="margin-bottom: 0;">
                    Cette vue montre les resultats de la deconvolution, chaque element de la figure 
                    est un spot.
                </p>
            </div>
            """
        info_box = Div(
            text= text,
            width=1000,
            height=120
        )

        layout = column(download_button, info_box, p)

        if show_fig :
            show(layout)
        output_file(output)
        save(layout)
        return layout

In [102]:
nb_spots_samples = 100
output_html = "visium_plot_sample1_vis1.html"
# process_data returns (reduced_df, image_display_infos); the plotting function
# expects the DataFrame only, so pass the first element rather than the tuple.
vis_with_proportions(processed_data[0], nb_spots_samples, output_html )

In [100]:
nb_spots_samples = 100
output_html = "visium_plot_sample1_majoritaires.html"
# process_data returns (reduced_df, image_display_infos); the plotting function
# expects the DataFrame only, so pass the first element rather than the tuple.
# vis_type_majoritaires is defined in the cell below, which must be run first.
vis_type_majoritaires(processed_data[0], nb_spots_samples, output_html )

In [99]:
# <! ------------------------------------------------------------------------!>
# <!                       BOKEH VISUALIZATION                               !>
# <! ------------------------------------------------------------------------!>

def vis_type_majoritaires(reduced_df, nb_spots_samples, output, show_figure = False, show_legend = False, width =1000, height = 1000 ):
        """Save an HTML page drawing each spot colored by its top cell type.

        Parameters
        ----------
        reduced_df : DataFrame with the columns "Deconv_cell1" and
            "Deconv_cell1_norm_value", plus the spot position either as
            "x" / "y" or as "pxl_row_in_fullres" / "pxl_col_in_fullres" (the
            names emitted by ``process_data``).
        nb_spots_samples : number of leading rows of ``reduced_df`` to draw.
        output : path of the HTML file to write.
        show_figure : when true, also open the page with ``show``.
        show_legend : accepted for interface compatibility; not read, the
            legend is always shown.
        width, height : figure size in pixels (previously ignored in favor of a
            hard-coded 1000 x 1000, which is still the default).
        """
        # Local import kept from the original so the function does not depend
        # on which cell last bound the name `column`.
        from bokeh.layouts import column

        # Accept both the historical "x"/"y" columns and the column names
        # produced by process_data ("x" played the role of the pixel row).
        row_key = 'x' if 'x' in reduced_df.columns else 'pxl_row_in_fullres'
        col_key = 'y' if 'y' in reduced_df.columns else 'pxl_col_in_fullres'

        # First nb_spots_samples spots. The previous `iloc[1:n]` slice silently
        # dropped the first spot.
        test_df = reduced_df.head(nb_spots_samples).copy()

        def spot_tooltip(spot):
            # Dominant cell type with its percentage, then the spot position.
            return '<br>'.join([
                f"<span style='color: blue;'>{spot['Deconv_cell1']}</span>: {spot['Deconv_cell1_norm_value']*100:.2f}%",
                f"<span style='color: red;'> Spot</span> {spot[row_key]:.2f} , {spot[col_key]:.2f}",
            ])

        # Plot coordinates: horizontal axis from the pixel column, vertical
        # axis from the negated pixel row, both divided by 100.
        data = {
            'x': [value / 100 for value in test_df[col_key].tolist()],
            'y': [-value / 100 for value in test_df[row_key].tolist()],
            'x_full': test_df[col_key].tolist(),
            'y_full': [-value for value in test_df[row_key].tolist()],
            'tooltip_data': [spot_tooltip(spot) for _, spot in test_df.iterrows()],
        }
        data['DeconvCell1'] = test_df['Deconv_cell1'].tolist()
        data['DeconvCell1_w'] = test_df['Deconv_cell1_norm_value'].tolist()
        df = pd.DataFrame(data)

        p = figure(width = width, height = height,
                    title = "Deconvolution results",
                    x_axis_label = 'x',
                    y_axis_label = 'y',
                   output_backend="webgl"
                    )

        # One renderer per cell type (instead of one per spot); sort=False
        # keeps legend entries in first-appearance order.
        for cell_type, group in df.groupby('DeconvCell1', sort=False):
            # Unknown cell types fall back to black instead of raising KeyError.
            color = colordict.get(cell_type, '#000000')
            p.scatter(x='x', y='y', size=15,
                      marker="circle",
                      fill_color=color, line_color=color,
                      line_width=0,
                      source=ColumnDataSource(group),
                      legend_label=f"{cell_type}")

        hover = HoverTool(tooltips="""
            <div style="width:200px">
                <h3>Proportions:</h3>
                @tooltip_data
            </div>
        """)
        p.add_tools(hover)
        legend = p.legend[0]
        p.add_layout(legend, 'left')

        # The plotted table (without the HTML tooltips) is embedded as CSV text
        # and offered for download from the browser.
        csv_source = ColumnDataSource({'data': [df.drop(columns=['tooltip_data']).to_csv(index=False)]})
        download_button = Button(label="Download raw data", width=100)
        download_button.js_on_click(CustomJS(args=dict(source=csv_source), code="""
            const data = source.data['data'][0];
            const blob = new Blob([data], { type: 'text/csv;charset=utf-8;' });
            const url = URL.createObjectURL(blob);
            const link = document.createElement('a');
            link.href = url;
            link.download = "raw_data.csv";
            link.click();
            URL.revokeObjectURL(url);
        """))

        # User-facing text: a stray trailing double quote was removed.
        text = """
            <div style="
                background-color: #f0f0f0;
                border: 2px solid #3c3c3c;
                border-radius: 10px;
                padding: 15px;
                margin: 10px 0;
                font-family: 'Helvetica', 'Arial', sans-serif;
                font-size: 14px;
                line-height: 1.5;
                color: #333333;
                box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            ">
                <h3 style="
                    margin-top: 0;
                    color: #2c3e50;
                    font-size: 18px;
                    border-bottom: 1px solid #bdc3c7;
                    padding-bottom: 8px;
                ">Information sur la visualisation</h3>
                <p style="margin-bottom: 0;">
                    Cette vue montre le type cellulaire majoritaire de chaque spot
                </p>
            </div>
            """
        info_box = Div(
            text= text,
            width=1000,
            height=120
        )

        layout = column(download_button, info_box, p)

        if show_figure:
            show(layout)
        output_file(output)
        save(layout)

In [364]:
# from PIL import Image

# def make_white_transparent(image_path, output_path, resize_dimensions=None):
#     # Open the image
#     img = Image.open(image_path).convert("RGBA")

#     # Get the data of the image
#     data = img.getdata()

#     new_data = []

#     # Process each pixel to make white pixels transparent
#     for item in data:
#         if item[0] > 200 and item[1] > 200 and item[2] > 200:
#             new_data.append((255, 255, 255, 0))
#         else:
#             new_data.append(item)

#     img.putdata(new_data)

#     # Convert the image data to a numpy array for easier processing
#     import numpy as np
#     img_array = np.array(img)

#     # Find the bounding box of the non-transparent pixels
#     non_transparent_pixels = np.any(img_array[:, :, 3] != 0, axis=1)
#     non_transparent_columns = np.any(img_array[:, :, 3] != 0, axis=0)

#     top_row = np.argmax(non_transparent_pixels)
#     bottom_row = len(non_transparent_pixels) - np.argmax(non_transparent_pixels[::-1])
#     left_column = np.argmax(non_transparent_columns)
#     right_column = len(non_transparent_columns) - np.argmax(non_transparent_columns[::-1])

#     # Crop the image to the bounding box
#     cropped_img = img.crop((left_column, top_row, right_column, bottom_row))

#     # # Resize the image if resize dimensions are provided
#     # if resize_dimensions:
#     #     cropped_img = cropped_img.resize(resize_dimensions, Image.ANTIALIAS)

#     # Save the image
#     cropped_img.save(output_path, "PNG")

# # Example usage
# make_white_transparent("tissue_hires_image.png", "tissue_hires_image_transformed.png", resize_dimensions=(200, 200))

from PIL import Image
import numpy as np

def remove_non_violet_lines_and_columns(image_path, output_path, violet_lower=(100, 0, 100), violet_upper=(200, 100, 255)):
    """Drop every pixel row and column that contains no violet pixel.

    The image is read as RGB. A pixel counts as violet when each of its three
    channels lies within the inclusive ``violet_lower`` / ``violet_upper``
    bounds. Every row and every column without such a pixel is removed
    (anywhere in the image, not only at the borders) and the remaining pixels
    are written to ``output_path``.

    Parameters
    ----------
    image_path : str
        Path of the image to read.
    output_path : str
        Path the cropped image is saved to; the format is chosen by Pillow
        from the file extension.
    violet_lower, violet_upper : tuple of 3 ints
        Inclusive per-channel (R, G, B) bounds defining "violet".

    Raises
    ------
    ValueError
        If no pixel falls inside the violet range, since nothing would be
        left to save.
    """
    # Load the pixels and release the file handle right away: `output_path`
    # may be the very same file as `image_path`.
    with Image.open(image_path) as img:
        img_array = np.array(img.convert("RGB"))

    # True where all three channels are inside the violet bounds.
    violet_mask = np.all((img_array >= violet_lower) & (img_array <= violet_upper), axis=2)

    # Indices of the rows / columns holding at least one violet pixel.
    rows_to_keep = np.flatnonzero(violet_mask.any(axis=1))
    cols_to_keep = np.flatnonzero(violet_mask.any(axis=0))

    # A single violet pixel makes both index arrays non-empty, so checking
    # one of them is enough. Fail early with a clear message instead of
    # handing an empty array to Pillow.
    if rows_to_keep.size == 0:
        raise ValueError(
            f"No pixel of {image_path} lies within the violet range "
            f"{violet_lower}..{violet_upper}; nothing to save"
        )

    # Keep only the selected rows and columns (all channels).
    cropped_array = img_array[np.ix_(rows_to_keep, cols_to_keep)]

    # Convert back to an image and save it.
    Image.fromarray(cropped_array).save(output_path)

    print(f"Image processed and saved as {output_path}")
    print(f"Removed {img_array.shape[0] - len(rows_to_keep)} rows and {img_array.shape[1] - len(cols_to_keep)} columns")

# Example usage



Image processed and saved as tissue_hires_image_transformed.png
Removed 312 rows and 440 columns


In [None]:
# <! ------------------------------------------------------------------------!>
# <!                       BOKEH VISUALIZATION                               !>
# <! ------------------------------------------------------------------------!>


def vis_with_clustring(reduced_df, nb_spots_samples, show_figure=False, show_legend=False, width=1000, height=1000):
    """Plot spots as small discs coloured by their cluster.

    Parameters
    ----------
    reduced_df : pandas.DataFrame
        Must provide the columns ``'x'``, ``'y'`` and ``'Cluster'``. Every
        cluster value must be a key of the module-level ``clusters_colordict``.
    nb_spots_samples : int or None
        Number of leading rows of ``reduced_df`` to plot (``None`` plots all).
    show_figure : bool
        When true, the figure is displayed with ``show``.
    show_legend : bool
        Whether the legend is visible.
    width, height : int
        Figure size passed to ``figure``.

    Returns
    -------
    The Bokeh figure that was built, so the caller can save or embed it.
    """
    # Keep the first `nb_spots_samples` spots. The previous slice started at
    # position 1, which silently dropped the first spot.
    spots_df = reduced_df.iloc[:nb_spots_samples]
    xs = spots_df['x'].tolist()
    ys = spots_df['y'].tolist()
    clusters = spots_df['Cluster'].tolist()

    # Gather the plotted columns per cluster, in order of first appearance.
    # One data source / renderer per cluster (instead of one per spot) keeps
    # the same legend entries while creating far fewer Bokeh objects.
    columns_by_cluster = {}
    for x, y, cluster in zip(xs, ys, clusters):
        tooltip = '<br>'.join([
            f"<span style='color: red;'> Spot</span> : (x = {x:.2f}, y = {y:.2f})",
            f"<span style='color: blue;'> Cluster</span> {cluster}",
        ])
        columns = columns_by_cluster.setdefault(
            cluster, {'x': [], 'y': [], 'tooltip_data': []}
        )
        # Plot coordinates: source y becomes the horizontal axis and the
        # negated source x the vertical axis, both scaled by 1/100.
        columns['x'].append(y / 100)
        columns['y'].append(-x / 100)
        columns['tooltip_data'].append(tooltip)

    # Initialize the Bokeh plot.
    p = figure(width=width, height=height,
               title="Deconvolution results",
               x_axis_label='x',
               y_axis_label='y',
               output_backend="webgl")

    for cluster, columns in columns_by_cluster.items():
        # A full-turn wedge draws a filled disc with a radius in data units.
        p.wedge(x='x', y='y', radius=0.02,
                start_angle=0, end_angle=2*pi,
                line_color="white", fill_color=clusters_colordict[cluster],
                line_width=0,
                source=ColumnDataSource(columns),
                legend_label=f"Cluster {cluster}")

    # Legend visibility is driven by the caller.
    p.legend.visible = show_legend
    hover = HoverTool(tooltips="""
            <div style="width:200px">
                @tooltip_data
            </div>
        """)
    # Add the hover tool to the plot.
    p.add_tools(hover)
    # Configure the legend: clicking an entry hides that cluster.
    p.legend.location = "top_right"
    p.legend.click_policy = "hide"
    if show_figure:
        show(p)
    return p

In [380]:
import os

def get_files_with_pattern(directory, pattern):
    """Return the paths of the entries of `directory` whose name starts with `pattern`.

    Parameters
    ----------
    directory : str
        Directory to list, with or without a trailing path separator.
    pattern : str
        Prefix an entry name must start with to be selected.

    Returns
    -------
    list of str
        ``directory`` joined with each matching entry name, sorted by name so
        the result does not depend on the arbitrary order of ``os.listdir``.
    """
    # os.path.join inserts a separator only when `directory` lacks one, so
    # both "dir" and "dir/" give a valid path.
    return [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if filename.startswith(pattern)
    ]

# Example usage
directory = "tissue_images/"
pattern = "tissue_hires_image"  # Filename prefix to look for

files = get_files_with_pattern(directory, pattern)
for file in files:
    # Take the sample id from the file name itself (4th "_"-separated field,
    # extension stripped) so underscores in `directory` cannot shift the index.
    sample_id = os.path.basename(file).split('_')[3].split('.')[0]
    # NOTE(review): the output name starts with `pattern` and is written into
    # `directory`, so outputs are picked up as inputs on the next run and an
    # input already named tissue_hires_image_<id>.png is overwritten in place.
    # Confirm this is intended.
    remove_non_violet_lines_and_columns(file, f"{directory}tissue_hires_image_{sample_id}.png")

Image processed and saved as tissue_images/tissue_hires_image_255.png
Removed 0 rows and 0 columns
Image processed and saved as tissue_images/tissue_hires_image_259.png
Removed 302 rows and 549 columns
Image processed and saved as tissue_images/tissue_hires_image_242.png
Removed 0 rows and 0 columns
Image processed and saved as tissue_images/tissue_hires_image_334.png
Removed 644 rows and 588 columns
Image processed and saved as tissue_images/tissue_hires_image_243.png
Removed 772 rows and 813 columns
Image processed and saved as tissue_images/tissue_hires_image_259.png
Removed 0 rows and 0 columns
Image processed and saved as tissue_images/tissue_hires_image_275.png
Removed 0 rows and 0 columns
Image processed and saved as tissue_images/tissue_hires_image_313.png
Removed 0 rows and 0 columns
Image processed and saved as tissue_images/tissue_hires_image_275.png
Removed 0 rows and 0 columns
Image processed and saved as tissue_images/tissue_hires_image_242.png
Removed 0 rows and 0 column

In [67]:
# Inspect entry 1 of `processed_data`; the recorded output is a dict with keys
# such as 'x0', 'y0', 'im_w' and 'im_h'.
# NOTE(review): other cells display `processed_data` as a table, so this cell
# appears to depend on an earlier kernel state — confirm it still runs after
# Restart & Run All.
print(processed_data[1])

{'x0': 1375, 'y0': -715, 'im_w': 5473, 'im_h': 5773, 'mx1': 561, 'mx2': 92, 'my1': 677, 'my2': 749}


In [89]:
# Display `processed_data` using the notebook's rich rendering of the cell's
# last expression.
processed_data

Unnamed: 0,pxl_row_in_fullres,pxl_col_in_fullres,Cluster,Deconv_cell1_norm_value,Deconv_cell2_norm_value,Deconv_cell3_norm_value,Deconv_cell4_norm_value,Deconv_cell5_norm_value,Deconv_cell1,Deconv_cell2,Deconv_cell3,Deconv_cell4,Deconv_cell5
ATTGTGACTTCGCTGC-1,1539,6067,9,0.800198,0.097400,0.054757,0.036318,0.011327,malignantcell,macrophage,Bcell,monocyte,endothelialcell
GGCTAATGATTGAAAT-1,1539,6240,9,0.781413,0.071641,0.069742,0.048279,0.028925,malignantcell,astrocyte,muralcell,macrophage,monocyte
TGCGAGATGGCGGCCA-1,1539,6326,2,0.476914,0.346384,0.083673,0.064033,0.028995,malignantcell,neuron,astrocyte,radialglialcell,microglialcell
CACACTTGTATTGCGA-1,1464,6369,2,0.550402,0.168960,0.135938,0.098447,0.046253,malignantcell,Bcell,monocyte,radialglialcell,macrophage
GCTGGTGACTCGTAGT-1,1539,6412,2,0.573703,0.235081,0.081614,0.057092,0.052511,malignantcell,neuron,astrocyte,radialglialcell,oligodendrocyte
...,...,...,...,...,...,...,...,...,...,...,...,...,...
GGCGCTTCATTCCCTG-1,5735,4437,7,0.460505,0.305689,0.098786,0.081762,0.053257,malignantcell,monocyte,macrophage,muralcell,oligodendrocyteprecursorcell
TGCTCGGTGGGTCACC-1,5661,4480,7,0.738224,0.090724,0.085083,0.048229,0.037739,malignantcell,macrophage,astrocyte,Bcell,monocyte
GACTAGGCCGTTAGGT-1,5660,4738,7,0.859756,0.062272,0.030610,0.024058,0.023304,malignantcell,astrocyte,muralcell,macrophage,endothelialcell
GAACTGTGGAGAGACA-1,5811,4221,7,0.460889,0.181037,0.161976,0.142524,0.053575,malignantcell,astrocyte,neuron,radialglialcell,dendriticcell


In [116]:

def f(proce):
    """Print the first `nb_spots_samples` rows of `proce`.

    `nb_spots_samples` is not a parameter: it is read from the notebook's
    global namespace at call time.
    """
    print(proce.head(nb_spots_samples))
# Run the debug helper on the current `processed_data`.
f(processed_data)

                    pxl_row_in_fullres  pxl_col_in_fullres  Cluster  \
ATTGTGACTTCGCTGC-1                1539                6067        9   
GGCTAATGATTGAAAT-1                1539                6240        9   
TGCGAGATGGCGGCCA-1                1539                6326        2   
CACACTTGTATTGCGA-1                1464                6369        2   
GCTGGTGACTCGTAGT-1                1539                6412        2   
...                                ...                 ...      ...   
ACTAGTTGCGATCGTC-1                4461                5856        9   
TTGGACCATCTGGCAA-1                4535                5899        9   
CCGTATCTCGTCGTAG-1                4689                2541        3   
TCACGATGTCCGTGGA-1                4614                2584        3   
TCAACAAAGATAATTC-1                4689                2628        3   

                    Deconv_cell1_norm_value  Deconv_cell2_norm_value  \
ATTGTGACTTCGCTGC-1                 0.800198                 0.097400   
GGC

In [4]:
# Input files for sample UKF242_T_ST (per the file names): normalized
# cell-type weights, spatial coordinates and Seurat metadata.
norm_weights_filepath = "norm_celltype_weights_UKF242_T_ST.csv"
st_coords_filepath = "spatial_coords_UKF242_T_ST.csv"
# Presumably the number of top-weighted cell types kept per spot — confirm
# against `process_data`.
n_largest_cell_types = 5
data_with_clustering = "seurat_metadata_UKF242_T_ST.csv"
# `process_data` is defined outside this section; its result is kept in
# `processed_data` for the cells that inspect or plot it.
processed_data = process_data(norm_weights_filepath, st_coords_filepath, data_with_clustering, n_largest_cell_types)

5


In [136]:
# Load the tab-separated proportions table for sample 1 (RCTD results, judging
# by the path) and display it as the cell's last expression.
data = pd.read_csv('res_rctd_cluster/proportions_rctd_sample1', sep = '\t', )
# import os
# os.listdir("./res_rctd_cluster/")
data

Unnamed: 0,Bcell,astrocyte,dendriticcell,endothelialcell,macrophage,malignantcell,mastcell,matureTcell,microglialcell,monocyte,muralcell,naturalkillercell,neuron,oligodendrocyte,oligodendrocyteprecursorcell,plasmacell,radialglialcell
AAACAAGTATCTCCCA-1,0.054317,0.000051,0.000051,0.011236,0.096617,0.793762,0.000286,0.005493,0.000051,0.036026,0.001758,0.000051,0.000051,0.000055,0.000051,0.000051,0.000096
AAACATTTCCCGGATT-1,0.000053,0.069903,0.000053,0.023682,0.047108,0.762452,0.000053,0.000053,0.000053,0.028223,0.068050,0.000053,0.000053,0.000053,0.000058,0.000053,0.000053
AAACCGGGTAGGTACC-1,0.000086,0.077167,0.021932,0.022068,0.000086,0.439829,0.000086,0.000086,0.026740,0.000086,0.000086,0.000086,0.319450,0.024289,0.008785,0.000086,0.059054
AAACCGTTCGTCCAGG-1,0.158716,0.026366,0.000078,0.000078,0.043448,0.517032,0.005036,0.000078,0.000078,0.127697,0.003854,0.000078,0.015355,0.009473,0.000078,0.000078,0.092479
AAACCTCATGAAGTTG-1,0.006927,0.075839,0.022840,0.011130,0.000081,0.533108,0.001074,0.000081,0.025349,0.000081,0.000081,0.000081,0.218447,0.048795,0.002951,0.000081,0.053052
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTAGCAAATTCGA-1,0.040935,0.008666,0.000075,0.000075,0.093787,0.437202,0.000325,0.000075,0.000075,0.290220,0.077625,0.000075,0.000075,0.000075,0.050562,0.000075,0.000075
TTGTTCAGTGTGCTAC-1,0.047353,0.083537,0.000051,0.000051,0.089076,0.724809,0.000051,0.000051,0.000059,0.037054,0.000051,0.000051,0.001491,0.000060,0.000051,0.000051,0.016150
TTGTTGTGTGTCAAGA-1,0.000053,0.061584,0.000053,0.023047,0.023792,0.850266,0.000053,0.003384,0.007092,0.000053,0.030272,0.000053,0.000053,0.000053,0.000067,0.000053,0.000070
TTGTTTCACATCCAGG-1,0.024200,0.154975,0.045862,0.025273,0.000070,0.394539,0.008191,0.000070,0.036232,0.006103,0.019679,0.000070,0.138658,0.023932,0.000070,0.000070,0.122006
