In [None]:
# Import relevant libraries
import numpy as np
import scanpy as sc
import os
import pandas as pd
import seaborn as sb
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sns
from collections import OrderedDict
from matplotlib import cm
import anndata as ann
import scanpy.external as sce
from datetime import datetime
import NaiveDE
import SpatialDE
from matplotlib_venn import venn3
%matplotlib inline
import glob
import plotly.graph_objects as go

# Work from the project's scripts folder; the relative data path used when
# loading the AnnData object further down is resolved against this directory.
SCRIPTS_DIR = "/Users/mendenlab/work/spatial_granuloma/scripts"
os.chdir(SCRIPTS_DIR)

# Assign the right colours to the right annotation
def _set_colors(adata, obs_name, colors):
    """Build a palette that follows the category order of an ``obs`` column.

    Parameters
    ----------
    adata : AnnData
        Object whose ``obs[obs_name]`` column is categorical (its
        ``.cat.categories`` are iterated).
    obs_name : str
        Name of the ``adata.obs`` column to plot.
    colors : OrderedDict
        Colours keyed by category name.

    Returns
    -------
    list
        Colours of the categories that have an entry in ``colors``, in
        category order. Categories without an entry are skipped, and an
        empty ``colors`` mapping yields an empty list.

    """
    # A comprehension always produces a list, so an empty ``colors`` mapping
    # returns [] instead of hitting an unbound ``palette`` at the return.
    return [
        colors[category]
        for category in adata.obs[obs_name].cat.categories
        if category in colors
    ]

# Colour per anatomical spot annotation (insertion order = plotting order)
spot_colors = OrderedDict([
    ("EPIDERMIS", 'blue'),
    ("DERMIS", '#E0EEE0'),
    ("INTERFACE", 'deepskyblue'),
    ("VESSEL", 'darkgreen'),
    ("HAIR FOLLICLE", "#543005"),
    ("SWEAT GLAND", 'yellow'),
    ("SEBACEOUS GLAND", 'mistyrose'),
    ("MUSCLE", 'darkcyan'),
    ("GA", 'firebrick'),
    ("GNL", 'orchid'),
    ("GSS", 'blueviolet'),
    ("GSC", 'mediumvioletred'),
    ("UNDETERMINED", 'black'),
])


# Colour per skin-layer annotation: blues for the epidermis layers,
# a dark-to-light green ramp for dermis depths 1-7.
dermis_colors = OrderedDict([
    ("UNDETERMINED", 'black'),
    ("upper EPIDERMIS", 'blue'),
    ("middle EPIDERMIS", 'dodgerblue'),
    ("basal EPIDERMIS", 'skyblue'),
    ("DERdepth1", '#006837'),
    ("DERdepth2", '#238443'),
    ("DERdepth3", '#41AB5D'),
    ("DERdepth4", '#78C679'),
    ("DERdepth5", '#ADDD8E'),
    ("DERdepth6", '#D9F0A3'),
    ("DERdepth7", '#F7FCB9'),
])

# Colour per Leiden cluster (resolution 1.3). The list position is the cluster
# id; keys are stored as strings "0".."19". Some clusters share a colour.
_leiden_r13_palette = [
    'darkolivegreen',  # 0
    "#D9F0A3",         # 1
    '#238443',         # 2
    'firebrick',       # 3
    '#78C679',         # 4
    '#78C679',         # 5
    '#41AB5D',         # 6
    '#006837',         # 7
    '#ADDD8E',         # 8
    "#238443",         # 9
    '#78C679',         # 10
    'blue',            # 11
    'orchid',          # 12
    '#F46D43',         # 13
    'dodgerblue',      # 14
    'deepskyblue',     # 15
    '#cfafaf',         # 16
    'yellow',          # 17
    'darkcyan',        # 18
    '#006837',         # 19
]
leiden_r13_colours = OrderedDict(
    (str(cluster_id), colour) for cluster_id, colour in enumerate(_leiden_r13_palette)
)

In [None]:
# Load the QC'd and clustered AnnData object
adata_path = "../results/current/"
adata = sc.read(os.path.join(adata_path, "final/Granuloma_QC_clustering.h5"))

# Ordered levels for the two annotation columns (the equivalent of ordered
# "factors"). Original author's note: add less common annotations LAST so they
# are not overwritten.
# Spot type: anatomical annotations
spot_type_levels = [
    "EPIDERMIS", 'DERMIS', 'INTERFACE', 'VESSEL', 'HAIR FOLLICLE', 'SWEAT GLAND',
    'SEBACEOUS GLAND', 'MUSCLE', 'GA', 'GNL', 'GSS', 'GSC', 'UNDETERMINED',
]
# Skin layer: epidermis sub-layers followed by dermis depth bins
skin_layer_levels = [
    'UNDETERMINED',
    'upper EPIDERMIS', 'middle EPIDERMIS', 'basal EPIDERMIS',
    'DERdepth1', 'DERdepth2', 'DERdepth3', 'DERdepth4',
    'DERdepth5', 'DERdepth6', 'DERdepth7',
]

# Convert both columns to ordered categoricals with exactly these levels
adata.obs['spot_type'] = pd.Categorical(
    adata.obs['spot_type'], categories=spot_type_levels, ordered=True
)
adata.obs['skin_layer'] = pd.Categorical(
    adata.obs['skin_layer'], categories=skin_layer_levels, ordered=True
)

In [None]:
# Switch the working directory to the external drive so outputs are saved there
output_dir = "/Volumes/Drive/spatial_granuloma/output/SpatialDE/"
os.chdir(output_dir)
print(os.getcwd())

In [None]:
# Combined identifier "<sample>_<SPECIMEN>" for every spot
sample_labels = adata.obs['sample'].astype(str)
specimen_labels = adata.obs['SPECIMEN'].astype(str)
adata.obs['sample_SPECIMEN'] = sample_labels + '_' + specimen_labels

## Sankey plot

In [None]:
# Show the annotation -> colour mapping used for the Sankey source nodes
spot_colors

In [None]:
# Inspect the per-spot metadata table (now including the sample_SPECIMEN column)
adata.obs

In [None]:
# Sankey node labels and colours: annotation categories first, Leiden clusters after
manual_leiden = [*spot_colors.keys(), *leiden_r13_colours.keys()]
manual_leiden_colours = [*spot_colors.values(), *leiden_r13_colours.values()]

# Per-spot annotation column (source side of the links)
links_source = adata.obs['spot_type']

In [None]:
# Preview: annotation name -> Sankey node index (0-12)
dict(zip(spot_colors, range(13)))

In [None]:
# Preview: Leiden cluster id -> Sankey node index (13-32, placed after the annotations)
dict(zip(leiden_r13_colours, range(13, 33)))

In [None]:
# Annotation name -> Sankey node index (0-12), displayed once more for reference
dict(zip(spot_colors, range(13)))

In [None]:
# Count spots for every (annotation, Leiden cluster) pair.
# observed=False is explicit so that zero-count pairs are kept: the plotting
# cell colours links by repeating each annotation colour once per cluster and
# therefore needs one row per pair, grouped by annotation.
sankey_df = (
    adata.obs[['spot_type', 'leiden_r1.3_patient']]
    .groupby(['spot_type', 'leiden_r1.3_patient'], observed=False)
    .size()
    .reset_index(name="Count")
)

# Sankey links address nodes by integer position:
# annotations occupy 0-12, Leiden clusters 13-32.
spot_type_to_node = dict(zip(spot_colors.keys(), range(0, 13)))
cluster_to_node = dict(zip(leiden_r13_colours.keys(), range(13, 33)))

# Rename the categories and assign the result back. The previous chained
# `sankey_df[col].replace(..., inplace=True)` acts on a temporary object under
# pandas Copy-on-Write and would leave the column unchanged.
sankey_df["spot_type"] = (
    sankey_df["spot_type"]
    .cat.rename_categories(spot_type_to_node)
    .cat.reorder_categories(new_categories=list(range(0, 13)), ordered=True)
)
sankey_df["leiden_r1.3_patient"] = (
    sankey_df["leiden_r1.3_patient"]
    .cat.rename_categories(cluster_to_node)
    .cat.reorder_categories(new_categories=list(range(13, 33)), ordered=True)
)
sankey_df

In [None]:
# Nodes: one label and one colour per annotation / cluster
nodes = {"label": manual_leiden, "color": manual_leiden_colours}

# Link colours follow the source annotation: each annotation colour is repeated
# 20 times (assumes sankey_df holds 20 consecutive rows per annotation, one per
# Leiden cluster).
link_colours = pd.Series(spot_colors.values()).repeat(20)

# Links
links = {
    "source": sankey_df["spot_type"],            # node index where the link starts
    "target": sankey_df["leiden_r1.3_patient"],  # node index where the link ends
    "value": sankey_df['Count'],                 # link width (number of spots)
    "color": link_colours,
}

data = go.Sankey(node=nodes, link=links)
fig = go.Figure(data)
fig.show()

In [None]:
# Annotations kept for the reduced Sankey plot. Defined here so this cell also
# runs on a fresh kernel (Restart & Run All); a later cell assigns the same list.
categories_included = ['VESSEL', 'HAIR FOLLICLE', 'SWEAT GLAND', 'SEBACEOUS GLAND', 'MUSCLE', 'GA', 'GNL', 'GSS', 'GSC']

# Preview: annotation name -> Sankey node index (0-8)
dict(zip(categories_included, range(0,9)))

In [None]:
# Inspect the source-node column of the current link table
sankey_df["spot_type"]

In [None]:
# Reduced Sankey: leave out EPIDERMIS, DERMIS, INTERFACE and UNDETERMINED
categories_included = [
    'VESSEL', 'HAIR FOLLICLE', 'SWEAT GLAND', 'SEBACEOUS GLAND', 'MUSCLE',
    'GA', 'GNL', 'GSS', 'GSC',
]

# Node labels / colours: kept annotations first, Leiden clusters after.
# Annotation colours are looked up in spot_colors rather than typed out again.
manual_leiden = [*categories_included, *leiden_r13_colours.keys()]
manual_leiden_colours = [
    *(spot_colors[name] for name in categories_included),
    *leiden_r13_colours.values(),
]

In [None]:
# Count spots for every (annotation, Leiden cluster) pair.
# observed=False is explicit so zero-count pairs are kept and every annotation
# contributes one row per cluster, which the link colouring relies on.
pair_counts = (
    adata.obs[['spot_type', 'leiden_r1.3_patient']]
    .groupby(['spot_type', 'leiden_r1.3_patient'], observed=False)
    .size()
    .reset_index(name="Count")
)

# Keep only the selected annotations; .copy() makes the result an independent
# frame so the column assignments below do not trigger SettingWithCopyWarning.
sankey_df = pair_counts[pair_counts['spot_type'].isin(categories_included)].copy()

# Sankey links address nodes by integer position:
# kept annotations occupy 0-8, Leiden clusters 9-28.
spot_type_to_node = dict(zip(categories_included, range(0, 9)))
cluster_to_node = dict(zip(leiden_r13_colours.keys(), range(9, 29)))

# Rename the categories and assign the result back. The previous chained
# `sankey_df[col].replace(..., inplace=True)` acts on a temporary object under
# pandas Copy-on-Write and would leave the column unchanged.
sankey_df["spot_type"] = (
    sankey_df["spot_type"]
    .cat.remove_unused_categories()
    .cat.rename_categories(spot_type_to_node)
    .cat.reorder_categories(new_categories=list(range(0, 9)), ordered=True)
)
sankey_df["leiden_r1.3_patient"] = (
    sankey_df["leiden_r1.3_patient"]
    .cat.rename_categories(cluster_to_node)
    .cat.reorder_categories(new_categories=list(range(9, 29)), ordered=True)
)
sankey_df

In [None]:
# Node colours for the reduced Sankey: 9 annotation colours followed by the 20 cluster colours
manual_leiden_colours

In [None]:
# Nodes: one label and one colour per kept annotation / cluster
nodes = dict(label = manual_leiden, color = manual_leiden_colours)

# Link colours follow the source annotation. Only the leading annotation
# colours of manual_leiden_colours are used, each repeated once per cluster, so
# the vector has one entry per link (previously all node colours were repeated,
# giving far more entries than links). Assumes sankey_df holds one row per
# (annotation, cluster) pair, grouped by annotation.
n_sources = len(categories_included)
n_clusters = len(leiden_r13_colours)
link_colours = pd.Series(manual_leiden_colours[:n_sources]).repeat(n_clusters)

# Links
links = dict(source = sankey_df["spot_type"], # The origin or the source nodes of the link
             target = sankey_df["leiden_r1.3_patient"], # The destination or the target nodes of the link
             value = sankey_df['Count'], # The width (quantity) of the links
             color = link_colours
            )
data = go.Sankey(node = nodes, link = links)
fig = go.Figure(data)
fig.show()

In [None]:
# Sankey restricted to a subset of annotations (INTERFACE and UNDETERMINED left out)
categories_included = ['EPIDERMIS', 'DERMIS', 'VESSEL', 'HAIR FOLLICLE', 'SWEAT GLAND', 'SEBACEOUS GLAND', 'MUSCLE', 'GA', 'GNL', 'GSS', 'GSC']
n_sources = len(categories_included)     # 11 annotation nodes: indices 0-10
n_clusters = len(leiden_r13_colours)     # 20 cluster nodes: indices 11-30

# Node labels / colours: kept annotations first, Leiden clusters after.
# Annotation colours come from spot_colors so they cannot drift from it.
manual_leiden = categories_included + list(leiden_r13_colours.keys())
manual_leiden_colours = [spot_colors[name] for name in categories_included] + list(leiden_r13_colours.values())

# Count spots for every (annotation, Leiden cluster) pair.
# observed=False is explicit so zero-count pairs are kept and every annotation
# contributes one row per cluster, which the link colouring below relies on.
pair_counts = (
    adata.obs[['spot_type', 'leiden_r1.3_patient']]
    .groupby(['spot_type', 'leiden_r1.3_patient'], observed=False)
    .size()
    .reset_index(name="Count")
)

# .copy() makes the filtered frame independent so the column assignments below
# do not trigger SettingWithCopyWarning.
sankey_df = pair_counts[pair_counts['spot_type'].isin(categories_included)].copy()

# Sankey links address nodes by integer position
spot_type_to_node = dict(zip(categories_included, range(n_sources)))
cluster_to_node = dict(zip(leiden_r13_colours.keys(), range(n_sources, n_sources + n_clusters)))

# Rename the categories and assign the result back. The previous chained
# `sankey_df[col].replace(..., inplace=True)` acts on a temporary object under
# pandas Copy-on-Write and would leave the column unchanged.
sankey_df["spot_type"] = (
    sankey_df["spot_type"]
    .cat.remove_unused_categories()
    .cat.rename_categories(spot_type_to_node)
    .cat.reorder_categories(new_categories=list(range(n_sources)), ordered=True)
)
sankey_df["leiden_r1.3_patient"] = (
    sankey_df["leiden_r1.3_patient"]
    .cat.rename_categories(cluster_to_node)
    .cat.reorder_categories(new_categories=list(range(n_sources, n_sources + n_clusters)), ordered=True)
)

# Nodes
nodes = dict(label = manual_leiden, color = manual_leiden_colours)

# Link colours follow the source annotation: one entry per link, i.e. each
# annotation colour repeated once per cluster (previously every node colour was
# repeated, giving far more entries than links).
link_colours = pd.Series(manual_leiden_colours[:n_sources]).repeat(n_clusters)

# Links
links = dict(source = sankey_df["spot_type"], # The origin or the source nodes of the link
             target = sankey_df["leiden_r1.3_patient"], # The destination or the target nodes of the link
             value = sankey_df['Count'], # The width (quantity) of the links
             color = link_colours
            )
data = go.Sankey(node = nodes, link = links)
fig = go.Figure(data)
fig.show()