# Import libraries and setup

In [None]:
# Import libraries we may need
import scanpy as sc
import numpy as np
import scipy as sp
import squidpy as sq
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
import scanpy.external as sce
import wget
import yaml
import wget
import astir
import dill
import umap
reducer = umap.UMAP()

#My module for importing IMC data into AnnData format
import mikeimc as mimc
import mikeimc_v2 as mikeimc_v2

In [None]:
# Set up output figure settings
plt.rcParams['figure.figsize']=(64,64) # Default figure size (width, height); increase the size here
# NOTE(review): sc.set_figure_params below may reset matplotlib rcParams (including
# figure.figsize) to scanpy's own defaults - TODO confirm this setting survives.

# Set up scanpy settings
sc.settings.verbosity = 3
sc.set_figure_params(dpi=100, dpi_save=300) #Increase DPI for better resolution figures
#sc.logging.print_versions()

# Save / restore environment
Use these functions to either save or load the environmental variables, otherwise you will lose all the objects between opening/closing Jupyter sessions!

In [None]:
#load the session
dill.load_session('DC_ASTIR.db')

In [None]:
#save the session
dill.dump_session('DC_ASTIR.db')

Save adata object
This will save the adata object in the current directory, under the file name passed to `write` ('adata_subset' or 'adata_subset2' in the cells below)

In [None]:
adata.write('adata_subset')

In [None]:
adata.write('adata_subset2')

Load adata
This will open a stored adata object (the file 'adata_subset2', read from the current working directory)

In [None]:
# `ad` is never imported in this notebook, so the original call raised a NameError.
# scanpy (already imported as `sc`) exposes the same h5ad reader.
adata = sc.read_h5ad('adata_subset2')

# Importing data and setup

In [None]:
#This approach of generating adata is different from the approach below and it uses different input files.

# Reload the helper module so that edits made to mikeimc are picked up without restarting the kernel
from importlib import reload
mimc = reload(mimc)

# Arguments for the cell-table import, gathered in one place
celltable_options = {
    # Contents define how the columns in the cell table should be handled
    'column_properties': 'mikeimc_approach/Original_files/DC_columns_final.csv',
    # The full cell table as exported by whatever cell segmentation pipeline
    'cell_table': 'mikeimc_approach/Original_files/Denoise_DC_cells_final_edited.csv',
    # If this is specified, it will add extra observations based upon the ROI,
    # e.g. what group/patient/treatment it came from
    'dictionary': 'mikeimc_approach/Original_files/dictionary_edited.csv',
    # Marker normalisation, by default will be None
    'marker_normalisation': '99th',
    # Whether or not a second dataframe of 'misc' findings will be imported
    'misc_table': True,
}

adata, df_misc = mimc.celltable_to_adata(**celltable_options)

In [None]:
adata.uns

In [None]:
# Collect every marker (variable name) held in the AnnData object
all_markers = list(adata.var_names)
print('List of all markers found:')
print(all_markers, end='\n\n')

# Optional step: list here the markers to drop entirely from the dataset, e.g. DNA stains
markers_to_remove = ['DNA1', 'DNA2']

# Keep the original marker order, skipping anything flagged for removal
markers_limited = []
for marker in all_markers:
    if marker in markers_to_remove:
        continue
    markers_limited.append(marker)

#print ('List of all markers with removed items:')
print(markers_limited)

In [None]:
#Filter to only markers specified above
adata_filtered = adata[:,markers_limited]
#The .copy() makes a completely seperate copy
adata_subset = adata_filtered.copy()
#This code further filters the data by removing cells from C10, C31 ROIs and C21_ROI3
#adata_subset = adata_filtered[~adata_filtered.obs['ROI'].isin(['C10_ROI1','C10_ROI2','C10_ROI3', 'C10_ROI4', 'C21_ROI3', 'C31_ROI1', 'C31_ROI2', 'C31_ROI3', 'C31_ROI4']),:].copy()

adata_subset.var_names

# QC Checking data

In [None]:
sc.pl.matrixplot(adata_subset, adata_subset.var_names, groupby='TMAID', swap_axes=True, save='QC_TMAID.png')

#Number of cells per ROI
#print(adata_subset.obs['ROI'].value_counts())

In [None]:
sc.pl.matrixplot(adata_subset, adata_subset.var_names, groupby='ROI', swap_axes=True, save='QC_ROI.png')

In [None]:
sc.pl.matrixplot(adata_subset, adata_subset.var_names, groupby='Region', swap_axes=True, save='QC_Region.png')

# PCA analyses

In [None]:
#Show those markers that yield the highest fraction of counts in each single cell, across all cells.\
sc.pl.highest_expr_genes(adata_subset, n_top=20, )

#Identify highly-variable markers:
sc.pp.highly_variable_genes(adata_subset, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(adata_subset)    

In [None]:
#Calculate PCA - this must be done first, otherwise the PCA plots below have no results to read.
#The number of components is set to one less than the number of markers (same choice as the
#BBKNN cell further down), because the usual default of 50 is more than the panel provides.
sc.tl.pca(adata_subset, n_comps=len(adata_subset.var_names)-1, svd_solver='arpack')

#Inspect the contribution of single PCs to the total variance in the data. 
#This gives us information about how many PCs we should consider in order to compute the neighborhood relations of cells,
#e.g. used in the clustering function 
sc.pl.pca_variance_ratio(adata_subset, log=False, save='DC_PCA.pdf')

In [None]:
#Visualise all PCAs
sc.pl.pca(adata_subset, color='Region', components = ['1,2', '3,4', '5,6', '7,8'], ncols=1, size=0.2, save='DC_PCA_Region.pdf')

In [None]:
#Plot loadings of PCA to figure out what is contributing to variability
sc.pl.pca_loadings(adata_subset, components=[1,2,3,4,5,6,7,8], save = '.pdf')

# Batch integration using BBKNN
This will batch correct using BBKNN. There are other options for batch correction, such as Harmony, but I found this the easiest to implement. Read more here: https://bodenmillergroup.github.io/IMCDataAnalysis/batch-effects.html

Run PCA and BBKNN
batch_correction_obs - This defines which .obs should be used to identify the different batches; in the example here it is 'Group'

In [None]:
# Define the number of PCA dimensions to work with - one less than number of markers. Without this, it usually defaults to 50, which we don't have enough markers for.
n_for_pca = len(adata_subset.var_names)-1

# Define the 'obs' which defines the different batches
batch_correction_obs = 'Group'

# Calculate PCA, this must be done before BBKNN
sc.tl.pca(adata_subset, n_comps=n_for_pca)

# BBKNN - it is used in place of the scanpy 'neighbors' command that calculates nearest neighbours in the feature space.
# The batch key is taken from batch_correction_obs so that changing the setting above actually takes effect.
sc.external.pp.bbknn(adata_subset, batch_key=batch_correction_obs, n_pcs=n_for_pca)

In [None]:
#Calculate UMAP
sc.tl.umap(adata_subset)

Plot UMAPs
You can add extra .obs to UMAP_groups to colour the UMAPs by, e.g. treatment

In [None]:
#Define a list of .obs to colour the UMAP by
UMAP_groups = ['Patient', 'CaseID', 'Group', 'Region'] 

# Plot UMAPs coloured by list above
sc.pl.umap(adata_subset, color=UMAP_groups, ncols=1, size=3, save='UMAP_categories.png')

In [None]:
# This will plot a UMAP for each of the individual markers
sc.pl.umap(adata_subset, color=adata_subset.var_names.tolist(), color_map='viridis', vmax=0.75, ncols=4, save='UMAP_markers.png')

Visualizing distributions across batches

Often, batches correspond to experiments that one wants to compare. Scanpy offers two convenient visualization possibilities for this.

a density plot
a partial visualization of a subset of categories/groups in an embedding

In [None]:
#Density plot - it can be calculated for different columns in adata.obs
sc.tl.embedding_density(adata_subset, groupby='Group')

In [None]:
sc.pl.embedding_density(adata_subset, groupby='Group', save='UMAP_density_group.png')

Partial visualization of a subset of groups in embedding

In [None]:
adata_subset.obs['CaseID']

In [None]:
# Cases to highlight one at a time on the UMAP (defined once so the subset and the loop cannot drift apart)
cases_to_show = ['COS-002-05-J1', 'COS-004-05-J1', 'COS-006-05-J1']

# .copy() turns the subset into an independent AnnData rather than a view of adata_subset,
# so anything the plotting call stores on the object does not have to go through a view
adata_query = adata_subset[adata_subset.obs['CaseID'].isin(cases_to_show)].copy()

# One UMAP per case, with only that case coloured
for batch in cases_to_show:
    sc.pl.umap(adata_query, color='CaseID', groups=[batch])

# Supervised cell type identification using Astir

This uses Astir (https://github.com/camlab-bioml/astir) to do 'semi-supervised' cell identification using a neural network. You specify the populations and markers in the 'markers.yml' file - see the Astir documentation for full details.

Import markers file and data file

In [None]:
# Read in the YAML file with a list of the expected marker pairs
with open(r'markers.yml') as file:
  cell_types = yaml.load(file, Loader=yaml.FullLoader)

#View the contents of the YML file
!head markers.yml
print(cell_types['cell_types'])

In [None]:
adata_subset.write(filename='adata_subset_DC_astir')

import os
import warnings
from typing import Any

import anndata
import loompy
import matplotlib.cbook
import numpy as np
import pandas as pd
import torch
import yaml
from sklearn.preprocessing import OneHotEncoder

In [None]:
from astir.data import from_anndata_yaml
#the name of the anndata file below has to be as saved above!!

ast = from_anndata_yaml("adata_subset_DC_astir", "markers.yml", batch_name=None)
ast

Set training parameters

I've left these all as the default for now

In [None]:
# Create batch size proportional to the number of cells (1% of the cells).
# Clamped to at least 1: with fewer than 100 cells the plain division would give a batch size of 0.
N = ast.get_type_dataset().get_exprs_df().shape[0]
batch_size = max(1, N // 100)

# Number of training epochs
max_epochs = 1000

# Set learning rate
learning_rate = 2e-3

# Set initial epochs
initial_epochs = 3

In [None]:
#Run the cell type identification
ast.fit_type(max_epochs = max_epochs,
             batch_size = batch_size,
             learning_rate = learning_rate,
             n_init_epochs = initial_epochs,
             delta_loss=0.001)

Number of cells of each type found

In [None]:
ast.get_celltypes().value_counts()

In [None]:
df = ast.diagnostics_celltype().head(n=20)
df

In [None]:
#plot the losses to assess convergence
plt.figure(figsize=(5,4))
plt.plot(np.arange(len(ast.get_type_losses())), ast.get_type_losses())
plt.ylabel("Loss")
plt.xlabel("Epoch")

In [None]:
#get cell type assignment probabilities
assignments = ast.get_celltype_probabilities()
assignments

In [None]:
assignments.to_csv("../assignments_probabilities.csv")

In [None]:
sb.heatmap(assignments, annot=False, xticklabels=1, yticklabels=False)

In [None]:
ast.get_celltypes()

In [None]:
ast.type_to_csv("../cell-types.csv")

Add hierarchy and cell type data to original Adata

In [None]:
# Check that the hierarchy dictionary have been loaded correctly
ast.get_hierarchy_dict()

In [None]:
#Retrieve the table of probabilities
hierarchy_table =  ast.assign_celltype_hierarchy(depth = 1)

# Column labels of the table, i.e. the hierarchy groups a row can be assigned to
cell_types = hierarchy_table.columns.tolist()

#This is  the threshold as above - probably best to keep the same!
threshold_for_classification = 0.55 #increased this slightly because several cells were classified as SMA when = 0.5

# For every row, find the column with the highest probability and that probability.
# This is done on the whole array at once instead of looping over rows with iterrows().
probabilities = hierarchy_table.to_numpy()
best_column = probabilities.argmax(axis=1)
best_probability = probabilities.max(axis=1)

# Rows whose best probability is below the threshold do not fit any category and are labelled 'Other';
# every other row gets the label of its most probable column.
hierarchy = np.where(best_probability < threshold_for_classification,
                     'Other',
                     np.asarray(cell_types, dtype=object)[best_column]).tolist()

adata_subset.obs["cell_type"] = ast.get_celltypes(threshold=threshold_for_classification)['cell_type']
# NOTE(review): 'hierarchy' is a plain list, so it is assigned by position - this assumes the rows of
# hierarchy_table are in the same order as adata_subset.obs. TODO confirm.
adata_subset.obs["hierarchy"] = hierarchy

In [None]:
adata_subset

In [None]:
#Plot a heatmap grouped by hierarchy
sc.pl.heatmap(adata_subset, adata_subset.var_names, groupby='hierarchy', cmap='viridis', vmax=0.75, swap_axes=False, save='_hierarchy_unknown.png')

In [None]:
#Plot a heatmap grouped by cell type
sc.pl.heatmap(adata_subset, adata_subset.var_names, groupby='cell_type', cmap='viridis', vmax=0.75, swap_axes=False, save='_cell_type_unknown.png')

In [None]:
#Plot a heatmap grouped by ASTIR
sc.pl.matrixplot(adata_subset, adata_subset.var_names, groupby='cell_type', swap_axes=False, cmap='viridis', save='_ASTIR_cell_type_unknown.png')

In [None]:
#This removes the cells whose cell_type is 'Other' or 'Unknown', i.e. cells that were not assigned a cell type
#The .copy() makes a completely separate copy, so adata_subset itself is left untouched
adata_subset2 = adata_subset[~adata_subset.obs['cell_type'].isin(['Other','Unknown']),:].copy()
adata_subset2

In [None]:
adata_subset2

In [None]:
# Define the number of PCA dimensions to work with - one less than number of markers. Without this, it usually defaults to 50, which we don't have enough markers for.
n_for_pca = len(adata_subset.var_names)-1

# Define the 'obs' which defines the different batches
batch_correction_obs = 'Group'

# Calculate PCA, this must be done before BBKNN
sc.tl.pca(adata_subset, n_comps=n_for_pca)

# BBKNN - it is used in place of the scanpy 'neighbors' command that calculates nearest neighbours in the feature space.
# The batch key is taken from batch_correction_obs so that changing the setting above actually takes effect.
sc.external.pp.bbknn(adata_subset, batch_key=batch_correction_obs, n_pcs=n_for_pca)

In [None]:
#Calculate UMAP
sc.tl.umap(adata_subset)

In [None]:
#Plot the Astir cell types on the UMAP
#Only `size` is passed for the point size; the original also passed `s=2`, a second spelling of the same setting
sc.pl.umap(adata_subset, color = "cell_type", legend_loc='on data', title='', legend_fontsize = 10, size=2, save='_cell_type_unknown.png')

# Filtering Unknown population out

In [None]:
#Plot a heatmap grouped by cell type
sc.pl.heatmap(adata_subset2, adata_subset2.var_names, groupby='cell_type', cmap='viridis', vmax=0.75, swap_axes=False, save='_cell_type.png')

In [None]:
#Plot a heatmap grouped by cell types
sc.pl.heatmap(adata_subset2, adata_subset2.var_names, groupby='cell_type', vmax=0.75, swap_axes=True) #save='_ASTIR_celltype.pdf')

In [None]:
#Plot a heatmap grouped by ASTIR
sc.pl.matrixplot(adata_subset2, adata_subset2.var_names, groupby='cell_type', swap_axes=False, vmax=0.7, cmap='viridis', save='_ASTIR_cell_type.png')

In [None]:
#Calculate neighbors
#sc.pp.neighbors(adata_subset2, n_neighbors=100)

In [None]:
#Use BBKNN integration instead of calculating neighbors 
# Define the number of PCA dimensions to work with - one less than number of markers. Without this, it usually defaults to 50, which we don't have enough markers for.
n_for_pca = len(adata_subset2.var_names)-1

# Define the 'obs' which defines the different batches
batch_correction_obs = 'Group'

# Calculate PCA, this must be done before BBKNN
sc.tl.pca(adata_subset2, n_comps=n_for_pca)

# BBKNN - it is used in place of the scanpy 'neighbors' command that calculates nearest neighbours in the feature space.
# The batch key is taken from batch_correction_obs so that changing the setting above actually takes effect.
sc.external.pp.bbknn(adata_subset2, batch_key=batch_correction_obs, n_pcs=n_for_pca)

In [None]:
#Calculate UMAP
sc.tl.umap(adata_subset2)

In [None]:
#Plot the Astir cell types on the UMAP
#Only `size` is passed for the point size; the original also passed `s=2`, a second spelling of the same setting
sc.pl.umap(adata_subset2, color = "cell_type", legend_loc='on data', title='', legend_fontsize = 10, size=2, save='_cell_type.png')

In [None]:
#Plot the hierarchy groups on the UMAP
#Only `size` is passed for the point size; the original also passed `s=2`, a second spelling of the same setting
sc.pl.umap(adata_subset2, color = "hierarchy", legend_loc='on data', title='', legend_fontsize = 10, size=2, save='_hierarchy.png')

In [None]:
#Define a list of .obs to colour the UMAP by
UMAP_groups = ['Group'] 

# Plot UMAPs coloured by list above
sc.pl.umap(adata_subset2, color=UMAP_groups, ncols=1, size=3)

In [None]:
#Spatial mapping
sb.set_style("darkgrid", {'axes.grid' : False})

graph = sb.lmplot(data = adata_subset2.obs, x = 'X_loc',y='Y_loc',hue ='cell_type',palette = 'bright',height = 8,col = 'ROI',col_wrap = 10,fit_reg = False, aspect=1)
graph.savefig("MappedPlots_cell_type.png")

# Abundance graphs

In [None]:
#stack bar code and extracting number of cells, distribution of each cluster per case or the cluster composition per case
#for number of cells - don't use normalize
#for distribution of each case per cluster - use normalize = "index"
#for cluster composition per case - use normalize = "columns"
tmp = pd.crosstab(adata_subset2.obs['cell_type'], adata_subset2.obs['TMAID'], margins=False, normalize = "columns")

In [None]:
tmp

In [None]:
compression_opts = dict(method='zip',
                         archive_name='Celltype_TMAID.csv') 
tmp.to_csv('Celltype_TMAID.zip', index=True, compression=compression_opts)

In [None]:
# Flip the crosstab so that rows become columns and columns become rows
tmp2 = tmp.transpose()

In [None]:
tmp2

In [None]:
#color for stack bars for the disease groups matching the other bar graphs
#F08080 - light coral
#87CEFA - lightskyblue
#D3D3D3 - lightgrey

sb.set_style("whitegrid", {'axes.grid' : True})
pretty_colors = ['#F08080','#87CEFA','#D3D3D3']
color_pal = sb.color_palette('Paired')

#colour_palette['colour']
tmp2.plot.bar(stacked=True, color=color_pal, figsize=(4, 4)).legend(bbox_to_anchor=(1, 1))

Example breakdown of populations per case
Here, break down the myeloid, lymphoid and vascular populations per group

In [None]:
#Example breakdown of populations per group
#Here, break down the myeloid, lymphoid and vascular populations per group

adata_m = adata_subset2[adata_subset2.obs['hierarchy'].isin(['Myeloid'])].copy()
adata_l = adata_subset2[adata_subset2.obs['hierarchy'].isin(['Lymphoid'])].copy()
adata_v = adata_subset2[adata_subset2.obs['hierarchy'].isin(['Vascular'])].copy()

fig, axs = plt.subplots(1,3,figsize=(11, 6),constrained_layout=True)

# One stacked bar chart per hierarchy subset, left to right: myeloid, lymphoid, vascular.
# Each crosstab uses the subset's OWN 'Group' and 'cell_type' columns; the original paired the
# subset's 'Group' with the 'cell_type' column of the full adata_subset2.
for ax, adata_part in zip(axs, (adata_m, adata_l, adata_v)):
    # normalize='index' makes every row (group) sum to 1
    tmp = pd.crosstab(adata_part.obs['Group'], adata_part.obs['cell_type'], normalize='index')
    tmp.plot.bar(color=color_pal,ax=ax,stacked=True).legend(bbox_to_anchor=(0.75, -0.5))#.legend(bbox_to_anchor=(1.1, 1))

fig.savefig('population_breakdowns_Group.png')

In [None]:
adata_subset2

Abundances graphs with stats
Example of case averaging abundance stats

In [None]:
# The adata.obs column that identifies each image / ROI
image_var = 'ROI'

# Alter this list with the adata.obs variables that you want to group by; one graph is drawn per entry,
# with the 'cell_type' populations on the x axis
for i in ['Patient']:

    mikeimc_v2.grouped_graph(adata_subset2,
                             ROI_id=image_var,
                             group_by_obs=i,
                             x_axis='cell_type',
                             fig_size=(16,4),
                             log_scale=False,
                            display_tables=True) #display_tables is already True here; per the original note this also does stats on the groups
    plt.show()

In [None]:
# Compare the 'cell_type' populations between the values of the 'Region' column.
# NOTE(review): the output file is named 'pop_stats_Group.png' although groups='Region' -
# confirm whether the file name or the grouping is the intended one.
mikeimc_v2.pop_stats(adata_subset2,
          groups='Region', #The adata.obs we want to use to compare between
          Case_id='Patient', #The adata.obs that defines case
          ROI_id='ROI',
          x_axis='cell_type',
          display_tables=True,
          fig_size=(8,3),
          save='pop_stats_Group.png',
           log_scale=False
         )

# Categorise samples using UMAP for cell abundance

This will use the abundance of the different populations to create a UMAP, then colour it by an adata.obs

In [None]:
mikeimc_v2.cellabundance_UMAP(adata_subset2,
                              ROI_id='Patient',
                              population='cell_type',
                              colour_by='Group',
                              annotate=True,
                              normalize=False,
                              dim_red='UMAP',
                              save='abundance_umap.svg')

# Create new adatas for all cell types for unsupervised analyses

# Myeloid cells

In [None]:
adata_myeloid = adata_subset2[adata_subset2.obs['hierarchy'].isin(['Myeloid'])].copy()
sc.pl.heatmap(adata_myeloid, adata_myeloid.var_names, groupby='cell_type', vmax=0.75, cmap = 'viridis', swap_axes=True, save='_ASTIR_myeloid.png')

In [None]:
#Plot clustered heatmap
sc.tl.dendrogram(adata_myeloid, groupby = 'cell_type')
sc.pl.heatmap(adata_myeloid, adata_myeloid.var_names, groupby='cell_type', cmap = 'viridis', vmax=0.75, swap_axes=True,dendrogram=True, save='_ASTIR_myeloid_clustered.png')

In [None]:
adata_neutrophil = adata_subset2[adata_subset2.obs['cell_type'].isin(['Neutrophil'])].copy()
adata_macrophage = adata_subset2[adata_subset2.obs['cell_type'].isin(['Macrophage'])].copy()

# Lymphoid cells

In [None]:
#Plot heatmap
adata_lymphoid = adata_subset2[adata_subset2.obs['hierarchy'].isin(['Lymphoid'])].copy()
sc.pl.heatmap(adata_lymphoid, adata_lymphoid.var_names, groupby='cell_type', vmax=0.75, cmap='viridis',swap_axes=True, save='_ASTIR_lymphoid.png')

In [None]:
#Plot scaled heatmap
adata_lymphoid_norm = adata_lymphoid.copy()
sc.pp.scale(adata_lymphoid_norm, max_value=1.5,  zero_center=True)
sc.pl.heatmap(adata_lymphoid_norm, adata_lymphoid_norm.var_names, groupby='cell_type', swap_axes=True, save='_scaled_ASTIR_lymphoid.pdf')

# Vascular and Stromal cells

In [None]:
adata_vascular = adata_subset2[adata_subset2.obs['hierarchy'].isin(['Vascular'])].copy()
sc.pl.heatmap(adata_vascular, adata_vascular.var_names, groupby='cell_type', vmax=0.75, cmap='viridis', swap_axes=True, save='_ASTIR_vascular.png')

adata_endothelium = adata_subset2[adata_subset2.obs['cell_type'].isin(['Endothelium'])].copy()
sc.pl.heatmap(adata_endothelium, adata_endothelium.var_names, groupby='cell_type', cmap='viridis', swap_axes=True, vmax=0.75, save='_ASTIR_endothelium.png')

adata_RBC = adata_subset2[adata_subset2.obs['cell_type'].isin(['RBCs'])].copy()
sc.pl.heatmap(adata_RBC, adata_RBC.var_names, groupby='cell_type', swap_axes=True, vmax=0.75, cmap='viridis', save='_ASTIR_RBC.png')

#Plot scaled heatmap
adata_vascular_norm = adata_vascular.copy()
sc.pp.scale(adata_vascular_norm, max_value=0.5,  zero_center=True)
sc.pl.heatmap(adata_vascular_norm, adata_vascular_norm.var_names, groupby='cell_type', cmap='viridis', swap_axes=True, save='_scaled_ASTIR_vascular.png')

In [None]:
adata_stromal = adata_subset2[adata_subset2.obs['hierarchy'].isin(['Stromal'])].copy()
sc.pl.heatmap(adata_stromal, adata_stromal.var_names, groupby='cell_type', cmap='viridis', swap_axes=True, vmax=0.75, save='_ASTIR_stromal.png' )

adata_epithelial = adata_subset2[adata_subset2.obs['cell_type'].isin(['Epithelial'])].copy()
sc.pl.heatmap(adata_epithelial, adata_epithelial.var_names, groupby='cell_type', cmap='viridis', swap_axes=True, vmax=0.75, save='_ASTIR_epithelial.png')

adata_fibroblast = adata_subset2[adata_subset2.obs['cell_type'].isin(['Fibroblast', 'Smooth Muscle Cell'])].copy()
sc.pl.heatmap(adata_fibroblast, adata_fibroblast.var_names, groupby='cell_type', cmap='viridis', swap_axes=True, vmax=0.75, save='_ASTIR_fibroblast.png')

# Unsupervised analyses on cellular types - ASTIR output

# ASTIR - clustering analysis including functional markers

In [None]:
#including functional markers
#Each cell-type AnnData is restricted to the markers listed for it and stored under a new '<name>_2' variable,
#so the full-marker objects created earlier stay available.
neutrophil_markers = ['Arginase1','CD11b', 'CD11c', 'CD14', 'CD16', 'CD163', 'CD206', 'CD45', 'CD66b', 'CD68', 'Iba1', 'CD74', 'MHCII', 'MHCI', 'Vista', 
                   'iNOS', 'CD107a', 'GranzymeB', 'Ki67', 'ClvdCaspase3', 'SARSCoV2']
adata_neutrophil_2 = adata_neutrophil[:,neutrophil_markers].copy()

#Macrophages use exactly the same marker panel as neutrophils; it is copied rather than retyped so the two cannot drift apart
macrophage_markers = list(neutrophil_markers)
adata_macrophage_2 = adata_macrophage[:,macrophage_markers].copy()

lymphoid_markers = ['CD3','CD4','CD8', 'CD11c', 'CD16','CD20','CD38', 'CD45RO', 'CD74','MHCI','MHCII','Foxp3','CD107a','GranzymeB','Ki67','ClvdCaspase3','SARSCoV2']
adata_lymphoid_2 = adata_lymphoid[:,lymphoid_markers].copy()

epithelial_markers = ['PanCK','CD74','MHCII','MHCI','CD107a','GranzymeB','Ki67','ClvdCaspase3','SARSCoV2']
adata_epithelial_2 = adata_epithelial[:,epithelial_markers].copy()

#The original assigned this back to adata_stromal, overwriting the full-marker stromal object;
#it now follows the same '_2' naming as every other subset in this cell.
stromal_markers = ['Collagen1','SMA', 'Fibrinogen','Ki67','ClvdCaspase3','SARSCoV2']
adata_stromal_2 = adata_stromal[:,stromal_markers].copy()

endothelial_markers = ['CD31','ICAM1','vWF','CD74','MHCII','MHCI','CD107a','GranzymeB','Ki67','ClvdCaspase3','SARSCoV2']
adata_endothelium_2 = adata_endothelium[:,endothelial_markers].copy()

RBC_markers = ['CD235ab','SARSCoV2']
adata_RBC_2 = adata_RBC[:,RBC_markers].copy()

# Export data

In [None]:
#Make a simplified dataframe to export
# NOTE(review): 'Case', 'Type' and 'pheno_cluster' are not referenced anywhere else in this notebook
# (other cells use 'Patient', 'CaseID', 'Group', 'Region' and 'cell_type') - confirm these columns
# exist in adata_subset.obs, otherwise this selection raises a KeyError.
adata_export = adata_subset.obs[['Case','ROI','Type','hierarchy','pheno_cluster']].copy()

In [None]:
#Split up the X and Y columns from the adata
# presumably obsm['spatial'] holds one (x, y) pair per cell - TODO confirm it has exactly two columns
spatial_coords = np.asarray(adata_subset.obsm['spatial'])
# Plain 1-D columns are assigned here; np.split returned 2-D (n, 1) arrays, which is not a
# well-defined value for a single DataFrame column
adata_export['X'] = spatial_coords[:, 0]
adata_export['Y'] = spatial_coords[:, 1]
#Save to file
adata_export.to_csv('adata_cluster_export.csv')

In [None]:
# Export the contents of adata_subset as CSV files into the current directory using `.write_csvs`.
# `skip_data=False` is passed here, so the data is exported as well; set `skip_data=True` to leave it out.
adata_subset.write_csvs(dirname='./', skip_data=False)

# Loading images to anndata

In [None]:
pwd

In [None]:
#Method 1 to import the tif file to python and convert it to a numpy array
from PIL import Image
from pathlib import Path
import glob  
root = Path('./Images/').expanduser()

im = Image.open(root / '0205_1_A.tif')
im.show()

import numpy
imarray = numpy.array(im)

plt.imshow(im)

In [None]:
#ROI_list = glob.glob("*.tif")
#ROI_list
#for i in ROI_list:
 #       print('Loading image for '+i)
  #      i = plt.imread(i)

In [None]:
import matplotlib.pyplot as plt
# Read the '_A' image of each ROI from the Images folder (`root`) into its own variable.
# The original cell read eight of these files a second time (0307, 0405, 0407, 0505_1 and 0507_2);
# each file is now read once, and the resulting variables are the same.
Image_0205_1_A = plt.imread(root / '0205_1_A.tif')
Image_0205_2_A = plt.imread(root / '0205_2_A.tif')
Image_0305_1_A = plt.imread(root / '0305_1_A.tif')
Image_0305_2_A = plt.imread(root / '0305_2_A.tif')
Image_0307_1_A = plt.imread(root / '0307_1_A.tif')
Image_0307_2_A = plt.imread(root / '0307_2_A.tif')
Image_0405_1_A = plt.imread(root / '0405_1_A.tif')
Image_0405_2_A = plt.imread(root / '0405_2_A.tif')
Image_0407_1_A = plt.imread(root / '0407_1_A.tif')
Image_0407_2_A = plt.imread(root / '0407_2_A.tif')
# NOTE(review): 0505_2_A and 0507_1_A are not loaded, unlike the other ROI pairs - confirm this is intentional.
Image_0505_1_A = plt.imread(root / '0505_1_A.tif')
Image_0507_2_A = plt.imread(root / '0507_2_A.tif')

Image_0605_1_A = plt.imread(root / '0605_1_A.tif')
Image_0605_2_A = plt.imread(root / '0605_2_A.tif')
Image_0607_1_A = plt.imread(root / '0607_1_A.tif')
Image_0607_2_A = plt.imread(root / '0607_2_A.tif')


In [None]:
Image_0205_1_B = plt.imread(root / '0205_1_B.tif')
Image_0307_1_B = plt.imread(root / '0307_1_B.tif')
Image_0407_1_B = plt.imread(root / '0407_1_B.tif')
Image_0407_2_B = plt.imread(root / '0407_2_B.tif')
Image_0507_1_B = plt.imread(root / '0507_1_B.tif')
Image_0507_2_B = plt.imread(root / '0507_2_B.tif')


In [None]:
Image_0205_1_C = plt.imread(root / '0205_1_C.tif')
Image_0305_1_C = plt.imread(root / '0305_1_C.tif')
Image_0307_1_C = plt.imread(root / '0307_1_C.tif')
Image_0307_2_C = plt.imread(root / '0307_2_C.tif')
Image_0405_1_C = plt.imread(root / '0405_1_C.tif')
Image_0405_2_C = plt.imread(root / '0405_2_C.tif')
Image_0407_1_C = plt.imread(root / '0407_1_C.tif')
Image_0407_2_C = plt.imread(root / '0407_2_C.tif')
Image_0505_1_C = plt.imread(root / '0505_1_C.tif')
Image_0505_2_C = plt.imread(root / '0505_2_C.tif')
Image_0507_1_C = plt.imread(root / '0507_1_C.tif')
Image_0507_2_C = plt.imread(root / '0507_2_C.tif')


In [None]:
Image_0205_1_A = plt.imread('0205_1_A.tif')
Image_0205_2_A = plt.imread('0205_2_A.tif')

In [None]:
from PIL import Image
im = Image.open('0205_1_A.tif')
im.show()

In [None]:
import tifffile as tp
Image_0205_1_A = tp.imread('0205_1_A.tif')

import numpy
imarray = numpy.array(Image_0205_1_A)
imarray

In [None]:
import cv2 as cv
from matplotlib import pyplot as plt
import pytesseract

image_path = "0205_1_A.tif"
img = cv.imread(image_path)
# cv.imread does not raise when the file is missing or unreadable - it returns None.
# Fail here with a clear message instead of letting a None image reach the plotting / AnnData code.
if img is None:
    raise FileNotFoundError(f"Could not read image file: {image_path}")
#.astype(np.float32)
#cv.rectangle(img,(29,2496),(604,2992),(255,0,0),5)
plt.imshow(img)

In [None]:
from PIL import Image
from numpy import asarray
# load the image
image = Image.open('0205_1_A.tif')
# convert image to numpy array
img2 = asarray(image)
print(type(img2))
# summarize shape
print(img2)

In [None]:
adata_subset3 = adata_subset2[adata_subset2.obs['ROI'].isin(['0205_1_A'])].copy()

In [None]:
adata_subset3

In [None]:
import squidpy as sq
spatial_key = "spatial"
library_id = '0205_1_A'

# Build the whole nested entry for this library in one literal:
# the image under images -> hires, plus the scale factors used when drawing on top of it
adata_subset3.uns[spatial_key] = {
    library_id: {
        "images": {"hires": img},
        "scalefactors": {"tissue_hires_scalef": 1, "spot_diameter_fullres": 0.5},
    }
}

In [None]:
adata_subset3.uns

In [None]:
img.shape

In [None]:
 adata_subset3.uns[spatial_key][library_id]['images']['hires']

In [None]:
adata_subset3.uns['spatial'][library_id]

In [None]:
adata_subset3.uns[spatial_key][library_id]["scalefactors"]['tissue_hires_scalef']

In [None]:
img = sq.im.ImageContainer(adata_subset3.uns['spatial'][library_id]['images']['hires'],
                   scale=adata_subset3.uns['spatial'][library_id]["scalefactors"]['tissue_hires_scalef'])

In [None]:
img.show()

In [None]:
# coord_type='generic' is set explicitly: `radius` only applies to generic coordinates, and because
# adata_subset3.uns['spatial'] now exists squidpy would otherwise default to the 'grid' coordinate type.
# NOTE(review): radius=3.0 is in the units of obsm['spatial'] - confirm it is large enough to link neighbouring cells.
sq.gr.spatial_neighbors(adata_subset3, coord_type='generic', radius=3.0)

In [None]:
sq.pl.spatial_scatter(adata_subset3, color="cell_type", size=100, library_id='0205_1_A', spatial_key='spatial', img=True, img_cmap='gray', img_channel=0, palette=None, alpha=1.0, shape=None,connectivity_key="spatial_connectivities", edges_width=0.3)

In [None]:
# The background image is looked up from adata_subset3.uns['spatial'][library_id]['images'][img_key].
# The original also passed img='images', but `img` expects the image array itself, not a key name.
sc.pl.spatial(adata_subset3, color ='cell_type', neighbors_key="spatial_neighbors", spot_size=20, edges=True, edges_width=1, edges_color='black', library_id = '0205_1_A', img_key='hires', return_fig=True)

In [None]:
# Select the cells of one ROI. The original compared against "'0205_1_A'" (with the quote characters
# inside the string), which cannot match an ID stored without quotes; the 'ROI' column is used here
# because that is the column the '0205_1_A' subset is built from elsewhere in this notebook.
# NOTE(review): this also needs adata_subset.uns['spatial']['0205_1_A'] to be populated - confirm.
sc.pl.spatial(adata_subset[adata_subset.obs['ROI'] == '0205_1_A'], color="cell_type", library_id = "0205_1_A")

In [None]:
adata_subset.obs

In [None]:
adata_subset.uns["spatial"]["0205_1_A"] 

In [None]:
spatial_key = "spatial"
library_id = '0205_2_A'
adata_subset.uns[spatial_key] = {library_id: {}}
# The image has to sit under an "images" level (images -> hires), the same layout used for
# adata_subset3; the original put "hires" directly under the library id.
adata_subset.uns[spatial_key][library_id]["images"] = {"hires": Image_0205_2_A}
adata_subset.uns[spatial_key][library_id]["scalefactors"] = {"tissue_hires_scalef": 1, "spot_diameter_fullres": 0.5}

In [None]:
import stlearn
stlearn.add.image(adata_subset2, '0205_1_A.tif', library_id = '0205_1_A', quality = 'hires')