# Tangram: cell type projection report

In [None]:
import os
# Show the absolute paths of the two input files this report reads.
# FILE1 / FILE2 are not defined in the cells visible here — presumably they are
# injected as notebook parameters before this cell runs (TODO confirm).
print("Files in use:")
file1_abs = os.path.abspath(FILE1)
print(f"FILE1='{file1_abs}'")
file2_abs = os.path.abspath(FILE2)
print(f"FILE2='{file2_abs}'")

In [None]:
# import packages
import os
import warnings
import tangram as tg
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
import re

#### Plotting settings

In [None]:
# Figure defaults applied through scanpy for the plots in this report:
# on-screen resolution, resolution of saved figures, and base font size.
sc.set_figure_params(
    dpi=150,
    dpi_save=600,
    fontsize=10,
)
# (width, height) pair; not referenced in the cells visible here —
# presumably used as a figure size for histogram plots (TODO confirm).
hist_dims = (10, 3)

### Read data

In [None]:
# FILE1 -> adata_map: later cells read the training statistics from its .uns
adata_map = sc.read(filename=FILE1)
# FILE2 -> adata_spatial: later cells read the cell type projections from its
# .obs / .obsm and the spot coordinates used for plotting
adata_spatial = sc.read(filename=FILE2)

---

## Training scores for mapping single cell to spatial data 

In [None]:
# One row per training gene in 'train_genes_df', so the row count is the gene count.
n_train_genes = adata_map.uns['train_genes_df'].shape[0]
print(f"Number of genes used for training: {n_train_genes}")

#### Training history
Overall training scores should converge before the end of training.

In [None]:
# Plot the recorded 'main_loss' training history against the epoch index.
# Entries are converted with float() because they are not guaranteed to be
# stored as plain numbers.
loss = list(map(float, adata_map.uns['training_history']['main_loss']))
epochs = range(len(loss))
plt.plot(epochs, loss)
plt.xlabel("Epoch")
plt.ylabel("Score")

#### Summary plots for training scores and gene expression sparsity

To evaluate the mapping, training scores are shown per gene (only genes that were used for training). Sparsity of gene expression can impact training. To check whether sparsity of gene expression impacts training, training scores are plotted against sparsity of gene expression in single cells and spatial data.

In [None]:
# Tangram's built-in summary plot of the per-gene training scores;
# alpha=0.5 is passed through as the transparency setting.
tg.plot_training_scores(adata_map, alpha=0.5)

#### 20 genes with best training performance

In [None]:
# First 20 rows of the per-gene training table.
# NOTE(review): "best" assumes the table is sorted by training score in
# descending order — confirm against the step that writes 'train_genes_df'.
adata_map.uns['train_genes_df'].head(20)

#### 20 genes with lowest training performance

In [None]:
# Last 20 rows of the per-gene training table.
# NOTE(review): "lowest" assumes the table is sorted by training score in
# descending order — confirm against the step that writes 'train_genes_df'.
adata_map.uns['train_genes_df'].tail(20)

## Cell type mapping

#### Sum over cell type projections
Cell type projection scores are summed up over all cells/spots for each cell type. This sum provides an overview of which cell type projections dominate the mapping.

In [None]:
# Compile the sum of the cell type projection scores over all cells/spots and
# show the totals as a horizontal bar chart (one bar per cell type).
pref = 'n_tangram_'
celltypes = adata_spatial.obsm['tangram_ct_pred'].columns
# The per-cell-type scores are looked up in .obs under '<pref><cell type>'.
obs_entries = [pref + celltyp for celltyp in celltypes]

# Take an explicit copy so that renaming the columns can never touch
# adata_spatial.obs itself.
df_cellscores = adata_spatial.obs[obs_entries].copy()
# Strip only the leading prefix. A regex substitution would also delete the
# prefix text wherever else it occurs inside a cell type name, and a filtering
# comprehension could silently produce a column list of the wrong length.
df_cellscores.columns = [head[len(pref):] for head in df_cellscores.columns]

# Sum each cell type column once and reuse the result for labels and values.
sum_per_celltype = df_cellscores.sum(axis=0)
df_sumcells = pd.DataFrame(
    {
        'Celltype': sum_per_celltype.index,
        'sum_cells': sum_per_celltype,
    })
# Ascending sort: barh draws rows bottom-to-top, so the largest sum ends up on top.
df_sumcells.sort_values('sum_cells', inplace=True)

# Figure height grows with the number of cell types (0.25 inch per bar).
df_sumcells.plot.barh(figsize=[5, df_sumcells.shape[0]*0.25], legend=False)

#### Entropy per cell/spot
The entropy over cell type projections per cell/spot provides an overview of how clearly a cell/spot could be assigned to a single cell type (low entropy) or to several different cell types (high entropy).

In [None]:
# Plot the per-cell/spot entropy ('n_tangram_entropy') on the spatial layout.
if 'spatial' in adata_spatial.uns:
    # A 'spatial' entry is present in .uns: use scanpy's spatial plot.
    sc.pl.spatial(adata_spatial, color=['n_tangram_entropy'], alpha=1, color_map='OrRd')
else:
    # No 'spatial' entry: fall back to a scatter plot of obsm['X_spatial'] and
    # size the figure so it follows the aspect ratio of the coordinates.
    max_width_height = 5   # longest figure side, in inches
    min_width_height = 1   # lower bound so very elongated layouts stay drawable
    xy = adata_spatial.obsm['X_spatial']
    sp_width = np.max(xy[:, 0]) - np.min(xy[:, 0])
    sp_height = np.max(xy[:, 1]) - np.min(xy[:, 1])
    longest = max(sp_width, sp_height)
    if longest > 0:
        # Scale the longest side to max_width_height and keep fractional sizes:
        # truncating to int distorted the aspect ratio and produced a 0-inch
        # side (rejected by matplotlib) for aspect ratios above 5:1.
        fig_width = max(min_width_height, float(max_width_height * sp_width / longest))
        fig_height = max(min_width_height, float(max_width_height * sp_height / longest))
    else:
        # Degenerate extent (e.g. a single spot, or all spots on one point):
        # no aspect ratio can be derived, so use a square figure.
        fig_width = fig_height = max_width_height
    with rc_context({'figure.figsize': (fig_width, fig_height)}):
        sc.pl.embedding(adata_spatial, basis="X_spatial", color=['n_tangram_entropy'], alpha=1, color_map='OrRd', vmin=0, vmax=1)

#### Highest scoring cell type assignments per cell/spot

In [None]:
# Plot the highest scoring cell type assignment per cell/spot.
# The annotation is the first .obs column whose name starts with 'tangram_best_'.
best_columns = [key for key in adata_spatial.obs.keys() if key.startswith('tangram_best_')]
if not best_columns:
    # Fail with an explicit message instead of an opaque IndexError.
    raise KeyError("No column starting with 'tangram_best_' found in adata_spatial.obs")
bestanno = best_columns[0]

if 'spatial' in adata_spatial.uns:
    # A 'spatial' entry is present in .uns: use scanpy's spatial plot.
    sc.pl.spatial(adata_spatial, color=bestanno, alpha=1, palette='nipy_spectral')
else:
    # No 'spatial' entry: fall back to a scatter plot of obsm['X_spatial'] and
    # size the figure so it follows the aspect ratio of the coordinates.
    max_width_height = 5   # longest figure side, in inches
    min_width_height = 1   # lower bound so very elongated layouts stay drawable
    xy = adata_spatial.obsm['X_spatial']
    sp_width = np.max(xy[:, 0]) - np.min(xy[:, 0])
    sp_height = np.max(xy[:, 1]) - np.min(xy[:, 1])
    longest = max(sp_width, sp_height)
    if longest > 0:
        # Scale the longest side to max_width_height and keep fractional sizes:
        # truncating to int distorted the aspect ratio and produced a 0-inch
        # side (rejected by matplotlib) for aspect ratios above 5:1.
        fig_width = max(min_width_height, float(max_width_height * sp_width / longest))
        fig_height = max(min_width_height, float(max_width_height * sp_height / longest))
    else:
        # Degenerate extent (e.g. a single spot, or all spots on one point):
        # no aspect ratio can be derived, so use a square figure.
        fig_width = fig_height = max_width_height
    with rc_context({'figure.figsize': (fig_width, fig_height)}):
        sc.pl.embedding(adata_spatial, basis="X_spatial", color=[bestanno], palette='nipy_spectral')