# TCR Clonotypes

In [None]:
import sys
import numpy as np
sys.path.insert(0, "../..")
import scirpy as ir
import scanpy as sc
from glob import glob
import pandas as pd
import tarfile
import anndata
import warnings
from matplotlib import pyplot as plt
from numba import NumbaPerformanceWarning

# Silence numba's performance warnings for the rest of the session.
warnings.filterwarnings(action="ignore", category=NumbaPerformanceWarning)

# Raise the anndata logger threshold to ERROR so that the
# "storing XXX as categorical" messages are not printed.
anndata.logging.anndata_logger.setLevel("ERROR")

In [None]:
# Load the h5ad file that the rest of the notebook works on.
tcr_and_rna_combined = sc.read(
    "/scratch/batch_corrected_umap.h5ad",
)

In [None]:
# Keep only the cells that have TCR data ('has_tcr' is compared against the
# string "True", not a boolean).
# Subsetting an AnnData object yields a view of the original; take an explicit
# copy so the later steps that write results into the object work on an
# independent object instead of forcing an implicit copy of the view.
has_tcr_mask = tcr_and_rna_combined.obs["has_tcr"] == "True"
tcr_and_rna_combined = tcr_and_rna_combined[has_tcr_mask].copy()

In [None]:
# Build the TCR neighborhood graph with the library's default metric,
# using receptor_arms="all" and dual_tcr="primary_only".
ir.pp.tcr_neighbors(
    tcr_and_rna_combined,
    receptor_arms="all",
    dual_tcr="primary_only",
)
# define_clonotypes adds the 'clonotype' and 'clonotype_size' columns to the
# dataframe, which the clonotype expansion steps depend on.
ir.tl.define_clonotypes(tcr_and_rna_combined)

In [None]:
# Recompute the TCR neighborhood graph on amino-acid sequences with the
# alignment metric (cutoff=5), this time with dual_tcr="all".
ir.pp.tcr_neighbors(
    tcr_and_rna_combined,
    sequence="aa",
    metric="alignment",
    cutoff=5,
    dual_tcr="all",
    receptor_arms="all",
)

In [None]:
# Define clonotype clusters from the amino-acid / alignment graph using the
# "connected" partitioning.
ir.tl.define_clonotype_clusters(
    tcr_and_rna_combined,
    sequence="aa",
    metric="alignment",
    partitions="connected",
)

#### Based on the clonotype network plot alone, the primary tumor has far more varied clonotypes

In [None]:
# Compute the clonotype network for the amino-acid / alignment clusters.
# min_size=4 presumably drops clonotypes with fewer than 4 cells - confirm
# against the scirpy documentation.
ir.tl.clonotype_network(
    tcr_and_rna_combined,
    sequence="aa",
    metric="alignment",
    min_size=4,
)

In [None]:
# Draw the clonotype network, colored by 'group'.
ir.pl.clonotype_network(
    tcr_and_rna_combined,
    color="group",
    size=60,
    panel_size=(12, 12),
)

#### It is not surprising that primary and metastatic tumor tissues share clonotypes, but it is very interesting that samples 295 and 314 also have many clonotypes in common.

In [None]:
# Draw the same clonotype network, colored by 'sample'.
ir.pl.clonotype_network(
    tcr_and_rna_combined,
    color="sample",
    size=60,
    panel_size=(12, 12),
)

#### Examine information for clonotype cluster 4

# Clonotype Expansion

In [None]:
# Annotate clonal expansion, with clip_at=5.
ir.tl.clonal_expansion(
    tcr_and_rna_combined,
    clip_at=5,
)

#### Most of the expanded clonotypes belong to the primary tumor group

In [None]:
# One UMAP panel per annotation: clonal expansion, clonotype size and group.
sc.pl.umap(
    tcr_and_rna_combined,
    color=[
        "clonal_expansion",
        "clonotype_size",
        "group",
    ],
)

#### The largest clonotypes belong to cluster 18 and cluster 35, both of which are T/NKT.

#### The ratio of clonally expanded T-cells is also higher in the primary tumor group

In [None]:
# Clonal expansion plot, grouped by 'group'.
ir.pl.clonal_expansion(
    tcr_and_rna_combined,
    groupby="group",
    clip_at=5,
)

#### Batch-wise, the ratios of clonally expanded T-cells are not drastically different, which is a good sign.

In [None]:
# Clonal expansion plot, grouped by the 'bat' column.
ir.pl.clonal_expansion(
    tcr_and_rna_combined,
    groupby="bat",
    clip_at=5,
)

In [None]:
# Clonal expansion grouped by 'sample', drawn on a wide 25x12 axes.
fig, ax = plt.subplots(figsize=(25, 12))
ir.pl.clonal_expansion(
    tcr_and_rna_combined,
    groupby="sample",
    clip_at=5,
    ax=ax,
)

In [None]:
# Clonal expansion grouped by 'leiden', drawn on a wide 25x12 axes.
fig, ax = plt.subplots(figsize=(25, 12))
ir.pl.clonal_expansion(
    tcr_and_rna_combined,
    groupby="leiden",
    clip_at=5,
    ax=ax,
)

#### Clonotype frequencies are completely dominated by the primary tumor

In [None]:
# Abundance of 'group' per clonotype: at most 15 columns, sorted by count and
# normalized by 'clonotype'.
# NOTE(review): fig_kws may have no effect when an explicit ax is supplied -
# confirm against the scirpy documentation.
fig, ax = plt.subplots(figsize=(25, 12))
ax = ir.pl.group_abundance(
    tcr_and_rna_combined,
    groupby="clonotype",
    target_col="group",
    sort="count",
    max_cols=15,
    normalize="clonotype",
    fig_kws={"dpi": 100},
    ax=ax,
)

In [None]:
# Abundance of 'sample' per clonotype, limited to 15 columns.
fig, ax = plt.subplots(figsize=(25, 12))
ir.pl.group_abundance(
    tcr_and_rna_combined,
    groupby="clonotype",
    target_col="sample",
    max_cols=15,
    ax=ax,
)

In [None]:
# Same per-clonotype 'sample' abundance plot, normalized by 'clonotype'.
fig, ax = plt.subplots(figsize=(25, 12))
ir.pl.group_abundance(
    tcr_and_rna_combined,
    groupby="clonotype",
    target_col="sample",
    normalize="clonotype",
    max_cols=15,
    ax=ax,
)


#### We can extract clonotype information in more detail

In [None]:
# Show the four CDR3 columns for every cell whose alignment-based clonotype
# cluster label is "450" (the label is compared as a string).
cdr3_columns = ["TRA_1_cdr3", "TRA_2_cdr3", "TRB_1_cdr3", "TRB_2_cdr3"]
in_cluster_450 = tcr_and_rna_combined.obs["ct_cluster_aa_alignment"] == "450"
tcr_and_rna_combined.obs.loc[in_cluster_450, cdr3_columns]

# Major Clonotypes Shift (not complete yet) 

In [None]:
#primary = tcr_and_rna_combined[tcr_and_rna_combined.obs['group']=='primary']
#metastatic = tcr_and_rna_combined[tcr_and_rna_combined.obs['group']=='metastatic']
#ir.pp.tcr_neighbors(primary, receptor_arms="all", dual_tcr="primary_only")

# running this would add 'clonotype' and 'clonotype_size' to the dataframe, which is important for the clonotype expansion pipeline
#ir.tl.define_clonotypes(primary)

#ir.pp.tcr_neighbors(metastatic, receptor_arms="all", dual_tcr="primary_only")
# running this would add 'clonotype' and 'clonotype_size' to the dataframe, which is important for the clonotype expansion pipeline
#ir.tl.define_clonotypes(metastatic)

In [None]:
#ir.tl.clonal_expansion(primary,clip_at=5)
#ir.tl.clonal_expansion(metastatic,clip_at=5)

In [None]:
#fig, ax = plt.subplots(figsize=(25, 12))
#ir.pl.group_abundance(primary, groupby="clonotype", target_col="sample",max_cols=15,ax=ax)

# Repertoire Similarity

In [None]:
# Compute repertoire overlap between samples; inplace=False returns the three
# results instead of storing them on the object.
df, dst, lk = ir.tl.repertoire_overlap(
    tcr_and_rna_combined,
    "sample",
    inplace=False,
)

#### Primary and metastatic samples of the same patient are more closely related, which is understandable. What's interesting in this plot is the high similarity between samples 295 and 314. This can also be seen in the clonotype network plot, which shows that 295 and 314 share many clonotypes.

In [None]:
# Repertoire overlap plot per sample, with 'bat' and 'group' passed as
# heatmap_cats.
ir.pl.repertoire_overlap(
    tcr_and_rna_combined,
    "sample",
    heatmap_cats=["bat", "group"],
)

In [None]:
import pickle

# Persist the analysed object so it can be reloaded without re-running the
# notebook.
# NOTE(review): the original cell pickled an undefined `Object()` into an
# undefined `filename` (and shadowed the builtin `object`). The object and the
# output path used here are assumptions - confirm or adjust them.
output_path = "tcr_and_rna_combined.pkl"

# pickle writes bytes, so the file must be opened in binary mode; the `with`
# block closes the handle even if dumping fails.
with open(output_path, "wb") as file_handle:
    pickle.dump(tcr_and_rna_combined, file_handle)