# Init

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import scipy.stats as stats
%matplotlib inline
import os
from matplotlib import pyplot as plt
import matplotlib as mpl
import plotly
# Baseline rcParams: 12 pt text everywhere, Helvetica, fonttype 42 for
# PDF/PS output, and 500 dpi figures.
params = {
    "font.size": 12,
    "axes.titlesize": 12,
    "axes.labelsize": 12,
    "legend.fontsize": 12,
    "xtick.labelsize": 12,
    "ytick.labelsize": 12,
    "font.family": "Helvetica",
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    "figure.dpi": 500,
}
mpl.rcParams.update(params)

# Text sizes applied after the baseline above; these calls set the font,
# axes, tick, legend and figure-title sizes again, so the 12 pt values in
# ``params`` do not remain in effect for those settings.
SMALL_SIZE = 20
MEDIUM_SIZE = 22
BIGGER_SIZE = 24
plt.rc("font", size=SMALL_SIZE)  # default text size
plt.rc("axes", titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)  # axes title / x and y labels
plt.rc("xtick", labelsize=SMALL_SIZE)  # x tick labels
plt.rc("ytick", labelsize=SMALL_SIZE)  # y tick labels
plt.rc("legend", fontsize=SMALL_SIZE)  # legend text
plt.rc("figure", titlesize=BIGGER_SIZE)  # figure title

# Seaborn look: "ticks" style with the "colorblind" palette.
sns.set_style("ticks")
sns.set_palette("colorblind")

# Keyword arguments used as the default ``savefig_args`` of ``save_figure``.
savefig_args = {"dpi": 500, "bbox_inches": "tight", "pad_inches": 0.05}
mpl.rc('savefig', dpi=500)

# Directory that receives the exported figures. ``exist_ok=True`` creates it
# only when missing, without a separate existence check that could race with
# another process creating the same directory.
output_dir = "../../figures/tcr_clonality/"
os.makedirs(output_dir, exist_ok=True)

# Text inserted between the figure name and the file extension.
output_suffix = ""
# One file is written per extension listed here.
output_formats = [".png", ".pdf"]

def save_figure(fig, name, output_dir=output_dir, output_suffix=output_suffix, output_formats=output_formats, savefig_args=savefig_args):
    """Write ``fig`` to ``output_dir`` once per entry in ``output_formats``.

    Parameters
    ----------
    fig
        Object exposing ``savefig(path, **kwargs)`` (presumably a matplotlib
        Figure -- confirm against callers).
    name : str
        Base file name, without suffix or extension.
    output_dir : str
        Target directory; defaults to the module-level ``output_dir`` as it
        was when this function was defined.
    output_suffix : str
        Text placed between ``name`` and the extension.
    output_formats : iterable of str
        File extensions including the leading dot, e.g. ``".png"``.
    savefig_args : dict
        Keyword arguments forwarded unchanged to ``fig.savefig``.

    Returns
    -------
    None
    """
    for output_format in output_formats:
        # os.path.join avoids the doubled separator that plain concatenation
        # produced when ``output_dir`` already ends with "/".
        target = os.path.join(output_dir, name + output_suffix + output_format)
        fig.savefig(target, **savefig_args)

# Raise pandas' display limits: up to 500 rows, 500 columns, 1000 characters wide.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000


def tissue_colors():
    """Return the fixed mapping from tissue name to hex colour string.

    Keys are tissue names written with underscores (e.g. ``"Bone_Marrow"``);
    values are ``"#rrggbb"`` strings. A new dict is built on every call.
    """
    return {
        "Bladder": "#e7969c",
        "Blood": "#d6616b",
        "Bone_Marrow": "#cedb9c",
        "Eye": "#c7ea46",  # "#00ff7f"
        "Fat": "#e7cb94",
        "Heart": "#ff0800",
        "Kidney": "#7b4173",
        "Large_Intestine": "#31a354",
        "Liver": "#000080",
        "Lung": "#3182bd",
        "Lymph_Node": "#8c6d31",
        "Mammary": "#ce6dbd",
        "Muscle": "#e7ba52",
        "Pancreas": "#fd8d3c",
        "Prostate": "#637939",  # "#a55194"
        "Salivary_Gland": "#622a0f",
        "Skin": "#de9ed6",
        "Small_Intestine": "#6baed6",
        "Spleen": "#393b79",
        "Thymus": "#9c9ede",
        "Tongue": "#b5cf6b",
        "Trachea": "#969696",
        "Uterus": "#c64b8c",  # "#ff0090"
        "Vasculature": "#843c39",
    }

### BtReceptor Edited for T cell data 

from __future__ import division
import pandas as pd
import numpy as np
import Levenshtein
from scipy.spatial.distance import squareform
from scipy.sparse.csgraph import connected_components
from itertools import combinations


def donor_colors():
    """Map donor IDs ``TSP1`` .. ``TSP15`` to hex colours from "YlGnBu".

    Donor ``k`` (1-based) is sampled at colormap position ``k / 15``,
    accumulated by repeated addition of ``1 / 15``.

    NOTE: a second ``donor_colors`` is defined further down in this file;
    once that definition runs, this one is no longer reachable by name.
    """
    import matplotlib.colors as pltcolors

    donors = ['TSP' + str(number) for number in range(1, 16)]
    colormap = plt.cm.get_cmap("YlGnBu")
    step = 1 / len(donors)

    donor_color_dict = {}
    position = step
    for donor in donors:
        donor_color_dict[donor] = pltcolors.to_hex(colormap(position))
        position += step

    return donor_color_dict


def donor_colors():
    """Return the fixed mapping from donor ID (``"TSP<n>"``) to hex colour.

    A new dict is built on every call; insertion order is the order listed
    below.
    """
    return {
        'TSP6': '#034001',
        'TSP7': '#8FBC8F',
        'TSP4': '#c5ba30',
        'TSP5': '#DC143C',
        'TSP10': '#FFD700',
        'TSP8': '#8839ff',
        'TSP3': '#c34a17',
        'TSP11': '#00edff',
        'TSP12': '#f507a0',
        'TSP9': '#9CADCA',
        'TSP14': '#27BFD2',
        'TSP15': '#bc7c00',
        'TSP1': '#4169E1',
        'TSP2': '#636c8c',
        'TSP13': '#ADD8E6',
    }

from numpy.random import seed

In [2]:
# Load the tab-separated table and keep only the rows whose receptor_type
# column equals 'TCR'.
df = pd.read_table('../../data/merged_airr_scirpy.tsv').loc[
    lambda table: table["receptor_type"] == 'TCR'
]

In [3]:
## Permutation Test on Clones of size larger than 3

In [4]:
# Keep only rows belonging to clones that occur in more than 3 rows.
# ``selector`` is a boolean Series indexed by clone_id; ``clone_ids`` holds
# the clone_ids for which it is True.
selector = df["clone_id"].value_counts().gt(3)
clone_ids = selector[selector].index
df = df.loc[df["clone_id"].isin(clone_ids)]

In [5]:
# Number of distinct clone_id values in df, minus 22.
# NOTE(review): the offset 22 is not explained by any visible code -- confirm
# what it is meant to correct for.
df.clone_id.nunique() - 22

42

In [6]:
# Summary statistic: for every clone count the distinct tissues it appears
# in, then tally how many clones have each tissue count. The entry for 1 is
# the number of clones found in a single tissue.
df.groupby('clone_id').tissue.nunique().value_counts()
# Disabled variant that would return only the first entry of that tally:
#df.groupby('clone_id').tissue.nunique().value_counts().iloc[0]

1    24
3    16
2    13
4     5
5     3
6     2
7     1
Name: tissue, dtype: int64

In [7]:
def permute_labels(_df, group, label):
    """Return a copy of ``_df`` with ``label`` shuffled within each ``group``.

    Parameters
    ----------
    _df : pandas.DataFrame
        Table to permute. It is copied first and never modified.
    group : str
        Column whose values define the groups; labels are only exchanged
        between rows that share the same value in this column.
    label : str
        Column whose values are randomly permuted inside every group
        (uses the global NumPy random state via ``np.random.permutation``).

    Returns
    -------
    pandas.DataFrame
        The per-group frames concatenated in ``groupby`` iteration order,
        each keeping its original index. When ``groupby`` yields no groups
        (for example an empty input) an empty frame with the same columns
        is returned.
    """
    # Copy the argument itself, not a module-level frame, so the function
    # works on whatever table the caller passes in.
    working = _df.copy(deep=True)

    shuffled_frames = []
    for _, frame in working.groupby(group):
        # Independent copy so the assignment below never targets a view.
        frame = frame.copy()
        frame[label] = np.random.permutation(frame[label].values)
        shuffled_frames.append(frame)

    if not shuffled_frames:
        # pd.concat raises on an empty list; return a zero-row frame instead.
        return working.iloc[0:0]

    # Concatenate once after the loop instead of on every iteration.
    return pd.concat(shuffled_frames)

In [8]:
# Run the permutation 10000 times: each entry is one shuffled copy of df
# (clone_id permuted within donor) tagged with its permutation number.
permutations = 10000
summary_df_list = [
    permute_labels(df, "donor", "clone_id").assign(permutation=i)
    for i in range(permutations)
]

In [9]:
# Stack all permuted frames into one table; the 'permutation' column tells
# the individual runs apart.
summary_df = pd.concat(summary_df_list)

In [10]:
# Mean number of distinct tissues per clone in the shuffled data, averaged
# over every (permutation, clone_id) pair.
summary_df.groupby(['permutation', 'clone_id']).tissue.nunique().mean()

3.0324984375

In [61]:
# Same statistic on the unshuffled data: mean number of distinct tissues per clone.
df.groupby('clone_id').tissue.nunique().mean()

2.389830508474576

In [None]:
# 0.9999 quantile of the values in summary_stat.
# NOTE(review): summary_stat is not defined anywhere in the visible code --
# confirm where it is computed before running this cell.
pd.Series(summary_stat).quantile(0.9999)

In [None]:
# NOTE(review): neither `ir` nor `adata` is defined or imported in the visible
# code; presumably `ir` is scirpy and `adata` an AnnData object -- confirm.

# Group abundance of receptor_subtype, split by tissue.
ax = ir.pl.group_abundance(adata, groupby='receptor_subtype', target_col='tissue')

# Group abundance of receptor_subtype, split by donor.
ax = ir.pl.group_abundance(adata, groupby='receptor_subtype', target_col='donor')


# Group abundance of clone_id, split by cell_ontology_class, with max_cols=10.
ax = ir.pl.group_abundance(adata, groupby='clone_id', target_col='cell_ontology_class', max_cols=10)

# VDJ usage plot over all of adata, with max_ribbons=30.
ax = ir.pl.vdj_usage(adata, full_combination=False, max_segments=None, max_ribbons=30)

# VDJ usage restricted to the observations whose clone_id is "546", "556" or "626".
ir.pl.vdj_usage(
    adata[adata.obs["clone_id"].isin(["546", "556", '626']), :],
    max_ribbons=None,
    max_segments=100,
)