# 3.0 CITE-seq ADT

In [1]:
from clustergrammer2 import net
# Registry of named DataFrames: every processing stage below stores its
# result under a string key (e.g. 'adt-ini', 'adt', 'adt-z').
df = dict()

clustergrammer2 backend version 0.2.8


In [2]:
import numpy as np
import pandas as pd
import gene_exp_10x
from copy import deepcopy
import json_scripts
import clustergrammer_groupby as cby

In [3]:
# Load the human/mouse cell dictionary from JSON. Its 'human-cells' entry is
# used below to select the human cell columns of the ADT matrix.
cell_dict = json_scripts.load_to_dict('../data/CITE-seq_data/human_mouse_cell_dictionary.json')

### Load ADT Data
Load the ADT data, arcsinh-transform the ADT levels (counts divided by 5), then Z-score each ADT across cells.

### Drop CCR5, CCR7, and CD10 (following Seurat tutorial)

In [4]:
# Raw ADT UMI counts; the first CSV column is used as the row index.
df['adt-ini'] = pd.read_csv(
    '../data/CITE-seq_data/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv',
    index_col=0,
)

# arcsinh-transform the counts (scaled by 5) and drop the three poorly
# enriched markers (rows).
df['adt'] = np.arcsinh(df['adt-ini'].div(5)).drop(index=['CCR5', 'CCR7', 'CD10'])

# Restrict the columns to the cells listed as human.
df['adt'] = df['adt'][cell_dict['human-cells']]

# Z-score every row through the clustergrammer network object and keep the
# normalized matrix under its own key.
net.load_df(df['adt'])
net.normalize(axis='row', norm_type='zscore')
df['adt-z'] = net.export_df()

# Display the shape of the transformed (not z-scored) matrix.
df['adt'].shape

(10, 7339)

# Visualize ADT Levels (Z-scored)

In [5]:
# Work on an independent copy so the z-scored matrix keeps its plain column names.
df['adt-z-cat'] = deepcopy(df['adt-z'])

# Wrap every column name in a 1-tuple. Later cells read element [1] of each
# column label as a group category -- presumably net.dendro_cats appends it
# to these tuples (TODO confirm against the clustergrammer docs).
cols = df['adt-z'].columns.tolist()
new_cols = [(col_name,) for col_name in cols]
df['adt-z-cat'].columns = new_cols

In [6]:
# Cluster the tuple-labelled z-scored matrix, then attach column dendrogram
# groups at dendro_level=2. The exported frame's column labels carry the group
# as their second element (values look like 'Group 2: cat-4' in the counts
# printed further down).
net.load_df(df['adt-z-cat'])
net.cluster()
net.dendro_cats(axis='col', dendro_level=2)
df['adt-groups'] = net.export_df()
# net.cluster()
# net.widget()

In [7]:
# Column labels of the grouped matrix; the next cell reads element [1] of each label.
cols = df['adt-groups'].columns.tolist()

In [8]:
# Second element of every column label, i.e. the dendrogram group of each cell.
group_info = [col_label[1] for col_label in cols]

In [9]:
# One entry per cell, holding that cell's group label.
ser_groups = pd.Series(data=group_info)


In [10]:
# Number of cells in each dendrogram group (value_counts sorts by count,
# largest first). Print how many groups exist, then display the counts.
group_val_counts = ser_groups.value_counts()
print(group_val_counts.shape)
group_val_counts

(30,)


Group 2: cat-4     2529
Group 2: cat-19    1455
Group 2: cat-26     891
Group 2: cat-20     623
Group 2: cat-10     410
Group 2: cat-29     305
Group 2: cat-25     267
Group 2: cat-18     169
Group 2: cat-24     152
Group 2: cat-7      131
Group 2: cat-16     131
Group 2: cat-23      51
Group 2: cat-13      50
Group 2: cat-27      49
Group 2: cat-1       32
Group 2: cat-8       20
Group 2: cat-9       13
Group 2: cat-15      10
Group 2: cat-2        9
Group 2: cat-5        8
Group 2: cat-17       7
Group 2: cat-22       5
Group 2: cat-11       5
Group 2: cat-6        4
Group 2: cat-12       4
Group 2: cat-3        3
Group 2: cat-28       2
Group 2: cat-21       2
Group 2: cat-30       1
Group 2: cat-14       1
dtype: int64

In [11]:
# Keep only the groups that contain at least 15 cells; smaller groups are
# dropped from the downstream analysis.
keep_group_val_counts = group_val_counts[group_val_counts.ge(15)]
print(keep_group_val_counts.shape)
keep_group_val_counts

(16,)


Group 2: cat-4     2529
Group 2: cat-19    1455
Group 2: cat-26     891
Group 2: cat-20     623
Group 2: cat-10     410
Group 2: cat-29     305
Group 2: cat-25     267
Group 2: cat-18     169
Group 2: cat-24     152
Group 2: cat-7      131
Group 2: cat-16     131
Group 2: cat-23      51
Group 2: cat-13      50
Group 2: cat-27      49
Group 2: cat-1       32
Group 2: cat-8       20
dtype: int64

In [12]:
# Labels of the groups that passed the size filter.
keep_groups = list(keep_group_val_counts.index)

In [13]:
# Column labels whose group (element [1]) passed the size filter.
cols = df['adt-groups'].columns.tolist()
keep_clustered = [col_label for col_label in cols if col_label[1] in keep_groups]

# Total number of cells vs. number of cells retained.
print(len(cols), len(keep_clustered))

7339 7265


In [14]:
# Trimmed matrix: only the cells (columns) that belong to a retained group.
df['adt-trim'] = df['adt-groups'][keep_clustered]

In [19]:
# Load the trimmed matrix into the network object and show it as a widget.
net.load_df(df['adt-trim'])
net.widget()

ExampleWidget(network='{"row_nodes": [{"name": "CD3", "ini": 10, "clust": 1, "rank": 4, "rankvar": 2, "group":…

In [20]:
# Export the visualization JSON of whatever matrix is currently loaded in
# `net` (df['adt-trim'] when the previous cell has just been run).
# NOTE(review): the captured output below is a quoted str repr, so this looks
# like JSON text rather than a dict -- confirm before passing it to helpers
# that expect a dict.
# net.load_df(df['adt-trim'])
viz_json = net.export_net_json(net_type='viz')

'{"row_nodes": [{"name": "CD3", "ini": 10, "clust": 1, "rank": 4, "rankvar": 2, "group": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, {"name": "CD4", "ini": 9, "clust": 2, "rank": 1, "rankvar": 6, "group": [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}, {"name": "CD8", "ini": 8, "clust": 7, "rank": 2, "rankvar": 1, "group": [10.0, 10.0, 8.0, 5.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0]}, {"name": "CD45RA", "ini": 7, "clust": 5, "rank": 0, "rankvar": 5, "group": [6.0, 6.0, 5.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0]}, {"name": "CD56", "ini": 6, "clust": 8, "rank": 3, "rankvar": 0, "group": [8.0, 8.0, 7.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0]}, {"name": "CD16", "ini": 5, "clust": 9, "rank": 9, "rankvar": 9, "group": [9.0, 9.0, 7.0, 4.0, 4.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0]}, {"name": "CD11c", "ini": 4, "clust": 3, "rank": 6, "rankvar": 3, "group": [4.0, 4.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0]}, {"name": "CD14", "ini": 3, "clust": 0, "rank": 8, "rankvar": 4, 

In [18]:
# viz_json is already JSON text (net.export_net_json returns a string), so it
# is written to disk verbatim. The previous call handed the string to
# net.save_dict_to_json, which expects a dict and would JSON-encode the text a
# second time, leaving a quoted/escaped string in the file instead of the
# visualization object.
with open('../data/CITE-seq_data/adt-viz.json', 'w') as viz_file:
    viz_file.write(viz_json)

In [16]:
# Build per-group signatures from the trimmed matrix, grouping cells by their
# 'Group 2' category. Of the four returned values only df['cat-sig'] is used
# by the cells visible in this notebook.
df['cat-sig'], keep_genes_dict, df_gene_pval, all_fold_info = cby.generate_signatures(df['adt-trim'], category_level='Group 2')

### Add cluster size to cluster names

In [17]:
# cols

In [18]:
# Rename every signature column to 'G<group number>_<cell count>' while keeping
# the original category string as the second element of the label, e.g.
# ('cat-4', 'Group 2: cat-4') -> ('G4_2529', 'Group 2: cat-4').
cols = df['cat-sig'].columns.tolist()

new_cols = []
for inst_col in cols:
    inst_name = inst_col[0]
    inst_cat = inst_col[1]
    # Cell count of this group, looked up by its full category label.
    inst_count = keep_group_val_counts.loc[inst_cat]
    new_col = (inst_name.replace('cat-', 'G') + '_' + str(inst_count), inst_cat)
    new_cols.append(new_col)

# Relabel a copy so df['cat-sig'] keeps its original column names.
df['cat-sig-count'] = deepcopy(df['cat-sig'])
df['cat-sig-count'].columns = new_cols

In [19]:
# Display the retained group sizes for reference while naming the groups below.
keep_group_val_counts

Group 2: cat-4     2529
Group 2: cat-19    1455
Group 2: cat-26     891
Group 2: cat-20     623
Group 2: cat-10     410
Group 2: cat-29     305
Group 2: cat-25     267
Group 2: cat-18     169
Group 2: cat-24     152
Group 2: cat-7      131
Group 2: cat-16     131
Group 2: cat-23      51
Group 2: cat-13      50
Group 2: cat-27      49
Group 2: cat-1       32
Group 2: cat-8       20
dtype: int64

In [20]:
# Sanity check: rows x number of retained groups.
df['cat-sig-count'].shape

(10, 16)

In [21]:
# Start from an identity mapping (group name -> group name) so every group has
# a label even if it is not renamed manually in the next cell.
# `cols` is bound to the list of column labels, as in every other cell, rather
# than to the DataFrame itself.
cols = df['cat-sig-count'].columns.tolist()
ct_dict = {}
for inst_col in cols:
    ct_dict[inst_col[0]] = inst_col[0]

In [22]:
# Manually assigned cell-type labels, keyed by '<group>_<cell count>'.
# NOTE: the keys depend on the clustering result and the group sizes above.
ct_dict.update({
    'G29_305': 'B cell',
    'G27_49': 'pDC_1',
    'G26_891': 'NK cell',
    'G25_267': 'CD8 T cell',
    'G4_2529': 'CD4 T cell',
    'G1_32': 'pDC_2',
    'G13_50': 'CD34+ cell',
    'G20_623': 'CD14+ Mono cell_1',
    'G19_1455': 'CD14+ Mono cell_2',
    'G18_169': 'CD16+ Mono cell',
})

# Groups without an identified cell type.
ct_dict.update({
    'G16_131': 'Unknown_1',
    'G7_131': 'Unknown_2',
    'G10_410': 'Unknown_3',
    'G8_20': 'Unknown_4',
    'G24_152': 'Unknown_5',
    'G23_51': 'Unknown_6',
})

In [23]:
# Same labels keyed by the bare group id: the part of each key before the
# first underscore, e.g. 'G29_305' -> 'G29'.
tct_dict = {
    group_key.split('_')[0]: cell_type
    for group_key, cell_type in ct_dict.items()
}

In [24]:
# Replace each '<group>_<count>' column name with its cell-type label, keeping
# the category element unchanged. The column labels are read from the frame
# being relabeled instead of relying on a `cols` variable left over from an
# earlier cell.
cols = df['cat-sig-count'].columns.tolist()
new_cols = [(ct_dict[x[0]], x[1]) for x in cols]
df['cat-sig-labeled'] = deepcopy(df['cat-sig-count'])
df['cat-sig-labeled'].columns = new_cols

### Labeled Average Marker Data

In [25]:
# Load the labeled group signatures into the network object and show them as a widget.
net.load_df(df['cat-sig-labeled'])
net.widget()

ExampleWidget(network='{"row_nodes": [{"name": "CD11c", "ini": 10, "clust": 4, "rank": 4, "rankvar": 3, "group…

### Transfer labels to trimmed data

In [26]:
# Give every cell in the trimmed matrix the cell-type label of its group.
cols = df['adt-trim'].columns.tolist()

new_cols = []
for cell_col in cols:
    # 'Group 2: cat-4' -> 'cat-4' -> 'G4', the key format used by tct_dict.
    group_key = cell_col[1].split(': ')[1].replace('cat-', 'G')
    new_cols.append((cell_col[0], tct_dict[group_key]))

# Relabel a copy so df['adt-trim'] keeps its group-based column labels.
df['adt-trim-cat'] = deepcopy(df['adt-trim'])
df['adt-trim-cat'].columns = new_cols

In [27]:
# Load the cell-type-labeled trimmed matrix into the network object and show it as a widget.
net.load_df(df['adt-trim-cat'])
net.widget()

ExampleWidget(network='{"row_nodes": [{"name": "CD3", "ini": 10, "clust": 1, "rank": 4, "rankvar": 2, "group":…

In [29]:
# Save the labeled, trimmed matrix as a tab-separated file.
df['adt-trim-cat'].to_csv('../data/CITE-seq_data/adt_ashz_trim_cats.txt', sep='\t')