# Import setup

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import csv

import scanpy as sc
import scanpy.external as sce

from igraph import *
import ipywidgets
from MulticoreTSNE import MulticoreTSNE as TSNE #faster TSNE alternative
import anndata
from anndata import read_h5ad
import arrow
import bbknn
import leidenalg

# Scanpy logging at verbosity level 3, plus a dump of package versions for the record.
sc.settings.verbosity = 3
sc.logging.print_versions()
# Fix the global NumPy seed so code that draws from it is repeatable.
np.random.seed(685)

# Font type 42 (TrueType) keeps text editable in exported PDF/PS figures.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42

plt.rcParams['figure.figsize']=(8,8) #rescale figures
# NOTE(review): set_figure_params may overwrite rcParams set above — confirm figsize survives.
sc.settings.set_figure_params(dpi=50)

# Color dictionary

In [None]:
def color_dict():
    """Return the tissue -> hex colour lookup used for the tissue plots.

    Every tissue is pinned to one entry of matplotlib's "tab20b" or "tab20c"
    palette. The assignment is spelled out explicitly so that it no longer
    depends on the sorted tissue order lining up with the order in which
    colours are collected.

    Returns:
        dict: tissue name -> hex colour string, keys in sorted order.
        The key 'Diaphgram' keeps its existing spelling because other cells
        rename 'Diaphragm' to 'Diaphgram' before looking colours up.
    """
    import matplotlib.colors as pltcolors

    # plt.get_cmap replaces plt.cm.get_cmap, which was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    palettes = {
        "tab20b": plt.get_cmap("tab20b"),
        "tab20c": plt.get_cmap("tab20c"),
    }

    # (tissue, palette name, index into that palette)
    assignment = [
        ('Aorta', 'tab20b', 0),
        ('BAT', 'tab20b', 3),
        ('Bladder', 'tab20b', 4),
        ('Brain_Myeloid', 'tab20b', 6),
        ('Brain_Non-Myeloid', 'tab20b', 7),
        ('Diaphgram', 'tab20b', 8),
        ('Fat', 'tab20c', 17),
        ('GAT', 'tab20b', 9),
        ('Heart', 'tab20b', 10),
        ('Kidney', 'tab20b', 11),
        ('Large_Intestine', 'tab20b', 12),
        ('Limb_Muscle', 'tab20b', 14),
        ('Liver', 'tab20b', 15),
        ('Lung', 'tab20b', 16),
        ('MAT', 'tab20b', 17),
        ('Mammary_Gland', 'tab20b', 18),
        ('Marrow', 'tab20b', 19),
        ('Pancreas', 'tab20c', 0),
        ('SCAT', 'tab20c', 1),
        ('Skin', 'tab20c', 4),
        ('Spleen', 'tab20c', 5),
        ('Thymus', 'tab20c', 8),
        ('Tongue', 'tab20c', 10),
        ('Trachea', 'tab20c', 16),
    ]

    return {
        tissue: pltcolors.to_hex(palettes[palette](idx))
        for tissue, palette, idx in assignment
    }

maca_color_dict = color_dict()



In [None]:
def plot_colortable(colors, title, sort_colors=True, emptycols=0):
    """Draw a table of colour swatches with their names and return the figure.

    Args:
        colors: mapping of display name -> matplotlib colour.
        title: text placed at the top left of the table.
        sort_colors: when exactly ``True``, entries are ordered by
            (colour value, name); otherwise the mapping's own order is kept.
        emptycols: number of the four layout columns to leave unused; the
            entries are distributed over ``4 - emptycols`` columns.

    Returns:
        The matplotlib figure holding the table.
    """
    cell_width, cell_height = 212, 22
    swatch_width = 48
    margin, topmargin = 12, 40
    dpi = 72

    # (colour, name) pairs so that sorting orders by colour value first.
    pairs = [(colour, label) for label, colour in colors.items()]
    if sort_colors is True:
        pairs.sort()
    names = [label for _, label in pairs]

    n = len(names)
    ncols = 4 - emptycols
    full_rows, leftover = divmod(n, ncols)
    nrows = full_rows + int(leftover > 0)

    # The canvas is always four columns wide, whatever emptycols is.
    width = cell_width * 4 + 2 * margin
    height = cell_height * nrows + margin + topmargin

    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
    fig.subplots_adjust(margin/width, margin/height,
                        (width-margin)/width, (height-topmargin)/height)
    ax.set_xlim(0, cell_width * 4)
    # Inverted y-limits: row 0 is drawn at the top.
    ax.set_ylim(cell_height * (nrows-0.5), -cell_height/2.)
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)
    ax.set_axis_off()
    ax.set_title(title, fontsize=24, loc="left", pad=10)

    for position, name in enumerate(names):
        # Entries fill column by column.
        col, row = divmod(position, nrows)
        y = row * cell_height
        left = cell_width * col

        ax.text(left + swatch_width + 7, y, name, fontsize=14,
                horizontalalignment='left',
                verticalalignment='center')

        ax.hlines(y, left, left + swatch_width,
                  color=colors[name], linewidth=18)

    return fig

plot_colortable(maca_color_dict, "Tabula Muris Senis Color Dictionary", sort_colors=False, emptycols=1)

In [None]:
def age_color_dict():
    """Return an age -> hex colour lookup sampled from the "YlGnBu" colormap.

    The ages are listed youngest to oldest and sampled at evenly spaced
    positions ``1/n, 2/n, ...`` of the colormap.

    Returns:
        dict: age label (e.g. '3m') -> hex colour string.
    """
    import matplotlib.colors as pltcolors

    ages = ['1m','3m','18m','21m','24m','30m']

    # plt.get_cmap replaces plt.cm.get_cmap, which was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("YlGnBu")

    step = 1/len(ages)
    position = step
    lookup = {}
    for age in ages:
        lookup[age] = pltcolors.to_hex(cmap(position))
        # Accumulate (rather than multiply) to keep the sampled positions
        # identical to the ones this notebook has always used.
        position += step

    return lookup

# NOTE: this rebinds the name `age_color_dict` from the function to its result,
# so the function cannot be called again without re-running the definition.
age_color_dict = age_color_dict()

plot_colortable(age_color_dict, "Tabula Muris Senis Age Color Dictionary", sort_colors=False, emptycols=1)
plt.savefig('./figures/maca_age_color_dict.pdf')

# Clonal analysis

In [None]:
from collections import Counter
import re
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import networkx as nx
from pyvis.network import Network
from Bio.Seq import translate

## Prepare the data

### new way

In [None]:
adata = read_h5ad('/data/maca/maca-data-release-v4/tabula-muris-senis-facs-official-annotations.h5ad')
adata

In [None]:
adata

In [None]:
# Work on a copy of the per-cell annotations so adata.obs itself is not modified.
metadata = adata.obs.copy()
# Two name columns used as join keys further down: the part of the index
# before the first '.m', and the index itself.
metadata['cell_name_18m_24m'] = [name.partition('.m')[0] for name in metadata.index]
metadata['cell_name_3m'] = metadata.index
metadata.head()

In [None]:
metadata.to_csv('./write/tracer_bracer/metadata.csv')

## Tracer

### add 3m

In [None]:
tracer3m = pd.read_csv('./write/tracer_bracer/cell_data_tracer_3m.csv')
tracer3m

In [None]:
# Rewrite the cell names ('-' -> '.', plus a '-1-1' suffix) into the form that is
# compared against metadata.index in the following cells.
dotted_names = tracer3m['cell_name'].str.replace('-','.')
tracer3m['cell_name_matching_metadata'] = dotted_names + '-1-1'
tracer3m

In [None]:
tracer3m[~tracer3m['cell_name_matching_metadata'].isin(metadata.index)]

In [None]:
tracer3m = tracer3m.set_index('cell_name_matching_metadata')
tracer3m.head()

In [None]:
tracer3m = tracer3m.join(metadata, how='inner')
tracer3m.head()

In [None]:
set(tracer3m['clonal_group'].astype(str))

### add 18m

In [None]:
tracer18m = pd.read_csv('./write/tracer_bracer/cell_data_tracer_18m.csv')
tracer18m

In [None]:
tracer18m[~tracer18m['cell_name'].isin(metadata['cell_name_18m_24m'])]

In [None]:
# Attach the metadata by cell name (default inner merge: cells without a match are dropped).
tracer18m = tracer18m.merge(metadata, left_on='cell_name', right_on='cell_name_18m_24m')
# Shift the group ids by 100 — presumably to keep them apart from the 3m ids
# (24m uses +300). TODO confirm that no age has enough groups for the ranges to overlap.
tracer18m['clonal_group'] = tracer18m['clonal_group']+100.0
tracer18m

In [None]:
tracer18m.head()

In [None]:
set(tracer18m['clonal_group'].astype(str))

### add 24m

In [None]:
tracer24m = pd.read_csv('./write/tracer_bracer/cell_data_tracer_24m.csv')
tracer24m

In [None]:
tracer24m[~tracer24m['cell_name'].isin(metadata['cell_name_18m_24m'])]

In [None]:
# Attach the metadata by cell name (default inner merge: cells without a match are dropped).
tracer24m = tracer24m.merge(metadata, left_on='cell_name', right_on='cell_name_18m_24m')
# Shift the group ids by 300 — presumably to keep them apart from the 3m and 18m (+100) ids.
# TODO confirm that the id ranges of the three ages cannot overlap.
tracer24m['clonal_group'] = tracer24m['clonal_group']+300.0
tracer24m.head()

In [None]:
set(tracer24m['clonal_group'].astype(str))

### analysis with metadata

In [None]:
tracer3m.shape, tracer18m.shape, tracer24m.shape

In [None]:
tracer = pd.concat([tracer3m, tracer18m, tracer24m], ignore_index=True)
tracer

In [None]:
len(tracer),len(set(tracer['clonal_group'].astype(str)))

In [None]:
tracer['cell_ontology_class_reannotated'].value_counts()

In [None]:
tracer['age'].value_counts()

In [None]:
# Cells per (age, clonal group). Keyword ("named") aggregation is used because the
# dict-renaming form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer.groupby(['age','clonal_group'])['cell_name'].agg(len=len)

In [None]:
# Cells per (clonal group, age, mouse). Named aggregation: the dict-renaming form
# of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer_summary = tracer.groupby(['clonal_group','age','mouse.id'])['cell'].agg(cell=len)
tracer_summary.head()

In [None]:
# Rows per (age, clonal group). Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer_summary = tracer.groupby(['age','clonal_group'])['clonal_group'].agg(count=len)
tracer_summary.head()

In [None]:
tracer_summary.index.get_level_values('clonal_group'), tracer_summary.index.get_level_values('age')

In [None]:
len(tracer_summary.index.get_level_values('age')),sum(tracer_summary.index.get_level_values('age')=='24m'),sum(tracer_summary.index.get_level_values('age')=='3m'),sum(tracer_summary.index.get_level_values('age')=='18m')


In [None]:
set(tracer_summary.index.get_level_values('age'))

In [None]:
tracer.columns

In [None]:
# Distinct non-missing values of the two productive-chain columns and of the clonal group ids.
aproductive = list(set(tracer['A_productive'].dropna()))
bproductive = list(set(tracer['B_productive'].dropna()))
clonegroup = list(set(tracer['clonal_group'].dropna()))

clonegroup[:10]

In [None]:
bproductive.sort()
len(aproductive),len(bproductive),len(tracer),len(clonegroup)

In [None]:
tracer.columns

In [None]:
# Rows per (clonal group, mouse). Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer.groupby(['clonal_group','mouse.id'])['group_size'].agg(len=len)

In [None]:
cg = tracer.loc[tracer['clonal_group']=='5.0_2']
cg

In [None]:
# Rebuild the combined table and flag "validated" clones: clonal groups whose cells
# all come from one mouse, or the per-mouse subsets (more than one cell) of a group
# that spans several mice.
tracer = pd.concat([tracer3m, tracer18m, tracer24m], ignore_index=True)
tracer['validated_clone'] = 999  # 999 marks "not validated"; validated rows are set to 1
tracer['validated_clone_length'] = 1.0

# Distinct non-missing group ids, captured before any id is rewritten below.
clonegroup = list(set(tracer.clonal_group[~tracer['clonal_group'].isnull()]))
cauxi = 500  # offset added to a group id when a multi-mouse group is split per mouse

for c in clonegroup:
    cg = tracer.loc[tracer['clonal_group']==c]
    mice = set(cg['mouse.id'])
    if len(mice) == 1:
        # Whole group comes from one mouse; note this also covers single-cell groups.
        tracer.loc[cg.index,'validated_clone'] = 1
        tracer.loc[cg.index,'validated_clone_length'] = len(cg)
    else:
        for m in list(set(cg['mouse.id'])):
            
            cgaux = cg[cg['mouse.id']==m]
            # Only mice contributing more than one cell form a validated sub-clone;
            # single cells of a multi-mouse group keep the 999 marker and the old id.
            if len(cgaux) > 1:
                tracer.loc[cgaux.index,'validated_clone_length'] = len(cgaux)
                tracer.loc[cgaux.index,'validated_clone'] = 1
                # NOTE(review): the new id (old id + cauxi) is not checked against existing
                # ids; if it equals an id still to be visited in `clonegroup`, these rows
                # are processed again as part of that group — confirm this cannot happen.
                tracer.loc[cgaux.index,'clonal_group'] = tracer.loc[cgaux.index,'clonal_group'] + cauxi
                cauxi = cauxi+100
                
tracer.head()

In [None]:
# Rows per (validation flag, clonal group, mouse). Named aggregation: the dict-renaming
# form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer.groupby(['validated_clone','clonal_group','mouse.id'])['age'].agg(len=len)

In [None]:
# Rows per (clonal group, mouse). Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer.groupby(['clonal_group','mouse.id'])['mouse.id'].agg(len=len)

In [None]:
# Rows per (clonal group, mouse). Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer.groupby(['clonal_group','mouse.id'])['mouse.id'].agg(len=len)

In [None]:
tracer

In [None]:
df = pd.DataFrame(tracer.loc[tracer['validated_clone']==1].groupby(['clonal_group']).size())

In [None]:
set(tracer['validated_clone'])

In [None]:
tracer.loc[tracer['clonal_group']==8.0]

In [None]:
set(tracer.loc[tracer['clonal_group']==8.0]['B_productive'])

In [None]:
tracer_validated = tracer[tracer['validated_clone']==1]

In [None]:
# Rows per (age, validation flag) and per age. Named aggregation: the dict-renaming
# form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
display(tracer.groupby(['age','validated_clone'])['validated_clone'].agg(len=len))

display(tracer.groupby(['age'])['validated_clone'].agg(len=len))

In [None]:
# Cells per age in the full metadata. Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
metadata.groupby(['age'])['mouse.id'].agg(len=len)

In [None]:
tracer_validated.groupby(['age','validated_clone'])['validated_clone'].sum()

In [None]:
len(tracer[tracer['age']=='3m']),len(tracer[tracer['age']=='18m']),len(tracer[tracer['age']=='24m'])



In [None]:
len(tracer_validated[tracer_validated['age']=='3m']),len(tracer_validated[tracer_validated['age']=='18m']),len(tracer_validated[tracer_validated['age']=='24m'])



In [None]:
# Validated cells per (tissue, age). Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer_validated.groupby(['tissue','age'])['validated_clone_length'].agg(count=len)


In [None]:
set(tracer_validated['cell_ontology_class']), set(tracer_validated['cell_ontology_class_reannotated'])

In [None]:
# Cells per (age, clonal group), then the number of groups with more than one cell
# at each age. Named aggregation: the dict-renaming form of SeriesGroupBy.agg raises
# SpecificationError on pandas >= 1.0.
df = tracer_validated.groupby(['age','clonal_group'])['validated_clone_length'].agg(count=len)
df = df.reset_index()
df[df['count']>1].groupby(['age']).count()

In [None]:
# Validated cells per (age, clonal group, tissue, mouse, cell type). Named aggregation:
# the dict-renaming form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
tracer_validated.groupby(['age','clonal_group','tissue','mouse.id','cell_ontology_class_reannotated'])['validated_clone_length'].agg(count=len)



### network visualization

In [None]:
# Network of all tracer cells: age -> mouse -> clonal group <- cell.
G = Network(height=2000,width=1000,notebook=True)

edgelist = []
vertice = []
# First id above every real clonal group id; handed out as a placeholder node id
# to cells that have no clonal group (NaN).
vidx = int(np.nanmax(tracer['clonal_group'].values.astype(float)))+1.
for i in tracer.index:
    age = tracer.loc[i, 'age']
    mouse = tracer.loc[i, 'mouse.id']
    cell = tracer.loc[i, 'cell']
    clone = tracer.loc[i, 'clonal_group']

    # Resolve the clonal-group node ONCE per row. Previously a separate placeholder
    # was allocated for the mouse edge and for the cell edge, so a cell without a
    # clonal group was never connected to its mouse.
    # np.isnan replaces math.isnan: `math` is not imported by this notebook and was
    # only reachable if `from igraph import *` happened to export it.
    if np.isnan(float(clone)):
        clone = vidx
        clone_size = 1.0
        vidx = vidx+1
    else:
        clone_size = tracer.loc[i, 'validated_clone_length']

    # age -> mouse
    edge = (str(age), mouse)
    if edge not in edgelist:
        if edge[0] not in vertice:
            G.add_nodes([edge[0]], label=[age], color=["#dd4b39"],
                        value=[10])
            vertice.append(edge[0])
        if mouse not in vertice:
            G.add_nodes([mouse], color=["#00ff1e"], label=[mouse])
            vertice.append(mouse)
        G.add_edge(edge[0], to=edge[1])
        edgelist.append(edge)

    # mouse -> clonal group (node sized by the validated clone length)
    edge = (mouse, clone)
    if edge not in edgelist:
        if mouse not in vertice:
            G.add_nodes([mouse], label=[mouse], color=["#00ff1e"])
            vertice.append(mouse)
        if clone not in vertice:
            G.add_nodes([clone], color=["#FFD700"], label=[' '],
                        value=[clone_size])
            vertice.append(clone)
        G.add_edge(edge[0], to=edge[1])
        edgelist.append(edge)

    # cell -> clonal group
    edge = (cell, clone)
    if edge not in edgelist:
        if cell not in vertice:
            G.add_nodes([cell], label=[' '], color=["#162347"])
            vertice.append(cell)
        if clone not in vertice:
            G.add_nodes([clone], color=["#FFD700"], label=[' '],
                        value=[clone_size])
            vertice.append(clone)
        G.add_edge(edge[0], to=edge[1])
        edgelist.append(edge)


G.save_graph('tracer.html')



In [None]:
# Replace every value equal to 'Heart' (in any column) with 'Heart_and_Aorta'.
# NOTE(review): maca_color_dict has the keys 'Heart' and 'Aorta' but no 'Heart_and_Aorta',
# and the network cell that follows indexes maca_color_dict by tissue — confirm this
# cannot raise KeyError for the cells kept here.
tracer_validated = tracer_validated.replace('Heart','Heart_and_Aorta')

In [None]:
# Network of validated tracer clones: age -> mouse -> clonal group <- cell,
# with cell nodes coloured by tissue.
from pyvis.network import Network
G = Network(height=2000,width=1000,notebook=True)

edgelist = []
vertice = []

# The colour dictionary spells this tissue 'Diaphgram'.
tracer_validated['tissue'] = tracer_validated['tissue'].replace('Diaphragm','Diaphgram')
for i in tracer_validated.index:
    age_label = tracer_validated.loc[i, 'age']
    age_node = str(age_label)
    mouse = tracer_validated.loc[i, 'mouse.id']
    clone = tracer_validated.loc[i, 'clonal_group']
    cell_name = tracer_validated.loc[i, 'cell_name']
    clone_size = tracer_validated.loc[i, 'validated_clone_length']

    # age -> mouse
    edge = (age_node, mouse)
    if edge not in edgelist:
        if age_node not in vertice:
            G.add_nodes([age_node], label=[age_label], color=['black'],
                        value=[10])
            vertice.append(age_node)
        if mouse not in vertice:
            G.add_nodes([mouse], color=["#555555"], label=[mouse])
            vertice.append(mouse)
        G.add_edge(age_node, to=mouse)
        edgelist.append(edge)

    # mouse -> clonal group (unlabelled node sized by clone length)
    edge = (mouse, clone)
    if edge not in edgelist:
        if mouse not in vertice:
            G.add_nodes([mouse], label=[mouse], color=["#555555"])
            vertice.append(str(mouse))
        if clone not in vertice:
            G.add_nodes([clone], color=["#737373"], label=[' '],
                        value=[clone_size])
            vertice.append(clone)
        G.add_edge(mouse, to=clone)
        edgelist.append(edge)

    # cell -> clonal group
    edge = (cell_name, clone)
    if edge not in edgelist:
        if cell_name not in vertice:
            tissue_colour = maca_color_dict[tracer_validated.loc[i, 'tissue']]
            G.add_nodes([cell_name], label=[' '], color=[tissue_colour])
            vertice.append(str(cell_name))
        if clone not in vertice:
            G.add_nodes([clone], color=["#737373"], label=[clone],
                        value=[clone_size])
            vertice.append(clone)
        G.add_edge(cell_name, to=clone)
        edgelist.append(edge)

G.show('tracer_validated_all_ages.html')



## Bracer

### Load the data

In [None]:
pwd

In [None]:
bracer_counts = pd.read_csv('./20190408_B_cells_combined_counts.tsv',sep='\t')
# bracer_counts

In [None]:
bracer_asssemblies = pd.read_csv('./20190408_B_cells_combined_assemblies.tsv', sep = '\t')
bracer_asssemblies.head(10)

In [None]:
# Keep only the part of each sample name before the first '/'.
sample_names = bracer_asssemblies['SAMPLENAME']
bracer_asssemblies['SAMPLENAME'] = [name.partition('/')[0] for name in sample_names]
bracer_asssemblies

In [None]:
bracer_asssemblies.shape

In [None]:
# Append the 18m assemblies to the combined table.
# NOTE(review): `bracer18m` is first assigned in the "18m data" section further down,
# so this cell raises NameError when the notebook is run top to bottom — confirm the
# intended execution order.
bracer_asssemblies = pd.concat([bracer_asssemblies, bracer18m], ignore_index=True)

In [None]:
bracer_asssemblies.head()

### Merge metadata

In [None]:
# Copy the annotations: the cells below rewrite the 'cell' column in place, and without
# the copy those edits would also change adata.obs (the tracer section copies too).
immune_cells_metadata = adata.obs.copy()
immune_cells_metadata

In [None]:
immune_cells_metadata[immune_cells_metadata['age']=='3m']

In [None]:
# Replace '.' with '-' in the cell names. The vectorised literal replace leaves missing
# values untouched, whereas calling x.replace on a NaN raises AttributeError; the next
# cell handles the missing names.
immune_cells_metadata['cell'] = immune_cells_metadata['cell'].str.replace(".", "-", regex=False)

In [None]:
# Cast both id columns to text, then fall back to 'cellid' wherever 'cell' is the
# string 'nan' (what a missing value becomes after the cast).
for id_column in ('cell', 'cellid'):
    immune_cells_metadata[id_column] = immune_cells_metadata[id_column].astype(str)
cell_missing = immune_cells_metadata['cell']=='nan'
immune_cells_metadata.loc[cell_missing,'cell'] = immune_cells_metadata.loc[cell_missing,'cellid']

In [None]:
immune_cells_metadata[immune_cells_metadata['age']=='24m']

In [None]:
# Keep only the part of each cell name before the first '.'.
immune_cells_metadata['cell'] = [name.partition('.')[0] for name in immune_cells_metadata['cell']]
immune_cells_metadata['cell']

In [None]:
# Split each name on '-mus' and rejoin the first two pieces with '_'.
rejoined = []
for name in immune_cells_metadata['cell']:
    pieces = name.split('-mus')
    rejoined.append('_'.join(pieces[:2]))
immune_cells_metadata['cell'] = rejoined
immune_cells_metadata['cell']

In [None]:
# Keep only the first three '_'-separated fields of each name.
trimmed = []
for name in immune_cells_metadata['cell']:
    fields = name.split('_')
    trimmed.append('_'.join(fields[:3]))
immune_cells_metadata['cell'] = trimmed
immune_cells_metadata['cell']

In [None]:
set(immune_cells_metadata['age'])

In [None]:
len(set(immune_cells_metadata['cell'].astype(str)))

In [None]:
immune_cells_metadata

In [None]:
# For the 18m and 24m cells, overwrite 'cell' with the value of the 'cell_name_18m' column.
# NOTE(review): the only similarly named column created in this notebook is
# 'cell_name_18m_24m', and it is added to `metadata`, not to this table — confirm that
# 'cell_name_18m' exists in adata.obs.
immune_cells_metadata_18m = immune_cells_metadata[immune_cells_metadata['age'].isin(['18m','24m'])]
immune_cells_metadata.loc[immune_cells_metadata_18m.index,'cell'] = immune_cells_metadata.loc[immune_cells_metadata_18m.index,'cell_name_18m']
immune_cells_metadata

In [None]:
bracer_asssemblies[~bracer_asssemblies['SAMPLENAME'].isin(immune_cells_metadata['cell'])]

In [None]:
full_bcell_data = bracer_asssemblies.merge(immune_cells_metadata, left_on='SAMPLENAME',right_on='cell')
full_bcell_data.shape

In [None]:
full_bcell_data.head()

In [None]:
bracer_asssemblies.shape

In [None]:
full_bcell_data.columns

In [None]:
# Assembly rows per age. Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
full_bcell_data.groupby(['age'])['cell'].agg(len=len)

### 3m data

In [None]:
bracer3m = pd.read_csv('./write/tracer_bracer/combined_assemblies_bracer_3m.tsv', sep = '\t')
bracer3m

In [None]:
# Rewrite the sample names ('-' -> '.', plus a '-1-1' suffix) into the form that is
# compared against metadata.index in the next cell.
dotted_samples = bracer3m['SAMPLENAME'].str.replace('-','.')
bracer3m['SAMPLENAME'] = dotted_samples + '-1-1'
bracer3m

In [None]:
bracer3m[~bracer3m['SAMPLENAME'].isin(metadata.index)]

In [None]:
bracer3m = bracer3m.merge(metadata, left_on='SAMPLENAME', right_on='cell_name_3m')
bracer3m

### 18m data

In [None]:
bracer18m = pd.read_csv('./write/tracer_bracer/combined_assemblies_bracer_18m.tsv', sep = '\t')
bracer18m.head()

In [None]:
bracer18m[~bracer18m['SAMPLENAME'].isin(metadata['cell_name_18m_24m'])]

In [None]:
bracer18m = bracer18m.merge(metadata, left_on='SAMPLENAME', right_on='cell_name_18m_24m')
bracer18m.head()

### 24m data

In [None]:
bracer24m = pd.read_csv('./write/tracer_bracer/combined_assemblies_24_months_b_cells.tsv', sep = '\t')
bracer24m.head()

In [None]:
bracer24m[~bracer24m['SAMPLENAME'].isin(metadata['cell_name_18m_24m'])]

In [None]:
bracer24m = bracer24m.merge(metadata, left_on='SAMPLENAME', right_on='cell_name_18m_24m')
bracer24m.head()

### Start analysis

In [None]:
bracer3m.shape, bracer18m.shape, bracer24m.shape

In [None]:
full_bcell_data = pd.concat([bracer3m,bracer18m,bracer24m], ignore_index=True) #,bracer24m
display(full_bcell_data.shape)
full_bcell_data.head()

In [None]:
full_bcell_data.columns

In [None]:
full_bcell_data.groupby('C_CALL').V_IDENTITY.median()

In [None]:
full_bcell_data['heavy'] = full_bcell_data.SEQUENCE_ID.str.contains('heavy')

In [None]:
full_bcell_data.groupby('heavy').heavy.value_counts().head()


In [None]:
full_bcell_data[full_bcell_data.heavy].CDR3_IMGT.value_counts()

In [None]:
aux = full_bcell_data[full_bcell_data['Unnamed: 0']!=2]
aux

In [None]:
# Samples whose `heavy` flag takes exactly two distinct values, each occurring once
# (i.e. one heavy and one non-heavy assembly row).
has_one_of_each = aux.groupby('SAMPLENAME').heavy.apply(lambda x: x.value_counts().tolist() == [1,1])
paired_cells = has_one_of_each[has_one_of_each].index



In [None]:
aux2 = aux[aux.SAMPLENAME.isin(paired_cells)].copy()

In [None]:
pv = aux2.pivot(index='SAMPLENAME', columns='heavy', values='CDR3_IMGT')
pv.groupby(True)[False].value_counts().sort_values(ascending=False)

In [None]:
full_bcell_data.C_CALL.value_counts()

In [None]:
# Length of each CDR3_IMGT string; rows whose CDR3_IMGT is not a string (e.g. NaN) get 0.
# Computed in one pass over the column instead of a per-row .loc loop, which was slow
# and only worked when the index labels were exactly 0..n-1.
full_bcell_data['CDR3_LEN'] = [
    len(cdr3) if isinstance(cdr3, str) else 0
    for cdr3 in full_bcell_data['CDR3_IMGT']
]
        

In [None]:
# Derive the cell name from the sample name: drop a "_S<digits>" tag (optionally followed
# by "_L<3 digits>"), then keep what precedes the first '/'.
cell_names = []
for sample in full_bcell_data.SAMPLENAME:
    stripped = re.sub(r"_S[0-9]+(_L[0-9]{3})?", "", sample)
    cell_names.append(stripped.split('/')[0])
full_bcell_data["cell"] = cell_names
full_bcell_data.shape


In [None]:
full_bcell_data.head()

### Look at heavy chain stats

In [None]:
bracer_asssemblies_heavy_chain = full_bcell_data[full_bcell_data['Unnamed: 0']==0]
bracer_asssemblies_heavy_chain = bracer_asssemblies_heavy_chain[~bracer_asssemblies_heavy_chain['tissue'].isna()]
bracer_asssemblies_heavy_chain.head()

In [None]:
set(bracer_asssemblies_heavy_chain['tissue'])

In [None]:
bracer_asssemblies_heavy_chain = bracer_asssemblies_heavy_chain.reset_index(drop=True)
bracer_asssemblies_heavy_chain.head()

In [None]:
len(set(bracer_asssemblies_heavy_chain.cell))

In [None]:
bracer_asssemblies_heavy_chain.columns

In [None]:
bracer_asssemblies_heavy_chain['age'].value_counts()

In [None]:
# Candidate clonal groups: heavy-chain rows of one mouse that share V call, J call and
# CDR3 length. 'cell' holds the set of cells in the group, 'len' the number of rows.
# Named aggregation: the dict-renaming form of SeriesGroupBy.agg raises
# SpecificationError on pandas >= 1.0.
bracer_summary_heavy_chain = bracer_asssemblies_heavy_chain.groupby(['mouse.id','V_CALL','J_CALL','CDR3_LEN'])['cell'].agg(cell=set, len=len)
# Keep groups with more than one row ...
bracer_summary_heavy_chain = bracer_summary_heavy_chain[bracer_summary_heavy_chain['len']>1]

# ... and with a known (non-zero) CDR3 length.
bracer_summary_heavy_chain = bracer_summary_heavy_chain[bracer_summary_heavy_chain.index.get_level_values('CDR3_LEN')>0]
bracer_summary_heavy_chain


In [None]:
from itertools import combinations

# Score each candidate group by the mean pairwise similarity (fuzz.ratio, 0-100) of the
# translated CDR3 sequences of its cells.
# The previous running update `b = np.mean([b, f])` was not a mean over all pairs (later
# pairs weighed more) and treated a score of 0 as "no score yet"; the plain mean of all
# pair scores is used instead. Groups with a single pair are unaffected.
simscore = []
for cells in bracer_summary_heavy_chain['cell']:
    in_group = bracer_asssemblies_heavy_chain['cell'].isin(list(cells))
    cdr3_sequences = bracer_asssemblies_heavy_chain.loc[in_group, 'CDR3_IMGT'].dropna()

    # Remove '.' characters (presumably IMGT gap padding — confirm) before translating.
    translated = [translate(seq.replace('.','')) for seq in cdr3_sequences]

    pair_scores = [fuzz.ratio(first, second) for first, second in combinations(translated, 2)]
    # Fewer than two sequences: no pair to compare, score 0 as before.
    simscore.append(np.mean(pair_scores) if pair_scores else 0)

bracer_summary_heavy_chain['similarity'] = np.round(simscore,2)
# Keep groups above 90; .copy() so the column assignments below do not write into a
# filtered view of the previous table.
bracer_summary_heavy_chain = bracer_summary_heavy_chain[bracer_summary_heavy_chain['similarity']>90].copy()
bracer_summary_heavy_chain['clonal_group'] = range(len(bracer_summary_heavy_chain))
bracer_summary_heavy_chain['clonal_group'] = 'C_'+bracer_summary_heavy_chain['clonal_group'].astype(str)
bracer_summary_heavy_chain['len'] = bracer_summary_heavy_chain['len'].astype(str)
bracer_summary_heavy_chain


In [None]:
bracer_summary_heavy_chain = bracer_summary_heavy_chain.reset_index(drop=True)
bracer_summary_heavy_chain


In [None]:
bracer_summary_heavy_chain.sort_values(['similarity','len'],ascending=False)

In [None]:
# Copy each group's label, size and similarity onto the heavy-chain rows of its cells.
# 'clonal_group' is created with object dtype because it receives string labels
# ('C_0', ...); writing strings into a float NaN column is deprecated in recent pandas.
bracer_asssemblies_heavy_chain['clonal_group'] = pd.Series(
    np.nan, index=bracer_asssemblies_heavy_chain.index, dtype=object)
bracer_asssemblies_heavy_chain['clonal_group_len'] = 1
bracer_asssemblies_heavy_chain['clonal_group_similarity'] = np.nan

# Walk the summary rows directly instead of re-filtering the summary by its set-valued
# 'cell' column once per group.
summary_rows = zip(bracer_summary_heavy_chain['cell'],
                   bracer_summary_heavy_chain['clonal_group'],
                   bracer_summary_heavy_chain['similarity'],
                   bracer_summary_heavy_chain['len'])
for cells, cgroup, cgroups, cgroupl in summary_rows:
    members = bracer_asssemblies_heavy_chain['cell'].isin(list(cells))
    bracer_asssemblies_heavy_chain.loc[members,'clonal_group'] = cgroup
    # 'len' was cast to str in the summary table; store it as an int again so the
    # column stays numeric (it is used as a node size in the network plot).
    bracer_asssemblies_heavy_chain.loc[members,'clonal_group_len'] = int(cgroupl)
    bracer_asssemblies_heavy_chain.loc[members,'clonal_group_similarity'] = cgroups
        
        
        

In [None]:
bracer_asssemblies_heavy_chain.head()

In [None]:
# Rows with a clonal group, per (group, mouse, cell, age, tissue). Named aggregation: the
# dict-renaming form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
bracer_asssemblies_heavy_chain[~bracer_asssemblies_heavy_chain['clonal_group'].isna()].groupby(['clonal_group','mouse.id','cell','age','tissue'])['cell'].agg(cells=set, len=len)



In [None]:
# Per clonal group: the set of mice it occurs in ('animals') and its number of rows ('len').
# Named aggregation: the dict-renaming form of SeriesGroupBy.agg raises
# SpecificationError on pandas >= 1.0.
bracer_clonal_groups = bracer_asssemblies_heavy_chain[~bracer_asssemblies_heavy_chain['clonal_group'].isna()].groupby(['clonal_group'])['mouse.id'].agg(animals=set, len=len)
bracer_clonal_groups

In [None]:
# A group is validated (1) when it comes from exactly one animal and has more than one
# row; every other group gets 0.
from_one_animal = bracer_clonal_groups['animals'].map(len) == 1
more_than_one_row = bracer_clonal_groups['len'] > 1
bracer_clonal_groups['validated'] = (from_one_animal & more_than_one_row).astype(int)
bracer_clonal_groups
        

In [None]:
bracer_asssemblies_heavy_chain.index

In [None]:
len(bracer_non_valid_clones)

In [None]:
# Rows with a clonal group, per (group, mouse, cell, age, tissue). Named aggregation: the
# dict-renaming form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
bracer_asssemblies_heavy_chain[~bracer_asssemblies_heavy_chain['clonal_group'].isna()].groupby(['clonal_group','mouse.id','cell','age','tissue'])['cell'].agg(cells=set, len=len)


In [None]:
# Copy each group's validation flag onto its heavy-chain rows; rows without a clonal
# group (or with a group missing from the table) get 0.
flag_by_group = bracer_clonal_groups['validated']
row_flags = bracer_asssemblies_heavy_chain['clonal_group'].map(flag_by_group)
bracer_asssemblies_heavy_chain['validated'] = row_flags.fillna(0).astype(int)
bracer_asssemblies_heavy_chain.head()


In [None]:
# Rescue groups that span several mice: within such a group, every mouse contributing
# more than one row forms a validated clone; if more than one mouse qualifies, each
# gets its own label "<group>_<n>".
# NOTE(review): `bracer_non_valid_clones` is not assigned anywhere in the visible
# notebook — the cell that creates it appears to be missing; confirm its definition.
for cgcg in set(bracer_non_valid_clones['clonal_group']):
    aux = bracer_non_valid_clones[bracer_non_valid_clones['clonal_group']==cgcg]
    # Named aggregation: the dict-renaming form of SeriesGroupBy.agg raises
    # SpecificationError on pandas >= 1.0.
    baux = aux.groupby(['mouse.id'])['cell'].agg(len=len, set=set)
    baux = baux[baux['len']>1.]
    bracer_asssemblies_heavy_chain.loc[aux[aux['mouse.id'].isin(baux.index)].index,'validated'] = 1

    if len(baux.index)>1:
        # Iterate the grouped index itself so suffixes are assigned in a reproducible
        # order (iterating a set of strings is not stable between Python sessions).
        for j, m in enumerate(baux.index, start=1):
            mouse_rows = aux[aux['mouse.id']==m].index
            bracer_asssemblies_heavy_chain.loc[mouse_rows,'clonal_group'] = bracer_asssemblies_heavy_chain.loc[mouse_rows,'clonal_group']+'_'+str(j)



In [None]:
bracer_asssemblies_heavy_chain.loc[bracer_asssemblies_heavy_chain['validated'] == 0, 'clonal_group'] = np.nan

In [None]:
set(bracer_asssemblies_heavy_chain['clonal_group'])

In [None]:
# Mice and row count per (age, clonal group). Named aggregation: the dict-renaming form
# of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
bracer_asssemblies_heavy_chain[~bracer_asssemblies_heavy_chain['clonal_group'].isna()].groupby(['age','clonal_group'])['mouse.id'].agg(cells=set, len=len)


In [None]:
# Cells and row count per (clonal group, mouse). Named aggregation: the dict-renaming
# form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
bracer_asssemblies_heavy_chain[~bracer_asssemblies_heavy_chain['clonal_group'].isna()].groupby(['clonal_group','mouse.id'])['cell'].agg(cells=set, len=len)



In [None]:
bracer_asssemblies_heavy_chain

In [None]:
bracer_asssemblies_heavy_chain.columns

In [None]:
bracer_asssemblies_heavy_chain['C_CALL']

In [None]:
# Cast the labels to text (missing labels become the string 'nan') so that rows without
# a clonal group are kept as their own group below.
bracer_asssemblies_heavy_chain['clonal_group'] = bracer_asssemblies_heavy_chain['clonal_group'].astype(str)
# Tissues and cell count per (C call, age, clonal group). Named aggregation: the
# dict-renaming form of SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
bracer_asssemblies_heavy_chain.groupby(['C_CALL','age','clonal_group'])['tissue'].agg(tissue=set, no_cells=len)#.to_csv('./figures/Bracer_output_all_cells_grouped_with_clones.csv')


In [None]:
set(bracer_asssemblies_heavy_chain['clonal_group'].astype(str))

### Network visualization

In [None]:
bracer_summary_heavy_chain

In [None]:
bracer_asssemblies_heavy_chain.head()

In [None]:
# Per age: number of heavy-chain rows that are singletons (validated == 0) and clone
# members (validated == 1), plus their rounded percentages.
# .size() replaces the dict-renaming agg (SpecificationError on pandas >= 1.0);
# fill_value / reindex make sure both columns exist and hold 0 instead of NaN when
# an age has no clones or no singletons.
counts = bracer_asssemblies_heavy_chain.groupby(['age','validated']).size()
df = counts.unstack('validated', fill_value=0).reindex(columns=[0, 1], fill_value=0)
df = df.rename(columns={0:'singleton',1:'clone'})
df['total'] = df['clone'] + df['singleton']
df['proportion_clone'] = np.round(df['clone']/df['total']*100)
df['proportion_singleton2'] = np.round(df['singleton']/df['total']*100)
df

In [None]:
bracer_validated = bracer_asssemblies_heavy_chain[~bracer_asssemblies_heavy_chain['clonal_group'].isna()]
bracer_validated = bracer_validated[bracer_validated['clonal_group']!='nan']
bracer_validated.head(10)

In [None]:
set(bracer_validated['clonal_group'])

In [None]:
bracer_asssemblies_heavy_chain['age'].value_counts()

In [None]:
bracer_validated['age'].value_counts()

In [None]:
# Clonal groups per (tissue, age, mouse). Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
bracer_validated.groupby(['tissue','age','mouse.id'])['clonal_group'].agg(len=len, set=set)

In [None]:
# Tissues per (age, mouse). Named aggregation: the dict-renaming form of
# SeriesGroupBy.agg raises SpecificationError on pandas >= 1.0.
bracer_validated.groupby(['age','mouse.id'])['tissue'].agg(len=len, set=set)

In [None]:
# Network of validated B-cell clones: age -> mouse -> clonal group <- cell,
# with cell nodes coloured by tissue.
# Work on a copy: bracer_validated is a filtered selection, and assigning a column to
# it directly triggers pandas' SettingWithCopyWarning.
bracer_validated = bracer_validated.copy()
# The colour dictionary spells this tissue 'Diaphgram'.
bracer_validated['tissue'] = bracer_validated['tissue'].replace('Diaphragm','Diaphgram')

G = Network(height=2000,width=1000,notebook=True)

edgelist = []
vertice = []
for i in bracer_validated.index:
    age_label = bracer_validated.loc[i, 'age']
    # The age node id is the string form throughout (it used to be checked against
    # `vertice` as the raw value but stored as str).
    age_node = str(age_label)
    mouse = bracer_validated.loc[i, 'mouse.id']
    clone = bracer_validated.loc[i, 'clonal_group']
    cell = bracer_validated.loc[i, 'cell']
    # Node size of the clonal group. This table has no 'len' column (the cell-edge
    # branch used to read it); 'clonal_group_len' is the column that exists here.
    # int() because the value may have been stored as text.
    clone_size = int(bracer_validated.loc[i, 'clonal_group_len'])

    # age -> mouse
    edge = (age_node, mouse)
    if edge not in edgelist:
        if age_node not in vertice:
            G.add_nodes([age_node], label=[age_label], color=['black'],
                        value=[10])
            vertice.append(age_node)
        if mouse not in vertice:
            G.add_nodes([mouse], color=["#555555"], label=[mouse])
            vertice.append(mouse)
        G.add_edge(edge[0], to=edge[1])
        edgelist.append(edge)

    # mouse -> clonal group (unlabelled node sized by group length)
    edge = (mouse, clone)
    if edge not in edgelist:
        if mouse not in vertice:
            G.add_nodes([mouse], label=[mouse], color=["#555555"])
            vertice.append(mouse)
        if clone not in vertice:
            G.add_nodes([clone], color=["#737373"], label=[' '],
                        value=[clone_size])
            vertice.append(clone)
        G.add_edge(edge[0], to=edge[1])
        edgelist.append(edge)

    # cell -> clonal group
    edge = (cell, clone)
    if edge not in edgelist:
        if cell not in vertice:
            G.add_nodes([cell], label=[' '], color=[maca_color_dict[bracer_validated.loc[i, 'tissue']]])
            vertice.append(cell)
        if clone not in vertice:
            G.add_nodes([clone], color=["#737373"], label=[clone],
                        value=[clone_size])
            vertice.append(clone)
        G.add_edge(edge[0], to=edge[1])
        edgelist.append(edge)


G.show('bracer_validated_animal_threshold_first_all_ages.html')

