In [61]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
from matplotlib import pyplot as plt


# Hex colour per species, looked up by the plotting functions below.
color_scheme = dict(
    Mouse='#FF9E01',
    Kymouse='#000000',
    Human='#5695C1',
)

# Colours for the per-subject annotation strip in figure2 (its default
# `row_colors` argument). Note that 'Human' here differs from color_scheme.
row_colors = dict(
    Human='#1D00FF',
    Kymouse='#000000',
)

# Presumably the directory holding the input tables; it is not referenced by
# any function visible in this part of the notebook.
data_dir = 'tables'

In [60]:
def figure2(matrix, transpose = False, row_colors = row_colors, figsize = (20,10), order = True, col_cluster = True, row_cluster = False):
    """Draw a z-scored seaborn clustermap of gene usage per subject.

    Parameters
    ----------
    matrix : pandas.DataFrame
        The index is read as subject ids and the columns as gene names
        (before any transposition).
    transpose : bool
        False: subjects are rows, genes are columns, subjects are clustered and
        gene columns are not. True: genes are rows, subjects are columns,
        subjects are clustered and gene rows are not.
    row_colors : dict
        Maps the first whitespace-delimited token of each subject id to a
        colour; used for the 'Species' annotation strip. A subject whose token
        is missing from the dict raises KeyError.
    figsize : tuple
        Passed straight to ``sns.clustermap``.
    order : bool
        If True, genes are sorted by ``int(name[4])``, i.e. the single
        character at position 4 of the gene name, which must be a digit.
        If False, the column order of ``matrix`` is kept.
    col_cluster, row_cluster : bool
        Accepted for backward compatibility but currently ignored: which axis
        is clustered is fixed by ``transpose`` (see above).

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    subject_ids = matrix.index
    genes = matrix.columns

    # One colour per subject, keyed by the first token of the subject id.
    row_cols = pd.Series(subject_ids.map(lambda x:row_colors[x.split()[0]]))
    row_cols.index = subject_ids
    row_cols.name = 'Species'

    if order:
        gene_order = sorted(genes, key = lambda x:int(x[4]))
    else:
        gene_order = genes

    # Keyword arguments common to both orientations.
    shared_kwargs = dict(figsize=figsize, cmap='RdBu_r', dendrogram_ratio=(.1,0.1),
                         edgecolor='white', lw=.8, vmin=-4, vmax=5,
                         cbar_kws={'label':'Z-normalized frequency'})

    data = matrix.T if transpose else matrix
    if transpose:
        # Genes are rows: z-score along rows (z_score=0) and keep the gene
        # order fixed by disabling row clustering.
        grid = sns.clustermap(data.loc[gene_order], z_score=0, col_colors=row_cols,
                              row_cluster=False, **shared_kwargs)
        grid.ax_row_dendrogram.set_visible(False)
    else:
        # Genes are columns: z-score along columns (z_score=1) and keep the
        # gene order fixed by disabling column clustering.
        grid = sns.clustermap(data[gene_order], z_score=1, row_colors=row_cols,
                              col_cluster=False, **shared_kwargs)
        grid.ax_col_dendrogram.set_visible(False)
        # These only matter if the colour bar is shown; it is hidden below.
        grid.cax.yaxis.set_label_position('left')
        grid.cax.yaxis.set_ticks_position('left')

    for tick in grid.ax_heatmap.get_xticklabels():
        tick.set_fontsize(18)
    for tick in grid.ax_heatmap.get_yticklabels():
        tick.set_fontsize(18)
    grid.cax.set_visible(False)
    # NOTE(review): axes[-3] is presumably the 'Species' colour-strip axis;
    # this relies on the order in which seaborn creates its axes - confirm.
    grid.ax_heatmap.figure.axes[-3].tick_params(labelsize=18)
    
def plot_confidence_interval(mean, ci_lower, ci_upper, x, ax, color='black', horizontal_line_width=0.25):
    """Draw a capped vertical interval at ``x`` with a marker at ``mean``.

    Parameters
    ----------
    mean : number
        y position of the circular marker.
    ci_lower, ci_upper : number
        Interval ends; they may be given in either order.
    x : number
        x position of the interval.
    ax : object with a matplotlib-style ``plot`` method
        Target axes.
    color : str
        Colour of the stem and caps (the marker is always white with a black
        edge).
    horizontal_line_width : number
        Full width of each cap, centred on ``x``.
    """
    half_cap = horizontal_line_width / 2
    cap_left, cap_right = x - half_cap, x + half_cap
    upper = max(ci_lower, ci_upper)
    lower = min(ci_lower, ci_upper)

    # Vertical stem first, then the top cap, then the bottom cap.
    ax.plot([x, x], [upper, lower], color=color)
    for level in (upper, lower):
        ax.plot([cap_left, cap_right], [level, level], color=color)

    # Point estimate drawn last so it sits on top of the stem.
    ax.plot(x, mean, 'o', markersize=4, markeredgecolor='black', markerfacecolor="white")

            
def figure3_estimationplot(factors):
    """Plot one capped interval per factor, ordered by increasing mean.

    Parameters
    ----------
    factors : pandas.DataFrame
        One row per factor, indexed by factor name, with the columns
        'mean', 'lower' and 'upper'.

    Notes
    -----
    Tick positions are set explicitly to 0..n-1 (the x positions of the
    intervals) so labels stay aligned with their interval regardless of the
    automatic tick locator. Index labels found in ``mapping`` are shown with
    their readable name; any other label is shown unchanged.
    """
    fig, ax = plt.subplots(1,1)
    mapping = {'NP1':'VD insertion','NP2':'DJ insertion','IGHD':'IGHD gene','IGHJ':'IGHJ gene','IGHV':'IGHV gene'}

    labels = []
    for x,(k,p) in enumerate(factors.sort_values('mean').iterrows()):
        plot_confidence_interval(p['mean'],p['lower'],p['upper'],x=x, ax=ax)
        labels.append(mapping.get(k, k))

    # Pin the ticks to the interval positions before labelling them.
    ax.set_xticks(list(range(len(labels))))
    ax.set_xticklabels(labels)
    ax.set_ylabel('Difference in CDRH3 length\nbetween Kymouse and humans\n (nt)')
    ax.set_xlabel('Factor')
    # Reference line at zero difference.
    ax.axhline(0,color='grey')
    

def figure4EF(diversities):
    """Plot Shannon diversity against subsample proportion, one line per species.

    Two stacked panels are drawn: rows with key == 'cdr3_aa' in the top panel
    and rows with key == 'clone_id' in the bottom panel.

    Parameters
    ----------
    diversities : pandas.DataFrame
        Needs the columns 'key', 'subsample', 'shannon' and 'species'.
    """
    _, axs = plt.subplots(2, 1, figsize=(5, 10))

    def draw_panel(panel, key, ylabel):
        # Colours are handed to seaborn as a list in Human, Kymouse, Mouse order.
        species_palette = [color_scheme[name] for name in ('Human', 'Kymouse', 'Mouse')]
        subset = diversities[diversities['key'] == key]
        sns.lineplot(data=subset, ax=panel, x='subsample', y='shannon',
                     hue='species', palette=species_palette)
        panel.set_ylabel(ylabel)
        panel.set_xlabel('subsample proportion')

    # Bottom panel first, matching the original drawing order.
    draw_panel(axs[1], 'clone_id', 'Shannon diversity (clonotypes)')
    sns.despine()
    draw_panel(axs[0], 'cdr3_aa', 'Shannon diversity (CDRH3 A.A.)')
    
def figure4GH(sharing):
    """Box plots of the percentage shared, per comparison type.

    Top panel: rows with key == 'cdr3_aa', sorted by the 'order' column.
    Bottom panel: rows with key == 'clone_id' (not sorted), with the
    individual points overlaid as a swarm plot.

    Parameters
    ----------
    sharing : pandas.DataFrame
        Needs the columns 'key', 'type', 'shared' and 'order'.
    """
    human = color_scheme['Human']
    kymouse = color_scheme['Kymouse']
    mouse = color_scheme['Mouse']

    _, panels = plt.subplots(2, 1, figsize=(5, 10))
    top, bottom = panels[0], panels[1]

    # --- top panel: CDRH3 sharing -------------------------------------
    cdr3_rows = sharing[sharing['key'] == 'cdr3_aa'].sort_values('order')
    sns.boxplot(
        data=cdr3_rows, x='type', y='shared',
        palette=[human, 'white', kymouse, 'white', 'white', mouse],
        boxprops={'edgecolor': 'black', 'alpha': .4},
        ax=top,
    )
    top.set_xlabel('Comparison')
    top.set_ylabel('% CDRH3s shared')
    for label in top.get_xticklabels():
        label.set_rotation(15)
    sns.despine()

    # Applies a black edge to whatever is in the axes' `artists` list.
    # NOTE(review): whether the boxes live in `artists` depends on the
    # seaborn/matplotlib version - confirm this still has an effect.
    plt.setp(top.artists, edgecolor = 'k')

    # --- bottom panel: clonotype sharing ------------------------------
    clone_rows = sharing[sharing['key'] == 'clone_id']
    sns.boxplot(
        data=clone_rows, x='type', y='shared',
        palette=[human, 'white', kymouse],
        boxprops={'alpha': .3},
        ax=bottom,
    )
    sns.swarmplot(data=clone_rows, x='type', y='shared', color='black', alpha=0.5, ax=bottom)
    bottom.set_xlabel('Comparison')
    bottom.set_ylabel('% clonotypes shared')
    for label in bottom.get_xticklabels():
        label.set_rotation(15)
    sns.despine()
    
def figure4CD(diversities, max_cdr3_length = 37):
    """Plot Shannon diversity against CDRH3 length, one line per species.

    Two stacked panels are drawn: rows with type == 'cdrh3' in the top panel
    and rows with type == 'clonotype' in the bottom panel.

    Parameters
    ----------
    diversities : pandas.DataFrame
        Needs the columns 'cdr3_length', 'type', 'diversity' and 'species'.
    max_cdr3_length : number
        Only rows with cdr3_length <= this value are plotted (default 37,
        the previously hard-coded cutoff).
    """
    fig, axs = plt.subplots(2,1,figsize=(5,10))
    palette = [color_scheme['Human'],color_scheme['Kymouse'],color_scheme['Mouse']]

    # The data comes from the argument; the earlier version read an
    # undefined global named `diversity` instead.
    within_cutoff = diversities['cdr3_length'] <= max_cdr3_length

    panels = (
        (axs[1], 'clonotype', 'Shannon diversity (clonotypes)'),
        (axs[0], 'cdrh3', 'Shannon diversity (CDRH3s)'),
    )
    for ax, level, ylabel in panels:
        subset = diversities[within_cutoff & (diversities['type'] == level)]
        sns.lineplot(data=subset, x='cdr3_length', y='diversity', hue='species',
                     palette=list(palette), ax=ax)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('CDRH3 length (A.A.)')

    # Removes the top/right spines of every axes in the current figure.
    sns.despine()
    
def _ratio_panel(ax, ratios, column, ylabel):
    """Strip plot of one ratio column with a mean line and a +/-2 SEM band per species."""
    species_order = ['Human', 'Kymouse', 'Mouse']
    sns.stripplot(data=ratios, ax=ax, x='species', y=column, hue='species',
                  palette=[color_scheme[name] for name in species_order])

    for species in species_order:
        values = ratios.loc[ratios['species'] == species, column]
        if values.empty:
            # Nothing to summarise for a species absent from the table.
            continue
        mean = values.mean()
        half_width = values.sem() * 2
        # The band spans the full axes width: axhspan's xmin/xmax default to
        # the axes fractions 0 and 1. SEM is NaN for a single observation, in
        # which case only the mean line is drawn.
        if pd.notna(half_width):
            ax.axhspan(mean - half_width, mean + half_width, color=color_scheme[species], alpha=0.2)
        ax.axhline(y=mean, color=color_scheme[species], alpha=0.9)

    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    ax.set_ylabel(ylabel, size=14)
    ax.set_xticks([])


def figure4AB(ratios):
    """Plot per-sample uniqueness ratios by species (two stacked panels).

    Top panel: 'ratio H3:sequences'. Bottom panel:
    'ratio clonotypes:sequences'. Each panel shows the individual values as a
    strip plot plus, for Human, Kymouse and Mouse, a horizontal line at the
    mean and a shaded band of mean +/- 2 * SEM.

    Parameters
    ----------
    ratios : pandas.DataFrame
        Needs the columns 'species', 'ratio clonotypes:sequences' and
        'ratio H3:sequences'. The data comes from this argument; the earlier
        version read an undefined global named `table` instead.
    """
    fig, axs = plt.subplots(2,1,figsize=(2,10))
    _ratio_panel(axs[1], ratios, 'ratio clonotypes:sequences', 'Ratio unique clonotypes:sequences')
    _ratio_panel(axs[0], ratios, 'ratio H3:sequences', 'Ratio unique CDRH3s:sequences')
    # Removes the top/right spines of every axes in the current figure.
    sns.despine()
