In [None]:
import json
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def draw_heatmap(data, x, y, ax=None):
    """Draw ``data`` as a seaborn heatmap and return the resulting axes.

    Args:
        data: Values handed unchanged to ``sns.heatmap``.
        x: Column tick labels. A plain string is split into one label per
            character; anything else is forwarded as given.
        y: Row tick labels, treated the same way as ``x``.
        ax: Optional axes to draw on; forwarded to ``sns.heatmap``.

    Returns:
        Whatever ``sns.heatmap`` returns for this call.
    """
    # Strings are exploded into per-character labels; other inputs pass through.
    x_labels = list(x) if isinstance(x, str) else x
    y_labels = list(y) if isinstance(y, str) else y
    return sns.heatmap(
        data,
        xticklabels=x_labels,
        yticklabels=y_labels,
        cbar=True,
        ax=ax,
        cmap="YlGnBu",
    )

In [None]:
# Cell-type label strings, kept in this fixed order.
celltype = [
    'Immature B cell',
    'Transitional B cell',
    'Mature B cell',
    'Plasmacytes PC',
    'Memory IgD-',
    'Memory IgD+',
]

## UMAP
https://github.com/lmcinnes/umap

## Germline

In [None]:
!pip install umap-learn

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import umap.umap_ as umap
import pandas as pd
import numpy as np

In [None]:
# Colormap reference:
# https://matplotlib.org/stable/tutorials/colors/colormaps.html
# Bare last expression of the cell, so the notebook displays the returned
# palette. NOTE(review): with no arguments this presumably yields seaborn's
# current default palette — confirm against the seaborn docs.
sns.color_palette()

In [None]:
def drawUMAP(data, n_neighbors=15, min_dist=0.1, n_components=2, metric='euclidean', c=None, cmap=None,title='', savename=''):
    """Embed ``data`` with UMAP and scatter-plot the embedding in 1, 2 or 3 dimensions.

    A new figure is created, the embedding is drawn on it, a title and a
    colorbar are added, and the figure is optionally written to disk.

    Args:
        data: Input handed to ``umap.UMAP(...).fit_transform``.
        n_neighbors: Forwarded to ``umap.UMAP``.
        min_dist: Forwarded to ``umap.UMAP``.
        n_components: Embedding dimensionality; must be 1, 2 or 3.
        metric: Forwarded to ``umap.UMAP``.
        c: Per-point colour values forwarded to ``scatter``.
        cmap: Colormap forwarded to ``scatter``.
        title: Figure title.
        savename: If non-empty, the figure is saved to this path at 200 dpi.

    Raises:
        ValueError: If ``n_components`` is not 1, 2 or 3.
    """
    # Reject unsupported dimensionalities before the (potentially slow) fit;
    # previously this only surfaced as a NameError when adding the colorbar.
    if n_components not in (1, 2, 3):
        raise ValueError(
            "n_components must be 1, 2 or 3 to be plotted, got %r" % (n_components,)
        )

    reducer = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric
    )
    u = reducer.fit_transform(data)

    fig = plt.figure()
    if n_components == 3:
        axes = fig.add_subplot(111, projection='3d')
        # Colour by the caller-supplied `c`, as the 1-D and 2-D branches do
        # (this branch used to pass `data` and silently ignore `c`).
        # The larger marker size for 3-D is kept from the original code.
        points = axes.scatter(u[:, 0], u[:, 1], u[:, 2], c=c, cmap=cmap, s=100)
    else:
        axes = fig.add_subplot(111)
        # A 1-D embedding has no second coordinate, so it is plotted against
        # the sample index instead.
        second_axis = range(len(u)) if n_components == 1 else u[:, 1]
        points = axes.scatter(u[:, 0], second_axis, c=c, cmap=cmap, s=5)

    plt.title(title, fontsize=18)
    plt.colorbar(points)
    if savename:
        plt.savefig(savename, dpi=200)

In [None]:
def calDist(refer, hypo, mode='distance'):
    """Return a string distance between ``refer`` and ``hypo``.

    Args:
        refer: First argument handed to the selected ``Levenshtein`` function.
        hypo: Second argument handed to the selected ``Levenshtein`` function.
        mode: ``'distance'`` selects ``Levenshtein.distance``; ``'hamming'``
            selects ``Levenshtein.hamming``.

    Returns:
        The value returned by the selected function.

    Raises:
        NotImplementedError: If ``mode`` is neither ``'distance'`` nor
            ``'hamming'``.
    """
    # Imported lazily, so the package is only required when this is called.
    import Levenshtein as lev

    if mode not in ('distance', 'hamming'):
        raise NotImplementedError
    metric = lev.distance if mode == 'distance' else lev.hamming
    return metric(refer, hypo)

In [None]:
def drawListUMAP(data, n_neighbors=15, min_dist=0.1, n_components=2, metric='euclidean', c=[], clabel=[], title='', savename=''):
    """Scatter several datasets on shared axes, embedding each one with UMAP.

    ``data``, ``c`` and ``clabel`` are walked in parallel (``zip`` stops at the
    shortest). Each dataset is passed to ``fit_transform`` on its own, and the
    first two columns of its embedding are drawn with that dataset's colour
    and legend label.

    Args:
        data: Iterable of datasets; each must support ``len``.
        n_neighbors: Forwarded to ``umap.UMAP``.
        min_dist: Forwarded to ``umap.UMAP``.
        n_components: Forwarded to ``umap.UMAP``.
        metric: Forwarded to ``umap.UMAP``.
        c: One colour per dataset.
        clabel: One legend label per dataset.
        title: Figure title.
        savename: If non-empty, the figure is saved to this path at 200 dpi.
    """
    reducer = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric
    )
    _figure, axes = plt.subplots()
    # NOTE(review): the same estimator is re-fitted for every dataset, so each
    # group gets its own independent embedding — confirm that overlaying them
    # on one set of axes is intended.
    # NOTE(review): only embedding columns 0 and 1 are plotted, whatever
    # `n_components` is.
    for subset, colour, label in zip(data, c, clabel):
        embedding = reducer.fit_transform(subset)
        print(colour, label)
        # The group's colour is repeated once per row of the dataset.
        per_point_colour = [colour] * len(subset)
        axes.scatter(embedding[:, 0], embedding[:, 1], c=per_point_colour, label=label, s=5)
    plt.title(title, fontsize=18)
    axes.legend()
    if savename:
        plt.savefig(savename, dpi=200)

# Cumulative Probability

In [None]:
import json
import numpy as np
import random
# Fix the seed of Python's built-in `random` module (233) so that code drawing
# from it is repeatable across runs.
# NOTE(review): this call does not touch NumPy's generator or any estimator's
# own random state; seed those separately if they matter — TODO confirm.
random.seed(233)

In [None]:
# Relative path of the JSON file to load.
name = 'analysis/model.top1000.cdrmatch.json'
# Read through a context manager so the file handle is closed as soon as the
# contents have been parsed, instead of being left open.
with open(name) as json_file:
    data = json.load(json_file)

In [None]:
# Running totals: every series in `oridata` is replaced by its cumulative sum,
# keys unchanged.
# NOTE(review): `oridata` is not assigned at module level in the visible cells
# (only as a local inside `cumulatePlot`) — confirm it is defined before this
# cell runs, otherwise a fresh kernel raises NameError here.
cumdata = {
    label: np.cumsum(series)
    for label, series in oridata.items()
}

In [None]:
def cumulatePlot(fname, suffix='_sars0513_2_1000.uniq.log', need_random=True):
    """Plot cumulative sums of the series stored in a JSON file and save a PDF.

    The JSON file is loaded as a mapping from keys to numeric sequences. A key
    is kept only if, once ``suffix`` is removed from it, the result is in the
    module-level ``name_mapping``; the series is then stored under the mapped
    name. One curve is drawn per entry of the module-level ``model_orders``
    (skipping ``'Random'``), coloured with the entry of the module-level
    ``color`` at the same index.

    Args:
        fname: Path of the input JSON file. The figure is saved to this path
            with every ``'.json'`` replaced by ``'.pdf'``.
        suffix: Substring removed from each JSON key before the
            ``name_mapping`` lookup.
        need_random: If true, also draw a dashed grey straight line labelled
            ``'Expected'`` from the origin to the final cumulative value of
            the ``'Transformer'`` series.

    Raises:
        KeyError: If a non-``'Random'`` name in ``model_orders`` (or
            ``'Transformer'`` when ``need_random`` is true) has no series.

    Note:
        Drawing goes to matplotlib's current figure; no new figure is created
        here.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(fname) as json_file:
        data = json.load(json_file)
    print(data.keys())

    # Keep only recognised keys and rename them to their mapped names.
    oridata = {
        name_mapping[k.replace(suffix, '')]: v
        for k, v in data.items()
        if k.replace(suffix, '') in name_mapping
    }
    cumdata = {k: np.cumsum(v) for k, v in oridata.items()}

    for i, name in enumerate(model_orders):
        if name != 'Random':
            v = cumdata[name]
            print(len(v))
            # `i` indexes `color` by position in `model_orders`, so skipped
            # entries still consume their colour slot.
            plt.plot(v, label=name, color=color[i])
    if need_random:
        # Straight reference line ending at the 'Transformer' curve's total.
        plt.plot([0,len(cumdata['Transformer'])], [0, cumdata['Transformer'][-1]], label='Expected', color="grey", linestyle='dashed')

    plt.legend(fontsize=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    # Compute the output path once so the printed path is exactly the one
    # written (the old message used 'pdf' without the dot).
    out_path = fname.replace('.json', '.pdf')
    print(out_path)
    plt.savefig(out_path, dpi=100)