# SAMap zebrafish Farrell 10 - 12 hpf vs amphioxus N5 preprocess
With Leiden-based neighborhoods.

Zebrafish proteins

In [None]:
from samap.mapping import SAMAP
from samap.analysis import (get_mapping_scores, GenePairFinder, sankey_plot, chord_plot, CellTypeTriangles, 
                            ParalogSubstitutions, FunctionalEnrichment,
                            convert_eggnog_to_homologs, GeneTriangles)
from samalg import SAM
import pandas as pd
import numpy
import seaborn as sns
import csv
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
import scanpy as sc
import anndata as ad
import warnings
warnings.filterwarnings(action='ignore')
import pickle

def save_object(obj, filename):
    """Serialize ``obj`` to ``filename`` with pickle.

    The file is opened in binary write mode, so an existing file at that
    path is replaced. The highest pickle protocol available is used.
    """
    with open(filename, mode='wb') as handle:
        pickler = pickle.Pickler(handle, pickle.HIGHEST_PROTOCOL)
        pickler.dump(obj)

## Concatenating h5ad files

In [None]:
# Read the three zebrafish stage files and concatenate them; the dict keys
# ('hpf10', 'hpf11', 'hpf12') are passed to ad.concat together with
# label='stage' so each cell is tagged with the file it came from.
zf_adata = ad.concat(
    {
        stage: ad.read_h5ad(h5ad_path)
        for stage, h5ad_path in (
            ('hpf10', '../../input_data/zf_farrell10_ZFB.h5ad'),
            ('hpf11', '../../input_data/zf_farrell11_ZF3S.h5ad'),
            ('hpf12', '../../input_data/zf_farrell12_ZF6S.h5ad'),
        )
    },
    label='stage',
)
# Persist the merged object; this file is the zebrafish input for SAMap below.
zf_adata.write('zf_10_12_hpf.h5ad')

## Read gene name mapping

Parse the mappings from sequence (protein/transcript) IDs to the gene names used in the AnnData objects.

In [None]:
# Load the ID -> gene-name tables that are passed to SAMAP through `names`.
# Every CSV row is turned into a tuple of its fields.
#
# `newline=''` is what the csv module documents for file objects handed to
# csv.reader; without it, quoted fields containing line breaks are mis-parsed.
#
# NOTE(review): `[2:]` discards the first two rows of each file — presumably
# header lines. Confirm that no real mapping row is being dropped.
with open('../../../reference/zebra_ens_prot/zf_final_prot_gene_map.csv', newline='') as f:
    dr_idmap = [tuple(row) for row in csv.reader(f)][2:]

with open('../../../reference/amp_braflo100_cdna/amp_tr_gene_map.csv', newline='') as f:
    bf_idmap = [tuple(row) for row in csv.reader(f)][2:]

# Print the first few entries of each table as a quick format check.
print(dr_idmap[:5])
print(bf_idmap[:5])

# Running SAMap

## Loading in raw data

In [None]:
# Zebrafish input: the concatenated 10-12 hpf file written earlier in this notebook.
path_dr = "zf_10_12_hpf.h5ad"
# Amphioxus input (file name indicates stage N5).
path_amp = "../../input_data/Amp_h5ad/Amp_N5.h5ad"

SAMap expects the above to be in a dictionary keyed by the species IDs determined in the BLAST step:

In [None]:
# Species ID -> h5ad path ('dr' = zebrafish file, 'bf' = amphioxus file).
filenames = dict(dr=path_dr, bf=path_amp)

Instantiate the SAMAP object as below. 

In [None]:
# Build the SAMAP object from the per-species h5ad paths, the directory given
# as `f_maps`, and the ID -> gene-name tables parsed above.
# Options deliberately left disabled in this run (kept for reference):
#   keys = {'dr' : 'TimeID', 'bf' : 'stage'}
#   resolutions = {'dr' : 5, 'bf' : 3}
sm = SAMAP(
    filenames,
    f_maps='../../../reference/maps_prot/maps_ens/',
    names={'dr': dr_idmap, 'bf': bf_idmap},
    # If False, do not save the processed results to `*_pr.h5ad`.
    save_processed=False,
)

Show the unintegrated UMAPs.

In [None]:
# Give every amphioxus cell the constant stage label 'N5' so that 'stage'
# can be used as a colouring key for both species.
sm.sams['bf'].adata.obs['stage'] = 'N5'

# obs columns used to colour each species' UMAP.
vis_keys_bf = ['stage', 'celltype_coarse2']
vis_keys_dr = ['stage', 'segment_celltype']

# Plot each species' own (pre-integration) UMAP: amphioxus first, then zebrafish.
for heading, species_id, colour_keys in (
    ('Amphioxus unintegrated', 'bf', vis_keys_bf),
    ('Zebrafish unintegrated', 'dr', vis_keys_dr),
):
    print(heading)
    sc.pl.embedding(
        sm.sams[species_id].adata,
        basis='X_umap',
        color=colour_keys,
        frameon=False,
    )

## Run SAMap

In [None]:
# Run SAMap on the SAMAP object instantiated above.
# NOTE(review): ncpus=16 is hard-coded — presumably the number of CPU cores
# to use; adjust it to the machine this notebook runs on.
sm.run(ncpus=16)

Save the SAMAP object.

In [None]:
# Pickle the SAMAP object to 'sam_processed.pkl' with the save_object helper
# defined in the first code cell; an existing file at that path is overwritten.
save_object(sm,'sam_processed.pkl')

## Visualizing the combined projection

In [None]:
# Create a single axes, widen the figure to 8 inches, and hand the axes to
# sm.scatter together with one colour and one marker size per species ID.
fig, ax = plt.subplots()
fig.set_figwidth(8)
fig.tight_layout()
ax.set_title('SAMap UMAP amphioxus (red) vs Zebrafish (blue)')
sm.scatter(
    axes=ax,
    COLORS={'dr': '#10b9f1', 'bf': '#f14810'},
    ss=dict.fromkeys(('bf', 'dr'), 5),
    alpha=0.5,
)