# Goal for how to organize the signatures

Copy all ribosomal-removed signatures to these subfolders


Using `DNA`, `protein`, and `dayhoff` for shorthand now. In reality would use the full sketch id, e.g. `alphabet-DNA__ksize-21__scaled-10`

```
mouse2mouse/
    self2self-bootstrapped/
        0--train-mouse1-single-cells/
            iteration-00/
                DNA/
                protein/
                dayhoff/
            iteration-01/
                ...
        1--test-mouse1-single-cells/
            iteration-00/
                DNA/
                protein/
                dayhoff/
            iteration-01/
                ...                
        2--train-mouse1-merged-celltype-sigs/
            iteration-00/
                DNA/
                protein/
                dayhoff/
            iteration-01/
                ...
        3--train-mouse1-celltype-remove-common-kmers/
            iteration-00/
                DNA/
                protein/
                dayhoff/
            iteration-01/
                ...
        4--train-mouse1--celltype-sbt-dbs/
            iteration-00/
                DNA/
                protein/
                dayhoff/
            iteration-01/
                ...
        5--search-results/
            iteration-00/
                DNA/
                protein/
                dayhoff/
            iteration-00/
                ...
    mouse1-mouse2/
        0--train-mouse1-single-cells/
            DNA/
            protein/
            dayhoff/
        1--test-mouse2-single-cells/
            DNA/
            protein/
            dayhoff/
        2--train-mouse1-merged-celltype-sigs/
            DNA/
            protein/
            dayhoff/
        3--train-mouse1-celltype-remove-common-kmers/
            DNA/
            protein/
            dayhoff/
        4--train-mouse1-celltype-sbt-dbs/
            DNA/
            protein/
            dayhoff
        5--search-results/
            DNA/
            protein/
            dayhoff/
train-mouse/
    0--protein-coding-fastas/
        nucleotide/
        peptide/
    1--single-cell-sigs/
        DNA/
        protein/
        dayhoff/
    2--merged-celltype-sigs/
        DNA/
        protein/
        dayhoff/
    3--celltype-remove-common-kmers/
        DNA/
        protein/
        dayhoff/
    4--celltype-sbt-dbs/
        DNA/
        protein/
        dayhoff/
test-human/
    0--protein-coding-fastas/
        nucleotide/
        peptide/
    1--single-cell-sigs/
        DNA/
        protein/
        dayhoff/
    2--mouse-search-results/
        DNA/
        protein/
        dayhoffl
test-lemur/
    0--protein-coding-fastas/
        nucleotide/
        peptide/
    1--single-cell-sigs/
        protein/
        dayhoff/
    2--mouse-search-results/
        DNA/
        protein/
        dayhoff/
test-bat/
    0--protein-coding-fastas/
        nucleotide/
        peptide/
    1--single-cell-sigs/
        protein/
        dayhoff/
    2--mouse-search-results/
        DNA/
        protein/
        dayhoff/
```

# Imports

In [3]:
import glob
import os
import shutil

import pandas as pd
import scanpy as sc
import sig_utils
from joblib import Parallel, delayed
from tqdm import tqdm

import create_sourmash_command_utils as csc_utils

import sig_reorg

pd.options.display.max_rows = 500

%load_ext autoreload
%autoreload 2

## Def Describe

In [4]:
%%file pandas_utils.py

def describe(df, random=False):
    print(df.shape)
    print("--- First 5 entries ---")
    display(df.head())
    if random:
        print('--- Random subset ---')
        display(df.sample(5))

Overwriting pandas_utils.py


In [5]:
from pandas_utils import describe

## Read one2one h5ad

In [6]:
adata = sc.read(
#     "/home/phoenix/data_sm/code/immune-evolution/h5ads/human-lemur-mouse-bat__lung_only_copy.h5ad"
    "/home/olga/data_sm/immune-evolution/h5ads/human-lemur-mouse-bat/human-lemur-mouse-bat__lung_only.h5ad"
)
adata.obs = adata.obs.reset_index().set_index('cell_id')
print(adata)
adata.obs.head()

AnnData object with n_obs × n_vars = 126745 × 10560
    obs: 'index', 'age', 'cell_barcode', 'cell_ontology_class', 'cell_ontology_id', 'channel', 'free_annotation', 'individual', 'sample', 'sequencing_run', 'sex', 'species', 'species_batch', 'species_latin', 'tissue', 'narrow_group', 'broad_group', 'compartment_group', 'compartment_narrow', 'channel_cleaned', 'batch', 'n_genes', 'n_counts', 'species_batch_v2', 'compartment_broad', 'compartment_broad_narrow', 'compartment_species', 'compartment_narrow_species', 'common_individual_id'
    var: 'bat__gene_name', 'mouse_lemur__gene_name-bat', 'mouse__gene_name-bat', 'mouse_lemur__gene_name_x-hlm', 'mouse__gene_name_x-hlm', 'gene_ids-lemur-hlm', 'n_cells-mouse-hlm', 'mouse_lemur__gene_name_y-hlm', 'mouse__gene_name_y-hlm'


Unnamed: 0_level_0,index,age,cell_barcode,cell_ontology_class,cell_ontology_id,channel,free_annotation,individual,sample,sequencing_run,...,channel_cleaned,batch,n_genes,n_counts,species_batch_v2,compartment_broad,compartment_broad_narrow,compartment_species,compartment_narrow_species,common_individual_id
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P2_1__AAACCTGAGAAACCAT,P2_1_AAACCTGAGAAACCAT-human-hlm,46y,AAACCTGAGAAACCAT,capillary endothelial cell,,P2_1,Capillary Aerocyte,patient_2,distal 2,180607_A00111_0155_BHFCWYDMXX,...,P2_1,,,,hlm,endothelial: Capillary,endothelial: Capillary: Capillary Aerocyte,endothelial: human,endothelial: Capillary Aerocyte: human,human_patient_2
P2_1__AAATGCCAGATGAGAG,P2_1_AAATGCCAGATGAGAG-human-hlm,46y,AAATGCCAGATGAGAG,capillary endothelial cell,,P2_1,Capillary Aerocyte,patient_2,distal 2,180607_A00111_0155_BHFCWYDMXX,...,P2_1,,,,hlm,endothelial: Capillary,endothelial: Capillary: Capillary Aerocyte,endothelial: human,endothelial: Capillary Aerocyte: human,human_patient_2
P2_1__AACACGTTCGATCCCT,P2_1_AACACGTTCGATCCCT-human-hlm,46y,AACACGTTCGATCCCT,capillary endothelial cell,,P2_1,Capillary Aerocyte,patient_2,distal 2,180607_A00111_0155_BHFCWYDMXX,...,P2_1,,,,hlm,endothelial: Capillary,endothelial: Capillary: Capillary Aerocyte,endothelial: human,endothelial: Capillary Aerocyte: human,human_patient_2
P2_1__AACACGTTCGCACTCT,P2_1_AACACGTTCGCACTCT-human-hlm,46y,AACACGTTCGCACTCT,capillary endothelial cell,,P2_1,Capillary Aerocyte,patient_2,distal 2,180607_A00111_0155_BHFCWYDMXX,...,P2_1,,,,hlm,endothelial: Capillary,endothelial: Capillary: Capillary Aerocyte,endothelial: human,endothelial: Capillary Aerocyte: human,human_patient_2
P2_1__AACCATGCAGCTCGCA,P2_1_AACCATGCAGCTCGCA-human-hlm,46y,AACCATGCAGCTCGCA,capillary endothelial cell,,P2_1,Capillary Aerocyte,patient_2,distal 2,180607_A00111_0155_BHFCWYDMXX,...,P2_1,,,,hlm,endothelial: Capillary,endothelial: Capillary: Capillary Aerocyte,endothelial: human,endothelial: Capillary Aerocyte: human,human_patient_2


In [7]:
adata.obs.species.value_counts()

Human          57353
Mouse lemur    36934
Mouse          23802
Bat             8656
Name: species, dtype: int64

In [8]:
adata.obs.query('species == "Mouse"').groupby(["age", "channel"], observed=True).size()

age  channel           
18m  MACA_18m_F_LUNG_50     320
     MACA_18m_F_LUNG_51    1138
     MACA_18m_M_LUNG_52    1573
     MACA_18m_M_LUNG_53    1256
21m  MACA_21m_F_LUNG_54    1312
     MACA_21m_F_LUNG_55    1526
30m  10X_P1_6              7728
     10X_P1_14             1565
     10X_P2_5               920
     10X_P3_0               496
1m   10X_P5_13             1043
     10X_P5_14             1384
3m   10X_P7_8               348
     10X_P7_9               979
     10X_P8_12              578
     10X_P8_13             1636
dtype: int64

## Set shared celltypes

In [9]:
SHARED_CELLTYPES = ['Alveolar Epithelial Type 2',
 'B cell',
 'Capillary',
 'Dendritic',
 'Fibroblast',
 'Macrophage',
 'Monocyte',
 'Natural Killer T cell',
 'Smooth Muscle and Myofibroblast',
 'T cell']
len(SHARED_CELLTYPES)

10

In [10]:
adata_shared = adata[adata.obs.broad_group.isin(SHARED_CELLTYPES)]
adata_shared

  if not is_categorical(df_full[k]):


View of AnnData object with n_obs × n_vars = 89795 × 10560
    obs: 'index', 'age', 'cell_barcode', 'cell_ontology_class', 'cell_ontology_id', 'channel', 'free_annotation', 'individual', 'sample', 'sequencing_run', 'sex', 'species', 'species_batch', 'species_latin', 'tissue', 'narrow_group', 'broad_group', 'compartment_group', 'compartment_narrow', 'channel_cleaned', 'batch', 'n_genes', 'n_counts', 'species_batch_v2', 'compartment_broad', 'compartment_broad_narrow', 'compartment_species', 'compartment_narrow_species', 'common_individual_id'
    var: 'bat__gene_name', 'mouse_lemur__gene_name-bat', 'mouse__gene_name-bat', 'mouse_lemur__gene_name_x-hlm', 'mouse__gene_name_x-hlm', 'gene_ids-lemur-hlm', 'n_cells-mouse-hlm', 'mouse_lemur__gene_name_y-hlm', 'mouse__gene_name_y-hlm'

In [11]:
CELL_IDS_TO_USE = set(adata_shared.obs.index)
len(CELL_IDS_TO_USE)

89795

# Lemur

## Single cell fastas

In [14]:
! ls -lha $sig_outdir_base

total 11K
drwxr-xr-x  6 olga czb 4.0K Feb 28 19:43 .
drwxrwxr-x 81 olga czb 8.0K Feb 28 09:02 ..
drwxr-xr-x  4 olga czb 4.0K Feb 28 09:53 0--mouse2mouse
drwxr-xr-x  7 olga czb 4.0K Feb 28 16:57 1--train-mouse
drwxr-xr-x  7 olga czb 4.0K Feb 28 17:17 2--test-human
drwxr-xr-x  5 olga czb 4.0K Feb 28 19:43 3--test-bat


In [15]:
lemur_kmermaid_base = "/home/olga/data_sm/tabula-microcebus/analyses/kmermaid/tenx-tgz--alldata-v2-from-bams--remove-ribo"

In [27]:
ls -lha $lemur_kmermaid_base

total 29M
drwxr-xr-x 15 olga czb 4.0K Mar  1 15:38 [0m[01;34m.[0m/
drwxrwxrwx 26 olga czb 4.0K Oct 16 14:44 [34;42m..[0m/
drwxr-xr-x  5 olga czb 4.0K Oct  9 20:33 [01;34m10x-fastqs[0m/
drwxr-xr-x  4 olga czb 4.0K Oct  9 20:33 [01;34mfastp[0m/
drwxr-xr-x  2 olga czb 4.0K Oct 18 19:27 [01;34mpipeline_info[0m/
drwxr-xr-x  2 olga czb 4.0K Oct 12 11:03 [01;34mprotein_index[0m/
drwxr-xr-x  7 olga czb 4.0K Mar  1 15:38 [01;34msearch_lemur_in_mouse_no_ribosome_no_containment_no_dissociation__remove_common_hashes_from_mouse_sbt__refseq_removed_ribo[0m[K/
drwxr-xr-x  2 olga czb 8.0M Mar  5 17:57 [01;34msketches_nucleotide_handmade[0m/
drwxr-xr-x  3 olga czb 4.0K Mar  8 09:17 [01;34msketches_nucleotide_handmade_merged[0m/
drwxr-xr-x  3 olga czb 4.0K Mar  8 09:20 [01;34msketches_nucleotide_handmade_merged_remove_ribosomal_dissociation_allcells[0m[K/
drwxr-xr-x  2 olga czb 4.0M Feb 28 19:56 [01;34msketches_peptide_handmade[0m/
drwxr-xr-x  6 olga czb 4.0K Feb 28 20:00 [01;3

In [17]:

sig_outdir_base = "/home/olga/data_sm/immune-evolution/kmer-signatures"

test_dir = os.path.join(sig_outdir_base, "4--test-lemur")

test_0_fastas = os.path.join(test_dir, "0--protein-coding-fastas")
test_1_single_cells = os.path.join(test_dir, "1--single-cell-sigs")
fasta_input_dir = os.path.join(lemur_kmermaid_base, "translate")

results_dir = os.path.join(test_dir, "2--mouse-search-results")
dirs = (test_0_fastas, test_1_single_cells, results_dir)

for outdir in dirs:
    if not os.path.exists(outdir):
        os.makedirs(outdir)


## Copy fastas


## Copy fastas

In [141]:
sig_reorg.copy_fastas(
    lemur_test_0_fastas,
    lemur_fasta_input_dir,
    select_cell_ids=CELL_IDS_TO_USE,
    dryrun=False,
    cell_id_fun=clean_cell_id,
)

0it [00:00, ?it/s]

Copying peptides


19896it [06:10, 53.74it/s]
0it [00:00, ?it/s]

Copying nucleotides


19896it [07:37, 43.48it/s]


## Single cell signatures

In [28]:

peptide_sketch_dir = os.path.join(
    lemur_kmermaid_base,
    "sketches_peptide_handmade_merged_remove_ribosomal_dissociation_allcells",
)

nucleotide_sketch_dir = os.path.join(
    lemur_kmermaid_base,
    "sketches_nucleotide_handmade_merged_remove_ribosomal_dissociation_allcells",
)

sig_reorg.copy_nucleotide_peptide_sketches(
    peptide_sketch_dir=peptide_sketch_dir,
    nucleotide_sketch_dir=nucleotide_sketch_dir,
    pre_sketch_id_outdir=test_1_single_cells,
    select_cell_ids=CELL_IDS_TO_USE,
    dryrun=False
)

0it [00:00, ?it/s]

Copying alphabet-DNA__ksize-21__scaled-10


354it [00:09, 38.83it/s]
5695it [00:00, 94680.86it/s]
5695it [00:00, 98135.45it/s]

Copying alphabet-protein__ksize-30__scaled-10
Copying alphabet-dayhoff__ksize-51__scaled-10





# Create queries

In [45]:
human_test_dir

'/home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human'

In [59]:
moltypes = "DNA", "protein", "dayhoff"

alpha_ksizes = (("DNA", 21), ("protein", 30), ("dayhoff", 51))

# mouse celltype database directory
train_4_celltype_sbts = "/home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/4--celltype-sbt-dbts"
train_3_merged_celltype_remove_common = "/home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/3--merged-celltype-remove-common-kmers"
train_2_merged_celltype = "/home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/2--merged-celltype-sigs"

# Try both with removed common hashes and original
celltype_dirs = dict(
    remove_common=train_3_merged_celltype_remove_common,
    raw_merged=train_2_merged_celltype,
)

txts = []
for i, (name, celltype_dir) in enumerate(celltype_dirs.items()):
    this_celltype_dir_results = os.path.join(
        human_test_dir, f"3-{i}--mouse-search-results--{name}"
    )
    if not os.path.exists(this_celltype_dir_results):
        os.makedirs(this_celltype_dir_results)
    for alpha, ksize in alpha_ksizes:
        txt = csc_utils.make_sourmash_search_commands(
            this_celltype_dir_results,
            query_sigs_dir=test_1_single_cells,
            sbt_base_dir=celltype_dir,
            k_sizes=[ksize],
            scaled_sizes=[
                10,
            ],
            cell_ids=[],
            sbt_template_basename=None,
            query_sig_files=False,
            containment=True,
            moltype=alpha,
            threshold=1e-100,
            n_jobs=96,
            force=True,
            num_results=None,
            add_ksize_to_sig_dir=False,
            traverse_directory=True,
        )
        txts.append(txt)
txts_to_cat = " ".join(txts)
catted_txt = os.path.join(results_dir, "sourmash_search_commands_concatenated.txt")
! cat $txts_to_cat > $catted_txt

print(f"---\nparallel --progress --eta --jobs 96 < {catted_txt}")

parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/3-0--mouse-search-results--remove_common/sourmash_search_commands_DNA.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/3-0--mouse-search-results--remove_common/sourmash_search_commands_protein.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/3-0--mouse-search-results--remove_common/sourmash_search_commands_dayhoff.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/3-1--mouse-search-results--raw_merged/sourmash_search_commands_DNA.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/3-1--mouse-search-results--raw_merged/sourmash_search_commands_protein.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/

In [60]:
adata.obs.loc['P3_3__CGTTAGAGTCTCTCTG', 'broad_group']

'Alveolar Epithelial Type 2'

In [61]:
! head $catted_txt

sourmash search --quiet --dna --no-protein --no-dayhoff  --threshold 1e-100 -k 21 --traverse-directory --containment --output /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/3-0--mouse-search-results--remove_common/alphabet-DNA__ksize-21__scaled-10/P3_3__CGTTAGAGTCTCTCTG.csv /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/1--single-cell-sigs/alphabet-DNA__ksize-21__scaled-10/P3_3__CGTTAGAGTCTCTCTG.sig /home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/3--merged-celltype-remove-common-kmers
sourmash search --quiet --dna --no-protein --no-dayhoff  --threshold 1e-100 -k 21 --traverse-directory --containment --output /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/3-0--mouse-search-results--remove_common/alphabet-DNA__ksize-21__scaled-10/P3_6__GGAGCAATCAACACTG.csv /home/olga/data_sm/immune-evolution/kmer-signatures/2--test-human/1--single-cell-sigs/alphabet-DNA__ksize-21__scaled-10/P3_6__GGAGCAATCAACACTG.sig /home/olga/d

In [39]:
! shuf $catted_txt | head -n 3

sourmash search --quiet --dna --no-protein --no-dayhoff --num-results 3 --threshold 1e-10 -k 21 --containment --output /mnt/ibm_sm/olga/immune-evolution/kmer-signatures/2--test-human/2--mouse-search-results/alphabet-DNA__ksize-21__scaled-10/P3_6__TTCGGTCCAAACCTAC.csv /mnt/ibm_sm/olga/immune-evolution/kmer-signatures/2--test-human/1--single-cell-sigs/alphabet-DNA__ksize-21__scaled-10/P3_6__TTCGGTCCAAACCTAC.sig /home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/4--celltype-sbt-dbts/alphabet-DNA__ksize-21__scaled-10.sbt.zip
sourmash search --quiet --protein --no-dna --num-results 3 --threshold 1e-10 -k 30 --containment --output /mnt/ibm_sm/olga/immune-evolution/kmer-signatures/2--test-human/2--mouse-search-results/alphabet-protein__ksize-30__scaled-10/P3_7__GTAACTGGTTTAGCTG.csv /mnt/ibm_sm/olga/immune-evolution/kmer-signatures/2--test-human/1--single-cell-sigs/alphabet-protein__ksize-30__scaled-10/P3_7__GTAACTGGTTTAGCTG.sig /home/olga/data_sm/immune-evolution/kmer-signatur

## Search mouse celltypes with k-mers shared by 5, 10% of cells, but remove k-mers shared by 80% of celltypes

In [31]:
moltypes = "DNA", "protein", "dayhoff"

alpha_ksizes = (("DNA", 21), ("protein", 30), ("dayhoff", 51))

# mouse celltype database directory
train_3_sigs_min_10_percent = "/home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/2--merged-celltype-sigs--min-kmer-count--10-percent"
train_3_sigs_min_5_percent = "/home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/2--merged-celltype-sigs--min-kmer-count--5-percent"
train_3_sigs_min_5_percent_remove_common = "/home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/3--merged-celltype-remove-common-kmers--min-kmer-count--5-percent"
train_3_sigs_min_10_percent_remove_common = "/home/olga/data_sm/immune-evolution/kmer-signatures/1--train-mouse/3--merged-celltype-remove-common-kmers--min-kmer-count--10-percent"

# Try both with removed common hashes and original
celltype_dirs = {
        'min-5-percent-shared-kmers': train_3_sigs_min_5_percent,
    'min-10-percent-shared-kmers': train_3_sigs_min_10_percent,
    'min-5-percent-shared-kmers-remove-common-kmers': train_3_sigs_min_5_percent_remove_common,
    'min-10-percent-shared-kmers-remove-common-kmers': train_3_sigs_min_10_percent_remove_common,
}

txts = []

containments = True, False
for containment in containments:
    for i, (name, celltype_dir) in enumerate(celltype_dirs.items()):
        suffix = f"3-{i}--mouse-containment-search-results--{name}" if containment else f"3-{i}--mouse-search-results--{name}"
        this_celltype_dir_results = os.path.join(
            test_dir, suffix
        )
        if not os.path.exists(this_celltype_dir_results):
            os.makedirs(this_celltype_dir_results)
        for alpha, ksize in alpha_ksizes:
            txt = csc_utils.make_sourmash_search_commands(
                this_celltype_dir_results,
                query_sigs_dir=test_1_single_cells,
                sbt_base_dir=celltype_dir,
                k_sizes=[ksize],
                scaled_sizes=[
                    10,
                ],
                sbt_template_basename=None,
                query_sig_files=False,
                containment=containment,
                moltype=alpha,
                threshold=1e-100,
                n_jobs=96,
                force=False,
                num_results=None,
                add_ksize_to_sig_dir=False,
                traverse_directory=True,
            )
            txts.append(txt)
txts_to_cat = " ".join(txts)
catted_txt = os.path.join(results_dir, "sourmash_search_commands_concatenated.txt")
! cat $txts_to_cat > $catted_txt

print(f"---\nparallel --progress --eta --jobs 96 < {catted_txt}")
! wc -l $catted_txt

parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/4--test-lemur/3-0--mouse-containment-search-results--min-5-percent-shared-kmers/sourmash_search_commands_DNA.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/4--test-lemur/3-0--mouse-containment-search-results--min-5-percent-shared-kmers/sourmash_search_commands_protein.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/4--test-lemur/3-0--mouse-containment-search-results--min-5-percent-shared-kmers/sourmash_search_commands_dayhoff.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/4--test-lemur/3-1--mouse-containment-search-results--min-10-percent-shared-kmers/sourmash_search_commands_DNA.txt
parallel --progress --eta --jobs 96 < /home/olga/data_sm/immune-evolution/kmer-signatures/4--test-lemur/3-1--mouse-containment-search-results--min-10-percent-shared-kmers/sourmash

In [32]:
1+1

2