# Partition L3 Cell Types

For review post-doublet filtering, we'll partition cell types based on their CellTypist L3 label designations. Later, we'll perform clustering within each group for inspection.

In [12]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

from datetime import date
import hisepy
import os
import re
import scanpy as sc

In [2]:
out_dir = 'output/l3_types'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

## Helper functions

In [3]:
def cache_uuid_path(uuid):
    cache_path = '/home/jupyter/cache/{u}'.format(u = uuid)
    if not os.path.isdir(cache_path):
        hise_res = hisepy.reader.cache_files([uuid])
    filename = os.listdir(cache_path)[0]
    cache_file = '{p}/{f}'.format(p = cache_path, f = filename)
    return cache_file

In [4]:
def read_adata_uuid(uuid):
    cache_file = cache_uuid_path(uuid)
    res = sc.read_h5ad(cache_file)
    return res

In [5]:
def rm_cache_uuid(uuid):
    cache_path = '/home/jupyter/cache/{u}'.format(u = uuid)
    rm_call = 'rm -r {d}'.format(d = cache_path)
    os.system(rm_call)

In [6]:
def format_cell_type(cell_type):
    cell_type = re.sub('\\+', 'pos', cell_type)
    cell_type = re.sub('-', 'neg', cell_type)
    cell_type = re.sub(' ', '_', cell_type)
    return cell_type

In [7]:
def element_id(n = 3):
    import periodictable
    from random import randrange
    rand_el = []
    for i in range(n):
        el = randrange(0,118)
        rand_el.append(periodictable.elements[el].name)
    rand_str = '-'.join(rand_el)
    return rand_str

## Identify files in HISE

In [8]:
h5ad_uuids = {
    'all_ASDC': '0df777eb-6c6a-4892-89e8-3ab6ebfade89',
    'all_CD16-monocyte': '39cb92e1-8053-4607-8fe8-a6dd8632b32a',
    'all_CD56bright-NK-cell': '211843f0-3770-475d-a31f-29051fed91ab',
    'all_CD56bright-NK-cell': 'd4960075-6eba-4d79-9157-5f8259bbeedf',
    'all_CD8aa': 'da603bcc-e5f3-4ede-889f-e055b60d054d',
    'all_cDC1': '8728233c-e99c-4c93-921e-18f56ba72b75',
    'all_cDC2': '74207276-23b2-4943-8c02-02ca4d12473b',
    'all_DN-T-cell': '7c528158-b0c3-473a-9256-54bdaf510d66',
    'all_Effector-B-cell': '6d849b19-68be-4243-807a-9ee59e6c962a',
    'all_Erythrocyte': '2adc98ac-79a8-4d9c-9faf-55e7d81d0adf',
    'all_gdT': 'ec9f2e39-a65c-4e1a-b7e8-80b818d06166',
    'all_ILC': '4c885520-849a-4ffb-9490-90c7cc3d25df',
    'all_Intermediate-monocyte': 'c7bb1b35-03b4-4acb-bb52-b75a0c5efcdf',
    'all_MAIT': '35912dbb-1911-45fb-8c20-5e3794b3961b',
    'all_Memory-B-cell': '38977bd9-bb58-491d-a293-c8a83585b21a',
    'all_Naive-B-cell': 'f1f95172-4746-4632-b4ce-52e36f1328ca',
    'all_Naive-CD8-T-cell': 'b9cbc1b4-6e6e-4bfe-a729-7cf5a240f764',
    'all_pDC': 'ae861863-fd93-42aa-91d0-7a66c3528831',
    'all_Plasma-cell': '982321d2-4b2a-4dc3-b2f7-3c22a8692039',
    'all_Platelet': '10bd3fda-ad31-42b3-b440-78ba92e148a9',
    'all_Progenitor-cell': '700298cc-b5a9-45c4-97f6-9c15d3a11b24',
    'all_Proliferating-NK-cell': 'cc3b4985-3b83-4645-a472-cbf823e46a46',
    'all_Proliferating-T-cell': 'd86d1487-e394-4605-bd4f-f0743cb670bf',
    'all_Transitional-B-cell': 'a512e0ff-1535-4dfc-b9b4-fd51b7938efd',
    'all_Treg': '1a59aa90-eb0e-4250-9ffd-df6b1f801cb0',
    'BR1_Female_Negative_CD14-monocyte': 'beea5765-b3c0-4db1-a1c2-ada2ef51ee26',
    'BR1_Female_Negative_CD56dim-NK-cell': '426366d0-fdf7-4ab2-8339-0baefe80d096',
    'BR1_Female_Negative_CD56dim-NK-cell': '9deacbc9-06f9-4fad-b32c-0f9946f5a14e',
    'BR1_Female_Negative_Memory-CD4-T-cell': 'a40e7454-4637-4ad7-b222-bd29716aa027',
    'BR1_Female_Negative_Memory-CD8-T-cell': 'de83b600-3cc6-40ba-acb6-613c12e178ac',
    'BR1_Female_Negative_Naive-CD4-T-cell': 'b55301f1-2289-45a6-b14d-b1ee31a7f11c',
    'BR1_Female_Positive_CD14-monocyte': '8496f50f-38f6-4f0d-a50c-0835268c42b5',
    'BR1_Female_Positive_CD56dim-NK-cell': '8dd11b33-9065-460f-bcea-88a3092bf662',
    'BR1_Female_Positive_CD56dim-NK-cell': 'c5a0702e-6659-4a24-8bda-d3c61cf4677a',
    'BR1_Female_Positive_Memory-CD4-T-cell': '8f7afec7-194b-4e7c-9a27-2fa4e47b6085',
    'BR1_Female_Positive_Memory-CD8-T-cell': 'aa8b8b0f-164f-4fba-8af9-8397d9e67cd7',
    'BR1_Female_Positive_Naive-CD4-T-cell': '5a50a26e-1a56-4239-ba8e-dc0b8f3ef91d',
    'BR1_Male_Negative_CD14-monocyte': '6dfc083e-0392-438a-aa79-4b6c79acd55b',
    'BR1_Male_Negative_CD56dim-NK-cell': '35d2e12d-c528-45c7-b2c6-5ab76231793d',
    'BR1_Male_Negative_CD56dim-NK-cell': '6edf4d9d-f29a-4c7c-bc86-7e87b53ca9f5',
    'BR1_Male_Negative_Memory-CD4-T-cell': 'cdf5d83f-603d-4089-a5b9-9f04783329d8',
    'BR1_Male_Negative_Memory-CD8-T-cell': '8cc6a9b1-0ec7-445e-8299-556a4f95cb66',
    'BR1_Male_Negative_Naive-CD4-T-cell': '9997103a-4f8a-4e00-abc1-61a9674cf01b',
    'BR1_Male_Positive_CD14-monocyte': '4f0d6c22-21d5-4410-b603-8b485487f42b',
    'BR1_Male_Positive_CD56dim-NK-cell': '3788e5c0-5fee-4f6d-b108-77d9da289a7f',
    'BR1_Male_Positive_CD56dim-NK-cell': 'f46f20b0-a47a-4c5a-a8c9-6e0e7a1a2dc8',
    'BR1_Male_Positive_Memory-CD4-T-cell': 'f104fa2c-9054-4403-afd4-e17048d93d75',
    'BR1_Male_Positive_Memory-CD8-T-cell': 'df7c9a0a-3b5e-48d9-91de-2bd424daa44a',
    'BR1_Male_Positive_Naive-CD4-T-cell': '2aaa8f67-c64b-450c-9037-7a7cbde2c3e0',
    'BR2_Female_Negative_CD14-monocyte': '2906d91a-c989-4b1d-b343-aea2e23de036',
    'BR2_Female_Negative_CD56dim-NK-cell': '3b5a6c81-3e10-483d-be15-476430be0e5b',
    'BR2_Female_Negative_CD56dim-NK-cell': '82b143e9-0dd6-4ad9-b59e-6feb135f5c0c',
    'BR2_Female_Negative_Memory-CD4-T-cell': '050e56f7-fe83-4196-8fd4-d55292ed5cfa',
    'BR2_Female_Negative_Memory-CD8-T-cell': 'b8550f9d-b4aa-4ba7-955b-cf556fabb21d',
    'BR2_Female_Negative_Naive-CD4-T-cell': '82a127b5-7025-41db-8d88-5347055a5268',
    'BR2_Female_Positive_CD14-monocyte': '42c2b3e1-f3ec-4a44-9137-3db3ead74454',
    'BR2_Female_Positive_CD56dim-NK-cell': 'c27aacc8-68aa-4df3-a5dc-6a82cd7b246e',
    'BR2_Female_Positive_CD56dim-NK-cell': 'eda7be7b-7ba6-4832-83b9-8b210319c078',
    'BR2_Female_Positive_Memory-CD4-T-cell': '760961f6-4707-48b1-a2f4-33efc816be28',
    'BR2_Female_Positive_Memory-CD8-T-cell': 'd76d8ee6-6b85-42e4-9974-36c6ef4b0538',
    'BR2_Female_Positive_Naive-CD4-T-cell': '56630d0b-cdd9-43b0-8da4-9f7227b35190',
    'BR2_Male_Negative_CD14-monocyte': '47fff457-438f-46ba-ab37-9876bbbb1f18',
    'BR2_Male_Negative_CD56dim-NK-cell': '6dcc8d60-7b43-40f1-90b3-2390d09e4bbc',
    'BR2_Male_Negative_CD56dim-NK-cell': '80027d7a-95cc-4f01-9abe-3fb40d6a34c2',
    'BR2_Male_Negative_Memory-CD4-T-cell': 'ab6f751f-76df-42eb-be88-0315cf2d7c10',
    'BR2_Male_Negative_Memory-CD8-T-cell': '10d6ff38-dabd-4439-90e4-28d2c932d81f',
    'BR2_Male_Negative_Naive-CD4-T-cell': '22d42b15-1ae2-4b3f-8b6d-39dce427f765',
    'BR2_Male_Positive_CD14-monocyte': '41094c89-3ab0-43b8-a683-9b3b5a5b3653',
    'BR2_Male_Positive_CD56dim-NK-cell': '1304b375-fdbc-42ec-9ad8-37a473a17e75',
    'BR2_Male_Positive_CD56dim-NK-cell': 'f84657c6-d7df-42f9-9a42-5d3aa2e5c4c2',
    'BR2_Male_Positive_Memory-CD4-T-cell': '35215ceb-9dcc-41bd-abaf-1973de95b3d2',
    'BR2_Male_Positive_Memory-CD8-T-cell': '37d23649-4670-4ac2-9dd0-e0de0fce573d',
    'BR2_Male_Positive_Naive-CD4-T-cell': '87c3b749-c177-4fa1-8747-c8faa4e4859e'
}

## Separate files

In [9]:
for group_name, uuid in h5ad_uuids.items():
    adata = read_adata_uuid(uuid)
    
    l3_types = adata.obs['AIFI_L3'].unique().tolist()
    
    for l3_type in l3_types:
        out_type = format_cell_type(l3_type)
        type_dir = 'output/l3_types/{ct}'.format(ct = out_type)
        if not os.path.isdir(type_dir):
            os.makedirs(type_dir)
        out_file = '{td}/diha_celltypist_L3_{g}_{ct}.h5ad'.format(td = type_dir, g = group_name, ct = out_type)
        
        type_adata = adata[adata.obs['AIFI_L3'] == l3_type]
        type_adata.write_h5ad(out_file)

    rm_cache_uuid(uuid)

downloading fileID: 0df777eb-6c6a-4892-89e8-3ab6ebfade89
Files have been successfully downloaded!
downloading fileID: 39cb92e1-8053-4607-8fe8-a6dd8632b32a
Files have been successfully downloaded!
downloading fileID: d4960075-6eba-4d79-9157-5f8259bbeedf
Files have been successfully downloaded!
downloading fileID: da603bcc-e5f3-4ede-889f-e055b60d054d
Files have been successfully downloaded!
downloading fileID: 8728233c-e99c-4c93-921e-18f56ba72b75
Files have been successfully downloaded!
downloading fileID: 74207276-23b2-4943-8c02-02ca4d12473b
Files have been successfully downloaded!
downloading fileID: 2adc98ac-79a8-4d9c-9faf-55e7d81d0adf
Files have been successfully downloaded!
downloading fileID: ec9f2e39-a65c-4e1a-b7e8-80b818d06166
Files have been successfully downloaded!
downloading fileID: 4c885520-849a-4ffb-9490-90c7cc3d25df
Files have been successfully downloaded!
downloading fileID: c7bb1b35-03b4-4acb-bb52-b75a0c5efcdf
Files have been successfully downloaded!
downloading fileID: 

## Merge files for the same type

In [13]:
type_dirs = os.listdir('output/l3_types')
type_h5ads = []
for type_dir in type_dirs:
    type_path = 'output/l3_types/{td}'.format(td = type_dir)
    type_files = os.listdir(type_path)

    adata_list = []
    for type_file in type_files:
        adata = sc.read_h5ad('{tp}/{tf}'.format(tp = type_path, tf = type_file))
        adata_list.append(adata)

    type_adata = sc.concat(adata_list)

    cell_type = type_adata.obs['AIFI_L3'][0]
    out_type = format_cell_type(cell_type)
    
    out_file = 'output/diha_celltypist_L3_{ct}.h5ad'.format(ct = out_type)
    type_adata.write_h5ad(out_file)
    type_h5ads.append(out_file)

## Upload assembled results to HISE

In [14]:
study_space_uuid = 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'
title = 'DIHA CellTypist L3 .h5ads {d}'.format(d = date.today())

In [15]:
search_id = element_id()
search_id

'gadolinium-mercury-cadmium'

In [16]:
in_files = []
for group_name, uuid in h5ad_uuids.items():
    in_files.append(uuid)
in_files

['0df777eb-6c6a-4892-89e8-3ab6ebfade89',
 '39cb92e1-8053-4607-8fe8-a6dd8632b32a',
 'd4960075-6eba-4d79-9157-5f8259bbeedf',
 'da603bcc-e5f3-4ede-889f-e055b60d054d',
 '8728233c-e99c-4c93-921e-18f56ba72b75',
 '74207276-23b2-4943-8c02-02ca4d12473b',
 '7c528158-b0c3-473a-9256-54bdaf510d66',
 '6d849b19-68be-4243-807a-9ee59e6c962a',
 '2adc98ac-79a8-4d9c-9faf-55e7d81d0adf',
 'ec9f2e39-a65c-4e1a-b7e8-80b818d06166',
 '4c885520-849a-4ffb-9490-90c7cc3d25df',
 'c7bb1b35-03b4-4acb-bb52-b75a0c5efcdf',
 '35912dbb-1911-45fb-8c20-5e3794b3961b',
 '38977bd9-bb58-491d-a293-c8a83585b21a',
 'f1f95172-4746-4632-b4ce-52e36f1328ca',
 'b9cbc1b4-6e6e-4bfe-a729-7cf5a240f764',
 'ae861863-fd93-42aa-91d0-7a66c3528831',
 '982321d2-4b2a-4dc3-b2f7-3c22a8692039',
 '10bd3fda-ad31-42b3-b440-78ba92e148a9',
 '700298cc-b5a9-45c4-97f6-9c15d3a11b24',
 'cc3b4985-3b83-4645-a472-cbf823e46a46',
 'd86d1487-e394-4605-bd4f-f0743cb670bf',
 'a512e0ff-1535-4dfc-b9b4-fd51b7938efd',
 '1a59aa90-eb0e-4250-9ffd-df6b1f801cb0',
 'beea5765-b3c0-

In [17]:
out_files = type_h5ads

In [18]:
out_files

['output/diha_celltypist_L3_CD27neg_effector_B_cell.h5ad',
 'output/diha_celltypist_L3_cDC1.h5ad',
 'output/diha_celltypist_L3_CM_CD4_T_cell.h5ad',
 'output/diha_celltypist_L3_GZMKpos_Vd2_gdT.h5ad',
 'output/diha_celltypist_L3_ISGpos_CD14_monocyte.h5ad',
 'output/diha_celltypist_L3_GZMBpos_Vd2_gdT.h5ad',
 'output/diha_celltypist_L3_CD27pos_effector_B_cell.h5ad',
 'output/diha_celltypist_L3_ISGpos_naive_B_cell.h5ad',
 'output/diha_celltypist_L3_ISGpos_MAIT.h5ad',
 'output/diha_celltypist_L3_Proliferating_T_cell.h5ad',
 'output/diha_celltypist_L3_BaEoMaP_cell.h5ad',
 'output/diha_celltypist_L3_Plasma_cell.h5ad',
 'output/diha_celltypist_L3_Early_memory_B_cell.h5ad',
 'output/diha_celltypist_L3_DN_T_cell.h5ad',
 'output/diha_celltypist_L3_ISGpos_CD56dim_NK_cell.h5ad',
 'output/diha_celltypist_L3_ISGpos_memory_CD4_T_cell.h5ad',
 'output/diha_celltypist_L3_KLRF1neg_GZMBpos_CD27neg_memory_CD4_T_cell.h5ad',
 'output/diha_celltypist_L3_GZMKpos_CD27pos_EM_CD8_T_cell.h5ad',
 'output/diha_celltyp

In [19]:
hisepy.upload.upload_files(
    files = out_files,
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = in_files,
    destination = search_id
)

output/diha_celltypist_L3_CD27neg_effector_B_cell.h5ad
output/diha_celltypist_L3_cDC1.h5ad
output/diha_celltypist_L3_CM_CD4_T_cell.h5ad
output/diha_celltypist_L3_GZMKpos_Vd2_gdT.h5ad
output/diha_celltypist_L3_ISGpos_CD14_monocyte.h5ad
output/diha_celltypist_L3_GZMBpos_Vd2_gdT.h5ad
output/diha_celltypist_L3_CD27pos_effector_B_cell.h5ad
output/diha_celltypist_L3_ISGpos_naive_B_cell.h5ad
output/diha_celltypist_L3_ISGpos_MAIT.h5ad
output/diha_celltypist_L3_Proliferating_T_cell.h5ad
output/diha_celltypist_L3_BaEoMaP_cell.h5ad
output/diha_celltypist_L3_Plasma_cell.h5ad
output/diha_celltypist_L3_Early_memory_B_cell.h5ad
output/diha_celltypist_L3_DN_T_cell.h5ad
output/diha_celltypist_L3_ISGpos_CD56dim_NK_cell.h5ad
output/diha_celltypist_L3_ISGpos_memory_CD4_T_cell.h5ad
output/diha_celltypist_L3_KLRF1neg_GZMBpos_CD27neg_memory_CD4_T_cell.h5ad
output/diha_celltypist_L3_GZMKpos_CD27pos_EM_CD8_T_cell.h5ad
output/diha_celltypist_L3_Core_naive_CD4_T_cell.h5ad
output/diha_celltypist_L3_Core_CD16_mono

(y/n) y


{'trace_id': 'd6444748-2b86-469b-9eac-81a0b661d9c7',
 'files': ['output/diha_celltypist_L3_CD27neg_effector_B_cell.h5ad',
  'output/diha_celltypist_L3_cDC1.h5ad',
  'output/diha_celltypist_L3_CM_CD4_T_cell.h5ad',
  'output/diha_celltypist_L3_GZMKpos_Vd2_gdT.h5ad',
  'output/diha_celltypist_L3_ISGpos_CD14_monocyte.h5ad',
  'output/diha_celltypist_L3_GZMBpos_Vd2_gdT.h5ad',
  'output/diha_celltypist_L3_CD27pos_effector_B_cell.h5ad',
  'output/diha_celltypist_L3_ISGpos_naive_B_cell.h5ad',
  'output/diha_celltypist_L3_ISGpos_MAIT.h5ad',
  'output/diha_celltypist_L3_Proliferating_T_cell.h5ad',
  'output/diha_celltypist_L3_BaEoMaP_cell.h5ad',
  'output/diha_celltypist_L3_Plasma_cell.h5ad',
  'output/diha_celltypist_L3_Early_memory_B_cell.h5ad',
  'output/diha_celltypist_L3_DN_T_cell.h5ad',
  'output/diha_celltypist_L3_ISGpos_CD56dim_NK_cell.h5ad',
  'output/diha_celltypist_L3_ISGpos_memory_CD4_T_cell.h5ad',
  'output/diha_celltypist_L3_KLRF1neg_GZMBpos_CD27neg_memory_CD4_T_cell.h5ad',
  'outp

In [20]:
import session_info
session_info.show()