# Split data by AIFI_L2 type

To support pseudobulk and other downstream analyses, we'll read in our complete, labeled .h5ad file and write a separate .h5ad file for each AIFI_L2 cell type.

## Load packages

In [1]:
from datetime import date
import hisepy
import os
import re
import scanpy as sc

In [2]:
# Create the output directory if it is missing. exist_ok avoids the
# check-then-create race and keeps this cell safe to re-run.
os.makedirs('output', exist_ok = True)

In [3]:
# Paths of the files written by this notebook; filled in by the split loop
# and passed to the HISE upload at the end.
out_files = []

## Helper functions

In [4]:
def read_adata_uuid(h5ad_uuid):
    """Read an .h5ad file from the HISE cache, caching it first if needed.

    Parameters
    ----------
    h5ad_uuid : str
        HISE file UUID; also the name of the cache subdirectory.

    Returns
    -------
    The object returned by `sc.read_h5ad()` for the cached file.

    Raises
    ------
    FileNotFoundError
        If the cache directory is missing or empty after caching.
    """
    h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)
    # Only call HISE when the UUID has not been cached already.
    if not os.path.isdir(h5ad_path):
        hisepy.cache_files([h5ad_uuid])
    # os.listdir returns entries in arbitrary order; sort so the file that is
    # picked is the same on every run.
    cached_files = sorted(os.listdir(h5ad_path))
    if not cached_files:
        raise FileNotFoundError(
            'No cached file found in {p}'.format(p = h5ad_path)
        )
    h5ad_file = os.path.join(h5ad_path, cached_files[0])
    return sc.read_h5ad(h5ad_file)

## Read dataset from HISE

In [5]:
# HISE UUID of the labeled .h5ad file to split.
h5ad_uuid = '6e8972a5-9463-4230-84b4-a20de055b9c3'

In [6]:
# Load the dataset via the helper, which caches it from HISE on first use.
adata = read_adata_uuid(h5ad_uuid)

## Split and save L2 types

In [7]:
# Replace adata with the contents of its .raw slot. The AnnData built by
# Raw.to_adata() carries no .raw of its own, so without this guard a second
# run of the cell would fail on None; with it, the cell is re-runnable.
if adata.raw is not None:
    adata = adata.raw.to_adata()

In [8]:
# Distinct AIFI_L2 labels in obs; one output file is written per label below.
l2_types = adata.obs['AIFI_L2'].unique()

In [9]:
l2_types

['MAIT', 'Naive CD4 T cell', 'CD14 monocyte', 'Memory CD4 T cell', 'Memory B cell', ..., 'Plasma cell', 'Erythrocyte', 'ILC', 'cDC1', 'ASDC']
Length: 29
Categories (29, object): ['ASDC', 'CD8aa', 'CD14 monocyte', 'CD16 monocyte', ..., 'cDC1', 'cDC2', 'gdT', 'pDC']

In [10]:
# Resolve the date once so every file from this run carries the same stamp,
# even if the loop runs past midnight.
run_date = date.today()
for l2_type in l2_types:
    # Spaces in the label become underscores in the file name.
    out_type = l2_type.replace(' ', '_')
    out_file = 'output/ref_pbmc_AIFI_L2_{t}_{d}.h5ad'.format(t = out_type, d = run_date)
    # Subset to the cells carrying this label and write them to their own file.
    type_adata = adata[adata.obs['AIFI_L2'] == l2_type]
    type_adata.write_h5ad(out_file)
    # out_files is created in an earlier cell; skip paths already recorded so
    # re-running this cell does not queue the same file for upload twice.
    if out_file not in out_files:
        out_files.append(out_file)

## Upload Results to HISE

Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output to HISE to use for downstream analysis steps.

In [11]:
# Study space passed to the HISE upload, and a dated title for that upload.
study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'
title = 'PBMC Reference Split Per L2 Type {d}'.format(d = date.today().isoformat())

In [12]:
# UUIDs of the HISE files this notebook read; passed as input_file_ids on upload.
in_files = [h5ad_uuid]

In [13]:
in_files

['6e8972a5-9463-4230-84b4-a20de055b9c3']

In [14]:
out_files

['output/ref_pbmc_AIFI_L2_MAIT_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Naive_CD4_T_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_CD14_monocyte_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Memory_CD4_T_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Memory_B_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_pDC_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Proliferating_NK_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Naive_CD8_T_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_CD16_monocyte_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_cDC2_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Memory_CD8_T_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_CD56dim_NK_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Naive_B_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Effector_B_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Proliferating_T_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_Transitional_B_cell_2024-04-03.h5ad',
 'output/ref_pbmc_AIFI_L2_DN_T_cell_2024-04-03.h5ad',
 'output

In [15]:
# Send all per-type files to the study space in a single call, passing the
# source .h5ad UUID as input_file_ids. The call lists the files and asks for
# a (y/n) confirmation before continuing.
hisepy.upload.upload_files(
    title = title,
    study_space_id = study_space_uuid,
    input_file_ids = in_files,
    files = out_files
)

output/ref_pbmc_AIFI_L2_MAIT_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Naive_CD4_T_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_CD14_monocyte_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Memory_CD4_T_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Memory_B_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_pDC_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Proliferating_NK_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Naive_CD8_T_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_CD16_monocyte_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_cDC2_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Memory_CD8_T_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_CD56dim_NK_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Naive_B_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Effector_B_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Proliferating_T_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Transitional_B_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_DN_T_cell_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_gdT_2024-04-03.h5ad
output/ref_pbmc_AIFI_L2_Intermed

(y/n) y


{'trace_id': 'e62e09e3-b89c-4bde-9559-73821d533d93',
 'files': ['output/ref_pbmc_AIFI_L2_MAIT_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Naive_CD4_T_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_CD14_monocyte_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Memory_CD4_T_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Memory_B_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_pDC_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Proliferating_NK_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Naive_CD8_T_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_CD16_monocyte_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_cDC2_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Memory_CD8_T_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_CD56dim_NK_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Naive_B_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Effector_B_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Proliferating_T_cell_2024-04-03.h5ad',
  'output/ref_pbmc_AIFI_L2_Transitional_B_cell_20

In [16]:
# Show session details at the end of the run (presumably loaded packages and
# their versions; see the session_info documentation).
import session_info
session_info.show()