# Generate "clean" dataset with all normalized genes

For visualizations, it's helpful to have an .h5ad file in which the main `adata.X` matrix holds normalized values for all genes, rather than only the highly variable genes.

We'll perform this for the main object as well as the subset objects generated for each of the major cell classes.

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

from datetime import date
import hisepy
import os
import scanpy as sc

In [2]:
def read_adata_uuid(h5ad_uuid):
    """Read the .h5ad file for a HISE file UUID from the local cache.

    The file is looked up in `/home/jupyter/cache/<h5ad_uuid>`. When that
    directory is missing or contains no files, `hisepy.reader.cache_files()`
    is called to fetch the file before reading.

    Parameters
    ----------
    h5ad_uuid : str
        UUID of the file to read.

    Returns
    -------
    The object returned by `sc.read_h5ad()` for the first entry that
    `os.listdir()` reports in the cache directory.

    Raises
    ------
    FileNotFoundError
        If the cache directory holds no file even after the fetch attempt.
    """
    h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)

    # A directory that exists but is empty must also trigger a fetch;
    # otherwise indexing the listing below would fail with an IndexError.
    if not os.path.isdir(h5ad_path) or not os.listdir(h5ad_path):
        hisepy.reader.cache_files([h5ad_uuid])

    cached_files = os.listdir(h5ad_path) if os.path.isdir(h5ad_path) else []
    if not cached_files:
        raise FileNotFoundError(
            'No cached file found in {p} for UUID {u}'.format(p = h5ad_path, u = h5ad_uuid)
        )

    h5ad_file = os.path.join(h5ad_path, cached_files[0])
    return sc.read_h5ad(h5ad_file)

In [3]:
def normalize_raw_data(adata):
    """Rebuild an AnnData object from its `.raw` slot and log-normalize it.

    The returned object is created from `adata.raw`, so its `.X` covers
    whatever genes `.raw` holds rather than the genes in the incoming `.X`.
    NOTE(review): this assumes `.raw` contains un-normalized counts for all
    genes -- confirm against the notebooks that produced the inputs.

    Parameters
    ----------
    adata : AnnData
        Object whose `.raw` slot is populated.

    Returns
    -------
    AnnData
        New object whose `.X` has been passed through
        `sc.pp.normalize_total(target_sum = 1e4)` and `sc.pp.log1p()`, and
        whose `.raw` was assigned from the data before those two steps.

    Raises
    ------
    ValueError
        If `adata.raw` is not set.
    """
    # Fail with a clear message instead of an AttributeError on None.
    if adata.raw is None:
        raise ValueError('adata.raw is not set; cannot rebuild the dataset from raw data')

    adata = adata.raw.to_adata()
    # Assign .raw before normalizing so the pre-normalization values stay available.
    adata.raw = adata

    sc.pp.normalize_total(adata, target_sum = 1e4)
    sc.pp.log1p(adata)

    return adata

In [4]:
# Directory that receives the .h5ad files written by this notebook.
out_dir = 'output'
# exist_ok=True keeps the cell safe to re-run and avoids a check-then-create race.
os.makedirs(out_dir, exist_ok = True)

In [5]:
# Collects the path of every .h5ad file written by the processing loop.
out_files = list()

## Read annotated datasets

In [6]:
# Maps each dataset label (used in the output file names) to the UUID of its
# annotated .h5ad file in HISE.
h5ad_uuids = {
    'all': '6e8972a5-9463-4230-84b4-a20de055b9c3',
    'b-cells': '3ba425f9-b8e0-4a03-ae69-bac3d35b00b3',
    'myeloid-cells': 'a366815b-8092-4a66-9afa-c4fd3834edc4',
    'nk-cells': '90ce9dff-28b0-4b12-abaa-deab19fb68c9',
    'other': '8f55628c-cc28-4011-abb2-e3e13dad2b49',
    't-cells': '546b8939-cb2a-4b28-bf99-898ee4c0217f',
}

In [7]:
# Start from an empty list so that re-running this cell does not append
# duplicate paths, which would then be passed to the upload step twice.
out_files = []

# Resolve the date once so every output file gets the same stamp, even if the
# loop happens to run across midnight.
run_date = date.today()

for cell_class, h5ad_uuid in h5ad_uuids.items():
    # Build the path from out_dir so it always matches the directory created above.
    out_file = os.path.join(
        out_dir,
        'ref_clean_pbmc_{c}_labeled-all-genes_{d}.h5ad'.format(
            c = cell_class,
            d = run_date
        )
    )

    adata = read_adata_uuid(h5ad_uuid)
    adata = normalize_raw_data(adata)

    adata.write_h5ad(out_file)
    out_files.append(out_file)

downloading fileID: 3ba425f9-b8e0-4a03-ae69-bac3d35b00b3
Files have been successfully downloaded!
downloading fileID: a366815b-8092-4a66-9afa-c4fd3834edc4
Files have been successfully downloaded!
downloading fileID: 90ce9dff-28b0-4b12-abaa-deab19fb68c9
Files have been successfully downloaded!
downloading fileID: 8f55628c-cc28-4011-abb2-e3e13dad2b49
Files have been successfully downloaded!
downloading fileID: 546b8939-cb2a-4b28-bf99-898ee4c0217f
Files have been successfully downloaded!


## Upload results to HISE

Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output to HISE for use in downstream analysis steps.

In [8]:
# Passed as study_space_id to the upload call.
study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'
# Upload title, suffixed with today's date in ISO format (YYYY-MM-DD).
title = '10x 3-prime PBMC Clean Reference with all genes ' + date.today().isoformat()

In [9]:
# UUIDs of the input files, in the same order as the h5ad_uuids mapping.
in_files = [file_uuid for file_uuid in h5ad_uuids.values()]

In [10]:
# Show the input file UUIDs before they are passed to the upload call.
in_files

['6e8972a5-9463-4230-84b4-a20de055b9c3',
 '3ba425f9-b8e0-4a03-ae69-bac3d35b00b3',
 'a366815b-8092-4a66-9afa-c4fd3834edc4',
 '90ce9dff-28b0-4b12-abaa-deab19fb68c9',
 '8f55628c-cc28-4011-abb2-e3e13dad2b49',
 '546b8939-cb2a-4b28-bf99-898ee4c0217f']

In [11]:
# Show the output file paths before they are passed to the upload call.
out_files

['output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad',
 'output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad',
 'output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad',
 'output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad',
 'output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad',
 'output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad']

In [12]:
# Upload the written .h5ad files to HISE, passing the source file UUIDs as inputs.
# The call stays the last expression so its return value is shown as cell output.
hisepy.upload.upload_files(
    files=out_files,
    input_file_ids=in_files,
    study_space_id=study_space_uuid,
    title=title,
)

output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad
output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad
output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad
output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad
output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad
output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad
you are trying to upload file_ids... ['output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad']. Do you truly want to proceed?


(y/n) y


{'trace_id': '8a5c7691-5c1f-4eef-abf1-a1ca4872e6c3',
 'files': ['output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad',
  'output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad',
  'output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad',
  'output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad',
  'output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad',
  'output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad']}

In [13]:
# Display session information via the session_info package.
import session_info
session_info.show()