# Refine labels for Memory CD8 T cells

In this notebook, we load all Memory CD8 T cells across our subjects to refine final L3 labels. We'll combine, recluster, and assign final labels by taking the most frequent AIFI_L3 label in each cluster, and back-propagate those L3 labels to L2 and L1 based on our cell type hierarchy.

We'll also generate metadata, UMAP coordinates, and marker gene summaries for review of our final labels, then store all of the outputs in HISE for later use.

Because there are many Memory CD8 T cells, we'll divide them into smaller subsets based on cohort, sex, and visit grouping (defined below). We'll combine CMV+ and CMV- subjects so that we better capture cell types that are enriched in CMV+ subjects. We'll then review each subset and assemble all labeled data in later notebooks.

## Load packages

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

import concurrent.futures
from concurrent.futures import ThreadPoolExecutor
from datetime import date
import hisepy
import os
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import tarfile

In [2]:
# Top-level directory for this notebook's outputs; created if missing
out_dir = 'output'
os.makedirs(out_dir, exist_ok = True)

In [3]:
# Sub-directory for the review tables (metadata and marker summaries); created if missing
out_dir = 'output/review'
os.makedirs(out_dir, exist_ok = True)

In [4]:
# Cell class identifier: embedded in every output file name below and used
# to pick this notebook's files out of the output directories before upload.
class_name = 'memory_cd8_t_cell'

## Helper functions

### HISE data
These functions make it easy to utilize files from the HISE cache

In [5]:
def cache_uuid_path(uuid, cache_dir = '/home/jupyter/cache'):
    """Return the path of the cached file for a HISE file UUID.

    If no directory for `uuid` exists under `cache_dir`, the file is first
    requested with `hisepy.reader.cache_files`.

    Parameters
    ----------
    uuid : str
        HISE file UUID; also the name of the per-file cache sub-directory.
    cache_dir : str
        Directory holding one sub-directory per cached UUID.
        NOTE(review): assumes hisepy writes to <cache_dir>/<uuid>; confirm
        before overriding the default.

    Returns
    -------
    str
        Path to the first file (in sorted name order) in the UUID's
        cache directory.

    Raises
    ------
    FileNotFoundError
        If the cache directory for `uuid` is missing or contains no files.
    """
    cache_path = os.path.join(cache_dir, uuid)
    if not os.path.isdir(cache_path):
        hisepy.reader.cache_files([uuid])

    # os.listdir returns entries in arbitrary order; sort so that the file
    # chosen is reproducible if the directory ever holds more than one.
    filenames = sorted(os.listdir(cache_path))
    if not filenames:
        raise FileNotFoundError(
            'No cached file found in {p}'.format(p = cache_path)
        )

    return os.path.join(cache_path, filenames[0])

In [6]:
def read_csv_uuid(uuid):
    """Read the cached .csv file for a HISE UUID into a pandas DataFrame."""
    return pd.read_csv(cache_uuid_path(uuid))

In [7]:
def read_adata_uuid(uuid):
    """Read the cached .h5ad file for a HISE UUID with scanpy's read_h5ad."""
    return sc.read_h5ad(cache_uuid_path(uuid))

In [8]:
def read_obs_uuid(uuid):
    """Return a copy of the .obs table from the cached .h5ad for a HISE UUID.

    The file is opened in backed ('r') mode and only the observation table
    is copied out. The backing file is closed before returning so the
    handle is not left open.
    """
    cache_file = cache_uuid_path(uuid)
    backed_adata = sc.read_h5ad(cache_file, backed = 'r')
    try:
        obs = backed_adata.obs.copy()
    finally:
        # Release the backing file even if copying .obs fails
        backed_adata.file.close()
    return obs

### Label refinement

This function uses the most frequent label within each cluster to refine label assignments.

In [9]:
def single_value(series):
    """Reduce each entry of `series` to a single value.

    Entries that are list-like (lists, tuples, NumPy arrays, pandas
    Series/Categoricals, ...) are replaced by their first element; all other
    entries, including strings, are kept as-is. This is needed because
    aggregating with `pd.Series.mode` yields an array-like value, not a
    scalar, when several labels tie for most frequent.

    Parameters
    ----------
    series : iterable
        Values to reduce, e.g. a column of per-cluster modes.

    Returns
    -------
    list
        One value per input entry. An empty list-like entry becomes None.
    """
    res = []
    for value in series:
        # is_list_like is False for strings, so labels are never split
        if pd.api.types.is_list_like(value):
            candidates = list(value)
            res.append(candidates[0] if candidates else None)
        else:
            res.append(value)
    return res

In [10]:
def assign_most_frequent(adata, clusters, labels, keep_original = False, original_prefix = 'predicted_'):
    """Replace each cell's label with the most frequent label in its cluster.

    Parameters
    ----------
    adata : object with an .obs DataFrame (an AnnData in this notebook)
    clusters : str
        Name of the .obs column holding cluster assignments.
    labels : str
        Name of the .obs column holding the labels to refine.
    keep_original : bool
        If True, the existing labels are kept in a column named
        original_prefix + labels; otherwise they are dropped.
    original_prefix : str
        Prefix for the retained original label column.

    Returns
    -------
    The same `adata` object, with .obs replaced by the relabeled table.

    Note: .obs is rebuilt with DataFrame.merge on the cluster column, so the
    resulting table does not carry over the previous .obs index.
    """
    # Most frequent label per cluster, reduced to one value per cluster
    cluster_modes = adata.obs.groupby(clusters)[labels].agg(pd.Series.mode)
    cluster_labels = cluster_modes.to_frame().reset_index()
    cluster_labels[labels] = single_value(cluster_labels[labels])

    # Move the per-cell labels out of the way before merging the new ones in
    if keep_original:
        relabeled = adata.obs.rename({labels: original_prefix + labels}, axis = 1)
    else:
        relabeled = adata.obs.drop(labels, axis = 1)

    adata.obs = relabeled.merge(cluster_labels, on = clusters, how = 'left')

    return adata

This function back-propagates hierarchical labeling from AIFI_L3 back to AIFI_L2 and AIFI_L1 to ensure our labels agree across levels of our cell type hierarchy.

In [11]:
def propagate_hierarchy(
    adata,
    hierarchy_df,
    from_level = 'AIFI_L3',
    to_levels = ['AIFI_L2', 'AIFI_L1'],
    keep_original = True,
    original_prefix = 'predicted_'
):
    """Overwrite parent-level labels using the labels at `from_level`.

    For each level in `to_levels`, the unique (from_level, level) pairs in
    `hierarchy_df` are left-merged onto .obs by `from_level`. The previous
    values at that level are either renamed with `original_prefix` or
    dropped, depending on `keep_original`. `from_level` and each propagated
    level end up as categorical columns.

    Returns the same `adata` object with its .obs replaced.
    """
    current = adata.obs

    for parent_level in to_levels:
        # Unique child -> parent pairs for this level
        mapping = hierarchy_df[[from_level, parent_level]].drop_duplicates()

        # Move the existing parent labels out of the way
        if keep_original:
            current = current.rename({parent_level: original_prefix + parent_level}, axis = 1)
        else:
            current = current.drop(parent_level, axis = 1)

        # Merge on plain strings, then store both levels as categoricals
        current = (
            current
            .assign(**{from_level: current[from_level].astype(str)})
            .merge(mapping, on = from_level, how = 'left')
            .astype({from_level: 'category', parent_level: 'category'})
        )

    adata.obs = current

    return adata

### Review outputs

These functions are used to assemble marker gene expression tables for review

In [12]:
def marker_frac_df(adata, markers, clusters = 'louvain_2'):
    """Return the dot-size table from a scanpy dotplot of `markers` by `clusters`.

    The table is indexed by group with one column per gene; per scanpy's
    DotPlot, dot size encodes the fraction of cells in which a gene is detected.
    """
    dotplot = sc.pl.dotplot(
        adata,
        var_names = markers,
        groupby = clusters,
        return_fig = True
    )
    return dotplot.dot_size_df

def marker_mean_df(adata, markers, log = False, clusters = 'louvain_2'):
    """Return the dot-color table from a scanpy dotplot of `markers` by `clusters`.

    `log` is passed through to sc.pl.dotplot. The table is indexed by group
    with one column per gene; per scanpy's DotPlot, dot color encodes the
    mean expression within each group.
    """
    dotplot = sc.pl.dotplot(
        adata,
        var_names = markers,
        groupby = clusters,
        log = log,
        return_fig = True
    )
    return dotplot.dot_color_df

def tidy_marker_df(adata, markers, clusters = 'louvain_2'):
    """Build a long-format table of marker summaries per group.

    Returns one row per (group, gene) with columns `clusters`, 'gene',
    'gene_frac' (from marker_frac_df) and 'gene_mean' (from marker_mean_df).
    """
    # Wide (group x gene) tables -> long (group, gene, value) tables
    frac_wide = marker_frac_df(adata, markers, clusters).reset_index(drop = False)
    frac_long = frac_wide.melt(id_vars = clusters, var_name = 'gene', value_name = 'gene_frac')

    mean_wide = marker_mean_df(adata, markers, log = False, clusters = clusters).reset_index(drop = False)
    mean_long = mean_wide.melt(id_vars = clusters, var_name = 'gene', value_name = 'gene_mean')

    # Attach the mean to each (group, gene) fraction row
    return frac_long.merge(mean_long, on = [clusters, 'gene'], how = 'left')

This function selects the clusters in which a gene is detected in more than a specified fraction of cells (cutoff).

In [13]:
def select_clusters_above_gene_frac(adata, gene, cutoff, clusters = 'louvain_2'):
    """List the groups in `clusters` whose detection fraction for `gene` exceeds `cutoff`.

    Fractions come from marker_frac_df; the comparison is strict (>).
    """
    frac_table = marker_frac_df(adata, gene, clusters)
    passes_cutoff = frac_table[gene] > cutoff

    return frac_table.index[passes_cutoff].tolist()

This function retrieves both observations and UMAP coordinates in a single table for review.

In [14]:
def obs_with_umap(adata):
    """Return a copy of adata.obs with 'umap_1' and 'umap_2' columns added.

    Coordinates are taken from adata.obsm['X_umap'], which must have exactly
    two columns, and are matched to cells by row position.

    Returns
    -------
    pandas.DataFrame
        A new table; adata.obs itself is left unmodified.
    """
    # Copy so that the caller's .obs does not gain columns as a side effect
    obs = adata.obs.copy()

    umap_df = pd.DataFrame(adata.obsm['X_umap'], columns = ['umap_1', 'umap_2'])

    # Assign raw arrays, not Series: a Series would be aligned on its 0..n-1
    # index and give all-NaN columns whenever .obs is indexed by anything else
    # (e.g. barcodes).
    obs['umap_1'] = umap_df['umap_1'].to_numpy()
    obs['umap_2'] = umap_df['umap_2'].to_numpy()

    return obs

This function applies data analysis methods to our scRNA-seq data, including normalization, HVG selection, scaling, PCA, Harmony integration across subjects, nearest neighbors, Leiden clustering, and UMAP.

In [15]:
def process_adata(adata, resolution = 2):
    """Recluster and re-embed `adata`, starting from the data stored in adata.raw.

    Steps, in order: rebuild from .raw, normalize + log1p, select highly
    variable genes, scale, PCA, Harmony integration over
    'subject.subjectGuid', nearest neighbors on 'X_pca_harmony', Leiden
    clustering, UMAP. The object is then rebuilt from .raw once more and
    normalized + log-transformed again before being returned.

    Parameters
    ----------
    adata : AnnData
        Must have .raw set; the incoming .X is not used.
    resolution : number
        Leiden resolution. Cluster labels are stored in the .obs column
        'leiden_<resolution>' (e.g. 'leiden_2').

    Returns
    -------
    AnnData
        A new object; progress is printed to stdout as each step starts.
    """
    
    # Keep a copy of the raw data
    adata = adata.raw.to_adata()
    adata.raw = adata

    print('Normalizing', end = "; ")
    # Normalize and log transform
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)

    print('Finding HVGs', end = "; ")
    # Restrict downstream steps to variable genes
    sc.pp.highly_variable_genes(adata)
    adata = adata[:, adata.var_names[adata.var['highly_variable']]].copy()

    print('Scaling', end = "; ")
    # Scale variable genes
    sc.pp.scale(adata)

    print('PCA', end = "; ")
    # Run PCA
    sc.tl.pca(adata, svd_solver = 'arpack')

    print('Harmony', end = "; ")
    # Integrate subjects
    sce.pp.harmony_integrate(
        adata, 
        'subject.subjectGuid',
        max_iter_harmony = 30,
        verbose = False
    )
    
    print('Neighbors', end = "; ")
    # Find nearest neighbors
    # (built on the Harmony-corrected PCs rather than the uncorrected PCA)
    sc.pp.neighbors(
        adata, 
        n_neighbors = 20,
        n_pcs = 30,
        use_rep = 'X_pca_harmony'
    )

    print('Leiden', end = "; ")
    # Find clusters
    sc.tl.leiden(
        adata, 
        resolution = resolution, 
        key_added = 'leiden_{r}'.format(r = resolution),
        n_iterations = 5
    )

    print('UMAP', end = "; ")
    # Run UMAP
    sc.tl.umap(adata, min_dist = 0.05)
    
    print('Renormalizing')
    # Rebuild from .raw so the returned object is no longer the scaled,
    # HVG-only matrix used for clustering
    adata = adata.raw.to_adata()
    adata.raw = adata

    # Normalize and log transform
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)
    
    return adata

In [16]:
def isg_hi_refinement(adata, clusters = 'leiden_2', refine_res = 2):
    """Re-derive AIFI_L3 labels for cells in ISG-high clusters.

    Clusters in which IFI44L is detected in more than 30% of cells are
    selected, their cells are reprocessed on their own with process_adata
    at `refine_res`, and each resulting sub-cluster is given its most
    frequent AIFI_L3 label.

    Parameters
    ----------
    adata : AnnData
        Must have .raw set and 'barcodes', 'AIFI_L3' and `clusters` in .obs.
    clusters : str
        .obs column with the clusters to screen for ISG-high cells.
    refine_res : number
        Leiden resolution used when reclustering the selected cells.

    Returns
    -------
    pandas.DataFrame
        Columns 'barcodes', 'AIFI_L3' (refined) and 'predicted_AIFI_L3'
        (original) for the selected cells. Empty, with the same columns,
        if no cluster passes the IFI44L cutoff.
    """
    isg_hi_cl = select_clusters_above_gene_frac(
        adata, 'IFI44L', 0.3, clusters = clusters
    )
    print(isg_hi_cl)

    # Nothing to refine: reprocessing a 0-cell object would fail, so return
    # an empty table with the columns callers expect.
    if not isg_hi_cl:
        no_cells = adata.obs.iloc[:0][['barcodes', 'AIFI_L3']].copy()
        no_cells['predicted_AIFI_L3'] = no_cells['AIFI_L3']
        return no_cells

    isg_adata = adata[adata.obs[clusters].isin(isg_hi_cl)]

    isg_adata = process_adata(isg_adata, resolution = refine_res)
    isg_adata = assign_most_frequent(
        isg_adata,
        'leiden_{r}'.format(r = refine_res), 'AIFI_L3',
        keep_original = True,
        original_prefix = 'predicted_'
    )

    isg_df = isg_adata.obs[['barcodes', 'AIFI_L3', 'predicted_AIFI_L3']]
    return isg_df

## Cell Type Markers

These are the set of marker genes that we'll use to review our cell type labels.

In [17]:
# Genes summarized per AIFI_L2 label in the L2 review marker table
l2_markers = [
    'CD3E', 'CD4', 'CD8A', 'TRAC', 'TRDC', 
    'LEF1', 'TCF7', 'CCR7', 'SELL', 'ITGB1', 
    'FAS', 'CD27', 'CD28', 'FOXP3', 'RTKN2', 
    'IKZF2', 'ZNF683', 'NKG7', 'KLRC2', 'KLRF1', 
    'KLRB1', 'KLRD1', 'GZMA', 'GZMK', 'GZMB', 
    'PRF1', 'GNLY', 'IFI44L', 'IRF7', 'MKI67', 
    'TRDV1', 'TRDV2', 'TRDV3', 'MME', 'SOX4'
]

# Genes summarized per AIFI_L3 label in the L3 review marker table
l3_markers = [
    'CD27', 'CD28', 'KLRF1', 'KLRB1', 'KLRD1', 
    'KLRC1', 'KLRC2', 'GZMA', 'GZMB', 'GZMK', 
    'GZMH', 'PRF1', 'CCL5', 'GNLY', 'GATA3', 
    'TBX21', 'EOMES', 'LEF1', 'TCF7', 'IKZF2',
    'SOX4', 'IFI44L'
]

## Read cell type hierarchy from HISE

As part of label refinement, we'll back-propagate our cell type labels from refined AIFI_L3 labels to their parent cell classes at AIFI_L2 and AIFI_L1. To do this, we need the hierarchical relationships between these levels, which have been generated for our cell type reference dataset.

In [18]:
# HISE file UUID of the cell type hierarchy table (.csv)
hierarchy_uuid = '1a44252c-8cab-4c8f-92c9-d8f3af633790'
# Passed to propagate_hierarchy below, which reads its AIFI_L3, AIFI_L2 and AIFI_L1 columns
hierarchy_df = read_csv_uuid(hierarchy_uuid)

## Read sample metadata from HISE

We previously assembled sample metadata and CMV status for each subject. We'll retrieve and combine these to utilize for selecting subsets of samples.

In [19]:
# HISE file UUID of the per-sample metadata table (.csv)
sample_meta_uuid = 'd82c5c42-ae5f-4e67-956e-cd3b7bf88105'
sample_meta = read_csv_uuid(sample_meta_uuid)

In [20]:
sample_meta.shape

(868, 32)

In [21]:
# HISE file UUID of the CMV status table (.csv)
cmv_meta_uuid = '9469f67c-b09a-454d-9fb9-f50ff3494d69'
cmv_path = cache_uuid_path(cmv_meta_uuid)
# Read directly (not via read_csv_uuid) so the first CSV column becomes the index
cmv_meta = pd.read_csv(cmv_path, index_col = 0)
# Remove fully identical rows before merging onto the sample metadata
cmv_meta = cmv_meta.drop_duplicates()

In [22]:
cmv_meta.shape

(96, 4)

In [23]:
# Add CMV columns to every sample of each subject. validate = 'many_to_one'
# raises a MergeError if a subject appears more than once in cmv_meta, which
# would otherwise silently duplicate that subject's samples.
sample_meta = sample_meta.merge(
    cmv_meta,
    on = 'subject.subjectGuid',
    how = 'left',
    validate = 'many_to_one'
)

In [24]:
sample_meta.shape

(868, 35)

## Assign sample groups

To subdivide the full set of cells, we'll use groups that include cohort, sex, CMV status, and a subset of visits. To group our visit data, we'll define 3 visit groups, and use those together with the other metadata to group samples.

In [25]:
# Maps each sample.visitName value to one of three visit groups
# NOTE(review): 'Flu Year 3 Stand-Alone' is assigned to 'Immune Variation',
# not to a 'Year 3' group - confirm this is intentional.
visit_group_dict = {
    'Flu Year 1 Day 0': 'Year 1',
    'Flu Year 1 Day 7': 'Year 1',
    'Flu Year 1 Day 90': 'Year 1',
    'Flu Year 1 Stand-Alone': 'Year 1',
    'Flu Year 2 Day 0': 'Year 2',
    'Flu Year 2 Day 7': 'Year 2',
    'Flu Year 2 Day 90': 'Year 2',
    'Flu Year 2 Stand-Alone': 'Year 2',
    'Immune Variation Day 0': 'Immune Variation',
    'Immune Variation Day 7': 'Immune Variation',
    'Immune Variation Day 90': 'Immune Variation',
    'Flu Year 3 Stand-Alone': 'Immune Variation',
}

In [26]:
# Unique visit groups in the order they first appear in visit_group_dict.
# (list(set(...)) would give an order that can change from run to run.)
visit_groups = list(dict.fromkeys(visit_group_dict.values()))

In [27]:
# Fail early and name every visit that has no group, rather than raising a
# bare KeyError for only the first unmapped visit encountered.
unmapped_visits = [
    v for v in sample_meta['sample.visitName'].unique()
    if v not in visit_group_dict
]
if unmapped_visits:
    raise KeyError(
        'No visit group defined for sample.visitName values: {v}'.format(v = unmapped_visits)
    )

sample_meta['sample.visitGroup'] = [visit_group_dict[v] for v in sample_meta['sample.visitName']]

In [28]:
# Samples are split by cohort and sex only; the resulting group names
# (e.g. 'BR1_Female') are the keys used in large_uuids below.
group_samples_by = ['cohort.cohortGuid', 'subject.biologicalSex']

In [29]:
# One group of sample rows per (cohort, sex) combination
grouped_meta = sample_meta.groupby(group_samples_by)

In [30]:
# Sample metadata per group, keyed by the group values joined with '_'
split_meta = {
    '_'.join(group_tuple): meta
    for group_tuple, meta in grouped_meta
}

## Identify files in HISE

For this analysis, we'll read in these files from HISE storage from previous steps. We'll group these into "large" files, which are from the set of 5 very large cell type assignments, and "small" files, which are from other cell types.

In [31]:
# HISE file UUIDs of the input .h5ad files, keyed first by sample group
# (matching the keys of split_meta) and then by a descriptive file name.
# Each group has one CMV-negative and one CMV-positive file, which are
# combined during processing.
large_uuids = {
    'BR1_Female': {
        'BR1_Female_Negative_Memory-CD8-T-cell': 'de83b600-3cc6-40ba-acb6-613c12e178ac',
        'BR1_Female_Positive_Memory-CD8-T-cell': 'aa8b8b0f-164f-4fba-8af9-8397d9e67cd7',
    },
    'BR1_Male': {
        'BR1_Male_Negative_Memory-CD8-T-cell': '8cc6a9b1-0ec7-445e-8299-556a4f95cb66',
        'BR1_Male_Positive_Memory-CD8-T-cell': 'df7c9a0a-3b5e-48d9-91de-2bd424daa44a',
    },
    'BR2_Female': {
        'BR2_Female_Negative_Memory-CD8-T-cell': 'b8550f9d-b4aa-4ba7-955b-cf556fabb21d',
        'BR2_Female_Positive_Memory-CD8-T-cell': 'd76d8ee6-6b85-42e4-9974-36c6ef4b0538',
    },
    'BR2_Male': {
        'BR2_Male_Negative_Memory-CD8-T-cell': '10d6ff38-dabd-4439-90e4-28d2c932d81f',
        'BR2_Male_Positive_Memory-CD8-T-cell': '37d23649-4670-4ac2-9dd0-e0de0fce573d',
    }
}

## Read and process data per group

Here, we'll step through each group based on cohort, sex, and CMV, assemble all related data across our selected files for this cell type, then split the results back out to multiple files based on the Visit Groups defined above.

This way, we can combine across L2 cell classes without generating enormous datasets (almost 6M cells for CD4 T cells alone, for example).

Once split up, we'll reprocess each subset of data to generate nice clusters and UMAP projections, then refine L3 cell type labels by taking the most frequent label within each cluster. After refining labels, we'll propagate labels back to L2 and L1 so they're consistent.

Finally, we'll output these refined results per group, as well as a .csv file with updated labels to enable review of our analysis.

In [32]:
# Evaluate the date once so that every file written by this run carries the
# same date suffix, even if processing continues past midnight.
run_date = date.today()

# Output paths per grouping, accumulated across the whole loop
out_files = {}

for grouping in split_meta:
    print(grouping)

    # Check if output files have been generated previously
    out_file = 'output/diha_{c}_{g}_AIFI_L3_review_{d}.h5ad'.format(
        c = class_name,
        g = grouping,
        d = run_date
    )
    out_files[grouping] = [out_file]

    if os.path.isfile(out_file):
        print('{g} Previously processed. Skipping.'.format(g = grouping))
        continue

    # Read Large Files
    # Collect into a per-iteration list so that data left over from an
    # earlier, interrupted run can never be concatenated into this group.
    group_adatas = []
    for group_name, uuid in large_uuids[grouping].items():
        group_adata = read_adata_uuid(uuid)
        print('{g}: {n} cells'.format(g = group_name, n = group_adata.shape[0]))
        group_adatas.append(group_adata)
        del group_adata

    if len(group_adatas) == 1:
        all_adata = group_adatas[0]
    else:
        all_adata = sc.concat(group_adatas)
    del group_adatas
    print('Total: {n} cells'.format(n = all_adata.shape[0]))

    all_adata = process_adata(all_adata)

    # Refine labels based on in-group clustering
    # ISG-high cells are reclustered separately, before the labels of the
    # full object are overwritten by the per-cluster majority below.
    isg_hi_labels = isg_hi_refinement(all_adata, refine_res = 3)

    all_adata = assign_most_frequent(
        all_adata,
        'leiden_2', 'AIFI_L3',
        keep_original = True,
        original_prefix = 'predicted_'
    )

    # Correct ISG-high labels
    # Split off the ISG-high cells, replace their label columns with the
    # separately refined ones (matched on 'barcodes'), then recombine.
    isg_idx = all_adata.obs['barcodes'].isin(isg_hi_labels['barcodes'])
    isg_adata = all_adata[isg_idx]
    all_adata = all_adata[~isg_idx]
    isg_obs = isg_adata.obs.drop(['AIFI_L3', 'predicted_AIFI_L3'], axis = 1)
    isg_obs = isg_obs.reset_index(drop = True)
    isg_obs = isg_obs.merge(isg_hi_labels, on = 'barcodes', how = 'left')
    isg_obs = isg_obs.set_index('barcodes', drop = False)
    isg_adata.obs = isg_obs

    all_adata = sc.concat([all_adata, isg_adata])

    all_adata = propagate_hierarchy(
        all_adata,
        hierarchy_df,
        from_level = 'AIFI_L3',
        to_levels = ['AIFI_L2', 'AIFI_L1'],
        keep_original = True,
        original_prefix = 'predicted_'
    )

    # Save results for this group
    all_adata.write_h5ad(out_file)

    # Save metadata and UMAP
    meta_csv = 'output/review/diha_{c}_{g}_AIFI_L3_review_meta_{d}.csv'.format(
        c = class_name,
        g = grouping,
        d = run_date
    )
    group_obs = obs_with_umap(all_adata)
    group_obs.to_csv(meta_csv)
    out_files[grouping].append(meta_csv)

    # Save marker expression summaries
    out_l2_markers = 'output/review/diha_{c}_{g}_AIFI_L2_review_markers_{d}.csv'.format(
        c = class_name,
        g = grouping,
        d = run_date
    )
    l2_marker_df = tidy_marker_df(
        all_adata,
        l2_markers,
        'AIFI_L2'
    )
    l2_marker_df.to_csv(out_l2_markers)
    out_files[grouping].append(out_l2_markers)

    out_l3_markers = 'output/review/diha_{c}_{g}_AIFI_L3_review_markers_{d}.csv'.format(
        c = class_name,
        g = grouping,
        d = run_date
    )
    l3_marker_df = tidy_marker_df(
        all_adata,
        l3_markers,
        'AIFI_L3'
    )
    l3_marker_df.to_csv(out_l3_markers)
    out_files[grouping].append(out_l3_markers)

    # Release this group's data before loading the next one
    del all_adata

BR1_Female
BR1_Female Previously processed. Skipping.
BR1_Male
BR1_Male_Negative_Memory-CD8-T-cell: 132362 cells
BR1_Male_Positive_Memory-CD8-T-cell: 189102 cells
Total: 321464 cells
Normalizing; Finding HVGs; Scaling; PCA; Harmony; 

2024-03-25 03:53:33,007 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...
2024-03-25 03:56:01,838 - harmonypy - INFO - sklearn.KMeans initialization complete.


Neighbors; Leiden; UMAP; Renormalizing
['14']
Finding HVGs; Scaling; PCA; 

2024-03-25 04:20:45,384 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...


Harmony; 

2024-03-25 04:20:49,361 - harmonypy - INFO - sklearn.KMeans initialization complete.


Neighbors; Leiden; UMAP; Renormalizing




BR2_Female
BR2_Female_Negative_Memory-CD8-T-cell: 78384 cells
BR2_Female_Positive_Memory-CD8-T-cell: 399129 cells
Total: 477513 cells
Normalizing; Finding HVGs; Scaling; PCA; Harmony; 

2024-03-25 04:26:23,243 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...
2024-03-25 04:30:11,931 - harmonypy - INFO - sklearn.KMeans initialization complete.


Neighbors; Leiden; UMAP; 

IOStream.flush timed out


Renormalizing
['11']
Finding HVGs; Scaling; PCA; 

2024-03-25 05:07:04,409 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...


Harmony; 

2024-03-25 05:07:09,099 - harmonypy - INFO - sklearn.KMeans initialization complete.


Neighbors; Leiden; UMAP; Renormalizing




BR2_Male
BR2_Male_Negative_Memory-CD8-T-cell: 173486 cells
BR2_Male_Positive_Memory-CD8-T-cell: 195401 cells
Total: 368887 cells
Normalizing; Finding HVGs; Scaling; PCA; Harmony; 

2024-03-25 05:12:46,093 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...
2024-03-25 05:15:44,880 - harmonypy - INFO - sklearn.KMeans initialization complete.


Neighbors; Leiden; UMAP; Renormalizing
['13']
Finding HVGs; Scaling; PCA; 

2024-03-25 05:42:30,742 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...


Harmony; 

2024-03-25 05:42:35,051 - harmonypy - INFO - sklearn.KMeans initialization complete.


Neighbors; Leiden; UMAP; Renormalizing




## Assemble output files for upload

### h5ad files

In [33]:
output_files = os.listdir('output')
# Keep only this cell class's .h5ad files. Matching on class_name alone
# would also pick up the combined metadata and review bundle that a previous
# run of this notebook wrote to the same directory.
h5ad_files = sorted(
    '{d}/{f}'.format(d = 'output', f = output_file)
    for output_file in output_files
    if class_name in output_file and output_file.endswith('.h5ad')
)

### Review files

In [34]:
rev_files = os.listdir('output/review')
# Paths of the review tables that belong to this cell class
review_files = [
    '{d}/{f}'.format(d = 'output/review', f = rev_file)
    for rev_file in rev_files
    if class_name in rev_file
]

### Combine metadata files to assemble a full set

In [35]:
# Review files whose path contains 'meta' are the per-group metadata tables
meta_files = [review_file for review_file in review_files if 'meta' in review_file]

# Read each table (first CSV column as index) and stack them into one
meta_list = [pd.read_csv(meta_file, index_col = 0) for meta_file in meta_files]
all_meta = pd.concat(meta_list)

In [36]:
# Write the combined metadata as both .csv and .parquet; the two paths are
# reused when assembling the upload list.
meta_csv = 'output/diha_{c}_AIFI_L3_refinement_meta_{d}.csv'.format(c = class_name, d = date.today())
all_meta.to_csv(meta_csv)
meta_parquet = 'output/diha_{c}_AIFI_L3_refinement_meta_{d}.parquet'.format(c = class_name, d = date.today())
all_meta.to_parquet(meta_parquet)

### Bundle review files into a .tar for later use

In [37]:
review_tar = 'output/diha_{c}_AIFI_L3_refinement_review_{d}.tar.gz'.format(c = class_name, d = date.today())
# The context manager closes the archive even if adding a file fails
with tarfile.open(review_tar, 'w:gz') as tar:
    for review_file in review_files:
        tar.add(review_file)

## Upload assembled results to HISE

In [38]:
# HISE study space that receives the upload, and the dated title used for it
study_space_uuid = 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'
title = 'DIHA Memory CD8 AIFI_L3 Refinement {d}'.format(d = date.today())

In [39]:
# Flatten large_uuids into the list of input file UUIDs recorded with the upload
in_files = [
    uuid
    for file_dict in large_uuids.values()
    for uuid in file_dict.values()
]
in_files

['de83b600-3cc6-40ba-acb6-613c12e178ac',
 'aa8b8b0f-164f-4fba-8af9-8397d9e67cd7',
 '8cc6a9b1-0ec7-445e-8299-556a4f95cb66',
 'df7c9a0a-3b5e-48d9-91de-2bd424daa44a',
 'b8550f9d-b4aa-4ba7-955b-cf556fabb21d',
 'd76d8ee6-6b85-42e4-9974-36c6ef4b0538',
 '10d6ff38-dabd-4439-90e4-28d2c932d81f',
 '37d23649-4670-4ac2-9dd0-e0de0fce573d']

In [40]:
# Files to upload: the per-group .h5ad files, the combined metadata
# (.csv and .parquet), and the review bundle
out_files = h5ad_files + [meta_csv, meta_parquet, review_tar]

In [41]:
out_files

['output/diha_memory_cd8_t_cell_BR2_Female_AIFI_L3_review_2024-03-25.h5ad',
 'output/diha_memory_cd8_t_cell_BR2_Male_AIFI_L3_review_2024-03-25.h5ad',
 'output/diha_memory_cd8_t_cell_BR1_Male_AIFI_L3_review_2024-03-25.h5ad',
 'output/diha_memory_cd8_t_cell_BR1_Female_AIFI_L3_review_2024-03-25.h5ad',
 'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.csv',
 'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.parquet',
 'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_review_2024-03-25.tar.gz']

In [42]:
# Upload the outputs to the study space, passing in_files as the input file
# ids for this result. The captured output below shows that the call asks
# for (y/n) confirmation before uploading.
hisepy.upload.upload_files(
    files = out_files,
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = in_files
)

output/diha_memory_cd8_t_cell_BR2_Female_AIFI_L3_review_2024-03-25.h5ad
output/diha_memory_cd8_t_cell_BR2_Male_AIFI_L3_review_2024-03-25.h5ad
output/diha_memory_cd8_t_cell_BR1_Male_AIFI_L3_review_2024-03-25.h5ad
output/diha_memory_cd8_t_cell_BR1_Female_AIFI_L3_review_2024-03-25.h5ad
output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.csv
output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.parquet
output/diha_memory_cd8_t_cell_AIFI_L3_refinement_review_2024-03-25.tar.gz
you are trying to upload file_ids... ['output/diha_memory_cd8_t_cell_BR2_Female_AIFI_L3_review_2024-03-25.h5ad', 'output/diha_memory_cd8_t_cell_BR2_Male_AIFI_L3_review_2024-03-25.h5ad', 'output/diha_memory_cd8_t_cell_BR1_Male_AIFI_L3_review_2024-03-25.h5ad', 'output/diha_memory_cd8_t_cell_BR1_Female_AIFI_L3_review_2024-03-25.h5ad', 'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.csv', 'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.parquet', 'output/diha_memor

(y/n) y


{'trace_id': '087fa01e-2458-420f-a999-645b10a239bb',
 'files': ['output/diha_memory_cd8_t_cell_BR2_Female_AIFI_L3_review_2024-03-25.h5ad',
  'output/diha_memory_cd8_t_cell_BR2_Male_AIFI_L3_review_2024-03-25.h5ad',
  'output/diha_memory_cd8_t_cell_BR1_Male_AIFI_L3_review_2024-03-25.h5ad',
  'output/diha_memory_cd8_t_cell_BR1_Female_AIFI_L3_review_2024-03-25.h5ad',
  'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.csv',
  'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_meta_2024-03-25.parquet',
  'output/diha_memory_cd8_t_cell_AIFI_L3_refinement_review_2024-03-25.tar.gz']}

In [43]:
import session_info
session_info.show()