In [1]:
import pandas as pd 
import numpy as np 
import scanpy as sc
import matplotlib.pyplot as plt
import concurrent.futures
import pickle
import warnings
from datetime import date
import hisepy
import os
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed,ProcessPoolExecutor
from tqdm import tqdm
import anndata
import milopy
import milopy.core as milo
import gc
# Silence every Python warning for the rest of the session.
warnings.simplefilter("ignore")
# Default number of parallel jobs scanpy may use.
sc.settings.n_jobs = 60
# Relative output paths used by later cells resolve against this directory.
print(f"Current working directory: {os.getcwd()}")

Current working directory: /home/jupyter/BRI_Figures/Figure2


# Read MetaData

In [2]:
# Load the scRNA metadata table; later cells filter it on `sample.visitName`
# (and `Covid_exclusion`) and read `pbmc_sample_id` to locate per-sample files.
meta_data=pd.read_csv("/home/jupyter/BRI_Figures/Dataset/scRNA_meta_data-2024-05-09.csv")

# Assemble Year 1 Day 0

In [3]:
# Keep only rows whose visit is "Flu Year 1 Day 0".
# NOTE(review): unlike the two later cohorts, no `Covid_exclusion == 'no'`
# filter is applied here — confirm this is intentional.
is_y1d0_visit = meta_data['sample.visitName'].isin(['Flu Year 1 Day 0'])
meta_data_subset = meta_data[is_y1d0_visit]

In [4]:
%%time
# One .h5ad path per selected sample, in metadata row order.
h5ad_dir = '/home/jupyter/BRI_Figures/Dataset/scRNA/BRI/h5ad/sample_h5ad/'
file_names = [h5ad_dir + sample_id + ".h5ad" for sample_id in meta_data_subset['pbmc_sample_id'].tolist()]

# Read the files on 10 worker threads.
# executor.map yields results in submission order, so adata_list always lines
# up with file_names. The previous as_completed loop appended objects in
# whatever order the reads happened to finish, which made the row order of the
# concatenated object (and anything downstream that depends on it) vary
# between runs.
with ThreadPoolExecutor(max_workers=10) as executor:
    adata_list = [
        sample_adata
        for sample_adata in tqdm(executor.map(sc.read_h5ad, file_names), total=len(file_names))
        if sample_adata is not None
    ]

100% 92/92 [00:30<00:00,  3.00it/s]

CPU times: user 17.4 s, sys: 14.3 s, total: 31.7 s
Wall time: 30.7 s





In [5]:
# Stack the per-sample objects into a single AnnData using anndata.concat's
# defaults (concatenate along observations, inner join on variables).
# NOTE(review): no `index_unique` is passed, so obs_names are taken as-is —
# confirm barcodes are already unique across samples.
adata = anndata.concat(adata_list)

In [6]:
%%time
# Snapshot the current (pre-normalisation) matrix in .raw; a later cell
# restores it with adata.raw.to_adata() before writing to disk.
adata.raw = adata

# Normalise every cell to a total of 1e4, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Remove genes whose names start with IGL / IGK / IGH before feature
# selection (presumably so immunoglobulin genes cannot drive the embedding).
ig_prefixes = ("IGL", "IGK", "IGH")
exl_genes = [gene for gene in adata.var_names if gene.startswith(ig_prefixes)]
mask = ~adata.var_names.isin(exl_genes)
adata = adata[:, mask]

# Flag highly variable genes (scanpy defaults) and keep only those.
sc.pp.highly_variable_genes(adata)
hvg_names = adata.var_names[adata.var['highly_variable']]
adata = adata[:, hvg_names]

# Scale -> PCA -> 50-neighbour graph on the first 20 PCs -> UMAP.
sc.pp.scale(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=50, use_rep='X_pca', n_pcs=20)
sc.tl.umap(adata, min_dist=0.45, random_state=0)

  0%|          | 0/200 [00:00<?, ?it/s]

	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs


IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out
IOStream.flush timed out


	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
CPU times: user 11h 50min 33s, sys: 35min 20s, total: 12h 25min 53s
Wall time: 1h 18min 15s


In [8]:
%%time
# Build Milo neighbourhoods with milopy's default arguments
# (presumably from the kNN graph computed by sc.pp.neighbors — confirm),
# then count cells per neighbourhood using the `specimen.specimenGuid`
# obs column as the sample identifier. Neither call's return value is used.
milo.make_nhoods(adata)
milo.count_nhoods(adata, sample_col="specimen.specimenGuid")

CPU times: user 1h 56min 15s, sys: 1h 20min 58s, total: 3h 17min 14s
Wall time: 51min


In [9]:
# Rebuild a full AnnData from the matrix stored in .raw by the preprocessing
# cell (replacing the scaled, HVG-only object), then write it to the current
# working directory.
# NOTE(review): presumably to_adata() carries over obs/obsm/obsp/uns (UMAP,
# Milo results) — confirm with the installed anndata version.
adata=adata.raw.to_adata()
adata.write_h5ad('Y1D0_BRI.h5ad')

# Assemble Year 1 Day 0 and Day 7

In [3]:
# Rows from the "Flu Year 1 Day 0" or "Flu Year 1 Day 7" visits whose
# `Covid_exclusion` value is 'no'.
in_flu_y1_visits = meta_data['sample.visitName'].isin(['Flu Year 1 Day 0','Flu Year 1 Day 7'])
not_covid_excluded = meta_data['Covid_exclusion']=='no'
meta_data_subset = meta_data[in_flu_y1_visits & not_covid_excluded]

In [4]:
%%time
# One .h5ad path per selected sample, in metadata row order.
h5ad_dir = '/home/jupyter/BRI_Figures/Dataset/scRNA/BRI/h5ad/sample_h5ad/'
file_names = [h5ad_dir + sample_id + ".h5ad" for sample_id in meta_data_subset['pbmc_sample_id'].tolist()]

# Read the files on 10 worker threads.
# executor.map yields results in submission order, so adata_list always lines
# up with file_names. The previous as_completed loop appended objects in
# whatever order the reads happened to finish, which made the row order of the
# concatenated object (and anything downstream that depends on it) vary
# between runs.
with ThreadPoolExecutor(max_workers=10) as executor:
    adata_list = [
        sample_adata
        for sample_adata in tqdm(executor.map(sc.read_h5ad, file_names), total=len(file_names))
        if sample_adata is not None
    ]

100% 184/184 [06:46<00:00,  2.21s/it]

CPU times: user 36.9 s, sys: 40.7 s, total: 1min 17s
Wall time: 6min 46s





In [5]:
# Stack the per-sample objects into a single AnnData using anndata.concat's
# defaults (concatenate along observations, inner join on variables).
# NOTE(review): no `index_unique` is passed, so obs_names are taken as-is —
# confirm barcodes are already unique across samples.
adata = anndata.concat(adata_list)

In [6]:
# The per-sample list is no longer needed once concatenated: drop the
# reference and trigger a collection before the memory-heavy preprocessing.
# gc.collect() is the cell's last expression, so its return value is displayed.
del adata_list
gc.collect()

0

In [None]:
%%time
# Snapshot the current (pre-normalisation) matrix in .raw; a later cell
# restores it with adata.raw.to_adata() before writing to disk.
adata.raw = adata

# Normalise every cell to a total of 1e4, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Remove genes whose names start with IGL / IGK / IGH before feature
# selection (presumably so immunoglobulin genes cannot drive the embedding).
ig_prefixes = ("IGL", "IGK", "IGH")
exl_genes = [gene for gene in adata.var_names if gene.startswith(ig_prefixes)]
mask = ~adata.var_names.isin(exl_genes)
adata = adata[:, mask]

# Flag highly variable genes (scanpy defaults) and keep only those.
sc.pp.highly_variable_genes(adata)
hvg_names = adata.var_names[adata.var['highly_variable']]
adata = adata[:, hvg_names]

# Scale -> PCA -> 50-neighbour graph on the first 20 PCs -> UMAP.
sc.pp.scale(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=50, use_rep='X_pca', n_pcs=20)
sc.tl.umap(adata, min_dist=0.45, random_state=0)

In [None]:
%%time
# Build Milo neighbourhoods with milopy's default arguments
# (presumably from the kNN graph computed by sc.pp.neighbors — confirm),
# then count cells per neighbourhood using the `specimen.specimenGuid`
# obs column as the sample identifier. Neither call's return value is used.
milo.make_nhoods(adata)
milo.count_nhoods(adata, sample_col="specimen.specimenGuid")

In [None]:
# Display the AnnData summary as a quick sanity check before saving.
adata

In [None]:
# Rebuild a full AnnData from the matrix stored in .raw by the preprocessing
# cell (replacing the scaled, HVG-only object), then write it to the current
# working directory.
# NOTE(review): presumably to_adata() carries over obs/obsm/obsp/uns (UMAP,
# Milo results) — confirm with the installed anndata version.
adata=adata.raw.to_adata()
adata.write_h5ad('Y1D0_Y1D7_BRI.h5ad')

# Assemble Null Vaccination (Immune Variation) Day 0 and Day 7

In [None]:
# Rows from the "Immune Variation Day 0" or "Immune Variation Day 7" visits
# whose `Covid_exclusion` value is 'no'.
in_immvar_visits = meta_data['sample.visitName'].isin(['Immune Variation Day 0', 'Immune Variation Day 7'])
not_covid_excluded = meta_data['Covid_exclusion']=='no'
meta_data_subset = meta_data[in_immvar_visits & not_covid_excluded]

In [None]:
%%time
# One .h5ad path per selected sample, in metadata row order.
h5ad_dir = '/home/jupyter/BRI_Figures/Dataset/scRNA/BRI/h5ad/sample_h5ad/'
file_names = [h5ad_dir + sample_id + ".h5ad" for sample_id in meta_data_subset['pbmc_sample_id'].tolist()]

# Read the files on 10 worker threads.
# executor.map yields results in submission order, so adata_list always lines
# up with file_names. The previous as_completed loop appended objects in
# whatever order the reads happened to finish, which made the row order of the
# concatenated object (and anything downstream that depends on it) vary
# between runs.
with ThreadPoolExecutor(max_workers=10) as executor:
    adata_list = [
        sample_adata
        for sample_adata in tqdm(executor.map(sc.read_h5ad, file_names), total=len(file_names))
        if sample_adata is not None
    ]


In [None]:
# Stack the per-sample objects into a single AnnData using anndata.concat's
# defaults (concatenate along observations, inner join on variables).
# NOTE(review): no `index_unique` is passed, so obs_names are taken as-is —
# confirm barcodes are already unique across samples.
adata = anndata.concat(adata_list)

In [None]:
%%time
# Snapshot the current (pre-normalisation) matrix in .raw; a later cell
# restores it with adata.raw.to_adata() before writing to disk.
adata.raw = adata

# Normalise every cell to a total of 1e4, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Remove genes whose names start with IGL / IGK / IGH before feature
# selection (presumably so immunoglobulin genes cannot drive the embedding).
ig_prefixes = ("IGL", "IGK", "IGH")
exl_genes = [gene for gene in adata.var_names if gene.startswith(ig_prefixes)]
mask = ~adata.var_names.isin(exl_genes)
adata = adata[:, mask]

# Flag highly variable genes (scanpy defaults) and keep only those.
sc.pp.highly_variable_genes(adata)
hvg_names = adata.var_names[adata.var['highly_variable']]
adata = adata[:, hvg_names]

# Scale -> PCA -> 50-neighbour graph on the first 20 PCs -> UMAP.
sc.pp.scale(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=50, use_rep='X_pca', n_pcs=20)
sc.tl.umap(adata, min_dist=0.45, random_state=0)

In [None]:
%%time
# Build Milo neighbourhoods with milopy's default arguments
# (presumably from the kNN graph computed by sc.pp.neighbors — confirm),
# then count cells per neighbourhood using the `specimen.specimenGuid`
# obs column as the sample identifier. Neither call's return value is used.
milo.make_nhoods(adata)
milo.count_nhoods(adata, sample_col="specimen.specimenGuid")

In [None]:
# Rebuild a full AnnData from the matrix stored in .raw by the preprocessing
# cell (replacing the scaled, HVG-only object), then write it to the current
# working directory.
# NOTE(review): presumably to_adata() carries over obs/obsm/obsp/uns (UMAP,
# Milo results) — confirm with the installed anndata version.
adata=adata.raw.to_adata()
adata.write_h5ad('ImmVarD0_ImmVarD7_BRI.h5ad')

# Upload the files

In [2]:
# Read the `id` column of the UUID table as a plain list; it is passed to the
# upload call as `input_file_ids`.
input_uuid=pd.read_csv("/home/jupyter/BRI_Figures/Dataset/scRNA_BRI_h5ad_uuid.csv")['id'].tolist()

In [3]:
# Target study space for the upload.
study_space_uuid = 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'

# Upload title: date, working directory and this notebook's file name.
# The template previously ended with a stray double quote, which was emitted
# verbatim at the end of every title.
# `__session__` is not defined in this notebook; it is assumed to be injected
# by the Jupyter environment and to hold the notebook path.
title = 'Cert-Pro_BRI_Figures_Files_{d}_from_{wd}/{notebook_name}'.format(
    d=date.today(),
    wd=os.getcwd(),
    notebook_name=str(__session__).split('/')[-1],
)
title

'Cert-Pro_BRI_Figures_Files_2024-05-21_from_/home/jupyter/BRI_Figures/Figure2/01A_Assemble_Data_scRNA.ipynb"'

In [4]:
# Upload the assembled .h5ad files to the study space, recording the input
# file UUIDs loaded above; the destination name is stamped with today's date.
# NOTE(review): only the two Flu Year 1 files are listed —
# 'ImmVarD0_ImmVarD7_BRI.h5ad' written earlier in this notebook is not
# uploaded here; confirm that is intentional.
# NOTE(review): the absolute paths assume the notebook was run from
# /home/jupyter/BRI_Figures/Figure2, since the files were written with
# relative names.
hisepy.upload.upload_files(
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = input_uuid,
    files = ['/home/jupyter/BRI_Figures/Figure2/Y1D0_BRI.h5ad','/home/jupyter/BRI_Figures/Figure2/Y1D0_Y1D7_BRI.h5ad'],
    destination="Cert-Pro_BRI_Figures_Figure2_h5ad_set_{d}".format(d = date.today())
)

Cannot determine the current notebook.
1) /home/jupyter/IHA-Figures/Figure6/0?_DEG_Visualization.ipynb
2) /home/jupyter/IHA-Figures/Figure5/0?_Composition.ipynb
3) /home/jupyter/IHA-Figures/Figure3/03B_Aging_CompositeScore_SF4.ipynb
Please select (1-3) 


 1


you are trying to upload file_ids... ['/home/jupyter/BRI_Figures/Figure2/Y1D0_BRI.h5ad', '/home/jupyter/BRI_Figures/Figure2/Y1D0_Y1D7_BRI.h5ad']. Do you truly want to proceed?


(y/n) Y


{'trace_id': '8e842ef9-394f-4f98-8ad4-8ba1fa7e91fa',
 'files': ['/home/jupyter/BRI_Figures/Figure2/Y1D0_BRI.h5ad',
  '/home/jupyter/BRI_Figures/Figure2/Y1D0_Y1D7_BRI.h5ad']}

In [5]:
# Print the last path component of `__session__` (the notebook file name used
# in the upload title) to check it resolves as expected.
print(str(__session__).split('/')[-1])


01A_Assemble_Data_scRNA.ipynb
