## Loading libraries

In [6]:
import anndata as ad
import scvi
import numpy as np
import pandas as pd
from scipy import sparse
import scanpy as sc
import os

<h2>Loading data</h2>

<p style="color:orange; font-size:20px">Reading all the samples in the respective sample path.</p>

In [79]:
# Root directory that holds one sub-folder per sample.
sample_path = 'Samples'
# Folders between the sample name and the files (if there are none, use '')
middle_path = '/filtered_feature_bc_matrix'

# Every sub-directory of `sample_path` is treated as one sample.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the sample order (and the order of cells after concatenation)
# reproducible between runs and machines.
sample_names = sorted(
    foldername
    for foldername in os.listdir(sample_path)
    if os.path.isdir(os.path.join(sample_path, foldername))
)
print(sample_names)

# For each sample, read its matrix, barcodes and features into one AnnData
# object and collect the objects in `sample_list`.
sample_list = []

for name in sample_names:
    # Build every file path from `sample_path` (the same directory that was
    # scanned above) instead of a separately hard-coded folder name, so the
    # discovery step and the reading step can never point at different places.
    sample_dir = f'{sample_path}/{name}{middle_path}'

    # Getting anndata (transposed to obs X vars)
    path = f'{sample_dir}/matrix.mtx.gz'
    sample = sc.read(path, cache=True).T

    # Getting obs: the first column of the barcode file becomes the index.
    path = f'{sample_dir}/barcodes.tsv.gz'
    obs = pd.read_csv(path, sep='\t', header=None, index_col=0)
    obs.index.name = 'barcode'
    sample.obs = obs

    # Adding metadata: the folder name identifies the patient, and the
    # upper-cased last character of the folder name is used as the condition.
    sample.obs['Patient'] = name
    sample.obs['Condition'] = name[-1].upper()

    # Getting vars: the second column of the features file becomes the index.
    path = f'{sample_dir}/features.tsv.gz'
    var = pd.read_table(path, sep='\t', header=None, index_col=1)
    var.index.name = 'genes'
    sample.var = var
    # Index values taken from the features file may repeat; suffix duplicates.
    sample.var_names_make_unique(join="-")

    sample_list.append(sample)

[AnnData object with n_obs × n_vars = 12183 × 33538
     obs: 0, 'Patient', 'Condition',
 AnnData object with n_obs × n_vars = 14770 × 33538
     obs: 0, 'Patient', 'Condition',
 AnnData object with n_obs × n_vars = 1547 × 33538
     obs: 0, 'Patient', 'Condition',
 AnnData object with n_obs × n_vars = 1557 × 33538
     obs: 0, 'Patient', 'Condition']

In [81]:
# Merge the per-sample AnnData objects into a single dataset.
adata = ad.concat(sample_list)

# Tag every cell with the study it originates from.
adata.obs['Author'] = 'Peng_2019'

# The concatenation reports duplicated obs names, so suffix the repeats to
# make every obs name unique.
adata.obs_names_make_unique(join="-")

# The per-sample list is not needed once the merged object exists.
del sample_list

  utils.warn_names_duplicates("obs")


AnnData object with n_obs × n_vars = 30057 × 33538
    obs: 0, 'Patient', 'Condition'

In [None]:
# Persist the merged dataset to disk as an .h5ad file.
h5ad_path = "adata_v1.0.h5ad"
adata.write_h5ad(h5ad_path)