In [None]:
import sys

import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import matplotlib as mpl
import scipy

import scvi
import hashlib

from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text
import seaborn as sns

In [None]:
pwd

In [None]:
# Code provided by Vitalii

def read_10x_output(smp_list, metadata=None, type = 'raw'):
    import os
    
    #Writing output from separate samples, processed using CellRanger, into a dictionary of Scanpy objects:
    ad = {}

    #Generate AnnData for each sample
    for sample_name in smp_list:
        path = sample_name
        for i in os.listdir(path):
            if type in i and 'h5' in i:
                file = i
        ad[sample_name] = sc.read_10x_h5(sample_name +'/'+file)
        ad[sample_name].var.rename(columns = {'gene_ids':'ENSEMBL'}, inplace = True)
        ad[sample_name].var['SYMBOL'] = ad[sample_name].var.index
        ad[sample_name].var.index = ad[sample_name].var['ENSEMBL']
        ad[sample_name].var.drop(columns=['ENSEMBL'], inplace=True)
        #ad[sample_name].var_names_make_unique() 
        
        
        sc.pp.calculate_qc_metrics(ad[sample_name], inplace=True)
        #ad[sample_name] = ad[sample_name][ad[sample_name].obs['total_counts'] > umi_filter, :]
        ad[sample_name].var['mt'] = [gene.startswith('mt-') 
                                     for gene in ad[sample_name].var['SYMBOL']]
        ad[sample_name].obs['mt_frac'] = (ad[sample_name][:, 
               ad[sample_name].var['mt'].tolist()].X.sum(1).A.squeeze() 
                                          / ad[sample_name].obs['total_counts'])
        
        ad[sample_name].obs['sample_id'] = sample_name
        ad[sample_name].obs['barcode'] = ad[sample_name].obs_names
        #ad[sample_name].obs_names = ad[sample_name].obs['barcode']+"_"+ad[sample_name].obs['sample_id']

    #Merge AnnData objects from all the samples together    
    from scipy.sparse import vstack
    stack = vstack([ad[x].X for x in smp_list]) # stack data
    adata = sc.AnnData(stack, var = ad[smp_list[0]].var)
    adata.obs = pd.concat([ad[x].obs for x in smp_list], axis = 0)

    if metadata is not None:
        #Add cleaned metadata to the Anndata.obs table
        # obs_merged = pd.merge(left = adata.obs, right = metadata, 
        #                      how = "left", left_on="sample", right_on="sample")
        # obs_merged.index = obs_merged['sample']+"_"+obs_merged['barcode']
        # print(obs_merged.index.equals(adata.obs.index))
        # adata.obs = obs_merged
        adata.obs[metadata.columns] = metadata.reindex(index=adata.obs['sample_id']).values
    for c in adata.obs.columns:
        adata.obs[c] = adata.obs[c].astype('str')
    adata.obs =  adata.obs.copy()

    return adata, ad

In [3]:
cd /lustre/scratch126/cellgen/team205/jl29/samsidfetal/samsidfetal

/lustre/scratch126/cellgen/team205/jl29/samsidfetal/samsidfetal


In [4]:
sample_list= pd.read_csv("/home/jovyan/mount/gdrive/Spine/metadata_spine_DRteam.csv")

In [5]:
sample_list=list(sample_list['irods_path'])

In [6]:
sample_list

['cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841322_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841323_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621989_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621992_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621990_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621991_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032965_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032967_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032966_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032968_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032964_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032969_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841316_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841317_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841318_GRCh38-2

In [7]:
sample_list = [x for x in sample_list if str(x) != 'nan']


In [8]:
sample_list

['cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841322_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841323_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621989_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621992_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621990_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621991_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032965_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032967_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032966_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032968_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032964_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032969_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841316_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841317_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841318_GRCh38-2

In [9]:
import os

arr = os.listdir()

In [10]:
arr

['cellranger600_count_41456_SB_200532_10863996_GRCh38-2020-A',
 'cellranger600_count_39102_SB_200532_10297929_GRCh38-2020-A',
 'cellranger600_count_42152_SB_200532_11254467_GRCh38-2020-A',
 'cellranger600_count_45703_SL_CRTexp13070411_GRCh38-2020-A',
 'cellranger600_count_46766_SL_CRTexp13442811_GRCh38-2020-A',
 'cellranger600_count_44847_SB_200532_12829014_GRCh38-2020-A',
 'cellranger600_count_42152_SB_200532_11254458_GRCh38-2020-A',
 'cellranger211_count_35528_WSSS_THYst9384956_GRCh38-1_2_0',
 'cellranger600_count_41025_SB_200532_10621989_GRCh38-2020-A',
 'cellranger600_count_46766_SL_CRTexp13442805_GRCh38-2020-A',
 'Velocyto',
 'WSSS_THYst9699526',
 'spaceranger110_count_34881_WS_F_ADRsp9141991_GRCh38-2020-A',
 'WSSS_THYst9383361',
 'cellranger600_count_39102_SB_200532_10297927_GRCh38-2020-A',
 'pns_cellxgene.h5ad',
 'cellranger600_count_44999_SL_CRT12867208_GRCh38-2020-A',
 'cartilage_raw_for_ISS.h5ad',
 'cellranger600_count_42152_SB_200532_11254464_GRCh38-2020-A',
 'cellranger211_

In [11]:
filter_data = [x for x in arr if
              any(y in x for y in sample_list)]

In [12]:
sample_list

['cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841322_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841323_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621989_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621992_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621990_GRCh38-2020-A',
 'cellranger600_count_41025_SB_200532_10621991_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032965_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032967_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032966_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032968_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032964_GRCh38-2020-A',
 'cellranger600_count_37295_SB_200532_10032969_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841316_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841317_GRCh38-2020-A',
 'cellranger600_count_40813_SB_200532_10841318_GRCh38-2

In [None]:
len(filter_data)

In [14]:
adata, ad_list = read_10x_output(
    smp_list=filter_data,
    
    type='filtered'
)

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


In [15]:
pwd

'/lustre/scratch126/cellgen/team205/jl29/samsidfetal/samsidfetal'

In [16]:
adata

AnnData object with n_obs × n_vars = 559455 × 36601
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'mt_frac', 'sample_id', 'barcode'
    var: 'feature_types', 'genome', 'SYMBOL', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'mt'

In [17]:
adata.var

Unnamed: 0_level_0,feature_types,genome,SYMBOL,n_cells_by_counts,mean_counts,log1p_mean_counts,pct_dropout_by_counts,total_counts,log1p_total_counts,mt
ENSEMBL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ENSG00000243485,Gene Expression,GRCh38,MIR1302-2HG,0,0.000000,0.000000,100.000000,0.0,0.000000,False
ENSG00000237613,Gene Expression,GRCh38,FAM138A,0,0.000000,0.000000,100.000000,0.0,0.000000,False
ENSG00000186092,Gene Expression,GRCh38,OR4F5,0,0.000000,0.000000,100.000000,0.0,0.000000,False
ENSG00000238009,Gene Expression,GRCh38,AL627309.1,10,0.005900,0.005882,99.410029,10.0,2.397895,False
ENSG00000239945,Gene Expression,GRCh38,AL627309.3,0,0.000000,0.000000,100.000000,0.0,0.000000,False
...,...,...,...,...,...,...,...,...,...,...
ENSG00000277836,Gene Expression,GRCh38,AC141272.1,0,0.000000,0.000000,100.000000,0.0,0.000000,False
ENSG00000278633,Gene Expression,GRCh38,AC023491.2,0,0.000000,0.000000,100.000000,0.0,0.000000,False
ENSG00000276017,Gene Expression,GRCh38,AC007325.1,0,0.000000,0.000000,100.000000,0.0,0.000000,False
ENSG00000278817,Gene Expression,GRCh38,AC007325.4,108,0.077286,0.074445,93.628319,131.0,4.882802,False


# Metadata

In [18]:
metadata= pd.read_csv("/home/jovyan/mount/gdrive/Spine/metadata_spine_DRteam.csv")

In [19]:
metadata["run_id"]= metadata["irods_path"]

In [20]:
metadata

Unnamed: 0,PCW,region,subregion,dissociation,batch,irods_path,run_id
0,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...
1,7.0,thoracic,thoracic,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841322_G...,cellranger600_count_40813_SB_200532_10841322_G...
2,7.0,cervical,lumbar,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841323_G...,cellranger600_count_40813_SB_200532_10841323_G...
3,5.0,cervical,cervical,no_trypsin,HDBR15868,cellranger600_count_41025_SB_200532_10621989_G...,cellranger600_count_41025_SB_200532_10621989_G...
4,5.0,cervical,cervical,trypsin,HDBR15868,cellranger600_count_41025_SB_200532_10621992_G...,cellranger600_count_41025_SB_200532_10621992_G...
...,...,...,...,...,...,...,...
63,,,,,,,
64,,,,,,,
65,,,,,,,
66,,,,,,,


In [21]:
metadata=metadata.dropna(how='all')

In [22]:
metadata

Unnamed: 0,PCW,region,subregion,dissociation,batch,irods_path,run_id
0,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...
1,7.0,thoracic,thoracic,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841322_G...,cellranger600_count_40813_SB_200532_10841322_G...
2,7.0,cervical,lumbar,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841323_G...,cellranger600_count_40813_SB_200532_10841323_G...
3,5.0,cervical,cervical,no_trypsin,HDBR15868,cellranger600_count_41025_SB_200532_10621989_G...,cellranger600_count_41025_SB_200532_10621989_G...
4,5.0,cervical,cervical,trypsin,HDBR15868,cellranger600_count_41025_SB_200532_10621992_G...,cellranger600_count_41025_SB_200532_10621992_G...
...,...,...,...,...,...,...,...
57,17.0,cervical,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018320_G...,cellranger600_count_41865_SB_200532_11018320_G...
58,17.0,thoracic,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018321_G...,cellranger600_count_41865_SB_200532_11018321_G...
59,17.0,thoracic,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018322_G...,cellranger600_count_41865_SB_200532_11018322_G...
60,17.0,lumbar,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018323_G...,cellranger600_count_41865_SB_200532_11018323_G...


In [23]:
adata.obs.dtypes

n_genes_by_counts              object
log1p_n_genes_by_counts        object
total_counts                   object
log1p_total_counts             object
pct_counts_in_top_50_genes     object
pct_counts_in_top_100_genes    object
pct_counts_in_top_200_genes    object
pct_counts_in_top_500_genes    object
mt_frac                        object
sample_id                      object
barcode                        object
dtype: object

In [24]:
#Add cleaned metadata to the Anndata.obs table
        # obs_merged = pd.merge(left = adata.obs, right = metadata, 
        #                      how = "left", left_on="sample", right_on="sample")
        # obs_merged.index = obs_merged['sample']+"_"+obs_merged['barcode']
        # print(obs_merged.index.equals(adata.obs.index))
        # adata.obs = obs_merged

In [25]:
metadata["run_id"]= metadata["run_id"].astype(str)
adata.obs["sample_id"]=adata.obs["sample_id"].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata["run_id"]= metadata["run_id"].astype(str)


In [26]:
metadata

Unnamed: 0,PCW,region,subregion,dissociation,batch,irods_path,run_id
0,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...
1,7.0,thoracic,thoracic,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841322_G...,cellranger600_count_40813_SB_200532_10841322_G...
2,7.0,cervical,lumbar,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841323_G...,cellranger600_count_40813_SB_200532_10841323_G...
3,5.0,cervical,cervical,no_trypsin,HDBR15868,cellranger600_count_41025_SB_200532_10621989_G...,cellranger600_count_41025_SB_200532_10621989_G...
4,5.0,cervical,cervical,trypsin,HDBR15868,cellranger600_count_41025_SB_200532_10621992_G...,cellranger600_count_41025_SB_200532_10621992_G...
...,...,...,...,...,...,...,...
57,17.0,cervical,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018320_G...,cellranger600_count_41865_SB_200532_11018320_G...
58,17.0,thoracic,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018321_G...,cellranger600_count_41865_SB_200532_11018321_G...
59,17.0,thoracic,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018322_G...,cellranger600_count_41865_SB_200532_11018322_G...
60,17.0,lumbar,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018323_G...,cellranger600_count_41865_SB_200532_11018323_G...


In [27]:
metadata.dtypes

PCW             float64
region           object
subregion        object
dissociation     object
batch            object
irods_path       object
run_id           object
dtype: object

In [29]:
#metadata.reset_index(drop=True)
obs_merged = pd.merge(left = metadata, right = adata.obs, 
                             how = "left", left_on="run_id", right_on="sample_id")
# obs_merged.index = obs_merged['sample_id']+"_"+obs_merged['barcode']
# print(obs_merged.index.equals(adata.obs.index))
# adata.obs = obs_merged

In [34]:
obs_merged

Unnamed: 0,PCW,region,subregion,dissociation,batch,irods_path,run_id,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,mt_frac,sample_id,barcode
cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGAAGCTGC-1,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...,466,6.1463292576688975,900.0,6.8035054,49.55555555555556,59.333333333333336,70.44444444444444,100.0,0.0,cellranger600_count_40813_SB_200532_10841321_G...,AAACCCAAGAAGCTGC-1
cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGAGATGCC-1,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...,1006,6.914730892718563,2392.0,7.780303,46.32107023411371,55.39297658862876,64.79933110367892,78.84615384615384,0.0,cellranger600_count_40813_SB_200532_10841321_G...,AAACCCAAGAGATGCC-1
cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGAGTTCGG-1,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...,1862,7.529943370601589,5188.0,8.5542965,31.939090208172704,46.70393215111797,57.84502698535081,72.10871241326137,0.0,cellranger600_count_40813_SB_200532_10841321_G...,AAACCCAAGAGTTCGG-1
cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGATTCGCT-1,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...,1166,7.062191632286556,2368.0,7.770223,41.7652027027027,48.64864864864865,57.179054054054056,71.875,0.0,cellranger600_count_40813_SB_200532_10841321_G...,AAACCCAAGATTCGCT-1
cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGCGTCTGC-1,7.0,lumbar,cervical,no_trypsin,HDBR15918,cellranger600_count_40813_SB_200532_10841321_G...,cellranger600_count_40813_SB_200532_10841321_G...,617,6.42648845745769,1024.0,6.932448,39.2578125,49.0234375,59.27734375,88.57421875,0.0,cellranger600_count_40813_SB_200532_10841321_G...,AAACCCAAGCGTCTGC-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
cellranger600_count_41865_SB_200532_11018324_GRCh38-2020-A_TTTGTTGTCGCCTATC-1,17.0,sacral,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018324_G...,cellranger600_count_41865_SB_200532_11018324_G...,1216,7.104144092987527,2048.0,7.6251073,22.314453125,32.568359375,44.775390625,65.0390625,0.0,cellranger600_count_41865_SB_200532_11018324_G...,TTTGTTGTCGCCTATC-1
cellranger600_count_41865_SB_200532_11018324_GRCh38-2020-A_TTTGTTGTCGGAATGG-1,17.0,sacral,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018324_G...,cellranger600_count_41865_SB_200532_11018324_G...,701,6.553933404025811,1384.0,7.2334557,36.921965317919074,50.9393063583815,63.800578034682076,85.47687861271676,0.0,cellranger600_count_41865_SB_200532_11018324_G...,TTTGTTGTCGGAATGG-1
cellranger600_count_41865_SB_200532_11018324_GRCh38-2020-A_TTTGTTGTCGGCCAAC-1,17.0,sacral,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018324_G...,cellranger600_count_41865_SB_200532_11018324_G...,2226,7.708410667257367,5599.0,8.630522,28.36220753706019,40.918021075191994,52.42007501339525,66.17253080907305,0.0,cellranger600_count_41865_SB_200532_11018324_G...,TTTGTTGTCGGCCAAC-1
cellranger600_count_41865_SB_200532_11018324_GRCh38-2020-A_TTTGTTGTCTCATGGA-1,17.0,sacral,,trypsin,HDBR15948,cellranger600_count_41865_SB_200532_11018324_G...,cellranger600_count_41865_SB_200532_11018324_G...,1442,7.274479558773871,3001.0,8.007034,26.957680773075644,38.75374875041653,51.51616127957348,68.61046317894035,0.0,cellranger600_count_41865_SB_200532_11018324_G...,TTTGTTGTCTCATGGA-1


In [35]:
adata.obs.index

Index(['cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGAAGCTGC-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGAGATGCC-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGAGTTCGG-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGATTCGCT-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGCGTCTGC-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGGAGAGTA-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGGCAGGTT-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGGCCTGAA-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCAAGTAATCCC-1',
       'cellranger600_count_40813_SB_200532_10841321_GRCh38-2020-A_AAACCCACACAATTCG-1',
       ...
       'cellranger600_count_41865_SB_200532_11018324_GRCh38-2020-A_TTTGTTGGTGCAACGA-1',
       'cellranger600

In [32]:
obs_merged.index = obs_merged['sample_id']+"_"+obs_merged['barcode']
print(obs_merged.index.equals(adata.obs.index))
adata.obs = obs_merged

False


In [None]:
adata.write_h5ad("/lustre/scratch126/cellgen/team298/sm54/Data_Integration/Spine/data/raw_counts_no_ppc_5_17_pcw.h5ad")

In [1]:
adata

NameError: name 'adata' is not defined

In [None]:
sc.read_cellranger-arc201_count_49263c3c9ece1e019faa5bbb4533af77
