# Single Cell Core Input Files Generation

Last updated October 27, 2021

By Qian Xiao, Michelle Curtis

In [1]:
#Import modules
import pandas as pd
import numpy as np

# References

In [2]:
ref_path = '/Users/curtism/Documents/2021/singlecellcore/qian_scripts' ###Change it to your own local path, where you want the references to be stored

#Read the references
ADT_HTO_index_ref= pd.read_csv('{}/ADT_HTO_primer_reference.csv'.format(ref_path))
HTO_antibody_ref= pd.read_csv('{}/hashtag_antibodies_reference.csv'.format(ref_path))
CMO_ref= pd.read_csv('{}/cell_multiplexing_reference.csv'.format(ref_path))

In [3]:
flow_cell_id = "HM2H5DSX2"### can be found in the raw bcl folder, using the <grep -i *.cfg 'Flowcell'> command
raw_folder_name = '211008_10X_KW9393_bcl'### change to the folder name containing raw bcl files

In [4]:
work_order = 'KW9393'### the current work order
local_path = '/Users/curtism/Documents/2021/singlecellcore/qian_scripts/input/{}'.format(work_order)### The path on your local PC, where the input files will be saved 

In [5]:
pipe_new = 'cellranger-6.1.1'   ###Cellranger version we use
#pipe_old1 = 'cellranger-6.0.1' 
#pipe_old3 = 'cellranger-5.0.1'
#pipe_old2 = 'cellranger-4.0.0'
#pipe_old = 'cellranger-3.1.0'
cellranger_version = pipe_new

In [6]:
#Reset all indices to empty vectors
gex_libs_index = []
adt_libs_index = []
hto_libs_index = []
vdj_libs_index = []
cmo_libs_index = []
visium_libs_index = []
nuc_libs_index  = []
gex_only_lib_index = []

Example csv input for `df_run_info`. May need to adjust the 0s and 1s and modify primer information. There should be no blanks for species or user.

<img src="example_csv.png" alt="title"> 


In [7]:
#Read the spreadsheet with run info
df_run_info = pd.read_csv('{local_path}/{work_order}.csv'.format(local_path = local_path,
                                                                 work_order = work_order) )

In [144]:
df_run_info.head()

Unnamed: 0,Library ID,Library Type,Sample ID,primer_set,Species,i7 Well,HTO,ADT,CMO,TCR/BCR,Category,Number,User,Nuclei,Sequencing�Configuration,FASTQ request,Mapping request
0,BRI-1391,GEX,1,TT,human,D1,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
1,BRI-1392,GEX,2,TT,human,D2,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
2,BRI-1393,GEX,3,TT,human,D3,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
3,BRI-1394,GEX,4,TT,human,D4,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
4,BRI-1391-M,GEX,1-Epmotion,TT,human,A12,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0


In [9]:
#Actually only need the columns Library ID, Library Type, primer_set, Species, 
#i7 Well, HTO, ADT, CMO, TCR/BCR, Category, Number, User, slide, area to run the script

#If specific Lane and Sequencing Config are provided, can edit the input files manually
#Just too lazy to incorporate it into the function :/
df_run_info.head()

Unnamed: 0,Library ID,Library Type,Sample ID,primer_set,Species,i7 Well,HTO,ADT,CMO,TCR/BCR,Category,Number,User,Nuclei,Sequencing�Configuration,FASTQ request,Mapping request
0,BRI-1391,GEX,1,TT,human,D1,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
1,BRI-1392,GEX,2,TT,human,D2,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
2,BRI-1393,GEX,3,TT,human,D3,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
3,BRI-1394,GEX,4,TT,human,D4,0,0,0,0,,,hfaust@bwh.harvard.edu,N,28+10+10+90,Y,GRCh38-3.0.0
4,BRI-1391-M,GEX,1-Epmotion,TT,human,A12,0,0,0,0,,,,N,28+10+10+90,Y,GRCh38-3.0.0


# Sample sheet generation

This part is used to generate the sample sheet for running `cellranger mkfastq` command

In [8]:
lib_ids = df_run_info['Library ID'].to_list() #e.g. BRI-100
lib_type = df_run_info['Library Type'].to_list() #GEX, HTO, ADT, ADT/HTO, HTO/ADT, TCR, BCR, TCR/BCR, BCR/TCR, CMO, Visium, NUC
lib_index = df_run_info['i7 Well'].to_list() #e.g. A5
lib_set = df_run_info['primer_set'].to_list() # G(for SI-GA), N(for SI-NA), TT(for SI-TT), TN, NT, NN
lib_species = df_run_info.Species 
lib_version = (df_run_info["Mapping request"].str.split("-",n=1,expand=True))[1].fillna("2020-A")

In [10]:
lib_ids[:4]

['BRI-1391', 'BRI-1392', 'BRI-1393', 'BRI-1394']

In [11]:
#Generate rows for the final dataframe
rows = []
for i in range(len(lib_ids)):
    print(i)
    #print(i)
    if (lib_set[i] == 'G') or (lib_set[i] == 'N') :
        rows.append(['*', 
                     lib_ids[i], 
                     'SI-{lib_set}A-{lib_index}'.format(
                         lib_set = lib_set[i],
                         lib_index = lib_index[i]
                     ) ])
        
    elif (lib_set[i] == 'TT') or (lib_set[i] == 'TN') or (lib_set[i] == 'NN') or (lib_set[i] == 'NT'):
        rows.append(['*', 
                     lib_ids[i], 
                     'SI-{lib_set}-{lib_index}'.format(
                         lib_set = lib_set[i],
                         lib_index = lib_index[i]
                     ) ])
    else:
        #print(ADT_HTO_index_ref.index[ADT_HTO_index_ref.primer_name == lib_index[i]].values)
        primer = ADT_HTO_index_ref.sequence[ int(ADT_HTO_index_ref.index[ADT_HTO_index_ref.primer_name == lib_index[i]].values) ] 
        rows.append(['*', 
                     lib_ids[i], 
                     primer ])

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


In [14]:
print(rows[0])

['*', 'BRI-1391', 'SI-TT-D1']


In [15]:
rows

[['*', 'BRI-1391', 'SI-TT-D1'],
 ['*', 'BRI-1392', 'SI-TT-D2'],
 ['*', 'BRI-1393', 'SI-TT-D3'],
 ['*', 'BRI-1394', 'SI-TT-D4'],
 ['*', 'BRI-1391-M', 'SI-TT-A12'],
 ['*', 'BRI-1392-M', 'SI-TT-B12'],
 ['*', 'BRI-1393-M', 'SI-TT-C12'],
 ['*', 'BRI-1394-M', 'SI-TT-D12'],
 ['*', 'BRI-1395', 'SI-TT-D5'],
 ['*', 'BRI-1396', 'SI-TT-D6'],
 ['*', 'BRI-1397', 'SI-TT-D7'],
 ['*', 'BRI-1398', 'SI-TT-D8'],
 ['*', 'BRI-1399', 'SI-TT-E1'],
 ['*', 'BRI-1401', 'SI-TT-E2'],
 ['*', 'BRI-1403', 'SI-TT-E3'],
 ['*', 'BRI-1405', 'SI-TT-E4'],
 ['*', 'BRI-1407', 'SI-TT-E5'],
 ['*', 'BRI-1409', 'SI-TT-E6'],
 ['*', 'BRI-1400', 'TGTCAGTC'],
 ['*', 'BRI-1402', 'TTTCCGCT'],
 ['*', 'BRI-1404', 'AATGAGCG'],
 ['*', 'BRI-1406', 'GGAATCTC'],
 ['*', 'BRI-1408', 'TTCTGAAT'],
 ['*', 'BRI-1410', 'ACGAATTC'],
 ['*', 'BRI-1411', 'SI-TT-E7'],
 ['*', 'BRI-1412', 'SI-TT-E8'],
 ['*', 'BRI-1413', 'SI-NT-B8'],
 ['*', 'BRI-1414', 'SI-TT-C10'],
 ['*', 'BRI-1415', 'SI-TN-A6'],
 ['*', 'BRI-1416', 'SI-TT-C9']]

In [12]:
df_sample_sheet = pd.DataFrame(rows, columns=["Lane", "Sample", "Index"])

In [14]:
df_sample_sheet.head()

Unnamed: 0,Lane,Sample,Index
0,*,BRI-1391,SI-TT-D1
1,*,BRI-1392,SI-TT-D2
2,*,BRI-1393,SI-TT-D3
3,*,BRI-1394,SI-TT-D4
4,*,BRI-1391-M,SI-TT-A12


In [19]:
'Users/curtism/Documents/2021/singlecellcore/input/{}/sample_sheet.csv'.format(work_order)

'Users/curtism/Documents/2021/singlecellcore/input/KW9393/sample_sheet.csv'

In [20]:
#Save the sample sheet to .csv file under the path you specified earlier
df_sample_sheet.to_csv('/Users/curtism/Documents/2021/singlecellcore/qian_scripts/input/{}/sample_sheet.csv'.format(work_order), index = False)

Check sample sheet and break up into multiple sheets, one for each sequencing configuration. 

# Feature and library files

This part is for generating the *library.csv* and *features.csv* files for the HTO/ADT libs when running `cellranger count`.

For ADT features, will use a separate script to clean the panel and generate *features.csv* file.

Any hashtag/cite-seq antibody sequences that are not in the current reference are usually on Biolengend's official website. In case they are not, email Zhu and she will contact Biolegend team.



### ADT

#### ADT Library 

In [36]:
#Index of the ADT libs
adt_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'ADT') | (df_run_info['Library Type'] == 'ADT/HTO' ) | (df_run_info['Library Type'] == 'HTO/ADT' )].values.tolist()

#Index of all the GEX libs
gex_libs_index = df_run_info.index[df_run_info['Library Type'] == 'GEX'].values.tolist()

#Index of the GEX libs with ADT libs ONLY
adt_gex_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'GEX') & df_run_info['ADT'] == 1].values.tolist()

In [14]:
gex_libs_index

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 24, 25, 27]

In [15]:
adt_libs_index

[26, 28]

In [56]:
adt_gex_libs_index

[25, 27]

In [87]:
for i in adt_libs_index:
    print(i)

26
28


In [89]:
#Write the library file for ADT under the path you specified 
for i in adt_libs_index:
    open('{local_path}/library_{ADT_sample}_ADT.csv'.format(local_path = local_path, ADT_sample = lib_ids[i]), 'w').write('fastqs,sample,library_type\n')

    open('{local_path}/library_{ADT_sample}_ADT.csv'.format(local_path = local_path, ADT_sample = lib_ids[i]), 'a+').write(
        "/data/srlab/bwh10x/{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{lib_id}/,".format(raw_folder = raw_folder_name, 
                                                                                      flow_cell = flow_cell_id,
                                                                                      lib_id = lib_ids[ adt_gex_libs_index[adt_libs_index.index(i)] ]) + 
        
        lib_ids[ adt_gex_libs_index[adt_libs_index.index(i)] ]+ #May edit this later
        
        ","+
        
        "Gene Expression\n")

    open('{local_path}/library_{ADT_sample}_ADT.csv'.format(local_path = local_path, ADT_sample = lib_ids[i]), 'a+').write(
        "/data/srlab/bwh10x/{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{lib_id}/,".format(raw_folder = raw_folder_name,
                                                                                              flow_cell = flow_cell_id,
                                                                                              lib_id = lib_ids[i]) +  
        lib_ids[i]+
        
        ","+
        
        "Antibody Capture")

### HTO

In [37]:
#Same principle as above
hto_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'HTO')|(df_run_info['Library Type'] == 'ADT/HTO')|(df_run_info['Library Type'] == 'HTO/ADT')].values.tolist()
gex_libs_index = df_run_info.index[df_run_info['Library Type'] == 'GEX'].values.tolist()
hto_gex_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'GEX') & df_run_info['HTO'] == 1].values.tolist()

In [52]:
hto_libs_index

[18, 19, 20, 21, 22, 23, 26, 28]

In [53]:
gex_libs_index

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 24, 25, 27]

In [54]:
hto_gex_libs_index

[12, 13, 14, 15, 16, 17, 25, 27]

#### HTO Library

In [94]:
#When ADT present for the same GEX
for i in hto_libs_index:
    open('{local_path}/library_{HTO_sample}_hashing.csv'.format(local_path = local_path, HTO_sample = lib_ids[i]), 'w').write('fastqs,sample,library_type\n')

    open('{local_path}/library_{HTO_sample}_hashing.csv'.format(local_path = local_path, HTO_sample = lib_ids[i]), 'a+').write(
        "/data/srlab/bwh10x/{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{lib_id}/,".format(raw_folder = raw_folder_name, 
                                                                                      flow_cell = flow_cell_id,
                                                                                      lib_id = lib_ids[ hto_gex_libs_index[hto_libs_index.index(i)] ]) + 
        
        lib_ids[ hto_gex_libs_index[hto_libs_index.index(i)] ]+ #May edit this later
        
        ","+
        
        "Gene Expression\n")

    open('{local_path}/library_{HTO_sample}_hashing.csv'.format(local_path = local_path, HTO_sample = lib_ids[i]), 'a+').write(
        "/data/srlab/bwh10x/{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{lib_id}/,".format(raw_folder = raw_folder_name,
                                                                                              flow_cell = flow_cell_id,
                                                                                              lib_id = lib_ids[i]) +  
        lib_ids[i]+
        
        ","+
        
        "Antibody Capture")

#### HTO Feature

In [25]:
#This is what HTO antibody reference looks like, you may add additional hashtag antibodies to it if new ones are used
HTO_antibody_ref.head()

Unnamed: 0,Number,Category,Sequence,Species
0,Hashtag 1,C,GTCAACTCTTTAGCG,human
1,Hashtag 2,C,TGATGGCCTATTGGG,human
2,Hashtag 3,C,TTCCGCCTCTCTTTG,human
3,Hashtag 4,C,AGTAAGTTCAGCGTA,human
4,Hashtag 5,C,AAGTATCGTTTCGCA,human


In [26]:
hto_libs_index

[18, 19, 20, 21, 22, 23, 26, 28]

In [116]:
#Generate and save the HTO feature files
#Note that in your run info spreadsheet, the hashtag numbers should be a list of numbers separated by comma without space in between

rows_features = []
for i in hto_libs_index:
    #print(i)
    species = df_run_info.Species[i]
    hash_number = df_run_info.Number[i].split(',')
    
    
    open('{local_path}/features_{HTO_sample}.csv'.format(local_path = local_path, 
                                                         HTO_sample = lib_ids[i]), 'w').write('id,name,read,pattern,sequence,feature_type\n')
    
    
    for hashtag in hash_number:
        hashtag_name = 'Hashtag'+hashtag
        
        hashtag = int(hashtag)
        
        hash_sequence_index = HTO_antibody_ref.index[(HTO_antibody_ref.Species == df_run_info.Species[i]) & 
                               (HTO_antibody_ref.Category == df_run_info.Category[i]) &
                               (HTO_antibody_ref.Number == "Hashtag {}".format(hashtag))].values
        
        hash_sequence = HTO_antibody_ref.Sequence[hash_sequence_index[0]]#So that the index is an integer but
        #Not a list
        
        hash_pattern = ''
        if df_run_info.Category[i] == 'A':
            hash_pattern = hash_pattern.join('5P(BC)')
        else:
            hash_pattern = hash_pattern.join('5PNNNNNNNNNN(BC)')
        
    
        open('{local_path}/features_{HTO_sample}.csv'.format(local_path = local_path, 
                                                             HTO_sample = lib_ids[i]), 'a+').write(
        
            hashtag_name+','+
            
            hashtag_name+','+
            
            'R2,'+
            hash_pattern+ ','+
            
            hash_sequence+','+
            
            
            'Antibody Capture\n')
            
    

## CellPlex

This part is for generating CMO lib input. Probably won't need this in a while.

In [38]:
cmo_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'CMO')].values.tolist()
gex_libs_index = df_run_info.index[df_run_info['Library Type'] == 'GEX'].values.tolist()
cmo_gex_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'GEX') & df_run_info['CMO'] == 1].values.tolist()

In [84]:
CMO_ref

Unnamed: 0,Number,Sequence
0,CMO301,ATGAGGAATTCCTGC
1,CMO302,CATGCCAATAGAGCG
2,CMO303,CCGTCGTCCAAGCAT
3,CMO304,AACGTTAATCACTCA
4,CMO305,CGCGATATGGTCGGA
5,CMO306,AAGATGAGGTCTGTG
6,CMO307,AAGCTCGTTGGAAGA
7,CMO308,CGGATTCCACATCAT
8,CMO309,GTTGATCTATAACAG
9,CMO310,GCAGGAGGTATCAAT


### CMO config csv

[gene-expression]
reference,/path/to/transcriptome

[libraries]
fastq_id,fastqs,feature_types
gex1,/path/to/fastqs,Gene Expression
mux1,/path/to/fastqs,Multiplexing Capture

[samples]
sample_id,cmo_ids
sample1,CMO301|CMO302
sample2,CMO303|CMO304

In [85]:
rows_features = []

for i in cmo_libs_index:
    
    ###
    species = df_run_info.Species[i]
    
    genome = ''
    species_ver = ''
    
    if species == 'human':
        species_ver = species_ver.join('GRCh38')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(species_ver))
        
    elif species == 'mouse':
        species_ver = species_ver.join('mm10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(species_ver))
        
    ###
    hash_number = df_run_info.Number[i].split(',')
    hashtags =[]
    for hashtag in hash_number:
        
        hashtag = int(hashtag)
        
        hash_sequence_index = hashtag-1
        
        hash_sequence = CMO_ref.Sequence[hash_sequence_index]
        hashtag_name = CMO_ref.Number[hash_sequence_index]
        
        hashtags.append(hashtag_name)
    
    
    
    open('{local_path}/CMO_{CMO_sample}.csv'.format(local_path = local_path, 
                                                         CMO_sample = lib_ids[i]), 'w').write('[gene-expression]\n')
    
    open('{local_path}/CMO_{CMO_sample}.csv'.format(local_path = local_path, 
                                                             CMO_sample = lib_ids[i]), 'a+').write(
        'reference,' + 
        genome+ '\n' +
        
        '\n'+
        
        ######
        '[libraries]\n'+
        
        'fastq_id,fastqs,feature_types\n'+
        
        ##
        
        lib_ids[ cmo_gex_libs_index[cmo_libs_index.index(i)] ]+ #May edit this later
        
        ","+
        
        "/data/srlab/bwh10x/{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{lib_id}/,".format(raw_folder = raw_folder_name, 
                                                                                      flow_cell = flow_cell_id,
                                                                                      lib_id = lib_ids[ cmo_gex_libs_index[cmo_libs_index.index(i)] ]) + 
        
        "Gene Expression\n"+

    
        ##
        lib_ids[i]+
        
        ","+
        
        "/data/srlab/bwh10x/{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{lib_id}/,".format(raw_folder = raw_folder_name,
                                                                                              flow_cell = flow_cell_id,
                                                                                              lib_id = lib_ids[i]) +  
        
        "Multiplexing Capture\n" +
        
        ######
        
        '\n'+
        
        '[samples]\n'+
        
        'sample_id,cmo_ids\n'+
        
        lib_ids[i]+ ','+
        
        '|'.join(hashtags))
        

### CMO: Parameters for the -multi command 

In [104]:
rows_multi = []
for i in cmo_libs_index:
    
    species = df_run_info.Species[i]
    
    species_ver = ''
    
    if species == 'human':
        species_ver = species_ver.join('GRCh38')
        
    elif species == 'mouse':
        species_ver = species_ver.join('mm10')
        
    path_to_config = '/data/srlab/bwh10x/{raw_folder_name}/CMO_{CMO_sample}.csv'.format(raw_folder_name = raw_folder_name,
                                                                                        CMO_sample = lib_ids[i])
    
    rows_multi.append([raw_folder_name,
                       cellranger_version,
                       species_ver,
                       '{CMO_sample}_CMO'.format(CMO_sample = lib_ids[ cmo_gex_libs_index[cmo_libs_index.index(i)]]), 
                       path_to_config])


In [103]:
df_multi = pd.DataFrame(rows_multi)
df_multi

Unnamed: 0,0,1,2,3,4
0,210504_10X_KW8909_bcl,cellranger-6.0.1,GRCh38,BRI-1194_CMO.csv,/data/srlab/bwh10x/210504_10X_KW8909_bcl/CMO_B...
1,210504_10X_KW8909_bcl,cellranger-6.0.1,GRCh38,BRI-1197_CMO.csv,/data/srlab/bwh10x/210504_10X_KW8909_bcl/CMO_B...


In [173]:
df_multi.to_csv('/Users/qxiao/Desktop/single_cell_core/{work_order}/lsf_params_multi_{work_order}'.format(
    work_order = work_order), 
                        sep='\t', 
                        index = False,
                        header = False)

NameError: name 'df_multi' is not defined

# Count and Count feature files

This part is for generating parameter files that will be used when running `cellranger count`.

## Count(GEX only)

In [11]:
gex_only_lib_index = df_run_info[(df_run_info['Library Type'] == 'GEX') 
                                 & (df_run_info.HTO + df_run_info.ADT +df_run_info.CMO + df_run_info['TCR/BCR'] == 0)].index.values.tolist()

In [14]:
gex_only_lib_index

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24]

In [29]:
#Note that in your run info spreadsheet, the species should be all in lower cases

rows_count = []
for i in gex_only_lib_index:
    genome = ''
    gex_species = ''
    version = lib_version[i]
    if lib_species[i] == 'human':
        gex_species = gex_species.join('GRCh38')
        
        prepend='' # gex or cellranger reference
        if version=='2020-A':
            prepend = prepend.join('gex')
        elif version=='3.0.0':
            prepend = prepend.join('cellranger')
            
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-{}-{}-{}'.format(prepend,gex_species,version))
        
    elif lib_species[i] == 'mouse':
        gex_species = gex_species.join('mm10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(gex_species))
    
    elif lib_species[i] == 'zebrafish':
        gex_species = gex_species.join('GRCz10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/Danio.rerio_genome')
        
    rows_count.append([raw_folder_name, 
                               lib_ids[i], 
                               genome,
                               cellranger_version,
                               gex_species])


In [30]:
df_count = pd.DataFrame(rows_count)
df_count

Unnamed: 0,0,1,2,3,4
0,211008_10X_KW9393_bcl,BRI-1391,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
1,211008_10X_KW9393_bcl,BRI-1392,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
2,211008_10X_KW9393_bcl,BRI-1393,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
3,211008_10X_KW9393_bcl,BRI-1394,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
4,211008_10X_KW9393_bcl,BRI-1391-M,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
5,211008_10X_KW9393_bcl,BRI-1392-M,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
6,211008_10X_KW9393_bcl,BRI-1393-M,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
7,211008_10X_KW9393_bcl,BRI-1394-M,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
8,211008_10X_KW9393_bcl,BRI-1395,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38
9,211008_10X_KW9393_bcl,BRI-1396,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38


In [193]:
df_count.to_csv('/Users/curtism/Documents/2021/singlecellcore/qian_scripts/input/{work_order}/lsf_params_count_{work_order}'.format(
    work_order = work_order), 
                        sep='\t', 
                        index = False,
                        header = False)

## Nucseq

In [179]:
nuc_libs_index = df_run_info[(df_run_info['Library Type'] == 'NUC')].index.values.tolist()

In [180]:
nuc_libs_index

[]

In [48]:
rows_count = []
for i in nuc_libs_index:
    genome = ''
    gex_species = ''
    if lib_species[i] == 'human':
        gex_species = gex_species.join('GRCh38')
        genome = genome.join('/data/srlab/external-data/10xgenomics/GRCh38-1.2.0_premrna')
        
    rows_count.append([raw_folder_name, 
                               lib_ids[i], 
                               genome,
                               cellranger_version,
                               gex_species])


In [51]:
df_count = pd.DataFrame(rows_count)

In [50]:
df_count.to_csv('/Users/qxiao/Desktop/single_cell_core/{work_order}/lsf_params_count_nucseq_{work_order}'.format(
    work_order = work_order), 
                        sep='\t', 
                        index = False,
                        header = False)

## Count feature

This part is for generating count feature parameters when running `cellranger count` for ADT/HTO libs

In [114]:
#human_ref_path = '/data/srlab/external-data/10xgenomics/refdata-gex-GRCh38-2020-A'
#mouse_ref_path = '/data/srlab/external-data/10xgenomics/refdata-gex-mm10-2020-A'
#lib_species = df_run_info.Species

In [40]:
#pipe_new = 'cellranger-6.0.1'
#pipe_old2 = 'cellranger-4.0.0'
#pipe_old = 'cellranger-3.1.0'
#lib_species = df_run_info.Species

### ADT

In [38]:
adt_libs_index

[26, 28]

In [46]:
rows_count_feature = []
for i in adt_libs_index:
    adt_version = lib_version[i]
    adt_species = ''
    prepend= 'gex'
    if lib_species[i] == 'human':
        adt_species = adt_species.join('GRCh38')
        if lib_version[i]=='3.0.0':
            prepend = 'cellranger'
    elif lib_species[i] == 'mouse':
        adt_species = adt_species.join('mm10')
        
        
    rows_count_feature.append([raw_folder_name, 
                               lib_ids[ adt_gex_libs_index[adt_libs_index.index(i)] ], 
                               '/data/srlab/external-data/10xgenomics/refdata-{}-{}-{}'.format(prepend,adt_species,adt_version),
                               'features.csv',
                               'library_{ADT_sample}_ADT.csv'.format(ADT_sample = lib_ids[i]),
                               cellranger_version,
                               adt_species])

In [47]:
df_count_feature = pd.DataFrame(rows_count_feature)

In [48]:
df_count_feature.head()

Unnamed: 0,0,1,2,3,4,5,6
0,211008_10X_KW9393_bcl,BRI-1412,/data/srlab/external-data/10xgenomics/refdata-...,features.csv,library_BRI-1413_ADT.csv,cellranger-6.1.1,GRCh38
1,211008_10X_KW9393_bcl,BRI-1414,/data/srlab/external-data/10xgenomics/refdata-...,features.csv,library_BRI-1415_ADT.csv,cellranger-6.1.1,GRCh38


In [198]:
df_count_feature.to_csv('/Users/curtism/Documents/2021/singlecellcore/qian_scripts/input/{work_order}/lsf_params_count_feature_{work_order}_ADT'.format(
    work_order = work_order), 
                        sep='\t', 
                        index = False,
                        header = False)


### HTO

In [49]:
rows_count_feature = []
for i in hto_libs_index:
    genome = ''
    hto_species = ''
    hto_version = lib_version[i]
    if lib_species[i] == 'human':
        hto_species = hto_species.join('GRCh38')
        
        prepend='' # gex or cellranger reference
        if version=='2020-A':
            prepend = prepend.join('gex')
        elif version=='3.0.0':
            prepend = prepend.join('cellranger')

        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-{}-{}-{}'.format(prepend,hto_species,hto_version))
        
    elif lib_species[i] == 'mouse':
        hto_species = hto_species.join('mm10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(hto_species))
    
    elif lib_species[i] == 'zebrafish':
        hto_species = hto_species.join('GRCz10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/Danio.rerio_genome')
        
    rows_count_feature.append([raw_folder_name, 
                               lib_ids[ hto_gex_libs_index[hto_libs_index.index(i)] ]+'_hashing', 
                               genome,
                               'features_{HTO_sample}.csv'.format(HTO_sample = lib_ids[i]),
                               'library_{HTO_sample}_hashing.csv'.format(HTO_sample = lib_ids[i]),
                               cellranger_version,
                               hto_species])

In [50]:
df_count_feature = pd.DataFrame(rows_count_feature)
df_count_feature.head()

Unnamed: 0,0,1,2,3,4,5,6
0,211008_10X_KW9393_bcl,BRI-1399_hashing,/data/srlab/external-data/10xgenomics/refdata-...,features_BRI-1400.csv,library_BRI-1400_hashing.csv,cellranger-6.1.1,GRCh38
1,211008_10X_KW9393_bcl,BRI-1401_hashing,/data/srlab/external-data/10xgenomics/refdata-...,features_BRI-1402.csv,library_BRI-1402_hashing.csv,cellranger-6.1.1,GRCh38
2,211008_10X_KW9393_bcl,BRI-1403_hashing,/data/srlab/external-data/10xgenomics/refdata-...,features_BRI-1404.csv,library_BRI-1404_hashing.csv,cellranger-6.1.1,GRCh38
3,211008_10X_KW9393_bcl,BRI-1405_hashing,/data/srlab/external-data/10xgenomics/refdata-...,features_BRI-1406.csv,library_BRI-1406_hashing.csv,cellranger-6.1.1,GRCh38
4,211008_10X_KW9393_bcl,BRI-1407_hashing,/data/srlab/external-data/10xgenomics/refdata-...,features_BRI-1408.csv,library_BRI-1408_hashing.csv,cellranger-6.1.1,GRCh38


In [201]:
df_count_feature.to_csv('/Users/curtism/Documents/2021/singlecellcore/qian_scripts/input/{work_order}/lsf_params_count_feature_{work_order}_HTO'.format(
    work_order = work_order), 
                        sep='\t', 
                        index = False,
                        header = False)

## TCR/BCR

In [38]:
vdj_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'TCR')| (df_run_info['Library Type'] == 'BCR')| (df_run_info['Library Type'] == 'TCR/BCR')| (df_run_info['Library Type'] == 'BCR/TCR') ].values.tolist()



In [18]:
vdj_libs_index

[29]

In [204]:
rows_vdj = []
for i in vdj_libs_index:
    
    genome = ''
    vdj_species = ''
    vdj_species2 = ''
    
    if lib_species[i] == 'human':
        vdj_species = vdj_species.join('GRCh38')
        vdj_species2 = vdj_species2.join('GRCh38')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-cellranger-vdj-{}-alts-ensembl-5.0.0'.format(vdj_species))
        
    elif lib_species[i] == 'mouse':
        vdj_species = vdj_species.join('GRCm38')
        vdj_species2 = vdj_species2.join('mm10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-cellranger-vdj-{}-alts-ensembl-5.0.0'.format(vdj_species))
 
    rows_vdj.append([raw_folder_name, 
                     lib_ids[i], 
                     genome,
                     cellranger_version,
                     vdj_species2])

In [205]:
df_vdj = pd.DataFrame(rows_vdj)
df_vdj.head()

Unnamed: 0,0,1,2,3,4
0,211008_10X_KW9393_bcl,BRI-1416,/data/srlab/external-data/10xgenomics/refdata-...,cellranger-6.1.1,GRCh38


In [206]:
df_vdj.to_csv('/Users/curtism/Documents/2021/singlecellcore/qian_scripts/input/{work_order}/lsf_params_vdj_{work_order}'.format(
    work_order = work_order), 
                        sep='\t', 
                        index = False,
                        header = False)


## Visium

In [76]:
visium_libs_index = df_run_info.index[df_run_info['Library Type'] == 'Visium'].values.tolist()

In [77]:
visium_libs_index

[31, 32, 33, 34]

In [78]:
lib_species = df_run_info.Species.tolist()
lib_slide = df_run_info.slide.tolist()
lib_area = df_run_info.area.tolist()

space_ranger = 'spaceranger-1.2.1'
space_pipe_ver = space_ranger

In [132]:
lib_species_full = []
for i in range(len(lib_species)):
    if lib_species[i] ==  'human':
        lib_species_full.append('GRCh38')
        
    elif lib_species[i] ==  'mouse':
        lib_species_full.append('mm10')
        
    elif lib_species[i] ==  'zebrafish':
        lib_species_full.append('GRCz10')
        
        

Without alignment:

In [133]:

rows_visium = []
for i in visium_libs_index:
    
    genome = ''
    visium_species = ''
    
    if lib_species[i] == 'human':
        visium_species = visium_species.join('GRCh38')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(visium_species))
        
    elif lib_species[i] == 'mouse':
        visium_species = visium_species.join('mm10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(visium_species))
        
        
    rows_visium.append([raw_folder_name, 
                     lib_ids[i], 
                     '/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(lib_species_full[i]),
                     space_pipe_ver,
                     visium_species,
                     '/data/srlab/bwh10x/{raw_folder}/images/{area}.tif'.format(
                          raw_folder = raw_folder_name,
                          area = lib_area[i]),
                      lib_slide[i],
                      lib_area[i]])

With aligntment:

In [80]:
rows_visium = []
for i in visium_libs_index:
    
    genome = ''
    visium_species = ''
    
    if lib_species[i] == 'human':
        visium_species = visium_species.join('GRCh38')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(visium_species))
        
    elif lib_species[i] == 'mouse':
        visium_species = visium_species.join('mm10')
        genome = genome.join('/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(visium_species))
        
        
    rows_visium.append([raw_folder_name, 
                     lib_ids[i], 
                     '/data/srlab/external-data/10xgenomics/refdata-gex-{}-2020-A'.format(visium_species),
                     space_pipe_ver,
                     visium_species,
                     '/data/srlab/bwh10x/{raw_folder}/images/{area}.tif'.format(
                          raw_folder = raw_folder_name,
                          area = lib_area[i]),
                      lib_slide[i],
                      lib_area[i],
                      '/data/srlab/bwh10x/{raw_folder}/alignment/{slide}-{area}.json'.format(
                          raw_folder = raw_folder_name,
                          slide = lib_slide[i],
                          area = lib_area[i]) ])

In [81]:
df_visium = pd.DataFrame(rows_visium)
df_visium.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,210624_A00873_0376_BHC3KTDSX2_KW9079,BRI-1267,/data/srlab/external-data/10xgenomics/refdata-...,spaceranger-1.2.1,GRCh38,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...,V10M02-035,A1,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...
1,210624_A00873_0376_BHC3KTDSX2_KW9079,BRI-1268,/data/srlab/external-data/10xgenomics/refdata-...,spaceranger-1.2.1,GRCh38,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...,V10M02-035,B1,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...
2,210624_A00873_0376_BHC3KTDSX2_KW9079,BRI-1269,/data/srlab/external-data/10xgenomics/refdata-...,spaceranger-1.2.1,GRCh38,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...,V10M02-035,C1,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...
3,210624_A00873_0376_BHC3KTDSX2_KW9079,BRI-1270,/data/srlab/external-data/10xgenomics/refdata-...,spaceranger-1.2.1,GRCh38,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...,V10M02-035,D1,/data/srlab/bwh10x/210624_A00873_0376_BHC3KTDS...


In [82]:
df_visium.to_csv('/Users/qxiao/Desktop/single_cell_core/{work_order}/lsf_params_space_{work_order}'.format(
    work_order = work_order), 
                        sep='\t', 
                        index = False,
                        header = False)


# Transfer to Broad

In [41]:
users = df_run_info.User[df_run_info.User.notna()].tolist()

In [42]:
set(users)

{'callan@rics.bwh.harvard.edu',
 'dfdwyer@bwh.harvard.edu',
 'hfaust@bwh.harvard.edu',
 'tsasaki2@bwh.harvard.edu',
 'veronica_rendo@dfci.harvard.edu',
 'ymebratu@bwh.harvard.edu'}

In [71]:
# Get index for all shared ADT/HTO libraries
adt_hto_libs_index=list(set(adt_libs_index).intersection(hto_libs_index))
adt_hto_gex_libs_index = df_run_info.index[(df_run_info['Library Type'] == 'GEX') & (df_run_info['HTO'] == 1) & 
                                           (df_run_info['ADT'] == 1)].values.tolist()

adt_only_libs_index=list(set(adt_libs_index).difference(hto_libs_index))

hto_only_libs_index=list(set(hto_libs_index).difference(adt_libs_index))

In [142]:
#HTO and ADT both present for some GEX
#Some GEX have HTO/ADT combined

for u in set(users):
    #user_short = u.split(' ')[0]#Only taking the first element of the user name, the name elements should be separated by space
    user_short = u.split('@')[0]#Only taking the first element of the user name, the name elements should be separated by @
    
    open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'w')

    for g in gex_only_lib_index:
        if df_run_info.User[g] == u:
            species = ''
            if lib_species[g] == 'human':
                species = species.join('GRCh38')
            elif lib_species[g] == 'mouse':
                species = species.join('mm10')
            elif lib_species[g] == 'zebrafish':
                species = species.join('GRCz10')
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/{cellranger_version}/{species}/{sample_name}/outs/\n'.format(raw_folder = raw_folder_name,
                                                                               cellranger_version = cellranger_version,
                                                                               species = species,
                                                                               sample_name = lib_ids[g])
             )  
            
    for a in adt_only_libs_index:
        if df_run_info.User[a] == u:
            species = ''
            if lib_species[a] == 'human':
                species = species.join('GRCh38')
            elif lib_species[a] == 'mouse':
                species = species.join('mm10')
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/{cellranger_version}/{species}/{sample_name}_ADT/outs/\n'.format(raw_folder = raw_folder_name,
                                                                               cellranger_version = cellranger_version,
                                                                               species = species,
                                                                               sample_name = lib_ids[ adt_gex_libs_index[adt_libs_index.index(a)] ])
             )
             
    for h in hto_only_libs_index:
         if df_run_info.User[h] == u:
            species = ''
            if lib_species[h] == 'human':
                species = species.join('GRCh38')
            elif lib_species[h] == 'mouse':
                species = species.join('mm10')
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/{cellranger_version}/{species}/{sample_name}_hashing/outs/\n'.format(raw_folder = raw_folder_name,
                                                                               cellranger_version = cellranger_version,
                                                                               species = species,
                                                                               sample_name = lib_ids[ hto_gex_libs_index[hto_libs_index.index(h)] ])
             )  

    for ah in adt_hto_libs_index:
         if df_run_info.User[ah] == u:
            species = ''
            if lib_species[ah] == 'human':
                species = species.join('GRCh38')
            elif lib_species[ah] == 'mouse':
                species = species.join('mm10')
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/{cellranger_version}/{species}/{sample_name}/outs/\n'.format(raw_folder = raw_folder_name,
                                                                               cellranger_version = cellranger_version,
                                                                               species = species, 
                                                                               sample_name = lib_ids[adt_hto_gex_libs_index[adt_hto_libs_index.index(ah)] ])
             )  

    for v in vdj_libs_index:
        if df_run_info.User[v] == u:
            species = ''
            if lib_species[v] == 'human':
                species = species.join('GRCh38')
            elif lib_species[v] == 'mouse':
                species = species.join('mm10')
            elif lib_species[v] == 'zebrafish':
                species = species.join('GRCz10')
                
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/{cellranger_version}/{species}/{sample_name}/outs/\n'.format(raw_folder = raw_folder_name,
                                                                               cellranger_version = cellranger_version,
                                                                               species = species,
                                                                               sample_name = lib_ids[v])
             )  
            
#     for vi in visium_libs_index:
#         if df_run_info.User[vi] == u:
#             species = ''
#             if lib_species[vi] == 'human':
#                 species = species.join('GRCh38')
#             elif lib_species[vi] == 'mouse':
#                 species = species.join('mm10')
#             elif lib_species[vi] == 'zebrafish':
#                 species = species.join('GRCz10')
                
#             open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
#                                                                   work_order = work_order,
#                                                                   user = user_short), 'a+').write(
             
#              '{raw_folder}/{cellranger_version}/{species}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
#                                                                                cellranger_version = cellranger_version,
#                                                                                species = species,
#                                                                                sample_name = lib_ids[vi])
#              )
            
#     for n in nuc_libs_index:
#         if df_run_info.User[n] == u:
#             species = ''
#             if lib_species[n] == 'human':
#                 species = species.join('GRCh38')
#             elif lib_species[n] == 'mouse':
#                 species = species.join('mm10')
#             elif lib_species[n] == 'zebrafish':
#                 species = species.join('GRCz10')
                
#             open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
#                                                                   work_order = work_order,
#                                                                   user = user_short), 'a+').write(
             
#              '{raw_folder}/{cellranger_version}/{species}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
#                                                                                cellranger_version = cellranger_version,
#                                                                                species = species,
#                                                                                sample_name = lib_ids[n])
#              )
            
    
#     for c in cmo_libs_index:
#         if df_run_info.User[c] == u:
#             species = ''
#             if lib_species[c] == 'human':
#                 species = species.join('GRCh38')
#             elif lib_species[c] == 'mouse':
#                 species = species.join('mm10')
#             elif lib_species[c] == 'zebrafish':
#                 species = species.join('GRCz10')
                
#             open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
#                                                                   work_order = work_order,
#                                                                   user = user_short), 'a+').write(
             
#              '{raw_folder}/{cellranger_version}/{species}/{sample_name}_CMO/\n'.format(raw_folder = raw_folder_name,
#                                                                                cellranger_version = cellranger_version,
#                                                                                species = species,
#                                                                                sample_name = lib_ids[cmo_gex_libs_index[cmo_libs_index.index(c)]])
#              )

             
 
    
    

Additionally, send FASTQs for those users who request it.

In [148]:
fastq_users=df_run_info.loc[df_run_info["FASTQ request"]=="Y"].User.tolist()

set(fastq_users)

{'dfdwyer@bwh.harvard.edu',
 'hfaust@bwh.harvard.edu',
 'tsasaki2@bwh.harvard.edu'}

In [143]:
#HTO and ADT both present for some GEX
#Some GEX have HTO/ADT combined

for u in set(fastq_users):
    #user_short = u.split(' ')[0]#Only taking the first element of the user name, the name elements should be separated by space
    user_short = u.split('@')[0]#Only taking the first element of the user name, the name elements should be separated by @
    
    for g in gex_only_lib_index:
        if df_run_info.User[g] == u:
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
                                                                               flow_cell = flow_cell_id,
                                                                               sample_name = lib_ids[g])
             )  
            
    for a in adt_only_libs_index:
        if df_run_info.User[a] == u:
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
                                                                               flow_cell = flow_cell_id,
                                                                               sample_name = lib_ids[ adt_gex_libs_index[adt_libs_index.index(a)] ])
             )
             
    for h in hto_only_libs_index:
         if df_run_info.User[h] == u:
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
                                                                               flow_cell = flow_cell_id,
                                                                               sample_name = lib_ids[ hto_gex_libs_index[hto_libs_index.index(h)] ])
             )  

    for ah in adt_hto_libs_index:
         if df_run_info.User[ah] == u:
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
                                                                               flow_cell = flow_cell_id,
                                                                               sample_name = lib_ids[adt_hto_gex_libs_index[adt_hto_libs_index.index(ah)] ])
             )  

    for v in vdj_libs_index:
        if df_run_info.User[v] == u:
                
            open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
                                                                  work_order = work_order,
                                                                  user = user_short), 'a+').write(
             
             '{raw_folder}/FASTQS/outs/fastq_path/{flow_cell}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
                                                                               flow_cell = flow_cell_id,
                                                                               sample_name = lib_ids[v])
             )  
            
#     for vi in visium_libs_index:
#         if df_run_info.User[vi] == u:
#             species = ''
#             if lib_species[vi] == 'human':
#                 species = species.join('GRCh38')
#             elif lib_species[vi] == 'mouse':
#                 species = species.join('mm10')
#             elif lib_species[vi] == 'zebrafish':
#                 species = species.join('GRCz10')
                
#             open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
#                                                                   work_order = work_order,
#                                                                   user = user_short), 'a+').write(
             
#              '{raw_folder}/{cellranger_version}/{species}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
#                                                                                cellranger_version = cellranger_version,
#                                                                                species = species,
#                                                                                sample_name = lib_ids[vi])
#              )
            
#     for n in nuc_libs_index:
#         if df_run_info.User[n] == u:
#             species = ''
#             if lib_species[n] == 'human':
#                 species = species.join('GRCh38')
#             elif lib_species[n] == 'mouse':
#                 species = species.join('mm10')
#             elif lib_species[n] == 'zebrafish':
#                 species = species.join('GRCz10')
                
#             open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
#                                                                   work_order = work_order,
#                                                                   user = user_short), 'a+').write(
             
#              '{raw_folder}/{cellranger_version}/{species}/{sample_name}/\n'.format(raw_folder = raw_folder_name,
#                                                                                cellranger_version = cellranger_version,
#                                                                                species = species,
#                                                                                sample_name = lib_ids[n])
#              )
            
    
#     for c in cmo_libs_index:
#         if df_run_info.User[c] == u:
#             species = ''
#             if lib_species[c] == 'human':
#                 species = species.join('GRCh38')
#             elif lib_species[c] == 'mouse':
#                 species = species.join('mm10')
#             elif lib_species[c] == 'zebrafish':
#                 species = species.join('GRCz10')
                
#             open('{local_path}/send_files_{work_order}_{user}.txt'.format(local_path = local_path, 
#                                                                   work_order = work_order,
#                                                                   user = user_short), 'a+').write(
             
#              '{raw_folder}/{cellranger_version}/{species}/{sample_name}_CMO/\n'.format(raw_folder = raw_folder_name,
#                                                                                cellranger_version = cellranger_version,
#                                                                                species = species,
#                                                                                sample_name = lib_ids[cmo_gex_libs_index[cmo_libs_index.index(c)]])
#              )

             
 
    
    

# Check files are correct
There might be scenarios that the script failed to include, so always double check the input files are correct before running it.