In [9]:
import matplotlib.pyplot as plt
import pandas as p
import seaborn as sns
import numpy as np
%matplotlib inline
from scipy.stats import linregress
from scipy.spatial import distance
from matplotlib import colors
import scipy
import os
import copy
from matplotlib.backends.backend_pdf import PdfPages
# from sklearn.decomposition import PCA
from ast import literal_eval
# sns.set()
# sns.set_context('talk')
# sns.set(font="Avenir")
sns.set_color_codes()
sns.set_style('white')
sns.set_style('ticks')
# sns.set_style({'font.family': 'Avenir'})

In [10]:
def flatten(t):
    """Concatenate the items of every sub-iterable of ``t`` into one list.

    Only a single level of nesting is removed: items that are themselves
    containers are copied into the result unchanged.

    Parameters
    ----------
    t : iterable of iterables
        The nested collection to flatten.

    Returns
    -------
    list
        All inner items, in the order they are encountered.
    """
    flat = []
    for sublist in t:
        flat.extend(sublist)
    return flat

In [11]:
# Inline primers: forward F201-F212 and reverse R301-R308.
f_list = [f'F{number}' for number in range(201, 213)]
r_list = [f'R{number}' for number in range(301, 309)]

# Nextera primers: only the listed N7xx / S5xx numbers are used.
n_list = [f'N{number}' for number in (716, 718, 719, 720, 721, 722, 723, 724, 726, 727, 728, 729)]
s_list = [f'S{number}' for number in (513, 515, 516, 517, 518, 520, 521, 522)]

# Sequencing-run prefixes; each one is combined with an N/S pair to name a FASTQ.
prefixes = ['070418_diagonal', '080618_Nextseq', '082818_Hiseq']

# Directory that receives the generated index-hopping sbatch scripts and sample sheets.
write_loc = 'SequenceToCount/index_hopping/'

In [7]:
# Every Nextera N/S index pair gets its own sample sheet, and the demultiplex
# job runs one array task per sheet (task k reads line k of the .inp file).
# The array size is therefore derived from the primer lists rather than typed
# in by hand, so it cannot fall out of step with the .inp file.
n_index_pairs = len(n_list) * len(s_list)

for prefix in prefixes:

    # Build every sample sheet exactly once as (sheet file name, rows).
    # Each row is: sample name, FASTQ name, forward primer, reverse primer.
    # The same rows feed both the per-pair sheets and the combined sheet, so
    # the two can never disagree.
    lane_sheets = []
    for n in n_list:
        for s in s_list:
            fastq_name = f'{prefix}_{n}{s}'
            rows = [f'{n}_{s}_{f}_{r}\t{fastq_name}\t{f}\t{r}\n'
                    for f in f_list
                    for r in r_list]
            lane_sheets.append((f'iHop_{prefix}_{n}{s}_Samples.txt', rows))

    ### Demultiplexing job: one array task per N/S index pair
    sbatch_name = f'iHop_{prefix}_demultiplex.sbatch'

    with open(f'{write_loc}{sbatch_name}','w') as sbatch_f:

        sbatch_f.write(f"""#!/usr/bin/env bash
#SBATCH -J {sbatch_name}
#SBATCH -p hns,dpetrov,normal,owners
#SBATCH -n 16
#SBATCH -N 1
#SBATCH -t 2-00:00
#SBATCH --array=1-{n_index_pairs}
#SBATCH --mem-per-cpu=2G
#SBATCH --requeue
#SBATCH -o SlurmFiles/slurm-%A_%a_%x.out
#SBATCH --mail-user=grantkinsler@gmail.com
#SBATCH --mail-type=END

sample_sheets=$(sed -n "$SLURM_ARRAY_TASK_ID"p iHop_{prefix}_demultiplex.inp)

module load python/3.6.1
module load biology
module load ncbi-blast+/2.7.1
python3 /home/groups/dpetrov/SOFTWARE/BarcodeCounter2-master_07272021update/barcodeCounter.py -fastqDir ../AllRawData/ -outputDir ../BarcodeCounts_index_hopping/{prefix}/ -templateSeq ../Template_doubleBC_Read1First.txt -sample $sample_sheets -multiBCFasta ../Primers_noP.fasta -pairedEnd -useUMI -numThreads 16 -demultiplexOnly -useBowtie2 -bowtie2Path /home/groups/dpetrov/SOFTWARE/bowtie2-2.2.6_new/ -readLength 108
""")

    ### Barcode-mapping job: a single task over the combined sample sheet
    sbatch_name = f'iHop_{prefix}_map_barcodes.sbatch'

    with open(f'{write_loc}{sbatch_name}','w') as sbatch_f:

        sbatch_f.write(f"""#!/usr/bin/env bash
#SBATCH -J {sbatch_name}
#SBATCH -p hns,dpetrov,normal,owners
#SBATCH -n 16
#SBATCH -N 1
#SBATCH -t 2-00:00
#SBATCH --array=1-1
#SBATCH --mem-per-cpu=2G
#SBATCH --requeue
#SBATCH -o SlurmFiles/slurm-%A_%a_%x.out
#SBATCH --mail-user=grantkinsler@gmail.com
#SBATCH --mail-type=END

module load python/3.6.1
module load biology
module load ncbi-blast+/2.7.1
python3 /home/groups/dpetrov/SOFTWARE/BarcodeCounter2-master_07272021update/barcodeCounter.py -fastqDir ../AllRawData/ -outputDir ../BarcodeCounts_index_hopping/{prefix}/ -templateSeq ../Template_doubleBC_Read1First.txt -sample iHop_{prefix}_All_Samples.txt -multiBCFasta ../Primers_noP.fasta -pairedEnd -useUMI -numThreads 16 -skipSplitFastq -barcodeList ../500pool_noconstant_bothBCs_withSpikeIns.fasta -useBowtie2 -bowtie2Path /home/groups/dpetrov/SOFTWARE/bowtie2-2.2.6_new/ -readLength 108 -remapBarcodes
""")

    # Index file for the demultiplex array: one sample-sheet name per line.
    with open(f'{write_loc}iHop_{prefix}_demultiplex.inp','w') as inp_f:
        for sample_list_name, _ in lane_sheets:
            inp_f.write(f'{sample_list_name}\n')

    # One sample sheet per N/S index pair.
    for sample_list_name, rows in lane_sheets:
        with open(f'{write_loc}{sample_list_name}','w') as sample_f:
            sample_f.writelines(rows)

    # Combined sheet: the per-pair sheets concatenated in the same order.
    all_sample_list_name = f'iHop_{prefix}_All_Samples.txt'

    with open(f'{write_loc}{all_sample_list_name}','w') as all_sample_f:
        for _, rows in lane_sheets:
            all_sample_f.writelines(rows)


In [12]:
# Nextera index primers, declared again here so the cells below can run
# without executing the index-hopping section first.
n_list = [f'N{number}' for number in (716, 718, 719, 720, 721, 722, 723, 724, 726, 727, 728, 729)]
s_list = [f'S{number}' for number in (513, 515, 516, 517, 518, 520, 521, 522)]

In [13]:
# The sample sheet is tab-separated and is read without a header row, so the
# column names are attached after loading.
sample_sheet_columns = ['sample', 'lane', 'f_primer', 'r_primer']
samples = p.read_csv('SequenceToCount/All_Samples.txt', header=None, sep='\t')
samples.columns = sample_sheet_columns

In [14]:
samples['sample'].values

array(['3-0', '3-1-1', '3-1-2', '3-1-3', '3-2-1', '3-2-2', '3-2-3',
       '3-3-1', '3-3-2', '3-3-3', '3-4-1', '3-4-2', '3-4-3', '6-0',
       '6-1-1', '6-1-2', '6-1-3', '6-2-1', '6-2-2', '6-2-3', '6-3-1',
       '6-3-2', '6-3-3', '6-4-1', '6-4-2', '6-4-3', '13-0', '13-0_pool',
       '13-1-1', '13-1-2', '13-1-3', '13-2-1', '13-2-2', '13-2-3',
       '13-3-1', '13-3-2', '13-3-3', '13-4-1', '13-4-2', '13-4-3',
       '18-1-1', '18-1-2', '18-1-3', '18-2-1', '18-2-2', '18-2-3',
       '18-3-1', '18-3-2', '18-3-3', '18-4-1', '18-4-2', '18-4-3',
       '20-1-1', '20-1-2', '20-1-3', '20-2-1', '20-2-2', '20-2-3',
       '20-3-1', '20-3-2', '20-3-3', '20-4-1', '20-4-2', '20-4-3',
       '21-0-1', '21-0-2', '21-0-4', '21-1-1', '21-1-2', '21-1-3',
       '21-2-1', '21-2-2', '21-2-3', '21-3-1', '21-3-2', '21-3-3',
       '21-4-1', '21-4-2', '21-4-3', '21+wt', '23-0', '23-1-1', '23-1-2',
       '23-1-3', '23-2-1', '23-2-2', '23-2-3', '23-3-1', '23-3-2',
       '23-3-3', '23-4-1', '23-4-2', '23-4-3

In [15]:
samples

Unnamed: 0,sample,lane,f_primer,r_primer
0,3-0,Experiment1and3_Unindexed,F204,R301
1,3-1-1,Experiment1and3_Unindexed,F203,R302
2,3-1-2,Experiment1and3_Unindexed,F204,R302
3,3-1-3,Experiment1and3_Unindexed,F202,R305
4,3-2-1,Experiment1and3_Unindexed,F206,R301
5,3-2-2,Experiment1and3_Unindexed,F204,R305
6,3-2-3,Experiment1and3_Unindexed,F205,R305
7,3-3-1,Experiment1and3_Unindexed,F206,R305
8,3-3-2,Experiment1and3_Unindexed,F207,R305
9,3-3-3,Experiment1and3_Unindexed,F201,R306


In [16]:
# Give every distinct lane (in sorted order, via np.unique) a running integer
# id and record the sample names that appear on it.
groups = {}
lane_column = samples['lane']
for g, lane in enumerate(np.unique(lane_column.values)):
    on_this_lane = lane_column == lane
    groups[g] = samples.loc[on_this_lane, 'sample'].values



In [17]:
groups

{0: array(['B0-DE1-PCRa', 'B0-DE2-PCRa', 'B2-DE1-PCRa', 'B4-DE1-PCRa',
        'B4-DE2-PCRa', 'Y2-DE1-PCRa', 'Y3-DE1-PCRa', 'B3-DE1-PCRa'],
       dtype=object),
 1: array(['B0-DE1-PCRa', 'B0-DE2-PCRa', 'B0-DE2-PCRb', 'B0-DE2-PCRc',
        'B0-DE3-PCRa', 'B0-DE4-PCRa', 'B1-DE1-PCRa', 'B2-DE1-PCRa',
        'B2-DE1-PCRb', 'B2-DE2-PCRa', 'B2-DE3-PCRa', 'B2-DE4-PCRa',
        'B3-DE1-PCRb', 'B4-DE1-PCRa', 'B4-DE2-PCRa', 'B4-DE2-PCRb',
        'B4-DE3-PCRa', 'B4-DE4-PCRa', 'C0-DE1-PCRa', 'C0-DE1-PCRb',
        'C0-DE2-PCRa', 'C1-DE1-PCRb', 'C2-DE1-PCRa', 'C2-DE2-PCRa',
        'C3-DE1-PCRa', 'C4-DE1-PCRa', 'C4-DE2-PCRa', 'Tneg-DE1-PCRa',
        'Tneg-DE2-PCRa', 'Y0-DE1-PCRa', 'Y0-DE2-PCRa', 'Y0-DE2-PCRb',
        'Y0-DE3-PCRa', 'Y0-DE4-PCRa', 'Y1-DE1-PCRb', 'Y2-DE1-PCRa',
        'Y2-DE1-PCRb', 'Y2-DE2-PCRa', 'Y2-DE3-PCRa', 'Y2-DE4-PCRa',
        'Y3-DE1-PCRa', 'Y4-DE1-PCRa', 'Y4-DE1-PCRb', 'Y4-DE2-PCRa',
        'Y4-DE3-PCRa', 'Y4-DE4-PCRa', 'Z0-DE1-PCRa', 'Z0-DE2-PCRa',
        'Z1-DE1

In [18]:
groups = {}

# First assign each entry to a group: one group per distinct lane, holding
# the sample names sequenced on that lane.
for g, lane in enumerate(np.unique(samples['lane'].values)):
    groups[g] = samples[samples['lane'] == lane]['sample'].values


def _merge_overlapping_groups(groups):
    """Assign one merged id to every set of groups connected by shared samples.

    Two groups are linked when they have at least one sample name in common,
    and links are followed transitively: if A overlaps B and B overlaps C, all
    three receive the same merged id even when A and C share nothing directly.
    A pairwise "copy the id of the earlier group" pass cannot guarantee that,
    because a later pair can overwrite an id assigned by an earlier pair.

    Parameters
    ----------
    groups : dict
        Maps a group id to an iterable of sample names.

    Returns
    -------
    dict
        Maps each group id to its merged id. Merged ids are 0, 1, 2, ...
        numbered in the order their first member appears in ``groups``.
    """
    # Union-find forest: every group starts as its own root.
    parent = {g: g for g in groups}

    def find(g):
        # Walk up to the root, shortening the path along the way.
        while parent[g] != g:
            parent[g] = parent[parent[g]]
            g = parent[g]
        return g

    # Build each membership set once instead of once per compared pair.
    members = {g: set(names) for g, names in groups.items()}
    ids = list(groups)

    for position, g1 in enumerate(ids):
        for g2 in ids[position + 1:]:  # for every pair of groups
            if members[g1] & members[g2]:
                root1, root2 = find(g1), find(g2)
                if root1 != root2:
                    parent[root2] = root1

    # Number the connected components in order of first appearance.
    merged_id_of_root = {}
    groups_merged = {}
    for g in ids:
        root = find(g)
        if root not in merged_id_of_root:
            merged_id_of_root[root] = len(merged_id_of_root)
        groups_merged[g] = merged_id_of_root[root]
    return groups_merged


# merge groups when samples are the same
groups_merged = _merge_overlapping_groups(groups)

## invert the merged groups: merged id -> set of original group ids
group_merger = {}
for k, v in groups_merged.items():
    group_merger.setdefault(v, set()).add(k)

# Final groups: the sorted, de-duplicated sample names of every merged group.
actual_groups = {}
for g, group_names in group_merger.items():
    actual_groups[g] = np.unique(flatten([groups[name] for name in group_names]))
    

In [19]:
actual_groups

{0: array(['B0-DE1-PCRa', 'B0-DE2-PCRa', 'B0-DE2-PCRb', 'B0-DE2-PCRc',
        'B0-DE3-PCRa', 'B0-DE4-PCRa', 'B1-DE1-PCRa', 'B2-DE1-PCRa',
        'B2-DE1-PCRb', 'B2-DE2-PCRa', 'B2-DE3-PCRa', 'B2-DE4-PCRa',
        'B3-DE1-PCRa', 'B3-DE1-PCRb', 'B4-DE1-PCRa', 'B4-DE2-PCRa',
        'B4-DE2-PCRb', 'B4-DE3-PCRa', 'B4-DE4-PCRa', 'C0-DE1-PCRa',
        'C0-DE1-PCRb', 'C0-DE2-PCRa', 'C1-DE1-PCRa', 'C1-DE1-PCRb',
        'C2-DE1-PCRa', 'C2-DE2-PCRa', 'C3-DE1-PCRa', 'C4-DE1-PCRa',
        'C4-DE2-PCRa', 'Tneg-DE1-PCRa', 'Tneg-DE2-PCRa', 'Y0-DE1-PCRa',
        'Y0-DE2-PCRa', 'Y0-DE2-PCRb', 'Y0-DE3-PCRa', 'Y0-DE4-PCRa',
        'Y1-DE1-PCRa', 'Y1-DE1-PCRb', 'Y2-DE1-PCRa', 'Y2-DE1-PCRb',
        'Y2-DE2-PCRa', 'Y2-DE3-PCRa', 'Y2-DE4-PCRa', 'Y3-DE1-PCRa',
        'Y4-DE1-PCRa', 'Y4-DE1-PCRb', 'Y4-DE2-PCRa', 'Y4-DE3-PCRa',
        'Y4-DE4-PCRa', 'Z0-DE1-PCRa', 'Z0-DE2-PCRa', 'Z1-DE1-PCRa',
        'Z2-DE1-PCRa', 'Z2-DE1-PCRb', 'Z2-DE2-PCRa', 'Z3-DE1-PCRa',
        'Z4-DE1-PCRa', 'Z4-DE2-PCRa'], dt

In [20]:
write_loc = 'SequenceToCount/technical_replicates/'

# Index file for the array jobs: one sample-sheet file name per merged group,
# one name per line, numbered from 001.
sheet_names = [f'Group_{g+1:03}_Samples.txt' for g in actual_groups]

with open(write_loc + 'techRep_demultiplex.inp', 'w') as inp_f:
    inp_f.writelines(f'{sheet_name}\n' for sheet_name in sheet_names)
    


    ### CREATE SBATCH FILE!!!
            
sbatch_name = 'techRep_demultiplex.sbatch'

# One array task per merged group; task k reads line k of techRep_demultiplex.inp.
n_array_tasks = len(actual_groups)

# Used for both the SLURM `-n` request and barcodeCounter's -numThreads, so the
# job never starts more threads than it asked the scheduler for. The script
# previously requested 16 but launched 32 threads, unlike every other job
# generated in this notebook.
n_threads = 16

with open(f'{write_loc}{sbatch_name}','w') as sbatch_f:

    sbatch_f.write(f"""#!/usr/bin/env bash
#SBATCH -J techRep_demultiplex
#SBATCH -p hns,dpetrov,normal,owners
#SBATCH -n {n_threads}
#SBATCH -N 1
#SBATCH -t 2-00:00
#SBATCH --array=1-{n_array_tasks}
#SBATCH --mem-per-cpu=2G
#SBATCH --requeue
#SBATCH -o SlurmFiles/slurm-%A_%a_%x.out
#SBATCH --mail-user=grantkinsler@gmail.com
#SBATCH --mail-type=END

sample_sheets=$(sed -n "$SLURM_ARRAY_TASK_ID"p techRep_demultiplex.inp)

module load python/3.6.1
module load biology
module load ncbi-blast+/2.7.1
python3 /home/groups/dpetrov/SOFTWARE/BarcodeCounter2-master_07272021update/barcodeCounter.py -fastqDir ../AllRawData/ -outputDir ../BarcodeCounts_technical_replicates/ -templateSeq ../Template_doubleBC_Read1First.txt -sample $sample_sheets -multiBCFasta ../Primers_noP.fasta -pairedEnd -useUMI -numThreads {n_threads} -demultiplexOnly -useBowtie2 -bowtie2Path /home/groups/dpetrov/SOFTWARE/bowtie2-2.2.6_new/ -readLength 108
""")

    
# One sample sheet per merged group: every row of `samples` whose sample name
# belongs to the group, written as tab-separated
# sample / lane / forward primer / reverse primer.
for g, group in actual_groups.items():
    sheet_path = f'{write_loc}Group_{g+1:03}_Samples.txt'
    with open(sheet_path, 'w') as sample_f:
        this_group = samples[samples['sample'].isin(group)]
        sample_f.writelines(
            f'{name}\t{lane}\t{f_primer}\t{r_primer}\n'
            for name, lane, f_primer, r_primer in this_group.values
        )

# Combined sheet: the same rows for every group, concatenated in group order.
with open(f'{write_loc}AllSamples.txt', 'w') as sample_f:
    for group in actual_groups.values():
        this_group = samples[samples['sample'].isin(group)]
        for name, lane, f_primer, r_primer in this_group.values:
            sample_f.write(f'{name}\t{lane}\t{f_primer}\t{r_primer}\n')

            

            
### Mapping barcodes (one array task per merged group), with UMIs
sbatch_name = 'techRep_map_barcodes.sbatch'

# barcodeCounter invocation, one argument per entry; joined with single spaces.
map_command = ' '.join([
    'python3 /home/groups/dpetrov/SOFTWARE/BarcodeCounter2-master_07272021update/barcodeCounter.py',
    '-fastqDir ../AllRawData/',
    '-outputDir ../BarcodeCounts_technical_replicates/',
    '-templateSeq ../Template_doubleBC_Read1First.txt',
    '-sample $sample_sheets',
    '-multiBCFasta ../Primers_noP.fasta',
    '-pairedEnd',
    '-useUMI',
    '-numThreads 16',
    '-skipSplitFastq',
    '-barcodeList ../500pool_noconstant_bothBCs_withSpikeIns.fasta',
    '-useBowtie2',
    '-bowtie2Path /home/groups/dpetrov/SOFTWARE/bowtie2-2.2.6_new/',
    '-readLength 108',
    '-remapBarcodes',
])

# The script, line by line; empty strings are the blank separator lines.
map_script_lines = [
    '#!/usr/bin/env bash',
    '#SBATCH -J techRep_map',
    '#SBATCH -p hns,dpetrov,normal,owners',
    '#SBATCH -n 16',
    '#SBATCH -N 1',
    '#SBATCH -t 2-00:00',
    f'#SBATCH --array=1-{len(actual_groups)}',
    '#SBATCH --mem-per-cpu=2G',
    '#SBATCH --requeue',
    '#SBATCH -o SlurmFiles/slurm-%A_%a_%x.out',
    '#SBATCH --mail-user=grantkinsler@gmail.com',
    '#SBATCH --mail-type=END',
    '',
    'sample_sheets=$(sed -n "$SLURM_ARRAY_TASK_ID"p techRep_demultiplex.inp)',
    '',
    'module load python/3.6.1',
    'module load biology',
    'module load ncbi-blast+/2.7.1',
    map_command,
]

with open(write_loc + sbatch_name, 'w') as sbatch_f:
    # Every line, including the last, is newline-terminated.
    sbatch_f.write('\n'.join(map_script_lines) + '\n')
    
    

    
### Final count over the combined sample sheet (no re-mapping), with UMIs
sbatch_name = 'techRep_count_barcodes.sbatch'

# barcodeCounter invocation, one argument per entry; joined with single spaces.
count_command = ' '.join([
    'python3 /home/groups/dpetrov/SOFTWARE/BarcodeCounter2-master_07272021update/barcodeCounter.py',
    '-fastqDir ../AllRawData/',
    '-outputDir ../BarcodeCounts_technical_replicates/',
    '-templateSeq ../Template_doubleBC_Read1First.txt',
    '-sample AllSamples.txt',
    '-multiBCFasta ../Primers_noP.fasta',
    '-pairedEnd',
    '-useUMI',
    '-numThreads 16',
    '-skipSplitFastq',
    '-barcodeList ../500pool_noconstant_bothBCs_withSpikeIns.fasta',
    '-useBowtie2',
    '-bowtie2Path /home/groups/dpetrov/SOFTWARE/bowtie2-2.2.6_new/',
    '-readLength 108',
])

# The script, line by line; empty strings are the blank separator lines
# (two of them between the SBATCH header and the module loads).
count_script_lines = [
    '#!/usr/bin/env bash',
    '#SBATCH -J techRep_count',
    '#SBATCH -p hns,dpetrov,normal,owners',
    '#SBATCH -n 16',
    '#SBATCH -N 1',
    '#SBATCH -t 2-00:00',
    '#SBATCH --array=1-1',
    '#SBATCH --mem-per-cpu=2G',
    '#SBATCH --requeue',
    '#SBATCH -o SlurmFiles/slurm-%A_%a_%x.out',
    '#SBATCH --mail-user=grantkinsler@gmail.com',
    '#SBATCH --mail-type=END',
    '',
    '',
    'module load python/3.6.1',
    'module load biology',
    'module load ncbi-blast+/2.7.1',
    count_command,
]

with open(write_loc + sbatch_name, 'w') as sbatch_f:
    # Every line, including the last, is newline-terminated.
    sbatch_f.write('\n'.join(count_script_lines) + '\n')
       

       
        

In [21]:
#### Sample sheets for the analysis WITHOUT UMIs

write_loc = 'SequenceToCount/umi_analysis/'

# One sheet per merged group: the rows of `samples` whose sample name is in the
# group, tab-separated as sample / lane / forward primer / reverse primer.
for g, group in actual_groups.items():
    sheet_path = f'{write_loc}Group_{g+1:03}_Samples.txt'
    with open(sheet_path, 'w') as sample_f:
        this_group = samples[samples['sample'].isin(group)]
        for name, lane, f_primer, r_primer in this_group.values:
            sample_f.write(f'{name}\t{lane}\t{f_primer}\t{r_primer}\n')

# Combined sheet: the same rows for every group, concatenated in group order.
with open(f'{write_loc}AllSamples.txt', 'w') as sample_f:
    for group in actual_groups.values():
        this_group = samples[samples['sample'].isin(group)]
        sample_f.writelines(
            f'{name}\t{lane}\t{f_primer}\t{r_primer}\n'
            for name, lane, f_primer, r_primer in this_group.values
        )



### Mapping Barcodes (in parallel), WITHOUT UMIs!!!
sbatch_name = 'umi_analysis_map_barcodes.sbatch'

# The array job below picks its sample sheet from techRep_demultiplex.inp using
# a path relative to the directory it runs in, the same way it resolves the
# Group_XXX_Samples.txt sheets. Nothing else writes that index into this
# directory, so write it here; without it `sed` finds no file and every task
# starts with an empty $sample_sheets.
with open(f'{write_loc}techRep_demultiplex.inp','w') as inp_f:
    for g in actual_groups:
        inp_f.write(f'Group_{g+1:03}_Samples.txt\n')

with open(f'{write_loc}{sbatch_name}','w') as sbatch_f:

    # Same command as the technical-replicates mapping job except that -useUMI
    # is omitted and the output goes to BarcodeCounts_umi_analysis.
    sbatch_f.write(f"""#!/usr/bin/env bash
#SBATCH -J umi_map
#SBATCH -p hns,dpetrov,normal,owners
#SBATCH -n 16
#SBATCH -N 1
#SBATCH -t 2-00:00
#SBATCH --array=1-{len(actual_groups)}
#SBATCH --mem-per-cpu=2G
#SBATCH --requeue
#SBATCH -o SlurmFiles/slurm-%A_%a_%x.out
#SBATCH --mail-user=grantkinsler@gmail.com
#SBATCH --mail-type=END

sample_sheets=$(sed -n "$SLURM_ARRAY_TASK_ID"p techRep_demultiplex.inp)

module load python/3.6.1
module load biology
module load ncbi-blast+/2.7.1
python3 /home/groups/dpetrov/SOFTWARE/BarcodeCounter2-master_07272021update/barcodeCounter.py -fastqDir ../AllRawData/ -outputDir ../BarcodeCounts_umi_analysis/ -templateSeq ../Template_doubleBC_Read1First.txt -sample $sample_sheets -multiBCFasta ../Primers_noP.fasta -pairedEnd -numThreads 16 -skipSplitFastq -barcodeList ../500pool_noconstant_bothBCs_withSpikeIns.fasta -useBowtie2 -bowtie2Path /home/groups/dpetrov/SOFTWARE/bowtie2-2.2.6_new/ -readLength 108 -remapBarcodes
""")

### Final count over the combined sample sheet (no re-mapping), WITHOUT UMIs

sbatch_name = 'umi_analysis_count_barcodes.sbatch'

# barcodeCounter invocation, one argument per entry; joined with single spaces.
# There is no -useUMI flag in this command.
umi_count_command = ' '.join([
    'python3 /home/groups/dpetrov/SOFTWARE/BarcodeCounter2-master_07272021update/barcodeCounter.py',
    '-fastqDir ../AllRawData/',
    '-outputDir ../BarcodeCounts_umi_analysis/',
    '-templateSeq ../Template_doubleBC_Read1First.txt',
    '-sample AllSamples.txt',
    '-multiBCFasta ../Primers_noP.fasta',
    '-pairedEnd',
    '-numThreads 16',
    '-skipSplitFastq',
    '-barcodeList ../500pool_noconstant_bothBCs_withSpikeIns.fasta',
    '-useBowtie2',
    '-bowtie2Path /home/groups/dpetrov/SOFTWARE/bowtie2-2.2.6_new/',
    '-readLength 108',
])

# The script, line by line; empty strings are the blank separator lines
# (two of them between the SBATCH header and the module loads).
umi_count_script_lines = [
    '#!/usr/bin/env bash',
    '#SBATCH -J umi_count',
    '#SBATCH -p hns,dpetrov,normal,owners',
    '#SBATCH -n 16',
    '#SBATCH -N 1',
    '#SBATCH -t 2-00:00',
    '#SBATCH --array=1-1',
    '#SBATCH --mem-per-cpu=2G',
    '#SBATCH --requeue',
    '#SBATCH -o SlurmFiles/slurm-%A_%a_%x.out',
    '#SBATCH --mail-user=grantkinsler@gmail.com',
    '#SBATCH --mail-type=END',
    '',
    '',
    'module load python/3.6.1',
    'module load biology',
    'module load ncbi-blast+/2.7.1',
    umi_count_command,
]

with open(write_loc + sbatch_name, 'w') as sbatch_f:
    # Every line, including the last, is newline-terminated.
    sbatch_f.write('\n'.join(umi_count_script_lines) + '\n')

In [52]:
# Preview of the rows that go into the sample sheets: one tab-separated line
# per row of `samples`, group by group, each followed by a blank line.
for group in actual_groups.values():
    this_group = samples[samples['sample'].isin(group)]

    for name, lane, f_primer, r_primer in this_group.values:
        print(f'{name}\t{lane}\t{f_primer}\t{r_primer}', end='\n\n')

B0-DE1-PCRa	040418_Unindexed	F202	R302

B0-DE1-PCRa	051118_Unindexed	F202	R302

B0-DE2-PCRa	040418_Unindexed	F203	R303

B0-DE2-PCRa	051118_Unindexed	F203	R303

B0-DE2-PCRb	051118_Unindexed	F208	R302

B0-DE2-PCRc	051118_Unindexed	F209	R302

B0-DE3-PCRa	051118_Unindexed	F203	R302

B0-DE4-PCRa	051118_Unindexed	F212	R302

B1-DE1-PCRa	051118_Unindexed	F202	R308

B2-DE1-PCRa	040418_Unindexed	F204	R304

B2-DE1-PCRa	051118_Unindexed	F204	R304

B2-DE1-PCRb	051118_Unindexed	F208	R304

B2-DE2-PCRa	051118_Unindexed	F202	R304

B2-DE3-PCRa	051118_Unindexed	F205	R304

B2-DE4-PCRa	051118_Unindexed	F206	R304

B3-DE1-PCRb	051118_Unindexed	F209	R305

B4-DE1-PCRa	040418_Unindexed	F206	R306

B4-DE1-PCRa	051118_Unindexed	F206	R306

B4-DE2-PCRa	040418_Unindexed	F207	R307

B4-DE2-PCRa	051118_Unindexed	F207	R307

B4-DE2-PCRb	051118_Unindexed	F208	R306

B4-DE3-PCRa	051118_Unindexed	F203	R306

B4-DE4-PCRa	051118_Unindexed	F204	R306

C0-DE1-PCRa	051118_Unindexed	F205	R302

C0-DE1-PCRb	051118_Unindexed	F209	R303




GG2-DE1-PCRa	082818_Hiseq_N728S518	F211	R305

P3-DE1-PCRa	080618_Nextseq_N728S520	F211	R306

P3-DE1-PCRa	082818_Hiseq_N728S520	F211	R306

I4-DE1-PCRa	080618_Nextseq_N728S521	F211	R307

I4-DE1-PCRa	082818_Hiseq_N728S521	F211	R307

DD0-DE1-PCRa	080618_Nextseq_N729S513	F212	R301

DD0-DE1-PCRa	082818_Hiseq_N729S513	F212	R301

I1-DE1-PCRa	080618_Nextseq_N729S515	F212	R302

I1-DE1-PCRa	082818_Hiseq_N729S515	F212	R302

K0-DE1-PCRb	080618_Nextseq_N729S516	F212	R303

K0-DE1-PCRb	082818_Hiseq_N729S516	F212	R303

H2-DE1-PCRa	080618_Nextseq_N729S517	F212	R304

H2-DE1-PCRa	082818_Hiseq_N729S517	F212	R304

A3-DE1-PCRa	080618_Nextseq_N729S518	F212	R305

A3-DE1-PCRa	082818_Hiseq_N729S518	F212	R305

AA3-DE1-PCRa	080618_Nextseq_N729S520	F212	R306

AA3-DE1-PCRa	082818_Hiseq_N729S520	F212	R306

J4-DE1-PCRa	080618_Nextseq_N729S521	F212	R307

J4-DE1-PCRa	082818_Hiseq_N729S521	F212	R307

EE0-DE1-PCRa	090118_Nextseq_N716S517	F201	R304

EE0-DE1-PCRa	092518_lane1_Nextseq_N716S517	F201	R304

EE0-DE1-PCRa	092518


1.8%-R1-T3	090519_HiSeqX_N720S518	F204	R305

2.5%-R1-T3	090519_HiSeqX_N720S520	F204	R306

Ben0.4-R1-T3	090519_HiSeqX_N720S521	F204	R307

Ben2-R1-T3	090519_HiSeqX_N720S522	F204	R308

1.4%-R1-T4	090519_HiSeqX_N721S513	F205	R301

1.5%-R1-T4	090519_HiSeqX_N721S515	F205	R302

1.6%-R1-T4	090519_HiSeqX_N721S516	F205	R303

1.7%-R1-T4	090519_HiSeqX_N721S517	F205	R304

1.8%-R1-T4	090519_HiSeqX_N721S518	F205	R305

2.5%-R1-T4	090519_HiSeqX_N721S520	F205	R306

Ben0.4-R1-T4	090519_HiSeqX_N721S521	F205	R307

Ben2-R1-T4	090519_HiSeqX_N721S522	F205	R308

1.4%-R2-T1	090519_HiSeqX_N722S513	F206	R301

1.5%-R2-T1	090519_HiSeqX_N722S515	F206	R302

1.6%-R2-T1	090519_HiSeqX_N722S516	F206	R303

1.7%-R2-T1	090519_HiSeqX_N722S517	F206	R304

1.8%-R2-T1	090519_HiSeqX_N722S518	F206	R305

2.5%-R2-T1	090519_HiSeqX_N722S520	F206	R306

Ben0.4-R2-T1	090519_HiSeqX_N722S521	F206	R307

Ben2-R2-T1	090519_HiSeqX_N722S522	F206	R308

1.4%-R2-T2	090519_HiSeqX_N723S513	F207	R301

1.5%-R2-T2	090519_HiSeqX_N723S515	F207	R302

1.6

In [49]:
this_group[this_group.columns].values

array([['B0-DE1-PCRa', '040418_Unindexed', 'F202', 'R302'],
       ['B0-DE1-PCRa', '051118_Unindexed', 'F202', 'R302'],
       ['B0-DE2-PCRa', '040418_Unindexed', 'F203', 'R303'],
       ['B0-DE2-PCRa', '051118_Unindexed', 'F203', 'R303'],
       ['B0-DE2-PCRb', '051118_Unindexed', 'F208', 'R302'],
       ['B0-DE2-PCRc', '051118_Unindexed', 'F209', 'R302'],
       ['B0-DE3-PCRa', '051118_Unindexed', 'F203', 'R302'],
       ['B0-DE4-PCRa', '051118_Unindexed', 'F212', 'R302'],
       ['B1-DE1-PCRa', '051118_Unindexed', 'F202', 'R308'],
       ['B2-DE1-PCRa', '040418_Unindexed', 'F204', 'R304'],
       ['B2-DE1-PCRa', '051118_Unindexed', 'F204', 'R304'],
       ['B2-DE1-PCRb', '051118_Unindexed', 'F208', 'R304'],
       ['B2-DE2-PCRa', '051118_Unindexed', 'F202', 'R304'],
       ['B2-DE3-PCRa', '051118_Unindexed', 'F205', 'R304'],
       ['B2-DE4-PCRa', '051118_Unindexed', 'F206', 'R304'],
       ['B3-DE1-PCRb', '051118_Unindexed', 'F209', 'R305'],
       ['B4-DE1-PCRa', '040418_Unindexed