#### This notebook tests the pipeline for calling Base Calls (Single Nucleotide Variants) that differ between the two samples of a sample pair (*Longitudinal* or *Replicate* pair). This notebook also submits the jobs that run this variant extraction pipeline for each sample pair in our study.

In [1]:
#widen the notebook container to the full browser width
#(display & HTML are imported from the public IPython.display module; importing them from IPython.core.display is deprecated)
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

############################################################################################################################################################################################################################

### Prepare the script

############################################################################################################################################################################################################################

In [1]:
%matplotlib inline

import vcf
import os
import pandas as pd
import numpy as np
from itertools import compress
import ast
import itertools
import time
import sys
import pickle
import shutil

import Bio
from Bio.Alphabet import IUPAC
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import pairwise2
from Bio import SeqIO
from Bio.Graphics import GenomeDiagram
from Bio.SeqUtils import GC
from Bio.Align.Applications import MuscleCommandline
from StringIO import StringIO
from Bio import AlignIO
from Bio.Align import AlignInfo
from Bio.Seq import MutableSeq

### *Functions* to extract Base Calls (that have different alternate allele frequencies) from a pair of VCF files

In [2]:
def _classify_pilon_base_call(record):

    '''
    Classify a single VCF record from its ALT and FILTER fields.

    Returns a tuple (ref_allele , alt_allele , SNP_type_for_Call) where SNP_type_for_Call is
    'Ref_PASS' (no alternate allele & empty FILTER), 'Alt_PASS' (1 alternate allele & empty FILTER)
    or 'Amb' (1 alternate allele & FILTER == ['Amb']). Returns None for every other record
    (any other FILTER flag, or more than 1 entry in ALT).
    '''

    #only records with exactly 1 entry in ALT are considered
    if len(record.ALT) != 1:
        return None

    no_alternate_allele = (record.ALT == [None])
    has_alternate_allele = (record.ALT != [None])

    ### Call supporting REFERENCE (PASS)
    if no_alternate_allele and (record.FILTER == []):
        #'Z' is a placeholder for "no alternate allele" (may be replaced later if a fraction of reads support an alternate)
        return record.REF , 'Z' , 'Ref_PASS'

    ### Call supporting ALTERNATE (PASS)
    if has_alternate_allele and (record.FILTER == []):
        return record.REF , str( record.ALT[0] ) , 'Alt_PASS'

    ### Call ambiguous between REFERENCE and ALTERNATE
    if has_alternate_allele and (record.FILTER == ['Amb']):
        return record.REF , str( record.ALT[0] ) , 'Amb'

    ### Bad Quality Call
    return None


def _alt_allele_and_frequency_from_QP(QP, ref_allele, alt_allele):

    '''
    Work out the alternate allele and its frequency from the QP field
    (percentage of As, Cs, Gs, Ts weighted by Q & MQ at the locus).

    Returns a tuple (alt_allele , alt_allele_frequency) or None when the Base Call should not be stored:
    - 1 base supported: frequency is 0.0 if that base is the reference allele, otherwise 1.0 (alt_allele is returned unchanged)
    - 2 bases supported: one must be the reference allele; the other becomes the alternate allele with frequency QP / 100
    - anything else: None
    '''

    #position-base dictionaries
    base_order = {0:'A' , 1:'C' , 2:'G', 3:'T'} #order of output in Pilon
    base_order_r = {'A':0 , 'C':1 , 'G':2 , 'T':3}

    QP = np.array(QP)

    #number of bases with NO weighted support & positions of the bases [A,C,G,T] with support > 0
    num_unsupported_bases = int( np.sum(QP == 0) )
    supported_bases_i = [base_i for base_i , support in enumerate(QP) if support > 0]

    #1 Allele supported by Reads (either Reference or an Alternate)
    if num_unsupported_bases == 3:

        supported_base = base_order[supported_bases_i[0]]

        #NOTE(review): when the single supported base is not the reference, alt_allele keeps the value taken
        #from the ALT field (or the 'Z' placeholder) rather than supported_base - confirm this is intended
        if supported_base == ref_allele:
            return alt_allele , 0.0 #no alternate allele
        return alt_allele , 1.0 #alternate allele

    #2 Alleles supported, make sure 1 is the Reference & confirm the Alternate
    if num_unsupported_bases == 2:

        #position of the Reference Allele in [A,C,G,T] (KeyError if the reference base is not one of A,C,G,T)
        ref_allele_base_i = base_order_r[ref_allele]

        #2 alleles supported but neither is the reference, don't store information for this Base Call
        if ref_allele_base_i not in supported_bases_i:
            return None

        #the supported base that is not the reference must be the alternate allele
        for supported_base_i in supported_bases_i:
            if supported_base_i != ref_allele_base_i:
                return base_order[supported_base_i] , float(QP[supported_base_i]) / 100.0

        #no supported non-reference base found (malformed QP); skip rather than re-use a stale frequency
        return None

    #more than 2 alleles supported by reads (or none at all), don't store information for this Base Call
    return None


def Get_Base_Calls_from_VCF(parent_output_dir, tag):

    '''
    Extract the filtered SINGLE base calls of one isolate from the VCF file generated by Pilon.

    Parameters
    - parent_output_dir : directory (with trailing '/') containing one sub-directory per isolate; the VCF is read
                          from parent_output_dir + tag + '/pilon/' + tag + '.vcf'
    - tag               : isolate tag (name of the isolate sub-directory and VCF file)

    Returns a DataFrame with one row per retained base call and the columns
    ref_base, alt_base, ref_position, quality, SNP_type, PASS_filter, INFO, alt_AF, depth, tag.
    Retained calls are [1] PASS calls that support the reference allele, [2] PASS calls that support
    an alternate allele and [3] AMB calls that support both the reference and alternate alleles, that also satisfy
    Mean Base Quality > 20, Mean Mapping Quality > 30, no reads supporting insertions or deletions, depth >= 25,
    at most 2 supported bases (one of them the reference) and depth below the 99th percentile of the depth
    over all records of the VCF that report a depth.
    Note: Many base calls won't have an Alternate Allele ('Z' is used as a placeholder).

    Side effect: the 'ref_position' column of the returned DataFrame is pickled to
    parent_output_dir + tag + '/' + tag + '_ref_positions_for_filtered_base_calls.pkl'.

    Raises KeyError if a record that reaches the quality filters lacks one of the INFO fields BQ, MQ, DC, IC, DP or QP.
    '''

    #directory where VCF file for sequenced isolate is stored & filename
    VCF_file = parent_output_dir + tag + '/pilon/' + tag + '.vcf'

    #columns of the output DataFrame (in order) & a list of values for each of them
    column_order = ['ref_base' , 'alt_base' , 'ref_position' , 'quality' , 'SNP_type' , 'PASS_filter' , 'INFO' , 'alt_AF' , 'depth' , 'tag']
    base_call_columns = dict( (column , []) for column in column_order )

    #store the DEPTH of all base & variant calls (depth not stored for major structural variants)
    coverage_for_all_base_and_small_indel_calls = []

    #the reader pulls records lazily from the file, so the file has to stay open for the whole loop;
    #the with-statement makes sure it is closed afterwards (also when a record raises)
    with open(VCF_file , 'r') as VCF_handle:

        #iterate through each record from VCF file
        for record in vcf.Reader(VCF_handle):

            ######## store the DEPTH of all base & variant calls ########
            try:
                coverage_for_all_base_and_small_indel_calls.append( record.INFO['DP'] )

            except KeyError: #major structural variant with no depth value
                pass

            ######## Classify SNP-type according to Pilon Flags ########
            #---> Filter has to be either PASS ([]) or AMBIGUOUS (['Amb']) , no 'DEL' or 'LOWCOV' flags from Pilon
            classified_call = _classify_pilon_base_call(record)

            if classified_call is None:
                continue

            ref_allele , alt_allele , SNP_type_for_Call = classified_call

            ### Filtering Criteria (for Reference Confirmation or SNP)
            #---> length of all alleles is 1 (No Variant or 1 Base) ; if variant, then variant is a SNP and not an INDEL
            if (len(ref_allele) != 1) or (len(alt_allele) != 1):
                continue

            ######## Retrieve Relevant information for filtering quality of Base Call ########
            BQ = record.INFO['BQ'] # Mean Base Quality @ locus
            MQ = record.INFO['MQ'] # Mean Mapping Quality @ locus
            DC = record.INFO['DC'] # Number of Reads w/ Deletion
            IC = record.INFO['IC'] # Number of Reads w/ Insertion
            VD = record.INFO['DP'] # Depth of Valid Reads in Pileup

            ### Filtering Criteria
            #---> Mean Base Quality > 20
            #---> Mean Mapping Quality > 30
            #---> No Reads Supporting Insertions
            #---> No Reads Supporting Deletions
            #---> Number of High Quality Reads >= 25
            if not ( (BQ > 20) and (MQ > 30) and (DC == 0) and (IC == 0) and (VD >= 25) ):
                continue

            ### Filtering Criteria
            #---> Reads support at most 2 alleles; if there are 2 alleles, 1 of them is the Reference Allele
            allele_support = _alt_allele_and_frequency_from_QP(record.INFO['QP'] , ref_allele , alt_allele)

            if allele_support is None:
                continue

            alt_allele , alt_allele_frequency = allele_support

            #After extensive filtering and categorization, store all of the pertinent information about the Base Call
            values_for_call = (ref_allele , alt_allele , record.POS , record.QUAL , SNP_type_for_Call , record.FILTER , record.INFO , alt_allele_frequency , VD , tag)

            for column , value in zip(column_order , values_for_call):
                base_call_columns[column].append(value)

    #create DataFrame to hold all base calls for a given sample
    Variant_Call_DF = pd.DataFrame(base_call_columns , columns = column_order)

    ### Filtering Criteria (high coverage)
    #---> Filter out Base Calls with total # of reads aligning above the 99th percentile of covered positions in sample
    #(if no record reported a depth there are no stored base calls either, and the percentile is undefined)
    if len(coverage_for_all_base_and_small_indel_calls) > 0:

        ninety_ninth_percentile_coverage = np.percentile( np.array( coverage_for_all_base_and_small_indel_calls ) , 99 )
        Variant_Call_DF = Variant_Call_DF[Variant_Call_DF.depth < ninety_ninth_percentile_coverage] #keep base calls that dont have unusually high coverage

    #reset index after dropping base calls with unusually high coverage
    Variant_Call_DF = Variant_Call_DF.reset_index(drop = True)

    #save REFERENCE POSITIONS only for filtered (good) single base call variants as a pickle in the isolate's directory
    Variant_Call_DF.loc[: , 'ref_position'].to_pickle(parent_output_dir + tag + '/' + tag + '_ref_positions_for_filtered_base_calls.pkl')

    ##SIDE NOTE: use "pd.read_pickle(parent_output_dir + tag + '/' + tag + '_ref_positions_for_filtered_base_calls.pkl')" to unpickle series object

    return Variant_Call_DF #DataFrame for base calls for a single isolate

In [3]:
def Get_different_Base_Calls_between_isolates(parent_output_dir, tag_list):

    '''
    Compare the filtered single base calls of the two isolates of an isolate pair.

    Parameters
    - parent_output_dir : directory holding one sub-directory per isolate (passed on to Get_Base_Calls_from_VCF)
    - tag_list          : tags (fastq ids) of the isolates of the pair; the even/odd pairing used below
                          requires exactly 2 base calls per retained reference position (i.e. 2 isolates)

    The VCF file of each isolate is filtered seperately for good calls with Get_Base_Calls_from_VCF
    (PASS supporting Reference Allele, PASS supporting Alternate Allele or AMB supporting two Alleles; and no
    unusually high coverage). Then:
    [1] calls that match between isolates and support 1 allele completely are dropped,
    [2] only reference positions with a good call in BOTH isolates are kept,
    [3] pairs with a difference in Alternate Allele Frequency of less than 5% are dropped.

    Returns a DataFrame (index 0..n-1, sorted by ref_position) containing GOOD base calls for both isolates
    for which the Alternate Allele Frequencies DIFFERED. The rows of each pair are adjacent and follow the order
    of the isolates in tag_list.

    Raises ValueError if the remaining base calls cannot be split into pairs sharing a reference position.
    '''

    #filter the VCF of each isolate, in the order given by tag_list (a repeated tag is only processed once)
    Variant_Call_DFs = []
    tags_processed = set()

    for tag in tag_list:

        if tag in tags_processed:
            continue
        tags_processed.add(tag)

        #call function to get variants for isolate
        Variant_Call_DFs.append( Get_Base_Calls_from_VCF(parent_output_dir, tag) )

    #concatenate DataFrames for each isolate into 1 DataFrame (with a fresh 0..n-1 index)
    Variant_Call_Master_DF = pd.concat(Variant_Call_DFs , ignore_index = True)

    #sort values in order of reference position; a stable sort keeps the isolates of a position in tag_list order
    Variant_Call_Master_DF = Variant_Call_Master_DF.sort_values(by = 'ref_position' , ascending = True , kind = 'mergesort')

    ############################################################################################

    #### FILTER methodology ####
    ## if a call for a reference position is PASS-REFERENCE with alt AF of 0.0 for both isolates, then drop base call (for each isolate)
    ## if a call for a reference position is PASS-ALTERNATE with alt AF of 1.0 for both isolates, then drop base call (for each isolate)
    ## (all reads from both sequenced isolates support the same allele)
    PASS_REF_filter = (Variant_Call_Master_DF.SNP_type == 'Ref_PASS') & (Variant_Call_Master_DF.alt_AF == 0.0)
    PASS_ALT_filter = (Variant_Call_Master_DF.SNP_type == 'Alt_PASS') & (Variant_Call_Master_DF.alt_AF == 1.0)

    #columns that have to agree for 2 base calls to count as matching
    columns_to_match = ['ref_base' , 'alt_base' , 'ref_position' , 'SNP_type' , 'alt_AF']

    #indices in dataframe to drop
    base_calls_to_drop = []

    for fixed_allele_filter in [PASS_REF_filter , PASS_ALT_filter]:

        #subset to base calls that support 1 allele entirely, then find the ones that match between isolates
        fixed_allele_base_calls = Variant_Call_Master_DF[fixed_allele_filter]
        matching_base_calls_filter = fixed_allele_base_calls.duplicated(subset = columns_to_match , keep = False)
        base_calls_to_drop = base_calls_to_drop + list( fixed_allele_base_calls[matching_base_calls_filter].index )

    #drop base calls from Master variant DF that match between isolates AND support 1 allele completely
    Variant_Call_Master_DF = Variant_Call_Master_DF.drop(labels = base_calls_to_drop , axis = 0)

    ############################################################################################

    #### FILTER methodology ####
    ## from the remaining base calls, find base calls for which we have a reference position for BOTH isolates (calls in both samples were good quality)
    ## that is, we will have a PASS call or AMB call (w/o low coverage or deletion flags) for a reference position for BOTH isolates
    Variant_Call_Master_DF = Variant_Call_Master_DF[Variant_Call_Master_DF.duplicated(subset = 'ref_position' , keep = False)]

    #reset index of filtered SNP-variant Master DataFrame
    Variant_Call_Master_DF = Variant_Call_Master_DF.reset_index(drop = True)

    ############################################################################################

    #### FILTER methodology ####
    ## filter out paired Base Calls that have a difference in Alternate Allele Frequency of less than 5%

    #rows 2k and 2k+1 are treated as the 2 isolates of a pair; check that this really holds before comparing them
    ref_positions = Variant_Call_Master_DF['ref_position'].values

    if (len(ref_positions) % 2 != 0) or (ref_positions[0::2] != ref_positions[1::2]).any():
        raise ValueError('base calls cannot be paired by reference position; exactly 2 isolates with 1 call per position are expected')

    alt_AF = Variant_Call_Master_DF['alt_AF'].values.astype(float)
    alt_AF_isolate_A = alt_AF[0::2]
    alt_AF_isolate_B = alt_AF[1::2]

    small_change_in_alt_AF = abs(alt_AF_isolate_A - alt_AF_isolate_B) < 0.05

    #drop paired Base Calls w/ corresponding change in Alternate Allele Frequency < 5% (both rows of the pair)
    base_calls_to_keep = np.repeat(~small_change_in_alt_AF , 2)
    Variant_Call_Master_DF = Variant_Call_Master_DF.loc[base_calls_to_keep]

    #reset index of final filtered SNP-variant Master DataFrame
    Variant_Call_Master_DF = Variant_Call_Master_DF.reset_index(drop = True)

    return Variant_Call_Master_DF #DataFrame for good base calls that differed between isolates from same subject or replicate pair

### Call Functions to call SNPs between a (*longitudinal* or *replicate*) pair of isolates

#### *For Testing* (longitudinal pairs)

In [5]:
#Test inputs for a longitudinal pair.
#NOTE: the replicate-pair test cell and the script (sys.argv) cell assign these same 4 variables; whichever cell ran last wins.

#INPUT: parent directory of the per-isolate output; the VCF file output by pilon is read from <tag>/pilon/<tag>.vcf inside it
parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/JankyPipe/output/'

#INPUT: Sample annotation file path for longitudinal or replicate samples (already filtered with Kraken and F2)
sample_annotation_file_path = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/CSV_files/sample_annotation_files/Longitudinal_fastq_path_names_and_JankyPipe_tags_filtered_with_Kraken_and_F2.csv'

#INPUT: either subject ID (longitudinal) or isolate pair ID (replicate); matched against the 'patient_id' column of the annotation file
isolate_pair_ID = '1960'

#INPUT: parent directory under which the base calls that differ between the isolates of each pair are stored (1 sub-directory per pair)
SNP_call_parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/pickled_files/variant_calling/longitudinal_SNPs/all_SNPs_between_longitudinal_pairs/'

#### *For Testing* (replicate pairs)

In [4]:
#Test inputs for a replicate pair.
#NOTE: the longitudinal-pair test cell and the script (sys.argv) cell assign these same 4 variables; whichever cell ran last wins.

#INPUT: parent directory of the per-isolate output; the VCF file output by pilon is read from <tag>/pilon/<tag>.vcf inside it
parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/JankyPipe/output_REPLICATES/'

#INPUT: Sample annotation file path for longitudinal or replicate samples (already filtered with Kraken and F2)
sample_annotation_file_path = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/CSV_files/sample_annotation_files/Replicate_fastq_path_names_and_JankyPipe_tags_filtered_with_Kraken_and_F2.csv'

#INPUT: either subject ID (longitudinal) or isolate pair ID (replicate); matched against the 'patient_id' column of the annotation file
isolate_pair_ID = 'replicate_pair_67'

#INPUT: parent directory under which the base calls that differ between the isolates of each pair are stored (1 sub-directory per pair)
SNP_call_parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/pickled_files/variant_calling/replicate_SNPs/all_SNPs_between_replicate_pairs/'

#### *For Script*

In [None]:
#Inputs when run as a script: the 4 positional command-line arguments, in the order used by the job submission cell of this notebook
#(IndexError if fewer than 4 arguments are given)

#INPUT: parent directory of the per-isolate output; the VCF file output by pilon is read from <tag>/pilon/<tag>.vcf inside it
parent_output_dir = sys.argv[1]

#INPUT: Sample annotation file path for longitudinal or replicate samples (already filtered with Kraken and F2)
sample_annotation_file_path = sys.argv[2]

#INPUT: either subject ID (longitudinal) or isolate pair ID (replicate); matched against the 'patient_id' column of the annotation file
isolate_pair_ID = sys.argv[3]

#INPUT: parent directory under which the base calls that differ between the isolates of each pair are stored (1 sub-directory per pair)
SNP_call_parent_output_dir = sys.argv[4] 

In [5]:
#Sample Annotation & Tag Information for Isolate Pair
##############################################################################################################################

#annotation of every isolate (and its tag), indexed by patient_id
sample_annotation = pd.read_csv(sample_annotation_file_path , sep = ',').set_index('patient_id')

#rows of the annotation that belong to this isolate pair (1 row per isolate)
sequenced_isolates_annot_for_sample_pair = sample_annotation.loc[isolate_pair_ID , :]

#collect the tag of each of the 2 isolates; population & isolate pair ID are taken from the annotation rows as well
tags = []

for isolate_i in range(2):

    #annotation for 1 of the isolates from isolate pair
    sequenced_isolate_annot = sequenced_isolates_annot_for_sample_pair.iloc[isolate_i , :]

    population = sequenced_isolate_annot.population
    isolate_pair_ID = sequenced_isolate_annot.name #row label, i.e. the patient_id
    tag = sequenced_isolate_annot.tag

    tags.append(tag)

#directory that stores the variation between the isolates of this pair: <parent>/<population>_<isolate pair ID>
SNP_call_output_dir = SNP_call_parent_output_dir + population + '_' + isolate_pair_ID

#always start from an empty directory: output of an earlier run for this pair is deleted first
if os.path.exists(SNP_call_output_dir):
    shutil.rmtree(SNP_call_output_dir)

os.makedirs(SNP_call_output_dir)

#BASE CALLS
##############################################################################################################################

#'good' calls that differ between the isolates of this pair
Base_Call_Master_DF = Get_different_Base_Calls_between_isolates(parent_output_dir, tags)

#constant psuedo-columns that note population & isolate pair ID on every row
number_of_base_calls = len(Base_Call_Master_DF)

population_series = pd.Series( [population] * number_of_base_calls , index = Base_Call_Master_DF.index )
isolate_pair_ID_series = pd.Series( [isolate_pair_ID] * number_of_base_calls , index = Base_Call_Master_DF.index )

Base_Call_Master_DF['population'] = population_series
Base_Call_Master_DF['patient_id'] = isolate_pair_ID_series

#pickle the DataFrame containing base calls that differ between paired isolates
Base_Call_Master_DF.to_pickle(SNP_call_output_dir + '/' + 'base_calls_different_between_isolates.pkl') 

############################################################################################################################################################################################################################

### Submit Jobs - Longitudinal Isolates

############################################################################################################################################################################################################################

In [29]:
import os
import pandas as pd
from slurmpy import Slurm
import numpy as np

#### Import annotation file for all sample pairs

In [30]:
#annotation of every longitudinal isolate (with its tag); indexed by patient_id, so the isolates of a subject share an index label
sample_annotation = pd.read_csv(
    '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/CSV_files/sample_annotation_files/Longitudinal_fastq_path_names_and_JankyPipe_tags_filtered_with_Kraken_and_F2.csv' ,
    sep = ','
).set_index('patient_id')

#### Find which subjects have a valid VCF file for each of their two longitudinal isolates

In [31]:
#'yes' / 'no' for every isolate (row of sample_annotation, in order): did JankyPipe run to completion?
successful_JankyPipe_run = []

#iterate over the tag values themselves: sample_annotation is indexed by patient_id, so looking a tag up
#with an integer key (tag[i]) depends on pandas falling back to positional access
for tag in sample_annotation['tag'].values:

    #where pilon VCF and lineage information will be stored [LAB FOLDER]
    output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/JankyPipe/output/' + tag

    #check to see 'Lineage Call' folder exists in the output directory (last thing that is run in JankyPipe)
    if os.path.exists(output_dir + '/fast-lineage-caller/'):
        successful_JankyPipe_run.append('yes')
    else:
        successful_JankyPipe_run.append('no')

sample_annotation['successful_JankyPipe_run'] = successful_JankyPipe_run

In [32]:
#display the isolates without a 'fast-lineage-caller' output folder (no rows when every isolate was flagged 'yes')
sample_annotation[sample_annotation.successful_JankyPipe_run == 'no']

Unnamed: 0_level_0,fastq_files,population,run_ID,sample_ID,sample_order,tag,isolate_type,successful_JankyPipe_run
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [33]:
#patient_ids with at least 1 isolate flagged 'no', i.e. subjects for which we do not have 2 good VCF files
bad_isolate_pair_IDs = set( sample_annotation.loc[sample_annotation.successful_JankyPipe_run == 'no'].index )

In [34]:
#subjects that will be excluded (single-argument parenthesised print, same output as the print statement)
print(list(bad_isolate_pair_IDs))

[]


#### Get list of subjects (longitudinal isolate pairs) to get SNP variants from

In [35]:
#every subject in the annotation except the ones missing a good VCF file (unique IDs, in set order)
isolate_pair_ID_list = list( set(sample_annotation.index).difference(bad_isolate_pair_IDs) )

In [36]:
#subjects that a job will be submitted for
print(isolate_pair_ID_list)

['P000183', '1960', '2718', '2491', '2968', '2020E', '5017Y', '4146V', '24', 'KPS_9', '2321', '27', '3157Q', 'P000089', '23', 'P233', 'KPS_1', 'KPS_2', 'KPS_3', 'KPS_4', 'P12', 'KPS_7', 'I0000024-5', '1678', 'KPS_39', 'KPS_34', 'KPS_35', 'KPS_36', 'KPS_37', 'KPS_30', 'KPS_31', '1672', '1673', '1700', 'P000027', 'KPS_58', 'KPS_59', 'KPS_56', 'KPS_57', 'KPS_52', 'KPS_53', 'KPS_50', '1620', '4137X', 'P000155', 'P000156', '2556', '4154E', '2319', 'P000094', 'P000095', '1827', '2492', 'P000226', 'P03', 'P02', 'KPS_29', 'KPS_28', 'P07', 'P06', 'P04', 'KPS_23', 'KPS_22', '3129J', 'P08', 'KPS_27', 'KPS_26', '3061R', '3644X', 'P043', 'KPS_49', 'KPS_48', 'I0003165-3', 'KPS_46', 'KPS_41', 'I0000180-5', '3149E', '3047X', '4085U', '3117W', '3673', 'KPS_66', '3100U', 'P052', '1719', 'P01', '2307', 'P10', 'P11', '3', '2043R', '7', 'P000026', '3018C', 'P000227', '1657', 'P000225', 'KPS_62', 'KPS_79', '4084F', 'KPS_21', '4161B', 'KPS_70', 'KPS_71', 'KPS_72', 'P000259', 'KPS_74', '1948', 'KPS_77', 'P062

In [37]:
#number of subjects (isolate pairs) that a job will be submitted for
len(isolate_pair_ID_list)

244

#### Submit a job for each subject to get the SNPs

In [38]:
#arguments shared by every job
parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/JankyPipe/output/'
sample_annotation_file_path = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/CSV_files/sample_annotation_files/Longitudinal_fastq_path_names_and_JankyPipe_tags_filtered_with_Kraken_and_F2.csv'
SNP_call_parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/pickled_files/variant_calling/longitudinal_SNPs/all_SNPs_between_longitudinal_pairs/'

#1 SLURM job per subject; each job runs the variant script on the isolate pair of that subject
for isolate_pair_ID in isolate_pair_ID_list:

    #directory where you want output + error files
    os.chdir('/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/SLURM_logs/BaseCalls_between_isolate_pairs_SLURM_logs/')

    #command line: script followed by <VCF parent dir> <annotation CSV> <isolate pair ID> <SNP output parent dir>
    job_arguments = ' '.join( [parent_output_dir , sample_annotation_file_path , isolate_pair_ID , SNP_call_parent_output_dir] )
    longitudinal_SNP_variants_job = 'python /n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/python_scripts/BaseCall_variant_between_isolate_pair_collection.py ' + job_arguments

    #the job is named after the subject
    job_name = isolate_pair_ID

    #NOTE(review): the time limit is passed under the key 't'; confirm that the installed slurmpy does not also add its own default 'time' entry
    s = Slurm(job_name , {'partition':'short' , 'n':'1' , 't':'0-03:00:00' , 'mem-per-cpu':'24G' , 'mail-type':'FAIL' , 'mail-user':'roger_vargas@g.harvard.edu'})

    #submits the job
    job_id = s.run(longitudinal_SNP_variants_job)

    print(job_name  + ' : ' +  str(job_id))

submitted: Submitted batch job 2663382


P000183 : 2663382


submitted: Submitted batch job 2663383


1960 : 2663383


submitted: Submitted batch job 2663384


2718 : 2663384


submitted: Submitted batch job 2663385


2491 : 2663385


submitted: Submitted batch job 2663386


2968 : 2663386


submitted: Submitted batch job 2663387


2020E : 2663387


submitted: Submitted batch job 2663388


5017Y : 2663388


submitted: Submitted batch job 2663389


4146V : 2663389


submitted: Submitted batch job 2663390


24 : 2663390


submitted: Submitted batch job 2663391


KPS_9 : 2663391


submitted: Submitted batch job 2663392


2321 : 2663392


submitted: Submitted batch job 2663393


27 : 2663393


submitted: Submitted batch job 2663394


3157Q : 2663394


submitted: Submitted batch job 2663395


P000089 : 2663395


submitted: Submitted batch job 2663396


23 : 2663396


submitted: Submitted batch job 2663397


P233 : 2663397


submitted: Submitted batch job 2663398


KPS_1 : 2663398


submitted: Submitted batch job 2663399


KPS_2 : 2663399


submitted: Submitted batch job 2663400


KPS_3 : 2663400


submitted: Submitted batch job 2663401


KPS_4 : 2663401


submitted: Submitted batch job 2663402


P12 : 2663402


submitted: Submitted batch job 2663403


KPS_7 : 2663403


submitted: Submitted batch job 2663404


I0000024-5 : 2663404


submitted: Submitted batch job 2663405


1678 : 2663405


submitted: Submitted batch job 2663406


KPS_39 : 2663406


submitted: Submitted batch job 2663407


KPS_34 : 2663407


submitted: Submitted batch job 2663408


KPS_35 : 2663408


submitted: Submitted batch job 2663409


KPS_36 : 2663409


submitted: Submitted batch job 2663410


KPS_37 : 2663410


submitted: Submitted batch job 2663411


KPS_30 : 2663411


submitted: Submitted batch job 2663412


KPS_31 : 2663412


submitted: Submitted batch job 2663413


1672 : 2663413


submitted: Submitted batch job 2663414


1673 : 2663414


submitted: Submitted batch job 2663415


1700 : 2663415


submitted: Submitted batch job 2663416


P000027 : 2663416


submitted: Submitted batch job 2663418


KPS_58 : 2663418


submitted: Submitted batch job 2663419


KPS_59 : 2663419


submitted: Submitted batch job 2663420


KPS_56 : 2663420


submitted: Submitted batch job 2663421


KPS_57 : 2663421


submitted: Submitted batch job 2663422


KPS_52 : 2663422


submitted: Submitted batch job 2663423


KPS_53 : 2663423


submitted: Submitted batch job 2663424


KPS_50 : 2663424


submitted: Submitted batch job 2663425


1620 : 2663425


submitted: Submitted batch job 2663426


4137X : 2663426


submitted: Submitted batch job 2663427


P000155 : 2663427


submitted: Submitted batch job 2663428


P000156 : 2663428


submitted: Submitted batch job 2663429


2556 : 2663429


submitted: Submitted batch job 2663430


4154E : 2663430


submitted: Submitted batch job 2663431


2319 : 2663431


submitted: Submitted batch job 2663432


P000094 : 2663432


submitted: Submitted batch job 2663433


P000095 : 2663433


submitted: Submitted batch job 2663434


1827 : 2663434


submitted: Submitted batch job 2663435


2492 : 2663435


submitted: Submitted batch job 2663436


P000226 : 2663436


submitted: Submitted batch job 2663437


P03 : 2663437


submitted: Submitted batch job 2663438


P02 : 2663438


submitted: Submitted batch job 2663439


KPS_29 : 2663439


submitted: Submitted batch job 2663440


KPS_28 : 2663440


submitted: Submitted batch job 2663441


P07 : 2663441


submitted: Submitted batch job 2663442


P06 : 2663442


submitted: Submitted batch job 2663443


P04 : 2663443


submitted: Submitted batch job 2663444


KPS_23 : 2663444


submitted: Submitted batch job 2663445


KPS_22 : 2663445


submitted: Submitted batch job 2663446


3129J : 2663446


submitted: Submitted batch job 2663447


P08 : 2663447


submitted: Submitted batch job 2663448


KPS_27 : 2663448


submitted: Submitted batch job 2663449


KPS_26 : 2663449


submitted: Submitted batch job 2663450


3061R : 2663450


submitted: Submitted batch job 2663451


3644X : 2663451


submitted: Submitted batch job 2663452


P043 : 2663452


submitted: Submitted batch job 2663465


KPS_49 : 2663465


submitted: Submitted batch job 2663466


KPS_48 : 2663466


submitted: Submitted batch job 2663469


I0003165-3 : 2663469


submitted: Submitted batch job 2663470


KPS_46 : 2663470


submitted: Submitted batch job 2663471


KPS_41 : 2663471


submitted: Submitted batch job 2663472


I0000180-5 : 2663472


submitted: Submitted batch job 2663473


3149E : 2663473


submitted: Submitted batch job 2663474


3047X : 2663474


submitted: Submitted batch job 2663475


4085U : 2663475


submitted: Submitted batch job 2663476


3117W : 2663476


submitted: Submitted batch job 2663477


3673 : 2663477


submitted: Submitted batch job 2663478


KPS_66 : 2663478


submitted: Submitted batch job 2663481


3100U : 2663481


submitted: Submitted batch job 2663482


P052 : 2663482


submitted: Submitted batch job 2663483


1719 : 2663483


submitted: Submitted batch job 2663484


P01 : 2663484


submitted: Submitted batch job 2663485


2307 : 2663485


submitted: Submitted batch job 2663486


P10 : 2663486


submitted: Submitted batch job 2663487


P11 : 2663487


submitted: Submitted batch job 2663488


3 : 2663488


submitted: Submitted batch job 2663489


2043R : 2663489


submitted: Submitted batch job 2663490


7 : 2663490


submitted: Submitted batch job 2663491


P000026 : 2663491


submitted: Submitted batch job 2663492


3018C : 2663492


submitted: Submitted batch job 2663493


P000227 : 2663493


submitted: Submitted batch job 2663494


1657 : 2663494


submitted: Submitted batch job 2663495


P000225 : 2663495


submitted: Submitted batch job 2663496


KPS_62 : 2663496


submitted: Submitted batch job 2663497


KPS_79 : 2663497


submitted: Submitted batch job 2663498


4084F : 2663498


submitted: Submitted batch job 2663499


KPS_21 : 2663499


submitted: Submitted batch job 2663500


4161B : 2663500


submitted: Submitted batch job 2663501


KPS_70 : 2663501


submitted: Submitted batch job 2663502


KPS_71 : 2663502


submitted: Submitted batch job 2663503


KPS_72 : 2663503


submitted: Submitted batch job 2663504


P000259 : 2663504


submitted: Submitted batch job 2663505


KPS_74 : 2663505


submitted: Submitted batch job 2663506


1948 : 2663506


submitted: Submitted batch job 2663507


KPS_77 : 2663507


submitted: Submitted batch job 2663508


P062 : 2663508


submitted: Submitted batch job 2663509


20 : 2663509


submitted: Submitted batch job 2663510


2523 : 2663510


submitted: Submitted batch job 2663511


P09 : 2663511


submitted: Submitted batch job 2663512


I0003758-5 : 2663512


submitted: Submitted batch job 2663513


I0002615-8 : 2663513


submitted: Submitted batch job 2663514


4078X : 2663514


submitted: Submitted batch job 2663515


1664 : 2663515


submitted: Submitted batch job 2663516


2050N : 2663516


submitted: Submitted batch job 2663517


2028K : 2663517


submitted: Submitted batch job 2663518


32 : 2663518


submitted: Submitted batch job 2663519


30 : 2663519


submitted: Submitted batch job 2663520


1846 : 2663520


submitted: Submitted batch job 2663521


34 : 2663521


submitted: Submitted batch job 2663522


3096 : 2663522


submitted: Submitted batch job 2663523


2027W : 2663523


submitted: Submitted batch job 2663524


P000035 : 2663524


submitted: Submitted batch job 2663525


4063W : 2663525


submitted: Submitted batch job 2663527


P000030 : 2663527


submitted: Submitted batch job 2663528


KPS_13 : 2663528


submitted: Submitted batch job 2663529


1941 : 2663529


submitted: Submitted batch job 2663530


I0000068-2 : 2663530


submitted: Submitted batch job 2663531


P377 : 2663531


submitted: Submitted batch job 2663532


P372 : 2663532


submitted: Submitted batch job 2663533


KPS_5 : 2663533


submitted: Submitted batch job 2663534


I0004240-3 : 2663534


submitted: Submitted batch job 2663535


P000320 : 2663535


submitted: Submitted batch job 2663536


3142P : 2663536


submitted: Submitted batch job 2663537


P355 : 2663537


submitted: Submitted batch job 2663538


1682 : 2663538


submitted: Submitted batch job 2663539


P179 : 2663539


submitted: Submitted batch job 2663540


3451 : 2663540


submitted: Submitted batch job 2663541


4241M : 2663541


submitted: Submitted batch job 2663542


I0004746-9 : 2663542


submitted: Submitted batch job 2663543


I0003398-0 : 2663543


submitted: Submitted batch job 2663544


I0002457-5 : 2663544


submitted: Submitted batch job 2663545


P000048 : 2663545


submitted: Submitted batch job 2663546


2688 : 2663546


submitted: Submitted batch job 2663547


6 : 2663547


submitted: Submitted batch job 2663548


KPS_38 : 2663548


submitted: Submitted batch job 2663549


P000128 : 2663549


submitted: Submitted batch job 2663562


2041Q : 2663562


submitted: Submitted batch job 2663563


4023C : 2663563


submitted: Submitted batch job 2663564


4068M : 2663564


submitted: Submitted batch job 2663566


B : 2663566


submitted: Submitted batch job 2663567


P367 : 2663567


submitted: Submitted batch job 2663568


3147D : 2663568


submitted: Submitted batch job 2663569


2741 : 2663569


submitted: Submitted batch job 2663570


2776 : 2663570


submitted: Submitted batch job 2663571


8 : 2663571


submitted: Submitted batch job 2663572


I0005235-2 : 2663572


submitted: Submitted batch job 2663573


3725 : 2663573


submitted: Submitted batch job 2663575


I0004236-1 : 2663575


submitted: Submitted batch job 2663576


KPS_24 : 2663576


submitted: Submitted batch job 2663577


I0002960-8 : 2663577


submitted: Submitted batch job 2663578


P108 : 2663578


submitted: Submitted batch job 2663579


11 : 2663579


submitted: Submitted batch job 2663580


10 : 2663580


submitted: Submitted batch job 2663581


13 : 2663581


submitted: Submitted batch job 2663583


12 : 2663583


submitted: Submitted batch job 2663584


15 : 2663584


submitted: Submitted batch job 2663585


14 : 2663585


submitted: Submitted batch job 2663586


16 : 2663586


submitted: Submitted batch job 2663587


I0004172-8 : 2663587


submitted: Submitted batch job 2663588


P160 : 2663588


submitted: Submitted batch job 2663589


I0005229-5 : 2663589


submitted: Submitted batch job 2663590


P164 : 2663590


submitted: Submitted batch job 2663591


KPS_67 : 2663591


submitted: Submitted batch job 2663592


P000059 : 2663592


submitted: Submitted batch job 2663593


2026 : 2663593


submitted: Submitted batch job 2663594


KPS_64 : 2663594


submitted: Submitted batch job 2663595


I0005973-8 : 2663595


submitted: Submitted batch job 2663596


I0001112-7 : 2663596


submitted: Submitted batch job 2663597


KPS_60 : 2663597


submitted: Submitted batch job 2663598


I0004013-4 : 2663598


submitted: Submitted batch job 2663599


2511 : 2663599


submitted: Submitted batch job 2663600


P000056 : 2663600


submitted: Submitted batch job 2663601


KPS_68 : 2663601


submitted: Submitted batch job 2663602


2731 : 2663602


submitted: Submitted batch job 2663603


2436 : 2663603


submitted: Submitted batch job 2663604


3103J : 2663604


submitted: Submitted batch job 2663605


P318 : 2663605


submitted: Submitted batch job 2663606


4024R : 2663606


submitted: Submitted batch job 2663607


4144U : 2663607


submitted: Submitted batch job 2663608


KPS_81 : 2663608


submitted: Submitted batch job 2663609


KPS_80 : 2663609


submitted: Submitted batch job 2663610


P316 : 2663610


submitted: Submitted batch job 2663611


2049 : 2663611


submitted: Submitted batch job 2663612


I0004304-7 : 2663612


submitted: Submitted batch job 2663613


P000189 : 2663613


submitted: Submitted batch job 2663614


I0000221-7 : 2663614


submitted: Submitted batch job 2663615


KPS_75 : 2663615


submitted: Submitted batch job 2663616


KPS_82 : 2663616


submitted: Submitted batch job 2663617


3058W : 2663617


submitted: Submitted batch job 2663618


P288 : 2663618


submitted: Submitted batch job 2663619


3430 : 2663619


submitted: Submitted batch job 2663620


KPS_16 : 2663620


submitted: Submitted batch job 2663621


P251 : 2663621


submitted: Submitted batch job 2663622


KPS_84 : 2663622


submitted: Submitted batch job 2663623


P000267 : 2663623


submitted: Submitted batch job 2663624


2918 : 2663624


submitted: Submitted batch job 2663625


KPS_10 : 2663625


submitted: Submitted batch job 2663626


KPS_11 : 2663626


submitted: Submitted batch job 2663627


P000263 : 2663627


submitted: Submitted batch job 2663628


KPS_17 : 2663628


submitted: Submitted batch job 2663629


KPS_14 : 2663629


submitted: Submitted batch job 2663630


KPS_15 : 2663630


submitted: Submitted batch job 2663631


I0004003-5 : 2663631


submitted: Submitted batch job 2663632


KPS_18 : 2663632


submitted: Submitted batch job 2663633


2047T : 2663633


submitted: Submitted batch job 2663634


28 : 2663634


submitted: Submitted batch job 2663635


D : 2663635


submitted: Submitted batch job 2663636


3143B : 2663636


submitted: Submitted batch job 2663637


1687 : 2663637


submitted: Submitted batch job 2663639


3439 : 2663639


submitted: Submitted batch job 2663640


I0001031-9 : 2663640


submitted: Submitted batch job 2663641


I0001560-7 : 2663641


submitted: Submitted batch job 2663642


I0003710-6 : 2663642


submitted: Submitted batch job 2663643


P305 : 2663643


submitted: Submitted batch job 2663644


4 : 2663644


submitted: Submitted batch job 2663645


26 : 2663645


submitted: Submitted batch job 2663647


P000071 : 2663647


submitted: Submitted batch job 2663654


P000076 : 2663654


submitted: Submitted batch job 2663656


KPS_73 : 2663656


submitted: Submitted batch job 2663658


P000075 : 2663658


submitted: Submitted batch job 2663659


P221 : 2663659


submitted: Submitted batch job 2663660


I0001162-2 : 2663660


submitted: Submitted batch job 2663661


1972 : 2663661


submitted: Submitted batch job 2663662


P227 : 2663662


submitted: Submitted batch job 2663663


P000276 : 2663663


submitted: Submitted batch job 2663665


1 : 2663665


submitted: Submitted batch job 2663666


P000271 : 2663666


submitted: Submitted batch job 2663668


P000272 : 2663668


submitted: Submitted batch job 2663669


P000273 : 2663669
P000176 : 2663670


submitted: Submitted batch job 2663670


############################################################################################################################################################################################################################

### Submit Jobs - Replicate Isolates

############################################################################################################################################################################################################################

In [39]:
import os
import pandas as pd
from slurmpy import Slurm
import numpy as np

#### Import annotation file for all sample pairs

In [40]:
#annotation table for the replicate isolates and their corresponding tags,
#re-indexed so that rows can be looked up by patient_id
replicate_annotation_csv_path = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/CSV_files/sample_annotation_files/Replicate_fastq_path_names_and_JankyPipe_tags_filtered_with_Kraken_and_F2.csv'
sample_annotation = pd.read_csv(replicate_annotation_csv_path , sep = ',')
sample_annotation = sample_annotation.set_index('patient_id')

#### Find which replicate pairs have 2 valid VCF files for both replicate isolates

In [41]:
#Flag every isolate row with 'yes' / 'no' depending on whether its JankyPipe run finished.
#The result is stored in a new 'successful_JankyPipe_run' column of sample_annotation.

#where pilon VCF and lineage information will be stored [LAB FOLDER]
replicate_output_root = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/JankyPipe/output_REPLICATES/'

successful_JankyPipe_run = []

#iterate over the tag values themselves (in row order): sample_annotation is indexed by
#patient_id, so an integer [] lookup on the 'tag' column would rely on pandas' positional fallback
for tag in sample_annotation['tag']:

    #output directory of this isolate; the tag ID is the same as the ID for the fastq files
    output_dir = replicate_output_root + tag

    #check to see 'Lineage Call' folder exists in the output directory (last thing that is run in JankyPipe)
    if os.path.exists(output_dir + '/fast-lineage-caller/'):
        successful_JankyPipe_run.append('yes')
    else:
        successful_JankyPipe_run.append('no')

sample_annotation['successful_JankyPipe_run'] = successful_JankyPipe_run

In [42]:
#show the isolates whose JankyPipe run was flagged as not successful
sample_annotation.loc[sample_annotation['successful_JankyPipe_run'] == 'no']

Unnamed: 0_level_0,fastq_files,population,run_ID,sample_ID,sample_order,tag,isolate_type,successful_JankyPipe_run
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [43]:
#patient IDs (index labels) with at least one isolate flagged 'no', i.e. subjects
#for which we do not have 2 good VCF files
JankyPipe_run_failed = sample_annotation['successful_JankyPipe_run'] == 'no'
bad_isolate_pair_IDs = set( sample_annotation.index[JankyPipe_run_failed] )

In [44]:
#single-argument parenthesised print: same output as the print statement
print(list(bad_isolate_pair_IDs))

[]


#### Get list of subjects (replicate isolate pairs) to get SNP variants from

In [45]:
#unique pair IDs from the annotation index, minus the pairs flagged as bad
all_isolate_pair_IDs = set( sample_annotation.index )
isolate_pair_ID_list = list( all_isolate_pair_IDs.difference(bad_isolate_pair_IDs) )

In [46]:
#single-argument parenthesised print: same output as the print statement
print(isolate_pair_ID_list)

['replicate_pair_19', 'replicate_pair_18', 'replicate_pair_27', 'replicate_pair_26', 'replicate_pair_11', 'replicate_pair_36', 'replicate_pair_13', 'replicate_pair_34', 'replicate_pair_15', 'replicate_pair_32', 'replicate_pair_57', 'replicate_pair_16', 'P12_1', 'replicate_pair_54', 'P12_3', 'P12_2', 'replicate_pair_51', 'replicate_pair_50', 'replicate_pair_73', 'replicate_pair_72', 'replicate_pair_71', 'replicate_pair_70', 'replicate_pair_59', 'replicate_pair_58', 'replicate_pair_74', 'replicate_pair_9', 'replicate_pair_8', 'replicate_pair_1', 'replicate_pair_0', 'replicate_pair_3', 'replicate_pair_2', 'replicate_pair_5', 'replicate_pair_4', 'replicate_pair_7', 'replicate_pair_6', 'I0002918-6', 'replicate_pair_37', 'replicate_pair_42', 'replicate_pair_48', 'replicate_pair_10', 'replicate_pair_45', 'replicate_pair_12', 'replicate_pair_30', 'replicate_pair_33', 'replicate_pair_14', 'replicate_pair_46', 'replicate_pair_47', 'replicate_pair_44', 'replicate_pair_23', 'replicate_pair_24', 'r

In [47]:
#number of isolate pairs that a job will be submitted for below
len(isolate_pair_ID_list)

70

#### Submit a job for each subject to get the SNPs

In [48]:
#Submit one SLURM job per isolate pair; each job runs the base-call comparison script for that pair.

#paths handed to the script on its command line
parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/JankyPipe/output_REPLICATES/'
sample_annotation_file_path = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/CSV_files/sample_annotation_files/Replicate_fastq_path_names_and_JankyPipe_tags_filtered_with_Kraken_and_F2.csv'
SNP_call_parent_output_dir = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/pickled_files/variant_calling/replicate_SNPs/all_SNPs_between_replicate_pairs/'

#script that is run once per isolate pair
BaseCall_variant_script_path = '/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/python_scripts/BaseCall_variant_between_isolate_pair_collection.py'

#directory where you want output + error files
#(the same for every pair, so change into it once instead of on every iteration)
os.chdir('/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/SLURM_logs/BaseCalls_between_isolate_pairs_SLURM_logs/')

#resources / notification settings requested for every job
SLURM_job_options = {'partition':'short' , 'n':'1' , 't':'0-03:00:00' , 'mem-per-cpu':'24G' , 'mail-type':'FAIL' , 'mail-user':'roger_vargas@g.harvard.edu'}

for isolate_pair_ID in isolate_pair_ID_list:

    #command line: script followed by its four space-separated arguments
    replicate_SNP_variants_job = ' '.join(['python' , BaseCall_variant_script_path , parent_output_dir , sample_annotation_file_path , isolate_pair_ID , SNP_call_parent_output_dir])

    #the job is named after the pair so that it can be traced back from the queue / logs
    job_name = isolate_pair_ID

    #hand Slurm its own copy of the options in case it modifies the dict it receives
    s = Slurm(job_name , dict(SLURM_job_options))

    #submits the job
    job_id = s.run(replicate_SNP_variants_job)

    print(job_name  + ' : ' +  str(job_id))

submitted: Submitted batch job 2663672


replicate_pair_19 : 2663672


submitted: Submitted batch job 2663673


replicate_pair_18 : 2663673


submitted: Submitted batch job 2663674


replicate_pair_27 : 2663674


submitted: Submitted batch job 2663675


replicate_pair_26 : 2663675


submitted: Submitted batch job 2663676


replicate_pair_11 : 2663676


submitted: Submitted batch job 2663677


replicate_pair_36 : 2663677


submitted: Submitted batch job 2663678


replicate_pair_13 : 2663678


submitted: Submitted batch job 2663679


replicate_pair_34 : 2663679


submitted: Submitted batch job 2663680


replicate_pair_15 : 2663680


submitted: Submitted batch job 2663681


replicate_pair_32 : 2663681


submitted: Submitted batch job 2663682


replicate_pair_57 : 2663682


submitted: Submitted batch job 2663683


replicate_pair_16 : 2663683


submitted: Submitted batch job 2663684


P12_1 : 2663684


submitted: Submitted batch job 2663685


replicate_pair_54 : 2663685


submitted: Submitted batch job 2663686


P12_3 : 2663686


submitted: Submitted batch job 2663687


P12_2 : 2663687


submitted: Submitted batch job 2663688


replicate_pair_51 : 2663688


submitted: Submitted batch job 2663689


replicate_pair_50 : 2663689


submitted: Submitted batch job 2663690


replicate_pair_73 : 2663690


submitted: Submitted batch job 2663691


replicate_pair_72 : 2663691


submitted: Submitted batch job 2663692


replicate_pair_71 : 2663692


submitted: Submitted batch job 2663693


replicate_pair_70 : 2663693


submitted: Submitted batch job 2663694


replicate_pair_59 : 2663694


submitted: Submitted batch job 2663695


replicate_pair_58 : 2663695


submitted: Submitted batch job 2663696


replicate_pair_74 : 2663696


submitted: Submitted batch job 2663697


replicate_pair_9 : 2663697


submitted: Submitted batch job 2663698


replicate_pair_8 : 2663698


submitted: Submitted batch job 2663699


replicate_pair_1 : 2663699


submitted: Submitted batch job 2663700


replicate_pair_0 : 2663700


submitted: Submitted batch job 2663701


replicate_pair_3 : 2663701


submitted: Submitted batch job 2663702


replicate_pair_2 : 2663702


submitted: Submitted batch job 2663703


replicate_pair_5 : 2663703


submitted: Submitted batch job 2663704


replicate_pair_4 : 2663704


submitted: Submitted batch job 2663705


replicate_pair_7 : 2663705


submitted: Submitted batch job 2663706


replicate_pair_6 : 2663706


submitted: Submitted batch job 2663707


I0002918-6 : 2663707


submitted: Submitted batch job 2663708


replicate_pair_37 : 2663708


submitted: Submitted batch job 2663709


replicate_pair_42 : 2663709


submitted: Submitted batch job 2663710


replicate_pair_48 : 2663710


submitted: Submitted batch job 2663711


replicate_pair_10 : 2663711


submitted: Submitted batch job 2663712


replicate_pair_45 : 2663712


submitted: Submitted batch job 2663714


replicate_pair_12 : 2663714


submitted: Submitted batch job 2663715


replicate_pair_30 : 2663715


submitted: Submitted batch job 2663716


replicate_pair_33 : 2663716


submitted: Submitted batch job 2663717


replicate_pair_14 : 2663717


submitted: Submitted batch job 2663718


replicate_pair_46 : 2663718


submitted: Submitted batch job 2663719


replicate_pair_47 : 2663719


submitted: Submitted batch job 2663720


replicate_pair_44 : 2663720


submitted: Submitted batch job 2663721


replicate_pair_23 : 2663721


submitted: Submitted batch job 2663722


replicate_pair_24 : 2663722


submitted: Submitted batch job 2663723


replicate_pair_69 : 2663723


submitted: Submitted batch job 2663724


replicate_pair_40 : 2663724


submitted: Submitted batch job 2663725


replicate_pair_41 : 2663725


submitted: Submitted batch job 2663726


replicate_pair_28 : 2663726


submitted: Submitted batch job 2663727


replicate_pair_29 : 2663727


submitted: Submitted batch job 2663728


replicate_pair_66 : 2663728


submitted: Submitted batch job 2663748


replicate_pair_67 : 2663748


submitted: Submitted batch job 2663750


replicate_pair_60 : 2663750


submitted: Submitted batch job 2663751


replicate_pair_61 : 2663751


submitted: Submitted batch job 2663752


replicate_pair_62 : 2663752


submitted: Submitted batch job 2663753


replicate_pair_49 : 2663753


submitted: Submitted batch job 2663754


I0003922-7 : 2663754


submitted: Submitted batch job 2663755


replicate_pair_21 : 2663755


submitted: Submitted batch job 2663756


replicate_pair_22 : 2663756


submitted: Submitted batch job 2663757


replicate_pair_68 : 2663757


submitted: Submitted batch job 2663759


replicate_pair_56 : 2663759


submitted: Submitted batch job 2663760


I0004220-5 : 2663760


submitted: Submitted batch job 2663761


I0003710-6 : 2663761


submitted: Submitted batch job 2663762


replicate_pair_43 : 2663762
replicate_pair_25 : 2663763


submitted: Submitted batch job 2663763
