In [1]:
import tskit
import pandas
import numpy

from tqdm import tqdm
pandas.options.display.max_columns=999
import tsconvert
import functools
from functools import reduce
from scipy.stats import chi2_contingency

import warnings
warnings.filterwarnings('ignore')

Let's make an tree annotated by lineage (using shapes at the end of branches) and whether or not the sample contains an S450L mutation in rpoB (by dots around the circle)

First, let's read in the `GENOMES` table and extract the form of `uid` that Kerri used in her phylogenetic tree (this was how the vcf files were named but there were two problems so when I made the tables I fixed the problems, but because she used this original identifier we have to rebuilt it from the VCF filename)

In [3]:
GENOMES = pandas.read_pickle('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/tb_rnap_compensation/tables/GENOMES.pkl.gz')
GENOMES.reset_index(inplace=True)

def create_original_uid(row):

    filename = row.FTP_FILENAME_VCF

    if '.regeno' in filename:
        if filename[:5] != 'comas':
            return('site'+filename.split('/site')[1].split('.regeno')[0])
        else:
            return(filename.split('comas_regeno/')[1].split('.regeno')[0])
    else:
        return None

GENOMES['ORIGINAL_UID'] = GENOMES.apply(create_original_uid, axis=1)
GENOMES.set_index('UNIQUEID', inplace=True)
GENOMES


Unnamed: 0_level_0,SITEID,SUBJID,LABID,ISOLATENO,SEQREPS,BELONGS_GPI,PER_SAMPLE_VCF_PRESENT,REGENOTYPED_VCF_PRESENT,CLOCKWORK_VERSION,TBI_INDEX,KMER_COUNTS,SNP_DISTANCE_TO_H37rV,SPECIES,LINEAGE_NAME,SUBLINEAGE_NAME,LINEAGE_PERCENTAGE,N_NULL,N_SNP,N_INDEL,N_FILTER_FAIL,N_REF,N_HET,CATALOGUE_NAME,CATALOGUE_VERSION,TB_TYPE_1,WGS_PREDICTION_STRING,IMAGE_MD5SUM,FTP_PATH,FTP_FILENAME_VCF,TREE_PATH,TREE_FILENAME_VCF,FASTQ_MD5SUMS,SEQTREAT_SAMPLE,MYKROBE_LINEAGE_NAME_1,MYKROBE_LINEAGE_NAME_2,ENA,ORIGINAL_UID
UNIQUEID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
site.02.subj.0958.lab.22A197.iso.1,02,0958,22A197,1,197,True,True,True,0.8.3,True,False,1154.0,M. tuberculosis,Lineage 2,,71.283784,4934,0,1154,104,0,14329,CRyPTIC,v1.31,MDR,RRURRRSSSSSSSSS,{'02-0958-22A197-1-14': 'a587bac9ad2a0ebd36274...,/well/bag/jeffk/release_staging/,00/01/41/00/14100/site.02.iso.1.subject.0958.l...,dat/CRyPTIC2/V2/02/0958/22A197/1/regenotyped/,site.02.subj.0958.lab.22A197.iso.1.v0.8.3.rege...,,False,Lineage 2,lineage2.2.1,ERS5301054,site.02.iso.1.subject.0958.lab_id.22A197.seq_r...
site.02.subj.0823.lab.2013241494.iso.1,02,0823,2013241494,1,241494,True,True,True,0.8.3,True,False,388.0,M. tuberculosis,Lineage 4,,95.705521,2250,0,388,43,0,9442,CRyPTIC,v1.31,UNK,UUSUSSSSSSSSSSS,{'02-0823-2013241494-1-14': '698507bed7ff19268...,/well/bag/jeffk/release_staging/,00/01/41/43/14143/site.02.iso.1.subject.0823.l...,dat/CRyPTIC2/V2/02/0823/2013241494/1/regenotyped/,site.02.subj.0823.lab.2013241494.iso.1.v0.8.3....,,False,Lineage 2,lineage2.2.3,ERS5301097,site.02.iso.1.subject.0823.lab_id.2013241494.s...
site.02.subj.0359.lab.222018-14.iso.1,02,0359,222018-14,1,14222018,True,True,True,0.8.3,True,False,1147.0,M. tuberculosis,Lineage 2,,95.608108,3578,0,1147,118,0,13038,CRyPTIC,v1.31,UNK,SUSSSSSSUSSSSSS,{'02-0359-222018-14-1-14': '39c28529c7564ce379...,/well/bag/jeffk/release_staging/,00/01/08/73/10873/site.02.iso.1.subject.0359.l...,dat/CRyPTIC2/V2/02/0359/222018-14/1/regenotyped/,site.02.subj.0359.lab.222018-14.iso.1.v0.8.3.r...,,False,Lineage 2,lineage2.2.3,ERS5298526,site.02.iso.1.subject.0359.lab_id.222018-14.se...
site.02.subj.0224.lab.2013221088.iso.1,02,0224,2013221088,1,13221088_2013221088,True,True,True,0.8.3,True,False,1193.0,M. tuberculosis,Lineage 2,,96.621622,3529,0,1193,104,0,13201,CRyPTIC,v1.31,SUS,SSSSSSSSSSSSSSS,,/well/bag/jeffk/release_staging/,00/01/13/37/11337/site.02.iso.1.subject.0224.l...,dat/CRyPTIC2/V2/02/0224/2013221088/1/regenotyped/,site.02.subj.0224.lab.2013221088.iso.1.v0.8.3....,,False,Lineage 2,lineage2.2.5,ERS5300527,site.02.iso.1.subject.0224.lab_id.2013221088.s...
site.02.subj.0918.lab.22A153.iso.1,02,0918,22A153,1,153,True,True,True,0.8.3,True,False,1163.0,M. tuberculosis,Lineage 2,,96.283784,4736,0,1163,121,0,13970,CRyPTIC,v1.31,MDR,RRRRSSUSUSSSSSS,{'02-0918-22A153-1-14': 'b48e27b23f8377c209854...,/well/bag/jeffk/release_staging/,00/01/40/62/14062/site.02.iso.1.subject.0918.l...,dat/CRyPTIC2/V2/02/0918/22A153/1/regenotyped/,site.02.subj.0918.lab.22A153.iso.1.v0.8.3.rege...,,False,Lineage 2,lineage2.2.3,ERS5301016,site.02.iso.1.subject.0918.lab_id.22A153.seq_r...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
site.10.subj.YA00023171.lab.YA00023171.iso.1,10,YA00023171,YA00023171,1,1,True,True,True,0.8.3,True,False,693.0,M. tuberculosis,Lineage 4,LAM,100.000000,5911,0,693,55,0,11874,CRyPTIC,v1.31,SUS,SSSSSSUSSSSSSSS,{'10-YA00023171-YA00023171-1-7': 'da79aa0564b0...,/well/bag/jeffk/release_staging/,00/00/31/24/3124/site.10.iso.1.subject.YA00023...,dat/CRyPTIC2/V2/10/YA00023171/YA00023171/1/reg...,site.10.subj.YA00023171.lab.YA00023171.iso.1.v...,,False,Lineage 4,lineage4.3.2.1,ERS5298216,site.10.iso.1.subject.YA00023171.lab_id.YA0002...
site.10.subj.SADH00634109_S16.lab.DH00634109_S16.iso.1,10,SADH00634109_S16,DH00634109_S16,1,1,False,True,False,0.8.3,True,True,,,,,,317,0,700,63,0,32,CRyPTIC,v1.31,SUS,SSSSSSSSSSSSSSS,,/well/bag/jeffk/release_staging/,00/00/81/50/8150/site.10.iso.1.subject.SADH006...,dat/CRyPTIC2/V2/10/SADH00634109_S16/DH00634109...,site.10.subj.SADH00634109_S16.lab.DH00634109_S...,,False,Lineage 4,lineage4.3.2,ERS2400531,
site.10.subj.H37RV_SA2437.lab.H37RV_SA2437.iso.1,10,H37RV_SA2437,H37RV_SA2437,1,1,False,True,False,0.8.3,True,True,,,,,,44,0,34,16,0,1,CRyPTIC,v1.31,SUS,SSSSSSSSSSSSSSS,,/well/bag/jeffk/release_staging/,00/01/45/29/14529/site.10.iso.1.subject.H37Rv_...,dat/CRyPTIC2/V2/10/H37RV_SA2437/H37RV_SA2437/1...,site.10.subj.H37RV_SA2437.lab.H37RV_SA2437.iso...,,False,Lineage 4,lineage4.10,NO_ENA,
site.10.subj.YA00044774.lab.YA00044774.iso.1,10,YA00044774,YA00044774,1,1,True,True,True,0.8.3,True,False,668.0,M. tuberculosis,Lineage 4,LAM,100.000000,13671,0,668,60,0,35563,CRyPTIC,v1.31,SUS,SSSSSSUSSSSSSSS,{'10-YA00044774-YA00044774-1-14': 'd3917b6c0c2...,/well/bag/jeffk/release_staging/,00/01/42/55/14255/site.10.iso.1.subject.YA0004...,dat/CRyPTIC2/V2/10/YA00044774/YA00044774/1/reg...,site.10.subj.YA00044774.lab.YA00044774.iso.1.v...,,False,Lineage 4,lineage4.3.2.1,NO_ENA,site.10.iso.1.subject.YA00044774.lab_id.YA0004...


In [None]:
GENOMES.

Save UNIQUEIDs with inconclusive sequencing result in REMOVE list

In [4]:
EFFECTS = pandas.read_pickle('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/tb_rnap_compensation/tables/EFFECTS.pkl.gz')
EFFECTS.reset_index(inplace = True)
REMOVE = EFFECTS[EFFECTS.MUTATION.str[-1].isin(['O','X'])].UNIQUEID
REMOVE

8                     site.02.subj.0823.lab.2013241494.iso.1
9                     site.02.subj.0823.lab.2013241494.iso.1
24                        site.02.subj.0085.lab.22A036.iso.1
25                        site.02.subj.0085.lab.22A036.iso.1
26                    site.02.subj.0830.lab.2013221279.iso.1
                                 ...                        
1074535    site.10.subj.SATRL0073861_S19.lab.TRL0073861_S...
1074564         site.10.subj.YA00038960.lab.YA00038960.iso.1
1074565         site.10.subj.YA00038960.lab.YA00038960.iso.1
1074598         site.10.subj.YA00036589.lab.YA00036589.iso.1
1074599         site.10.subj.YA00036589.lab.YA00036589.iso.1
Name: UNIQUEID, Length: 158356, dtype: object

Make dataframe of all UNIQUEIDs that are classified as resistant

Include other resistances ()

In [5]:
EFFECTS = pandas.read_pickle('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/tb_rnap_compensation/tables/EFFECTS.pkl.gz')
EFFECTS.reset_index(inplace = True)
EFFECTS

Unnamed: 0,UNIQUEID,DRUG,GENE,MUTATION,CATALOGUE_NAME,CATALOGUE_VERSION,CATALOGUE_GRAMMAR,SITEID,PREDICTION,DEFAULT_CATALOGUE
0,site.02.subj.0958.lab.22A197.iso.1,RFB,rpoB,P45S,CRyPTIC,v1.31,GARC1,02,U,True
1,site.02.subj.0958.lab.22A197.iso.1,RIF,rpoB,P45S,CRyPTIC,v1.31,GARC1,02,U,True
2,site.02.subj.0958.lab.22A197.iso.1,RFB,rpoB,S450L,CRyPTIC,v1.31,GARC1,02,U,True
3,site.02.subj.0958.lab.22A197.iso.1,RIF,rpoB,S450L,CRyPTIC,v1.31,GARC1,02,R,True
4,site.02.subj.0958.lab.22A197.iso.1,RFB,rpoB,A1075A,CRyPTIC,v1.31,GARC1,02,S,True
...,...,...,...,...,...,...,...,...,...,...
1074679,site.10.subj.YA00044774.lab.YA00044774.iso.1,RFB,rpoC,A542A,CRyPTIC,v1.31,GARC1,10,S,True
1074680,site.10.subj.YA00166043.lab.YA00166043.iso.1,RIF,rpoB,D435V,CRyPTIC,v1.31,GARC1,10,R,True
1074681,site.10.subj.YA00166043.lab.YA00166043.iso.1,RFB,rpoB,D435V,CRyPTIC,v1.31,GARC1,10,U,True
1074682,site.10.subj.YA00166043.lab.YA00166043.iso.1,RIF,rpoB,A1075A,CRyPTIC,v1.31,GARC1,10,S,True


In [23]:
EFFECTS = pandas.read_pickle('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/tb_rnap_compensation/tables/EFFECTS.pkl.gz')
EFFECTS.reset_index(inplace = True)
EFFECTS['GENE_MUTATION'] = EFFECTS['GENE'] + '_' + EFFECTS['MUTATION']
EFFECTS = EFFECTS[(EFFECTS.DRUG=='RIF') & (EFFECTS.PREDICTION=='R') 
                  & (~EFFECTS.MUTATION.str[-1].isin(['O','X']))]

res_gene_mutations = EFFECTS.GENE_MUTATION
query = res_gene_mutations.unique()

MUTATIONS = pandas.read_pickle('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/tb_rnap_compensation/tables/MUTATIONS.pkl.gz')
MUTATIONS.reset_index(inplace = True)
MUTATIONS = MUTATIONS.astype({'GENE':'str'})
MUTATIONS['GENE_MUTATION'] = MUTATIONS['GENE'] + '_' + MUTATIONS['MUTATION']

resistant = MUTATIONS[MUTATIONS.GENE_MUTATION.isin(res_gene_mutations.unique())]
resistant.set_index('UNIQUEID', inplace=True)
resistant['IS_RES'] = True
resistant

Unnamed: 0_level_0,GENE,MUTATION,POSITION,AMINO_ACID_NUMBER,GENOME_INDEX,NUCLEOTIDE_NUMBER,REF,ALT,IS_SNP,IS_INDEL,IN_CDS,IN_PROMOTER,IS_SYNONYMOUS,IS_NONSYNONYMOUS,IS_HET,IS_NULL,IS_FILTER_PASS,ELEMENT_TYPE,MUTATION_TYPE,INDEL_LENGTH,INDEL_1,INDEL_2,SITEID,NUMBER_NUCLEOTIDE_CHANGES,GENE_MUTATION,IS_RES
UNIQUEID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
site.02.subj.0958.lab.22A197.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoB_S450L,True
site.02.subj.0918.lab.22A153.iso.1,rpoB,D435G,435.0,435.0,,,gac,ggc,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoB_D435G,True
site.02.subj.1033.lab.2013185075.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoB_S450L,True
site.02.subj.0739.lab.2013221518.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoB_S450L,True
site.02.subj.0104.lab.22A057.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoB_S450L,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
site.10.subj.SATRL0115669_S18.lab.TRL0115669_S18.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoB_S450L,True
site.10.subj.XD01227882.lab.XD01227882.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoB_S450L,True
site.10.subj.WG00269790.lab.WG00269790.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoB_S450L,True
site.10.subj.YA00008913.lab.YA00008913.iso.1,rpoB,S450L,450.0,450.0,,,tcg,ttg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoB_S450L,True


Make dataframe of all UNIQUEIDs that contain CMs

In [8]:
hits_final = pandas.read_csv('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/fisher_all.csv')
hits_final = hits_final[hits_final.both != 0]
hits_final = hits_final.iloc[:-1,:]

hits_final['log10'] = -numpy.log10(hits_final.p_value.astype('float'))

#create list of all hits above the 98% quantile of p-values
hits_final = hits_final[(hits_final.log10 > numpy.quantile(hits_final.log10, 0.98))]
print('There are', len(hits_final.other_mutation.unique()), 'hits above the indicated p-value quantile')

#remove all hits that are synonymous mutations or non-homoplastic
hits_final = hits_final[~(hits_final.other_mutation.apply(lambda x: x[5]==x[-1]))]
no_homoplasy = pandas.read_csv('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/all_hits_concat.csv')
no_homoplasy = no_homoplasy[no_homoplasy.homoplasy == False].CM
hits_final['homoplasy'] = (~hits_final.other_mutation.isin(no_homoplasy))
hits_final = hits_final[(hits_final.homoplasy == True)]

hits_final = hits_final.sort_values('both', ascending = False)
hits_final = hits_final.drop(['p_value', 'None', 'other', 'resistant', 'n_resistant', 'n_other', 'log10', 'homoplasy'], axis = 1)
hits_final.reset_index(drop = True, inplace = True)
hits_final

There are 96 hits above the indicated p-value quantile


Unnamed: 0,resistant_mutation,other_mutation,both
0,rpoB_S450L,rpoC_V483G,1206
1,rpoB_S450L,rpoC_I491V,665
2,rpoB_S450L,rpoC_V483A,586
3,rpoB_S450L,rpoC_I491T,457
4,rpoB_S450L,rpoC_P1040R,396
5,rpoB_S450L,rpoC_F452S,345
6,rpoB_S450L,rpoB_L731P,226
7,rpoB_S450L,rpoC_N698S,205
8,rpoB_S450L,rpoC_D485Y,194
9,rpoB_S450L,rpoC_V517L,184


In [9]:
compensatory = MUTATIONS[MUTATIONS.GENE_MUTATION.isin(hits_final.other_mutation.unique())]
compensatory.set_index('UNIQUEID', inplace=True)
compensatory['IS_CM'] = True
compensatory

Unnamed: 0_level_0,GENE,MUTATION,POSITION,AMINO_ACID_NUMBER,GENOME_INDEX,NUCLEOTIDE_NUMBER,REF,ALT,IS_SNP,IS_INDEL,IN_CDS,IN_PROMOTER,IS_SYNONYMOUS,IS_NONSYNONYMOUS,IS_HET,IS_NULL,IS_FILTER_PASS,ELEMENT_TYPE,MUTATION_TYPE,INDEL_LENGTH,INDEL_1,INDEL_2,SITEID,NUMBER_NUCLEOTIDE_CHANGES,GENE_MUTATION,IS_CM
UNIQUEID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
site.02.subj.0958.lab.22A197.iso.1,rpoB,P45S,45.0,45.0,,,ccg,tcg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoB_P45S,True
site.02.subj.0918.lab.22A153.iso.1,rpoB,I491L,491.0,491.0,,,atc,ctc,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoB_I491L,True
site.02.subj.0739.lab.2013221518.iso.1,rpoC,V483A,483.0,483.0,,,gtg,gcg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoC_V483A,True
site.02.subj.0885.lab.22A119.iso.1,rpoC,V483A,483.0,483.0,,,gtg,gcg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoC_V483A,True
site.02.subj.0411.lab.235087-14.iso.1,rpoA,T187A,187.0,187.0,,,acc,gcc,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,02,1,rpoA_T187A,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
site.10.subj.SATRL0116843_S10.lab.TRL0116843_S10.iso.1,rpoC,V483G,483.0,483.0,,,gtg,ggg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoC_V483G,True
site.10.subj.SATRL0116880_S12.lab.TRL0116880_S12.iso.1,rpoC,N698S,698.0,698.0,,,aac,agc,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoC_N698S,True
site.10.subj.SATRL0116187_S19.lab.TRL0116187_S19.iso.1,rpoC,V483G,483.0,483.0,,,gtg,ggg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoC_V483G,True
site.10.subj.SATRL0115669_S18.lab.TRL0115669_S18.iso.1,rpoC,L527V,527.0,527.0,,,ttg,gtg,True,False,True,False,False,True,False,False,True,GENE,AAM,,,,10,1,rpoC_L527V,True


Prevalence of CMs in resistant samples in lineage 2 (vs other lineages)

In [10]:
GENOMES.TB_TYPE_1.value_counts()

TB_TYPE_1
SUS    47914
MDR    16885
UNK     8900
XDR     2122
RIF     2039
Name: count, dtype: int64

In [11]:
res_CM = GENOMES[((GENOMES.TB_TYPE_1 == 'MDR') | (GENOMES.TB_TYPE_1 == 'XDR')) & (GENOMES.LINEAGE_NAME != 'Lineage 2') & (GENOMES.index.isin(compensatory.index))]
res = GENOMES[((GENOMES.TB_TYPE_1 == 'MDR') | (GENOMES.TB_TYPE_1 == 'XDR')) & (GENOMES.LINEAGE_NAME != 'Lineage 2')] #(GENOMES.TB_TYPE_1 == 'RIF') | 
print(len(res_CM), len(res), len(res_CM)/len(res))

5873 15756 0.37274689007362277


Calculate statistic for CM prevalence in resistant samples in Lineage 2 vs other Lineages:

In [12]:
data_res = [[2407, 3423], [11217, 17623]]
data_res = [[2059, 3423], [5980, 17623]]
data_MDR = [[2047, 3251], [5873, 15756]]

stat, p, dof, expected = chi2_contingency(data_MDR)
  
# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

p value is 1.2772599244343872e-60
Dependent (reject H0)


Now let's read in the mutational data for samples with specific mutations

In [13]:
MUTATIONS = pandas.read_pickle('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/tb_rnap_compensation/tables/MUTATIONS.pkl.gz')
MUTATIONS.reset_index(inplace=True)

rpob_s450l = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='S450L')]
rpob_s450l.set_index('UNIQUEID', inplace=True)
rpob_s450l['HAS_S450L'] = True
rpob_s450l

rpoc_e1092d = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='E1092D')] #No
rpoc_e1092d.set_index('UNIQUEID', inplace=True)
rpoc_e1092d['HAS_E1092D'] = True
rpoc_e1092d

rpoc_v483g = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='V483G')] #Yes
rpoc_v483g.set_index('UNIQUEID', inplace=True)
rpoc_v483g['HAS_V483G'] = True
rpoc_v483g

rpoc_i491v = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='I491V')] #Yes
rpoc_i491v.set_index('UNIQUEID', inplace=True)
rpoc_i491v['HAS_I491V'] = True
rpoc_i491v

rpoc_v483a = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='V483A')] #Yes
rpoc_v483a.set_index('UNIQUEID', inplace=True)
rpoc_v483a['HAS_V483A'] = True
rpoc_v483a

rpoc_i491t = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='I491T')] #Yes (but almost exclusively lineage 2)
rpoc_i491t.set_index('UNIQUEID', inplace=True)
rpoc_i491t['HAS_I491T'] = True
rpoc_i491t

rpoc_p1040r = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='P1040R')] #Yes
rpoc_p1040r.set_index('UNIQUEID', inplace=True)
rpoc_p1040r['HAS_P1040R'] = True
rpoc_p1040r

rpoc_g332s = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='G332S')] #Yes
rpoc_g332s.set_index('UNIQUEID', inplace=True)
rpoc_g332s['HAS_G332S'] = True
rpoc_g332s

rpoc_g433s = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='G433S')] #Yes
rpoc_g433s.set_index('UNIQUEID', inplace=True)
rpoc_g433s['HAS_G433S'] = True
rpoc_g433s


#rpoC_N698S
rpoc_n698s = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='N698S')] #Yes
rpoc_n698s.set_index('UNIQUEID', inplace=True)
rpoc_n698s['HAS_N698S'] = True
rpoc_n698s

#rpoC_F452S
rpoc_f452s = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='F452S')] #Yes
rpoc_f452s.set_index('UNIQUEID', inplace=True)
rpoc_f452s['HAS_F452S'] = True
rpoc_f452s

#rpoB_E761D
rpob_e761d = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='E761D')] #Yes
rpob_e761d.set_index('UNIQUEID', inplace=True)
rpob_e761d['HAS_E761D'] = True
rpob_e761d

#rpoB_L731P
rpob_l731p = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='L731P')] #Yes
rpob_l731p.set_index('UNIQUEID', inplace=True)
rpob_l731p['HAS_L731P'] = True
rpob_l731p

#rpoC_D485Y
rpoc_d485y = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='D485Y')] #Yes
rpoc_d485y.set_index('UNIQUEID', inplace=True)
rpoc_d485y['HAS_D485Y'] = True
rpoc_d485y

#rpoC_V517L
rpoc_v517l = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='V517L')] #Yes
rpoc_v517l.set_index('UNIQUEID', inplace=True)
rpoc_v517l['HAS_V517L'] = True
rpoc_v517l

#rpoC_V1252L
rpoc_v1252l = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='V1252L')] #Yes
rpoc_v1252l.set_index('UNIQUEID', inplace=True)
rpoc_v1252l['HAS_V1252L'] = True
rpoc_v1252l

#rpoA_T187A
rpoa_t187a = MUTATIONS[(MUTATIONS.GENE=='rpoA') & (MUTATIONS.MUTATION=='T187A')] #Yes
rpoa_t187a.set_index('UNIQUEID', inplace=True)
rpoa_t187a['HAS_T187A'] = True
rpoa_t187a


#rpoC_D485N
rpoc_d485n = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='D485N')] #Yes
rpoc_d485n.set_index('UNIQUEID', inplace=True)
rpoc_d485n['HAS_D485N'] = True
rpoc_d485n

#rpoC_L516P
rpoc_l516p = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='L516P')] #Yes
rpoc_l516p.set_index('UNIQUEID', inplace=True)
rpoc_l516p['HAS_L516P'] = True
rpoc_l516p

#rpoB_R827C
rpob_r827c = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='R827C')] #Yes
rpob_r827c.set_index('UNIQUEID', inplace=True)
rpob_r827c['HAS_R827C'] = True
rpob_r827c

#rpoC_P1040S
rpoc_p1040s = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='P1040S')] #Yes
rpoc_p1040s.set_index('UNIQUEID', inplace=True)
rpoc_p1040s['HAS_P1040S'] = True
rpoc_p1040s

#rpoC_G332R
rpoc_g332R = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='G332R')] #Yes
rpoc_g332R.set_index('UNIQUEID', inplace=True)
rpoc_g332R['HAS_G332R'] = True
rpoc_g332R

#rpoC_L527V
rpoc_l527v = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='L527V')] #Yes
rpoc_l527v.set_index('UNIQUEID', inplace=True)
rpoc_l527v['HAS_L527V'] = True
rpoc_l527v

#rpoC_P1040A
rpoc_p1040a = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='P1040A')] #Yes
rpoc_p1040a.set_index('UNIQUEID', inplace=True)
rpoc_p1040a['HAS_P1040A'] = True
rpoc_p1040a

#rpoB_I1106T
rpob_i1106t = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='I1106T')] #No
rpob_i1106t.set_index('UNIQUEID', inplace=True)
rpob_i1106t['HAS_I1106T'] = True
rpob_i1106t


#rpoC_K445R
rpoc_k445r = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='K445R')] #Yes
rpoc_k445r.set_index('UNIQUEID', inplace=True)
rpoc_k445r['HAS_K445R'] = True
rpoc_k445r

#rpoC_F452L
rpoc_f452l = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='F452L')] #Yes
rpoc_f452l.set_index('UNIQUEID', inplace=True)
rpoc_f452l['HAS_F452L'] = True
rpoc_f452l

#rpoC_L547V
rpoc_l547v = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='L547V')] #No
rpoc_l547v.set_index('UNIQUEID', inplace=True)
rpoc_l547v['HAS_L547V'] = True
rpoc_l547v

#rpoB_I480V
rpob_i480v = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='I480V')] #Yes
rpob_i480v.set_index('UNIQUEID', inplace=True)
rpob_i480v['HAS_I480V'] = True
rpob_i480v

#rpoB_K891E
rpob_k891e = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='K891E')] #Yes
rpob_k891e.set_index('UNIQUEID', inplace=True)
rpob_k891e['HAS_K891E'] = True
rpob_k891e

#rpoC_N416S
rpoc_n416s = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='N416S')] #Yes
rpoc_n416s.set_index('UNIQUEID', inplace=True)
rpoc_n416s['HAS_N416S'] = True
rpoc_n416s

#rpoC_E1033A
rpoc_e1033a = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='E1033A')] #Yes
rpoc_e1033a.set_index('UNIQUEID', inplace=True)
rpoc_e1033a['HAS_E1033A'] = True
rpoc_e1033a

#rpoC_S561P
rpoc_s561p = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='S561P')] #Yes
rpoc_s561p.set_index('UNIQUEID', inplace=True)
rpoc_s561p['HAS_S561P'] = True
rpoc_s561p


#rpoB_V168A
rpob_v168a = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='V168A')] #Yes
rpob_v168a.set_index('UNIQUEID', inplace=True)
rpob_v168a['HAS_V168A'] = True
rpob_v168a

#rpoB_H1028R
rpob_h1028r = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='H1028R')] #No
rpob_h1028r.set_index('UNIQUEID', inplace=True)
rpob_h1028r['HAS_H1028R'] = True
rpob_h1028r

#sigA_D146E
siga_d146e = MUTATIONS[(MUTATIONS.GENE=='sigA') & (MUTATIONS.MUTATION=='D146E')] #Yes
siga_d146e.set_index('UNIQUEID', inplace=True)
siga_d146e['HAS_D146E'] = True
siga_d146e

#rpoA_P25R
rpoa_p25r = MUTATIONS[(MUTATIONS.GENE=='rpoA') & (MUTATIONS.MUTATION=='P25R')] #Not enough data (1)
rpoa_p25r.set_index('UNIQUEID', inplace=True)
rpoa_p25r['HAS_P25R'] = True
rpoa_p25r

#rpoB_I491L
rpob_i491l = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='I491L')] #Yes
rpob_i491l.set_index('UNIQUEID', inplace=True)
rpob_i491l['HAS_I491L'] = True
rpob_i491l

#rpoC_G388A
rpoc_g388a = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='G388A')] #Yes
rpoc_g388a.set_index('UNIQUEID', inplace=True)
rpoc_g388a['HAS_G388A'] = True
rpoc_g388a

#sigA_A223T
siga_a223t = MUTATIONS[(MUTATIONS.GENE=='sigA') & (MUTATIONS.MUTATION=='A223T')] #No
siga_a223t.set_index('UNIQUEID', inplace=True)
siga_a223t['HAS_A223T'] = True
siga_a223t

#rpoB_E207K
rpob_e207k = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='E207K')] #No
rpob_e207k.set_index('UNIQUEID', inplace=True)
rpob_e207k['HAS_E207K'] = True
rpob_e207k


#rpoB_R167C
rpob_r167c = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='R167C')] #Yes
rpob_r167c.set_index('UNIQUEID', inplace=True)
rpob_r167c['HAS_R167C'] = True
rpob_r167c

#rpoC_G571R
rpoc_g571r = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='G571R')] #Yes
rpoc_g571r.set_index('UNIQUEID', inplace=True)
rpoc_g571r['HAS_G571R'] = True
rpoc_g571r

#rpoC_L405M
rpoc_l405m = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='L405M')] #No data (0)
rpoc_l405m.set_index('UNIQUEID', inplace=True)
rpoc_l405m['HAS_L405M'] = True
rpoc_l405m

#rpoB_L464M
rpob_l464m = MUTATIONS[(MUTATIONS.GENE=='rpoB') & (MUTATIONS.MUTATION=='L464M')] #No data (0)
rpob_l464m.set_index('UNIQUEID', inplace=True)
rpob_l464m['HAS_L464M'] = True
rpob_l464m

#rpoZ_T107I
rpoz_t107i = MUTATIONS[(MUTATIONS.GENE=='rpoZ') & (MUTATIONS.MUTATION=='T107I')] #No data (0)
rpoz_t107i.set_index('UNIQUEID', inplace=True)
rpoz_t107i['HAS_T107I'] = True
rpoz_t107i

#rpoC_T853A
rpoc_t853a = MUTATIONS[(MUTATIONS.GENE=='rpoC') & (MUTATIONS.MUTATION=='T853A')] #Yes
rpoc_t853a.set_index('UNIQUEID', inplace=True)
rpoc_t853a['HAS_T853A'] = True
rpoc_t853a

#make process faster (list of mutations that are missing, loop through them and make dataframe)
mutation_list = ['rpoC_G594E','rpoB_c-61t','rpoB_V695L','rpoC_W484G','rpoB_A692T','rpoA_V183G','rpoC_P481T','rpoB_Q409R','rpoC_V1039A','rpoC_A521D','rpoB_Q975H','rpoB_P45S','rpoC_N826T','rpoA_-40_indel','rpoC_L507V','rpoC_V431M','rpoB_V496A','rpoB_I488V','rpoC_V1252M','rpoB_A286V','rpoC_K1152Q','rpoC_T812I','rpoC_G519D','rpoC_L449V','rpoB_R827L','rpoC_F452C','rpoB_A405P','rpoA_A180V','rpoC_N698K','rpoA_D190G','rpoC_H525Q','rpoC_P434R','rpoA_G31S','rpoB_P45L','sigA_247_indel','rpoA_E184D','rpoB_F503S','rpoB_T400A','rpoC_S428A','rpoB_I491V','rpoB_S874Y','rpoB_R552L','rpoA_G31A','rpoC_D747A','rpoC_P1040L','rpoC_E1033K','rpoC_R741S','rpoC_E1137G','rpoC_T1230I','rpoC_W484S','rpoB_H835R','rpoC_L449R','rpoC_N698H','rpoB_R827H','rpoA_R182L','rpoC_F831L','rpoZ_P85S','rpoC_G332C','rpoB_V496M','rpoC_H525N','rpoB_V534M','rpoC_Q523E','rpoC_E518D','rpoB_R552H','rpoC_P1040Q','rpoB_E550G','rpoB_T399I','rpoC_Q1125H','rpoB_F971L','rpoC_R770H','rpoC_P434Q','rpoC_P434T','rpoA_V183A','rpoC_V1039G','rpoC_K1152N','rpoC_P1040T','rpoC_A734V','rpoC_G433A','rpoC_K715T','rpoC_E750G','rpoB_H723D','rpoC_I885V','rpoC_E750D','rpoC_Q1110H','rpoB_L42V','rpoC_G433C','rpoC_E1113G','rpoB_I588V','rpoC_E1140D','rpoC_E757G','rpoB_V970A','rpoC_Q479R','rpoB_V534A','rpoB_Y564H','rpoB_L554P','rpoB_R552C','rpoC_P434A','rpoB_D574E','rpoA_T181A','rpoC_A492V','rpoB_I873F','rpoC_G519R','rpoA_T187P','rpoC_E488Q','rpoB_T400S','rpoC_D747G','rpoC_A1213E','rpoB_a-83g','rpoC_V1124G','rpoC_N416T','rpoB_P45T','rpoC_W484L','rpoC_V1147A','rpoC_D735E','rpoC_H748P','rpoB_N487S','rpoB_I491M','rpoB_L862R']

In [15]:
#make process faster (list of mutations that are missing, loop through them and make dataframe)
#mutation_list = ['rpoC_G594E','rpoB_c-61t','rpoB_V695L','rpoC_W484G','rpoB_A692T','rpoA_V183G','rpoC_P481T','rpoB_Q409R','rpoC_V1039A','rpoC_A521D','rpoB_Q975H','rpoB_P45S','rpoC_N826T','rpoA_-40_indel','rpoC_L507V','rpoC_V431M','rpoB_V496A','rpoB_I488V','rpoC_V1252M','rpoB_A286V','rpoC_K1152Q','rpoC_T812I','rpoC_G519D','rpoC_L449V','rpoB_R827L','rpoC_F452C','rpoB_A405P','rpoA_A180V','rpoC_N698K','rpoA_D190G','rpoC_H525Q','rpoC_P434R','rpoA_G31S','rpoB_P45L','sigA_247_indel','rpoA_E184D','rpoB_F503S','rpoB_T400A','rpoC_S428A','rpoB_I491V','rpoB_S874Y','rpoB_R552L','rpoA_G31A','rpoC_D747A','rpoC_P1040L','rpoC_E1033K','rpoC_R741S','rpoC_E1137G','rpoC_T1230I','rpoC_W484S','rpoB_H835R','rpoC_L449R','rpoC_N698H','rpoB_R827H','rpoA_R182L','rpoC_F831L','rpoZ_P85S','rpoC_G332C','rpoB_V496M','rpoC_H525N','rpoB_V534M','rpoC_Q523E','rpoC_E518D','rpoB_R552H','rpoC_P1040Q','rpoB_E550G','rpoB_T399I','rpoC_Q1125H','rpoB_F971L','rpoC_R770H','rpoC_P434Q','rpoC_P434T','rpoA_V183A','rpoC_V1039G','rpoC_K1152N','rpoC_P1040T','rpoC_A734V','rpoC_G433A','rpoC_K715T','rpoC_E750G','rpoB_H723D','rpoC_I885V','rpoC_E750D','rpoC_Q1110H','rpoB_L42V','rpoC_G433C','rpoC_E1113G','rpoB_I588V','rpoC_E1140D','rpoC_E757G','rpoB_V970A','rpoC_Q479R','rpoB_V534A','rpoB_Y564H','rpoB_L554P','rpoB_R552C','rpoC_P434A','rpoB_D574E','rpoA_T181A','rpoC_A492V','rpoB_I873F','rpoC_G519R','rpoA_T187P','rpoC_E488Q','rpoB_T400S','rpoC_D747G','rpoC_A1213E','rpoB_a-83g','rpoC_V1124G','rpoC_N416T','rpoB_P45T','rpoC_W484L','rpoC_V1147A','rpoC_D735E','rpoC_H748P','rpoB_N487S','rpoB_I491M','rpoB_L862R']
#mutation_list = ['rpoB_A1075A','rpoB_A451A','rpoZ_S22S','rpoC_S548S','rpoB_G453G','rpoB_S431S','rpoC_C62C','rpoB_Q432Q','rpoB_G442G','rpoC_D404D','rpoB_L443L','rpoC_A542A','rpoB_S428S','rpoB_G426G','rpoB_P416P','sigA_V375V']
mutation_list = ['sigA_L386M','sigA_G380A','rpoC_I128V','sigA_I382V','rpoB_R791T','rpoB_E460D','sigA_E385Q']
mutation_list

data_frames = [GENOMES[['ORIGINAL_UID', 'LINEAGE_NAME']]]

d = {}
for i,name in enumerate(mutation_list):
    d["{0}".format(name)] = MUTATIONS[(MUTATIONS.GENE==name[0:4]) & (MUTATIONS.MUTATION==name[5:])]
    d["{0}".format(name)].set_index('UNIQUEID', inplace=True)
    d["{0}".format(name)]['HAS_MUTATION'] = True
    
d.keys()

dict_keys(['sigA_L386M', 'sigA_G380A', 'rpoC_I128V', 'sigA_I382V', 'rpoB_R791T', 'rpoB_E460D', 'sigA_E385Q'])

In [16]:
LABELS = GENOMES[['ORIGINAL_UID', 'LINEAGE_NAME']].join(rpob_s450l[['HAS_S450L']], how='left')
LABELS.fillna(value={'HAS_S450L': False}, inplace=True)

for i,name in enumerate(d.keys()):
    LABELS = LABELS.join(d[name]['HAS_MUTATION'], how='left')
    LABELS.rename(columns=({ 'HAS_MUTATION': name}), inplace=True)
    LABELS.fillna(value={name: False}, inplace=True)

LABELS.reset_index(inplace=True)
LABELS.set_index('ORIGINAL_UID', inplace=True)
LABELS

Unnamed: 0_level_0,UNIQUEID,LINEAGE_NAME,HAS_S450L,sigA_L386M,sigA_G380A,rpoC_I128V,sigA_I382V,rpoB_R791T,rpoB_E460D,sigA_E385Q
ORIGINAL_UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
site.02.iso.1.subject.0958.lab_id.22A197.seq_reps.197,site.02.subj.0958.lab.22A197.iso.1,Lineage 2,True,False,False,False,False,False,False,False
site.02.iso.1.subject.0823.lab_id.2013241494.seq_reps.241494,site.02.subj.0823.lab.2013241494.iso.1,Lineage 4,False,False,False,False,False,False,False,False
site.02.iso.1.subject.0359.lab_id.222018-14.seq_reps.14222018,site.02.subj.0359.lab.222018-14.iso.1,Lineage 2,False,False,False,False,False,False,False,False
site.02.iso.1.subject.0224.lab_id.2013221088.seq_reps.13221088_2013221088,site.02.subj.0224.lab.2013221088.iso.1,Lineage 2,False,False,False,False,False,False,False,False
site.02.iso.1.subject.0918.lab_id.22A153.seq_reps.153,site.02.subj.0918.lab.22A153.iso.1,Lineage 2,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...
site.10.iso.1.subject.YA00023171.lab_id.YA00023171.seq_reps.1,site.10.subj.YA00023171.lab.YA00023171.iso.1,Lineage 4,False,False,False,False,False,False,False,False
,site.10.subj.SADH00634109_S16.lab.DH00634109_S...,,False,False,False,False,False,False,False,False
,site.10.subj.H37RV_SA2437.lab.H37RV_SA2437.iso.1,,False,False,False,False,False,False,False,False
site.10.iso.1.subject.YA00044774.lab_id.YA00044774.seq_reps.1,site.10.subj.YA00044774.lab.YA00044774.iso.1,Lineage 4,False,False,False,False,False,False,False,False


Join the boolean dataframes with the LABEL dataframe

In [17]:
LABELS = GENOMES[['ORIGINAL_UID', 'LINEAGE_NAME']].join(rpob_s450l[['HAS_S450L']], how='left')
LABELS.fillna(value={'HAS_S450L': False}, inplace=True)
LABELS.reset_index(inplace=True)
LABELS.set_index('ORIGINAL_UID', inplace=True)
LABELS.index.value_counts()

ORIGINAL_UID
site.02.iso.1.subject.0958.lab_id.22A197.seq_reps.197              1
site.13.iso.1.subject.180200018.lab_id.180200018.seq_reps.1        1
site.13.iso.1.subject.140300022.lab_id.140300022.seq_reps.1        1
site.13.iso.1.subject.080200106.lab_id.080200106.seq_reps.1        1
site.13.iso.1.subject.180200089.lab_id.180200089.seq_reps.1        1
                                                                  ..
site.11.iso.1.subject.XTB_17-589.lab_id.XTB_17-589.seq_reps.1      1
site.11.iso.1.subject.XTB_18-022.lab_id.XTB_18-022.seq_reps.1_2    1
site.11.iso.1.subject.XTB_17-188.lab_id.XTB_17-188.seq_reps.1      1
site.11.iso.1.subject.XTB_18-108.lab_id.XTB_18-108.seq_reps.1      1
site.10.iso.1.subject.YA00166043.lab_id.YA00166043.seq_reps.1      1
Name: count, Length: 15228, dtype: int64

Tried more efficient way of joining multiple dataframes (still has same issue of duplicated UNIQUEIDs when joining resistant and compensatory)

In [13]:
data_frames = [GENOMES[['ORIGINAL_UID', 'LINEAGE_NAME']], resistant[['IS_RES']], compensatory[['IS_CM']],
               rpoc_e1092d[['HAS_E1092D']], rpoc_v483g[['HAS_V483G']], rpob_s450l[['HAS_S450L']], 
               rpoc_i491v[['HAS_I491V']], rpoc_v483a[['HAS_V483A']], rpoc_i491t[['HAS_I491T']], 
               rpoc_p1040r[['HAS_P1040R']],rpoc_g332s[['HAS_G332S']], rpoc_g433s[['HAS_G433S']], 
               rpoc_n698s[['HAS_N698S']], rpoc_f452s[['HAS_F452S']], rpob_e761d[['HAS_E761D']], 
               rpob_l731p[['HAS_L731P']], rpoc_d485y[['HAS_D485Y']], rpoc_v517l[['HAS_V517L']], 
               rpoc_v1252l[['HAS_V1252L']], rpoa_t187a[['HAS_T187A']], rpoc_d485n[['HAS_D485N']], 
               rpoc_l516p[['HAS_L516P']], rpob_r827c[['HAS_R827C']], rpoc_p1040s[['HAS_P1040S']], 
               rpoc_g332R[['HAS_G332R']], rpoc_l527v[['HAS_L527V']], rpoc_p1040a[['HAS_P1040A']], 
               rpob_i1106t[['HAS_I1106T']], rpoc_k445r[['HAS_K445R']], rpoc_f452l[['HAS_F452L']], 
               rpoc_l547v[['HAS_L547V']], rpob_i480v[['HAS_I480V']], rpob_k891e[['HAS_K891E']], 
               rpoc_n416s[['HAS_N416S']], rpoc_e1033a[['HAS_E1033A']], rpoc_s561p[['HAS_S561P']], 
               rpob_v168a[['HAS_V168A']], rpob_h1028r[['HAS_H1028R']], siga_d146e[['HAS_D146E']], 
               rpoa_p25r[['HAS_P25R']], rpob_i491l[['HAS_I491L']], rpoc_g388a[['HAS_G388A']], 
               siga_a223t[['HAS_A223T']], rpob_e207k[['HAS_E207K']], rpob_r167c[['HAS_R167C']], 
               rpoc_g571r[['HAS_G571R']], rpoc_l405m[['HAS_L405M']], rpob_l464m[['HAS_L464M']], 
               rpoz_t107i[['HAS_T107I']], rpoc_t853a[['HAS_T853A']]]

LABELS = reduce(lambda  left,right: pandas.merge(left,right, on=['UNIQUEID'],
                                            how='outer'), data_frames)
LABELS.replace(numpy.nan, False, inplace = True)
LABELS.reset_index(inplace=True)
LABELS.set_index('ORIGINAL_UID', inplace=True)
LABELS.index.value_counts()

ORIGINAL_UID
False                                                                    63079
site.03.iso.1.subject.JPN-R2012-0044.lab_id.JPN-R2012-0044.seq_reps.1        3
site.02.iso.1.subject.0183.lab_id.2013222280.seq_reps.2013222280             3
site.02.iso.1.subject.0070.lab_id.22A021.seq_reps.21                         3
site.02.iso.1.subject.0133.lab_id.22A088.seq_reps.88                         3
                                                                         ...  
site.11.iso.1.subject.XTB-18-189.lab_id.XTB-18-189.seq_reps.1                1
site.11.iso.1.subject.XTB_18-028.lab_id.XTB_18-028.seq_reps.1_2              1
site.11.iso.1.subject.XTB-18-160.lab_id.XTB-18-160.seq_reps.1                1
site.11.iso.1.subject.XTB_17-505.lab_id.XTB_17-505.seq_reps.1_2              1
site.10.iso.1.subject.YA00166043.lab_id.YA00166043.seq_reps.1                1
Name: count, Length: 15229, dtype: int64

In [18]:
LABELS

Unnamed: 0_level_0,UNIQUEID,LINEAGE_NAME,HAS_S450L
ORIGINAL_UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
site.02.iso.1.subject.0958.lab_id.22A197.seq_reps.197,site.02.subj.0958.lab.22A197.iso.1,Lineage 2,True
site.02.iso.1.subject.0823.lab_id.2013241494.seq_reps.241494,site.02.subj.0823.lab.2013241494.iso.1,Lineage 4,False
site.02.iso.1.subject.0359.lab_id.222018-14.seq_reps.14222018,site.02.subj.0359.lab.222018-14.iso.1,Lineage 2,False
site.02.iso.1.subject.0224.lab_id.2013221088.seq_reps.13221088_2013221088,site.02.subj.0224.lab.2013221088.iso.1,Lineage 2,False
site.02.iso.1.subject.0918.lab_id.22A153.seq_reps.153,site.02.subj.0918.lab.22A153.iso.1,Lineage 2,False
...,...,...,...
site.10.iso.1.subject.YA00023171.lab_id.YA00023171.seq_reps.1,site.10.subj.YA00023171.lab.YA00023171.iso.1,Lineage 4,False
,site.10.subj.SADH00634109_S16.lab.DH00634109_S...,,False
,site.10.subj.H37RV_SA2437.lab.H37RV_SA2437.iso.1,,False
site.10.iso.1.subject.YA00044774.lab_id.YA00044774.seq_reps.1,site.10.subj.YA00044774.lab.YA00044774.iso.1,Lineage 4,False


Read in the Newick file downloaded from ITOL. To get this I uploaded the Newick file in the `Brankin_Malone` repo and then exported it again as a Newick. Don't know why but trying to use the original Newick leads to errors in `tsconvert` lower down

In [19]:
with open('cryptic_tree.itol.newick', 'r') as f:
    cryptic_newick = f.read()

print(cryptic_newick[:1000])

(site.04.iso.1.subject.03818.lab_id.830476.seq_reps.1:6.90800,site.04.iso.1.subject.00310.lab_id.701951.seq_reps.1:3.09200,(site.04.iso.1.subject.01627.lab_id.27972.seq_reps.1:0.46454,(site.04.iso.1.subject.01042.lab_id.717652.seq_reps.1:8.18534,(((site.04.iso.1.subject.02112.lab_id.803805.seq_reps.1:9.91015,site.04.iso.1.subject.04137.lab_id.832790.seq_reps.1:8.08985):0.17857,((((site.04.iso.1.subject.01432.lab_id.724551.seq_reps.1:1.73206,site.04.iso.1.subject.02235.lab_id.805177.seq_reps.1:4.26794):0.87800,(site.04.iso.1.subject.00300.lab_id.702487.seq_reps.1:0.00000,site.04.iso.1.subject.03648.lab_id.JJH9682.seq_reps.1:2.00000):3.62200):8.94817,(site.04.iso.1.subject.00564.lab_id.709046.seq_reps.1:7.96229,(site.04.iso.1.subject.04356.lab_id.903933.seq_reps.1:5.42753,(site.04.iso.1.subject.01175.lab_id.721997.seq_reps.1:1.00000,site.04.iso.1.subject.01288.lab_id.801278.seq_reps.1:0.00000):3.07247):2.53771):1.48933):0.11168,(((site.04.iso.1.subject.02152.lab_id.803055.seq_reps.1:0.89

Use `tsconvert` to convert to a succint tree sequence

In [20]:
ts = tsconvert.from_newick(cryptic_newick, min_edge_length=0.001)
print('done')

done


Get a copy of the underlying tables that describe the tree so we can relate the `node id` to the `UNIQUEID`

In [21]:
new_tables = ts.dump_tables()  # make a copy of the tree sequence tables, for parsing
new_tables.nodes[:3]

id,flags,population,individual,time,metadata
0,0,-1,-1,2060.10904,{}
1,1,-1,-1,2053.20104,{'name': 'site.04.iso.1.subject.03818...
2,1,-1,-1,2057.01704,{'name': 'site.04.iso.1.subject.00310...


Attempt to prune the tree by removing lineages that are not 1-4, and removing outliers/ inconclusive sequences

In [31]:
vis = 'prune'

prune_header="""PRUNE

DATA
"""

prune_annotations = ''

# LABELS = GENOMES[['ORIGINAL_UID', 'LINEAGE_NAME']]

for i in tqdm(range(ts.num_nodes)):  

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        lineage = LABELS.loc[uid]['LINEAGE_NAME']
        
        if i == 15072 or i == 15074 or i == 15077 or i == 15076:
            print('outliers:', row.metadata)
            
            #uncomment to exclude outliers:
            #continue
            
        if LABELS.loc[uid].UNIQUEID in REMOVE.to_list():
            print('inconclusive sequencing:', str(i), row.metadata)
            
            #uncomment to exclude inconclusive sequences:
            continue
        
        if lineage!='' and lineage[0]=='L':
            
            if lineage == 'Lineage 1' or lineage == 'Lineage 2' or lineage == 'Lineage 3' or lineage == 'Lineage 4':
                line = 'n' + str(i) + '\n'

                prune_annotations += line 

            else:
                print('other lineage:', i)
                
                continue
                
        else:
            print('no lineage:', i)
            lineage = None

prune_annotations = prune_header + prune_annotations

  0%|                                       | 51/30454 [00:00<01:59, 253.68it/s]

inconclusive sequencing: 1 {'name': 'site.04.iso.1.subject.03818.lab_id.830476.seq_reps.1'}


  0%|▏                                     | 150/30454 [00:00<01:38, 309.00it/s]

inconclusive sequencing: 106 {'name': 'site.04.iso.1.subject.03595.lab_id.819090.seq_reps.1'}
inconclusive sequencing: 139 {'name': 'site.04.iso.1.subject.04511.lab_id.905563.seq_reps.1_2'}
inconclusive sequencing: 164 {'name': 'site.04.iso.1.subject.04554.lab_id.903942.seq_reps.1'}


  1%|▎                                     | 252/30454 [00:00<01:33, 322.95it/s]

inconclusive sequencing: 196 {'name': 'site.04.iso.1.subject.04556.lab_id.901728.seq_reps.1'}


  1%|▍                                     | 357/30454 [00:01<01:30, 332.45it/s]

inconclusive sequencing: 312 {'name': 'site.04.iso.1.subject.00549.lab_id.712263.seq_reps.1'}
inconclusive sequencing: 314 {'name': 'site.04.iso.1.subject.00429.lab_id.712261.seq_reps.1'}
inconclusive sequencing: 337 {'name': 'site.04.iso.1.subject.03218.lab_id.825638.seq_reps.1'}
inconclusive sequencing: 360 {'name': 'site.04.iso.1.subject.00898.lab_id.716085.seq_reps.1'}


  2%|▌                                     | 495/30454 [00:01<01:34, 315.37it/s]

inconclusive sequencing: 430 {'name': 'site.04.iso.1.subject.01401.lab_id.726777.seq_reps.1'}
inconclusive sequencing: 453 {'name': 'site.04.iso.1.subject.04004.lab_id.829393.seq_reps.1'}
inconclusive sequencing: 469 {'name': 'site.04.iso.1.subject.03598.lab_id.827147.seq_reps.1'}
inconclusive sequencing: 506 {'name': 'site.04.iso.1.subject.04448.lab_id.903574.seq_reps.1'}


  2%|▋                                     | 599/30454 [00:01<01:31, 327.46it/s]

inconclusive sequencing: 545 {'name': 'site.04.iso.1.subject.05330.lab_id.910071.seq_reps.1'}
inconclusive sequencing: 550 {'name': 'site.04.iso.1.subject.01554.lab_id.728320.seq_reps.1'}
inconclusive sequencing: 554 {'name': 'site.04.iso.1.subject.03767.lab_id.831236.seq_reps.1'}
inconclusive sequencing: 569 {'name': 'site.04.iso.1.subject.04189.lab_id.829826.seq_reps.1'}
inconclusive sequencing: 572 {'name': 'site.04.iso.1.subject.01563.lab_id.726771.seq_reps.1'}
inconclusive sequencing: 574 {'name': 'site.04.iso.1.subject.03317.lab_id.826189.seq_reps.1'}
inconclusive sequencing: 592 {'name': 'site.04.iso.1.subject.03654.lab_id.JJH9867.seq_reps.1'}
inconclusive sequencing: 604 {'name': 'site.04.iso.1.subject.01999.lab_id.802031.seq_reps.1'}
inconclusive sequencing: 605 {'name': 'site.04.iso.1.subject.03568.lab_id.829571.seq_reps.1'}
inconclusive sequencing: 619 {'name': 'site.04.iso.1.subject.02854.lab_id.816419.seq_reps.1'}
inconclusive sequencing: 622 {'name': 'site.04.iso.1.subjec

  2%|▊                                     | 673/30454 [00:02<01:31, 324.58it/s]

 631 {'name': 'site.04.iso.1.subject.02311.lab_id.803284.seq_reps.1'}
inconclusive sequencing: 635 {'name': 'site.04.iso.1.subject.01957.lab_id.801380.seq_reps.1'}
inconclusive sequencing: 656 {'name': 'site.04.iso.1.subject.00475.lab_id.711002.seq_reps.1'}


  2%|▉                                     | 739/30454 [00:02<01:32, 322.96it/s]

inconclusive sequencing: 705 {'name': 'site.04.iso.1.subject.05350.lab_id.913369.seq_reps.1'}
inconclusive sequencing: 708 {'name': 'site.04.iso.1.subject.03267.lab_id.824801.seq_reps.1'}
inconclusive sequencing: 718 {'name': 'site.04.iso.1.subject.03922.lab_id.914205.seq_reps.1'}
inconclusive sequencing: 721 {'name': 'site.04.iso.1.subject.05398.lab_id.912132.seq_reps.1'}
inconclusive sequencing: 722 {'name': 'site.04.iso.1.subject.04111.lab_id.901398.seq_reps.1'}
inconclusive sequencing: 738 {'name': 'site.04.iso.1.subject.03495.lab_id.827494.seq_reps.1'}
inconclusive sequencing: 746 {'name': 'site.04.iso.1.subject.02094.lab_id.803708.seq_reps.1'}
inconclusive sequencing: 750 {'name': 'site.04.iso.1.subject.02964.lab_id.827959.seq_reps.1'}


  3%|█                                     | 841/30454 [00:02<01:35, 308.90it/s]

inconclusive sequencing: 811 {'name': 'site.04.iso.1.subject.04610.lab_id.905979.seq_reps.1'}
inconclusive sequencing: 824 {'name': 'site.04.iso.1.subject.00194.lab_id.633913.seq_reps.1'}
inconclusive sequencing: 828 {'name': 'site.04.iso.1.subject.01628.lab_id.725044.seq_reps.1'}
inconclusive sequencing: 862 {'name': 'site.04.iso.1.subject.03910.lab_id.831968.seq_reps.1'}


  3%|█▏                                    | 934/30454 [00:02<01:41, 289.58it/s]

inconclusive sequencing: 884 {'name': 'site.04.iso.1.subject.00700.lab_id.706706.seq_reps.1'}
inconclusive sequencing: 897 {'name': 'site.04.iso.1.subject.03494.lab_id.828960.seq_reps.1'}
inconclusive sequencing: 904 {'name': 'site.04.iso.1.subject.04399.lab_id.902995.seq_reps.1'}
inconclusive sequencing: 916 {'name': 'site.04.iso.1.subject.03499.lab_id.827478.seq_reps.1'}
inconclusive sequencing: 919 {'name': 'site.04.iso.1.subject.04758.lab_id.905292.seq_reps.1'}


  3%|█▏                                   | 1022/30454 [00:03<01:45, 278.76it/s]

inconclusive sequencing: 981 {'name': 'site.04.iso.1.subject.04391.lab_id.904171.seq_reps.1'}


  4%|█▎                                   | 1087/30454 [00:03<01:42, 286.93it/s]

inconclusive sequencing: 1048 {'name': 'site.04.iso.1.subject.04636.lab_id.905869.seq_reps.1'}
inconclusive sequencing: 1080 {'name': 'site.04.iso.1.subject.01388.lab_id.726392.seq_reps.1'}
inconclusive sequencing: 1084 {'name': 'site.04.iso.1.subject.00699.lab_id.714916.seq_reps.1'}


  4%|█▍                                   | 1149/30454 [00:03<01:47, 273.38it/s]

inconclusive sequencing: 1119 {'name': 'site.04.iso.1.subject.03691.lab_id.JJH10303.seq_reps.1'}
inconclusive sequencing: 1158 {'name': 'site.04.iso.1.subject.04059.lab_id.831960.seq_reps.1'}


  4%|█▌                                   | 1236/30454 [00:04<01:49, 266.68it/s]

inconclusive sequencing: 1197 {'name': 'site.04.iso.1.subject.04386.lab_id.903936.seq_reps.1'}
inconclusive sequencing: 1244 {'name': 'site.04.iso.1.subject.03706.lab_id.JJH9874.seq_reps.1'}
inconclusive sequencing: 1248 {'name': 'site.04.iso.1.subject.03822.lab_id.829849.seq_reps.1'}


  4%|█▌                                   | 1295/30454 [00:04<01:49, 266.73it/s]

inconclusive sequencing: 1258 {'name': 'site.04.iso.1.subject.04490.lab_id.904547.seq_reps.1'}
inconclusive sequencing: 1288 {'name': 'site.04.iso.1.subject.04508.lab_id.904449.seq_reps.1'}
inconclusive sequencing: 1290 {'name': 'site.04.iso.1.subject.03278.lab_id.821146.seq_reps.1'}
inconclusive sequencing: 1298 {'name': 'site.04.iso.1.subject.04388.lab_id.903579.seq_reps.1'}
inconclusive sequencing: 1299 {'name': 'site.04.iso.1.subject.04558.lab_id.904897.seq_reps.1_2'}


  5%|█▋                                   | 1391/30454 [00:04<01:37, 296.73it/s]

inconclusive sequencing: 1334 {'name': 'site.04.iso.1.subject.03617.lab_id.830304.seq_reps.1'}
inconclusive sequencing: 1352 {'name': 'site.04.iso.1.subject.05006.lab_id.908561.seq_reps.1'}
inconclusive sequencing: 1360 {'name': 'site.04.iso.1.subject.02239.lab_id.806552.seq_reps.1'}
inconclusive sequencing: 1362 {'name': 'site.04.iso.1.subject.00083.lab_id.632666.seq_reps.1'}
inconclusive sequencing: 1364 {'name': 'site.04.iso.1.subject.00499.lab_id.705592.seq_reps.1'}
inconclusive sequencing: 1370 {'name': 'site.04.iso.1.subject.01229.lab_id.728523.seq_reps.1'}
inconclusive sequencing: 1372 {'name': 'site.04.iso.1.subject.05638.lab_id.919105.seq_reps.1'}
inconclusive sequencing: 1376 {'name': 'site.04.iso.1.subject.00218.lab_id.629294.seq_reps.1'}
inconclusive sequencing: 1378 {'name': 'site.04.iso.1.subject.01516.lab_id.728189.seq_reps.1'}
inconclusive sequencing: 1380 {'name': 'site.04.iso.1.subject.03448.lab_id.827388.seq_reps.1'}
inconclusive sequencing: 1381 {'name': 'site.04.is

  5%|█▊                                   | 1488/30454 [00:04<01:33, 311.34it/s]

 1416 {'name': 'site.10.iso.1.subject.YA00026182.lab_id.YA00026182.seq_reps.1'}
inconclusive sequencing: 1418 {'name': 'site.10.iso.1.subject.YA00058291.lab_id.YA00058291.seq_reps.1'}
inconclusive sequencing: 1440 {'name': 'site.10.iso.1.subject.MG03902304.lab_id.MG03902304.seq_reps.1'}


  5%|█▉                                   | 1559/30454 [00:05<01:28, 327.51it/s]

inconclusive sequencing: 1493 {'name': 'site.17.iso.1.subject.K0045.lab_id.123-20-0045-1000.seq_reps.1'}


  5%|██                                   | 1663/30454 [00:05<01:28, 325.96it/s]

inconclusive sequencing: 1606 {'name': 'site.08.iso.1.subject.02TB1566.lab_id.20646.seq_reps.1'}
inconclusive sequencing: 1612 {'name': 'site.08.iso.1.subject.05TB21119.lab_id.26636.seq_reps.1'}
inconclusive sequencing: 1614 {'name': 'site.08.iso.1.subject.02TB1566.lab_id.20950.seq_reps.1'}
inconclusive sequencing: 1616 {'name': 'site.08.iso.1.subject.05TB32024.lab_id.1892.seq_reps.1'}
inconclusive sequencing: 1628 {'name': 'site.08.iso.1.subject.02TB2346.lab_id.26277.seq_reps.1'}
inconclusive sequencing: 1634 {'name': 'site.08.iso.1.subject.DTU-109.lab_id.2414.seq_reps.1'}
inconclusive sequencing: 1654 {'name': 'site.08.iso.1.subject.28TB-D06-009.lab_id.2849.seq_reps.1'}
inconclusive sequencing: 1681 {'name': 'site.08.iso.1.subject.24TB-068.lab_id.2205.seq_reps.1'}


  6%|██▏                                  | 1762/30454 [00:05<01:36, 296.79it/s]

inconclusive sequencing: 1718 {'name': 'site.02.iso.1.subject.0058.lab_id.22A008.seq_reps.8'}
inconclusive sequencing: 1746 {'name': 'site.04.iso.1.subject.00622.lab_id.706166.seq_reps.1'}
inconclusive sequencing: 1752 {'name': 'site.08.iso.1.subject.24TB-057.lab_id.2170.seq_reps.1'}
inconclusive sequencing: 1762 {'name': 'site.02.iso.1.subject.0951.lab_id.22A186.seq_reps.186'}


  6%|██▏                                  | 1850/30454 [00:06<01:48, 263.80it/s]

inconclusive sequencing: 1823 {'name': 'site.04.iso.1.subject.04628.lab_id.905741.seq_reps.1'}
inconclusive sequencing: 1865 {'name': 'site.04.iso.1.subject.04792.lab_id.905999.seq_reps.1'}


  6%|██▎                                  | 1940/30454 [00:06<01:40, 283.50it/s]

inconclusive sequencing: 1901 {'name': 'site.13.iso.1.subject.150300002.lab_id.150300002.seq_reps.1'}
inconclusive sequencing: 1930 {'name': 'site.02.iso.1.subject.0395.lab_id.235084-15.seq_reps.15235084'}
inconclusive sequencing: 1938 {'name': 'site.02.iso.1.subject.0485.lab_id.235059-14.seq_reps.14235059'}


  7%|██▍                                  | 2003/30454 [00:06<01:39, 286.94it/s]

inconclusive sequencing: 1973 {'name': 'site.08.iso.1.subject.DTU-102.lab_id.2228.seq_reps.1'}
inconclusive sequencing: 1985 {'name': 'site.08.iso.1.subject.02TB1435.lab_id.20230.seq_reps.1'}
inconclusive sequencing: 2023 {'name': 'site.04.iso.1.subject.03483.lab_id.828400.seq_reps.1'}
inconclusive sequencing: 2028 {'name': 'site.04.iso.1.subject.04055.lab_id.900808.seq_reps.1_2'}


  7%|██▌                                  | 2115/30454 [00:07<01:52, 252.80it/s]

inconclusive sequencing: 2072 {'name': 'site.10.iso.1.subject.XC00297731.lab_id.XC00297731.seq_reps.1'}
inconclusive sequencing: 2082 {'name': 'site.10.iso.1.subject.TD02566481.lab_id.TD02566481.seq_reps.1'}
inconclusive sequencing: 2110 {'name': 'site.10.iso.1.subject.LA00815854.lab_id.LA00815854.seq_reps.1'}


  7%|██▋                                  | 2224/30454 [00:07<01:55, 245.33it/s]

inconclusive sequencing: 2184 {'name': 'site.13.iso.1.subject.130200098.lab_id.130200098.seq_reps.1'}


  8%|██▉                                  | 2394/30454 [00:08<01:38, 284.15it/s]

inconclusive sequencing: 2348 {'name': 'site.08.iso.1.subject.24TB00-031.lab_id.2290.seq_reps.1'}
inconclusive sequencing: 2349 {'name': 'site.08.iso.1.subject.24TB00-031.lab_id.2291.seq_reps.1'}
inconclusive sequencing: 2352 {'name': 'site.02.iso.1.subject.0634.lab_id.232085-15.seq_reps.15232085'}
inconclusive sequencing: 2380 {'name': 'site.02.iso.1.subject.0247.lab_id.2013241149.seq_reps.241149'}
inconclusive sequencing: 2383 {'name': 'site.02.iso.1.subject.0786.lab_id.2013241530.seq_reps.13241530'}


  8%|███                                  | 2479/30454 [00:08<01:46, 262.21it/s]

inconclusive sequencing: 2442 {'name': 'site.10.iso.1.subject.YA00071169.lab_id.YA00071169.seq_reps.1'}
inconclusive sequencing: 2491 {'name': 'site.10.iso.1.subject.YA00127813.lab_id.YA00127813.seq_reps.1'}


  8%|███                                  | 2559/30454 [00:08<01:52, 248.65it/s]

inconclusive sequencing: 2518 {'name': 'site.04.iso.1.subject.01781.lab_id.733189.seq_reps.1'}
inconclusive sequencing: 2522 {'name': 'site.04.iso.1.subject.05586.lab_id.919100.seq_reps.1'}
inconclusive sequencing: 2543 {'name': 'site.04.iso.1.subject.04188.lab_id.900889.seq_reps.2'}
inconclusive sequencing: 2553 {'name': 'site.04.iso.1.subject.04786.lab_id.908366.seq_reps.1'}
inconclusive sequencing: 2558 {'name': 'site.04.iso.1.subject.05590.lab_id.919708.seq_reps.1'}


  9%|███▎                                 | 2760/30454 [00:09<01:20, 342.85it/s]

inconclusive sequencing: 2680 {'name': 'site.04.iso.1.subject.01833.lab_id.733790.seq_reps.1'}
inconclusive sequencing: 2682 {'name': 'site.10.iso.1.subject.YA00056649.lab_id.YA00056649.seq_reps.1'}
inconclusive sequencing: 2684 {'name': 'site.04.iso.1.subject.03145.lab_id.824025.seq_reps.1'}
inconclusive sequencing: 2688 {'name': 'site.04.iso.1.subject.03457.lab_id.825198.seq_reps.1'}
inconclusive sequencing: 2690 {'name': 'site.04.iso.1.subject.01008.lab_id.717670.seq_reps.1'}
inconclusive sequencing: 2692 {'name': 'site.04.iso.1.subject.00749.lab_id.707627.seq_reps.1'}
inconclusive sequencing: 2694 {'name': 'site.04.iso.1.subject.03078.lab_id.822554.seq_reps.1'}
inconclusive sequencing: 2696 {'name': 'site.04.iso.1.subject.00720.lab_id.707220.seq_reps.1'}
inconclusive sequencing: 2697 {'name': 'site.06.iso.1.subject.D130116.lab_id.06MIL1353.seq_reps.1'}
inconclusive sequencing: 2703 {'name': 'site.05.iso.1.subject.LR-2178.lab_id.FN-00683-16.seq_reps.1'}
inconclusive sequencing: 2706

  9%|███▍                                 | 2832/30454 [00:09<01:21, 339.60it/s]

inconclusive sequencing: 2770 {'name': 'site.11.iso.1.subject.MDR085.lab_id.SWE-68.seq_reps.1_2_3_4'}
inconclusive sequencing: 2800 {'name': 'site.10.iso.1.subject.YA00072101.lab_id.YA00072101.seq_reps.1'}
inconclusive sequencing: 2848 {'name': 'site.08.iso.1.subject.02TB0912.lab_id.18954.seq_reps.1'}


 10%|███▌                                 | 2935/30454 [00:10<01:27, 315.18it/s]

inconclusive sequencing: 2874 {'name': 'site.08.iso.1.subject.28TB-D19-008.lab_id.2993.seq_reps.1'}
inconclusive sequencing: 2896 {'name': 'site.17.iso.1.subject.K0056.lab_id.123-20-0056-1010.seq_reps.1'}
inconclusive sequencing: 2936 {'name': 'site.08.iso.1.subject.24TB00-005.lab_id.2246.seq_reps.1'}
inconclusive sequencing: 2938 {'name': 'site.08.iso.1.subject.02TB0119.lab_id.17106.seq_reps.1'}
inconclusive sequencing: 2940 {'name': 'site.08.iso.1.subject.05TB32041.lab_id.1974.seq_reps.1'}


 10%|███▋                                 | 3042/30454 [00:10<01:23, 328.41it/s]

inconclusive sequencing: 2977 {'name': 'site.08.iso.1.subject.02TB1827.lab_id.21466.seq_reps.1'}
inconclusive sequencing: 2980 {'name': 'site.08.iso.1.subject.02TB0058.lab_id.17681.seq_reps.1'}


 10%|███▊                                 | 3111/30454 [00:10<01:22, 332.24it/s]

inconclusive sequencing: 3068 {'name': 'site.02.iso.1.subject.0570.lab_id.242345-14.seq_reps.14242345'}
inconclusive sequencing: 3096 {'name': 'site.03.iso.1.subject.T7615.lab_id.IML00128.seq_reps.1'}
inconclusive sequencing: 3110 {'name': 'site.10.iso.1.subject.BG00791366.lab_id.BG00791366.seq_reps.1'}
inconclusive sequencing: 3112 {'name': 'site.10.iso.1.subject.YA00013543.lab_id.YA00013543.seq_reps.1'}


 11%|███▉                                 | 3220/30454 [00:10<01:21, 333.10it/s]

inconclusive sequencing: 3166 {'name': 'site.08.iso.1.subject.24TB-082.lab_id.2281.seq_reps.1'}
inconclusive sequencing: 3190 {'name': 'site.04.iso.1.subject.04199.lab_id.901558.seq_reps.1'}
inconclusive sequencing: 3223 {'name': 'site.13.iso.1.subject.170300209.lab_id.170300209.seq_reps.1'}
inconclusive sequencing: 3232 {'name': 'site.04.iso.1.subject.02691.lab_id.817428.seq_reps.1'}


 11%|███▉                                 | 3290/30454 [00:11<01:20, 338.29it/s]

inconclusive sequencing: 3253 {'name': 'site.04.iso.1.subject.03315.lab_id.827426.seq_reps.1'}
inconclusive sequencing: 3260 {'name': 'site.04.iso.1.subject.04768.lab_id.907072.seq_reps.1'}


 11%|████                                 | 3395/30454 [00:11<01:21, 331.20it/s]

inconclusive sequencing: 3336 {'name': 'site.08.iso.1.subject.24TB-064.lab_id.2201.seq_reps.1'}


 11%|████▏                                | 3469/30454 [00:11<01:17, 346.23it/s]

inconclusive sequencing: 3427 {'name': 'site.08.iso.1.subject.02TB1922.lab_id.21394.seq_reps.1'}
inconclusive sequencing: 3430 {'name': 'site.08.iso.1.subject.02TB0851.lab_id.18810.seq_reps.1'}
inconclusive sequencing: 3446 {'name': 'site.08.iso.1.subject.02TB2023.lab_id.21665.seq_reps.1'}
inconclusive sequencing: 3460 {'name': 'site.08.iso.1.subject.05TB43016.lab_id.1882.seq_reps.1'}
inconclusive sequencing: 3478 {'name': 'site.04.iso.1.subject.03358.lab_id.825654.seq_reps.1'}


 12%|████▍                                | 3653/30454 [00:12<01:17, 345.85it/s]

inconclusive sequencing: 3588 {'name': 'site.05.iso.1.subject.PTAN-0044.lab_id.TAN-138.seq_reps.1'}
inconclusive sequencing: 3590 {'name': 'site.05.iso.1.subject.LR-3086.lab_id.CR-01358-16.seq_reps.1'}
inconclusive sequencing: 3592 {'name': 'site.05.iso.1.subject.LR-2217.lab_id.FN-00228-17.seq_reps.1'}
inconclusive sequencing: 3594 {'name': 'site.05.iso.1.subject.LR-3093.lab_id.CR-01555-16.seq_reps.1'}


 12%|████▌                                | 3757/30454 [00:12<01:21, 328.23it/s]

inconclusive sequencing: 3689 {'name': 'site.02.iso.1.subject.0961.lab_id.22A201.seq_reps.201'}
inconclusive sequencing: 3738 {'name': 'site.08.iso.1.subject.02TB1380.lab_id.21047.seq_reps.1'}


 13%|████▋                                | 3831/30454 [00:12<01:18, 337.79it/s]

inconclusive sequencing: 3766 {'name': 'site.05.iso.1.subject.LR-3094.lab_id.CR-01588-16.seq_reps.1'}
inconclusive sequencing: 3810 {'name': 'site.05.iso.1.subject.PMOP-0634.lab_id.MOP-199.seq_reps.1'}
inconclusive sequencing: 3824 {'name': 'site.05.iso.1.subject.LR-2051.lab_id.FN-00659-15.seq_reps.1'}
inconclusive sequencing: 3825 {'name': 'site.05.iso.1.subject.LR-2158.lab_id.FN-00245-16.seq_reps.1'}
inconclusive sequencing: 3826 {'name': 'site.05.iso.1.subject.PSLM-0798.lab_id.SLM-057.seq_reps.1'}


 13%|████▊                                | 3938/30454 [00:13<01:19, 334.09it/s]

inconclusive sequencing: 3894 {'name': 'site.02.iso.1.subject.0108.lab_id.22A062.seq_reps.62'}


 13%|████▉                                | 4086/30454 [00:13<01:13, 357.83it/s]

inconclusive sequencing: 4028 {'name': 'site.05.iso.1.subject.LR-2122.lab_id.FN-01741-15.seq_reps.1'}
inconclusive sequencing: 4042 {'name': 'site.05.iso.1.subject.PMOP-0535.lab_id.MOP-058.seq_reps.1'}
inconclusive sequencing: 4046 {'name': 'site.05.iso.1.subject.CA-0915.lab_id.CO-15367-18.seq_reps.1'}
inconclusive sequencing: 4048 {'name': 'site.05.iso.1.subject.LS-1095.lab_id.LS-11002-18.seq_reps.1'}
inconclusive sequencing: 4050 {'name': 'site.05.iso.1.subject.CA-1230.lab_id.CO-01326-19.seq_reps.1'}
inconclusive sequencing: 4052 {'name': 'site.10.iso.1.subject.YA00053864.lab_id.YA00053864.seq_reps.1'}
inconclusive sequencing: 4054 {'name': 'site.05.iso.1.subject.LS-1046.lab_id.LS-10565-18.seq_reps.1'}
inconclusive sequencing: 4056 {'name': 'site.03.iso.1.subject.T8704.lab_id.IML00166.seq_reps.1'}
inconclusive sequencing: 4060 {'name': 'site.05.iso.1.subject.CA-0082.lab_id.CO-07029-18.seq_reps.1'}
inconclusive sequencing: 4062 {'name': 'site.05.iso.1.subject.LS-1226.lab_id.MA-01567-1

 14%|█████                                | 4194/30454 [00:13<01:18, 332.50it/s]

inconclusive sequencing: 4154 {'name': 'site.02.iso.1.subject.0081.lab_id.22A032.seq_reps.32'}
inconclusive sequencing: 4202 {'name': 'site.02.iso.1.subject.1106.lab_id.2014185016.seq_reps.2014185016'}
inconclusive sequencing: 4218 {'name': 'site.08.iso.1.subject.02TB0720.lab_id.18744.seq_reps.1'}


 14%|█████▎                               | 4337/30454 [00:14<01:16, 340.79it/s]

inconclusive sequencing: 4300 {'name': 'site.08.iso.1.subject.02TB1468.lab_id.20931.seq_reps.1'}
inconclusive sequencing: 4302 {'name': 'site.08.iso.1.subject.02TB1468.lab_id.20005.seq_reps.1'}
inconclusive sequencing: 4303 {'name': 'site.08.iso.1.subject.02TB1468.lab_id.20257.seq_reps.1'}
inconclusive sequencing: 4318 {'name': 'site.05.iso.1.subject.CA-1235.lab_id.CO-01071-19.seq_reps.1'}
inconclusive sequencing: 4322 {'name': 'site.05.iso.1.subject.PTAN-0446.lab_id.TAN-240.seq_reps.1'}
inconclusive sequencing: 4340 {'name': 'site.05.iso.1.subject.LR-2032.lab_id.FN-00407-15.seq_reps.1'}
inconclusive sequencing: 4342 {'name': 'site.05.iso.1.subject.LR-2047.lab_id.FN-00615-15.seq_reps.1'}
inconclusive sequencing: 4343 {'name': 'site.05.iso.1.subject.LR-2050.lab_id.FN-00640-15.seq_reps.1'}


 15%|█████▌                               | 4591/30454 [00:14<01:15, 343.82it/s]

inconclusive sequencing: 4533 {'name': 'site.05.iso.1.subject.CA-1566.lab_id.CO-01462-19.seq_reps.1'}
inconclusive sequencing: 4582 {'name': 'site.05.iso.1.subject.PMFR-0737.lab_id.MFR-230.seq_reps.1'}


 15%|█████▋                               | 4662/30454 [00:15<01:15, 339.37it/s]

inconclusive sequencing: 4614 {'name': 'site.05.iso.1.subject.CA-1568.lab_id.CO-02226-19.seq_reps.1'}
inconclusive sequencing: 4630 {'name': 'site.05.iso.1.subject.CA-1507.lab_id.CO-00609-19.seq_reps.1'}


 16%|█████▊                               | 4805/30454 [00:15<01:14, 345.26it/s]

inconclusive sequencing: 4760 {'name': 'site.10.iso.1.subject.XD00893425.lab_id.XD00893425.seq_reps.1'}
inconclusive sequencing: 4768 {'name': 'site.20.iso.1.subject.SA00022500.lab_id.YA00134632.seq_reps.1'}
inconclusive sequencing: 4780 {'name': 'site.02.iso.1.subject.0969.lab_id.22A209.seq_reps.209'}
inconclusive sequencing: 4820 {'name': 'site.08.iso.1.subject.02TB1654.lab_id.20946.seq_reps.1'}
inconclusive sequencing: 4836 {'name': 'site.08.iso.1.subject.02TB1875.lab_id.21400.seq_reps.1'}


 16%|█████▉                               | 4875/30454 [00:15<01:14, 342.19it/s]

inconclusive sequencing: 4842 {'name': 'site.08.iso.1.subject.02TB1654.lab_id.21125.seq_reps.1'}
inconclusive sequencing: 4844 {'name': 'site.08.iso.1.subject.02TB1654.lab_id.20531.seq_reps.1'}


 16%|██████                               | 5010/30454 [00:16<01:25, 298.79it/s]

inconclusive sequencing: 4952 {'name': 'site.04.iso.1.subject.00843.lab_id.714909.seq_reps.1'}
inconclusive sequencing: 4956 {'name': 'site.04.iso.1.subject.05045.lab_id.909570.seq_reps.1'}
inconclusive sequencing: 4958 {'name': 'site.04.iso.1.subject.01173.lab_id.720348.seq_reps.1'}
inconclusive sequencing: 4962 {'name': 'site.04.iso.1.subject.04812.lab_id.907295.seq_reps.1'}
inconclusive sequencing: 4964 {'name': 'site.04.iso.1.subject.03635.lab_id.831197.seq_reps.1'}
inconclusive sequencing: 4999 {'name': 'site.08.iso.1.subject.02TB1854.lab_id.21854.seq_reps.1'}
inconclusive sequencing: 5022 {'name': 'site.04.iso.1.subject.03464.lab_id.825169.seq_reps.1'}


 17%|██████▏                              | 5077/30454 [00:16<01:21, 311.63it/s]

inconclusive sequencing: 5042 {'name': 'site.04.iso.1.subject.04629.lab_id.902939.seq_reps.1'}
inconclusive sequencing: 5058 {'name': 'site.04.iso.1.subject.00695.lab_id.714108.seq_reps.1'}
inconclusive sequencing: 5060 {'name': 'site.04.iso.1.subject.03670.lab_id.JJH9840.seq_reps.1'}
inconclusive sequencing: 5080 {'name': 'site.04.iso.1.subject.04126.lab_id.832376.seq_reps.1'}
inconclusive sequencing: 5084 {'name': 'site.04.iso.1.subject.03586.lab_id.818896.seq_reps.1'}
inconclusive sequencing: 5087 {'name': 'site.04.iso.1.subject.05487.lab_id.912582.seq_reps.1'}
inconclusive sequencing: 5094 {'name': 'site.04.iso.1.subject.04276.lab_id.903362.seq_reps.1'}
inconclusive sequencing: 5096 {'name': 'site.04.iso.1.subject.03232.lab_id.826153.seq_reps.1'}
inconclusive sequencing: 5104 {'name': 'site.04.iso.1.subject.03247.lab_id.823479.seq_reps.1'}


 17%|██████▎                              | 5187/30454 [00:16<01:16, 329.31it/s]

inconclusive sequencing: 5121 {'name': 'site.04.iso.1.subject.05400.lab_id.912774.seq_reps.1'}
inconclusive sequencing: 5135 {'name': 'site.04.iso.1.subject.05353.lab_id.913368.seq_reps.1'}
inconclusive sequencing: 5160 {'name': 'site.04.iso.1.subject.01224.lab_id.721665.seq_reps.1'}
inconclusive sequencing: 5186 {'name': 'site.04.iso.1.subject.03765.lab_id.911878.seq_reps.1'}
inconclusive sequencing: 5190 {'name': 'site.03.iso.1.subject.DR-257.lab_id.IML-01175.seq_reps.1'}


 17%|██████▍                              | 5262/30454 [00:16<01:13, 342.67it/s]

inconclusive sequencing: 5196 {'name': 'site.04.iso.1.subject.03169.lab_id.823019.seq_reps.1'}
inconclusive sequencing: 5200 {'name': 'site.04.iso.1.subject.03585.lab_id.830403.seq_reps.1'}
inconclusive sequencing: 5210 {'name': 'site.04.iso.1.subject.02985.lab_id.829373.seq_reps.1'}
inconclusive sequencing: 5212 {'name': 'site.04.iso.1.subject.04699.lab_id.905157.seq_reps.1'}
inconclusive sequencing: 5226 {'name': 'site.04.iso.1.subject.01813.lab_id.20218.seq_reps.1'}
inconclusive sequencing: 5239 {'name': 'site.04.iso.1.subject.04986.lab_id.906055.seq_reps.1'}
inconclusive sequencing: 5250 {'name': 'site.04.iso.1.subject.04038.lab_id.829619.seq_reps.1'}


 18%|██████▌                              | 5400/30454 [00:17<01:14, 335.60it/s]

inconclusive sequencing: 5335 {'name': 'site.04.iso.1.subject.03998.lab_id.832973.seq_reps.1'}
inconclusive sequencing: 5341 {'name': 'site.04.iso.1.subject.03344.lab_id.826332.seq_reps.1'}
inconclusive sequencing: 5361 {'name': 'site.04.iso.1.subject.03697.lab_id.912284.seq_reps.1'}
inconclusive sequencing: 5371 {'name': 'site.04.iso.1.subject.05473.lab_id.913264.seq_reps.1'}
inconclusive sequencing: 5377 {'name': 'site.04.iso.1.subject.03283.lab_id.825531.seq_reps.1'}
inconclusive sequencing: 5381 {'name': 'site.04.iso.1.subject.03286.lab_id.830190.seq_reps.1'}
inconclusive sequencing: 5394 {'name': 'site.10.iso.1.subject.YA00099906.lab_id.YA00099906.seq_reps.1'}
inconclusive sequencing: 5396 {'name': 'site.20.iso.1.subject.SCH8041859.lab_id.YA00134169.seq_reps.1'}


 18%|██████▋                              | 5470/30454 [00:17<01:13, 339.97it/s]

inconclusive sequencing: 5418 {'name': 'site.08.iso.1.subject.24TB-080.lab_id.2273.seq_reps.1'}
inconclusive sequencing: 5421 {'name': 'site.08.iso.1.subject.24TB-080.lab_id.2257.seq_reps.1'}
inconclusive sequencing: 5436 {'name': 'site.02.iso.1.subject.0530.lab_id.241139-14.seq_reps.14241139'}
inconclusive sequencing: 5479 {'name': 'site.02.iso.1.subject.1033.lab_id.2013185075.seq_reps.2013185075'}


 18%|██████▊                              | 5576/30454 [00:17<01:12, 341.04it/s]

inconclusive sequencing: 5530 {'name': 'site.08.iso.1.subject.02TB0408.lab_id.17682.seq_reps.1'}


 19%|███████                              | 5864/30454 [00:18<01:11, 344.65it/s]

inconclusive sequencing: 5820 {'name': 'site.04.iso.1.subject.03415.lab_id.827704.seq_reps.1'}
inconclusive sequencing: 5841 {'name': 'site.04.iso.1.subject.05253.lab_id.911547.seq_reps.1'}


 20%|███████▎                             | 6006/30454 [00:19<01:13, 332.11it/s]

inconclusive sequencing: 5950 {'name': 'site.10.iso.1.subject.YA00005293.lab_id.YA00005293.seq_reps.1'}
inconclusive sequencing: 5980 {'name': 'site.10.iso.1.subject.YA00026325.lab_id.YA00026325.seq_reps.1'}
inconclusive sequencing: 5981 {'name': 'site.10.iso.1.subject.YA00026333.lab_id.YA00026333.seq_reps.1'}


 20%|███████▍                             | 6151/30454 [00:19<01:12, 337.14it/s]

inconclusive sequencing: 6082 {'name': 'site.10.iso.1.subject.YA00024450.lab_id.YA00024450.seq_reps.1'}
inconclusive sequencing: 6150 {'name': 'site.10.iso.1.subject.YA00053708.lab_id.YA00053708.seq_reps.1'}


 20%|███████▌                             | 6223/30454 [00:19<01:10, 343.80it/s]

inconclusive sequencing: 6165 {'name': 'site.10.iso.1.subject.YA00134710.lab_id.YA00134710.seq_reps.1'}
inconclusive sequencing: 6182 {'name': 'site.10.iso.1.subject.XD02650754.lab_id.XD02650754.seq_reps.1'}
inconclusive sequencing: 6235 {'name': 'site.20.iso.1.subject.SA00422875.lab_id.YA00135294.seq_reps.1'}
inconclusive sequencing: 6240 {'name': 'site.10.iso.1.subject.UH01324061.lab_id.UH01324061.seq_reps.1'}
inconclusive sequencing:

 21%|███████▋                             | 6299/30454 [00:19<01:07, 360.47it/s]

 6248 {'name': 'site.04.iso.1.subject.02947.lab_id.822027.seq_reps.1'}
inconclusive sequencing: 6252 {'name': 'site.04.iso.1.subject.00927.lab_id.716467.seq_reps.1'}
inconclusive sequencing: 6255 {'name': 'site.02.iso.1.subject.0968.lab_id.22A208.seq_reps.208'}
inconclusive sequencing: 6258 {'name': 'site.04.iso.1.subject.02393.lab_id.815134.seq_reps.1_2'}
inconclusive sequencing: 6260 {'name': 'site.04.iso.1.subject.00949.lab_id.716971.seq_reps.1'}
inconclusive sequencing: 6262 {'name': 'site.04.iso.1.subject.03993.lab_id.830698.seq_reps.1'}
inconclusive sequencing: 6266 {'name': 'site.04.iso.1.subject.00546.lab_id.714945.seq_reps.1'}
inconclusive sequencing: 6268 {'name': 'site.04.iso.1.subject.00540.lab_id.711869.seq_reps.1'}


 21%|███████▊                             | 6411/30454 [00:20<01:11, 336.54it/s]

inconclusive sequencing: 6350 {'name': 'site.04.iso.1.subject.03414.lab_id.828547.seq_reps.1'}
inconclusive sequencing: 6351 {'name': 'site.04.iso.1.subject.03875.lab_id.829858.seq_reps.1'}
inconclusive sequencing: 6353 {'name': 'site.04.iso.1.subject.05589.lab_id.919947.seq_reps.1'}
inconclusive sequencing: 6370 {'name': 'site.04.iso.1.subject.04478.lab_id.902793.seq_reps.1'}
inconclusive sequencing: 6392 {'name': 'site.03.iso.1.subject.DR-279.lab_id.IML-01187.seq_reps.1'}
inconclusive sequencing: 6412 {'name': 'site.03.iso.1.subject.DR-305.lab_id.IML-01192.seq_reps.1'}
inconclusive sequencing: 6416 {'name': 'site.03.iso.1.subject.DR-314.lab_id.IML-01198.seq_reps.1'}


 21%|███████▉                             | 6483/30454 [00:20<01:10, 340.29it/s]

inconclusive sequencing: 6440 {'name': 'site.03.iso.1.subject.DR-281.lab_id.IML-01188.seq_reps.1'}
inconclusive sequencing: 6447 {'name': 'site.04.iso.1.subject.03470.lab_id.827987.seq_reps.1'}
inconclusive sequencing: 6458 {'name': 'site.03.iso.1.subject.DR-121.lab_id.IML-01105.seq_reps.1'}
inconclusive sequencing: 6461 {'name': 'site.04.iso.1.subject.04502.lab_id.902838.seq_reps.1'}
inconclusive sequencing: 6470 {'name': 'site.04.iso.1.subject.04449.lab_id.904981.seq_reps.1'}
inconclusive sequencing: 6483 {'name': 'site.04.iso.1.subject.04500.lab_id.904529.seq_reps.1'}
inconclusive sequencing: 6504 {'name': 'site.04.iso.1.subject.03533.lab_id.829545.seq_reps.1'}
inconclusive sequencing:

 22%|███████▉                             | 6555/30454 [00:20<01:09, 341.66it/s]

 6514 {'name': 'site.04.iso.1.subject.03646.lab_id.JJH9567.seq_reps.1'}
inconclusive sequencing: 6525 {'name': 'site.04.iso.1.subject.03529.lab_id.829855.seq_reps.1_2'}
inconclusive sequencing: 6547 {'name': 'site.04.iso.1.subject.03975.lab_id.831547.seq_reps.1'}
inconclusive sequencing: 6580 {'name': 'site.04.iso.1.subject.03542.lab_id.826122.seq_reps.1'}


 22%|████████                             | 6663/30454 [00:21<01:09, 344.26it/s]

inconclusive sequencing: 6594 {'name': 'site.04.iso.1.subject.03324.lab_id.827121.seq_reps.1'}
inconclusive sequencing: 6607 {'name': 'site.04.iso.1.subject.05477.lab_id.913495.seq_reps.1'}
inconclusive sequencing: 6617 {'name': 'site.04.iso.1.subject.04569.lab_id.905056.seq_reps.1'}
inconclusive sequencing: 6635 {'name': 'site.04.iso.1.subject.04513.lab_id.832156.seq_reps.1'}
inconclusive sequencing: 6655 {'name': 'site.04.iso.1.subject.03508.lab_id.827362.seq_reps.1'}
inconclusive sequencing: 6661 {'name': 'site.04.iso.1.subject.04224.lab_id.833174.seq_reps.1'}


 22%|████████▎                            | 6805/30454 [00:21<01:10, 333.43it/s]

inconclusive sequencing: 6735 {'name': 'site.04.iso.1.subject.04559.lab_id.901648.seq_reps.1_2'}
inconclusive sequencing: 6760 {'name': 'site.04.iso.1.subject.04763.lab_id.905930.seq_reps.1'}
inconclusive sequencing: 6765 {'name': 'site.04.iso.1.subject.03546.lab_id.828627.seq_reps.1'}


 23%|████████▍                            | 6947/30454 [00:21<01:08, 340.88it/s]

inconclusive sequencing: 6880 {'name': 'site.06.iso.1.subject.06TB_0034.lab_id.06MIL0495.seq_reps.1'}
inconclusive sequencing: 6888 {'name': 'site.06.iso.1.subject.06TB_0465.lab_id.06MIL1143.seq_reps.1'}


 23%|████████▌                            | 7017/30454 [00:22<01:10, 330.13it/s]

inconclusive sequencing: 6972 {'name': 'site.10.iso.1.subject.XD02583671.lab_id.XD02583671.seq_reps.1'}
inconclusive sequencing: 6996 {'name': 'site.10.iso.1.subject.XD01227017.lab_id.XD01227017.seq_reps.1'}


 23%|████████▋                            | 7124/30454 [00:22<01:09, 335.24it/s]

inconclusive sequencing: 7066 {'name': 'site.14.iso.1.subject.4349.lab_id.4349.seq_reps.1'}


 24%|████████▊                            | 7265/30454 [00:22<01:07, 341.46it/s]

inconclusive sequencing: 7200 {'name': 'site.11.iso.1.subject.XTB-18-232.lab_id.XTB-18-232.seq_reps.1'}
inconclusive sequencing: 7202 {'name': 'site.11.iso.1.subject.XTB-18-231.lab_id.XTB-18-231.seq_reps.1'}
inconclusive sequencing: 7230 {'name': 'site.03.iso.1.subject.T298.lab_id.T298.seq_reps.1'}


 25%|█████████▍                           | 7724/30454 [00:24<01:07, 337.11it/s]

inconclusive sequencing: 7662 {'name': 'site.11.iso.1.subject.XTB_17-552.lab_id.XTB_17-552.seq_reps.1'}


 26%|█████████▋                           | 8023/30454 [00:25<01:12, 308.72it/s]

inconclusive sequencing: 7970 {'name': 'site.02.iso.1.subject.0067.lab_id.22A017.seq_reps.17'}
inconclusive sequencing: 8009 {'name': 'site.02.iso.1.subject.0903.lab_id.22A137.seq_reps.137'}
inconclusive sequencing: 8042 {'name': 'site.20.iso.1.subject.SA00103754.lab_id.YA00134640.seq_reps.1'}


 27%|█████████▉                           | 8180/30454 [00:25<01:17, 288.54it/s]

inconclusive sequencing: 8124 {'name': 'site.10.iso.1.subject.YA00006705.lab_id.YA00006705.seq_reps.1'}
inconclusive sequencing: 8142 {'name': 'site.11.iso.1.subject.XTB_18-027.lab_id.XTB_18-027.seq_reps.1_2'}
inconclusive sequencing: 8146 {'name': 'site.03.iso.1.subject.KGZ-13412-2018.lab_id.13412-2018.seq_reps.1'}


 27%|██████████                           | 8238/30454 [00:25<01:18, 284.47it/s]

inconclusive sequencing: 8182 {'name': 'site.06.iso.1.subject.06TB_0266.lab_id.06MIL0857.seq_reps.1'}


 27%|██████████                           | 8323/30454 [00:26<01:27, 252.82it/s]

inconclusive sequencing: 8280 {'name': 'site.03.iso.1.subject.10815.lab_id.IML-01026.seq_reps.1'}
inconclusive sequencing: 8289 {'name': 'site.11.iso.1.subject.MDR153.lab_id.SWE-121.seq_reps.1_2_3_4'}
inconclusive sequencing: 8301 {'name': 'site.03.iso.1.subject.T1010.lab_id.IML-00008.seq_reps.1'}


 27%|██████████▏                          | 8373/30454 [00:26<01:39, 221.59it/s]

inconclusive sequencing: 8334 {'name': 'site.03.iso.1.subject.11159.lab_id.IML-00811.seq_reps.1'}


 28%|██████████▎                          | 8483/30454 [00:27<01:47, 204.93it/s]

inconclusive sequencing: 8440 {'name': 'site.03.iso.1.subject.10823.lab_id.IML-00975.seq_reps.1'}
inconclusive sequencing: 8442 {'name': 'site.03.iso.1.subject.10262.lab_id.IML-01016.seq_reps.1'}
inconclusive sequencing: 8480 {'name': 'site.06.iso.1.subject.06TB_0025.lab_id.06MIL0486.seq_reps.1'}


 28%|██████████▍                          | 8574/30454 [00:27<01:43, 211.83it/s]

inconclusive sequencing: 8542 {'name': 'site.03.iso.1.subject.GB-7075-180.lab_id.7075-180.seq_reps.1'}
inconclusive sequencing: 8544 {'name': 'site.11.iso.1.subject.MDR100.lab_id.SWE-76.seq_reps.1_2_3_4'}


 28%|██████████▌                          | 8667/30454 [00:28<02:24, 151.23it/s]

inconclusive sequencing: 8646 {'name': 'site.03.iso.1.subject.BOR-1696-17.lab_id.IML-00754.seq_reps.1'}


 29%|██████████▋                          | 8749/30454 [00:28<02:24, 149.92it/s]

inconclusive sequencing: 8727 {'name': 'site.03.iso.1.subject.T8554.lab_id.IML-00126.seq_reps.1'}


 29%|██████████▊                          | 8951/30454 [00:29<01:43, 208.47it/s]

inconclusive sequencing: 8902 {'name': 'site.04.iso.1.subject.03688.lab_id.JJH10305.seq_reps.1'}
inconclusive sequencing: 8904 {'name': 'site.03.iso.1.subject.11142.lab_id.IML-00808.seq_reps.1'}
inconclusive sequencing: 8906 {'name': 'site.03.iso.1.subject.11086.lab_id.IML-00871.seq_reps.1'}
inconclusive sequencing: 8908 {'name': 'site.10.iso.1.subject.YA00023063.lab_id.YA00023063.seq_reps.1'}
inconclusive sequencing: 8912 {'name': 'site.10.iso.1.subject.YA00027815.lab_id.YA00027815.seq_reps.1'}
inconclusive sequencing: 8913 {'name': 'site.10.iso.1.subject.YA00038960.lab_id.YA00038960.seq_reps.1'}
inconclusive sequencing: 8916 {'name': 'site.03.iso.1.subject.T7331.lab_id.IML00135.seq_reps.1'}
inconclusive sequencing: 8918 {'name': 'site.04.iso.1.subject.03442.lab_id.827260.seq_reps.1'}
inconclusive sequencing: 8920 {'name': 'site.04.iso.1.subject.02006.lab_id.802524.seq_reps.1'}
inconclusive sequencing: 8924 {'name': 'site.08.iso.1.subject.02TB1934.lab_id.21396.seq_reps.1'}


 30%|██████████▉                          | 9051/30454 [00:30<01:31, 234.08it/s]

inconclusive sequencing: 9006 {'name': 'site.02.iso.1.subject.0130.lab_id.22A085.seq_reps.85'}
inconclusive sequencing: 9050 {'name': 'site.02.iso.1.subject.0093.lab_id.22A045.seq_reps.45'}
inconclusive sequencing:

 31%|███████████▎                         | 9292/30454 [00:31<01:11, 296.37it/s]

 9061 {'name': 'site.02.iso.1.subject.1019.lab_id.2013183230.seq_reps.2013183230'}
inconclusive sequencing: 9210 {'name': 'site.02.iso.1.subject.0455.lab_id.251040-15.seq_reps.15251040'}


 31%|███████████▍                         | 9443/30454 [00:31<01:13, 287.28it/s]

inconclusive sequencing: 9398 {'name': 'site.02.iso.1.subject.0867.lab_id.22A101.seq_reps.101'}
inconclusive sequencing: 9434 {'name': 'site.08.iso.1.subject.02TB0334.lab_id.17436.seq_reps.1'}
inconclusive sequencing: 9436 {'name': 'site.08.iso.1.subject.02TB0334.lab_id.17201.seq_reps.1'}
inconclusive sequencing: 9456 {'name': 'site.08.iso.1.subject.02TB0159.lab_id.17105.seq_reps.1'}


 31%|███████████▌                         | 9560/30454 [00:32<01:49, 191.68it/s]

inconclusive sequencing: 9528 {'name': 'site.02.iso.1.subject.0936.lab_id.22A171.seq_reps.171'}
inconclusive sequencing: 9560 {'name': 'site.04.iso.1.subject.01451.lab_id.726829.seq_reps.1'}
inconclusive sequencing: 9561 {'name': 'site.04.iso.1.subject.01579.lab_id.730821.seq_reps.1'}
inconclusive sequencing: 9570 {'name': 'site.02.iso.1.subject.0116.lab_id.22A070.seq_reps.70'}


 32%|███████████▋                         | 9640/30454 [00:32<01:41, 204.38it/s]

inconclusive sequencing: 9611 {'name': 'site.04.iso.1.subject.03541.lab_id.828165.seq_reps.1'}


 33%|████████████                         | 9933/30454 [00:34<01:32, 221.03it/s]

inconclusive sequencing: 9889 {'name': 'site.08.iso.1.subject.28TB-D25-017.lab_id.3079.seq_reps.1'}
inconclusive sequencing: 9940 {'name': 'site.04.iso.1.subject.03192.lab_id.816831.seq_reps.1'}


 33%|████████████▏                        | 9989/30454 [00:34<01:21, 250.65it/s]

inconclusive sequencing: 9954 {'name': 'site.04.iso.1.subject.04509.lab_id.902937.seq_reps.1_2'}
inconclusive sequencing: 9958 {'name': 'site.04.iso.1.subject.04785.lab_id.906894.seq_reps.1'}
inconclusive sequencing: 9966 {'name': 'site.04.iso.1.subject.04515.lab_id.905889.seq_reps.1'}
inconclusive sequencing: 9977 {'name': 'site.04.iso.1.subject.04627.lab_id.906710.seq_reps.1'}
inconclusive sequencing: 9978 {'name': 'site.04.iso.1.subject.03467.lab_id.828293.seq_reps.1'}
inconclusive sequencing: 9984 {'name': 'site.04.iso.1.subject.04039.lab_id.832143.seq_reps.1'}


 33%|███████████▊                        | 10045/30454 [00:34<01:16, 265.09it/s]

inconclusive sequencing: 10010 {'name': 'site.04.iso.1.subject.03504.lab_id.829540.seq_reps.1'}
inconclusive sequencing: 10018 {'name': 'site.04.iso.1.subject.04607.lab_id.906480.seq_reps.1'}
inconclusive sequencing: 10028 {'name': 'site.04.iso.1.subject.04774.lab_id.907691.seq_reps.1'}
inconclusive sequencing: 10053 {'name': 'site.04.iso.1.subject.04913.lab_id.906933.seq_reps.1'}
inconclusive sequencing: 10064 {'name': 'site.04.iso.1.subject.03602.lab_id.829428.seq_reps.1'}


 33%|███████████▉                        | 10133/30454 [00:34<01:13, 274.77it/s]

inconclusive sequencing: 10075 {'name': 'site.04.iso.1.subject.04219.lab_id.901037.seq_reps.1'}
inconclusive sequencing: 10106 {'name': 'site.04.iso.1.subject.04784.lab_id.907810.seq_reps.1'}
inconclusive sequencing: 10109 {'name': 'site.04.iso.1.subject.03000.lab_id.823467.seq_reps.1'}
inconclusive sequencing: 10124 {'name': 'site.04.iso.1.subject.04608.lab_id.905248.seq_reps.1'}


 34%|████████████                        | 10218/30454 [00:35<01:14, 272.79it/s]

inconclusive sequencing: 10170 {'name': 'site.04.iso.1.subject.04588.lab_id.905619.seq_reps.1'}
inconclusive sequencing: 10196 {'name': 'site.04.iso.1.subject.04441.lab_id.902320.seq_reps.1'}
inconclusive sequencing: 10197 {'name': 'site.04.iso.1.subject.04493.lab_id.902434.seq_reps.1'}


 34%|████████████▏                       | 10319/30454 [00:35<01:06, 304.25it/s]

inconclusive sequencing: 10250 {'name': 'site.04.iso.1.subject.03854.lab_id.901503.seq_reps.1'}
inconclusive sequencing: 10257 {'name': 'site.04.iso.1.subject.04645.lab_id.905638.seq_reps.1'}
inconclusive sequencing: 10258 {'name': 'site.04.iso.1.subject.05640.lab_id.919103.seq_reps.1'}
inconclusive sequencing: 10264 {'name': 'site.04.iso.1.subject.04231.lab_id.902783.seq_reps.1'}
inconclusive sequencing: 10302 {'name': 'site.04.iso.1.subject.04161.lab_id.912212.seq_reps.1'}


 34%|████████████▎                       | 10421/30454 [00:35<01:04, 308.46it/s]

inconclusive sequencing: 10346 {'name': 'site.04.iso.1.subject.04149.lab_id.913375.seq_reps.1'}
inconclusive sequencing: 10356 {'name': 'site.04.iso.1.subject.04655.lab_id.900918.seq_reps.1'}


 34%|████████████▍                       | 10483/30454 [00:36<01:07, 295.53it/s]

inconclusive sequencing: 10432 {'name': 'site.04.iso.1.subject.03462.lab_id.828523.seq_reps.1'}
inconclusive sequencing: 10449 {'name': 'site.04.iso.1.subject.03418.lab_id.826143.seq_reps.1'}
inconclusive sequencing: 10478 {'name': 'site.04.iso.1.subject.03665.lab_id.JJH10041.seq_reps.1'}
inconclusive sequencing: 10486 {'name': 'site.08.iso.1.subject.DTU-079.lab_id.2182.seq_reps.1'}


 35%|████████████▌                       | 10581/30454 [00:36<01:07, 292.72it/s]

inconclusive sequencing: 10504 {'name': 'site.02.iso.1.subject.0499.lab_id.251034-15.seq_reps.15251034'}


 35%|████████████▌                       | 10671/30454 [00:36<01:10, 281.60it/s]

inconclusive sequencing: 10628 {'name': 'site.10.iso.1.subject.YA00099903.lab_id.YA00099903.seq_reps.1'}
inconclusive sequencing: 10630 {'name': 'site.10.iso.1.subject.XD02692733.lab_id.XD02692733.seq_reps.1'}
inconclusive sequencing: 10632 {'name': 'site.13.iso.1.subject.160200065.lab_id.160200065.seq_reps.1'}
inconclusive sequencing: 10634 {'name': 'site.04.iso.1.subject.03402.lab_id.827187.seq_reps.1'}
inconclusive sequencing: 10638 {'name': 'site.10.iso.1.subject.XD01458051.lab_id.XD01458051.seq_reps.1'}


 36%|████████████▊                       | 10857/30454 [00:37<01:03, 310.79it/s]

inconclusive sequencing: 10788 {'name': 'site.03.iso.1.subject.JPN-R2011-0071.lab_id.JPN-R2011-0071.seq_reps.1'}
inconclusive sequencing: 10824 {'name': 'site.08.iso.1.subject.02TB0187.lab_id.17344.seq_reps.1'}


 37%|█████████████▍                      | 11335/30454 [00:38<01:05, 290.96it/s]

inconclusive sequencing: 11280 {'name': 'site.04.iso.1.subject.04499.lab_id.904068.seq_reps.1'}
inconclusive sequencing: 11282 {'name': 'site.04.iso.1.subject.04267.lab_id.901742.seq_reps.1_2'}
inconclusive sequencing: 11284 {'name': 'site.04.iso.1.subject.01719.lab_id.730558.seq_reps.1'}
inconclusive sequencing: 11286 {'name': 'site.06.iso.1.subject.SGD_0084-14.lab_id.06MIL0410.seq_reps.1'}
inconclusive sequencing: 11290 {'name': 'site.04.iso.1.subject.01146.lab_id.720155.seq_reps.1'}
inconclusive sequencing: 11294 {'name': 'site.04.iso.1.subject.03986.lab_id.829800.seq_reps.1'}
inconclusive sequencing: 11298 {'name': 'site.04.iso.1.subject.00042.lab_id.631456.seq_reps.1'}
inconclusive sequencing: 11300 {'name': 'site.04.iso.1.subject.00787.lab_id.713388.seq_reps.1'}
inconclusive sequencing: 11301 {'name': 'site.04.iso.1.subject.02009.lab_id.732263.seq_reps.1'}
inconclusive sequencing: 11302 {'name': 'site.20.iso.1.subject.SA00377494.lab_id.YA00135266.seq_reps.1'}
inconclusive sequenc

 38%|█████████████▌                      | 11442/30454 [00:39<01:24, 226.09it/s]

inconclusive sequencing: 11416 {'name': 'site.04.iso.1.subject.01017.lab_id.720678.seq_reps.1'}
inconclusive sequencing: 11418 {'name': 'site.06.iso.1.subject.SZH_0040-14.lab_id.06MIL0314.seq_reps.1'}
inconclusive sequencing: 11423 {'name': 'site.04.iso.1.subject.04438.lab_id.901563.seq_reps.1'}
inconclusive sequencing: 11426 {'name': 'site.03.iso.1.subject.DS-155.lab_id.IML-01241.seq_reps.1'}


 38%|█████████████▌                      | 11512/30454 [00:39<01:30, 208.71it/s]

inconclusive sequencing: 11478 {'name': 'site.06.iso.1.subject.JHL_0098-14.lab_id.06MIL0456.seq_reps.1'}


 38%|█████████████▋                      | 11598/30454 [00:40<01:35, 197.20it/s]

inconclusive sequencing: 11566 {'name': 'site.06.iso.1.subject.MHL_0173-14.lab_id.06MIL0228.seq_reps.1'}


 38%|█████████████▊                      | 11663/30454 [00:40<01:42, 183.88it/s]

inconclusive sequencing: 11628 {'name': 'site.06.iso.1.subject.JHL_0096-14.lab_id.06MIL0419.seq_reps.1'}


 39%|█████████████▉                      | 11748/30454 [00:41<01:31, 204.25it/s]

inconclusive sequencing: 11724 {'name': 'site.06.iso.1.subject.BVH_0010-14.lab_id.06MIL0235.seq_reps.1'}
inconclusive sequencing: 11734 {'name': 'site.06.iso.1.subject.JHL_0046-14.lab_id.06MIL0185.seq_reps.1'}
inconclusive sequencing: 11770 {'name': 'site.06.iso.1.subject.ICK-1198.lab_id.06MIL1096.seq_reps.1'}


 39%|█████████████▉                      | 11833/30454 [00:41<01:13, 252.40it/s]

inconclusive sequencing: 11788 {'name': 'site.06.iso.1.subject.NHM_0324-14.lab_id.06MIL0453.seq_reps.1'}
inconclusive sequencing: 11793 {'name': 'site.06.iso.1.subject.SSM_0138_14.lab_id.06MIL0222.seq_reps.1'}
inconclusive sequencing: 11799 {'name': 'site.06.iso.1.subject.RLH_0278-14.lab_id.06MIL0210.seq_reps.1'}
inconclusive sequencing: 11811 {'name': 'site.06.iso.1.subject.NHM_0291-14.lab_id.06MIL0346.seq_reps.1'}
inconclusive sequencing: 11812 {'name': 'site.04.iso.1.subject.03285.lab_id.825110.seq_reps.1'}
inconclusive sequencing: 11820 {'name': 'site.06.iso.1.subject.RLH_0289-14.lab_id.06MIL0257.seq_reps.1'}
inconclusive sequencing:

 39%|██████████████                      | 11887/30454 [00:41<01:13, 251.76it/s]

 11847 {'name': 'site.06.iso.1.subject.NHM_0323-14.lab_id.06MIL0452.seq_reps.1'}
inconclusive sequencing: 11851 {'name': 'site.04.iso.1.subject.00933.lab_id.715240.seq_reps.1'}


 39%|██████████████                      | 11945/30454 [00:41<01:08, 270.47it/s]

inconclusive sequencing: 11902 {'name': 'site.06.iso.1.subject.MHL_0156-14.lab_id.06MIL0152.seq_reps.1'}
inconclusive sequencing: 11952 {'name': 'site.06.iso.1.subject.MHL_0007_12.lab_id.06MIL0115.seq_reps.1'}


 39%|██████████████▏                     | 11999/30454 [00:42<01:16, 240.93it/s]

inconclusive sequencing: 11964 {'name': 'site.04.iso.1.subject.04256.lab_id.901537.seq_reps.1'}
inconclusive sequencing: 11966 {'name': 'site.04.iso.1.subject.04037.lab_id.830098.seq_reps.1_2'}
inconclusive sequencing: 11967 {'name': 'site.04.iso.1.subject.04947.lab_id.904603.seq_reps.1'}
inconclusive sequencing: 11992 {'name': 'site.04.iso.1.subject.04440.lab_id.832125.seq_reps.1'}
inconclusive sequencing: 11994 {'name': 'site.04.iso.1.subject.03558.lab_id.823529.seq_reps.1'}


 40%|██████████████▏                     | 12053/30454 [00:42<01:13, 250.67it/s]

inconclusive sequencing: 12023 {'name': 'site.04.iso.1.subject.02952.lab_id.827574.seq_reps.1'}


 40%|██████████████▎                     | 12105/30454 [00:42<01:15, 243.22it/s]

inconclusive sequencing: 12078 {'name': 'site.04.iso.1.subject.05501.lab_id.913629.seq_reps.1'}
inconclusive sequencing: 12088 {'name': 'site.04.iso.1.subject.05622.lab_id.918821.seq_reps.1'}


 40%|██████████████▍                     | 12201/30454 [00:42<01:04, 282.97it/s]

inconclusive sequencing: 12132 {'name': 'site.04.iso.1.subject.00823.lab_id.713738.seq_reps.1'}


 40%|██████████████▌                     | 12323/30454 [00:43<01:11, 252.92it/s]

inconclusive sequencing: 12278 {'name': 'site.06.iso.1.subject.NHM_0488-14.lab_id.06MIL0064.seq_reps.1'}
inconclusive sequencing: 12285 {'name': 'site.04.iso.1.subject.04571.lab_id.903433.seq_reps.1'}
inconclusive sequencing: 12304 {'name': 'site.06.iso.1.subject.SGD_0083-14.lab_id.06MIL0411.seq_reps.1'}


 41%|██████████████▊                     | 12548/30454 [00:44<00:59, 300.35it/s]

inconclusive sequencing: 12481 {'name': 'site.04.iso.1.subject.04528.lab_id.904472.seq_reps.1'}
inconclusive sequencing: 12530 {'name': 'site.06.iso.1.subject.MHL_0013-12.lab_id.06MIL0079.seq_reps.1'}


 42%|██████████████▉                     | 12672/30454 [00:44<01:00, 294.88it/s]

inconclusive sequencing: 12629 {'name': 'site.04.iso.1.subject.03379.lab_id.828130.seq_reps.1'}
inconclusive sequencing: 12650 {'name': 'site.04.iso.1.subject.04592.lab_id.905079.seq_reps.1_2'}


 42%|███████████████                     | 12763/30454 [00:44<01:00, 293.54it/s]

inconclusive sequencing: 12730 {'name': 'site.04.iso.1.subject.04002.lab_id.900530.seq_reps.1'}
inconclusive sequencing: 12740 {'name': 'site.04.iso.1.subject.04673.lab_id.906913.seq_reps.1_2'}
inconclusive sequencing: 12751 {'name': 'site.04.iso.1.subject.04523.lab_id.904332.seq_reps.1'}
inconclusive sequencing: 12754 {'name': 'site.06.iso.1.subject.SGD_0059-14.lab_id.06MIL0292.seq_reps.1'}
inconclusive sequencing: 12760 {'name': 'site.06.iso.1.subject.BVH_0038-14.lab_id.06MIL0381.seq_reps.1'}


 42%|███████████████▏                    | 12857/30454 [00:45<00:59, 298.11it/s]

inconclusive sequencing: 12820 {'name': 'site.04.iso.1.subject.02563.lab_id.815563.seq_reps.1'}
inconclusive sequencing: 12825 {'name': 'site.04.iso.1.subject.03888.lab_id.832521.seq_reps.1'}


 43%|███████████████▎                    | 12986/30454 [00:45<00:55, 312.75it/s]

inconclusive sequencing: 12922 {'name': 'site.04.iso.1.subject.04453.lab_id.905101.seq_reps.1'}
inconclusive sequencing:

 43%|███████████████▍                    | 13050/30454 [00:45<00:58, 298.65it/s]

 12995 {'name': 'site.04.iso.1.subject.04279.lab_id.902914.seq_reps.1'}
inconclusive sequencing: 12996 {'name': 'site.04.iso.1.subject.03257.lab_id.826330.seq_reps.1'}
inconclusive sequencing: 13008 {'name': 'site.04.iso.1.subject.03782.lab_id.829533.seq_reps.1'}
inconclusive sequencing: 13010 {'name': 'site.03.iso.1.subject.DR-292.lab_id.IML-01197.seq_reps.1'}


 43%|███████████████▌                    | 13152/30454 [00:46<00:54, 318.47it/s]

inconclusive sequencing: 13086 {'name': 'site.04.iso.1.subject.04675.lab_id.906454.seq_reps.1'}


 44%|███████████████▋                    | 13249/30454 [00:46<00:55, 308.47it/s]

inconclusive sequencing: 13205 {'name': 'site.06.iso.1.subject.RLH_0302-14.lab_id.06MIL0294.seq_reps.1'}
inconclusive sequencing: 13215 {'name': 'site.06.iso.1.subject.MHL_0142-14.lab_id.06MIL0050.seq_reps.1'}


 44%|███████████████▊                    | 13373/30454 [00:46<00:57, 298.30it/s]

inconclusive sequencing: 13318 {'name': 'site.06.iso.1.subject.D0543-17.lab_id.06MIL1561.seq_reps.1'}
inconclusive sequencing: 13378 {'name': 'site.06.iso.1.subject.NHM_0313-14.lab_id.06MIL0413.seq_reps.1'}


 44%|███████████████▉                    | 13437/30454 [00:46<00:56, 300.69it/s]

inconclusive sequencing: 13399 {'name': 'site.06.iso.1.subject.RLH_0236-14.lab_id.06MIL0086.seq_reps.1'}
inconclusive sequencing: 13439 {'name': 'site.04.iso.1.subject.04638.lab_id.904751.seq_reps.1'}
inconclusive sequencing: 13440 {'name': 'site.04.iso.1.subject.04190.lab_id.901180.seq_reps.1'}
inconclusive sequencing: 13461 {'name': 'site.06.iso.1.subject.IR30.lab_id.06MIL1854.seq_reps.1'}


 44%|███████████████▉                    | 13531/30454 [00:47<00:57, 295.45it/s]

inconclusive sequencing: 13492 {'name': 'site.04.iso.1.subject.04362.lab_id.831548.seq_reps.1'}
inconclusive sequencing: 13493 {'name': 'site.04.iso.1.subject.04920.lab_id.908048.seq_reps.1'}


 45%|████████████████                    | 13621/30454 [00:47<00:58, 289.14it/s]

inconclusive sequencing: 13585 {'name': 'site.04.iso.1.subject.04476.lab_id.903922.seq_reps.1'}
inconclusive sequencing: 13594 {'name': 'site.08.iso.1.subject.24TB-047.lab_id.2135.seq_reps.1'}


 45%|████████████████▎                   | 13810/30454 [00:48<00:54, 304.09it/s]

inconclusive sequencing: 13762 {'name': 'site.04.iso.1.subject.05467.lab_id.913724.seq_reps.1'}


 46%|████████████████▍                   | 13873/30454 [00:48<00:54, 306.57it/s]

inconclusive sequencing: 13831 {'name': 'site.06.iso.1.subject.SGD_0189-15.lab_id.06MIL0061.seq_reps.1'}
inconclusive sequencing: 13850 {'name': 'site.04.iso.1.subject.03514.lab_id.829103.seq_reps.1'}
inconclusive sequencing: 13882 {'name': 'site.04.iso.1.subject.03965.lab_id.900753.seq_reps.1'}


 46%|████████████████▌                   | 13967/30454 [00:48<00:55, 298.98it/s]

inconclusive sequencing: 13924 {'name': 'site.04.iso.1.subject.03863.lab_id.831332.seq_reps.1'}
inconclusive sequencing: 13950 {'name': 'site.04.iso.1.subject.00173.lab_id.701433.seq_reps.1'}


 46%|████████████████▋                   | 14113/30454 [00:49<00:58, 279.02it/s]

inconclusive sequencing: 14066 {'name': 'site.06.iso.1.subject.BVH_0011-14.lab_id.06MIL0264.seq_reps.1'}


 47%|████████████████▊                   | 14201/30454 [00:49<00:57, 282.67it/s]

inconclusive sequencing: 14164 {'name': 'site.04.iso.1.subject.04187.lab_id.900623.seq_reps.1_2'}
inconclusive sequencing: 14230 {'name': 'site.06.iso.1.subject.SGD_0067-14.lab_id.06MIL0424.seq_reps.1'}


 47%|████████████████▉                   | 14297/30454 [00:49<00:53, 303.52it/s]

inconclusive sequencing: 14234 {'name': 'site.06.iso.1.subject.BVH_0016-14.lab_id.06MIL0247.seq_reps.1'}


 47%|█████████████████                   | 14394/30454 [00:50<00:51, 314.78it/s]

inconclusive sequencing: 14346 {'name': 'site.04.iso.1.subject.03666.lab_id.912713.seq_reps.1'}
inconclusive sequencing: 14364 {'name': 'site.04.iso.1.subject.00987.lab_id.717126.seq_reps.1'}
inconclusive sequencing: 14378 {'name': 'site.04.iso.1.subject.03886.lab_id.830570.seq_reps.1'}


 48%|█████████████████▏                  | 14495/30454 [00:50<00:50, 316.94it/s]

inconclusive sequencing: 14454 {'name': 'site.06.iso.1.subject.JHL_0061-14.lab_id.06MIL0267.seq_reps.1'}
inconclusive sequencing: 14457 {'name': 'site.04.iso.1.subject.05502.lab_id.913559.seq_reps.1'}
inconclusive sequencing: 14490 {'name': 'site.04.iso.1.subject.05664.lab_id.918988.seq_reps.1'}


 48%|█████████████████▎                  | 14593/30454 [00:50<00:50, 316.14it/s]

inconclusive sequencing: 14551 {'name': 'site.04.iso.1.subject.05044.lab_id.908595.seq_reps.1'}
inconclusive sequencing: 14556 {'name': 'site.04.iso.1.subject.04479.lab_id.903938.seq_reps.1'}
inconclusive sequencing: 14558 {'name': 'site.04.iso.1.subject.04206.lab_id.901267.seq_reps.1'}
inconclusive sequencing: 14573 {'name': 'site.04.iso.1.subject.03537.lab_id.828949.seq_reps.1'}
inconclusive sequencing: 14608 {'name': 'site.04.iso.1.subject.00976.lab_id.716294.seq_reps.1'}


 49%|█████████████████▍                  | 14794/30454 [00:51<00:48, 321.63it/s]

inconclusive sequencing: 14736 {'name': 'site.06.iso.1.subject.MHL_0150-14.lab_id.06MIL0075.seq_reps.1'}


 49%|█████████████████▌                  | 14898/30454 [00:51<00:47, 326.89it/s]

inconclusive sequencing: 14814 {'name': 'site.04.iso.1.subject.00654.lab_id.714029.seq_reps.1'}
inconclusive sequencing: 14857 {'name': 'site.04.iso.1.subject.04612.lab_id.900688.seq_reps.1'}
inconclusive sequencing: 14861 {'name': 'site.04.iso.1.subject.03511.lab_id.828495.seq_reps.1'}
inconclusive sequencing: 14895 {'name': 'site.06.iso.1.subject.MHL_0145-13.lab_id.06MIL0072.seq_reps.1'}
inconclusive sequencing: 14902 {'name': 'site.04.iso.1.subject.04560.lab_id.902718.seq_reps.1'}
inconclusive sequencing: 14903 {'name': 'site.04.iso.1.subject.05566.lab_id.916742.seq_reps.1'}
inconclusive sequencing: 14919 {'name': 'site.04.iso.1.subject.04794.lab_id.907389.seq_reps.1'}


 49%|█████████████████▊                  | 15037/30454 [00:52<00:45, 337.16it/s]

inconclusive sequencing: 14974 {'name': 'site.04.iso.1.subject.00127.lab_id.634380.seq_reps.1'}
inconclusive sequencing: 14994 {'name': 'site.04.iso.1.subject.00574.lab_id.706414.seq_reps.1'}
inconclusive sequencing: 14998 {'name': 'site.04.iso.1.subject.04412.lab_id.902976.seq_reps.1'}
inconclusive sequencing: 15004 {'name': 'site.04.iso.1.subject.03764.lab_id.829856.seq_reps.1'}
inconclusive sequencing: 15012 {'name': 'site.04.iso.1.subject.03039.lab_id.827673.seq_reps.1'}
inconclusive sequencing: 15014 {'name': 'site.04.iso.1.subject.04430.lab_id.902675.seq_reps.1'}
inconclusive sequencing: 15018 {'name': 'site.04.iso.1.subject.01261.lab_id.721904.seq_reps.1'}
other lineage: 15022
other lineage: 15023
other lineage: 15028
other lineage: 15030
other lineage: 15031
other lineage: 15034
other lineage: 15035
other lineage: 15036
other lineage: 15038
other lineage: 15040
other lineage: 15041
no lineage: 15044
no lineage: 15048
no lineage:

 50%|█████████████████▊                  | 15104/30454 [00:52<00:48, 318.92it/s]

 15049
no lineage: 15050
no lineage: 15054
no lineage: 15056
no lineage: 15057
no lineage: 15058
no lineage: 15060
no lineage: 15061
no lineage: 15062
no lineage: 15066
no lineage: 15068
no lineage: 15069
outliers: {'name': 'site.06.iso.1.subject.06TB_0666.lab_id.06MIL1604.seq_reps.1'}
outliers: {'name': 'site.06.iso.1.subject.06TB_1029.lab_id.06MIL2034.seq_reps.1'}
outliers: {'name': 'site.06.iso.1.subject.06TB_1406.lab_id.06MIL2684.seq_reps.1'}
outliers: {'name': 'site.06.iso.1.subject.06TB_1408.lab_id.06MIL2686.seq_reps.1'}
no lineage: 15080
no lineage: 15082
no lineage: 15083
no lineage: 15084
no lineage: 15086
no lineage: 15088
no lineage: 15090
no lineage: 15091
inconclusive sequencing: 15094 {'name': 'site.04.iso.1.subject.03296.lab_id.825159.seq_reps.1'}
inconclusive sequencing: 15096 {'name': 'site.04.iso.1.subject.03606.lab_id.826678.seq_reps.1'}
inconclusive sequencing: 15098 {'name': 'site.04.iso.1.subject.01053.lab_id.714898.seq_reps.1'}
inconclusive sequencing: 15100 {'na


 50%|█████████████████▉                  | 15143/30454 [00:52<00:45, 335.02it/s]

inconclusive sequencing: 15112 {'name': 'site.04.iso.1.subject.04397.lab_id.832405.seq_reps.1'}
inconclusive sequencing: 15113 {'name': 'site.10.iso.1.subject.YA00046953.lab_id.YA00046953.seq_reps.1'}
inconclusive sequencing: 15116 {'name': 'site.08.iso.1.subject.24TB-081.lab_id.2249.seq_reps.1'}


 50%|██████████████████                  | 15314/30454 [00:53<00:46, 327.18it/s]

inconclusive sequencing: 15248 {'name': 'site.05.iso.1.subject.CA-0116.lab_id.CO-07428-18.seq_reps.1'}
inconclusive sequencing: 15275 {'name': 'site.04.iso.1.subject.04844.lab_id.905760.seq_reps.1'}
inconclusive sequencing: 15288 {'name': 'site.08.iso.1.subject.02TB0158.lab_id.17101.seq_reps.1'}
inconclusive sequencing: 15289 {'name': 'site.08.iso.1.subject.02TB0896.lab_id.19235.seq_reps.1'}
inconclusive sequencing: 15290 {'name': 'site.08.iso.1.subject.23TB03-117.lab_id.2122.seq_reps.1'}


 51%|██████████████████▏                 | 15413/30454 [00:53<00:48, 309.54it/s]

inconclusive sequencing: 15353 {'name': 'site.08.iso.1.subject.02TB0048.lab_id.17496.seq_reps.1'}


 51%|██████████████████▎                 | 15477/30454 [00:53<00:48, 311.21it/s]

inconclusive sequencing: 15432 {'name': 'site.08.iso.1.subject.02TB1380.lab_id.20875.seq_reps.1'}


 51%|██████████████████▍                 | 15605/30454 [00:54<00:48, 306.02it/s]

inconclusive sequencing: 15560 {'name': 'site.08.iso.1.subject.02TB2402.lab_id.25893.seq_reps.1'}


 52%|██████████████████▌                 | 15728/30454 [00:54<00:50, 289.35it/s]

inconclusive sequencing: 15672 {'name': 'site.08.iso.1.subject.05TB43025.lab_id.1944.seq_reps.1'}


 52%|██████████████████▋                 | 15839/30454 [00:55<01:08, 213.69it/s]

inconclusive sequencing: 15802 {'name': 'site.04.iso.1.subject.03553.lab_id.828663.seq_reps.1'}


 52%|██████████████████▊                 | 15912/30454 [00:55<01:07, 214.04it/s]

inconclusive sequencing: 15872 {'name': 'site.04.iso.1.subject.03057.lab_id.827163.seq_reps.1'}
inconclusive sequencing: 15880 {'name': 'site.04.iso.1.subject.03989.lab_id.830874.seq_reps.1'}
inconclusive sequencing: 15886 {'name': 'site.04.iso.1.subject.00800.lab_id.715057.seq_reps.1'}


 52%|██████████████████▊                 | 15961/30454 [00:55<01:08, 212.53it/s]

inconclusive sequencing: 15934 {'name': 'site.04.iso.1.subject.00744.lab_id.706847.seq_reps.1'}


 53%|██████████████████▉                 | 16023/30454 [00:56<01:22, 174.74it/s]

inconclusive sequencing: 15995 {'name': 'site.04.iso.1.subject.02741.lab_id.816087.seq_reps.1'}


 53%|███████████████████                 | 16083/30454 [00:56<01:18, 183.23it/s]

inconclusive sequencing: 16050 {'name': 'site.04.iso.1.subject.04225.lab_id.901531.seq_reps.1'}
inconclusive sequencing: 16083 {'name': 'site.04.iso.1.subject.01029.lab_id.716739.seq_reps.1'}


 53%|███████████████████                 | 16121/30454 [00:56<01:18, 182.31it/s]

inconclusive sequencing: 16092 {'name': 'site.04.iso.1.subject.00015.lab_id.631149.seq_reps.1'}


 53%|███████████████████                 | 16161/30454 [00:56<01:17, 184.01it/s]

inconclusive sequencing: 16139 {'name': 'site.04.iso.1.subject.04843.lab_id.907308.seq_reps.1'}


 53%|███████████████████▏                | 16245/30454 [00:57<01:15, 187.82it/s]

inconclusive sequencing: 16206 {'name': 'site.04.iso.1.subject.00091.lab_id.705647.seq_reps.1'}
inconclusive sequencing: 16207 {'name': 'site.04.iso.1.subject.03869.lab_id.829921.seq_reps.1'}
inconclusive sequencing: 16223 {'name': 'site.04.iso.1.subject.03630.lab_id.828569.seq_reps.1'}


 54%|███████████████████▎                | 16321/30454 [00:57<01:25, 165.67it/s]

inconclusive sequencing: 16289 {'name': 'site.04.iso.1.subject.03228.lab_id.824346.seq_reps.1'}
inconclusive sequencing: 16297 {'name': 'site.04.iso.1.subject.05663.lab_id.918688.seq_reps.1'}


 54%|███████████████████▌                | 16500/30454 [00:58<01:22, 169.61it/s]

inconclusive sequencing: 16477 {'name': 'site.04.iso.1.subject.04373.lab_id.901532.seq_reps.1'}


 54%|███████████████████▌                | 16539/30454 [00:58<01:19, 175.75it/s]

inconclusive sequencing: 16514 {'name': 'site.04.iso.1.subject.03929.lab_id.832237.seq_reps.1'}
inconclusive sequencing: 16519 {'name': 'site.04.iso.1.subject.03271.lab_id.826349.seq_reps.1'}
inconclusive sequencing: 16521 {'name': 'site.04.iso.1.subject.03678.lab_id.JJH9853.seq_reps.1'}
inconclusive sequencing: 16533 {'name': 'site.04.iso.1.subject.03632.lab_id.827666.seq_reps.1'}
inconclusive sequencing: 16542 {'name': 'site.04.iso.1.subject.03500.lab_id.828647.seq_reps.1'}
inconclusive sequencing: 16548 {'name': 'site.04.iso.1.subject.00376.lab_id.701593.seq_reps.1'}
inconclusive sequencing: 16550 {'name': 'site.04.iso.1.subject.01633.lab_id.730144.seq_reps.1'}
inconclusive sequencing: 16552 {'name': 'site.04.iso.1.subject.03294.lab_id.823395.seq_reps.1'}


 55%|███████████████████▋                | 16605/30454 [00:59<01:09, 199.24it/s]

inconclusive sequencing: 16582 {'name': 'site.04.iso.1.subject.04891.lab_id.907806.seq_reps.1'}
inconclusive sequencing: 16628 {'name': 'site.04.iso.1.subject.03554.lab_id.828586.seq_reps.1'}


 55%|███████████████████▊                | 16729/30454 [00:59<01:06, 206.66it/s]

inconclusive sequencing: 16683 {'name': 'site.06.iso.1.subject.06TB_1012.lab_id.06MIL2007.seq_reps.1'}


 55%|███████████████████▉                | 16841/30454 [01:00<01:06, 203.56it/s]

inconclusive sequencing: 16807 {'name': 'site.04.iso.1.subject.04550.lab_id.904001.seq_reps.1'}
inconclusive sequencing: 16820 {'name': 'site.04.iso.1.subject.04212.lab_id.901186.seq_reps.1'}


 56%|████████████████████                | 16971/30454 [01:01<01:07, 200.60it/s]

inconclusive sequencing: 16949 {'name': 'site.04.iso.1.subject.04131.lab_id.900834.seq_reps.1'}


 56%|████████████████████▏               | 17055/30454 [01:01<01:08, 195.17it/s]

inconclusive sequencing: 17021 {'name': 'site.06.iso.1.subject.SZH_0013-14.lab_id.06MIL0195.seq_reps.1'}


 56%|████████████████████▎               | 17148/30454 [01:01<01:01, 216.54it/s]

inconclusive sequencing: 17104 {'name': 'site.04.iso.1.subject.04120.lab_id.833088.seq_reps.1'}


 57%|████████████████████▎               | 17223/30454 [01:02<00:56, 233.49it/s]

inconclusive sequencing: 17186 {'name': 'site.04.iso.1.subject.03292.lab_id.824174.seq_reps.1'}
inconclusive sequencing: 17187 {'name': 'site.04.iso.1.subject.04589.lab_id.904941.seq_reps.1'}


 57%|████████████████████▍               | 17271/30454 [01:02<01:04, 205.00it/s]

inconclusive sequencing: 17246 {'name': 'site.06.iso.1.subject.OIC-1206.lab_id.06MIL1099.seq_reps.1'}


 57%|████████████████████▌               | 17351/30454 [01:02<00:54, 238.65it/s]

inconclusive sequencing: 17296 {'name': 'site.04.iso.1.subject.05456.lab_id.913735.seq_reps.1'}
inconclusive sequencing: 17302 {'name': 'site.08.iso.1.subject.02TB2324.lab_id.25790.seq_reps.1'}
inconclusive sequencing: 17305 {'name': 'site.08.iso.1.subject.24TB-031.lab_id.2103.seq_reps.1'}
inconclusive sequencing: 17306 {'name': 'site.04.iso.1.subject.00957.lab_id.717048.seq_reps.1'}
inconclusive sequencing: 17308 {'name': 'site.04.iso.1.subject.00954.lab_id.714841.seq_reps.1'}
inconclusive sequencing: 17312 {'name': 'site.05.iso.1.subject.CA-1399.lab_id.CO-01063-19.seq_reps.1'}
inconclusive sequencing: 17316 {'name': 'site.05.iso.1.subject.CA-0206.lab_id.CO-09546-18.seq_reps.1'}
inconclusive sequencing: 17318 {'name': 'site.05.iso.1.subject.CA-0149.lab_id.CO-07575-18.seq_reps.1'}
inconclusive sequencing: 17319 {'name': 'site.05.iso.1.subject.LS-1093.lab_id.LS-10946-18.seq_reps.1'}
inconclusive sequencing: 17322 {'name': 'site.05.iso.1.subject.LS-1414.lab_id.MA-00011-19.seq_reps.1'}
in

 57%|████████████████████▌               | 17412/30454 [01:03<00:49, 264.62it/s]

inconclusive sequencing: 17374 {'name': 'site.05.iso.1.subject.PMOP-0540.lab_id.MOP-066.seq_reps.1'}


 58%|████████████████████▊               | 17575/30454 [01:03<00:49, 262.23it/s]

inconclusive sequencing: 17532 {'name': 'site.05.iso.1.subject.PSLM-0811.lab_id.SLM-071.seq_reps.1'}
inconclusive sequencing: 17540 {'name': 'site.06.iso.1.subject.06TB_0596.lab_id.06MIL1450.seq_reps.1'}
inconclusive sequencing: 17542 {'name': 'site.06.iso.1.subject.06TB_0994.lab_id.06MIL1989.seq_reps.1'}
inconclusive sequencing: 17544 {'name': 'site.02.iso.1.subject.1082.lab_id.2014184064.seq_reps.2014184064'}
inconclusive sequencing: 17546 {'name': 'site.02.iso.1.subject.0366.lab_id.222025-14.seq_reps.14222025'}
inconclusive sequencing: 17548 {'name': 'site.10.iso.1.subject.YA00026185.lab_id.YA00026185.seq_reps.1'}


 58%|████████████████████▊               | 17657/30454 [01:04<00:50, 252.03it/s]

inconclusive sequencing: 17620 {'name': 'site.03.iso.1.subject.10963.lab_id.IML-00795.seq_reps.1'}
inconclusive sequencing: 17636 {'name': 'site.10.iso.1.subject.YA00026188.lab_id.YA00026188.seq_reps.1'}


 58%|█████████████████████               | 17795/30454 [01:04<00:47, 269.08it/s]

inconclusive sequencing: 17753 {'name': 'site.02.iso.1.subject.0085.lab_id.22A036.seq_reps.36'}
inconclusive sequencing: 17754 {'name': 'site.02.iso.1.subject.0964.lab_id.22A204.seq_reps.204'}


 59%|█████████████████████▏              | 17912/30454 [01:05<00:44, 282.38it/s]

inconclusive sequencing: 17867 {'name': 'site.04.iso.1.subject.05336.lab_id.912600.seq_reps.1'}
inconclusive sequencing: 17868 {'name': 'site.04.iso.1.subject.03729.lab_id.JJH9315.seq_reps.1'}


 59%|█████████████████████▎              | 18011/30454 [01:05<00:40, 310.44it/s]

inconclusive sequencing: 17947 {'name': 'site.04.iso.1.subject.03757.lab_id.830589.seq_reps.1'}
inconclusive sequencing: 17995 {'name': 'site.06.iso.1.subject.06TB_0989.lab_id.06MIL1984.seq_reps.1'}


 60%|█████████████████████▍              | 18177/30454 [01:05<00:39, 314.71it/s]

inconclusive sequencing: 18084 {'name': 'site.06.iso.1.subject.06TB_0947.lab_id.06MIL1942.seq_reps.1'}


 60%|█████████████████████▌              | 18245/30454 [01:06<00:37, 322.93it/s]

inconclusive sequencing: 18200 {'name': 'site.04.iso.1.subject.03176.lab_id.822101.seq_reps.1'}
inconclusive sequencing: 18205 {'name': 'site.04.iso.1.subject.04635.lab_id.905112.seq_reps.1'}
inconclusive sequencing: 18208 {'name': 'site.04.iso.1.subject.04539.lab_id.904470.seq_reps.1'}
inconclusive sequencing: 18224 {'name': 'site.04.iso.1.subject.03750.lab_id.828912.seq_reps.1'}
inconclusive sequencing: 18266 {'name': 'site.04.iso.1.subject.03512.lab_id.829895.seq_reps.1'}


 60%|█████████████████████▋              | 18345/30454 [01:06<00:39, 310.02it/s]

inconclusive sequencing: 18278 {'name': 'site.05.iso.1.subject.LS-1033.lab_id.MA-02068-18.seq_reps.1'}
inconclusive sequencing: 18280 {'name': 'site.04.iso.1.subject.02796.lab_id.818659.seq_reps.1_2'}
inconclusive sequencing: 18282 {'name': 'site.05.iso.1.subject.PSLM-0802.lab_id.SLM-061.seq_reps.1'}
inconclusive sequencing: 18284 {'name': 'site.14.iso.1.subject.4411.lab_id.4411.seq_reps.1'}
inconclusive sequencing: 18286 {'name': 'site.20.iso.1.subject.SCH8123945.lab_id.YA00134192.seq_reps.1'}


 61%|█████████████████████▉              | 18539/30454 [01:07<00:38, 307.11it/s]

inconclusive sequencing: 18494 {'name': 'site.05.iso.1.subject.LR-2419.lab_id.FN-00641-18.seq_reps.1'}


 62%|██████████████████████▏             | 18811/30454 [01:07<00:37, 311.74it/s]

inconclusive sequencing: 18752 {'name': 'site.05.iso.1.subject.PMK-0958.lab_id.MK-0889.seq_reps.1'}
inconclusive sequencing: 18802 {'name': 'site.14.iso.1.subject.4198.lab_id.4198.seq_reps.1'}
inconclusive sequencing: 18808 {'name': 'site.05.iso.1.subject.CA-1159.lab_id.CO-00141-19.seq_reps.1'}
inconclusive sequencing: 18810 {'name': 'site.14.iso.1.subject.4341.lab_id.4341.seq_reps.1'}
inconclusive sequencing: 18814 {'name': 'site.05.iso.1.subject.LS-1146.lab_id.MA-01447-19.seq_reps.1'}
inconclusive sequencing: 18816 {'name': 'site.05.iso.1.subject.PMK-0926.lab_id.MK-0188.seq_reps.1'}


 62%|██████████████████████▍             | 19017/30454 [01:08<00:36, 310.31it/s]

inconclusive sequencing: 18980 {'name': 'site.05.iso.1.subject.LR-2153.lab_id.FN-00117-16.seq_reps.1'}
inconclusive sequencing: 18994 {'name': 'site.05.iso.1.subject.CA-1366.lab_id.CO-02570-19.seq_reps.1'}
inconclusive sequencing: 19029 {'name': 'site.04.iso.1.subject.03601.lab_id.821968.seq_reps.1'}


 63%|██████████████████████▊             | 19325/30454 [01:09<00:34, 318.78it/s]

inconclusive sequencing: 19260 {'name': 'site.05.iso.1.subject.CA-1364.lab_id.CO-02526-19.seq_reps.1'}
inconclusive sequencing: 19262 {'name': 'site.05.iso.1.subject.LR-2189.lab_id.FN-01526-16.seq_reps.1'}
inconclusive sequencing: 19264 {'name': 'site.05.iso.1.subject.LR-2322.lab_id.FN-01100-18.seq_reps.1'}
inconclusive sequencing: 19266 {'name': 'site.05.iso.1.subject.CA-1323.lab_id.CO-01765-19.seq_reps.1'}


 64%|███████████████████████             | 19522/30454 [01:10<00:36, 297.38it/s]

inconclusive sequencing: 19488 {'name': 'site.05.iso.1.subject.PMOP-0644.lab_id.MOP-214.seq_reps.1'}
inconclusive sequencing: 19538 {'name': 'site.06.iso.1.subject.A43826.lab_id.06MIL1288.seq_reps.1'}


 65%|███████████████████████▎            | 19743/30454 [01:10<00:37, 285.70it/s]

inconclusive sequencing: 19705 {'name': 'site.05.iso.1.subject.PMFR-0701.lab_id.MFR-134.seq_reps.1'}
inconclusive sequencing: 19734 {'name': 'site.14.iso.1.subject.3753.lab_id.3753.seq_reps.1'}
inconclusive sequencing: 19740 {'name': 'site.05.iso.1.subject.LR-2043.lab_id.FN-00557-15.seq_reps.1'}
inconclusive sequencing: 19746 {'name': 'site.05.iso.1.subject.LS-0561.lab_id.LS-09620-18.seq_reps.1'}
inconclusive sequencing: 19748 {'name': 'site.05.iso.1.subject.CA-1427.lab_id.CO-02136-19.seq_reps.1'}
inconclusive sequencing: 19753 {'name': 'site.05.iso.1.subject.LR-2034.lab_id.FN-00458-15.seq_reps.1'}
inconclusive sequencing: 19760 {'name': 'site.05.iso.1.subject.LR-2082.lab_id.FN-01188-15.seq_reps.1'}


 65%|███████████████████████▍            | 19842/30454 [01:11<00:36, 288.33it/s]

inconclusive sequencing: 19784 {'name': 'site.05.iso.1.subject.PSLM-0816.lab_id.SLM-077.seq_reps.1'}


 66%|███████████████████████▋            | 20007/30454 [01:12<00:46, 226.26it/s]

inconclusive sequencing: 19970 {'name': 'site.05.iso.1.subject.CA-1477.lab_id.CO-03485-19.seq_reps.1'}


 66%|███████████████████████▉            | 20217/30454 [01:13<00:52, 194.79it/s]

inconclusive sequencing: 20180 {'name': 'site.05.iso.1.subject.CA-1426.lab_id.CO-02041-19.seq_reps.1'}
inconclusive sequencing: 20200 {'name': 'site.05.iso.1.subject.PSLM-0842.lab_id.SLM-111.seq_reps.1'}


 67%|███████████████████████▉            | 20256/30454 [01:13<00:56, 180.30it/s]

inconclusive sequencing: 20226 {'name': 'site.05.iso.1.subject.LR-2053.lab_id.FN-00668-15.seq_reps.1'}
inconclusive sequencing: 20250 {'name': 'site.05.iso.1.subject.PSLM-0815.lab_id.SLM-076.seq_reps.1'}


 67%|███████████████████████▉            | 20293/30454 [01:13<01:07, 151.10it/s]

inconclusive sequencing: 20260 {'name': 'site.05.iso.1.subject.LR-2023.lab_id.FN-00302-15.seq_reps.1'}


 67%|████████████████████████▏           | 20413/30454 [01:14<00:55, 180.16it/s]

inconclusive sequencing: 20388 {'name': 'site.05.iso.1.subject.LS-1079.lab_id.MA-02897-18.seq_reps.1'}


 67%|████████████████████████▎           | 20537/30454 [01:14<00:44, 221.69it/s]

inconclusive sequencing: 20502 {'name': 'site.05.iso.1.subject.LS-1434.lab_id.SJ-01039-19.seq_reps.1'}
inconclusive sequencing: 20504 {'name': 'site.05.iso.1.subject.LR-2202.lab_id.FN-02211-16.seq_reps.1'}


 68%|████████████████████████▍           | 20621/30454 [01:15<00:38, 258.05it/s]

inconclusive sequencing: 20565 {'name': 'site.05.iso.1.subject.CA-1192.lab_id.CO-00829-19.seq_reps.1'}


 68%|████████████████████████▌           | 20798/30454 [01:15<00:34, 282.26it/s]

inconclusive sequencing: 20754 {'name': 'site.14.iso.1.subject.3797.lab_id.3797.seq_reps.1'}


 69%|████████████████████████▋           | 20922/30454 [01:16<00:31, 302.41it/s]

inconclusive sequencing: 20870 {'name': 'site.05.iso.1.subject.CA-0200.lab_id.CO-09078-18.seq_reps.1'}


 69%|████████████████████████▉           | 21110/30454 [01:16<00:30, 309.32it/s]

inconclusive sequencing: 21076 {'name': 'site.14.iso.1.subject.4401.lab_id.4401.seq_reps.1'}


 70%|█████████████████████████           | 21239/30454 [01:17<00:31, 290.01it/s]

inconclusive sequencing: 21179 {'name': 'site.06.iso.1.subject.A43917.lab_id.06MIL1817.seq_reps.1'}
inconclusive sequencing: 21198 {'name': 'site.04.iso.1.subject.04203.lab_id.901529.seq_reps.1'}
inconclusive sequencing: 21201 {'name': 'site.04.iso.1.subject.05591.lab_id.917510.seq_reps.1'}
inconclusive sequencing: 21204 {'name': 'site.05.iso.1.subject.LR-2388.lab_id.FN-00704-18.seq_reps.1'}
inconclusive sequencing: 21208 {'name': 'site.05.iso.1.subject.LR-2174.lab_id.FN-00526-16.seq_reps.1'}
inconclusive sequencing: 21232 {'name': 'site.05.iso.1.subject.LR-2046.lab_id.FN-00592-15.seq_reps.1'}


 71%|█████████████████████████▌          | 21627/30454 [01:18<00:34, 257.51it/s]

inconclusive sequencing: 21578 {'name': 'site.04.iso.1.subject.03205.lab_id.824233.seq_reps.1'}
inconclusive sequencing: 21582 {'name': 'site.04.iso.1.subject.03830.lab_id.827812.seq_reps.1'}
inconclusive sequencing: 21586 {'name': 'site.04.iso.1.subject.03174.lab_id.824301.seq_reps.1'}
inconclusive sequencing: 21588 {'name': 'site.04.iso.1.subject.03519.lab_id.912126.seq_reps.1'}
inconclusive sequencing: 21589 {'name': 'site.04.iso.1.subject.05038.lab_id.910168.seq_reps.1'}


 71%|█████████████████████████▋          | 21740/30454 [01:19<00:33, 263.71it/s]

inconclusive sequencing: 21685 {'name': 'site.06.iso.1.subject.ATH_0086-14.lab_id.06MIL0300.seq_reps.1'}


 72%|█████████████████████████▊          | 21887/30454 [01:19<00:31, 272.38it/s]

inconclusive sequencing: 21846 {'name': 'site.10.iso.1.subject.HG06854962.lab_id.HG06854962.seq_reps.1'}
inconclusive sequencing: 21882 {'name': 'site.10.iso.1.subject.YA00014035.lab_id.YA00014035.seq_reps.1'}


 73%|██████████████████████████          | 22091/30454 [01:20<00:30, 272.88it/s]

inconclusive sequencing: 22056 {'name': 'site.06.iso.1.subject.06TB_0846.lab_id.06MIL1793.seq_reps.1'}


 73%|██████████████████████████▏         | 22189/30454 [01:20<00:26, 308.16it/s]

inconclusive sequencing: 22128 {'name': 'site.14.iso.1.subject.4197.lab_id.4197.seq_reps.1'}
inconclusive sequencing: 22130 {'name': 'site.05.iso.1.subject.PTAN-0030.lab_id.TAN-096.seq_reps.1'}
inconclusive sequencing: 22132 {'name': 'site.05.iso.1.subject.PTAN-0045.lab_id.TAN-139.seq_reps.1'}
inconclusive sequencing: 22136 {'name': 'site.05.iso.1.subject.LR-2257.lab_id.FN-00609-17.seq_reps.1'}
inconclusive sequencing: 22140 {'name': 'site.03.iso.1.subject.10278.lab_id.IML-00837.seq_reps.1'}
inconclusive sequencing: 22144 {'name': 'site.05.iso.1.subject.LS-0996.lab_id.MA-02939-18.seq_reps.1'}
inconclusive sequencing: 22148 {'name': 'site.05.iso.1.subject.LR-2040.lab_id.FN-00527-15.seq_reps.1'}
inconclusive sequencing: 22149 {'name': 'site.05.iso.1.subject.LR-2126.lab_id.FN-01856-15.seq_reps.1'}
inconclusive sequencing: 22150 {'name': 'site.05.iso.1.subject.LR-2049.lab_id.FN-00619-15.seq_reps.1'}
inconclusive sequencing: 22152 {'name': 'site.05.iso.1.subject.LR-2041.lab_id.FN-00534-15.s

 73%|██████████████████████████▎         | 22282/30454 [01:20<00:27, 297.50it/s]

inconclusive sequencing: 22226 {'name': 'site.05.iso.1.subject.LR-2354.lab_id.FN-00598-16.seq_reps.1'}


 74%|██████████████████████████▌         | 22469/30454 [01:21<00:26, 298.44it/s]

inconclusive sequencing: 22380 {'name': 'site.05.iso.1.subject.LR-2255.lab_id.FN-00544-17.seq_reps.1'}


 74%|██████████████████████████▋         | 22532/30454 [01:21<00:26, 301.31it/s]

inconclusive sequencing: 22492 {'name': 'site.05.iso.1.subject.LR-2352.lab_id.FN-00479-16.seq_reps.1'}


 74%|██████████████████████████▊         | 22661/30454 [01:22<00:25, 305.40it/s]

inconclusive sequencing: 22604 {'name': 'site.05.iso.1.subject.PMK-0984.lab_id.MK-1285.seq_reps.1'}


 75%|██████████████████████████▉         | 22758/30454 [01:22<00:24, 311.64it/s]

inconclusive sequencing: 22700 {'name': 'site.14.iso.1.subject.4277.lab_id.4277.seq_reps.1'}
inconclusive sequencing: 22706 {'name': 'site.04.iso.1.subject.03403.lab_id.823488.seq_reps.1'}
inconclusive sequencing: 22731 {'name': 'site.08.iso.1.subject.02TB2016.lab_id.24551.seq_reps.1'}
inconclusive sequencing: 22757 {'name': 'site.04.iso.1.subject.03811.lab_id.828657.seq_reps.1'}


 76%|███████████████████████████▍        | 23159/30454 [01:23<00:23, 315.33it/s]

inconclusive sequencing: 23094 {'name': 'site.10.iso.1.subject.KD01935501.lab_id.KD01935501.seq_reps.1'}
inconclusive sequencing: 23096 {'name': 'site.10.iso.1.subject.YA00125839.lab_id.YA00125839.seq_reps.1'}


 77%|███████████████████████████▊        | 23524/30454 [01:25<00:24, 283.63it/s]

inconclusive sequencing: 23471 {'name': 'site.06.iso.1.subject.NM-17-027.lab_id.06MIL0833.seq_reps.1'}


 78%|███████████████████████████▉        | 23640/30454 [01:25<00:25, 267.38it/s]

inconclusive sequencing: 23609 {'name': 'site.06.iso.1.subject.N1744.lab_id.06MIL1483.seq_reps.1'}
inconclusive sequencing: 23620 {'name': 'site.06.iso.1.subject.06TB_0854.lab_id.06MIL1802.seq_reps.1'}
inconclusive sequencing: 23624 {'name': 'site.06.iso.1.subject.N524.lab_id.06MIL1582.seq_reps.1'}
inconclusive sequencing: 23645 {'name': 'site.06.iso.1.subject.NM-17-077.lab_id.06MIL0846.seq_reps.1'}


 78%|████████████████████████████        | 23781/30454 [01:26<00:24, 275.70it/s]

inconclusive sequencing: 23732 {'name': 'site.06.iso.1.subject.N679.lab_id.06MIL1554.seq_reps.1'}
inconclusive sequencing: 23752 {'name': 'site.02.iso.1.subject.0666.lab_id.235178-15.seq_reps.15235178'}
inconclusive sequencing: 23754 {'name': 'site.02.iso.1.subject.1167.lab_id.2013185497.seq_reps.2013185497'}


 79%|████████████████████████████▍       | 24023/30454 [01:27<00:31, 202.69it/s]

inconclusive sequencing: 24000 {'name': 'site.02.iso.1.subject.0533.lab_id.242321-14.seq_reps.14242321'}


 79%|████████████████████████████▌       | 24133/30454 [01:27<00:30, 204.26it/s]

inconclusive sequencing: 24090 {'name': 'site.08.iso.1.subject.02TB1100.lab_id.20956.seq_reps.1'}


 79%|████████████████████████████▌       | 24187/30454 [01:27<00:27, 231.25it/s]

inconclusive sequencing: 24155 {'name': 'site.02.iso.1.subject.1049.lab_id.2013185124.seq_reps.2013185124'}
inconclusive sequencing: 24184 {'name': 'site.02.iso.1.subject.0925.lab_id.22A160.seq_reps.160'}


 80%|████████████████████████████▊       | 24345/30454 [01:28<00:24, 247.48it/s]

inconclusive sequencing: 24298 {'name': 'site.08.iso.1.subject.24TB00-060.lab_id.2395.seq_reps.1'}
inconclusive sequencing: 24303 {'name': 'site.08.iso.1.subject.24TB00-060.lab_id.2442.seq_reps.1'}
inconclusive sequencing: 24319 {'name': 'site.02.iso.1.subject.0703.lab_id.2014242256.seq_reps.14242256'}


 80%|████████████████████████████▉       | 24454/30454 [01:29<00:23, 253.67it/s]

inconclusive sequencing: 24418 {'name': 'site.02.iso.1.subject.0186.lab_id.2013222215.seq_reps.2013222215'}


 81%|█████████████████████████████▏      | 24726/30454 [01:29<00:19, 300.79it/s]

inconclusive sequencing: 24670 {'name': 'site.04.iso.1.subject.04581.lab_id.901852.seq_reps.1'}
inconclusive sequencing: 24744 {'name': 'site.04.iso.1.subject.03622.lab_id.830059.seq_reps.1'}


 82%|█████████████████████████████▍      | 24886/30454 [01:30<00:19, 291.27it/s]

inconclusive sequencing: 24845 {'name': 'site.06.iso.1.subject.06TB_0215.lab_id.06MIL0676.seq_reps.1'}


 83%|█████████████████████████████▋      | 25136/30454 [01:31<00:17, 295.76it/s]

inconclusive sequencing: 25105 {'name': 'site.06.iso.1.subject.SSM_0134-14.lab_id.06MIL0206.seq_reps.1'}


 83%|█████████████████████████████▊      | 25222/30454 [01:31<00:20, 258.32it/s]

inconclusive sequencing: 25183 {'name': 'site.10.iso.1.subject.YA00036589.lab_id.YA00036589.seq_reps.1'}
inconclusive sequencing: 25234 {'name': 'site.05.iso.1.subject.LR-2408.lab_id.FN-00931-17.seq_reps.1'}


 83%|█████████████████████████████▉      | 25362/30454 [01:32<00:19, 259.35it/s]

inconclusive sequencing: 25332 {'name': 'site.04.iso.1.subject.03536.lab_id.827489.seq_reps.1'}
inconclusive sequencing: 25374 {'name': 'site.04.iso.1.subject.05329.lab_id.911208.seq_reps.1'}
inconclusive sequencing: 25382 {'name': 'site.04.iso.1.subject.03468.lab_id.827200.seq_reps.1'}


 84%|██████████████████████████████▏     | 25489/30454 [01:32<00:17, 290.96it/s]

inconclusive sequencing: 25408 {'name': 'site.04.iso.1.subject.03509.lab_id.828917.seq_reps.1'}
inconclusive sequencing: 25421 {'name': 'site.04.iso.1.subject.04093.lab_id.830108.seq_reps.1'}
inconclusive sequencing: 25477 {'name': 'site.06.iso.1.subject.06TB_1519.lab_id.06MIL2798.seq_reps.1'}


 84%|██████████████████████████████▏     | 25555/30454 [01:32<00:16, 300.33it/s]

inconclusive sequencing: 25498 {'name': 'site.04.iso.1.subject.03531.lab_id.825618.seq_reps.1'}


 84%|██████████████████████████████▎     | 25649/30454 [01:33<00:16, 292.19it/s]

inconclusive sequencing: 25603 {'name': 'site.05.iso.1.subject.PMK-0921.lab_id.MK-0024.seq_reps.1'}


 85%|██████████████████████████████▋     | 25927/30454 [01:34<00:15, 298.65it/s]

inconclusive sequencing: 25842 {'name': 'site.06.iso.1.subject.06TB_0026.lab_id.06MIL0487.seq_reps.1'}
inconclusive sequencing: 25857 {'name': 'site.06.iso.1.subject.06TB_0094.lab_id.06MIL0555.seq_reps.1'}


 86%|██████████████████████████████▉     | 26180/30454 [01:35<00:13, 309.06it/s]

inconclusive sequencing: 26140 {'name': 'site.10.iso.1.subject.YA00043356.lab_id.YA00043356.seq_reps.1'}


 86%|███████████████████████████████     | 26315/30454 [01:35<00:12, 320.36it/s]

inconclusive sequencing: 26228 {'name': 'site.04.iso.1.subject.04392.lab_id.902356.seq_reps.1'}
inconclusive sequencing: 26235 {'name': 'site.04.iso.1.subject.03607.lab_id.830102.seq_reps.1'}


 87%|███████████████████████████████▎    | 26483/30454 [01:35<00:12, 327.89it/s]

inconclusive sequencing: 26334 {'name': 'site.10.iso.1.subject.YA00029563.lab_id.YA00029563.seq_reps.1'}
inconclusive sequencing: 26437 {'name': 'site.04.iso.1.subject.04073.lab_id.830328.seq_reps.1'}
inconclusive sequencing: 26448 {'name': 'site.04.iso.1.subject.04483.lab_id.905492.seq_reps.1'}
inconclusive sequencing: 26478 {'name': 'site.04.iso.1.subject.03658.lab_id.JJH9636.seq_reps.1'}
inconclusive sequencing: 26496 {'name': 'site.04.iso.1.subject.01110.lab_id.719927.seq_reps.1'}
inconclusive sequencing: 26497 {'name': 'site.04.iso.1.subject.03981.lab_id.900045.seq_reps.1'}


 87%|███████████████████████████████▍    | 26628/30454 [01:36<00:11, 345.38it/s]

inconclusive sequencing: 26546 {'name': 'site.14.iso.1.subject.4219.lab_id.4219.seq_reps.1'}
inconclusive sequencing: 26550 {'name': 'site.05.iso.1.subject.LS-0594.lab_id.LS-09084-18.seq_reps.1'}
inconclusive sequencing: 26552 {'name': 'site.05.iso.1.subject.PSLM-0831.lab_id.SLM-095.seq_reps.1'}
inconclusive sequencing: 26554 {'name': 'site.05.iso.1.subject.PMFR-0719.lab_id.MFR-181.seq_reps.1'}
inconclusive sequencing: 26564 {'name': 'site.05.iso.1.subject.LR-2271.lab_id.FN-01151-17.seq_reps.1'}
inconclusive sequencing: 26565 {'name': 'site.05.iso.1.subject.LR-2385.lab_id.FN-00460-17.seq_reps.1'}
inconclusive sequencing: 26568 {'name': 'site.05.iso.1.subject.LR-2039.lab_id.FN-00515-15.seq_reps.1'}
inconclusive sequencing: 26604 {'name': 'site.05.iso.1.subject.PTAN-0241.lab_id.TAN-604.seq_reps.1'}
inconclusive sequencing: 26651 {'name': 'site.04.iso.1.subject.03465.lab_id.826457.seq_reps.1'}


 88%|███████████████████████████████▌    | 26698/30454 [01:36<00:11, 327.71it/s]

inconclusive sequencing: 26655 {'name': 'site.04.iso.1.subject.04322.lab_id.901387.seq_reps.1'}


 88%|███████████████████████████████▊    | 26931/30454 [01:37<00:11, 315.85it/s]

inconclusive sequencing: 26860 {'name': 'site.14.iso.1.subject.2050.lab_id.2050.seq_reps.1'}
inconclusive sequencing: 26890 {'name': 'site.10.iso.1.subject.YA00046931.lab_id.YA00046931.seq_reps.1'}


 89%|████████████████████████████████    | 27137/30454 [01:38<00:10, 308.33it/s]

inconclusive sequencing: 27076 {'name': 'site.05.iso.1.subject.LR-2048.lab_id.FN-00618-15.seq_reps.1'}
inconclusive sequencing: 27084 {'name': 'site.05.iso.1.subject.LR-2328.lab_id.FN-01229-18.seq_reps.1'}
inconclusive sequencing: 27090 {'name': 'site.05.iso.1.subject.LR-2445.lab_id.FN-00278-17.seq_reps.1'}


 89%|████████████████████████████████▏   | 27239/30454 [01:38<00:10, 306.00it/s]

inconclusive sequencing: 27195 {'name': 'site.05.iso.1.subject.PSLM-0777.lab_id.SLM-032.seq_reps.1'}


 90%|████████████████████████████████▍   | 27462/30454 [01:39<00:09, 303.80it/s]

inconclusive sequencing: 27391 {'name': 'site.10.iso.1.subject.YA00023153.lab_id.YA00023153.seq_reps.1'}


 91%|████████████████████████████████▋   | 27617/30454 [01:39<00:09, 290.54it/s]

inconclusive sequencing: 27582 {'name': 'site.06.iso.1.subject.06TB_1458.lab_id.06MIL2737.seq_reps.1'}
inconclusive sequencing: 27584 {'name': 'site.06.iso.1.subject.06TB_0121.lab_id.06MIL0582.seq_reps.1'}
inconclusive sequencing: 27594 {'name': 'site.05.iso.1.subject.LS-1646.lab_id.LS-01716-19.seq_reps.1'}
inconclusive sequencing: 27640 {'name': 'site.05.iso.1.subject.LS-1438.lab_id.SJ-01362-19.seq_reps.1'}


 92%|████████████████████████████████▉   | 27907/30454 [01:40<00:08, 305.34it/s]

inconclusive sequencing: 27853 {'name': 'site.05.iso.1.subject.LR-2410.lab_id.FN-00332-17.seq_reps.1'}


 92%|█████████████████████████████████▏  | 28103/30454 [01:41<00:07, 305.95it/s]

inconclusive sequencing: 28046 {'name': 'site.05.iso.1.subject.LR-2030.lab_id.FN-00391-15.seq_reps.1'}
inconclusive sequencing: 28050 {'name': 'site.05.iso.1.subject.LR-2059.lab_id.FN-00766-15.seq_reps.1'}


 93%|█████████████████████████████████▍  | 28266/30454 [01:41<00:06, 313.98it/s]

inconclusive sequencing: 28178 {'name': 'site.10.iso.1.subject.YA00026034.lab_id.YA00026034.seq_reps.1'}
inconclusive sequencing: 28196 {'name': 'site.10.iso.1.subject.YA00161028.lab_id.YA00161028.seq_reps.1'}
inconclusive sequencing: 28224 {'name': 'site.10.iso.1.subject.TD02715638.lab_id.TD02715638.seq_reps.1'}


 94%|█████████████████████████████████▊  | 28575/30454 [01:42<00:05, 331.45it/s]

inconclusive sequencing: 28486 {'name': 'site.10.iso.1.subject.YA00013555.lab_id.YA00013555.seq_reps.1'}


 94%|█████████████████████████████████▊  | 28642/30454 [01:42<00:05, 309.74it/s]

inconclusive sequencing: 28580 {'name': 'site.10.iso.1.subject.YA00011214.lab_id.YA00011214.seq_reps.1'}
inconclusive sequencing: 28641 {'name': 'site.10.iso.1.subject.YA00020543.lab_id.YA00020543.seq_reps.1'}
inconclusive sequencing: 28642 {'name': 'site.20.iso.1.subject.SA00390429.lab_id.YA00135269.seq_reps.1'}
inconclusive sequencing: 28665 {'name': 'site.05.iso.1.subject.PTAN-0335.lab_id.TAN-329.seq_reps.1'}


 94%|█████████████████████████████████▉  | 28751/30454 [01:43<00:05, 328.16it/s]

inconclusive sequencing: 28716 {'name': 'site.05.iso.1.subject.LR-2304.lab_id.FN-00520-18.seq_reps.1'}


 95%|██████████████████████████████████  | 28854/30454 [01:43<00:05, 306.43it/s]

inconclusive sequencing: 28810 {'name': 'site.11.iso.1.subject.XTB_17-509.lab_id.XTB_17-509.seq_reps.1'}
inconclusive sequencing: 28812 {'name': 'site.04.iso.1.subject.01907.lab_id.733231.seq_reps.1'}
inconclusive sequencing: 28858 {'name': 'site.06.iso.1.subject.06TB_0537.lab_id.06MIL1395.seq_reps.1'}


 95%|██████████████████████████████████▎ | 28995/30454 [01:44<00:04, 326.25it/s]

inconclusive sequencing: 28898 {'name': 'site.11.iso.1.subject.XTB-18-173.lab_id.XTB-18-173.seq_reps.1'}


 96%|██████████████████████████████████▋ | 29312/30454 [01:45<00:03, 342.33it/s]

inconclusive sequencing: 29273 {'name': 'site.04.iso.1.subject.04036.lab_id.900047.seq_reps.1_2'}
inconclusive sequencing: 29327 {'name': 'site.04.iso.1.subject.04796.lab_id.904161.seq_reps.1'}


 97%|██████████████████████████████████▊ | 29457/30454 [01:45<00:02, 353.38it/s]

inconclusive sequencing: 29397 {'name': 'site.04.iso.1.subject.03261.lab_id.824783.seq_reps.1'}
inconclusive sequencing: 29402 {'name': 'site.04.iso.1.subject.03619.lab_id.826279.seq_reps.1'}
inconclusive sequencing: 29403 {'name': 'site.04.iso.1.subject.04992.lab_id.909759.seq_reps.1_2'}
inconclusive sequencing: 29462 {'name': 'site.06.iso.1.subject.06TB_0016.lab_id.06MIL0477.seq_reps.1'}


 99%|███████████████████████████████████▌| 30032/30454 [01:47<00:01, 342.00it/s]

inconclusive sequencing: 29994 {'name': 'site.05.iso.1.subject.LR-2356.lab_id.FN-00723-16.seq_reps.1'}
inconclusive sequencing: 30054 {'name': 'site.05.iso.1.subject.LR-2355.lab_id.FN-00718-16.seq_reps.1'}


 99%|███████████████████████████████████▊| 30286/30454 [01:47<00:00, 342.59it/s]

inconclusive sequencing: 30220 {'name': 'site.05.iso.1.subject.LR-2426.lab_id.FN-01116-15.seq_reps.1'}
inconclusive sequencing: 30222 {'name': 'site.05.iso.1.subject.PTAN-0100.lab_id.TAN-306.seq_reps.1'}
inconclusive sequencing: 30228 {'name': 'site.05.iso.1.subject.PMOP-0526.lab_id.MOP-044.seq_reps.1'}
inconclusive sequencing: 30230 {'name': 'site.05.iso.1.subject.LR-2353.lab_id.FN-00515-16.seq_reps.1'}


100%|███████████████████████████████████▉| 30391/30454 [01:48<00:00, 335.55it/s]

inconclusive sequencing: 30344 {'name': 'site.05.iso.1.subject.CA-1360.lab_id.CO-02151-19.seq_reps.1'}
inconclusive sequencing: 30352 {'name': 'site.05.iso.1.subject.PSLM-0843.lab_id.SLM-112.seq_reps.1'}


100%|████████████████████████████████████| 30454/30454 [01:48<00:00, 281.01it/s]


Dynamically create the ITOL annotations file to add symbols at the end of the branches coloured by lineage

In [282]:
vis = 'symbol'

lineage_header="""DATASET_SYMBOL

SEPARATOR COMMA

DATASET_LABEL,example symbols

COLOR,#e41a1c

MAXIMUM_SIZE,3


DATA
"""

colour_lookup = {1: '#4daf4a', 2: '#e41a1c', 3: '#984ea3', 4: '#377eb8', 5: '#ff7f00', 6: '#a65628'}

lineage_annotations = ''

for i in tqdm(range(ts.num_nodes)):
# for i in tqdm(range(20010,20040)):    

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        lineage = LABELS.loc[uid]['LINEAGE_NAME']
        if lineage!='' and lineage[0]=='L':
            lineage=int(lineage[-1])
        else:
            lineage=None

        if lineage in colour_lookup.keys():
            colour = colour_lookup[lineage]
        else:
            colour = '#bbbbbb'

        line = 'n'+str(i) +',1,1,' + colour + ',1,1\n'
        lineage_annotations += line 
    

lineage_annotations = lineage_header + lineage_annotations
            

100%|███████████████████████████████████| 30454/30454 [00:24<00:00, 1263.32it/s]


Display lineage as colored strip instead

In [153]:
vis = 'colorstrip'

lineage_header="""DATASET_COLORSTRIP

SEPARATOR COMMA

DATASET_LABEL,label1

COLOR,#e41a1c

COLOR_BRANCHES,0


DATA
"""

colour_lookup = {1: '#e41a1c', 2: '#377eb8', 3: '#4daf4a', 4: '#ff7f00', 5: '#ff7f00', 6: '#a65628'}

lineage_annotations = ''

for i in tqdm(range(ts.num_nodes)):
# for i in tqdm(range(20010,20040)):    

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        lineage = LABELS.loc[uid]['LINEAGE_NAME']
        if lineage!='' and lineage[0]=='L':
            lineage=int(lineage[-1])
        else:
            lineage=None

        if lineage in colour_lookup.keys():
            colour = colour_lookup[lineage]
        else:
            colour = '#bbbbbb'

        line = 'n'+ str(i) + ',' + colour + ',Lineage ' + str(lineage) + '\n'
        lineage_annotations += line 
    

lineage_annotations = lineage_header + lineage_annotations


100%|███████████████████████████████████| 30454/30454 [00:27<00:00, 1093.67it/s]


Display lineages as colored ranges

In [20]:
vis = 'colorstrip'

lineage_header="""TREE_COLORS

SEPARATOR COMMA

DATA
"""

colour_lookup = {1: '#4daf4a', 2: '#e41a1c', 3: '#984ea3', 4: '#377eb8', 5: '#ff7f00', 6: '#a65628'}

lineage_annotations = ''

for i in tqdm(range(ts.num_nodes)):  

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        lineage = LABELS.loc[uid]['LINEAGE_NAME']
        if lineage!='' and lineage[0]=='L':
            lineage=int(lineage[-1])
        else:
            lineage=None

        if lineage in colour_lookup.keys():
            colour = colour_lookup[lineage]
        else:
            colour = '#bbbbbb'

        line = 'n'+ str(i) + ',' + 'range,' + colour + ',Lineage ' + str(lineage) + '\n'
        lineage_annotations += line 
    

lineage_annotations = lineage_header + lineage_annotations


100%|███████████████████████████████████| 30454/30454 [00:25<00:00, 1208.66it/s]


Some ORIGINAL_IDS are duplicated due to a step before (join LINEAGE), but it would make no difference to simply choose one of the instances

In [24]:
list = []
for i in tqdm(range(ts.num_nodes)):  
    row = new_tables.nodes[i]
    if 'name' in row.metadata.keys():
        uid = row.metadata['name']
        mutation = LABELS.loc[uid]
        if mutation.IS_RES.size > 1:  
            print(mutation.IS_RES.size, mutation.index)
            list.append(mutation.iloc[0].IS_RES == mutation.iloc[1].IS_RES)
print(all(list))

  5%|█▉                                  | 1662/30454 [00:01<00:22, 1272.41it/s]

2 Index(['site.17.iso.1.subject.K0057.lab_id.123-20-0057-1000.seq_reps.1', 'site.17.iso.1.subject.K0057.lab_id.123-20-0057-1000.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.02TB2346.lab_id.26277.seq_reps.1', 'site.08.iso.1.subject.02TB2346.lab_id.26277.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.24TB00-016.lab_id.2394.seq_reps.1', 'site.08.iso.1.subject.24TB00-016.lab_id.2394.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.24TB00-016.lab_id.2297.seq_reps.1', 'site.08.iso.1.subject.24TB00-016.lab_id.2297.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.24TB00-016.lab_id.2248.seq_reps.1', 'site.08.iso.1.subject.24TB00-016.lab_id.2248.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.24TB00-016.lab_id.2296.seq_reps.1', 'site.08.iso.1.subject.24TB00-016.lab_id.2296.seq_reps.1'], dtype='object', name='ORIGINAL_UID'

  7%|██▌                                 | 2167/30454 [00:01<00:24, 1132.70it/s]

2 Index(['site.02.iso.1.subject.0129.lab_id.22A084.seq_reps.84', 'site.02.iso.1.subject.0129.lab_id.22A084.seq_reps.84'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0486.lab_id.235060-14.seq_reps.14235060', 'site.02.iso.1.subject.0486.lab_id.235060-14.seq_reps.14235060'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0114.lab_id.22A068.seq_reps.68', 'site.02.iso.1.subject.0114.lab_id.22A068.seq_reps.68'], dtype='object', name='ORIGINAL_UID')


  8%|███                                 | 2559/30454 [00:02<00:22, 1237.15it/s]

2 Index(['site.02.iso.1.subject.0877.lab_id.22A111.seq_reps.111', 'site.02.iso.1.subject.0877.lab_id.22A111.seq_reps.111'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0919.lab_id.22A154.seq_reps.154', 'site.02.iso.1.subject.0919.lab_id.22A154.seq_reps.154'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.01004.lab_id.718501.seq_reps.1', 'site.04.iso.1.subject.01004.lab_id.718501.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.11.iso.1.subject.MDR156.lab_id.SWE-124.seq_reps.1_2_3_4', 'site.11.iso.1.subject.MDR156.lab_id.SWE-124.seq_reps.1_2_3_4'], dtype='object', name='ORIGINAL_UID')


 11%|███▉                                | 3341/30454 [00:02<00:21, 1243.63it/s]

3 Index(['site.02.iso.1.subject.0133.lab_id.22A088.seq_reps.88',
       'site.02.iso.1.subject.0133.lab_id.22A088.seq_reps.88',
       'site.02.iso.1.subject.0133.lab_id.22A088.seq_reps.88'],
      dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0946.lab_id.22A181.seq_reps.181', 'site.02.iso.1.subject.0946.lab_id.22A181.seq_reps.181'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0087.lab_id.22A039.seq_reps.39', 'site.02.iso.1.subject.0087.lab_id.22A039.seq_reps.39'], dtype='object', name='ORIGINAL_UID')


 12%|████▍                               | 3724/30454 [00:03<00:26, 1015.21it/s]

2 Index(['site.03.iso.1.subject.JPN-R2012-0023.lab_id.JPN-R2012-0023.seq_reps.1', 'site.03.iso.1.subject.JPN-R2012-0023.lab_id.JPN-R2012-0023.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.02TB1380.lab_id.21047.seq_reps.1', 'site.08.iso.1.subject.02TB1380.lab_id.21047.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 13%|████▉                                | 4036/30454 [00:03<00:27, 973.85it/s]

2 Index(['site.02.iso.1.subject.0108.lab_id.22A062.seq_reps.62', 'site.02.iso.1.subject.0108.lab_id.22A062.seq_reps.62'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.02TB1295.lab_id.19778.seq_reps.1', 'site.08.iso.1.subject.02TB1295.lab_id.19778.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 16%|█████▊                               | 4776/30454 [00:04<00:25, 998.64it/s]

2 Index(['site.05.iso.1.subject.LR-2251.lab_id.FN-00446-17.seq_reps.1', 'site.05.iso.1.subject.LR-2251.lab_id.FN-00446-17.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.05.iso.1.subject.PMFR-0661.lab_id.MFR-010.seq_reps.1', 'site.05.iso.1.subject.PMFR-0661.lab_id.MFR-010.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 16%|██████                               | 4974/30454 [00:04<00:26, 952.40it/s]

2 Index(['site.13.iso.1.subject.160200127.lab_id.160200127.seq_reps.1', 'site.13.iso.1.subject.160200127.lab_id.160200127.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0064.lab_id.22A014.seq_reps.14', 'site.02.iso.1.subject.0064.lab_id.22A014.seq_reps.14'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0109.lab_id.22A063.seq_reps.63', 'site.02.iso.1.subject.0109.lab_id.22A063.seq_reps.63'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.02TB1470.lab_id.20004.seq_reps.1', 'site.08.iso.1.subject.02TB1470.lab_id.20004.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 17%|██████▎                             | 5295/30454 [00:04<00:24, 1006.66it/s]

2 Index(['site.04.iso.1.subject.01851.lab_id.731052.seq_reps.1', 'site.04.iso.1.subject.01851.lab_id.731052.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.05421.lab_id.912075.seq_reps.1', 'site.04.iso.1.subject.05421.lab_id.912075.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.02197.lab_id.805429.seq_reps.1', 'site.04.iso.1.subject.02197.lab_id.805429.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.00153.lab_id.701553.seq_reps.1', 'site.04.iso.1.subject.00153.lab_id.701553.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.02057.lab_id.803377.seq_reps.1', 'site.04.iso.1.subject.02057.lab_id.803377.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 19%|██████▋                             | 5667/30454 [00:05<00:21, 1164.21it/s]

2 Index(['site.02.iso.1.subject.0915.lab_id.22A149.seq_reps.149', 'site.02.iso.1.subject.0915.lab_id.22A149.seq_reps.149'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0097.lab_id.06MIL0558.seq_reps.1', 'site.06.iso.1.subject.06TB_0097.lab_id.06MIL0558.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 21%|███████▍                            | 6311/30454 [00:05<00:18, 1271.84it/s]

2 Index(['site.10.iso.1.subject.YA00029135.lab_id.YA00029135.seq_reps.1', 'site.10.iso.1.subject.YA00029135.lab_id.YA00029135.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.01290.lab_id.722418.seq_reps.1', 'site.04.iso.1.subject.01290.lab_id.722418.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 23%|████████▍                           | 7123/30454 [00:06<00:17, 1346.30it/s]

2 Index(['site.06.iso.1.subject.06TB_0041.lab_id.06MIL0502.seq_reps.1', 'site.06.iso.1.subject.06TB_0041.lab_id.06MIL0502.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.T4683.lab_id.IML-00063.seq_reps.1', 'site.03.iso.1.subject.T4683.lab_id.IML-00063.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 27%|█████████▌                          | 8108/30454 [00:06<00:16, 1388.84it/s]

2 Index(['site.02.iso.1.subject.0916.lab_id.22A151.seq_reps.151', 'site.02.iso.1.subject.0916.lab_id.22A151.seq_reps.151'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0079.lab_id.22A031.seq_reps.31', 'site.02.iso.1.subject.0079.lab_id.22A031.seq_reps.31'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.10678.lab_id.IML-00977.seq_reps.1', 'site.03.iso.1.subject.10678.lab_id.IML-00977.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0283.lab_id.06MIL1340.seq_reps.1', 'site.06.iso.1.subject.06TB_0283.lab_id.06MIL1340.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0653.lab_id.06MIL1539.seq_reps.1', 'site.06.iso.1.subject.06TB_0653.lab_id.06MIL1539.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.GB-85010098.lab_id.85010098.seq_reps.1', 'site.03.iso.1.subject.GB-85010098.lab_id.85010098.seq_reps.1'], dtype='object', name='ORIGINAL_

 28%|█████████▉                          | 8383/30454 [00:07<00:16, 1341.30it/s]


2 Index(['site.03.iso.1.subject.10889.lab_id.IML-00990.seq_reps.1', 'site.03.iso.1.subject.10889.lab_id.IML-00990.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.11.iso.1.subject.XTB_17-574.lab_id.XTB_17-574.seq_reps.1', 'site.11.iso.1.subject.XTB_17-574.lab_id.XTB_17-574.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.11.iso.1.subject.XTB_18-045.lab_id.XTB_18-045.seq_reps.1_2', 'site.11.iso.1.subject.XTB_18-045.lab_id.XTB_18-045.seq_reps.1_2'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.T779.lab_id.T779.seq_reps.1', 'site.03.iso.1.subject.T779.lab_id.T779.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.GB-92250159.lab_id.IML-01172.seq_reps.1', 'site.03.iso.1.subject.GB-92250159.lab_id.IML-01172.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.11.iso.1.subject.MDR153.lab_id.SWE-121.seq_reps.1_2_3_4', 'site.11.iso.1.subject.MDR153.lab_id.SWE-121.seq_reps.1_2_3_4'], dtype='o

 28%|██████████▏                         | 8651/30454 [00:07<00:18, 1208.13it/s]

 Index(['site.06.iso.1.subject.06TB_0060.lab_id.06MIL0521.seq_reps.1', 'site.06.iso.1.subject.06TB_0060.lab_id.06MIL0521.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0189.lab_id.06MIL0650.seq_reps.1', 'site.06.iso.1.subject.06TB_0189.lab_id.06MIL0650.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.11.iso.1.subject.XTB-18-187.lab_id.XTB-18-187.seq_reps.1', 'site.11.iso.1.subject.XTB-18-187.lab_id.XTB-18-187.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.11.iso.1.subject.XTB-18-188.lab_id.XTB-18-188.seq_reps.1', 'site.11.iso.1.subject.XTB-18-188.lab_id.XTB-18-188.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.GB-84130099.lab_id.IML-01763.seq_reps.1', 'site.03.iso.1.subject.GB-84130099.lab_id.IML-01763.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0275.lab_id.06MIL0866.seq_reps.1', 'site.06.iso.1.subject.06TB_0275.lab_id.06MIL0866.se

 29%|██████████▌                         | 8896/30454 [00:07<00:18, 1136.64it/s]


2 Index(['site.03.iso.1.subject.GB-84210116.lab_id.IML-01914.seq_reps.1', 'site.03.iso.1.subject.GB-84210116.lab_id.IML-01914.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.BOR-18007016.lab_id.IML-00185.seq_reps.1', 'site.03.iso.1.subject.BOR-18007016.lab_id.IML-00185.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.GB-82760043.lab_id.82760043.seq_reps.1', 'site.03.iso.1.subject.GB-82760043.lab_id.82760043.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.KGZ-6834-2018.lab_id.6834-2018.seq_reps.1', 'site.03.iso.1.subject.KGZ-6834-2018.lab_id.6834-2018.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.T1607.lab_id.IML-00025.seq_reps.1', 'site.03.iso.1.subject.T1607.lab_id.IML-00025.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.10743.lab_id.IML-01013.seq_reps.1', 'site.03.iso.1.subject.10743.lab_id.IML-01013.seq_reps

 30%|███████████                          | 9135/30454 [00:07<00:21, 969.89it/s]

3 Index(['site.06.iso.1.subject.PIMS_0020-14.lab_id.06MIL0109.seq_reps.1',
       'site.06.iso.1.subject.PIMS_0020-14.lab_id.06MIL0109.seq_reps.1',
       'site.06.iso.1.subject.PIMS_0020-14.lab_id.06MIL0109.seq_reps.1'],
      dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0063.lab_id.22A013.seq_reps.13', 'site.02.iso.1.subject.0063.lab_id.22A013.seq_reps.13'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0949.lab_id.22A184.seq_reps.184', 'site.02.iso.1.subject.0949.lab_id.22A184.seq_reps.184'], dtype='object', name='ORIGINAL_UID')


 31%|███████████▍                         | 9463/30454 [00:08<00:21, 990.38it/s]

2 Index(['site.03.iso.1.subject.JPN-R2012-00021.lab_id.03-JPN-R2012-0021.seq_reps.1', 'site.03.iso.1.subject.JPN-R2012-00021.lab_id.03-JPN-R2012-0021.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0970.lab_id.22A210.seq_reps.210', 'site.02.iso.1.subject.0970.lab_id.22A210.seq_reps.210'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.02TB0159.lab_id.16990.seq_reps.1', 'site.08.iso.1.subject.02TB0159.lab_id.16990.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.02TB0159.lab_id.17585.seq_reps.1', 'site.08.iso.1.subject.02TB0159.lab_id.17585.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 32%|███████████▌                        | 9777/30454 [00:08<00:20, 1005.87it/s]

2 Index(['site.13.iso.1.subject.110300280.lab_id.110300280.seq_reps.1', 'site.13.iso.1.subject.110300280.lab_id.110300280.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.AF11681.lab_id.06MIL1559.seq_reps.1', 'site.06.iso.1.subject.AF11681.lab_id.06MIL1559.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.OIC-1179.lab_id.06MIL1092.seq_reps.1', 'site.06.iso.1.subject.OIC-1179.lab_id.06MIL1092.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.00276.lab_id.T-4091.seq_reps.1', 'site.04.iso.1.subject.00276.lab_id.T-4091.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
3 Index(['site.02.iso.1.subject.0070.lab_id.22A021.seq_reps.21',
       'site.02.iso.1.subject.0070.lab_id.22A021.seq_reps.21',
       'site.02.iso.1.subject.0070.lab_id.22A021.seq_reps.21'],
      dtype='object', name='ORIGINAL_UID')
3 Index(['site.02.iso.1.subject.0183.lab_id.2013222280.seq_reps.2013222280',
       'site.02.

 35%|████████████▎                      | 10688/30454 [00:09<00:17, 1106.74it/s]

2 Index(['site.08.iso.1.subject.02TB2486.lab_id.26519.seq_reps.1', 'site.08.iso.1.subject.02TB2486.lab_id.26519.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.JPN-R2012-0029.lab_id.JPN-R2012-0029.seq_reps.1', 'site.03.iso.1.subject.JPN-R2012-0029.lab_id.JPN-R2012-0029.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.08.iso.1.subject.DTU-059.lab_id.2159.seq_reps.1', 'site.08.iso.1.subject.DTU-059.lab_id.2159.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 36%|████████████▋                      | 11062/30454 [00:09<00:16, 1204.46it/s]

2 Index(['site.10.iso.1.subject.UH00075855.lab_id.UH00075855.seq_reps.1', 'site.10.iso.1.subject.UH00075855.lab_id.UH00075855.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.YA00027786.lab_id.YA00027786.seq_reps.1', 'site.10.iso.1.subject.YA00027786.lab_id.YA00027786.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.YA00027931.lab_id.YA00027931.seq_reps.1', 'site.10.iso.1.subject.YA00027931.lab_id.YA00027931.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.YA00053736.lab_id.YA00053736.seq_reps.1', 'site.10.iso.1.subject.YA00053736.lab_id.YA00053736.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 40%|██████████████▎                     | 12086/30454 [00:10<00:18, 998.83it/s]

2 Index(['site.06.iso.1.subject.MHL_0129-15.lab_id.06MIL0018.seq_reps.1', 'site.06.iso.1.subject.MHL_0129-15.lab_id.06MIL0018.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.ATH_0074-14.lab_id.06MIL0217.seq_reps.1', 'site.06.iso.1.subject.ATH_0074-14.lab_id.06MIL0217.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.21.iso.1.subject.006.lab_id.MR304907G.seq_reps.1', 'site.21.iso.1.subject.006.lab_id.MR304907G.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.01199.lab_id.720017.seq_reps.1', 'site.04.iso.1.subject.01199.lab_id.720017.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.03339.lab_id.826118.seq_reps.1', 'site.04.iso.1.subject.03339.lab_id.826118.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.00254.lab_id.702164.seq_reps.1', 'site.04.iso.1.subject.00254.lab_id.702164.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 40%|██████████████▌                     | 12288/30454 [00:10<00:18, 988.07it/s]

2 Index(['site.04.iso.1.subject.02824.lab_id.819313.seq_reps.1', 'site.04.iso.1.subject.02824.lab_id.819313.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.11.iso.1.subject.MDR054.lab_id.SWE-42.seq_reps.1_2_3_4', 'site.11.iso.1.subject.MDR054.lab_id.SWE-42.seq_reps.1_2_3_4'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.NHM_0299-14.lab_id.06MIL0387.seq_reps.1', 'site.06.iso.1.subject.NHM_0299-14.lab_id.06MIL0387.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 41%|██████████████▌                    | 12636/30454 [00:11<00:16, 1098.57it/s]

2 Index(['site.06.iso.1.subject.IHK-1155.lab_id.06MIL1085.seq_reps.1', 'site.06.iso.1.subject.IHK-1155.lab_id.06MIL1085.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.MHL_0146-14.lab_id.06MIL0051.seq_reps.1', 'site.06.iso.1.subject.MHL_0146-14.lab_id.06MIL0051.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 42%|██████████████▊                    | 12865/30454 [00:11<00:15, 1119.06it/s]

2 Index(['site.06.iso.1.subject.SGD_0061-14.lab_id.06MIL0262.seq_reps.1', 'site.06.iso.1.subject.SGD_0061-14.lab_id.06MIL0262.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.01761.lab_id.732277.seq_reps.1', 'site.04.iso.1.subject.01761.lab_id.732277.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.02167.lab_id.804933.seq_reps.1', 'site.04.iso.1.subject.02167.lab_id.804933.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.MHL_0241-14.lab_id.06MIL0460.seq_reps.1', 'site.06.iso.1.subject.MHL_0241-14.lab_id.06MIL0460.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 44%|███████████████▎                   | 13323/30454 [00:11<00:15, 1118.03it/s]

2 Index(['site.06.iso.1.subject.SGD_0057-14.lab_id.06MIL0265.seq_reps.1', 'site.06.iso.1.subject.SGD_0057-14.lab_id.06MIL0265.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
3 Index(['site.06.iso.1.subject.RLH_0055-15.lab_id.06MIL0058.seq_reps.1',
       'site.06.iso.1.subject.RLH_0055-15.lab_id.06MIL0058.seq_reps.1',
       'site.06.iso.1.subject.RLH_0055-15.lab_id.06MIL0058.seq_reps.1'],
      dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.ATH_0078-14.lab_id.06MIL0218.seq_reps.1', 'site.06.iso.1.subject.ATH_0078-14.lab_id.06MIL0218.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 46%|████████████████▏                  | 14037/30454 [00:12<00:13, 1184.39it/s]

2 Index(['site.06.iso.1.subject.PIMS_0019-14.lab_id.06MIL0181.seq_reps.1', 'site.06.iso.1.subject.PIMS_0019-14.lab_id.06MIL0181.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 47%|████████████████▌                  | 14400/30454 [00:12<00:13, 1173.47it/s]

2 Index(['site.06.iso.1.subject.ICK-1180.lab_id.06MIL1093.seq_reps.1', 'site.06.iso.1.subject.ICK-1180.lab_id.06MIL1093.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.RLH_0315-14.lab_id.06MIL0370.seq_reps.1', 'site.06.iso.1.subject.RLH_0315-14.lab_id.06MIL0370.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.ICK-1174.lab_id.06MIL1090.seq_reps.1', 'site.06.iso.1.subject.ICK-1174.lab_id.06MIL1090.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.ICK-1186.lab_id.06MIL1095.seq_reps.1', 'site.06.iso.1.subject.ICK-1186.lab_id.06MIL1095.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.IHK-1174.lab_id.06MIL1091.seq_reps.1', 'site.06.iso.1.subject.IHK-1174.lab_id.06MIL1091.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.RLH_0269-14.lab_id.06MIL0194.seq_reps.1', 'site.06.iso.1.subject.RLH_0269-14.lab_id.06MIL0194.seq_reps.1'],

 49%|█████████████████                  | 14895/30454 [00:13<00:12, 1197.72it/s]

2 Index(['site.06.iso.1.subject.BVH_0025_14.lab_id.06MIL0340.seq_reps.1', 'site.06.iso.1.subject.BVH_0025_14.lab_id.06MIL0340.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.MHL_0234-14.lab_id.06MIL0443.seq_reps.1', 'site.06.iso.1.subject.MHL_0234-14.lab_id.06MIL0443.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 50%|█████████████████▌                 | 15282/30454 [00:13<00:13, 1146.27it/s]

2 Index(['site.13.iso.1.subject.180200031.lab_id.180200031.seq_reps.1', 'site.13.iso.1.subject.180200031.lab_id.180200031.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.JPN-R2012-0087.lab_id.JPN-R2012-0087.seq_reps.1', 'site.03.iso.1.subject.JPN-R2012-0087.lab_id.JPN-R2012-0087.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 54%|██████████████████▊                | 16419/30454 [00:14<00:10, 1284.66it/s]

2 Index(['site.04.iso.1.subject.00636.lab_id.710100.seq_reps.1', 'site.04.iso.1.subject.00636.lab_id.710100.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 55%|███████████████████▏               | 16687/30454 [00:14<00:10, 1300.47it/s]

2 Index(['site.04.iso.1.subject.00505.lab_id.21757.seq_reps.1', 'site.04.iso.1.subject.00505.lab_id.21757.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.00197.lab_id.700492.seq_reps.1', 'site.04.iso.1.subject.00197.lab_id.700492.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.01574.lab_id.729868.seq_reps.1', 'site.04.iso.1.subject.01574.lab_id.729868.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 57%|███████████████████▉               | 17364/30454 [00:14<00:09, 1341.62it/s]

2 Index(['site.06.iso.1.subject.SZH_0050-14.lab_id.06MIL0427.seq_reps.1', 'site.06.iso.1.subject.SZH_0050-14.lab_id.06MIL0427.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.00077.lab_id.634379.seq_reps.1', 'site.04.iso.1.subject.00077.lab_id.634379.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.05.iso.1.subject.PPAR-0896.lab_id.PAR-044.seq_reps.1', 'site.05.iso.1.subject.PPAR-0896.lab_id.PAR-044.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.05.iso.1.subject.PSLM-0788.lab_id.SLM-046.seq_reps.1', 'site.05.iso.1.subject.PSLM-0788.lab_id.SLM-046.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 58%|████████████████████▍              | 17769/30454 [00:15<00:09, 1327.27it/s]

2 Index(['site.05.iso.1.subject.PTPI-0905.lab_id.TPI-973.seq_reps.1', 'site.05.iso.1.subject.PTPI-0905.lab_id.TPI-973.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.10951.lab_id.IML-01006.seq_reps.1', 'site.03.iso.1.subject.10951.lab_id.IML-01006.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.10531.lab_id.IML-01050.seq_reps.1', 'site.03.iso.1.subject.10531.lab_id.IML-01050.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.11071.lab_id.IML-00997.seq_reps.1', 'site.03.iso.1.subject.11071.lab_id.IML-00997.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 59%|████████████████████▋              | 18045/30454 [00:15<00:09, 1321.09it/s]

2 Index(['site.02.iso.1.subject.0869.lab_id.22A103.seq_reps.103', 'site.02.iso.1.subject.0869.lab_id.22A103.seq_reps.103'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0960.lab_id.22A200.seq_reps.200', 'site.02.iso.1.subject.0960.lab_id.22A200.seq_reps.200'], dtype='object', name='ORIGINAL_UID')
3 Index(['site.03.iso.1.subject.JPN-R2012-0044.lab_id.JPN-R2012-0044.seq_reps.1',
       'site.03.iso.1.subject.JPN-R2012-0044.lab_id.JPN-R2012-0044.seq_reps.1',
       'site.03.iso.1.subject.JPN-R2012-0044.lab_id.JPN-R2012-0044.seq_reps.1'],
      dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.BOR-17010158.lab_id.IML-00660.seq_reps.1', 'site.03.iso.1.subject.BOR-17010158.lab_id.IML-00660.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0970.lab_id.06MIL1965.seq_reps.1', 'site.06.iso.1.subject.06TB_0970.lab_id.06MIL1965.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.DJB28.la

 61%|█████████████████████▏             | 18446/30454 [00:15<00:09, 1310.41it/s]

2 Index(['site.06.iso.1.subject.BVH_0008-14.lab_id.06MIL0198.seq_reps.1', 'site.06.iso.1.subject.BVH_0008-14.lab_id.06MIL0198.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.JHL_0048-14.lab_id.06MIL0178.seq_reps.1', 'site.06.iso.1.subject.JHL_0048-14.lab_id.06MIL0178.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.DR-261.lab_id.IML-01177.seq_reps.1', 'site.03.iso.1.subject.DR-261.lab_id.IML-01177.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.ATH_0065-14.lab_id.06MIL0147.seq_reps.1', 'site.06.iso.1.subject.ATH_0065-14.lab_id.06MIL0147.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.YA00046932.lab_id.YA00046932.seq_reps.1', 'site.10.iso.1.subject.YA00046932.lab_id.YA00046932.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 62%|█████████████████████▋             | 18849/30454 [00:16<00:08, 1323.81it/s]

2 Index(['site.04.iso.1.subject.01130.lab_id.718446.seq_reps.1', 'site.04.iso.1.subject.01130.lab_id.718446.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.01855.lab_id.734284.seq_reps.1', 'site.04.iso.1.subject.01855.lab_id.734284.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.YA00032100.lab_id.YA00032100.seq_reps.1', 'site.10.iso.1.subject.YA00032100.lab_id.YA00032100.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_1486.lab_id.06MIL2765.seq_reps.1', 'site.06.iso.1.subject.06TB_1486.lab_id.06MIL2765.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0162.lab_id.06MIL0623.seq_reps.1', 'site.06.iso.1.subject.06TB_0162.lab_id.06MIL0623.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 63%|██████████████████████▏            | 19264/30454 [00:16<00:08, 1345.26it/s]

2 Index(['site.05.iso.1.subject.CA-1642.lab_id.CO-14766-18.seq_reps.1', 'site.05.iso.1.subject.CA-1642.lab_id.CO-14766-18.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.05.iso.1.subject.LR-3112.lab_id.CR-01992-16.seq_reps.1', 'site.05.iso.1.subject.LR-3112.lab_id.CR-01992-16.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 65%|██████████████████████▊            | 19816/30454 [00:16<00:07, 1368.98it/s]

2 Index(['site.06.iso.1.subject.06TB_0951.lab_id.06MIL1946.seq_reps.1', 'site.06.iso.1.subject.06TB_0951.lab_id.06MIL1946.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 72%|█████████████████████████▎         | 22077/30454 [00:18<00:06, 1387.98it/s]

2 Index(['site.06.iso.1.subject.14015.lab_id.06MIL2154.seq_reps.1', 'site.06.iso.1.subject.14015.lab_id.06MIL2154.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_1564.lab_id.06MIL2843.seq_reps.1', 'site.06.iso.1.subject.06TB_1564.lab_id.06MIL2843.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 74%|██████████████████████████         | 22637/30454 [00:18<00:05, 1322.26it/s]

2 Index(['site.05.iso.1.subject.LS-1658.lab_id.MA-01344-19.seq_reps.1', 'site.05.iso.1.subject.LS-1658.lab_id.MA-01344-19.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0557.lab_id.06MIL1363.seq_reps.1', 'site.06.iso.1.subject.06TB_0557.lab_id.06MIL1363.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 77%|███████████████████████████        | 23590/30454 [00:19<00:05, 1360.61it/s]

2 Index(['site.06.iso.1.subject.06TB_0523.lab_id.06MIL1201.seq_reps.1', 'site.06.iso.1.subject.06TB_0523.lab_id.06MIL1201.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0074.lab_id.22A026.seq_reps.26', 'site.02.iso.1.subject.0074.lab_id.22A026.seq_reps.26'], dtype='object', name='ORIGINAL_UID')


 80%|████████████████████████████       | 24423/30454 [00:20<00:04, 1363.79it/s]

2 Index(['site.02.iso.1.subject.0880.lab_id.22A114.seq_reps.114', 'site.02.iso.1.subject.0880.lab_id.22A114.seq_reps.114'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.13.iso.1.subject.160200058.lab_id.160200058.seq_reps.1', 'site.13.iso.1.subject.160200058.lab_id.160200058.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.13.iso.1.subject.110200121.lab_id.110200121.seq_reps.1', 'site.13.iso.1.subject.110200121.lab_id.110200121.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.02.iso.1.subject.0954.lab_id.22A190.seq_reps.190', 'site.02.iso.1.subject.0954.lab_id.22A190.seq_reps.190'], dtype='object', name='ORIGINAL_UID')


 82%|████████████████████████████▋      | 24975/30454 [00:20<00:04, 1358.51it/s]

2 Index(['site.06.iso.1.subject.SGD_0076-14.lab_id.06MIL0339.seq_reps.1', 'site.06.iso.1.subject.SGD_0076-14.lab_id.06MIL0339.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 83%|█████████████████████████████▏     | 25397/30454 [00:20<00:03, 1383.92it/s]

2 Index(['site.05.iso.1.subject.CA-1629.lab_id.CO-02971-19.seq_reps.1', 'site.05.iso.1.subject.CA-1629.lab_id.CO-02971-19.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 85%|█████████████████████████████▊     | 25953/30454 [00:21<00:03, 1314.78it/s]

2 Index(['site.06.iso.1.subject.06TB_0391.lab_id.06MIL0982.seq_reps.1', 'site.06.iso.1.subject.06TB_0391.lab_id.06MIL0982.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0485.lab_id.06MIL1163.seq_reps.1', 'site.06.iso.1.subject.06TB_0485.lab_id.06MIL1163.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0843.lab_id.06MIL1790.seq_reps.1', 'site.06.iso.1.subject.06TB_0843.lab_id.06MIL1790.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0640.lab_id.06MIL1524.seq_reps.1', 'site.06.iso.1.subject.06TB_0640.lab_id.06MIL1524.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0072.lab_id.06MIL0533.seq_reps.1', 'site.06.iso.1.subject.06TB_0072.lab_id.06MIL0533.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.06TB_0141.lab_id.06MIL0602.seq_reps.1', 'site.06.iso.1.subject.06TB_0141.lab_id.06MIL0602.seq_reps.1'],

 87%|██████████████████████████████▌    | 26647/30454 [00:21<00:02, 1353.37it/s]

2 Index(['site.06.iso.1.subject.ICK-1151.lab_id.06MIL1084.seq_reps.1', 'site.06.iso.1.subject.ICK-1151.lab_id.06MIL1084.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.IHK-1123.lab_id.06MIL1078.seq_reps.1', 'site.06.iso.1.subject.IHK-1123.lab_id.06MIL1078.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.06.iso.1.subject.RLH_0304-14.lab_id.06MIL0282.seq_reps.1', 'site.06.iso.1.subject.RLH_0304-14.lab_id.06MIL0282.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.03150.lab_id.823509.seq_reps.1', 'site.04.iso.1.subject.03150.lab_id.823509.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.00795.lab_id.718293.seq_reps.1', 'site.04.iso.1.subject.00795.lab_id.718293.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.04.iso.1.subject.02831.lab_id.819986.seq_reps.1', 'site.04.iso.1.subject.02831.lab_id.819986.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 90%|███████████████████████████████▍   | 27341/30454 [00:22<00:02, 1371.90it/s]

2 Index(['site.05.iso.1.subject.PMK-0982.lab_id.MK-1262.seq_reps.1', 'site.05.iso.1.subject.PMK-0982.lab_id.MK-1262.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 95%|█████████████████████████████████▏ | 28835/30454 [00:23<00:01, 1217.76it/s]

2 Index(['site.03.iso.1.subject.BOR-12632-16.lab_id.12632-16.seq_reps.1', 'site.03.iso.1.subject.BOR-12632-16.lab_id.12632-16.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.BOR-13462-16.lab_id.IML-00757.seq_reps.1', 'site.03.iso.1.subject.BOR-13462-16.lab_id.IML-00757.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.03.iso.1.subject.BOR-14593-16.lab_id.IML-00767.seq_reps.1', 'site.03.iso.1.subject.BOR-14593-16.lab_id.IML-00767.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 96%|█████████████████████████████████▋ | 29347/30454 [00:23<00:00, 1248.36it/s]

2 Index(['site.10.iso.1.subject.KD01666167.lab_id.KD01666167.seq_reps.1', 'site.10.iso.1.subject.KD01666167.lab_id.KD01666167.seq_reps.1'], dtype='object', name='ORIGINAL_UID')


 98%|██████████████████████████████████▏| 29730/30454 [00:24<00:00, 1196.85it/s]

2 Index(['site.05.iso.1.subject.PMK-1048.lab_id.MK-2348.seq_reps.1', 'site.05.iso.1.subject.PMK-1048.lab_id.MK-2348.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.BA00346437.lab_id.BA00346437.seq_reps.1', 'site.10.iso.1.subject.BA00346437.lab_id.BA00346437.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.YA00013886.lab_id.YA00013886.seq_reps.1', 'site.10.iso.1.subject.YA00013886.lab_id.YA00013886.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.17.iso.1.subject.K0059.lab_id.123-20-0059-1000.seq_reps.1', 'site.17.iso.1.subject.K0059.lab_id.123-20-0059-1000.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.10.iso.1.subject.YA00038859.lab_id.YA00038859.seq_reps.1', 'site.10.iso.1.subject.YA00038859.lab_id.YA00038859.seq_reps.1'], dtype='object', name='ORIGINAL_UID')
2 Index(['site.17.iso.1.subject.K0091.lab_id.123-20-0091-1000.seq_reps.1', 'site.17.iso.1.subject.K0091.lab_id.123-20-0091-

100%|███████████████████████████████████| 30454/30454 [00:24<00:00, 1224.16it/s]

True





Display resistance/ CM presence on nodes (red = resistance mutation, blue = CM, green = both)

In [28]:
vis = 'symbol'

mutation_header="""DATASET_SYMBOL

SEPARATOR COMMA

DATASET_LABEL,example symbols

COLOR,#e41a1c

MAXIMUM_SIZE,3


DATA
"""

colour_lookup = {1: '#e41a1c', 2: '#377eb8', 3: '#4daf4a'}

mutation_annotations = ''

for i in tqdm(range(ts.num_nodes)):   

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        mutation = LABELS.loc[uid]
        
        if mutation.IS_RES.size > 1:
            mutation = mutation.iloc[0]
            
        if mutation.IS_RES == True:
            if mutation.IS_CM == True:
                mutation = 3
            else:
                mutation = 1
        else:
            if mutation.IS_CM == True:
                mutation = 2
            else:
                mutation = None

        if mutation in colour_lookup.keys():
            colour = colour_lookup[mutation]
            
            line = 'n'+str(i) +',1,1,' + colour + ',1,0.5\n'
            mutation_annotations += line 
        else:
            continue
    

mutation_annotations = mutation_header + mutation_annotations


100%|███████████████████████████████████| 30454/30454 [00:22<00:00, 1340.57it/s]


Do the same for specific combinations of CM + S450L

In [47]:
vis = 'symbol'

mutation_header="""DATASET_SYMBOL

SEPARATOR COMMA

DATASET_LABEL,example symbols

COLOR,#e41a1c

MAXIMUM_SIZE,3


DATA
"""

colour_lookup = {1: '#e41a1c', 2: '#377eb8', 3: '#4daf4a'}

mutation_annotations = ''

for i in tqdm(range(ts.num_nodes)):   

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        mutation = LABELS.loc[uid]
        
        if mutation.IS_RES.size > 1:
            mutation = mutation.iloc[0]
            
        if mutation.HAS_S450L == True:
            if mutation.HAS_E1092D == True:
                mutation = 3
            else:
                mutation = 1
        else:
            if mutation.HAS_E1092D == True:
                mutation = 2
            else:
                mutation = None

        if mutation in colour_lookup.keys():
            colour = colour_lookup[mutation]
            
            line = 'n'+str(i) +',1,1,' + colour + ',1,0.5\n'
            mutation_annotations += line 
        else:
            continue
    

mutation_annotations = mutation_header + mutation_annotations


100%|███████████████████████████████████| 30454/30454 [00:22<00:00, 1356.06it/s]


Do the same for single CMs without resistance mutations

In [38]:
vis = 'symbol'

mutation_header="""DATASET_SYMBOL

SEPARATOR COMMA

DATASET_LABEL,example symbols

COLOR,#e41a1c

MAXIMUM_SIZE,3


DATA
"""

colour_lookup = {1: '#e41a1c', 2: '#377eb8', 3: '#4daf4a', 4: '#984ea3', 5: '#ff7f00', 6: '#a65628'}

mutation_annotations = ''

for i in tqdm(range(ts.num_nodes)):   

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        mutation = LABELS.loc[uid]
        
        if mutation.IS_RES.size > 1:
            mutation = mutation.iloc[0]
            
        if mutation.HAS_V483G == True or mutation.HAS_V483A == True:
            mutation = 1

        elif mutation.HAS_I491V == True or mutation.HAS_I491T == True:
            mutation = 2
            
        elif mutation.HAS_P1040R == True:
            mutation = 3
            
        elif mutation.HAS_G332S == True:
            mutation = 4
            
        elif mutation.HAS_G433S == True:
            mutation = 5
            
        elif mutation.HAS_N698S == True:
            mutation = 6
            
        else:
            mutation = None

        if mutation in colour_lookup.keys():
            colour = colour_lookup[mutation]
            
            line = 'n'+str(i) +',1,1,' + colour + ',1,0.5\n'
            mutation_annotations += line 
        else:
            continue
    

mutation_annotations = mutation_header + mutation_annotations


100%|███████████████████████████████████| 30454/30454 [00:27<00:00, 1113.62it/s]


Dynamically create the ITOL annotations file to small circles around the circle if the sample contains an S450L mutation in rpoB

In [100]:
vis = 'binary'

binary_header="""DATASET_BINARY

SEPARATOR COMMA

DATASET_LABEL,binary

COLOR,#e41a1c

FIELD_SHAPES,1

FIELD_LABELS,f1

DATA
"""

binary_annotations = ''

for i in tqdm(range(ts.num_nodes)):   

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']

        if uid in LABELS[LABELS['HAS_T853A']].index:
            line = 'n'+str(i) +',1\n'
            binary_annotations += line 

binary_annotations = binary_header + binary_annotations

with open('ct-CM-annotations_name_circle.txt','w') as f:
    f.write(binary_annotations)

  0%|                                       | 1/30454 [00:00<00:08, 3799.19it/s]


KeyError: 'HAS_T853A'

for new hits through fishers test

In [28]:
for i,name in enumerate(mutation_list):
    
    print(name)
    
    vis = 'binary'

    binary_header="""DATASET_BINARY

    SEPARATOR COMMA

    DATASET_LABEL,binary

    COLOR,#e41a1c

    FIELD_SHAPES,1

    FIELD_LABELS,f1

    DATA
    """

    binary_annotations = ''

    for i in tqdm(range(ts.num_nodes)):   

        row = new_tables.nodes[i]

        if 'name' in row.metadata.keys():

            uid = row.metadata['name']

            if uid in LABELS[LABELS[name]].index:
                line = 'n'+str(i) +',1\n'
                binary_annotations += line 

    binary_annotations = binary_header + binary_annotations
    
    title = 'ct-CM-annotations_' + name + '_circle.txt'
    print(title)

    with open(title,'w') as f:
        f.write(binary_annotations)

sigA_L386M


100%|███████████████████████████████████| 30454/30454 [00:03<00:00, 9180.32it/s]


ct-CM-annotations_sigA_L386M_circle.txt
sigA_G380A


100%|███████████████████████████████████| 30454/30454 [00:03<00:00, 8044.15it/s]


ct-CM-annotations_sigA_G380A_circle.txt
rpoC_I128V


100%|███████████████████████████████████| 30454/30454 [00:03<00:00, 8073.83it/s]


ct-CM-annotations_rpoC_I128V_circle.txt
sigA_I382V


100%|███████████████████████████████████| 30454/30454 [00:03<00:00, 7953.99it/s]


ct-CM-annotations_sigA_I382V_circle.txt
rpoB_R791T


100%|███████████████████████████████████| 30454/30454 [00:04<00:00, 6188.82it/s]


ct-CM-annotations_rpoB_R791T_circle.txt
rpoB_E460D


100%|███████████████████████████████████| 30454/30454 [00:04<00:00, 7573.62it/s]


ct-CM-annotations_rpoB_E460D_circle.txt
sigA_E385Q


100%|███████████████████████████████████| 30454/30454 [00:03<00:00, 8211.95it/s]

ct-CM-annotations_sigA_E385Q_circle.txt





In [150]:
tree = ts.first()

newick_string = tree.as_newick()

'(n1:6.90799999999990177,n2:3.09200000000009823,(n4:0.46453999999994267,(n6:8.18533999999999651,(((n12:9.91015000000015789,n13:8.08985000000006949):0.17857000000003609,((((n20:1.73206000000004678,n21:4.26793999999995322):0.87799999999992906,(n22:0.00099999999997635,n23:2.00000000000000000):3.62199999999984357):8.94817000000011831,(n24:7.96229000000016640,(n26:5.42753000000016073,(n28:1.00000000000000000,n29:0.00099999999997635):3.07247000000006665):2.53771000000006097):1.48932999999988169):0.11167999999997846,(((n34:0.89059000000020205,(n36:1.04859000000010383,n37:1.95141000000012355):2.10941000000002532):12.25711999999998625,(n38:12.04489999999987049,(n40:6.05602000000021690,(n42:7.91893000000027314,(n44:0.86409999999978027,n45:5.13589999999999236):0.08107000000018161):0.19398000000001048):0.70509999999967476):2.83663000000024113):0.34405999999989945,(n46:4.74698000000012144,((n50:6.11341000000015811,n51:8.88659000000006927):0.02957999999989624,((n54:0.00099999999997635,(n56:6.1057700

Make a bar plot for the growth corresponding to each sample

In [85]:
vis = 'bar'

bar_header="""DATASET_SIMPLEBAR

SEPARATOR COMMA

DATASET_LABEL,bar

COLOR,#ff0000

WIDTH,100

DATA
"""

#ID1,value1
#ID2,value2
#9606,10000
#LEAF1|LEAF2,11000

bar_annotations = ''

for i in tqdm(range(ts.num_nodes)):   

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']
        
        if numpy.any(PLATES[PLATES.UNIQUEID==LABELS.loc[uid].UNIQUEID].IM_POS_AVERAGE.values):
            
            growth = PLATES[PLATES.UNIQUEID==LABELS.loc[uid].UNIQUEID].IM_POS_AVERAGE.values[0]
            line = 'n'+ str(i) + ',' + str(growth) + '\n'
            bar_annotations += line 
        
        else:
            
            continue

#         if uid in LABELS[LABELS['HAS_T853A']].index:
#             line = 'n'+str(i) +',1\n'
#             bar_annotations += line 

bar_annotations = bar_header + bar_annotations


100%|████████████████████████████████████| 30454/30454 [01:35<00:00, 317.89it/s]


In [175]:
uid = new_tables.nodes[2].metadata['name']
GENOMES[GENOMES.TB_TYPE_1 == 'XDR']

#load UNIQUEIDs of resistant samples with CMs

res_CM_samples = []

for i, name in enumerate(hits_final.resistant_mutation.unique()):
    resistance = MUTATIONS[(MUTATIONS.GENE_MUTATION == name)].UNIQUEID
    hit_samples_local = MUTATIONS[(MUTATIONS.GENE_MUTATION.isin(hits_final.other_mutation.values.tolist())) 
                                  & (MUTATIONS.UNIQUEID.isin(resistance.tolist()))].UNIQUEID.unique()
    res_CM_samples.append(hit_samples_local)

res_CM_samples = numpy.concatenate(res_CM_samples)
res_CM_samples = pandas.Series(res_CM_samples)
res_CM_samples = res_CM_samples.tolist()

print(len(GENOMES[((GENOMES.TB_TYPE_1 == 'XDR')|(GENOMES.TB_TYPE_1 == 'MDR')) & (GENOMES.MYKROBE_LINEAGE_NAME_1 == 'Lineage 2') & (GENOMES.index.isin(res_CM_samples))])/len(GENOMES[((GENOMES.TB_TYPE_1 == 'XDR')|(GENOMES.TB_TYPE_1 == 'MDR')) & (GENOMES.MYKROBE_LINEAGE_NAME_1 == 'Lineage 2')]))
#GENOMES.index

0.4973363324257133


Make a colored strip for resistance type (RIF/ MDR/ XDR/ UNK/ SUS)

In [22]:
vis = 'colorstrip'

resistance_header="""DATASET_COLORSTRIP

SEPARATOR COMMA

DATASET_LABEL,label1

COLOR,#e41a1c

COLOR_BRANCHES,0

DATA
"""

colour_lookup = {1: '#F2CD5C', 2: '#F2921D', 3: '#A61F69', 4: '#400E32', 5: '#D3D3D3'}

resistance_annotations = ''

for i in tqdm(range(ts.num_nodes)): 

    row = new_tables.nodes[i]

    if 'name' in row.metadata.keys():

        uid = row.metadata['name']
        
        res_type = GENOMES.TB_TYPE_1[LABELS.loc[uid].UNIQUEID]

        if res_type == 'SUS':
            res_colour = colour_lookup[1]
            
        elif res_type == 'RIF':
            res_colour = colour_lookup[3]
            
        elif res_type == 'MDR':
            res_colour = colour_lookup[3]
            
        elif res_type == 'XDR':
            res_colour = colour_lookup[3]
            
        elif res_type == 'UNK':
            res_colour = colour_lookup[5]

        else:
            colour = '#bbbbbb'

        line = 'n'+ str(i) + ',' + res_colour + ',Resistance ' + res_type + '\n'
        resistance_annotations += line 
    

resistance_annotations = resistance_header + resistance_annotations


100%|███████████████████████████████████████████████████████████████████████████████| 30454/30454 [00:27<00:00, 1120.46it/s]


Write out the Tree in Newick format (probably don't need as we read it in earlier..) and the matching annotation files that you can drag and drop onto ITOL

In [24]:
# with open('ct-tree.newick','w') as f:
#     f.write(newick_string)

# with open('ct-prune-sparse.txt','w') as f:
#     f.write(prune_annotations)

# with open('ct-lineage-annotations-circle.txt','w') as f:
#     f.write(lineage_annotations)

# with open('ct-lineage-annotations-circle.txt','w') as f:
#     f.write(lineage_annotations)

# with open('ct-lineage-annotations-range.txt','w') as f:
#     f.write(lineage_annotations)

# with open('ct-annotations.txt','w') as f:
#     f.write(binary_annotations)
    
# with open('ct-resistant-annotations.txt','w') as f:
#     f.write(binary_annotations)
    
# with open('T853A_ct-CM-annotations_circle.txt','w') as f:
#     f.write(binary_annotations)

# with open('ct-mutation-annotations_V483G.txt','w') as f:
#     f.write(mutation_annotations)

# with open('ct-mutation-annotations-CMs-colored.txt','w') as f:
#     f.write(mutation_annotations)

# with open('ct-growth-bar.txt','w') as f:
#     f.write(bar_annotations)
    
with open('ct-resistance_type.txt','w') as f:
    f.write(resistance_annotations)

**Below are investigations of specific sub clades with either interesting aspects or accumulated CMs**

Investigate nodes with CMs arising before resistance

In [121]:
#n8190 - n8900

interesting_clade = []

for i in new_tables.nodes[8190:8900]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        interesting_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)
    
df = pandas.DataFrame(interesting_clade)
df

Unnamed: 0,0
0,rpoA_a-40o
1,rpoB_A1075A
2,rpoC_E1092D
3,rpoC_L1307L
4,rpoA_g-100x
...,...
5795,rpoC_E1092D
5796,rpoB_S450L
5797,rpoB_A1075A
5798,rpoC_I491T


In [122]:
df.value_counts()[0:50]

rpoB_A1075A    357
rpoC_E1092D    356
rpoB_S450L     225
rpoA_a-40o     157
rpoA_c-92x      83
rpoA_g-63x      83
rpoA_g-61x      83
rpoA_g-58x      83
rpoA_g-52x      83
rpoA_g-100x     83
rpoA_c-91x      83
rpoA_g-65x      83
rpoA_c-89x      83
rpoA_c-88x      83
rpoA_c-87x      83
rpoA_c-85x      83
rpoA_g-64x      83
rpoA_g-66x      83
rpoA_c-78x      83
rpoA_g-67x      83
rpoA_g-81x      83
rpoA_g-83x      83
rpoA_g-90x      83
rpoA_g-93x      83
rpoA_g-95x      83
rpoA_g-96x      83
rpoA_g-97x      83
rpoA_g-98x      83
rpoA_g-99x      83
rpoA_t-69x      83
rpoA_t-80x      83
rpoA_c-84x      83
rpoA_c-94x      83
rpoA_c-77x      83
rpoA_c-62x      83
rpoA_a-73x      83
rpoA_a-79x      83
rpoA_a-82x      83
rpoA_c-53x      83
rpoA_c-54x      83
rpoA_c-55x      83
rpoA_c-76x      83
rpoA_c-56x      83
rpoA_c-57x      83
rpoA_c-59x      83
rpoA_c-60x      83
rpoA_c-86x      83
rpoA_c-75x      83
rpoA_c-68x      83
rpoA_c-70x      83
dtype: int64

Interesting clade of lineage 4 clusters with lineage 5 and 6

In [30]:
#n15072, n15074, n15077, n15076
list = [15072, 15074, 15077, 15076]
interesting_clade2 = []

for i in list:
    node = new_tables.nodes[i]
    if 'name' in node.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == node.metadata['name']].index
        print(index[0])
        interesting_clade2.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)
    
df = pandas.DataFrame(interesting_clade2)

site.06.subj.06TB_0666.lab.06MIL1604.iso.1
site.06.subj.06TB_1029.lab.06MIL2034.iso.1
site.06.subj.06TB_1408.lab.06MIL2686.iso.1
site.06.subj.06TB_1406.lab.06MIL2684.iso.1


In [28]:
df.value_counts()

rpoA_a-40o     4
rpoA_g-67x     4
rpoA_c-92x     4
rpoA_c-94x     4
rpoA_g-100x    4
rpoA_g-48x     4
rpoA_g-50x     4
rpoA_g-52x     4
rpoA_g-58x     4
rpoA_g-61x     4
rpoA_g-63x     4
rpoA_g-64x     4
rpoA_g-65x     4
rpoA_g-66x     4
rpoA_g-81x     4
rpoA_a-42x     4
rpoA_g-83x     4
rpoA_g-90x     4
rpoA_g-93x     4
rpoA_g-95x     4
rpoA_g-96x     4
rpoA_g-97x     4
rpoA_g-98x     4
rpoA_g-99x     4
rpoA_t-49x     4
rpoA_t-69x     4
rpoA_t-80x     4
rpoB_A1075A    4
rpoA_c-91x     4
rpoA_c-89x     4
rpoA_c-88x     4
rpoA_c-87x     4
rpoA_a-73x     4
rpoA_a-79x     4
rpoA_a-82x     4
rpoA_c-41x     4
rpoA_c-43x     4
rpoA_c-53x     4
rpoA_c-54x     4
rpoA_c-55x     4
rpoA_c-56x     4
rpoA_c-57x     4
rpoA_c-59x     4
rpoA_c-60x     4
rpoA_c-62x     4
rpoA_c-68x     4
rpoA_c-70x     4
rpoA_c-71x     4
rpoA_c-72x     4
rpoA_c-74x     4
rpoA_c-75x     4
rpoA_c-76x     4
rpoA_c-77x     4
rpoA_c-78x     4
rpoA_c-84x     4
rpoA_c-85x     4
rpoA_c-86x     4
rpoC_V626I     1
dtype: int64

Interesting clade within lineage 2 (root): blue nodes have S450O instead of S450L!

In [171]:
#n1-n600
interesting_clade3 = []

for i in new_tables.nodes[1:600]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        interesting_clade3.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)
    
df = pandas.DataFrame(interesting_clade3)
df

Unnamed: 0,0
0,rpoB_S450O
1,rpoB_A1075A
2,rpoC_I491V
3,rpoB_S450L
4,rpoB_A1075A
...,...
4889,rpoA_a-40o
4890,rpoB_S450L
4891,rpoB_A1075A
4892,rpoC_G388G


In [46]:
df.value_counts()[:40]

rpoB_A1075A    299
rpoC_I491V     293
rpoB_S450L     286
rpoA_a-40o     118
rpoA_g-66x      72
rpoA_c-88x      72
rpoA_c-89x      72
rpoA_c-92x      72
rpoA_c-94x      72
rpoA_g-100x     72
rpoA_g-52x      72
rpoA_g-58x      72
rpoA_g-61x      72
rpoA_g-63x      72
rpoA_g-64x      72
rpoA_g-65x      72
rpoA_g-81x      72
rpoA_g-67x      72
rpoA_c-85x      72
rpoA_g-83x      72
rpoA_g-90x      72
rpoA_g-93x      72
rpoA_g-95x      72
rpoA_g-96x      72
rpoA_g-97x      72
rpoA_g-98x      72
rpoA_g-99x      72
rpoA_t-69x      72
rpoA_t-80x      72
rpoA_c-86x      72
rpoA_c-91x      72
rpoA_c-84x      72
rpoA_c-70x      72
rpoA_a-73x      72
rpoA_a-79x      72
rpoA_a-82x      72
rpoA_c-53x      72
rpoA_c-54x      72
rpoA_c-55x      72
rpoA_c-56x      72
dtype: int64

In [26]:
SAMPLES = pandas.read_pickle('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/tb-rnap-compensation/tb_rnap_compensation/tables/SAMPLES.pkl.gz')
SAMPLES.COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()
SAMPLES.reset_index(inplace=True)

def create_uid(row):

    return 'site.' + row.SITEID + '.subj.' + row.SUBJID + '.lab.' + row.LABID + '.iso.1'

SAMPLES['UNIQUEID'] = SAMPLES.apply(create_uid, axis=1)

SAMPLES

Unnamed: 0,SITEID,SUBJID,LABID,COUNTRY_WHERE_SAMPLE_TAKEN,REGION,COLLECTION_DATE,ISOLATE_COLLECTED_PROSPECTIVELY,ANATOMICAL_ORIGIN,SMEAR_RESULT,WGS_SEQUENCING_PLATFORM,XPERT_MTB_RIF,HAIN_RIF,HAIN_INH,HAIN_FL,HAIN_AM,HAIN_ETH,SMOKER,INJECT_DRUG_USER,IS_HOMELESS,IS_IMPRISONED,HIV,DIABETES,WHO_OUTCOME,UNIQUEID
0,02,0958,22A197,CHN,ChongQing,2017-12-04 00:00:00+07:00,False,not known,not known,HiSeq,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.02.subj.0958.lab.22A197.iso.1
1,02,0823,2013241494,CHN,GuiZhou,2013-10-06 00:00:00+07:00,False,not known,not known,HiSeq,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.02.subj.0823.lab.2013241494.iso.1
2,02,0359,222018-14,CHN,ChongQing,2014-01-01 00:00:00+07:00,False,not known,not known,HiSeq,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.02.subj.0359.lab.222018-14.iso.1
3,02,0224,2013221088,CHN,ChongQing,2013-01-01 00:00:00+07:00,False,not known,not known,HiSeq,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.02.subj.0224.lab.2013221088.iso.1
4,02,1347,2013154310,CHN,Shandong,2013-01-01 00:00:00+07:00,False,not known,not known,HiSeq,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.02.subj.1347.lab.2013154310.iso.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16664,06,06TB_0565,06MIL1243,ITA,,NaT,False,not known,not known,Unknown,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.06.subj.06TB_0565.lab.06MIL1243.iso.1
16665,06,06TB_0566,06MIL1244,ITA,,NaT,False,not known,not known,Unknown,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.06.subj.06TB_0566.lab.06MIL1244.iso.1
16666,06,ICK-1132,06MIL1248,PAK,,NaT,False,not known,not known,Unknown,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.06.subj.ICK-1132.lab.06MIL1248.iso.1
16667,06,06TB_0474,06MIL1249,ITA,,NaT,False,not known,not known,Unknown,not tested,not tested,not tested,not tested,not tested,not tested,not known,not known,False,False,not known,not known,not known,site.06.subj.06TB_0474.lab.06MIL1249.iso.1


Look at SAMPLES.COUNTRY_WHERE_SAMPLE_TAKEN and SAMPLES.REGION

Investigate clades with accumulated CMs for oversampling of outbreaks

In [96]:
#V483A, n6419 - n6878
v483a_clade = []
growth_v483a = []

for i in new_tables.nodes[6419:6878]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_v483a.extend(index)
        v483a_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)

df = pandas.DataFrame(v483a_clade)
df.value_counts()[:20]
growth_v483a

# df = pandas.DataFrame(growth_v483a)
# df.to_csv('subclade_V483A.csv', index = False)

['site.03.subj.DR-285.lab.IML-01190.iso.1',
 'site.03.subj.DS-64.lab.IML-01232.iso.1',
 'site.03.subj.DS-150.lab.IML-01256.iso.1',
 'site.04.subj.01225.lab.720110.iso.1',
 'site.04.subj.01423.lab.26568.iso.1',
 'site.04.subj.03248.lab.823504.iso.1',
 'site.04.subj.04775.lab.907378.iso.1',
 'site.03.subj.DS-36.lab.IML-01239.iso.1',
 'site.03.subj.DR-44.lab.IML-01077.iso.1',
 'site.03.subj.DR-79.lab.IML-01092.iso.1',
 'site.03.subj.DS-10.lab.IML-01203.iso.1',
 'site.04.subj.00991.lab.717123.iso.1',
 'site.03.subj.DR-281.lab.IML-01188.iso.1',
 'site.04.subj.01901.lab.732532.iso.1',
 'site.04.subj.03470.lab.827987.iso.1',
 'site.04.subj.04118.lab.832668.iso.1',
 'site.04.subj.01900.lab.801874.iso.1',
 'site.04.subj.03027.lab.823876.iso.1',
 'site.03.subj.DR-320.lab.IML-01200.iso.1',
 'site.03.subj.DR-91.lab.IML-01095.iso.1',
 'site.03.subj.DR-121.lab.IML-01105.iso.1',
 'site.04.subj.01318.lab.723498.iso.1',
 'site.04.subj.04502.lab.902838.iso.1',
 'site.04.subj.03305.lab.825625.iso.1',
 's

In [28]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_v483a)].COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()[:5]

COUNTRY_WHERE_SAMPLE_TAKEN
IND    181
NPL     48
SWE      1
AFG      0
NGA      0
Name: count, dtype: int64

In [29]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_v483a)].REGION.value_counts()[:30]

REGION
Kathmandu                     48
Mumbai                        32
Mumbai (Malad)                 8
Mumbra                         6
Mumbai (Mazagaon)              6
Mumbai (Central)               5
Mumbai (Jogeshwari)            5
Mumbai (Kurla)                 4
Mumbai (Malad west)            4
Mumbai (Byculla)               4
Mumbai (Wadala)                4
Mumbai (Grant Road)            4
Mumbai (Mira Road)             4
Thane                          4
Mumbai (Sion)                  4
Mumbai (Bhandup)               3
Mira Bhayandar (Bhayandar)     3
Mumbai (Kurla west)            3
Mumbai (Andheri west)          3
Mumbai (Nagpada)               3
Mumbai (Ghatkopar)             3
Mumbai (Mahim)                 3
Mumbai (Andheri)               3
Nala Sopara                    2
Mumbai (Dadar)                 2
Mumbai (Dahisar)               2
Mumbai (Chunabhatti)           2
Mumbai (Borivali)              2
Mumbai (Saki Naka)             2
Mumbai (Govandi)               2
Nam

In [103]:

for i in growth_v483a:
    sub_lin = GENOMES[GENOMES.index == i].MYKROBE_LINEAGE_NAME_2
    print(sub_lin)

UNIQUEID
site.03.subj.DR-285.lab.IML-01190.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DS-64.lab.IML-01232.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DS-150.lab.IML-01256.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01225.lab.720110.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01423.lab.26568.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03248.lab.823504.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04775.lab.907378.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DS-36.lab.IML-01239.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DR-44.lab.IML-01077.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DR-79.lab.IML-01092.iso.1    lineag

UNIQUEID
site.04.subj.01550.lab.729588.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03324.lab.827121.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03263.lab.825133.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04900.lab.903336.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00755.lab.708950.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04909.lab.909510.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04192.lab.831568.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01465.lab.725621.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00392.lab.715620.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05477.lab.913495.iso.1    lineage2.2
Name: MYKROBE_

UNIQUEID
site.04.subj.03571.lab.829922.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01598.lab.726612.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05621.lab.918800.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01062.lab.719968.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00168.lab.700996.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00780.lab.706828.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00024.lab.631993.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00299.lab.702745.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DR-8.lab.IML-01060.iso.1    lineage2.2
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.GB-83220123.lab.GB-83220123.iso.1    lineage2.2
N

In [131]:
snp_distance = numpy.load('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/GPI_SNP_DISTANCES_VALUES.npy')
snp_label = numpy.load('/Users/viktoriabrunner/Documents/Studium/PhD/DPhil/paper/GPI_SNP_DISTANCES_LABELS.npy')

print(len(snp_distance), len(snp_label))
print(snp_distance.shape)
len(snp_distance[(snp_distance<30) & (snp_distance!=0)])
print(len(snp_distance[(snp_distance<200) & (snp_distance!=0)])/(len(snp_distance)*len(snp_distance)))

15228 15228
(15228, 15228)
0.032514552120093985


In [251]:
snp_distances_clade1 = []
for i in growth_v483a:
    for j in growth_v483a:
        dist = snp_distance[snp_label == i][0][snp_label == j][0]
        snp_distances_clade1.append(dist)
    
snp_distances_clade1
df = pandas.DataFrame(snp_distances_clade1)
list_df = df.value_counts()

In [252]:
list_df = list_df.reset_index()

list_df.rename({0:'SNP_dist'}, axis = 1, inplace = True)

#remove SNP_dist = 0 since that equals the self-distance
index = list_df[list_df['SNP_dist'] == 0].index
list_df = list_df.drop(axis = 0, index = index)

list_df = list_df.reset_index()
list_df.drop(axis = 1, columns='index', inplace = True)

list_df

Unnamed: 0,SNP_dist,count
0,22,3328
1,21,3318
2,23,3214
3,20,3074
4,19,2886
...,...,...
140,142,2
141,144,2
142,145,2
143,148,2


In [253]:
sum_df = 0
total_count = 0

for i in range(0,len(list_df)):
    sum_df += list_df['SNP_dist'][i] * list_df['count'][i]
    total_count += list_df['count'][i] 
    
average_SNP_dist = sum_df/total_count
print('The average SNP-distance is', average_SNP_dist, 'for V483A')

The average SNP-distance is 30.659347695088883 for I491V


In [254]:
list_df['SNP_dist'].max()

149

In [250]:
#V483G, n10889 - n10982
v483g_clade = []
growth_v483g = []

for i in new_tables.nodes[10889:10982]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_v483g.extend(index)
        v483g_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)

df = pandas.DataFrame(v483g_clade)
df.value_counts()[:20]

# df = pandas.DataFrame(growth_v483g)
# df.to_csv('subclade_V483G.csv', index = False)

rpoB_A1075A    49
rpoB_S450L     49
rpoC_V483G     46
rpoA_a-40o     18
rpoC_V483A      2
rpoA_c-53x      1
rpoA_g-90x      1
rpoA_g-52x      1
rpoA_g-58x      1
rpoA_g-61x      1
rpoA_g-63x      1
rpoA_g-64x      1
rpoA_g-65x      1
rpoA_g-66x      1
rpoA_g-67x      1
rpoA_g-81x      1
rpoA_g-83x      1
rpoA_g-93x      1
rpoA_c-54x      1
rpoA_g-95x      1
Name: count, dtype: int64

In [129]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_v483g)].COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()[:5]

ZAF    43
SWE     1
NGA     0
VNM     0
UKR     0
Name: COUNTRY_WHERE_SAMPLE_TAKEN, dtype: int64

In [130]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_v483g)].REGION.value_counts()[:30]

Eastern Cape     25
Western Cape     11
KwaZulu-Natal     5
Stockholm         1
South Africa      1
Gauteng           1
Name: REGION, dtype: int64

In [166]:
#I491T, n7646 - n7860
i491t_clade = []
growth_i491t = []

for i in new_tables.nodes[7646:7860]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_i491t.extend(index)
        i491t_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)

df = pandas.DataFrame(i491t_clade)
df.value_counts()

# #I491 T (V) n1143 - n1358
i491t_clade = []
growth_i491t = []

for i in new_tables.nodes[1143:1358]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_i491t.extend(index)
        i491t_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)

df = pandas.DataFrame(i491t_clade)
df.value_counts()

# df = pandas.DataFrame(growth_i491t)
# df.to_csv('subclade_I491t.csv', index = False)

In [137]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_i491t)].COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()[:5]

IND    102
NPL      7
AFG      0
NGA      0
VNM      0
Name: COUNTRY_WHERE_SAMPLE_TAKEN, dtype: int64

In [138]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_i491t)].REGION.value_counts()[:30]

Mumbai                             16
Thane                              11
Kathmandu                           7
Nala Sopara                         6
Bhiwandi                            5
Mumbai (Kandivali)                  5
Mumbai (Ghatkopar)                  4
Mumbai (Mahim)                      3
Mumbra                              3
Mumbai (Dharavi)                    3
Mumbai (Tardeo)                     2
Mumbai (Charni Road)                2
Mumbai (Malad east)                 2
Mumbai (Parel)                      2
Ulhasnagar                          2
Mumbai (Chembur)                    2
Mumbai (Saki Naka)                  2
Thane (west)                        2
Vasai-Virar                         1
Mumbai (Govandi)                    1
Mumbai (Vidyavihar)                 1
Mumbai (Worli)                      1
Mumbai (Kandivali east)             1
Mumbai (Guru Teg Bahadur Nagar)     1
Mumbai (Sewri)                      1
Mumbai (Agripada)                   1
Nashik      

In [46]:
#I491V, n1 - n616
i491v_clade = []
growth_i491v = []

for i in new_tables.nodes[1:616]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_i491v.extend(index)
        i491v_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)

df = pandas.DataFrame(i491v_clade)
df.value_counts()

# df = pandas.DataFrame(growth_i491v)
# df.to_csv('subclade_I491V.csv', index = False)

rpoB_A1075A    304
rpoC_I491V     295
rpoB_S450L     290
rpoA_a-40o     120
rpoA_g-100x     73
              ... 
rpoA_g-66o       1
rpoA_c-77o       1
rpoA_c-71o       1
rpoA_D253O       1
sigA_G314S       1
Name: count, Length: 78, dtype: int64

In [47]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_i491v)].COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()[:5]

COUNTRY_WHERE_SAMPLE_TAKEN
IND    293
NPL     10
AFG      0
NGA      0
VNM      0
Name: count, dtype: int64

In [48]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_i491v)].REGION.value_counts()[:30]

REGION
Mumbai                    61
Mumbai (Mahim)            25
Thane                     12
Kathmandu                 10
Mumbai (Kurla)             9
Mumbai (Malad)             7
Dombivli                   6
Mumbai (Malad west)        6
Mumbai (Kandivali)         6
Mumbai (Andheri)           6
Mumbra                     5
Mumbai (Wadala)            5
Mumbai (Central)           5
Mumbai (Parel)             5
Mumbai (Antop Hill)        4
Mumbai (Borivali)          4
Mumbai (Mazagaon)          4
Mumbai (Sewri)             4
Virar                      4
Mumbai (Chembur)           4
Mumbai (Bandra)            4
Mumbai (Sion)              4
Mumbai (Goregaon east)     3
Mumbai (Goregaon)          3
Mumbai (Bhandup)           3
Vasai-Virar                3
Mumbai (Prabhadevi)        3
Mumbai (Santacruz)         3
Mumbai (Girgaon)           3
Mumbai (Ghatkopar)         3
Name: count, dtype: int64

In [246]:
snp_distances_clade2 = []
for i in growth_i491v:
    for j in growth_i491v:
        dist = snp_distance[snp_label == i][0][snp_label == j][0]
        snp_distances_clade2.append(dist)
    
snp_distances_clade2
df = pandas.DataFrame(snp_distances_clade2)
list_df = df.value_counts()

In [104]:
for i in growth_i491v:
    sub_lin = GENOMES[GENOMES.index == i].MYKROBE_LINEAGE_NAME_2
    print(sub_lin)

UNIQUEID
site.04.subj.03818.lab.830476.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00310.lab.701951.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01627.lab.27972.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01042.lab.717652.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.02112.lab.803805.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04137.lab.832790.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01432.lab.724551.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.02235.lab.805177.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00300.lab.702487.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03648.lab.JJH9682.iso.1    lineage

UNIQUEID
site.04.subj.03824.lab.830216.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05660.lab.919709.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00482.lab.704543.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03893.lab.832777.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00080.lab.633839.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05005.lab.908704.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03363.lab.827295.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01877.lab.801709.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03870.lab.826149.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04918.lab.909362.iso.1    lineage

UNIQUEID
site.04.subj.03224.lab.822877.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.02062.lab.21190.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05614.lab.918016.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00998.lab.715906.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03211.lab.825251.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04174.lab.833074.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01682.lab.732653.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03995.lab.832048.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01002.lab.717763.iso.1    lineage2.2.7
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03741.lab.826347.iso.1    lineage2

In [105]:
#P1040R, n9922 - n10424
p1040r_clade = []
growth_p1040r = []

for i in new_tables.nodes[9922:10424]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_p1040r.extend(index)
        p1040r_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)
        
df = pandas.DataFrame(p1040r_clade)
df.value_counts()

df = pandas.DataFrame(growth_p1040r)
df.to_csv('subclade_P1040R.csv', index = False)

In [106]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_p1040r)].COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()[:5]

COUNTRY_WHERE_SAMPLE_TAKEN
IND    242
NPL      8
AFG      0
NGA      0
VNM      0
Name: count, dtype: int64

In [107]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_p1040r)].REGION.value_counts()[:30]

REGION
Mumbai                 56
Thane                  14
Kalyan                 12
Mumbai (Mahim)         10
Mumbai (Malad)          8
Kathmandu               8
Mumbai (Santacruz)      7
Mumbai (Wadala)         6
Mumbai (Dadar)          5
Mumbai (Dharavi)        5
Nala Sopara             5
Bandra (east)           4
Mumbai (Mazagaon)       4
Mumbai (Kurla)          4
Mumbra                  4
Mumbai (Goregaon)       4
Mumbai (Bandra)         4
Mumbai (Andheri)        3
Mumbai (Malad west)     3
Mumbai (Parel)          3
Ulhasnagar              3
Mumbai (Mulund)         3
Mumbai (Jogeshwari)     3
Mumbai (Ghatkopar)      3
Mumbai (Sion)           3
Bhiwandi                3
Mumbai (Prabhadevi)     2
Mumbai (Govandi)        2
Mumbai (Vile Parle)     2
Dombivli                2
Name: count, dtype: int64

In [109]:
for i in growth_p1040r:
    sub_lin = GENOMES[GENOMES.index == i].MYKROBE_LINEAGE_NAME_2
    print(sub_lin)

UNIQUEID
site.04.subj.04787.lab.904613.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00506.lab.709045.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05418.lab.913041.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00942.lab.715954.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03168.lab.821506.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04407.lab.903102.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03864.lab.830782.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.02158.lab.732526.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03528.lab.829149.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03192.lab.816831.iso.1    lineage

UNIQUEID
site.04.subj.03478.lab.827844.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04852.lab.907752.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00620.lab.707489.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00757.lab.714670.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.04115.lab.832555.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DS-24.lab.IML-01207.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01711.lab.729885.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.03.subj.DR-161.lab.IML-01125.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00966.lab.716955.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01788.lab.732893.iso.1    

UNIQUEID
site.04.subj.00781.lab.708146.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05576.lab.919703.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00997.lab.718598.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00769.lab.710846.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.00524.lab.706416.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.05639.lab.919649.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01589.lab.730725.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.03340.lab.826105.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01480.lab.729217.iso.1    lineage2.2.3
Name: MYKROBE_LINEAGE_NAME_2, dtype: object
UNIQUEID
site.04.subj.01508.lab.725271.iso.1    lineage

In [159]:
#G332S n5215 - n5392
g332s_clade = []
growth_g332s = []

for i in new_tables.nodes[5215:5392]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_g332s.extend(index)
        g332s_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)
        
df = pandas.DataFrame(g332s_clade)
df.value_counts()

#df = pandas.DataFrame(growth_g332s)
#df.to_csv('subclade_G332S.csv', index = False)

In [147]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_g332s)].COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()[:5]

IND    88
NPL     1
AFG     0
NGA     0
VNM     0
Name: COUNTRY_WHERE_SAMPLE_TAKEN, dtype: int64

In [149]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_g332s)].REGION.value_counts()[:30]

Mumbai                        23
Mumbai (Mahim)                13
Virar                          4
Mumbai (Kandivali)             4
Mumbai (Ghatkopar)             4
Mumbai (Grant Road)            2
Mumbai (Chembur)               2
Thane                          2
Mumbai (Wadala)                2
Mumbai (Sion)                  2
Nala Sopara                    2
Mumbai (Marine Line)           2
Mira Bhayandar (Bhayandar)     1
Mumbai (Lower Parel)           1
Mumbai (Chinchpokli)           1
Mumbai (Central)               1
Dapoli                         1
Pant Nagar Ghat                1
Panvel                         1
Mumbai (Santacruz)             1
Mumbai (Mankhurd)              1
Mumbai (Mulund)                1
Goa                            1
Mumbai (Byculla)               1
Uran                           1
Navi Mumbai (Kharghar)         1
Mumbai (Kurla east)            1
Mumbai (Worli)                 1
Mumbai (Sewri)                 1
Mumbai (Charni Road)           1
Name: REGI

In [162]:
#G433S n940 - n1076
g433s_clade = []
growth_g433s = []

for i in new_tables.nodes[940:1076]:
    if 'name' in i.metadata.keys():
        index = GENOMES[GENOMES['ORIGINAL_UID'] == i.metadata['name']].index
        growth_g433s.extend(index)
        g433s_clade.extend(MUTATIONS[MUTATIONS.UNIQUEID == index[0]].GENE_MUTATION.values)
        
df = pandas.DataFrame(g433s_clade)
df.value_counts()

#df = pandas.DataFrame(growth_g433s)
#df.to_csv('subclade_G433S.csv', index = False)

In [151]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_g433s)].COUNTRY_WHERE_SAMPLE_TAKEN.value_counts()[:5]

IND    61
NPL     8
AFG     0
NGA     0
VNM     0
Name: COUNTRY_WHERE_SAMPLE_TAKEN, dtype: int64

In [152]:
SAMPLES[SAMPLES.UNIQUEID.isin(growth_g433s)].REGION.value_counts()[:30]

Mumbai                     16
Kathmandu                   8
Kalyan                      5
Mumbai (Mahim)              5
Mumbai (Bhandup)            4
Mumbai (Andheri)            3
Mumbai (Girgaon)            2
Mumbai (Kurla)              2
Mumbai (Wadala)             2
Ulhasnagar                  2
Mumbai (Charni Road)        1
Mumbai (Sewri)              1
Mumbai (Govandi)            1
Mumbai (Parel)              1
Navi Mumbai (Vashi)         1
Mumbra                      1
Dombivli                    1
Mumbai (Vikhroli)           1
Mumbai (Khar)               1
Ambivli                     1
Mira Bhayandar (Dongri)     1
Mumbai (Chembur)            1
Kalwa (west)                1
Thane (east)                1
Mumbai (Mazagaon)           1
Ratnagiri                   1
Mumbai (Malad)              1
Mumbai (Sion)               1
Badlapur                    1
Mumbai (Dharavi)            1
Name: REGION, dtype: int64