# Example code to design chr2 sequential DNA-FISH primary probe library

by Pu Zheng

2020.07.06

# Table of contents

>0. [Minimum required packages and settings](#0)
>
>>0.1: [import required packages](#0.1)
>>
>1. [Extract region sequences](#1)
>
>2. [Design probe targeting sequences by probe_designer](#2)
>
>>2.1 [construct count table of 17-mers in the whole genome, transcriptome, repeat sequences and chr2](#2.1)
>>
>>2.2 [prepare probe designer](#2.2)
>>
>>2.3 [run designer for 250kb](#2.3)
>>
>>2.4 [load and parse 250kb library](#2.4)
>>
>>2.5 [run designer for 50kb](#2.5)
>>
>>2.6 [load and parse 50kb library](#2.6)
>>
>>2.7 [merge chromosome wide 250kb and 10Mb 50kb library](#2.7)
>>
>3. [Assemble probes with primers and readout binding sequences](#3)
>
>>3.1 [load primers](#3.1)
>>
>>3.2 [load readouts](#3.2)
>>
>>3.3 [patch probes together](#3.3)
>>
>4. [Check quality](#4)
>
>>4.1 [check primers and readouts](#4.1)
>>
>>4.2 [check crosstalk between probes](#4.2)
>>
>>4.3 [BLAST all candidate probes](#4.3)
>>
>>4.4 [extra round of check](#4.4)
>>
>>4.5 [trim 1 base on both sides to make it fit in 120nt](#4.5)
>>
>5. [Save probes](#5)

<a id='0'></a>
# 0. Minimum required packages and settings

<a id='0.1'></a>
## 0.1 import required packages

In [1]:
# imports
import glob
import os
import pickle
import re
import sys
from importlib import reload

import numpy as np
from tqdm import tqdm

sys.path.append(os.path.abspath(os.path.join(r"..", r".")))

import source as ia

from source.library_tools import LibraryDesigner as ld
from source.library_tools import LibraryTools as lt

print(os.getpid()) # print this so u can terminate through cmd / task-manager

12024


<a id='1'></a>
# 1. Extract region sequences

In [3]:
## Some folders
# hg38 human reference genome
genome_folder = r'X:\Libraries\Genomes\human\hg38'
# root directory of the probe library pool
pool_folder = r'X:\Libraries\CTP-07'
# sub-pool directory for this library
library_folder = os.path.join(pool_folder, 'chr2')
# fasta sequences and result probes each get their own directory under the sub-pool
sequence_folder, save_folder = (
    os.path.join(library_folder, _sub) for _sub in ('sequences', 'reports')
)

In [6]:
# Extract the sequence of every region defined for this library (50kb resolution, no flanking).
# The bare name `library_tools` is never bound in this notebook, so go through the
# `ia` package alias, the same way the primer / readout loading cells do.
seq_dic_list, reg_list = ia.library_tools.sequences.Batch_Extract_Sequences(
    library_folder, os.path.join(genome_folder, 'chroms'),
    merge=False, save=True,
    resolution=50000, flanking=0)

Input region file is: X:\Libraries\CTP-07\chr2\Regions.txt
-writing region number:4844


<a id='2'></a>
# 2. Design probe targeting sequences by probe_designer

<a id='2.1'></a>
## 2.1 construct count table of 17-mers in the whole genome, transcriptome, repeat sequences and chr2 (which is the targeting region)

Only run these cells if you do not already have pre-built 17-mer count tables.

The input file `human_genome.fasta` is the hg38 genome reference.

In [None]:
# genome: count 17-mers over every sequence of the hg38 reference fasta
reload(ld)
names, seqs = lt.fastaread(os.path.join(genome_folder, 'human_genome.fasta'))
ct = ld.countTable(word=17,
                   save_file=os.path.join(genome_folder, 'hg38_genome_17w.npy'),
                   sparse=False)
for _idx, _seq in enumerate(tqdm(seqs)):
    # show which sequence is being consumed
    print(names[_idx], end=' ')
    ct.consume(_seq, verbose=True)
ct.complete(verbose=True)
ct.save()

In [None]:
# transcriptome: count 17-mers over every mRNA sequence
reload(ld)
# NOTE(review): 'Transcriptone' looks like a misspelling of 'Transcriptome';
# it is kept as-is because it must match the folder name on disk -- confirm.
names, seqs = lt.fastaread(os.path.join(genome_folder, 'Transcriptone',  'hg38-mRNA', 'mrna.fa'))
ct = ld.countTable(word=17,
                   save_file=os.path.join(genome_folder, 'hg38_transcriptome_17w.npy'),
                   sparse=False)
for _idx, _seq in enumerate(tqdm(seqs)):
    # show which sequence is being consumed
    print(names[_idx], end=' ')
    ct.consume(_seq, verbose=True)
ct.complete(verbose=True)
ct.save()

In [None]:
# repeats: count 17-mers over every human repeat sequence
names, seqs = lt.fastaread(os.path.join(genome_folder, 'HumanRepeats.fasta'))
ct = ld.countTable(word=17,
                   save_file=os.path.join(genome_folder, 'hg38_genome_repeats_17w.npy'),
                   sparse=False)
for _idx, _seq in enumerate(tqdm(seqs)):
    # show which sequence is being consumed
    print(names[_idx], end=' ')
    ct.consume(_seq, verbose=True)
ct.complete(verbose=True)
ct.save()

In [6]:
# local: count 17-mers over chr2 only (the chromosome being targeted)
from tqdm import tqdm
names, seqs = lt.fastaread(os.path.join(genome_folder, 'chroms', 'chr2.fa'))
ct = ld.countTable(word=17,
                   save_file=os.path.join(genome_folder, 'hg38_chr2_17w.npy'),
                   sparse=False)
for _idx, _seq in enumerate(tqdm(seqs)):
    # show which sequence is being consumed
    print(names[_idx], end=' ')
    ct.consume(_seq, verbose=True)
ct.complete(verbose=True)
ct.save()

  0%|                                                                                            | 0/1 [00:00<?, ?it/s]

chr2 Time to compute seq2Int: 30.757819414138794


100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [02:07<00:00, 127.65s/it]


Time to compute unique and clip: 57.464475870132446
Time to update matrix: 21.017058849334717


<a id='2.2'></a>
## 2.2 prepare probe designer

In [4]:
# requires pre_defined genome_folder, library_folder and sequence_folder

def _natural_sort_key(path):
    """Sort key that orders file names by their embedded integers numerically.

    The base name is split into alternating text / digit-run tokens and every
    digit run is compared as an int, so e.g. 'reg_2' sorts before 'reg_10'.
    """
    _tokens = re.split(r'(\d+)', os.path.basename(path))
    # with a capturing group, odd positions are always the digit runs
    return [int(_tok) if _pos % 2 else _tok for _pos, _tok in enumerate(_tokens)]

# Indices: pre-built 17-mer count tables
genome_index = os.path.join(genome_folder, r'hg38_genome_17w.npy')
repeat_index = os.path.join(genome_folder, r'hg38_genome_repeats_17w.npy')
transcriptome_index = os.path.join(genome_folder, r'hg38_transcriptome_17w.npy')
local_index = os.path.join(genome_folder, 'hg38_chr2_17w.npy')
# get input files
# glob returns matches in arbitrary order, but later cells select regions by
# position in this list (input_files[::5], input_files[_id]), so pin the order here.
# NOTE(review): assumes numeric order of the file names equals genomic order of the regions -- confirm.
input_files = sorted(
    glob.glob(os.path.join(sequence_folder, os.path.basename(library_folder), '*.fasta')),
    key=_natural_sort_key,
)
# folder to save result probes
report_folder = os.path.join(library_folder, 'reports')

print(f"{len(input_files)} regions loaded to design probes.")
# when True, the designer cells recompute reports even if the report file already exists
overwrite = True

os.makedirs(report_folder, exist_ok=True)

4844 regions loaded to design probes.


<a id='2.3'></a>
## 2.3 run designer for 250kb

This designs probes for the first 50kb of every 250kb segment of chr2.

In [5]:
limit = 50 # batch size

# every fifth 50kb region file, i.e. the first 50kb of each 250kb segment
_files_250kb = input_files[::5]

for _start in range(0, len(_files_250kb), limit):
    _end = min(_start + limit, len(_files_250kb))
    _sel_files = _files_250kb[_start:_end]

    _save_file = os.path.join(report_folder, f'merged_probes_250kb_{_start}_{_end}.pbr')
    print(_save_file)
    # skip batches whose report already exists, unless overwriting
    if os.path.exists(_save_file) and not overwrite:
        continue

    # sequences to design probes for
    _sequence_dic = {'file':_sel_files,'use_revc':False,'use_kmer':True}
    # k-mer count tables the candidate probes are mapped against
    _map_dic = {
        'genome':{'file':genome_index,'use_revc':True,'use_kmer':True},
        'transcriptome':{'file':transcriptome_index,'use_revc':False,'use_kmer':True},
        'rep_genome':{'file':repeat_index,'use_revc':True,'use_kmer':True},
        'local_genome':{'file':local_index,'force_list':True,'use_revc':True,'use_kmer':True},
    }
    _params_dic = {'word_size':17,'pb_len':42,'buffer_len':2,'max_count':2**16-1,
                   'check_on_go':False,'auto':False}
    # thresholds handed to the designer's checks
    _dic_check = {('genome','local_genome'):75,'rep_genome':0,'transcriptome':14,
                  'local_genome':40, 'gc':[0.25,0.85],'tm':70}

    pb_designer = ld.pb_reports_class(
        sequence_dic=_sequence_dic,
        map_dic=_map_dic,
        save_file=_save_file,
        params_dic=_params_dic,
        dic_check=_dic_check)

    pb_designer.computeOTmaps()
    pb_designer.compute_pb_report()
    pb_designer.perform_check_end()
    pb_designer.plots()
    pb_designer.save_csv()
    print(f"-- number of probes kept: {len(pb_designer.pb_reports_keep)}")
    del pb_designer

X:\Libraries\CTP-07\chr2\reports\merged_probes_250kb_500_550.pbr
Setting attribute: map_genome
Setting attribute: map_transcriptome
Setting attribute: map_rep_genome
Setting attribute: map_local_genome
Time(s): 240.11573958396912
Dealing with sequence: 1 out of 50
Time(s): 17.612715244293213
Dealing with sequence: 2 out of 50
Time(s): 15.260539770126343
Dealing with sequence: 3 out of 50
Time(s): 15.319840431213379
Dealing with sequence: 4 out of 50
Time(s): 16.325456380844116
Dealing with sequence: 5 out of 50
Time(s): 20.287190675735474
Dealing with sequence: 6 out of 50
Time(s): 17.600955486297607
Dealing with sequence: 7 out of 50
Time(s): 18.123807191848755
Dealing with sequence: 8 out of 50
Time(s): 19.30851984024048
Dealing with sequence: 9 out of 50
Time(s): 19.959815740585327
Dealing with sequence: 10 out of 50
Time(s): 21.164879322052002
Dealing with sequence: 11 out of 50
Time(s): 21.331434965133667
Dealing with sequence: 12 out of 50
Time(s): 21.75262141227722
Dealing with 

Time(s): 28.78710103034973
Dealing with sequence: 18 out of 50
Time(s): 29.34898543357849
Dealing with sequence: 19 out of 50
Time(s): 30.840848684310913
Dealing with sequence: 20 out of 50
Time(s): 30.750227451324463
Dealing with sequence: 21 out of 50
Time(s): 31.850215911865234
Dealing with sequence: 22 out of 50
Time(s): 32.904237508773804
Dealing with sequence: 23 out of 50
Time(s): 33.07935929298401
Dealing with sequence: 24 out of 50
Time(s): 34.64068818092346
Dealing with sequence: 25 out of 50
Time(s): 35.32100582122803
Dealing with sequence: 26 out of 50
Time(s): 35.9759304523468
Dealing with sequence: 27 out of 50
Time(s): 36.90061688423157
Dealing with sequence: 28 out of 50
Time(s): 37.62016034126282
Dealing with sequence: 29 out of 50
Time(s): 39.36325812339783
Dealing with sequence: 30 out of 50
Time(s): 39.38551068305969
Dealing with sequence: 31 out of 50
Time(s): 41.71787452697754
Dealing with sequence: 32 out of 50
Time(s): 42.59587478637695
Dealing with sequence: 33

Time(s): 46.98578763008118
Dealing with sequence: 38 out of 50
Time(s): 48.11209678649902
Dealing with sequence: 39 out of 50
Time(s): 50.26440644264221
Dealing with sequence: 40 out of 50
Time(s): 49.90664982795715
Dealing with sequence: 41 out of 50
Time(s): 50.79159712791443
Dealing with sequence: 42 out of 50
Time(s): 51.351036071777344
Dealing with sequence: 43 out of 50
Time(s): 52.49957752227783
Dealing with sequence: 44 out of 50
Time(s): 53.35527753829956
Dealing with sequence: 45 out of 50
Time(s): 54.19122838973999
Dealing with sequence: 46 out of 50
Time(s): 56.139692306518555
Dealing with sequence: 47 out of 50
Time(s): 56.06440782546997
Dealing with sequence: 48 out of 50
Time(s): 58.66342639923096
Dealing with sequence: 49 out of 50
Time(s): 58.16177463531494
Dealing with sequence: 50 out of 50
Time(s): 60.09125995635986
Picking non-overlaping sequences.
Time(s): 69.65978407859802
-- number of probes kept: 36054
X:\Libraries\CTP-07\chr2\reports\merged_probes_250kb_750_80

Time(s): 16.1421000957489
Dealing with sequence: 3 out of 50
Time(s): 16.608113050460815
Dealing with sequence: 4 out of 50
Time(s): 17.35661792755127
Dealing with sequence: 5 out of 50
Time(s): 17.437999486923218
Dealing with sequence: 6 out of 50
Time(s): 18.545193195343018
Dealing with sequence: 7 out of 50
Time(s): 19.568544626235962
Dealing with sequence: 8 out of 50
Time(s): 21.11223793029785
Dealing with sequence: 9 out of 50
Time(s): 23.328670501708984
Dealing with sequence: 10 out of 50
Time(s): 22.939611434936523
Dealing with sequence: 11 out of 50
Time(s): 24.38501477241516
Dealing with sequence: 12 out of 50
Time(s): 24.207602500915527
Dealing with sequence: 13 out of 50
Time(s): 25.218669414520264
Dealing with sequence: 14 out of 50
Time(s): 26.83645224571228
Dealing with sequence: 15 out of 50
Time(s): 28.5338773727417
Dealing with sequence: 16 out of 50
Time(s): 28.628960371017456
Dealing with sequence: 17 out of 50
Time(s): 30.304471731185913
Dealing with sequence: 18 o

<a id='2.4'></a>
## 2.4 load and parse 250kb library

In [68]:
# initialize
pb_dic = {}
limit = 50
probe_num_limit = 100
cand_prob_nums = []
verbose=True

# walk the same batches that the 250kb designer cell wrote
_files_250kb = input_files[::5]
for _start in range(0, len(_files_250kb), limit):
    _end = min(_start + limit, len(_files_250kb))
    _sel_files = _files_250kb[_start:_end]

    _save_file = os.path.join(report_folder, f'merged_probes_250kb_{_start}_{_end}.pbr')
    # create probe object and load probes
    if verbose:
        print(f"- loading probes from file: {_save_file}")
    _pbde = ld.pb_reports_class()
    _pbde.load_pbr(_save_file)
    _kept = _pbde.pb_reports_keep
    # split the batch report into one report object per region
    reg_inds = np.unique([_v['reg_index'] for _v in _kept.values()])
    for _rid in reg_inds:
        _sel_pbs = {_s:_v for _s, _v in _kept.items() if _v['reg_index']==_rid}

        # only regions with more than probe_num_limit candidate probes are kept
        if len(_sel_pbs) <= probe_num_limit:
            continue
        # region id is the last '_'-separated field of the region name
        _region_gid = next(iter(_sel_pbs.values()))['reg_name'].split('_')[-1]
        cand_prob_nums.append(len(_sel_pbs))
        if verbose:
            print(f"-- appending region:{_region_gid} with {len(_sel_pbs)} probes")
        _splitted_pbde = ld.pb_reports_class()
        _splitted_pbde.pb_reports_keep = _sel_pbs
        pb_dic[int(_region_gid)] = _splitted_pbde
        

- loading probes from file: X:\Libraries\CTP-07\chr2\reports\merged_probes_250kb_0_50.pbr
-- appending region:1 with 645 probes
-- appending region:6 with 811 probes
-- appending region:11 with 850 probes
-- appending region:16 with 830 probes
-- appending region:21 with 739 probes
-- appending region:26 with 812 probes
-- appending region:31 with 636 probes
-- appending region:36 with 761 probes
-- appending region:41 with 807 probes
-- appending region:46 with 884 probes
-- appending region:51 with 946 probes
-- appending region:56 with 948 probes
-- appending region:61 with 802 probes
-- appending region:66 with 890 probes
-- appending region:71 with 691 probes
-- appending region:76 with 821 probes
-- appending region:81 with 850 probes
-- appending region:86 with 786 probes
-- appending region:91 with 906 probes
-- appending region:96 with 830 probes
-- appending region:101 with 743 probes
-- appending region:106 with 776 probes
-- appending region:111 with 852 probes
-- appending

-- appending region:1001 with 843 probes
-- appending region:1006 with 790 probes
-- appending region:1011 with 894 probes
-- appending region:1016 with 899 probes
-- appending region:1021 with 840 probes
-- appending region:1026 with 835 probes
-- appending region:1031 with 695 probes
-- appending region:1036 with 638 probes
-- appending region:1041 with 578 probes
-- appending region:1046 with 706 probes
-- appending region:1051 with 706 probes
-- appending region:1056 with 703 probes
-- appending region:1061 with 718 probes
-- appending region:1066 with 627 probes
-- appending region:1071 with 849 probes
-- appending region:1076 with 659 probes
-- appending region:1081 with 510 probes
-- appending region:1086 with 846 probes
-- appending region:1091 with 727 probes
-- appending region:1096 with 831 probes
-- appending region:1101 with 824 probes
-- appending region:1106 with 554 probes
-- appending region:1111 with 588 probes
-- appending region:1116 with 619 probes
-- appending reg

-- appending region:2081 with 863 probes
-- appending region:2086 with 758 probes
-- appending region:2091 with 813 probes
-- appending region:2096 with 857 probes
-- appending region:2101 with 662 probes
-- appending region:2106 with 863 probes
-- appending region:2111 with 789 probes
-- appending region:2116 with 926 probes
-- appending region:2121 with 744 probes
-- appending region:2126 with 586 probes
-- appending region:2131 with 660 probes
-- appending region:2136 with 850 probes
-- appending region:2141 with 742 probes
-- appending region:2146 with 803 probes
-- appending region:2151 with 751 probes
-- appending region:2156 with 857 probes
-- appending region:2161 with 742 probes
-- appending region:2166 with 819 probes
-- appending region:2171 with 334 probes
-- appending region:2176 with 365 probes
-- appending region:2181 with 795 probes
-- appending region:2186 with 967 probes
-- appending region:2191 with 822 probes
-- appending region:2216 with 772 probes
-- appending reg

-- appending region:3081 with 639 probes
-- appending region:3086 with 730 probes
-- appending region:3091 with 675 probes
-- appending region:3096 with 790 probes
-- appending region:3101 with 742 probes
-- appending region:3106 with 667 probes
-- appending region:3111 with 706 probes
-- appending region:3116 with 593 probes
-- appending region:3121 with 722 probes
-- appending region:3126 with 664 probes
-- appending region:3131 with 840 probes
-- appending region:3136 with 699 probes
-- appending region:3141 with 783 probes
-- appending region:3146 with 724 probes
-- appending region:3151 with 682 probes
-- appending region:3156 with 830 probes
-- appending region:3161 with 888 probes
-- appending region:3166 with 703 probes
-- appending region:3171 with 784 probes
-- appending region:3176 with 825 probes
-- appending region:3181 with 681 probes
-- appending region:3186 with 681 probes
-- appending region:3191 with 736 probes
-- appending region:3196 with 724 probes
-- appending reg

-- appending region:4036 with 782 probes
-- appending region:4041 with 770 probes
-- appending region:4046 with 458 probes
-- appending region:4051 with 437 probes
-- appending region:4056 with 622 probes
-- appending region:4061 with 637 probes
-- appending region:4066 with 446 probes
-- appending region:4071 with 580 probes
-- appending region:4076 with 682 probes
-- appending region:4081 with 921 probes
-- appending region:4086 with 952 probes
-- appending region:4091 with 903 probes
-- appending region:4096 with 812 probes
-- appending region:4101 with 807 probes
-- appending region:4106 with 918 probes
-- appending region:4111 with 813 probes
-- appending region:4116 with 844 probes
-- appending region:4121 with 576 probes
-- appending region:4126 with 658 probes
-- appending region:4131 with 820 probes
-- appending region:4136 with 696 probes
-- appending region:4141 with 614 probes
-- appending region:4146 with 892 probes
-- appending region:4151 with 739 probes
-- appending reg

<a id='2.5'></a>
## 2.5 run designer for 50kb

This part designs probes at 50kb resolution for a roughly 10Mb segment (chr2:27500000-38000000).

In [None]:
resolution = 50000
# indices of the 50kb regions lying inside chr2:27500000-38000000
kept_inds = np.arange(27500000 // resolution, 38000000 // resolution)
# every fifth region is already part of the 250kb library, so leave those out
sel_files = [input_files[_id] for _id in kept_inds if _id % 5 != 0]

save_file = os.path.join(report_folder, 'merged_probes_supp_50kb.pbr')
# design only when the report is missing or overwriting is requested
_needs_design = overwrite or not os.path.exists(save_file)
if _needs_design:

    # sequences to design probes for
    _sequence_dic = {'file':sel_files,'use_revc':False,'use_kmer':True}
    # k-mer count tables the candidate probes are mapped against
    _map_dic = {
        'genome':{'file':genome_index,'use_revc':True,'use_kmer':True},
        'transcriptome':{'file':transcriptome_index,'use_revc':False,'use_kmer':True},
        'rep_genome':{'file':repeat_index,'use_revc':True,'use_kmer':True},
        'local_genome':{'file':local_index,'force_list':True,'use_revc':True,'use_kmer':True},
    }
    _params_dic = {'word_size':17,'pb_len':42,'buffer_len':2,'max_count':2**16-1,
                   'check_on_go':False,'auto':False}
    # thresholds handed to the designer's checks
    _dic_check = {('genome','local_genome'):75,'rep_genome':0,'transcriptome':14,
                  'local_genome':40, 'gc':[0.25,0.85],'tm':70}

    pb_designer = ld.pb_reports_class(
        sequence_dic=_sequence_dic,
        map_dic=_map_dic,
        save_file=save_file,
        params_dic=_params_dic,
        dic_check=_dic_check)

    pb_designer.computeOTmaps()
    pb_designer.compute_pb_report()
    pb_designer.perform_check_end()
    pb_designer.plots()
    pb_designer.save_csv()
    print(f"-- number of probes kept: {len(pb_designer.pb_reports_keep)}")
    del pb_designer


<a id='2.6'></a>
## 2.6 load and parse 50kb library

In [70]:
supp_pb_dic = {}
# Parse the supplementary 50kb report into one report object per region
_save_file = os.path.join(report_folder, r'merged_probes_supp_50kb.pbr')
_pbde = ld.pb_reports_class()
_pbde.load_pbr(_save_file)

_kept = _pbde.pb_reports_keep
reg_inds = np.unique([_v['reg_index'] for _v in _kept.values()])
for _rid in reg_inds:
    _sel_pbs = {_s:_v for _s, _v in _kept.items() if _v['reg_index']==_rid}

    if not _sel_pbs:
        continue
    # region id is the last '_'-separated field of the region name
    _region_gid = next(iter(_sel_pbs.values()))['reg_name'].split('_')[-1]
    cand_prob_nums.append(len(_sel_pbs))
    if verbose:
        print(f"-- appending region:{_region_gid} with {len(_sel_pbs)} probes")
    _splitted_pbde = ld.pb_reports_class()
    _splitted_pbde.pb_reports_keep = _sel_pbs
    supp_pb_dic[int(_region_gid)] = _splitted_pbde

-- appending region:552 with 364 probes
-- appending region:553 with 616 probes
-- appending region:554 with 609 probes
-- appending region:555 with 763 probes
-- appending region:557 with 762 probes
-- appending region:558 with 724 probes
-- appending region:559 with 693 probes
-- appending region:560 with 762 probes
-- appending region:562 with 827 probes
-- appending region:563 with 759 probes
-- appending region:564 with 766 probes
-- appending region:565 with 852 probes
-- appending region:567 with 850 probes
-- appending region:568 with 918 probes
-- appending region:569 with 791 probes
-- appending region:570 with 780 probes
-- appending region:572 with 765 probes
-- appending region:573 with 885 probes
-- appending region:574 with 803 probes
-- appending region:575 with 642 probes
-- appending region:577 with 552 probes
-- appending region:578 with 619 probes
-- appending region:579 with 719 probes
-- appending region:580 with 772 probes
-- appending region:582 with 560 probes


<a id='2.7'></a>
## 2.7 merge chromosome wide 250kb and 10Mb 50kb library

In [122]:
# merge: 250kb regions first (in sorted region-id order), followed by the
# supplementary 50kb regions (also sorted); a supplementary entry replaces a
# 250kb entry that carries the same region id
merged_pb_dic = dict(sorted(pb_dic.items()))
merged_pb_dic.update(sorted(supp_pb_dic.items()))

In [124]:
# filter by genome counts
# NOTE(review): `check` is not imported in any cell visible here -- presumably a
# quality-check module of the project's library_tools; confirm and import it in the import cell.
# Number of probes to keep per region; per the cell output, regions with more
# candidates are screened down to this number and regions with fewer are appended directly.
num_probes_kept = 350
filtered_merged_pb_dic = check.Screen_probe_by_hit(library_folder, merged_pb_dic, num_probes_kept,
                                                   hit_type='genome')

- Screen probes by genome counts.
-- filtering region:1, screen genome to keep 350 probes
-- filtering region:6, screen genome to keep 350 probes
-- filtering region:11, screen genome to keep 350 probes
-- filtering region:16, screen genome to keep 350 probes
-- filtering region:21, screen genome to keep 350 probes
-- filtering region:26, screen genome to keep 350 probes
-- filtering region:31, screen genome to keep 350 probes
-- filtering region:36, screen genome to keep 350 probes
-- filtering region:41, screen genome to keep 350 probes
-- filtering region:46, screen genome to keep 350 probes
-- filtering region:51, screen genome to keep 350 probes
-- filtering region:56, screen genome to keep 350 probes
-- filtering region:61, screen genome to keep 350 probes
-- filtering region:66, screen genome to keep 350 probes
-- filtering region:71, screen genome to keep 350 probes
-- filtering region:76, screen genome to keep 350 probes
-- filtering region:81, screen genome to keep 350 probes

-- filtering region:916, screen genome to keep 350 probes
-- filtering region:921, screen genome to keep 350 probes
-- filtering region:926, screen genome to keep 350 probes
-- filtering region:931, screen genome to keep 350 probes
-- filtering region:936, screen genome to keep 350 probes
-- filtering region:941, screen genome to keep 350 probes
-- filtering region:946, screen genome to keep 350 probes
-- filtering region:951, screen genome to keep 350 probes
-- filtering region:956, screen genome to keep 350 probes
-- filtering region:961, screen genome to keep 350 probes
-- filtering region:966, screen genome to keep 350 probes
-- filtering region:971, screen genome to keep 350 probes
-- filtering region:976, screen genome to keep 350 probes
-- filtering region:981, screen genome to keep 350 probes
-- filtering region:986, screen genome to keep 350 probes
-- filtering region:991, screen genome to keep 350 probes
-- filtering region:996, screen genome to keep 350 probes
-- filtering r

-- filtering region:1981, screen genome to keep 350 probes
-- filtering region:1986, screen genome to keep 350 probes
-- filtering region:1991, screen genome to keep 350 probes
-- filtering region:1996, screen genome to keep 350 probes
-- filtering region:2001, screen genome to keep 350 probes
-- filtering region:2006, screen genome to keep 350 probes
-- filtering region:2011, screen genome to keep 350 probes
-- filtering region:2016, screen genome to keep 350 probes
-- filtering region:2021, screen genome to keep 350 probes
-- filtering region:2026, screen genome to keep 350 probes
-- filtering region:2031, screen genome to keep 350 probes
-- filtering region:2036, screen genome to keep 350 probes
-- filtering region:2041, screen genome to keep 350 probes
-- filtering region:2046, screen genome to keep 350 probes
-- filtering region:2051, screen genome to keep 350 probes
-- filtering region:2056, screen genome to keep 350 probes
-- filtering region:2061, screen genome to keep 350 prob

-- filtering region:2726, screen genome to keep 350 probes
-- filtering region:2731, screen genome to keep 350 probes
-- filtering region:2736, screen genome to keep 350 probes
-- filtering region:2741, screen genome to keep 350 probes
-- filtering region:2746, screen genome to keep 350 probes
-- filtering region:2751, screen genome to keep 350 probes
-- filtering region:2756, screen genome to keep 350 probes
-- filtering region:2761, screen genome to keep 350 probes
-- filtering region:2766, screen genome to keep 350 probes
-- filtering region:2771, screen genome to keep 350 probes
-- filtering region:2776, screen genome to keep 350 probes
-- filtering region:2781, screen genome to keep 350 probes
-- filtering region:2786, screen genome to keep 350 probes
-- filtering region:2791, screen genome to keep 350 probes
-- filtering region:2796, screen genome to keep 350 probes
-- filtering region:2801, screen genome to keep 350 probes
-- filtering region:2806, screen genome to keep 350 prob

-- filtering region:3421, screen genome to keep 350 probes
-- filtering region:3426, screen genome to keep 350 probes
-- filtering region:3431, screen genome to keep 350 probes
-- filtering region:3436, screen genome to keep 350 probes
-- filtering region:3441, screen genome to keep 350 probes
-- filtering region:3446, screen genome to keep 350 probes
-- filtering region:3451, screen genome to keep 350 probes
-- filtering region:3456, screen genome to keep 350 probes
-- filtering region:3461, screen genome to keep 350 probes
-- filtering region:3466, screen genome to keep 350 probes
-- filtering region:3471, screen genome to keep 350 probes
-- filtering region:3476, screen genome to keep 350 probes
-- filtering region:3481, screen genome to keep 350 probes
-- filtering region:3486, screen genome to keep 350 probes
-- filtering region:3491, screen genome to keep 350 probes
-- filtering region:3496, screen genome to keep 350 probes
-- filtering region:3501, screen genome to keep 350 prob

-- filtering region:4341, screen genome to keep 350 probes
-- filtering region:4346, screen genome to keep 350 probes
-- filtering region:4351, screen genome to keep 350 probes
-- filtering region:4356, screen genome to keep 350 probes
-- filtering region:4361, screen genome to keep 350 probes
-- filtering region:4366, screen genome to keep 350 probes
-- filtering region:4371, screen genome to keep 350 probes
-- filtering region:4376, screen genome to keep 350 probes
-- filtering region:4381, screen genome to keep 350 probes
-- filtering region:4386, screen genome to keep 350 probes
-- filtering region:4391, screen genome to keep 350 probes
-- filtering region:4396, screen genome to keep 350 probes
-- filtering region:4401, screen genome to keep 350 probes
-- filtering region:4406, screen genome to keep 350 probes
-- filtering region:4411, screen genome to keep 350 probes
-- filtering region:4416, screen genome to keep 350 probes
-- filtering region:4421, screen genome to keep 350 prob

-- filtering region:635, screen genome to keep 350 probes
-- filtering region:637, screen genome to keep 350 probes
-- filtering region:638, screen genome to keep 350 probes
-- filtering region:639, screen genome to keep 350 probes
-- filtering region:640, screen genome to keep 350 probes
-- filtering region:642, screen genome to keep 350 probes
-- filtering region:643, screen genome to keep 350 probes
-- filtering region:644, screen genome to keep 350 probes
-- filtering region:645, screen genome to keep 350 probes
-- filtering region:647, directly append 341 probes
-- filtering region:648, screen genome to keep 350 probes
-- filtering region:649, screen genome to keep 350 probes
-- filtering region:650, screen genome to keep 350 probes
-- filtering region:652, screen genome to keep 350 probes
-- filtering region:653, screen genome to keep 350 probes
-- filtering region:654, screen genome to keep 350 probes
-- filtering region:655, screen genome to keep 350 probes
-- filtering region:

<a id='3'></a>
# 3. Assemble probes with primers and readout binding sequences

<a id='3.1'></a>
## 3.1 load primers

In [4]:
# Load primers number 6 and 11 from the project's primer set
# (which one is forward / reverse is not visible here -- TODO confirm).
primers = ia.library_tools.probes.load_primers([6,11])

- Picked primer: ID: W1A07_primer_6
Name: W1A07_primer_6
Description: W1A07_primer_6
Number of features: 0
Seq('CGCAAACTGGTGCGGAAGGC', SingleLetterAlphabet())
- Picked primer: ID: W1A12_primer_11
Name: W1A12_primer_11
Description: W1A12_primer_11
Number of features: 0
Seq('TAATACGACTCACTATAGGGCCATTGCCCGCGAGGTCGAG', SingleLetterAlphabet())


<a id='3.2'></a>
## 3.2 load readouts

In [7]:
# request as many designed readouts as there are regions in the merged library
# (first argument is presumably the number of readouts to load -- confirm)
_num_regions = len(merged_pb_dic)
readouts = ia.library_tools.probes.load_readouts(_num_regions, _type='designed_readouts')

# readouts are grouped under a type key; 'u' is the only type used here
readout_dict = dict(u=readouts)

### design readout dict

In [128]:
# 250kb regions, taken in sorted region-id order, get readouts u0, u1, ...;
# every region lists its readout name twice
gene_readout_dict = {
    _k: [f"u{_i}", f"u{_i}"]
    for _i, _k in enumerate(sorted(pb_dic))
}

In [129]:
# supplementary 50kb regions continue the readout numbering right after the 250kb regions
_first_supp_readout = len(pb_dic)
for _i, _k in enumerate(sorted(supp_pb_dic), start=_first_supp_readout):
    gene_readout_dict[_k] = [f"u{_i}"]*2

<a id='3.3'></a>
## 3.3 patch probes together

In [131]:
# Re-import the probes module so that local edits to it are picked up, then
# assemble the probes from target sequences, readouts and primers.
# The module is reached through the `ia` alias bound in the import cell: the bare
# name `library_tools` is never bound in this notebook, and importing through the
# `ImageAnalysis3` package name would not match the `source` package used everywhere else.
reload(ia.library_tools.probes)
Assemble_probes = ia.library_tools.probes.Assemble_probes
cand_probes, readout_summary = Assemble_probes(library_folder, filtered_merged_pb_dic,
                                               gene_readout_dict, readout_dict, primers,
                                               primer_len=20,
                                               rc_targets=False, overwrite=True)

- Assemble probes by given target sequences, readouts and primers.
-- included readout types: ['u']
--- assemblying 350 probes in region: 1
--- assemblying 350 probes in region: 6
--- assemblying 350 probes in region: 11
--- assemblying 350 probes in region: 16
--- assemblying 350 probes in region: 21
--- assemblying 350 probes in region: 26
--- assemblying 350 probes in region: 31
--- assemblying 350 probes in region: 36
--- assemblying 350 probes in region: 41
--- assemblying 350 probes in region: 46
--- assemblying 350 probes in region: 51
--- assemblying 350 probes in region: 56
--- assemblying 350 probes in region: 61
--- assemblying 350 probes in region: 66
--- assemblying 350 probes in region: 71
--- assemblying 350 probes in region: 76
--- assemblying 350 probes in region: 81
--- assemblying 350 probes in region: 86
--- assemblying 350 probes in region: 91
--- assemblying 350 probes in region: 96
--- assemblying 350 probes in region: 101
--- assemblying 350 probes in region: 10

--- assemblying 350 probes in region: 986
--- assemblying 350 probes in region: 991
--- assemblying 350 probes in region: 996
--- assemblying 350 probes in region: 1001
--- assemblying 350 probes in region: 1006
--- assemblying 350 probes in region: 1011
--- assemblying 350 probes in region: 1016
--- assemblying 350 probes in region: 1021
--- assemblying 350 probes in region: 1026
--- assemblying 350 probes in region: 1031
--- assemblying 350 probes in region: 1036
--- assemblying 350 probes in region: 1041
--- assemblying 350 probes in region: 1046
--- assemblying 350 probes in region: 1051
--- assemblying 350 probes in region: 1056
--- assemblying 350 probes in region: 1061
--- assemblying 350 probes in region: 1066
--- assemblying 350 probes in region: 1071
--- assemblying 350 probes in region: 1076
--- assemblying 350 probes in region: 1081
--- assemblying 350 probes in region: 1086
--- assemblying 350 probes in region: 1091
--- assemblying 350 probes in region: 1096
--- assemblyin

--- assemblying 350 probes in region: 2076
--- assemblying 350 probes in region: 2081
--- assemblying 350 probes in region: 2086
--- assemblying 350 probes in region: 2091
--- assemblying 350 probes in region: 2096
--- assemblying 350 probes in region: 2101
--- assemblying 350 probes in region: 2106
--- assemblying 350 probes in region: 2111
--- assemblying 350 probes in region: 2116
--- assemblying 350 probes in region: 2121
--- assemblying 350 probes in region: 2126
--- assemblying 350 probes in region: 2131
--- assemblying 350 probes in region: 2136
--- assemblying 350 probes in region: 2141
--- assemblying 350 probes in region: 2146
--- assemblying 350 probes in region: 2151
--- assemblying 350 probes in region: 2156
--- assemblying 350 probes in region: 2161
--- assemblying 350 probes in region: 2166
--- assemblying 334 probes in region: 2171
--- assemblying 350 probes in region: 2176
--- assemblying 350 probes in region: 2181
--- assemblying 350 probes in region: 2186
--- assembl

--- assemblying 350 probes in region: 3086
--- assemblying 350 probes in region: 3091
--- assemblying 350 probes in region: 3096
--- assemblying 350 probes in region: 3101
--- assemblying 350 probes in region: 3106
--- assemblying 350 probes in region: 3111
--- assemblying 350 probes in region: 3116
--- assemblying 350 probes in region: 3121
--- assemblying 350 probes in region: 3126
--- assemblying 350 probes in region: 3131
--- assemblying 350 probes in region: 3136
--- assemblying 350 probes in region: 3141
--- assemblying 350 probes in region: 3146
--- assemblying 350 probes in region: 3151
--- assemblying 350 probes in region: 3156
--- assemblying 350 probes in region: 3161
--- assemblying 350 probes in region: 3166
--- assemblying 350 probes in region: 3171
--- assemblying 350 probes in region: 3176
--- assemblying 350 probes in region: 3181
--- assemblying 350 probes in region: 3186
--- assemblying 350 probes in region: 3191
--- assemblying 350 probes in region: 3196
--- assembl

--- assemblying 350 probes in region: 4046
--- assemblying 350 probes in region: 4051
--- assemblying 350 probes in region: 4056
--- assemblying 350 probes in region: 4061
--- assemblying 350 probes in region: 4066
--- assemblying 350 probes in region: 4071
--- assemblying 350 probes in region: 4076
--- assemblying 350 probes in region: 4081
--- assemblying 350 probes in region: 4086
--- assemblying 350 probes in region: 4091
--- assemblying 350 probes in region: 4096
--- assemblying 350 probes in region: 4101
--- assemblying 350 probes in region: 4106
--- assemblying 350 probes in region: 4111
--- assemblying 350 probes in region: 4116
--- assemblying 350 probes in region: 4121
--- assemblying 350 probes in region: 4126
--- assemblying 350 probes in region: 4131
--- assemblying 350 probes in region: 4136
--- assemblying 350 probes in region: 4141
--- assemblying 350 probes in region: 4146
--- assemblying 350 probes in region: 4151
--- assemblying 350 probes in region: 4156
--- assembl

--- assemblying 350 probes in region: 595
--- assemblying 350 probes in region: 597
--- assemblying 350 probes in region: 598
--- assemblying 350 probes in region: 599
--- assemblying 350 probes in region: 600
--- assemblying 350 probes in region: 602
--- assemblying 350 probes in region: 603
--- assemblying 350 probes in region: 604
--- assemblying 350 probes in region: 605
--- assemblying 350 probes in region: 607
--- assemblying 350 probes in region: 608
--- assemblying 350 probes in region: 609
--- assemblying 350 probes in region: 610
--- assemblying 350 probes in region: 612
--- assemblying 350 probes in region: 613
--- assemblying 350 probes in region: 614
--- assemblying 350 probes in region: 615
--- assemblying 350 probes in region: 617
--- assemblying 350 probes in region: 618
--- assemblying 350 probes in region: 619
--- assemblying 350 probes in region: 620
--- assemblying 350 probes in region: 622
--- assemblying 350 probes in region: 623
--- assemblying 350 probes in regi

<a id='4'></a>
# 4. Check quality

In [132]:
# biopython imports
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqRecord import SeqRecord
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML

In [133]:
# Read every full-length candidate probe back from the FASTA file in library_folder.
# NOTE(review): presumably this is the file written by Assemble_probes above — confirm.
candidate_full_name = 'candidate_probes.fasta'

full_records = []
with open(os.path.join(library_folder, candidate_full_name), 'r') as handle:
    # SeqIO.parse yields one SeqRecord per FASTA entry; collect them all in file order
    full_records.extend(SeqIO.parse(handle, "fasta"))

print(f"Total probe loaded: {len(full_records)}")

Total probe loaded: 385618


In [134]:
# Sanity check: show the first loaded record to eyeball its sequence and ID layout
# (region, gene index, probe index, position and readout names are all encoded in the ID).
print(full_records[0])

SeqRecord(seq=Seq('CGCAAACTGGTGCGGAAGGCCCCATGATCGTCCGATCTGGTAACCCCTAACCCT...TGG', SingleLetterAlphabet()), id='chr2:1-50001_gene_1_pb_0_pos_10575_readouts_[Stv_3_u,Stv_3_u]', name='chr2:1-50001_gene_1_pb_0_pos_10575_readouts_[Stv_3_u,Stv_3_u]', description='chr2:1-50001_gene_1_pb_0_pos_10575_readouts_[Stv_3_u,Stv_3_u]', dbxrefs=[])

<a id='4.1'></a>
## 4.1 check primers and readouts

In [136]:
# Primer length (nt) handed to the checks below; same value as used when assembling probes.
primer_len=20

# `reload` is not a builtin in Python 3: import it here so this cell does not depend on
# an import made in some other cell.
from importlib import reload
import source.library_tools.quality_check as check
reload(check)

# Check primer usage across all probes, given the two loaded primers.
primer_check = check._check_primer_usage(full_records, primers[0], primers[1], primer_len=primer_len)

# Per-region probe counts, with min_size=15 as the threshold.
# NOTE(review): presumably regions below min_size probes are flagged — confirm in quality_check.
reg_size_dic, len_check = check._check_region_size(full_records, min_size=15)

# Region -> readouts mapping; total_readout_num=2 matches the two readout names per probe ID.
reg_readout_dic, reg2readout_check = check._check_region_to_readouts(full_records, readout_dict, total_readout_num=2)

# Reverse mapping, readout -> regions, derived from the region -> readout result above.
readout_reg_dic, readout2reg_check = check._check_readout_to_region(reg_readout_dic, full_records, 
                                                                    readout_dict, primer_len=primer_len, probe_readout_num=2)

-- Checking primer usage, total probes: 385618
gene: 1 -> 350
gene: 6 -> 350
gene: 11 -> 350
gene: 16 -> 350
gene: 21 -> 350
gene: 26 -> 350
gene: 31 -> 350
gene: 36 -> 350
gene: 41 -> 350
gene: 46 -> 350
gene: 51 -> 350
gene: 56 -> 350
gene: 61 -> 350
gene: 66 -> 350
gene: 71 -> 350
gene: 76 -> 350
gene: 81 -> 350
gene: 86 -> 350
gene: 91 -> 350
gene: 96 -> 350
gene: 101 -> 350
gene: 106 -> 350
gene: 111 -> 350
gene: 116 -> 350
gene: 121 -> 350
gene: 126 -> 350
gene: 131 -> 350
gene: 136 -> 350
gene: 141 -> 350
gene: 146 -> 350
gene: 151 -> 350
gene: 156 -> 350
gene: 161 -> 350
gene: 166 -> 350
gene: 171 -> 350
gene: 176 -> 350
gene: 181 -> 350
gene: 186 -> 350
gene: 191 -> 350
gene: 196 -> 350
gene: 201 -> 350
gene: 206 -> 350
gene: 211 -> 350
gene: 216 -> 350
gene: 221 -> 350
gene: 226 -> 350
gene: 231 -> 350
gene: 236 -> 350
gene: 241 -> 350
gene: 246 -> 350
gene: 251 -> 350
gene: 256 -> 350
gene: 261 -> 350
gene: 266 -> 350
gene: 271 -> 350
gene: 276 -> 350
gene: 281 -> 350
gene: 

1 -> ['Stv_3', 'Stv_3']
6 -> ['Stv_32', 'Stv_32']
11 -> ['Stv_91', 'Stv_91']
16 -> ['Stv_4', 'Stv_4']
21 -> ['Stv_33', 'Stv_33']
26 -> ['Stv_92', 'Stv_92']
31 -> ['Stv_5', 'Stv_5']
36 -> ['Stv_35', 'Stv_35']
41 -> ['Stv_94', 'Stv_94']
46 -> ['Stv_6', 'Stv_6']
51 -> ['Stv_36', 'Stv_36']
56 -> ['Stv_95', 'Stv_95']
61 -> ['Stv_7', 'Stv_7']
66 -> ['Stv_37', 'Stv_37']
71 -> ['Stv_99', 'Stv_99']
76 -> ['Stv_8', 'Stv_8']
81 -> ['Stv_39', 'Stv_39']
86 -> ['Stv_100', 'Stv_100']
91 -> ['Stv_9', 'Stv_9']
96 -> ['Stv_40', 'Stv_40']
101 -> ['Stv_101', 'Stv_101']
106 -> ['Stv_10', 'Stv_10']
111 -> ['Stv_42', 'Stv_42']
116 -> ['Stv_104', 'Stv_104']
121 -> ['Stv_11', 'Stv_11']
126 -> ['Stv_44', 'Stv_44']
131 -> ['Stv_105', 'Stv_105']
136 -> ['Stv_12', 'Stv_12']
141 -> ['Stv_45', 'Stv_45']
146 -> ['Stv_106', 'Stv_106']
151 -> ['Stv_13', 'Stv_13']
156 -> ['Stv_46', 'Stv_46']
161 -> ['Stv_107', 'Stv_107']
166 -> ['Stv_14', 'Stv_14']
171 -> ['Stv_48', 'Stv_48']
176 -> ['Stv_109', 'Stv_109']
181 -> ['Stv_1

-- extracting probes from >region:1, region:6, region:11, region:16, region:21, region:26, region:31, region:36, region:41, region:46, region:51, region:56, region:61, region:66, region:71, region:76, region:81, region:86, region:91, region:96, region:101, region:106, region:111, region:116, region:121, region:126, region:131, region:136, region:141, region:146, region:151, region:156, region:161, region:166, region:171, region:176, region:181, region:186, region:191, region:196, region:201, region:206, region:211, region:216, region:221, region:226, region:231, region:236, region:241, region:246, region:251, region:256, region:261, region:266, region:271, region:276, region:281, region:286, region:291, region:296, region:301, region:306, region:311, region:316, region:321, region:326, region:331, region:336, region:341, region:346, region:351, region:356, region:361, region:366, region:371, region:376, region:381, region:386, region:391, region:396, region:401, region:406, region:411,

region:3376, region:3381, region:3386, region:3391, region:3396, region:3401, region:3406, region:3411, region:3416, region:3421, region:3426, region:3431, region:3436, region:3441, region:3446, region:3451, region:3456, region:3461, region:3466, region:3471, region:3476, region:3481, region:3486, region:3491, region:3496, region:3501, region:3506, region:3511, region:3516, region:3521, region:3526, region:3531, region:3536, region:3541, region:3546, region:3551, region:3556, region:3561, region:3566, region:3571, region:3576, region:3581, region:3586, region:3591, region:3596, region:3601, region:3606, region:3611, region:3616, region:3621, region:3626, region:3631, region:3636, region:3641, region:3646, region:3651, region:3656, region:3661, region:3666, region:3671, region:3676, region:3681, region:3686, region:3691, region:3696, region:3701, region:3706, region:3711, region:3716, region:3721, region:3726, region:3731, region:3736, region:3741, region:3746, region:3751, region:3756,

<a id='4.2'></a>
## 4.2 check crosstalk between probes

In [138]:
# Build the internal map over all candidate probes.
# NOTE(review): library_folder is passed too, presumably so the map can be cached there — confirm.
int_map = check._construct_internal_map(full_records, library_folder)

# Check readout occurrences inside probes against the internal map.
readout_count_dic, readout_count_check = check._check_readout_in_probes(
    readout_reg_dic, reg_size_dic, int_map, readout_dict,
)

# Probe-vs-probe crosstalk filter: probes reported as "dumped" in the log below are dropped.
kept_records, removed_count = check._check_between_probes(full_records, int_map)

# Persist the surviving probes as FASTA next to the candidate file.
filtered_full_path = os.path.join(library_folder, 'filtered_full_probes.fasta')
with open(filtered_full_path, 'w') as output_handle:
    SeqIO.write(kept_records, output_handle, "fasta")

--- Sequence:chr2:1500001-1550001_gene_31_pb_68_pos_10259_readouts_[Stv_5_u,Stv_5_u] got hits:149, dumped
--- Sequence:chr2:3000001-3050001_gene_61_pb_287_pos_42029_readouts_[Stv_7_u,Stv_7_u] got hits:893, dumped
--- Sequence:chr2:3500001-3550001_gene_71_pb_55_pos_18936_readouts_[Stv_99_u,Stv_99_u] got hits:51, dumped
--- Sequence:chr2:3500001-3550001_gene_71_pb_58_pos_19122_readouts_[Stv_99_u,Stv_99_u] got hits:51, dumped
--- Sequence:chr2:26250001-26300001_gene_526_pb_76_pos_12012_readouts_[NDB_31_u,NDB_31_u] got hits:364, dumped
--- Sequence:chr2:35750001-35800001_gene_716_pb_135_pos_29912_readouts_[NDB_120_u,NDB_120_u] got hits:55, dumped
--- Sequence:chr2:167250001-167300001_gene_3346_pb_311_pos_44424_readouts_[NDB_698_u,NDB_698_u] got hits:364, dumped
--- Sequence:chr2:174000001-174050001_gene_3481_pb_44_pos_8530_readouts_[NDB_731_u,NDB_731_u] got hits:165, dumped
--- Sequence:chr2:28050001-28100001_gene_562_pb_263_pos_39842_readouts_[NDB_1099_u,NDB_1099_u] got hits:51, dumped
--

<a id='4.3'></a>
## 4.3 BLAST all candidate probes

In [144]:
# BLAST the probes that survived the crosstalk filter, region by region, using 16 threads.
# Long-running: the log below reports roughly 30-90 s per region, and a per-region FASTA
# is written under the library's `blast` sub-folder.
# NOTE(review): the hg38 database path printed in the log is not passed here, so it is
# presumably a default inside Blast_probes — confirm before running on another machine.
check.Blast_probes(kept_records, library_folder, num_threads=16)

- Blast probe against \\SMIRNOV\Chromatin_NAS_2\Libraries\Genomes\human\hg38\hg38
-- number of probes loaded: 385608
--- processing region: 1 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1.fasta
-- *blasting region: 1
--- total time for blast 1: 39.27963066101074
--- processing region: 1001 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1001.fasta
-- *blasting region: 1001
--- total time for blast 1001: 37.81762719154358
--- processing region: 1006 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1006.fasta
-- *blasting region: 1006
--- total time for blast 1006: 42.19583535194397
--- processing region: 101 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_101.fasta
-- *blasting region: 101
--- total time for blast 101: 42.178897857666016
--- processing region: 1011 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1011.fasta
-- *blasting regio

--- total time for blast 1181: 39.41302442550659
--- processing region: 1186 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1186.fasta
-- *blasting region: 1186
--- total time for blast 1186: 35.75106859207153
--- processing region: 1191 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1191.fasta
-- *blasting region: 1191
--- total time for blast 1191: 32.33786392211914
--- processing region: 1196 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1196.fasta
-- *blasting region: 1196
--- total time for blast 1196: 33.69080877304077
--- processing region: 1201 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1201.fasta
-- *blasting region: 1201
--- total time for blast 1201: 35.96647667884827
--- processing region: 1206 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1206.fasta
-- *blasting region: 1206
--- total time for blast 1206: 36.08663058280

--- total time for blast 1376: 45.3168740272522
--- processing region: 1381 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1381.fasta
-- *blasting region: 1381
--- total time for blast 1381: 40.53492188453674
--- processing region: 1386 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1386.fasta
-- *blasting region: 1386
--- total time for blast 1386: 36.77984404563904
--- processing region: 1391 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1391.fasta
-- *blasting region: 1391
--- total time for blast 1391: 42.77488827705383
--- processing region: 1396 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1396.fasta
-- *blasting region: 1396
--- total time for blast 1396: 46.440409660339355
--- processing region: 1401 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1401.fasta
-- *blasting region: 1401
--- total time for blast 1401: 44.81960821151

--- total time for blast 1571: 53.36672234535217
--- processing region: 1576 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1576.fasta
-- *blasting region: 1576
--- total time for blast 1576: 63.181013345718384
--- processing region: 1581 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1581.fasta
-- *blasting region: 1581
--- total time for blast 1581: 38.659369230270386
--- processing region: 1586 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1586.fasta
-- *blasting region: 1586
--- total time for blast 1586: 52.41525936126709
--- processing region: 1591 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1591.fasta
-- *blasting region: 1591
--- total time for blast 1591: 44.92254614830017
--- processing region: 1596 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1596.fasta
-- *blasting region: 1596
--- total time for blast 1596: 44.574594497

--- total time for blast 1771: 44.80302023887634
--- processing region: 1776 with 335 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1776.fasta
-- *blasting region: 1776
--- total time for blast 1776: 60.562318563461304
--- processing region: 1781 with 247 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1781.fasta
-- *blasting region: 1781
--- total time for blast 1781: 41.91860508918762
--- processing region: 1796 with 226 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1796.fasta
-- *blasting region: 1796
--- total time for blast 1796: 89.12143802642822
--- processing region: 1801 with 270 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_1801.fasta
-- *blasting region: 1801
--- total time for blast 1801: 48.83714318275452
--- processing region: 181 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_181.fasta
-- *blasting region: 181
--- total time for blast 181: 32.68942523002624

--- total time for blast 2061: 35.382444620132446
--- processing region: 2066 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2066.fasta
-- *blasting region: 2066
--- total time for blast 2066: 36.10872530937195
--- processing region: 2071 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2071.fasta
-- *blasting region: 2071
--- total time for blast 2071: 46.760759353637695
--- processing region: 2076 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2076.fasta
-- *blasting region: 2076
--- total time for blast 2076: 41.217376470565796
--- processing region: 2081 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2081.fasta
-- *blasting region: 2081
--- total time for blast 2081: 37.00560760498047
--- processing region: 2086 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2086.fasta
-- *blasting region: 2086
--- total time for blast 2086: 48.00250959

--- total time for blast 2276: 43.08695411682129
--- processing region: 2281 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2281.fasta
-- *blasting region: 2281
--- total time for blast 2281: 44.07290244102478
--- processing region: 2286 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2286.fasta
-- *blasting region: 2286
--- total time for blast 2286: 43.70109701156616
--- processing region: 2291 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2291.fasta
-- *blasting region: 2291
--- total time for blast 2291: 37.717697620391846
--- processing region: 2296 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2296.fasta
-- *blasting region: 2296
--- total time for blast 2296: 44.01702094078064
--- processing region: 2301 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2301.fasta
-- *blasting region: 2301
--- total time for blast 2301: 63.1821444034

--- total time for blast 2471: 53.30276298522949
--- processing region: 2476 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2476.fasta
-- *blasting region: 2476
--- total time for blast 2476: 51.98684310913086
--- processing region: 2481 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2481.fasta
-- *blasting region: 2481
--- total time for blast 2481: 36.29305291175842
--- processing region: 2486 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2486.fasta
-- *blasting region: 2486
--- total time for blast 2486: 37.0119948387146
--- processing region: 2491 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2491.fasta
-- *blasting region: 2491
--- total time for blast 2491: 37.53613901138306
--- processing region: 2496 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2496.fasta
-- *blasting region: 2496
--- total time for blast 2496: 39.331719398498

--- total time for blast 2676: 38.402536153793335
--- processing region: 2681 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2681.fasta
-- *blasting region: 2681
--- total time for blast 2681: 38.81387710571289
--- processing region: 2686 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2686.fasta
-- *blasting region: 2686
--- total time for blast 2686: 30.77720594406128
--- processing region: 2691 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2691.fasta
-- *blasting region: 2691
--- total time for blast 2691: 29.389431953430176
--- processing region: 2696 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2696.fasta
-- *blasting region: 2696
--- total time for blast 2696: 37.930046796798706
--- processing region: 2701 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2701.fasta
-- *blasting region: 2701
--- total time for blast 2701: 44.14950180

--- total time for blast 2871: 50.43318843841553
--- processing region: 2876 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2876.fasta
-- *blasting region: 2876
--- total time for blast 2876: 40.29466509819031
--- processing region: 2881 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2881.fasta
-- *blasting region: 2881
--- total time for blast 2881: 43.71222376823425
--- processing region: 2886 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2886.fasta
-- *blasting region: 2886
--- total time for blast 2886: 44.97976517677307
--- processing region: 2891 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2891.fasta
-- *blasting region: 2891
--- total time for blast 2891: 45.274250984191895
--- processing region: 2896 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_2896.fasta
-- *blasting region: 2896
--- total time for blast 2896: 37.0472235679

--- total time for blast 3066: 55.56987428665161
--- processing region: 3071 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3071.fasta
-- *blasting region: 3071
--- total time for blast 3071: 56.22230052947998
--- processing region: 3076 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3076.fasta
-- *blasting region: 3076
--- total time for blast 3076: 46.293822050094604
--- processing region: 3081 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3081.fasta
-- *blasting region: 3081
--- total time for blast 3081: 41.06787991523743
--- processing region: 3086 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3086.fasta
-- *blasting region: 3086
--- total time for blast 3086: 44.12202191352844
--- processing region: 3091 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3091.fasta
-- *blasting region: 3091
--- total time for blast 3091: 60.6325950622

--- total time for blast 326: 39.77456736564636
--- processing region: 3261 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3261.fasta
-- *blasting region: 3261
--- total time for blast 3261: 46.179850816726685
--- processing region: 3266 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3266.fasta
-- *blasting region: 3266
--- total time for blast 3266: 44.6247456073761
--- processing region: 3271 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3271.fasta
-- *blasting region: 3271
--- total time for blast 3271: 46.97721886634827
--- processing region: 3276 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3276.fasta
-- *blasting region: 3276
--- total time for blast 3276: 36.68991804122925
--- processing region: 3281 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3281.fasta
-- *blasting region: 3281
--- total time for blast 3281: 49.154865741729

--- total time for blast 3456: 43.820154666900635
--- processing region: 346 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_346.fasta
-- *blasting region: 346
--- total time for blast 346: 36.51487374305725
--- processing region: 3461 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3461.fasta
-- *blasting region: 3461
--- total time for blast 3461: 34.90760064125061
--- processing region: 3466 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3466.fasta
-- *blasting region: 3466
--- total time for blast 3466: 37.19484519958496
--- processing region: 3471 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3471.fasta
-- *blasting region: 3471
--- total time for blast 3471: 37.762510538101196
--- processing region: 3476 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3476.fasta
-- *blasting region: 3476
--- total time for blast 3476: 39.9159240722656

--- total time for blast 3646: 39.240036964416504
--- processing region: 3651 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3651.fasta
-- *blasting region: 3651
--- total time for blast 3651: 35.0236701965332
--- processing region: 3656 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3656.fasta
-- *blasting region: 3656
--- total time for blast 3656: 53.05698037147522
--- processing region: 366 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_366.fasta
-- *blasting region: 366
--- total time for blast 366: 42.20220708847046
--- processing region: 3661 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3661.fasta
-- *blasting region: 3661
--- total time for blast 3661: 37.324809312820435
--- processing region: 3666 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3666.fasta
-- *blasting region: 3666
--- total time for blast 3666: 41.40708065032959

--- total time for blast 3841: 34.47208762168884
--- processing region: 3846 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3846.fasta
-- *blasting region: 3846
--- total time for blast 3846: 42.34418964385986
--- processing region: 3851 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3851.fasta
-- *blasting region: 3851
--- total time for blast 3851: 57.44083118438721
--- processing region: 3856 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3856.fasta
-- *blasting region: 3856
--- total time for blast 3856: 35.461188077926636
--- processing region: 386 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_386.fasta
-- *blasting region: 386
--- total time for blast 386: 39.74452233314514
--- processing region: 3861 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_3861.fasta
-- *blasting region: 3861
--- total time for blast 3861: 49.56831526756286

--- total time for blast 4036: 39.47799015045166
--- processing region: 4041 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4041.fasta
-- *blasting region: 4041
--- total time for blast 4041: 36.04855132102966
--- processing region: 4046 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4046.fasta
-- *blasting region: 4046
--- total time for blast 4046: 68.57742357254028
--- processing region: 4051 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4051.fasta
-- *blasting region: 4051
--- total time for blast 4051: 52.50885462760925
--- processing region: 4056 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4056.fasta
-- *blasting region: 4056
--- total time for blast 4056: 43.41022348403931
--- processing region: 406 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_406.fasta
-- *blasting region: 406
--- total time for blast 406: 44.54538607597351


--- total time for blast 4231: 47.9759955406189
--- processing region: 4236 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4236.fasta
-- *blasting region: 4236
--- total time for blast 4236: 48.30950093269348
--- processing region: 4241 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4241.fasta
-- *blasting region: 4241
--- total time for blast 4241: 44.22398638725281
--- processing region: 4246 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4246.fasta
-- *blasting region: 4246
--- total time for blast 4246: 44.33450412750244
--- processing region: 4251 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4251.fasta
-- *blasting region: 4251
--- total time for blast 4251: 32.29377222061157
--- processing region: 4256 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4256.fasta
-- *blasting region: 4256
--- total time for blast 4256: 40.668651580810

--- total time for blast 4426: 42.83451795578003
--- processing region: 4431 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4431.fasta
-- *blasting region: 4431
--- total time for blast 4431: 40.17542290687561
--- processing region: 4436 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4436.fasta
-- *blasting region: 4436
--- total time for blast 4436: 44.11208701133728
--- processing region: 4441 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4441.fasta
-- *blasting region: 4441
--- total time for blast 4441: 35.05807447433472
--- processing region: 4446 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4446.fasta
-- *blasting region: 4446
--- total time for blast 4446: 34.632089614868164
--- processing region: 4451 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4451.fasta
-- *blasting region: 4451
--- total time for blast 4451: 42.3784065246

--- total time for blast 4616: 38.83055257797241
--- processing region: 4621 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4621.fasta
-- *blasting region: 4621
--- total time for blast 4621: 35.990238666534424
--- processing region: 4626 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4626.fasta
-- *blasting region: 4626
--- total time for blast 4626: 45.03602147102356
--- processing region: 4631 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4631.fasta
-- *blasting region: 4631
--- total time for blast 4631: 35.791197299957275
--- processing region: 4636 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4636.fasta
-- *blasting region: 4636
--- total time for blast 4636: 37.17099380493164
--- processing region: 4641 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4641.fasta
-- *blasting region: 4641
--- total time for blast 4641: 42.960001945

--- total time for blast 4811: 36.74315047264099
--- processing region: 4816 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4816.fasta
-- *blasting region: 4816
--- total time for blast 4816: 34.732786655426025
--- processing region: 4821 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4821.fasta
-- *blasting region: 4821
--- total time for blast 4821: 47.0902054309845
--- processing region: 4826 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4826.fasta
-- *blasting region: 4826
--- total time for blast 4826: 33.33813452720642
--- processing region: 4831 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4831.fasta
-- *blasting region: 4831
--- total time for blast 4831: 30.013431072235107
--- processing region: 4836 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_4836.fasta
-- *blasting region: 4836
--- total time for blast 4836: 39.1341993808

--- total time for blast 573: 29.009172677993774
--- processing region: 574 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_574.fasta
-- *blasting region: 574
--- total time for blast 574: 33.27935338020325
--- processing region: 575 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_575.fasta
-- *blasting region: 575
--- total time for blast 575: 44.708436489105225
--- processing region: 576 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_576.fasta
-- *blasting region: 576
--- total time for blast 576: 56.074458599090576
--- processing region: 577 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_577.fasta
-- *blasting region: 577
--- total time for blast 577: 48.29420447349548
--- processing region: 578 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_578.fasta
-- *blasting region: 578
--- total time for blast 578: 53.251973390579224
--- processin

--- total time for blast 615: 39.96345829963684
--- processing region: 616 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_616.fasta
-- *blasting region: 616
--- total time for blast 616: 40.02054476737976
--- processing region: 617 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_617.fasta
-- *blasting region: 617
--- total time for blast 617: 41.441121339797974
--- processing region: 618 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_618.fasta
-- *blasting region: 618
--- total time for blast 618: 44.14690184593201
--- processing region: 619 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_619.fasta
-- *blasting region: 619
--- total time for blast 619: 32.27335834503174
--- processing region: 620 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_620.fasta
-- *blasting region: 620
--- total time for blast 620: 29.178160905838013
--- processing 

--- total time for blast 659: 47.75735807418823
--- processing region: 66 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_66.fasta
-- *blasting region: 66
--- total time for blast 66: 32.16227388381958
--- processing region: 660 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_660.fasta
-- *blasting region: 660
--- total time for blast 660: 33.78565835952759
--- processing region: 661 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_661.fasta
-- *blasting region: 661
--- total time for blast 661: 32.466872692108154
--- processing region: 662 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_662.fasta
-- *blasting region: 662
--- total time for blast 662: 29.197235107421875
--- processing region: 663 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_663.fasta
-- *blasting region: 663
--- total time for blast 663: 37.06407833099365
--- processing regi

--- total time for blast 702: 53.628610134124756
--- processing region: 703 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_703.fasta
-- *blasting region: 703
--- total time for blast 703: 59.42450833320618
--- processing region: 704 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_704.fasta
-- *blasting region: 704
--- total time for blast 704: 50.51720714569092
--- processing region: 705 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_705.fasta
-- *blasting region: 705
--- total time for blast 705: 52.222246408462524
--- processing region: 706 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_706.fasta
-- *blasting region: 706
--- total time for blast 706: 53.577948808670044
--- processing region: 707 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_707.fasta
-- *blasting region: 707
--- total time for blast 707: 43.78398132324219
--- processing

--- total time for blast 745: 46.06180691719055
--- processing region: 746 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_746.fasta
-- *blasting region: 746
--- total time for blast 746: 43.36485505104065
--- processing region: 747 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_747.fasta
-- *blasting region: 747
--- total time for blast 747: 36.37946367263794
--- processing region: 748 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_748.fasta
-- *blasting region: 748
--- total time for blast 748: 40.70314812660217
--- processing region: 749 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_749.fasta
-- *blasting region: 749
--- total time for blast 749: 44.264599084854126
--- processing region: 750 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_750.fasta
-- *blasting region: 750
--- total time for blast 750: 43.82235383987427
--- processing r

--- total time for blast 886: 60.25855112075806
--- processing region: 891 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_891.fasta
-- *blasting region: 891
--- total time for blast 891: 53.845683574676514
--- processing region: 896 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_896.fasta
-- *blasting region: 896
--- total time for blast 896: 37.878173828125
--- processing region: 901 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_901.fasta
-- *blasting region: 901
--- total time for blast 901: 42.947447538375854
--- processing region: 906 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_906.fasta
-- *blasting region: 906
--- total time for blast 906: 36.39663362503052
--- processing region: 91 with 350 probes
--- *writing file: X:\Libraries\CTP-07\chr2\blast\probe_gene_91.fasta
-- *blasting region: 91
--- total time for blast 91: 40.57167077064514
--- processing region

In [149]:
# Screen the candidate probes using the BLAST results stored under library_folder.
# The call prints a per-region report (probes removed by "hard count" / "soft count",
# then how many probes are kept for the region).
# NOTE(review): the second argument (250) matches the "number of probes kept for this
# region: 250" lines in the cell output, so it is presumably the per-region probe cap;
# confirm against check.Screening_Probes_by_Blast before changing it.
(
    kept_pbs,          # probes that passed the screen
    blast_keep_dic,    # presumably keyed by region; verify against the callee
    hard_count_list,   # presumably the "hard count" values reported in the log; TODO confirm
    soft_count_list,   # presumably the "soft count" values reported in the log; TODO confirm
) = check.Screening_Probes_by_Blast(
    library_folder,
    250,
)

- Number of probes loaded: 385608
- Number of regions in this library: 1108
-- checking probes in region: 1
--- gene=1, id=0 removed by soft count = 44
--- gene=1, id=2 removed by soft count = 36
--- gene=1, id=9 removed by soft count = 36
--- gene=1, id=21 removed by hard count = 3
--- gene=1, id=31 removed by hard count = 3
--- gene=1, id=37 removed by soft count = 44
--- gene=1, id=73 removed by soft count = 36
--- gene=1, id=82 removed by soft count = 46
--- gene=1, id=112 removed by soft count = 32
--- gene=1, id=129 removed by soft count = 73
--- gene=1, id=131 removed by soft count = 280
--- number of probes: 350 , kept by blast: 339 , if remove dups: 337
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 6
--- gene=6, id=24 removed by soft count = 282
--- gene=6, id=112 removed by soft count = 116
--- gene=6, id=114 removed by soft count = 86
--- gene=6, id=115 removed by soft count = 73
--- ge

--- gene=66, id=0 removed by soft count = 529
--- gene=66, id=8 removed by soft count = 32
--- gene=66, id=95 removed by soft count = 46
--- gene=66, id=147 removed by soft count = 131
--- gene=66, id=148 removed by soft count = 260
--- number of probes: 350 , kept by blast: 345 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 71
--- gene=71, id=28 removed by soft count = 540
--- gene=71, id=32 removed by soft count = 43
--- gene=71, id=92 removed by soft count = 31
--- gene=71, id=141 removed by soft count = 41
--- gene=71, id=143 removed by hard count = 0
--- gene=71, id=144 removed by hard count = 0
--- gene=71, id=145 removed by hard count = 0
--- gene=71, id=166 removed by soft count = 48
--- gene=71, id=329 removed by soft count = 1864
--- gene=71, id=330 removed by soft count = 303
--- number of probes: 348 , kept by blast: 338 , if remove dups: 322
--- remove duplicated 

--- gene=161, id=30 removed by soft count = 56
--- gene=161, id=37 removed by soft count = 37
--- gene=161, id=301 removed by soft count = 37
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 166
--- gene=166, id=217 removed by soft count = 43
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 171
--- gene=171, id=28 removed by soft count = 3140
--- gene=171, id=52 removed by soft count = 196
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 176
--- gene=176, id=38 removed by soft count = 45
--- gene=176, id=39 removed by soft cou

--- gene=246, id=13 removed by soft count = 62
--- gene=246, id=16 removed by soft count = 116
--- gene=246, id=219 removed by soft count = 39
--- gene=246, id=220 removed by soft count = 179
--- gene=246, id=233 removed by soft count = 155
--- gene=246, id=234 removed by soft count = 506
--- number of probes: 350 , kept by blast: 344 , if remove dups: 344
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 251
--- number of probes: 350 , kept by blast: 350 , if remove dups: 350
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 256
--- gene=256, id=243 removed by soft count = 465
--- gene=256, id=293 removed by soft count = 159
--- gene=256, id=349 removed by soft count = 353
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of pro

--- gene=321, id=330 removed by soft count = 109
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 326
--- gene=326, id=40 removed by soft count = 61
--- gene=326, id=41 removed by soft count = 34
--- gene=326, id=123 removed by soft count = 262
--- gene=326, id=127 removed by soft count = 46
--- gene=326, id=240 removed by soft count = 53
--- number of probes: 350 , kept by blast: 345 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 331
--- gene=331, id=216 removed by soft count = 54
--- gene=331, id=293 removed by soft count = 111
--- gene=331, id=294 removed by soft count = 6094
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probe

--- gene=401, id=286 removed by soft count = 26991
--- number of probes: 350 , kept by blast: 341 , if remove dups: 341
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 406
--- gene=406, id=348 removed by soft count = 130
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 411
--- gene=411, id=35 removed by soft count = 56
--- gene=411, id=245 removed by soft count = 82
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 416
--- gene=416, id=5 removed by soft count = 79
--- gene=416, id=47 removed by soft count = 48
--- gene=416, id=136 removed by soft count = 156
--- gene=416, id=137 removed by soft

--- gene=491, id=306 removed by soft count = 49
--- gene=491, id=307 removed by soft count = 34
--- gene=491, id=308 removed by soft count = 187
--- gene=491, id=349 removed by soft count = 2000
--- number of probes: 350 , kept by blast: 334 , if remove dups: 334
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 496
--- gene=496, id=76 removed by soft count = 62
--- gene=496, id=173 removed by soft count = 73
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 501
--- gene=501, id=37 removed by soft count = 342
--- gene=501, id=38 removed by soft count = 55
--- gene=501, id=52 removed by soft count = 94
--- gene=501, id=193 removed by soft count = 118
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
---

--- gene=576, id=145 removed by soft count = 37
--- gene=576, id=156 removed by soft count = 37
--- gene=576, id=214 removed by soft count = 6658
--- gene=576, id=273 removed by soft count = 35
--- gene=576, id=334 removed by soft count = 362
--- gene=576, id=335 removed by soft count = 187
--- gene=576, id=336 removed by soft count = 38
--- gene=576, id=344 removed by soft count = 179
--- number of probes: 350 , kept by blast: 337 , if remove dups: 337
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 581
--- gene=581, id=100 removed by soft count = 48
--- gene=581, id=101 removed by soft count = 131
--- gene=581, id=152 removed by soft count = 211
--- gene=581, id=305 removed by soft count = 40
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 586

--- gene=636, id=283 removed by soft count = 222
--- gene=636, id=284 removed by soft count = 41
--- gene=636, id=285 removed by soft count = 147
--- gene=636, id=286 removed by soft count = 48
--- gene=636, id=287 removed by soft count = 154
--- gene=636, id=304 removed by soft count = 55
--- gene=636, id=306 removed by soft count = 816
--- gene=636, id=307 removed by soft count = 2225
--- gene=636, id=308 removed by soft count = 1197
--- gene=636, id=309 removed by soft count = 635
--- gene=636, id=311 removed by soft count = 143
--- gene=636, id=312 removed by soft count = 327
--- number of probes: 350 , kept by blast: 285 , if remove dups: 285
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 641
--- gene=641, id=15 removed by soft count = 40
--- gene=641, id=49 removed by soft count = 75
--- gene=641, id=129 removed by soft count = 338
--- gene=641, id=134 removed by soft count = 60
--- gene=641,

--- gene=691, id=315 removed by soft count = 521
--- gene=691, id=316 removed by soft count = 52
--- gene=691, id=332 removed by soft count = 109
--- number of probes: 350 , kept by blast: 333 , if remove dups: 333
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 696
--- gene=696, id=312 removed by soft count = 1024
--- gene=696, id=313 removed by soft count = 201
--- gene=696, id=314 removed by soft count = 1528
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 701
--- gene=701, id=26 removed by soft count = 145
--- gene=701, id=40 removed by soft count = 41
--- gene=701, id=41 removed by soft count = 47
--- gene=701, id=42 removed by soft count = 7600
--- gene=701, id=127 removed by soft count = 35
--- gene=701, id=232 removed by soft count = 56


--- number of probes: 350 , kept by blast: 350 , if remove dups: 350
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 741
--- gene=741, id=165 removed by soft count = 43
--- gene=741, id=170 removed by soft count = 42
--- gene=741, id=237 removed by soft count = 52
--- gene=741, id=311 removed by soft count = 234
--- gene=741, id=312 removed by soft count = 36
--- gene=741, id=313 removed by soft count = 32
--- gene=741, id=333 removed by soft count = 2857
--- number of probes: 350 , kept by blast: 343 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 746
--- gene=746, id=17 removed by soft count = 40
--- gene=746, id=104 removed by soft count = 38
--- gene=746, id=219 removed by soft count = 49
--- gene=746, id=242 removed by soft count = 651
--- gene=746, id=291 removed by soft count = 42
---

--- gene=821, id=64 removed by soft count = 41
--- gene=821, id=112 removed by soft count = 3850
--- gene=821, id=113 removed by soft count = 6583
--- gene=821, id=150 removed by soft count = 98
--- gene=821, id=151 removed by soft count = 93
--- gene=821, id=155 removed by soft count = 231
--- gene=821, id=157 removed by soft count = 62
--- gene=821, id=173 removed by soft count = 204
--- gene=821, id=251 removed by soft count = 249
--- gene=821, id=256 removed by soft count = 93
--- gene=821, id=292 removed by soft count = 44
--- gene=821, id=315 removed by soft count = 290
--- number of probes: 350 , kept by blast: 338 , if remove dups: 338
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 826
--- gene=826, id=123 removed by soft count = 100
--- gene=826, id=151 removed by soft count = 51
--- gene=826, id=172 removed by soft count = 469
--- gene=826, id=173 removed by soft count = 78
--- gene=826, 

--- gene=886, id=68 removed by soft count = 62
--- gene=886, id=101 removed by soft count = 194
--- gene=886, id=102 removed by soft count = 48
--- gene=886, id=110 removed by soft count = 94
--- gene=886, id=112 removed by soft count = 146
--- gene=886, id=113 removed by soft count = 135
--- gene=886, id=115 removed by soft count = 244
--- gene=886, id=122 removed by soft count = 164
--- gene=886, id=123 removed by soft count = 68
--- gene=886, id=137 removed by soft count = 32
--- gene=886, id=141 removed by soft count = 57191
--- gene=886, id=171 removed by soft count = 42
--- gene=886, id=172 removed by soft count = 126
--- gene=886, id=177 removed by soft count = 1430
--- gene=886, id=221 removed by soft count = 31
--- gene=886, id=231 removed by soft count = 485
--- gene=886, id=232 removed by soft count = 157
--- gene=886, id=233 removed by soft count = 37
--- gene=886, id=236 removed by soft count = 48
--- gene=886, id=242 removed by soft count = 43
--- number of probes: 350 , 

--- gene=961, id=252 removed by soft count = 44
--- gene=961, id=267 removed by soft count = 1422
--- number of probes: 350 , kept by blast: 342 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 966
--- gene=966, id=88 removed by soft count = 440
--- gene=966, id=207 removed by soft count = 83
--- gene=966, id=278 removed by soft count = 91
--- gene=966, id=281 removed by soft count = 53
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 971
--- number of probes: 350 , kept by blast: 350 , if remove dups: 350
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 976
--- gene=976, id=94 removed by soft count = 400
--- gene=976, id=95 removed by soft

--- gene=1036, id=137 removed by soft count = 319
--- gene=1036, id=149 removed by soft count = 498
--- gene=1036, id=151 removed by soft count = 36
--- gene=1036, id=196 removed by soft count = 960
--- gene=1036, id=288 removed by soft count = 105
--- gene=1036, id=291 removed by soft count = 134
--- gene=1036, id=305 removed by soft count = 125
--- gene=1036, id=307 removed by soft count = 73
--- gene=1036, id=308 removed by soft count = 161
--- gene=1036, id=318 removed by soft count = 38
--- gene=1036, id=319 removed by soft count = 38
--- gene=1036, id=321 removed by soft count = 231
--- gene=1036, id=323 removed by soft count = 77
--- gene=1036, id=328 removed by soft count = 60
--- gene=1036, id=331 removed by soft count = 197
--- gene=1036, id=332 removed by soft count = 171
--- gene=1036, id=333 removed by soft count = 40
--- gene=1036, id=334 removed by soft count = 1030
--- gene=1036, id=336 removed by soft count = 151
--- gene=1036, id=337 removed by soft count = 105
--- ge

--- gene=1061, id=252 removed by soft count = 34
--- gene=1061, id=335 removed by soft count = 424
--- gene=1061, id=337 removed by soft count = 39
--- gene=1061, id=341 removed by soft count = 70
--- gene=1061, id=342 removed by soft count = 175
--- gene=1061, id=343 removed by soft count = 105
--- gene=1061, id=345 removed by soft count = 405
--- number of probes: 350 , kept by blast: 336 , if remove dups: 336
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1066
--- gene=1066, id=63 removed by soft count = 189
--- gene=1066, id=78 removed by soft count = 2067
--- gene=1066, id=117 removed by soft count = 1281
--- gene=1066, id=118 removed by soft count = 101
--- gene=1066, id=139 removed by soft count = 87
--- gene=1066, id=140 removed by soft count = 47
--- gene=1066, id=141 removed by soft count = 86
--- gene=1066, id=143 removed by soft count = 33
--- gene=1066, id=145 removed by soft count = 1

--- gene=1111, id=5 removed by soft count = 67
--- gene=1111, id=6 removed by soft count = 58
--- gene=1111, id=51 removed by soft count = 101
--- gene=1111, id=112 removed by soft count = 63
--- gene=1111, id=189 removed by soft count = 3118
--- gene=1111, id=277 removed by soft count = 52
--- gene=1111, id=335 removed by soft count = 2482
--- number of probes: 350 , kept by blast: 343 , if remove dups: 338
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1116
--- gene=1116, id=0 removed by soft count = 119
--- gene=1116, id=1 removed by soft count = 719
--- gene=1116, id=6 removed by soft count = 40
--- gene=1116, id=13 removed by soft count = 37
--- gene=1116, id=14 removed by soft count = 81
--- gene=1116, id=15 removed by soft count = 34
--- gene=1116, id=127 removed by soft count = 56
--- gene=1116, id=237 removed by soft count = 76
--- gene=1116, id=238 removed by soft count = 97
--- gene=1116

--- gene=1146, id=141 removed by soft count = 118
--- gene=1146, id=143 removed by soft count = 40
--- gene=1146, id=145 removed by soft count = 1096
--- gene=1146, id=146 removed by soft count = 42
--- gene=1146, id=148 removed by soft count = 263
--- gene=1146, id=149 removed by soft count = 104
--- gene=1146, id=150 removed by soft count = 45
--- gene=1146, id=157 removed by soft count = 141
--- gene=1146, id=158 removed by soft count = 293
--- gene=1146, id=166 removed by soft count = 165
--- gene=1146, id=168 removed by soft count = 48
--- gene=1146, id=169 removed by soft count = 151
--- gene=1146, id=173 removed by soft count = 180
--- gene=1146, id=175 removed by soft count = 140
--- gene=1146, id=176 removed by soft count = 801
--- gene=1146, id=178 removed by soft count = 1009
--- gene=1146, id=179 removed by soft count = 161
--- gene=1146, id=180 removed by soft count = 97
--- gene=1146, id=209 removed by soft count = 108
--- gene=1146, id=254 removed by soft count = 38
--- 

--- number of probes: 350 , kept by blast: 327 , if remove dups: 326
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1221
--- gene=1221, id=120 removed by soft count = 128
--- gene=1221, id=185 removed by soft count = 448
--- gene=1221, id=233 removed by soft count = 1017
--- gene=1221, id=235 removed by soft count = 49
--- gene=1221, id=236 removed by soft count = 90
--- gene=1221, id=241 removed by soft count = 153
--- gene=1221, id=242 removed by soft count = 38
--- gene=1221, id=270 removed by soft count = 53
--- gene=1221, id=271 removed by soft count = 580
--- gene=1221, id=272 removed by soft count = 2225
--- gene=1221, id=273 removed by soft count = 3021
--- gene=1221, id=274 removed by soft count = 12116
--- gene=1221, id=275 removed by soft count = 17910
--- gene=1221, id=276 removed by soft count = 3124
--- gene=1221, id=280 removed by soft count = 559
--- gene=1221, id=289 removed by sof

--- gene=1266, id=121 removed by soft count = 760
--- gene=1266, id=122 removed by soft count = 785
--- gene=1266, id=123 removed by soft count = 485
--- gene=1266, id=128 removed by soft count = 429
--- gene=1266, id=146 removed by soft count = 93
--- gene=1266, id=148 removed by soft count = 34
--- gene=1266, id=150 removed by soft count = 45
--- gene=1266, id=154 removed by soft count = 39
--- gene=1266, id=179 removed by soft count = 39
--- gene=1266, id=186 removed by soft count = 153
--- gene=1266, id=188 removed by soft count = 143
--- gene=1266, id=189 removed by soft count = 65
--- gene=1266, id=190 removed by soft count = 1137
--- gene=1266, id=191 removed by soft count = 430
--- gene=1266, id=192 removed by soft count = 845
--- gene=1266, id=193 removed by soft count = 639
--- gene=1266, id=194 removed by soft count = 477
--- gene=1266, id=195 removed by soft count = 1006
--- gene=1266, id=196 removed by soft count = 124
--- gene=1266, id=197 removed by soft count = 1033
---

--- gene=1321, id=154 removed by soft count = 91
--- gene=1321, id=172 removed by soft count = 57
--- gene=1321, id=173 removed by soft count = 100
--- number of probes: 350 , kept by blast: 345 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1326
--- number of probes: 350 , kept by blast: 350 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1331
--- number of probes: 350 , kept by blast: 350 , if remove dups: 350
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1336
--- gene=1336, id=192 removed by soft count = 128
--- gene=1336, id=210 removed by soft count = 139
--- gene=1336, id=211 removed by soft count = 2296
--- gene=1336, id=247 removed by soft count = 33
--- gene=1336, id=282 

--- number of probes: 350 , kept by blast: 345 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1406
--- gene=1406, id=7 removed by soft count = 217
--- gene=1406, id=8 removed by soft count = 1188
--- gene=1406, id=13 removed by soft count = 5173
--- gene=1406, id=15 removed by soft count = 59
--- gene=1406, id=17 removed by soft count = 82
--- gene=1406, id=18 removed by soft count = 149
--- gene=1406, id=19 removed by soft count = 59
--- gene=1406, id=21 removed by soft count = 187
--- gene=1406, id=24 removed by soft count = 102
--- gene=1406, id=27 removed by soft count = 41
--- gene=1406, id=28 removed by soft count = 61
--- gene=1406, id=66 removed by soft count = 68
--- gene=1406, id=116 removed by soft count = 183
--- gene=1406, id=120 removed by soft count = 40
--- gene=1406, id=121 removed by soft count = 117
--- gene=1406, id=122 removed by soft count = 39
--- gene=1

--- gene=1421, id=303 removed by soft count = 165
--- gene=1421, id=305 removed by soft count = 112
--- gene=1421, id=306 removed by soft count = 112
--- gene=1421, id=307 removed by soft count = 533
--- gene=1421, id=308 removed by soft count = 212
--- gene=1421, id=309 removed by soft count = 329
--- gene=1421, id=310 removed by soft count = 69
--- gene=1421, id=311 removed by soft count = 31
--- gene=1421, id=312 removed by soft count = 31
--- gene=1421, id=313 removed by soft count = 205
--- gene=1421, id=314 removed by soft count = 38
--- gene=1421, id=315 removed by soft count = 69
--- gene=1421, id=317 removed by soft count = 47
--- gene=1421, id=318 removed by soft count = 36
--- gene=1421, id=320 removed by soft count = 39
--- gene=1421, id=322 removed by soft count = 369
--- gene=1421, id=323 removed by soft count = 77
--- gene=1421, id=324 removed by soft count = 87
--- gene=1421, id=326 removed by soft count = 38
--- gene=1421, id=327 removed by soft count = 54
--- gene=142

--- gene=1471, id=189 removed by soft count = 1253
--- gene=1471, id=191 removed by soft count = 643
--- gene=1471, id=192 removed by soft count = 675
--- gene=1471, id=193 removed by soft count = 295
--- gene=1471, id=194 removed by soft count = 2771
--- gene=1471, id=195 removed by soft count = 1666
--- gene=1471, id=197 removed by soft count = 190
--- gene=1471, id=198 removed by soft count = 281
--- gene=1471, id=199 removed by soft count = 116
--- gene=1471, id=200 removed by soft count = 128
--- gene=1471, id=202 removed by soft count = 419
--- gene=1471, id=203 removed by soft count = 516
--- gene=1471, id=204 removed by soft count = 318
--- gene=1471, id=205 removed by soft count = 122
--- gene=1471, id=206 removed by soft count = 792
--- gene=1471, id=279 removed by soft count = 137
--- gene=1471, id=283 removed by soft count = 64
--- gene=1471, id=307 removed by soft count = 172
--- gene=1471, id=308 removed by soft count = 50
--- gene=1471, id=309 removed by soft count = 51


--- gene=1541, id=180 removed by soft count = 116
--- gene=1541, id=182 removed by soft count = 183
--- gene=1541, id=296 removed by soft count = 109
--- gene=1541, id=344 removed by soft count = 316
--- gene=1541, id=345 removed by soft count = 112
--- number of probes: 350 , kept by blast: 344 , if remove dups: 344
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1546
--- gene=1546, id=33 removed by soft count = 34
--- gene=1546, id=82 removed by soft count = 962
--- gene=1546, id=103 removed by soft count = 45
--- gene=1546, id=146 removed by soft count = 96
--- gene=1546, id=147 removed by soft count = 157
--- gene=1546, id=149 removed by soft count = 54
--- gene=1546, id=151 removed by soft count = 55
--- gene=1546, id=164 removed by soft count = 49
--- gene=1546, id=243 removed by soft count = 380
--- number of probes: 350 , kept by blast: 341 , if remove dups: 340
--- remove duplicated probes


--- gene=1576, id=200 removed by soft count = 2928
--- gene=1576, id=237 removed by soft count = 20787
--- gene=1576, id=303 removed by soft count = 60
--- gene=1576, id=305 removed by soft count = 151
--- gene=1576, id=306 removed by soft count = 68
--- gene=1576, id=308 removed by soft count = 46
--- gene=1576, id=310 removed by soft count = 63
--- gene=1576, id=312 removed by soft count = 51
--- gene=1576, id=325 removed by soft count = 44
--- number of probes: 350 , kept by blast: 331 , if remove dups: 331
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1581
--- gene=1581, id=243 removed by soft count = 43
--- gene=1581, id=347 removed by soft count = 49
--- gene=1581, id=349 removed by soft count = 49
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in

--- gene=1626, id=125 removed by soft count = 51
--- gene=1626, id=130 removed by soft count = 42
--- gene=1626, id=143 removed by soft count = 49
--- gene=1626, id=144 removed by soft count = 49
--- gene=1626, id=233 removed by soft count = 41
--- gene=1626, id=239 removed by soft count = 75
--- gene=1626, id=244 removed by soft count = 106
--- gene=1626, id=330 removed by soft count = 216
--- number of probes: 350 , kept by blast: 331 , if remove dups: 331
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1631
--- gene=1631, id=9 removed by soft count = 36
--- gene=1631, id=34 removed by soft count = 31
--- gene=1631, id=103 removed by soft count = 53
--- gene=1631, id=185 removed by soft count = 41
--- number of probes: 350 , kept by blast: 346 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region

--- gene=1656, id=161 removed by soft count = 462
--- gene=1656, id=162 removed by soft count = 172
--- gene=1656, id=163 removed by soft count = 453
--- gene=1656, id=164 removed by soft count = 137
--- gene=1656, id=165 removed by soft count = 110
--- gene=1656, id=166 removed by soft count = 2872
--- gene=1656, id=168 removed by soft count = 254
--- gene=1656, id=174 removed by soft count = 35
--- gene=1656, id=183 removed by soft count = 49
--- gene=1656, id=187 removed by soft count = 48
--- gene=1656, id=191 removed by soft count = 33
--- gene=1656, id=224 removed by soft count = 405
--- gene=1656, id=241 removed by soft count = 121
--- gene=1656, id=242 removed by soft count = 2107
--- gene=1656, id=243 removed by soft count = 296
--- gene=1656, id=245 removed by soft count = 449
--- gene=1656, id=264 removed by soft count = 38
--- gene=1656, id=265 removed by soft count = 117
--- gene=1656, id=267 removed by soft count = 64
--- gene=1656, id=268 removed by soft count = 104
--- 

--- gene=1696, id=118 removed by soft count = 84
--- gene=1696, id=122 removed by soft count = 87
--- gene=1696, id=129 removed by soft count = 188
--- gene=1696, id=135 removed by soft count = 52
--- gene=1696, id=136 removed by soft count = 33
--- gene=1696, id=145 removed by soft count = 110
--- gene=1696, id=146 removed by soft count = 109
--- gene=1696, id=147 removed by soft count = 1394
--- gene=1696, id=157 removed by hard count = 3
--- gene=1696, id=162 removed by soft count = 41
--- gene=1696, id=163 removed by soft count = 70
--- gene=1696, id=164 removed by soft count = 37
--- gene=1696, id=165 removed by soft count = 48
--- gene=1696, id=166 removed by soft count = 39
--- gene=1696, id=263 removed by soft count = 164
--- gene=1696, id=265 removed by soft count = 128
--- gene=1696, id=270 removed by soft count = 35
--- gene=1696, id=283 removed by soft count = 76
--- gene=1696, id=333 removed by soft count = 51
--- number of probes: 350 , kept by blast: 325 , if remove dups

--- gene=1766, id=211 removed by soft count = 103
--- gene=1766, id=308 removed by soft count = 1148
--- number of probes: 350 , kept by blast: 344 , if remove dups: 344
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1771
--- gene=1771, id=22 removed by soft count = 2891
--- gene=1771, id=56 removed by soft count = 34
--- gene=1771, id=73 removed by soft count = 102
--- gene=1771, id=91 removed by soft count = 48
--- gene=1771, id=96 removed by soft count = 60
--- gene=1771, id=178 removed by soft count = 33
--- gene=1771, id=275 removed by soft count = 39
--- gene=1771, id=285 removed by soft count = 63
--- number of probes: 350 , kept by blast: 342 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 1776
--- gene=1776, id=47 removed by soft count = 88
--- gene=1776, id=55 removed by soft coun

--- gene=1796, id=184 removed by soft count = 725
--- gene=1796, id=185 removed by hard count = 3
--- gene=1796, id=186 removed by soft count = 1388
--- gene=1796, id=187 removed by soft count = 665
--- gene=1796, id=188 removed by soft count = 2142
--- gene=1796, id=189 removed by soft count = 296
--- gene=1796, id=190 removed by soft count = 232
--- gene=1796, id=191 removed by soft count = 2531
--- gene=1796, id=192 removed by soft count = 553
--- gene=1796, id=193 removed by soft count = 1026
--- gene=1796, id=194 removed by soft count = 1178
--- gene=1796, id=195 removed by soft count = 995
--- gene=1796, id=196 removed by soft count = 501
--- gene=1796, id=197 removed by soft count = 890
--- gene=1796, id=198 removed by soft count = 868
--- gene=1796, id=199 removed by soft count = 1820
--- gene=1796, id=200 removed by soft count = 2267
--- gene=1796, id=201 removed by soft count = 1931
--- gene=1796, id=202 removed by soft count = 1065
--- gene=1796, id=203 removed by soft count

--- gene=1836, id=253 removed by hard count = 3
--- gene=1836, id=258 removed by hard count = 3
--- gene=1836, id=263 removed by hard count = 3
--- gene=1836, id=272 removed by hard count = 3
--- gene=1836, id=275 removed by hard count = 3
--- gene=1836, id=283 removed by hard count = 3
--- gene=1836, id=284 removed by hard count = 4
--- gene=1836, id=285 removed by hard count = 4
--- gene=1836, id=288 removed by hard count = 3
--- gene=1836, id=291 removed by hard count = 4
--- gene=1836, id=292 removed by hard count = 4
--- gene=1836, id=293 removed by hard count = 4
--- gene=1836, id=295 removed by hard count = 4
--- gene=1836, id=300 removed by hard count = 3
--- gene=1836, id=303 removed by hard count = 3
--- gene=1836, id=304 removed by hard count = 3
--- gene=1836, id=307 removed by hard count = 3
--- gene=1836, id=313 removed by hard count = 3
--- gene=1836, id=319 removed by hard count = 3
--- gene=1836, id=321 removed by hard count = 6
--- gene=1836, id=322 removed by hard co

--- gene=1896, id=256 removed by soft count = 199
--- gene=1896, id=257 removed by soft count = 152
--- gene=1896, id=259 removed by soft count = 33
--- gene=1896, id=260 removed by soft count = 65
--- gene=1896, id=261 removed by soft count = 64
--- gene=1896, id=262 removed by soft count = 122
--- gene=1896, id=263 removed by soft count = 241
--- gene=1896, id=264 removed by soft count = 341
--- gene=1896, id=265 removed by soft count = 1596
--- gene=1896, id=266 removed by soft count = 50
--- gene=1896, id=267 removed by soft count = 96
--- gene=1896, id=268 removed by soft count = 127
--- gene=1896, id=269 removed by soft count = 220
--- gene=1896, id=270 removed by soft count = 324
--- gene=1896, id=271 removed by soft count = 203
--- gene=1896, id=272 removed by soft count = 43
--- gene=1896, id=273 removed by soft count = 51
--- gene=1896, id=274 removed by soft count = 84
--- gene=1896, id=275 removed by hard count = 5
--- gene=1896, id=276 removed by soft count = 131
--- gene=

--- gene=1941, id=45 removed by hard count = 3
--- gene=1941, id=55 removed by soft count = 44
--- gene=1941, id=73 removed by soft count = 43
--- gene=1941, id=74 removed by soft count = 44
--- gene=1941, id=75 removed by soft count = 68
--- gene=1941, id=76 removed by soft count = 106
--- gene=1941, id=82 removed by soft count = 210
--- gene=1941, id=93 removed by hard count = 3
--- gene=1941, id=96 removed by soft count = 373
--- gene=1941, id=97 removed by soft count = 1056
--- gene=1941, id=105 removed by soft count = 33
--- gene=1941, id=117 removed by soft count = 50
--- gene=1941, id=153 removed by hard count = 3
--- gene=1941, id=158 removed by hard count = 5
--- gene=1941, id=159 removed by hard count = 3
--- gene=1941, id=176 removed by soft count = 534
--- gene=1941, id=189 removed by soft count = 771
--- gene=1941, id=190 removed by soft count = 139
--- number of probes: 198 , kept by blast: 170 , if remove dups: 165
-- number of probes kept for this region: 170
-- checkin

--- gene=1981, id=254 removed by soft count = 82
--- gene=1981, id=255 removed by soft count = 52
--- gene=1981, id=256 removed by soft count = 65
--- gene=1981, id=257 removed by soft count = 73
--- gene=1981, id=258 removed by soft count = 71
--- gene=1981, id=259 removed by soft count = 65
--- gene=1981, id=260 removed by soft count = 39
--- gene=1981, id=261 removed by soft count = 62
--- gene=1981, id=300 removed by soft count = 44
--- gene=1981, id=302 removed by soft count = 113
--- gene=1981, id=306 removed by soft count = 60
--- gene=1981, id=307 removed by soft count = 90
--- gene=1981, id=308 removed by soft count = 292
--- gene=1981, id=309 removed by soft count = 1200
--- gene=1981, id=310 removed by soft count = 708
--- gene=1981, id=311 removed by soft count = 420
--- gene=1981, id=312 removed by soft count = 717
--- gene=1981, id=313 removed by soft count = 1186
--- gene=1981, id=314 removed by soft count = 1653
--- gene=1981, id=315 removed by soft count = 115
--- gene

--- gene=2041, id=276 removed by soft count = 845
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2046
--- gene=2046, id=29 removed by soft count = 1789
--- gene=2046, id=125 removed by soft count = 44
--- gene=2046, id=180 removed by soft count = 32
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2051
--- gene=2051, id=210 removed by soft count = 41
--- gene=2051, id=304 removed by soft count = 50
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2056
--- gene=2056, id=10 removed by soft count = 425
--- gene=2056, id=12 remo

--- gene=2111, id=192 removed by soft count = 153
--- gene=2111, id=213 removed by soft count = 78
--- gene=2111, id=216 removed by soft count = 71
--- gene=2111, id=220 removed by soft count = 74
--- gene=2111, id=221 removed by soft count = 63
--- gene=2111, id=238 removed by soft count = 39
--- gene=2111, id=343 removed by soft count = 33
--- number of probes: 350 , kept by blast: 333 , if remove dups: 333
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2116
--- gene=2116, id=66 removed by soft count = 81
--- gene=2116, id=68 removed by soft count = 65
--- gene=2116, id=309 removed by soft count = 66
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2121
--- gene=2121, id=16 removed by soft count = 191
--- gene=2121, id=164 removed by soft coun

--- gene=2171, id=42 removed by soft count = 112
--- gene=2171, id=43 removed by soft count = 129
--- gene=2171, id=44 removed by soft count = 97
--- gene=2171, id=45 removed by soft count = 308
--- gene=2171, id=46 removed by soft count = 165
--- gene=2171, id=54 removed by soft count = 517
--- gene=2171, id=55 removed by soft count = 49
--- gene=2171, id=59 removed by soft count = 3489
--- gene=2171, id=91 removed by soft count = 180
--- gene=2171, id=92 removed by soft count = 41
--- gene=2171, id=98 removed by soft count = 58
--- gene=2171, id=101 removed by soft count = 207
--- gene=2171, id=102 removed by soft count = 41
--- gene=2171, id=104 removed by soft count = 58
--- gene=2171, id=113 removed by soft count = 169
--- gene=2171, id=114 removed by soft count = 656
--- gene=2171, id=115 removed by soft count = 53
--- gene=2171, id=120 removed by soft count = 54
--- gene=2171, id=125 removed by soft count = 38
--- gene=2171, id=155 removed by soft count = 922
--- gene=2171, id=1

--- gene=2246, id=18 removed by soft count = 35
--- gene=2246, id=75 removed by soft count = 35
--- gene=2246, id=87 removed by soft count = 202
--- gene=2246, id=91 removed by soft count = 92
--- gene=2246, id=95 removed by soft count = 48
--- gene=2246, id=96 removed by soft count = 82
--- gene=2246, id=103 removed by soft count = 48
--- gene=2246, id=104 removed by soft count = 40
--- gene=2246, id=141 removed by soft count = 656
--- gene=2246, id=152 removed by soft count = 613
--- gene=2246, id=322 removed by soft count = 101
--- number of probes: 350 , kept by blast: 339 , if remove dups: 339
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2251
--- gene=2251, id=17 removed by soft count = 214
--- gene=2251, id=18 removed by hard count = 3
--- gene=2251, id=29 removed by soft count = 876
--- gene=2251, id=30 removed by soft count = 88
--- gene=2251, id=34 removed by soft count = 167
--- gene=22

--- gene=2271, id=226 removed by hard count = 3
--- gene=2271, id=227 removed by hard count = 4
--- gene=2271, id=228 removed by hard count = 3
--- gene=2271, id=229 removed by hard count = 3
--- gene=2271, id=230 removed by hard count = 4
--- gene=2271, id=233 removed by hard count = 4
--- gene=2271, id=237 removed by hard count = 3
--- gene=2271, id=239 removed by hard count = 4
--- gene=2271, id=240 removed by hard count = 4
--- gene=2271, id=242 removed by hard count = 4
--- gene=2271, id=243 removed by hard count = 4
--- gene=2271, id=246 removed by hard count = 4
--- gene=2271, id=248 removed by hard count = 3
--- gene=2271, id=249 removed by hard count = 4
--- gene=2271, id=251 removed by hard count = 3
--- gene=2271, id=253 removed by hard count = 3
--- gene=2271, id=254 removed by hard count = 3
--- gene=2271, id=255 removed by hard count = 4
--- gene=2271, id=256 removed by hard count = 3
--- gene=2271, id=257 removed by hard count = 3
--- gene=2271, id=258 removed by hard co

--- gene=2306, id=29 removed by soft count = 1154
--- gene=2306, id=31 removed by soft count = 70
--- gene=2306, id=32 removed by soft count = 634
--- gene=2306, id=33 removed by soft count = 121
--- gene=2306, id=185 removed by soft count = 97
--- gene=2306, id=220 removed by soft count = 77
--- gene=2306, id=221 removed by soft count = 279
--- gene=2306, id=222 removed by soft count = 265
--- gene=2306, id=253 removed by soft count = 74
--- gene=2306, id=348 removed by soft count = 36
--- number of probes: 350 , kept by blast: 340 , if remove dups: 340
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2311
--- gene=2311, id=75 removed by soft count = 31
--- gene=2311, id=97 removed by soft count = 999
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in regi

--- gene=2331, id=247 removed by soft count = 199
--- gene=2331, id=276 removed by soft count = 36
--- gene=2331, id=283 removed by soft count = 44
--- gene=2331, id=284 removed by soft count = 50
--- gene=2331, id=293 removed by soft count = 120
--- gene=2331, id=294 removed by soft count = 1554
--- number of probes: 350 , kept by blast: 319 , if remove dups: 319
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2336
--- gene=2336, id=19 removed by soft count = 36
--- gene=2336, id=41 removed by soft count = 67
--- gene=2336, id=44 removed by soft count = 97
--- gene=2336, id=45 removed by soft count = 47
--- gene=2336, id=47 removed by soft count = 250
--- gene=2336, id=48 removed by soft count = 69
--- gene=2336, id=49 removed by soft count = 97
--- gene=2336, id=50 removed by soft count = 49
--- gene=2336, id=51 removed by soft count = 115
--- gene=2336, id=52 removed by soft count = 39
--- gene=2

--- gene=2366, id=207 removed by soft count = 70
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2371
--- gene=2371, id=148 removed by soft count = 36
--- gene=2371, id=149 removed by soft count = 109
--- gene=2371, id=150 removed by soft count = 35
--- gene=2371, id=154 removed by soft count = 451
--- gene=2371, id=157 removed by soft count = 319
--- gene=2371, id=274 removed by soft count = 37
--- gene=2371, id=275 removed by soft count = 47
--- number of probes: 350 , kept by blast: 343 , if remove dups: 343
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2376
--- gene=2376, id=181 removed by soft count = 62
--- gene=2376, id=236 removed by soft count = 41
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplica

--- number of probes: 350 , kept by blast: 335 , if remove dups: 335
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2441
--- gene=2441, id=172 removed by soft count = 248
--- gene=2441, id=179 removed by soft count = 70
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2446
--- gene=2446, id=107 removed by soft count = 102
--- gene=2446, id=125 removed by soft count = 66
--- gene=2446, id=254 removed by soft count = 77
--- gene=2446, id=255 removed by soft count = 215
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2451
--- gene=2451, id=33 removed by soft count = 243
--- gene=2451, id=114 re

--- gene=2501, id=261 removed by soft count = 49
--- gene=2501, id=262 removed by soft count = 73
--- gene=2501, id=263 removed by soft count = 79
--- gene=2501, id=264 removed by soft count = 124
--- gene=2501, id=265 removed by soft count = 54
--- gene=2501, id=267 removed by soft count = 54
--- number of probes: 350 , kept by blast: 331 , if remove dups: 331
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2506
--- gene=2506, id=39 removed by soft count = 116
--- gene=2506, id=40 removed by soft count = 34
--- gene=2506, id=56 removed by soft count = 34
--- gene=2506, id=62 removed by soft count = 42
--- gene=2506, id=63 removed by soft count = 50
--- gene=2506, id=66 removed by soft count = 38
--- gene=2506, id=69 removed by soft count = 172
--- gene=2506, id=70 removed by soft count = 89
--- gene=2506, id=76 removed by soft count = 265
--- gene=2506, id=83 removed by soft count = 948
--- gene=25

--- gene=2551, id=90 removed by soft count = 33
--- gene=2551, id=93 removed by soft count = 127
--- gene=2551, id=107 removed by soft count = 82
--- gene=2551, id=109 removed by soft count = 126
--- gene=2551, id=192 removed by soft count = 106
--- gene=2551, id=252 removed by soft count = 35
--- gene=2551, id=273 removed by soft count = 93
--- gene=2551, id=311 removed by soft count = 369
--- number of probes: 350 , kept by blast: 342 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2556
--- gene=2556, id=202 removed by soft count = 225
--- gene=2556, id=207 removed by soft count = 2210
--- gene=2556, id=215 removed by soft count = 85
--- gene=2556, id=216 removed by soft count = 39
--- gene=2556, id=248 removed by soft count = 31
--- gene=2556, id=288 removed by soft count = 1419
--- gene=2556, id=292 removed by soft count = 37
--- gene=2556, id=295 removed by soft count = 47

--- gene=2591, id=230 removed by soft count = 33
--- gene=2591, id=231 removed by soft count = 68
--- gene=2591, id=234 removed by soft count = 282
--- gene=2591, id=253 removed by soft count = 355
--- gene=2591, id=273 removed by soft count = 38
--- gene=2591, id=276 removed by soft count = 559
--- gene=2591, id=311 removed by soft count = 214
--- gene=2591, id=319 removed by soft count = 31
--- gene=2591, id=320 removed by soft count = 176
--- gene=2591, id=322 removed by soft count = 136
--- number of probes: 350 , kept by blast: 278 , if remove dups: 278
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2596
--- gene=2596, id=39 removed by soft count = 43
--- gene=2596, id=112 removed by soft count = 273
--- gene=2596, id=118 removed by soft count = 61
--- gene=2596, id=260 removed by soft count = 3851
--- gene=2596, id=284 removed by soft count = 84
--- number of probes: 350 , kept by blast: 345 

--- gene=2636, id=40 removed by hard count = 3
--- gene=2636, id=42 removed by soft count = 63
--- gene=2636, id=44 removed by soft count = 35
--- gene=2636, id=46 removed by hard count = 6
--- gene=2636, id=48 removed by soft count = 44
--- gene=2636, id=51 removed by soft count = 41
--- gene=2636, id=53 removed by soft count = 57
--- gene=2636, id=55 removed by soft count = 33
--- gene=2636, id=57 removed by soft count = 57
--- gene=2636, id=63 removed by hard count = 6
--- gene=2636, id=72 removed by hard count = 5
--- gene=2636, id=75 removed by hard count = 3
--- gene=2636, id=92 removed by hard count = 4
--- gene=2636, id=93 removed by hard count = 4
--- gene=2636, id=98 removed by soft count = 39
--- gene=2636, id=103 removed by soft count = 108
--- gene=2636, id=107 removed by soft count = 365
--- gene=2636, id=110 removed by soft count = 207
--- gene=2636, id=116 removed by hard count = 3
--- gene=2636, id=118 removed by hard count = 6
--- gene=2636, id=121 removed by hard cou

--- gene=2646, id=125 removed by soft count = 650
--- gene=2646, id=126 removed by soft count = 36
--- gene=2646, id=127 removed by soft count = 79
--- gene=2646, id=128 removed by soft count = 75
--- gene=2646, id=145 removed by soft count = 99
--- gene=2646, id=146 removed by soft count = 72
--- gene=2646, id=151 removed by soft count = 203739
--- gene=2646, id=181 removed by hard count = 10
--- gene=2646, id=213 removed by soft count = 210
--- gene=2646, id=214 removed by soft count = 123
--- gene=2646, id=215 removed by hard count = 5
--- gene=2646, id=216 removed by soft count = 35
--- gene=2646, id=217 removed by soft count = 38
--- gene=2646, id=218 removed by soft count = 36
--- gene=2646, id=219 removed by soft count = 39
--- gene=2646, id=220 removed by soft count = 31
--- gene=2646, id=221 removed by soft count = 34
--- gene=2646, id=222 removed by soft count = 37
--- gene=2646, id=226 removed by soft count = 57
--- gene=2646, id=228 removed by soft count = 31
--- gene=2646,

--- gene=2706, id=1 removed by soft count = 2550
--- gene=2706, id=2 removed by soft count = 166
--- gene=2706, id=25 removed by soft count = 197
--- gene=2706, id=26 removed by soft count = 66
--- gene=2706, id=27 removed by soft count = 130
--- gene=2706, id=36 removed by soft count = 84
--- gene=2706, id=38 removed by soft count = 55
--- gene=2706, id=43 removed by soft count = 206
--- gene=2706, id=48 removed by soft count = 68
--- gene=2706, id=53 removed by soft count = 88
--- gene=2706, id=59 removed by soft count = 175
--- gene=2706, id=78 removed by soft count = 34
--- gene=2706, id=106 removed by soft count = 354
--- gene=2706, id=107 removed by soft count = 303
--- gene=2706, id=108 removed by soft count = 124
--- gene=2706, id=109 removed by soft count = 39
--- gene=2706, id=110 removed by soft count = 160
--- gene=2706, id=113 removed by soft count = 698
--- gene=2706, id=216 removed by soft count = 44
--- gene=2706, id=270 removed by soft count = 103
--- gene=2706, id=326

--- gene=2766, id=16 removed by soft count = 45
--- gene=2766, id=22 removed by soft count = 2094
--- gene=2766, id=32 removed by soft count = 39
--- gene=2766, id=46 removed by soft count = 1003
--- gene=2766, id=61 removed by soft count = 127
--- gene=2766, id=62 removed by soft count = 519
--- gene=2766, id=63 removed by soft count = 1696
--- gene=2766, id=111 removed by soft count = 58
--- gene=2766, id=122 removed by soft count = 76
--- gene=2766, id=123 removed by soft count = 163
--- gene=2766, id=124 removed by soft count = 52
--- gene=2766, id=139 removed by soft count = 32
--- gene=2766, id=140 removed by soft count = 165
--- gene=2766, id=141 removed by soft count = 592
--- gene=2766, id=142 removed by soft count = 33
--- gene=2766, id=144 removed by soft count = 48
--- gene=2766, id=149 removed by soft count = 43
--- gene=2766, id=151 removed by soft count = 37
--- gene=2766, id=166 removed by soft count = 89
--- gene=2766, id=227 removed by soft count = 100
--- gene=2766, 

--- gene=2826, id=121 removed by soft count = 49
--- gene=2826, id=124 removed by soft count = 91
--- gene=2826, id=141 removed by soft count = 34
--- gene=2826, id=185 removed by soft count = 37
--- gene=2826, id=186 removed by soft count = 45
--- gene=2826, id=196 removed by soft count = 2656
--- gene=2826, id=329 removed by soft count = 377
--- gene=2826, id=333 removed by soft count = 46
--- number of probes: 350 , kept by blast: 342 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2831
--- gene=2831, id=105 removed by soft count = 56
--- gene=2831, id=110 removed by soft count = 49
--- gene=2831, id=311 removed by soft count = 56
--- gene=2831, id=313 removed by soft count = 77
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in re

--- gene=2876, id=105 removed by soft count = 52
--- gene=2876, id=106 removed by soft count = 75
--- gene=2876, id=281 removed by soft count = 77
--- gene=2876, id=283 removed by soft count = 40
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2881
--- gene=2881, id=36 removed by soft count = 149
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2886
--- gene=2886, id=6 removed by soft count = 1143
--- gene=2886, id=8 removed by soft count = 119
--- gene=2886, id=9 removed by soft count = 56
--- gene=2886, id=105 removed by soft count = 325
--- gene=2886, id=142 removed by soft count = 40
--- gene=2886, id=143 removed by soft count = 46
--- gene=2886, id=144 removed by soft count

--- gene=2941, id=143 removed by soft count = 590
--- gene=2941, id=144 removed by soft count = 53
--- number of probes: 350 , kept by blast: 347 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2946
--- gene=2946, id=91 removed by soft count = 133
--- gene=2946, id=92 removed by soft count = 364
--- gene=2946, id=93 removed by soft count = 84
--- gene=2946, id=94 removed by soft count = 41
--- gene=2946, id=98 removed by soft count = 213
--- gene=2946, id=126 removed by soft count = 56
--- gene=2946, id=131 removed by soft count = 99
--- gene=2946, id=155 removed by soft count = 35
--- gene=2946, id=172 removed by soft count = 35
--- number of probes: 350 , kept by blast: 341 , if remove dups: 341
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 2951
--- gene=2951, id=61 removed by soft count

--- gene=3006, id=99 removed by soft count = 37
--- gene=3006, id=101 removed by soft count = 35
--- gene=3006, id=108 removed by soft count = 222
--- gene=3006, id=111 removed by soft count = 41
--- gene=3006, id=114 removed by soft count = 98
--- gene=3006, id=115 removed by soft count = 115
--- gene=3006, id=117 removed by soft count = 166
--- gene=3006, id=118 removed by soft count = 38
--- gene=3006, id=119 removed by soft count = 58
--- gene=3006, id=122 removed by soft count = 31
--- gene=3006, id=140 removed by soft count = 42
--- gene=3006, id=143 removed by soft count = 67
--- gene=3006, id=171 removed by soft count = 208
--- gene=3006, id=183 removed by soft count = 41
--- gene=3006, id=184 removed by soft count = 32
--- gene=3006, id=195 removed by soft count = 774
--- gene=3006, id=260 removed by soft count = 115
--- gene=3006, id=343 removed by soft count = 87
--- gene=3006, id=349 removed by soft count = 175
--- number of probes: 350 , kept by blast: 331 , if remove dups

--- gene=3061, id=239 removed by soft count = 181
--- gene=3061, id=240 removed by soft count = 201
--- gene=3061, id=241 removed by soft count = 117
--- gene=3061, id=256 removed by soft count = 431
--- gene=3061, id=257 removed by soft count = 1942
--- gene=3061, id=258 removed by soft count = 36
--- gene=3061, id=259 removed by soft count = 156
--- gene=3061, id=260 removed by soft count = 64
--- gene=3061, id=280 removed by soft count = 522
--- number of probes: 350 , kept by blast: 326 , if remove dups: 326
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3066
--- gene=3066, id=0 removed by soft count = 243
--- gene=3066, id=2 removed by soft count = 56
--- gene=3066, id=3 removed by soft count = 98
--- gene=3066, id=4 removed by soft count = 91
--- gene=3066, id=6 removed by soft count = 129
--- gene=3066, id=7 removed by soft count = 451
--- gene=3066, id=8 removed by soft count = 1085
--- gen

--- gene=3076, id=260 removed by soft count = 66
--- gene=3076, id=261 removed by soft count = 39
--- gene=3076, id=262 removed by soft count = 99
--- gene=3076, id=263 removed by soft count = 3719
--- number of probes: 350 , kept by blast: 306 , if remove dups: 306
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3081
--- gene=3081, id=4 removed by soft count = 108
--- gene=3081, id=5 removed by soft count = 287
--- gene=3081, id=6 removed by soft count = 31
--- gene=3081, id=58 removed by soft count = 115
--- gene=3081, id=59 removed by soft count = 40
--- gene=3081, id=76 removed by soft count = 34
--- gene=3081, id=77 removed by soft count = 638
--- gene=3081, id=78 removed by soft count = 402
--- gene=3081, id=79 removed by soft count = 230
--- gene=3081, id=80 removed by soft count = 179
--- gene=3081, id=81 removed by soft count = 533
--- gene=3081, id=83 removed by soft count = 337
--- gene=3

--- gene=3116, id=220 removed by soft count = 3718
--- gene=3116, id=221 removed by soft count = 896
--- gene=3116, id=222 removed by soft count = 91
--- gene=3116, id=223 removed by soft count = 503
--- gene=3116, id=224 removed by soft count = 5396
--- gene=3116, id=225 removed by soft count = 224
--- gene=3116, id=286 removed by soft count = 36
--- gene=3116, id=303 removed by soft count = 277
--- gene=3116, id=321 removed by soft count = 703
--- gene=3116, id=322 removed by soft count = 106
--- gene=3116, id=323 removed by soft count = 303
--- gene=3116, id=324 removed by soft count = 1636
--- gene=3116, id=325 removed by soft count = 1408
--- gene=3116, id=326 removed by soft count = 1122
--- gene=3116, id=327 removed by soft count = 1704
--- gene=3116, id=349 removed by soft count = 897
--- number of probes: 350 , kept by blast: 319 , if remove dups: 319
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in

--- gene=3186, id=291 removed by soft count = 392
--- gene=3186, id=317 removed by soft count = 33
--- gene=3186, id=349 removed by soft count = 41
--- number of probes: 350 , kept by blast: 343 , if remove dups: 343
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3191
--- gene=3191, id=26 removed by soft count = 34
--- gene=3191, id=73 removed by soft count = 32
--- gene=3191, id=75 removed by soft count = 52
--- gene=3191, id=173 removed by soft count = 109
--- gene=3191, id=199 removed by soft count = 55
--- gene=3191, id=242 removed by soft count = 953
--- gene=3191, id=249 removed by soft count = 339
--- gene=3191, id=254 removed by soft count = 74
--- gene=3191, id=294 removed by soft count = 186
--- gene=3191, id=295 removed by soft count = 32
--- gene=3191, id=349 removed by soft count = 626
--- number of probes: 350 , kept by blast: 339 , if remove dups: 339
--- remove duplicated probes
---

--- gene=3231, id=316 removed by soft count = 3224
--- gene=3231, id=317 removed by soft count = 38
--- gene=3231, id=318 removed by soft count = 1083
--- gene=3231, id=330 removed by soft count = 38
--- gene=3231, id=333 removed by soft count = 37
--- number of probes: 350 , kept by blast: 305 , if remove dups: 215
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3236
--- gene=3236, id=0 removed by soft count = 129
--- gene=3236, id=13 removed by soft count = 122
--- gene=3236, id=56 removed by soft count = 365
--- gene=3236, id=57 removed by soft count = 269
--- gene=3236, id=127 removed by soft count = 40
--- gene=3236, id=129 removed by soft count = 51
--- gene=3236, id=130 removed by soft count = 130
--- gene=3236, id=134 removed by soft count = 45
--- gene=3236, id=172 removed by soft count = 34
--- gene=3236, id=209 removed by soft count = 134
--- gene=3236, id=246 removed by soft count = 255
--- gene=3236, id=247 removed

--- gene=3296, id=65 removed by soft count = 269
--- gene=3296, id=66 removed by soft count = 68
--- gene=3296, id=105 removed by soft count = 1922
--- gene=3296, id=182 removed by soft count = 37
--- gene=3296, id=227 removed by soft count = 71
--- number of probes: 350 , kept by blast: 345 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3301
--- gene=3301, id=19 removed by soft count = 150
--- gene=3301, id=20 removed by soft count = 2632
--- gene=3301, id=28 removed by soft count = 41
--- gene=3301, id=82 removed by soft count = 38
--- gene=3301, id=165 removed by soft count = 120
--- gene=3301, id=176 removed by soft count = 131
--- gene=3301, id=193 removed by soft count = 83
--- gene=3301, id=243 removed by soft count = 48
--- gene=3301, id=284 removed by soft count = 31
--- gene=3301, id=311 removed by soft count = 1570
--- gene=3301, id=312 removed by soft count = 74
--

--- gene=3361, id=138 removed by soft count = 48
--- gene=3361, id=215 removed by soft count = 37
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3366
--- gene=3366, id=16 removed by soft count = 32
--- gene=3366, id=17 removed by soft count = 52
--- gene=3366, id=49 removed by soft count = 96
--- gene=3366, id=50 removed by soft count = 467
--- gene=3366, id=51 removed by soft count = 293
--- gene=3366, id=52 removed by soft count = 532
--- gene=3366, id=53 removed by soft count = 469
--- gene=3366, id=54 removed by soft count = 45
--- gene=3366, id=55 removed by soft count = 112
--- gene=3366, id=56 removed by soft count = 137
--- gene=3366, id=102 removed by soft count = 37
--- gene=3366, id=134 removed by soft count = 41
--- gene=3366, id=256 removed by soft count = 615
--- gene=3366, id=331 removed by soft count = 85
--- numbe

--- gene=3416, id=156 removed by soft count = 33
--- gene=3416, id=166 removed by soft count = 778
--- gene=3416, id=178 removed by soft count = 185
--- gene=3416, id=269 removed by soft count = 72
--- gene=3416, id=271 removed by soft count = 36
--- gene=3416, id=344 removed by soft count = 61
--- number of probes: 350 , kept by blast: 331 , if remove dups: 331
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3421
--- gene=3421, id=2 removed by soft count = 34
--- gene=3421, id=85 removed by soft count = 4705
--- gene=3421, id=116 removed by soft count = 40
--- gene=3421, id=117 removed by soft count = 180
--- gene=3421, id=241 removed by soft count = 123
--- gene=3421, id=258 removed by soft count = 97
--- gene=3421, id=274 removed by soft count = 133
--- number of probes: 350 , kept by blast: 343 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes 

--- gene=3466, id=321 removed by soft count = 168
--- gene=3466, id=322 removed by soft count = 43
--- gene=3466, id=323 removed by soft count = 51
--- gene=3466, id=324 removed by soft count = 234
--- gene=3466, id=325 removed by soft count = 32
--- gene=3466, id=326 removed by soft count = 92
--- gene=3466, id=327 removed by soft count = 54
--- gene=3466, id=328 removed by soft count = 42
--- number of probes: 350 , kept by blast: 335 , if remove dups: 335
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3471
--- gene=3471, id=1 removed by soft count = 194
--- gene=3471, id=12 removed by soft count = 49
--- gene=3471, id=23 removed by soft count = 132
--- gene=3471, id=24 removed by soft count = 737
--- gene=3471, id=89 removed by soft count = 92
--- number of probes: 350 , kept by blast: 345 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept 

--- gene=3531, id=92 removed by soft count = 63
--- gene=3531, id=248 removed by soft count = 32
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3536
--- gene=3536, id=30 removed by soft count = 43
--- gene=3536, id=177 removed by soft count = 237
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3541
--- gene=3541, id=33 removed by soft count = 1261
--- gene=3541, id=34 removed by soft count = 42
--- gene=3541, id=285 removed by soft count = 410
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3546
--- gene=3546, id=72 remove

--- gene=3606, id=210 removed by soft count = 39
--- gene=3606, id=251 removed by soft count = 2161
--- gene=3606, id=252 removed by soft count = 7547
--- gene=3606, id=253 removed by soft count = 631
--- gene=3606, id=254 removed by soft count = 97
--- gene=3606, id=321 removed by soft count = 428
--- gene=3606, id=332 removed by soft count = 124
--- gene=3606, id=336 removed by soft count = 59
--- gene=3606, id=342 removed by soft count = 233
--- gene=3606, id=346 removed by soft count = 63
--- number of probes: 350 , kept by blast: 332 , if remove dups: 332
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3611
--- gene=3611, id=59 removed by soft count = 243
--- gene=3611, id=60 removed by soft count = 904
--- gene=3611, id=229 removed by soft count = 565
--- gene=3611, id=334 removed by soft count = 131
--- gene=3611, id=336 removed by soft count = 146
--- number of probes: 350 , kept by blast: 3

--- gene=3671, id=34 removed by soft count = 273
--- gene=3671, id=35 removed by soft count = 324
--- gene=3671, id=36 removed by soft count = 166
--- gene=3671, id=37 removed by soft count = 163
--- gene=3671, id=47 removed by soft count = 335
--- gene=3671, id=178 removed by soft count = 528
--- gene=3671, id=251 removed by soft count = 648
--- gene=3671, id=253 removed by soft count = 59
--- gene=3671, id=299 removed by soft count = 618
--- gene=3671, id=337 removed by soft count = 122
--- gene=3671, id=339 removed by soft count = 50
--- gene=3671, id=347 removed by soft count = 206
--- gene=3671, id=348 removed by soft count = 1242
--- gene=3671, id=349 removed by soft count = 477
--- number of probes: 350 , kept by blast: 326 , if remove dups: 326
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3676
--- gene=3676, id=32 removed by soft count = 38
--- gene=3676, id=54 removed by soft count = 54


--- gene=3706, id=79 removed by soft count = 143
--- gene=3706, id=91 removed by soft count = 127
--- gene=3706, id=128 removed by soft count = 84
--- gene=3706, id=129 removed by soft count = 345
--- gene=3706, id=130 removed by soft count = 583
--- gene=3706, id=131 removed by soft count = 314
--- gene=3706, id=132 removed by soft count = 31
--- gene=3706, id=169 removed by soft count = 891
--- gene=3706, id=271 removed by soft count = 31
--- gene=3706, id=282 removed by soft count = 66
--- gene=3706, id=288 removed by soft count = 71
--- gene=3706, id=294 removed by soft count = 86
--- gene=3706, id=295 removed by soft count = 35
--- gene=3706, id=301 removed by soft count = 127
--- gene=3706, id=311 removed by soft count = 105
--- gene=3706, id=321 removed by soft count = 1248
--- gene=3706, id=322 removed by soft count = 724
--- gene=3706, id=324 removed by soft count = 482
--- gene=3706, id=325 removed by soft count = 196
--- gene=3706, id=326 removed by soft count = 109
--- gene

--- gene=3736, id=133 removed by soft count = 32
--- gene=3736, id=200 removed by soft count = 37
--- gene=3736, id=203 removed by soft count = 2781
--- gene=3736, id=204 removed by soft count = 64
--- gene=3736, id=206 removed by soft count = 42
--- gene=3736, id=208 removed by soft count = 38
--- gene=3736, id=213 removed by soft count = 262
--- gene=3736, id=214 removed by soft count = 88
--- gene=3736, id=215 removed by soft count = 54
--- gene=3736, id=216 removed by soft count = 239
--- gene=3736, id=217 removed by soft count = 98
--- gene=3736, id=219 removed by soft count = 83
--- gene=3736, id=220 removed by soft count = 150
--- gene=3736, id=221 removed by soft count = 198
--- gene=3736, id=222 removed by soft count = 215
--- gene=3736, id=223 removed by soft count = 625
--- gene=3736, id=224 removed by soft count = 67
--- gene=3736, id=229 removed by soft count = 123
--- gene=3736, id=233 removed by soft count = 37
--- gene=3736, id=234 removed by soft count = 433
--- gene=3

--- gene=3751, id=143 removed by soft count = 41
--- gene=3751, id=144 removed by soft count = 35
--- gene=3751, id=146 removed by soft count = 61
--- gene=3751, id=202 removed by soft count = 102
--- gene=3751, id=236 removed by soft count = 282
--- gene=3751, id=237 removed by soft count = 325
--- gene=3751, id=238 removed by soft count = 1117
--- gene=3751, id=239 removed by soft count = 6509
--- number of probes: 350 , kept by blast: 320 , if remove dups: 320
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3756
--- gene=3756, id=0 removed by soft count = 131
--- gene=3756, id=1 removed by soft count = 47
--- gene=3756, id=2 removed by soft count = 308
--- gene=3756, id=8 removed by soft count = 1683
--- gene=3756, id=9 removed by soft count = 2159
--- gene=3756, id=10 removed by soft count = 727
--- gene=3756, id=11 removed by soft count = 1884
--- gene=3756, id=12 removed by soft count = 172
--

--- gene=3791, id=296 removed by soft count = 31
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3796
--- gene=3796, id=20 removed by soft count = 99
--- gene=3796, id=64 removed by soft count = 1556
--- gene=3796, id=278 removed by soft count = 103
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3801
--- gene=3801, id=49 removed by soft count = 1123
--- gene=3801, id=93 removed by soft count = 498
--- gene=3801, id=94 removed by soft count = 154
--- gene=3801, id=96 removed by soft count = 47
--- gene=3801, id=97 removed by soft count = 49
--- gene=3801, id=120 removed by soft count = 101
--- gene=3801, id=217 removed by soft count = 33
--- gene=3801, id=253 removed by soft co

--- number of probes: 350 , kept by blast: 335 , if remove dups: 335
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3856
--- gene=3856, id=156 removed by soft count = 123
--- gene=3856, id=158 removed by soft count = 40
--- gene=3856, id=217 removed by soft count = 481
--- gene=3856, id=238 removed by soft count = 61
--- gene=3856, id=239 removed by soft count = 86
--- gene=3856, id=264 removed by soft count = 551
--- number of probes: 350 , kept by blast: 344 , if remove dups: 344
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3861
--- gene=3861, id=44 removed by soft count = 4284
--- gene=3861, id=47 removed by soft count = 134
--- gene=3861, id=48 removed by soft count = 586
--- gene=3861, id=93 removed by soft count = 36
--- gene=3861, id=99 removed by soft count = 31
--- gene=3861, id=101 removed by soft c

--- gene=3881, id=141 removed by soft count = 84
--- gene=3881, id=144 removed by soft count = 80
--- gene=3881, id=145 removed by soft count = 77
--- gene=3881, id=146 removed by soft count = 67
--- gene=3881, id=150 removed by soft count = 521
--- gene=3881, id=154 removed by soft count = 293
--- gene=3881, id=155 removed by soft count = 2102
--- gene=3881, id=156 removed by soft count = 941
--- gene=3881, id=162 removed by soft count = 37
--- gene=3881, id=172 removed by soft count = 187
--- gene=3881, id=173 removed by soft count = 786
--- gene=3881, id=188 removed by soft count = 450
--- gene=3881, id=198 removed by soft count = 45
--- gene=3881, id=206 removed by soft count = 33
--- gene=3881, id=207 removed by soft count = 55
--- gene=3881, id=208 removed by soft count = 136
--- gene=3881, id=209 removed by soft count = 73
--- gene=3881, id=211 removed by soft count = 32
--- gene=3881, id=216 removed by soft count = 142
--- gene=3881, id=217 removed by soft count = 31
--- gene=3

--- gene=3911, id=337 removed by soft count = 1358
--- gene=3911, id=340 removed by soft count = 257
--- number of probes: 350 , kept by blast: 336 , if remove dups: 336
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3916
--- gene=3916, id=82 removed by soft count = 219
--- gene=3916, id=83 removed by soft count = 122
--- gene=3916, id=84 removed by soft count = 36
--- gene=3916, id=85 removed by soft count = 31
--- gene=3916, id=87 removed by soft count = 94
--- gene=3916, id=88 removed by soft count = 371
--- gene=3916, id=90 removed by soft count = 107
--- gene=3916, id=91 removed by soft count = 822
--- gene=3916, id=92 removed by soft count = 325
--- gene=3916, id=104 removed by soft count = 42
--- gene=3916, id=106 removed by soft count = 177
--- gene=3916, id=119 removed by soft count = 160
--- gene=3916, id=219 removed by soft count = 380
--- gene=3916, id=221 removed by soft count = 135
--

--- gene=3976, id=84 removed by soft count = 33
--- gene=3976, id=324 removed by soft count = 165
--- gene=3976, id=341 removed by soft count = 304
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 3981
--- gene=3981, id=2 removed by soft count = 69
--- gene=3981, id=12 removed by soft count = 18123
--- gene=3981, id=25 removed by soft count = 133
--- gene=3981, id=26 removed by soft count = 193
--- gene=3981, id=27 removed by soft count = 129
--- gene=3981, id=28 removed by soft count = 95
--- gene=3981, id=130 removed by soft count = 39
--- gene=3981, id=194 removed by soft count = 298
--- gene=3981, id=202 removed by soft count = 1623
--- gene=3981, id=203 removed by soft count = 448
--- gene=3981, id=312 removed by soft count = 735
--- gene=3981, id=342 removed by soft count = 687
--- number of probes: 350 , kept by blast: 338 , 

--- gene=4036, id=29 removed by soft count = 54
--- gene=4036, id=128 removed by soft count = 51
--- gene=4036, id=142 removed by soft count = 36
--- gene=4036, id=251 removed by soft count = 71
--- gene=4036, id=260 removed by soft count = 45
--- gene=4036, id=282 removed by soft count = 609
--- gene=4036, id=335 removed by soft count = 53
--- gene=4036, id=336 removed by soft count = 107
--- number of probes: 350 , kept by blast: 342 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4041
--- gene=4041, id=59 removed by soft count = 39
--- gene=4041, id=79 removed by soft count = 45
--- gene=4041, id=346 removed by soft count = 203
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4046
--- gene=4046, id=0 removed by soft count

--- gene=4071, id=111 removed by soft count = 105
--- gene=4071, id=127 removed by soft count = 64
--- gene=4071, id=128 removed by soft count = 57
--- gene=4071, id=132 removed by soft count = 96
--- gene=4071, id=172 removed by soft count = 819
--- gene=4071, id=173 removed by soft count = 83
--- gene=4071, id=176 removed by soft count = 39
--- gene=4071, id=323 removed by soft count = 46
--- gene=4071, id=337 removed by soft count = 119
--- gene=4071, id=338 removed by soft count = 81
--- number of probes: 350 , kept by blast: 328 , if remove dups: 328
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4076
--- gene=4076, id=24 removed by soft count = 348
--- gene=4076, id=25 removed by soft count = 199
--- gene=4076, id=26 removed by soft count = 136
--- gene=4076, id=29 removed by soft count = 967
--- gene=4076, id=81 removed by soft count = 167
--- gene=4076, id=89 removed by soft count = 126
---

--- number of probes: 350 , kept by blast: 350 , if remove dups: 350
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4151
--- gene=4151, id=67 removed by soft count = 34
--- gene=4151, id=95 removed by soft count = 31
--- gene=4151, id=149 removed by soft count = 34
--- gene=4151, id=189 removed by soft count = 32
--- gene=4151, id=214 removed by soft count = 64
--- gene=4151, id=320 removed by soft count = 41
--- number of probes: 350 , kept by blast: 344 , if remove dups: 344
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4156
--- gene=4156, id=56 removed by soft count = 34
--- gene=4156, id=196 removed by soft count = 35
--- gene=4156, id=270 removed by soft count = 69
--- gene=4156, id=294 removed by soft count = 189
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated p

--- gene=4181, id=278 removed by soft count = 67
--- gene=4181, id=280 removed by soft count = 32
--- gene=4181, id=283 removed by soft count = 35
--- gene=4181, id=294 removed by soft count = 58
--- gene=4181, id=340 removed by soft count = 41
--- gene=4181, id=341 removed by soft count = 190
--- number of probes: 350 , kept by blast: 329 , if remove dups: 329
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4186
--- gene=4186, id=0 removed by soft count = 152
--- gene=4186, id=2 removed by soft count = 70
--- gene=4186, id=4 removed by soft count = 2227
--- gene=4186, id=5 removed by soft count = 47
--- gene=4186, id=6 removed by soft count = 134
--- gene=4186, id=52 removed by soft count = 344
--- gene=4186, id=81 removed by soft count = 134
--- gene=4186, id=82 removed by soft count = 82
--- gene=4186, id=83 removed by soft count = 136
--- gene=4186, id=119 removed by soft count = 881
--- gene=41

--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4251
--- gene=4251, id=42 removed by soft count = 66
--- gene=4251, id=43 removed by soft count = 428
--- gene=4251, id=44 removed by soft count = 127
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4256
--- gene=4256, id=3 removed by soft count = 38
--- gene=4256, id=4 removed by soft count = 378
--- gene=4256, id=6 removed by soft count = 74
--- gene=4256, id=7 removed by soft count = 525
--- gene=4256, id=8 removed by soft count = 228
--- gene=4256, id=9 removed by soft count = 54
--- gene=4256, id=10 removed by soft count = 33
--- gene=4256, id=14 removed by soft count = 48
--- gene=4256, id=18 removed by soft count = 203
--- 

--- gene=4311, id=5 removed by soft count = 90
--- gene=4311, id=114 removed by soft count = 59
--- gene=4311, id=137 removed by soft count = 226
--- gene=4311, id=203 removed by soft count = 98
--- gene=4311, id=204 removed by soft count = 63
--- gene=4311, id=226 removed by soft count = 2104
--- gene=4311, id=338 removed by soft count = 32
--- gene=4311, id=349 removed by soft count = 1230
--- number of probes: 350 , kept by blast: 342 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4316
--- gene=4316, id=56 removed by soft count = 97
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4321
--- number of probes: 350 , kept by blast: 350 , if remove dups: 350
--- remove duplicated probes
--- keep probes from beginning
-- numbe

--- gene=4386, id=72 removed by soft count = 39
--- gene=4386, id=98 removed by soft count = 102
--- gene=4386, id=219 removed by soft count = 274
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4391
--- gene=4391, id=108 removed by soft count = 58
--- gene=4391, id=127 removed by soft count = 33
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4396
--- gene=4396, id=36 removed by soft count = 103
--- gene=4396, id=78 removed by soft count = 87
--- gene=4396, id=80 removed by soft count = 317
--- gene=4396, id=108 removed by soft count = 97
--- gene=4396, id=232 removed by soft count = 42
--- gene=4396, id=233 removed by soft count = 49
--- gene=4396, id=260 removed by soft coun

--- gene=4471, id=191 removed by soft count = 31
--- gene=4471, id=208 removed by soft count = 264
--- gene=4471, id=214 removed by soft count = 138
--- gene=4471, id=232 removed by soft count = 311
--- gene=4471, id=304 removed by soft count = 34
--- gene=4471, id=314 removed by soft count = 34
--- number of probes: 350 , kept by blast: 344 , if remove dups: 344
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4476
--- gene=4476, id=225 removed by soft count = 137
--- gene=4476, id=272 removed by soft count = 199
--- gene=4476, id=297 removed by soft count = 74
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4481
--- gene=4481, id=102 removed by soft count = 32
--- gene=4481, id=105 removed by soft count = 46
--- number of probes: 350 , kept by 

--- gene=4551, id=338 removed by soft count = 43
--- gene=4551, id=339 removed by soft count = 637
--- gene=4551, id=340 removed by soft count = 310
--- number of probes: 350 , kept by blast: 342 , if remove dups: 342
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4556
--- gene=4556, id=0 removed by soft count = 242
--- gene=4556, id=10 removed by soft count = 296
--- gene=4556, id=13 removed by soft count = 174
--- gene=4556, id=14 removed by soft count = 539
--- gene=4556, id=15 removed by soft count = 48
--- gene=4556, id=24 removed by soft count = 31
--- gene=4556, id=27 removed by soft count = 89
--- gene=4556, id=28 removed by soft count = 67
--- gene=4556, id=74 removed by soft count = 31
--- gene=4556, id=84 removed by soft count = 96
--- gene=4556, id=144 removed by soft count = 34
--- gene=4556, id=146 removed by soft count = 63
--- gene=4556, id=147 removed by soft count = 60
--- gene=45

--- number of probes: 350 , kept by blast: 341 , if remove dups: 331
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4611
--- gene=4611, id=42 removed by soft count = 150
--- gene=4611, id=131 removed by soft count = 33
--- gene=4611, id=134 removed by soft count = 56
--- gene=4611, id=139 removed by soft count = 35
--- gene=4611, id=172 removed by soft count = 150
--- gene=4611, id=174 removed by soft count = 32
--- gene=4611, id=175 removed by soft count = 52
--- gene=4611, id=176 removed by soft count = 85
--- gene=4611, id=187 removed by soft count = 39
--- gene=4611, id=188 removed by soft count = 87
--- gene=4611, id=190 removed by soft count = 237
--- gene=4611, id=191 removed by soft count = 42
--- gene=4611, id=192 removed by soft count = 75
--- gene=4611, id=193 removed by soft count = 51
--- gene=4611, id=194 removed by soft count = 75
--- gene=4611, id=195 removed by soft count = 32
--- 

--- gene=4681, id=2 removed by soft count = 177
--- gene=4681, id=98 removed by soft count = 720
--- gene=4681, id=178 removed by soft count = 1697
--- gene=4681, id=274 removed by soft count = 86
--- gene=4681, id=277 removed by soft count = 69
--- gene=4681, id=291 removed by soft count = 63
--- gene=4681, id=294 removed by soft count = 181
--- gene=4681, id=295 removed by soft count = 81
--- gene=4681, id=296 removed by soft count = 225
--- gene=4681, id=299 removed by soft count = 63
--- gene=4681, id=302 removed by soft count = 73
--- gene=4681, id=304 removed by soft count = 72
--- number of probes: 350 , kept by blast: 337 , if remove dups: 337
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4686
--- gene=4686, id=113 removed by soft count = 95
--- gene=4686, id=126 removed by soft count = 47
--- gene=4686, id=145 removed by soft count = 300
--- gene=4686, id=154 removed by soft count = 108
-

--- gene=4771, id=258 removed by soft count = 37
--- gene=4771, id=264 removed by soft count = 45
--- gene=4771, id=265 removed by soft count = 41
--- gene=4771, id=266 removed by soft count = 166
--- gene=4771, id=267 removed by soft count = 33
--- gene=4771, id=269 removed by soft count = 33
--- gene=4771, id=272 removed by soft count = 56
--- gene=4771, id=273 removed by soft count = 34
--- gene=4771, id=275 removed by soft count = 82
--- gene=4771, id=277 removed by soft count = 66
--- gene=4771, id=278 removed by soft count = 76
--- gene=4771, id=284 removed by soft count = 45
--- gene=4771, id=286 removed by soft count = 126
--- gene=4771, id=287 removed by soft count = 98
--- gene=4771, id=289 removed by soft count = 64
--- gene=4771, id=290 removed by soft count = 65
--- gene=4771, id=292 removed by soft count = 96
--- gene=4771, id=294 removed by soft count = 44
--- gene=4771, id=298 removed by soft count = 124
--- gene=4771, id=303 removed by soft count = 153
--- gene=4771, i

--- gene=4836, id=308 removed by soft count = 155
--- gene=4836, id=309 removed by soft count = 175
--- gene=4836, id=310 removed by soft count = 101
--- gene=4836, id=311 removed by soft count = 41
--- number of probes: 350 , kept by blast: 324 , if remove dups: 286
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 4841
--- gene=4841, id=0 removed by hard count = 3
--- gene=4841, id=1 removed by hard count = 3
--- gene=4841, id=2 removed by hard count = 3
--- gene=4841, id=3 removed by hard count = 3
--- gene=4841, id=4 removed by hard count = 3
--- gene=4841, id=5 removed by hard count = 3
--- gene=4841, id=6 removed by hard count = 3
--- gene=4841, id=7 removed by hard count = 3
--- gene=4841, id=8 removed by hard count = 3
--- gene=4841, id=9 removed by hard count = 3
--- gene=4841, id=10 removed by hard count = 3
--- gene=4841, id=11 removed by hard count = 3
--- gene=4841, id=12 removed by hard 

--- gene=4841, id=178 removed by hard count = 3
--- gene=4841, id=179 removed by hard count = 3
--- gene=4841, id=180 removed by hard count = 3
--- gene=4841, id=181 removed by hard count = 3
--- gene=4841, id=182 removed by hard count = 3
--- gene=4841, id=183 removed by hard count = 3
--- gene=4841, id=184 removed by hard count = 3
--- gene=4841, id=185 removed by hard count = 3
--- gene=4841, id=186 removed by hard count = 3
--- gene=4841, id=187 removed by hard count = 3
--- gene=4841, id=188 removed by hard count = 3
--- gene=4841, id=189 removed by hard count = 3
--- gene=4841, id=190 removed by hard count = 3
--- gene=4841, id=191 removed by hard count = 3
--- gene=4841, id=192 removed by hard count = 3
--- gene=4841, id=193 removed by hard count = 3
--- gene=4841, id=194 removed by hard count = 3
--- gene=4841, id=195 removed by hard count = 3
--- gene=4841, id=196 removed by hard count = 3
--- gene=4841, id=197 removed by hard count = 3
--- gene=4841, id=198 removed by hard co

--- gene=552, id=0 removed by soft count = 774
--- gene=552, id=2 removed by soft count = 73
--- gene=552, id=7 removed by soft count = 34
--- gene=552, id=9 removed by soft count = 159
--- gene=552, id=15 removed by soft count = 35
--- gene=552, id=16 removed by soft count = 297
--- gene=552, id=19 removed by soft count = 35
--- gene=552, id=20 removed by soft count = 57
--- gene=552, id=33 removed by soft count = 78
--- gene=552, id=36 removed by soft count = 69
--- gene=552, id=38 removed by soft count = 66
--- gene=552, id=39 removed by soft count = 45
--- gene=552, id=41 removed by soft count = 150
--- gene=552, id=42 removed by soft count = 65
--- gene=552, id=44 removed by soft count = 201
--- gene=552, id=45 removed by soft count = 53
--- gene=552, id=46 removed by soft count = 608
--- gene=552, id=47 removed by soft count = 151
--- gene=552, id=48 removed by soft count = 1440
--- gene=552, id=50 removed by soft count = 375
--- gene=552, id=54 removed by soft count = 258
--- ge

--- gene=564, id=217 removed by soft count = 60
--- gene=564, id=219 removed by soft count = 92
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 565
--- gene=565, id=235 removed by soft count = 108
--- gene=565, id=312 removed by soft count = 66
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 567
--- gene=567, id=86 removed by soft count = 58
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 568
--- number of probes: 350 , kept by blast: 350 , if remove dups: 350
--- remove duplicated probes
--- keep probes from beginning
-- n

--- gene=589, id=297 removed by soft count = 71
--- number of probes: 350 , kept by blast: 346 , if remove dups: 346
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 590
--- number of probes: 349 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 592
--- gene=592, id=11 removed by soft count = 37
--- gene=592, id=166 removed by soft count = 69
--- gene=592, id=204 removed by soft count = 31
--- gene=592, id=290 removed by soft count = 132
--- gene=592, id=304 removed by soft count = 103
--- number of probes: 350 , kept by blast: 345 , if remove dups: 345
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 593
--- gene=593, id=30 removed by soft count = 222
--- gene=593, id=31 removed by soft 

--- gene=614, id=153 removed by soft count = 44
--- gene=614, id=184 removed by soft count = 853
--- gene=614, id=212 removed by soft count = 42
--- gene=614, id=267 removed by soft count = 46
--- gene=614, id=346 removed by soft count = 34
--- number of probes: 350 , kept by blast: 343 , if remove dups: 343
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 615
--- number of probes: 350 , kept by blast: 350 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 617
--- gene=617, id=213 removed by soft count = 80
--- gene=617, id=231 removed by soft count = 148
--- gene=617, id=232 removed by soft count = 48
--- gene=617, id=233 removed by soft count = 77
--- gene=617, id=235 removed by soft count = 45
--- gene=617, id=237 removed by soft count = 117
--- gene=617, id=239 removed by soft count = 82
---

--- gene=630, id=323 removed by soft count = 108
--- gene=630, id=324 removed by soft count = 162
--- gene=630, id=325 removed by soft count = 181
--- gene=630, id=326 removed by soft count = 169
--- gene=630, id=327 removed by soft count = 70
--- gene=630, id=335 removed by soft count = 31
--- gene=630, id=345 removed by soft count = 373
--- gene=630, id=346 removed by soft count = 379
--- gene=630, id=347 removed by soft count = 21228
--- gene=630, id=348 removed by soft count = 43
--- number of probes: 350 , kept by blast: 278 , if remove dups: 278
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 632
--- gene=632, id=1 removed by soft count = 66
--- gene=632, id=2 removed by soft count = 355
--- gene=632, id=3 removed by soft count = 31
--- gene=632, id=17 removed by soft count = 50
--- gene=632, id=20 removed by soft count = 47
--- gene=632, id=21 removed by soft count = 60
--- gene=632, id=22 re

--- gene=633, id=174 removed by soft count = 116
--- gene=633, id=175 removed by soft count = 807
--- gene=633, id=176 removed by soft count = 47
--- gene=633, id=177 removed by soft count = 487
--- gene=633, id=179 removed by soft count = 639
--- gene=633, id=180 removed by soft count = 220
--- gene=633, id=181 removed by soft count = 3878
--- gene=633, id=182 removed by soft count = 84
--- gene=633, id=183 removed by soft count = 270
--- gene=633, id=184 removed by soft count = 144
--- gene=633, id=185 removed by soft count = 52
--- gene=633, id=187 removed by soft count = 38
--- gene=633, id=191 removed by soft count = 411
--- gene=633, id=193 removed by soft count = 258
--- gene=633, id=195 removed by soft count = 3741
--- gene=633, id=196 removed by soft count = 369
--- gene=633, id=197 removed by soft count = 35
--- gene=633, id=198 removed by soft count = 109
--- gene=633, id=200 removed by soft count = 236
--- gene=633, id=201 removed by soft count = 115
--- gene=633, id=203 re

--- gene=637, id=229 removed by soft count = 634
--- gene=637, id=260 removed by soft count = 168
--- gene=637, id=334 removed by soft count = 119
--- gene=637, id=339 removed by soft count = 156
--- gene=637, id=340 removed by soft count = 40
--- number of probes: 350 , kept by blast: 306 , if remove dups: 305
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 638
--- gene=638, id=112 removed by soft count = 55
--- gene=638, id=273 removed by soft count = 558
--- gene=638, id=349 removed by soft count = 736
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 639
--- gene=639, id=46 removed by soft count = 41
--- gene=639, id=53 removed by soft count = 32
--- gene=639, id=55 removed by soft count = 527
--- gene=639, id=123 removed by soft count = 69
--

--- gene=647, id=79 removed by soft count = 2157
--- gene=647, id=92 removed by soft count = 769
--- gene=647, id=93 removed by soft count = 178
--- gene=647, id=94 removed by soft count = 104
--- gene=647, id=96 removed by soft count = 40
--- gene=647, id=97 removed by soft count = 314
--- gene=647, id=125 removed by soft count = 101
--- gene=647, id=134 removed by soft count = 65
--- gene=647, id=136 removed by soft count = 41
--- gene=647, id=153 removed by soft count = 298
--- gene=647, id=159 removed by soft count = 41
--- gene=647, id=164 removed by soft count = 558
--- gene=647, id=196 removed by soft count = 36
--- gene=647, id=223 removed by soft count = 16135
--- gene=647, id=225 removed by soft count = 810
--- gene=647, id=283 removed by soft count = 217
--- gene=647, id=288 removed by soft count = 84
--- gene=647, id=291 removed by soft count = 7229
--- gene=647, id=305 removed by soft count = 53
--- gene=647, id=306 removed by soft count = 459
--- gene=647, id=308 removed 

--- gene=660, id=140 removed by soft count = 55
--- gene=660, id=143 removed by soft count = 164
--- gene=660, id=145 removed by soft count = 263
--- gene=660, id=146 removed by soft count = 216
--- gene=660, id=147 removed by soft count = 301
--- gene=660, id=148 removed by soft count = 305
--- gene=660, id=149 removed by soft count = 37
--- gene=660, id=156 removed by soft count = 47
--- gene=660, id=163 removed by soft count = 59
--- gene=660, id=169 removed by soft count = 50
--- gene=660, id=173 removed by soft count = 139
--- gene=660, id=174 removed by soft count = 229
--- gene=660, id=225 removed by soft count = 47
--- gene=660, id=248 removed by soft count = 40
--- number of probes: 350 , kept by blast: 336 , if remove dups: 336
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 662
--- gene=662, id=23 removed by soft count = 189
--- gene=662, id=24 removed by soft count = 178
--- gene=662, id

--- gene=674, id=302 removed by soft count = 86
--- gene=674, id=303 removed by soft count = 41
--- gene=674, id=305 removed by soft count = 8103
--- gene=674, id=306 removed by soft count = 56
--- gene=674, id=307 removed by soft count = 643
--- gene=674, id=308 removed by soft count = 264
--- gene=674, id=339 removed by soft count = 39
--- gene=674, id=343 removed by soft count = 243
--- gene=674, id=348 removed by soft count = 42
--- gene=674, id=349 removed by soft count = 129
--- number of probes: 350 , kept by blast: 288 , if remove dups: 288
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 675
--- gene=675, id=14 removed by soft count = 43
--- gene=675, id=15 removed by soft count = 89
--- gene=675, id=18 removed by soft count = 220
--- gene=675, id=116 removed by soft count = 88
--- gene=675, id=117 removed by soft count = 42
--- gene=675, id=119 removed by soft count = 657
--- gene=675, id=1

--- gene=685, id=39 removed by soft count = 31
--- gene=685, id=50 removed by soft count = 866
--- gene=685, id=52 removed by soft count = 390
--- gene=685, id=62 removed by soft count = 60
--- gene=685, id=127 removed by soft count = 1104
--- gene=685, id=152 removed by soft count = 31
--- gene=685, id=236 removed by soft count = 62
--- gene=685, id=242 removed by soft count = 107
--- gene=685, id=307 removed by soft count = 53
--- number of probes: 350 , kept by blast: 341 , if remove dups: 341
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 687
--- gene=687, id=87 removed by soft count = 42
--- number of probes: 350 , kept by blast: 349 , if remove dups: 349
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 688
--- gene=688, id=183 removed by soft count = 54
--- number of probes: 350 , kept by blast: 349 , if re

--- number of probes: 350 , kept by blast: 318 , if remove dups: 318
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 698
--- gene=698, id=29 removed by soft count = 32
--- gene=698, id=32 removed by soft count = 183
--- gene=698, id=33 removed by soft count = 47
--- gene=698, id=36 removed by soft count = 140
--- gene=698, id=37 removed by soft count = 319
--- gene=698, id=38 removed by soft count = 175
--- gene=698, id=40 removed by soft count = 137
--- gene=698, id=198 removed by soft count = 102
--- gene=698, id=199 removed by soft count = 118
--- gene=698, id=200 removed by soft count = 35
--- gene=698, id=202 removed by soft count = 463
--- gene=698, id=237 removed by soft count = 107
--- gene=698, id=238 removed by soft count = 107
--- gene=698, id=239 removed by soft count = 134
--- gene=698, id=240 removed by soft count = 147
--- gene=698, id=241 removed by soft count = 91
--- gene=698, id=2

--- gene=704, id=57 removed by soft count = 37
--- gene=704, id=61 removed by soft count = 121
--- gene=704, id=62 removed by soft count = 34
--- gene=704, id=161 removed by soft count = 120
--- gene=704, id=183 removed by soft count = 385
--- gene=704, id=186 removed by soft count = 50
--- gene=704, id=189 removed by soft count = 51
--- gene=704, id=275 removed by soft count = 128
--- gene=704, id=312 removed by soft count = 116
--- gene=704, id=314 removed by soft count = 32
--- gene=704, id=345 removed by soft count = 1732
--- gene=704, id=346 removed by soft count = 222
--- gene=704, id=347 removed by soft count = 54
--- number of probes: 350 , kept by blast: 328 , if remove dups: 328
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 705
--- gene=705, id=0 removed by soft count = 75
--- gene=705, id=2 removed by soft count = 209
--- gene=705, id=4 removed by soft count = 37
--- gene=705, id=8 remo

--- gene=714, id=154 removed by soft count = 74
--- gene=714, id=161 removed by soft count = 31
--- gene=714, id=162 removed by soft count = 110
--- gene=714, id=163 removed by soft count = 279
--- gene=714, id=164 removed by soft count = 605
--- gene=714, id=165 removed by soft count = 917
--- gene=714, id=166 removed by soft count = 36
--- gene=714, id=169 removed by soft count = 41
--- gene=714, id=170 removed by soft count = 691
--- gene=714, id=171 removed by soft count = 200
--- gene=714, id=174 removed by soft count = 49
--- gene=714, id=180 removed by soft count = 32
--- gene=714, id=186 removed by soft count = 40
--- gene=714, id=296 removed by soft count = 145
--- gene=714, id=328 removed by soft count = 74
--- gene=714, id=329 removed by soft count = 197
--- gene=714, id=330 removed by soft count = 92
--- gene=714, id=331 removed by soft count = 34
--- gene=714, id=332 removed by soft count = 63
--- gene=714, id=333 removed by soft count = 1089
--- gene=714, id=335 removed b

--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 732
--- gene=732, id=160 removed by soft count = 92
--- gene=732, id=272 removed by soft count = 240
--- number of probes: 350 , kept by blast: 348 , if remove dups: 348
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 733
--- gene=733, id=0 removed by soft count = 694
--- gene=733, id=1 removed by soft count = 779
--- gene=733, id=32 removed by soft count = 35
--- gene=733, id=60 removed by soft count = 151
--- gene=733, id=126 removed by soft count = 538
--- gene=733, id=128 removed by soft count = 67
--- gene=733, id=131 removed by soft count = 31
--- gene=733, id=134 removed by soft count = 143
--- gene=733, id=135 removed by soft count = 249
--- gene=733, id=136 removed by soft count = 106
--- 

--- gene=745, id=86 removed by soft count = 513
--- gene=745, id=263 removed by soft count = 91
--- gene=745, id=267 removed by soft count = 223
--- gene=745, id=333 removed by soft count = 35
--- number of probes: 350 , kept by blast: 341 , if remove dups: 341
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 747
--- gene=747, id=77 removed by soft count = 114
--- gene=747, id=80 removed by soft count = 39
--- gene=747, id=238 removed by soft count = 76
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
--- keep probes from beginning
-- number of probes kept for this region: 250
-- checking probes in region: 748
--- gene=748, id=240 removed by soft count = 295
--- gene=748, id=241 removed by soft count = 779
--- gene=748, id=242 removed by soft count = 411
--- number of probes: 350 , kept by blast: 347 , if remove dups: 347
--- remove duplicated probes
-

-- region: 3441 , number of probes: 250
-- region: 3446 , number of probes: 250
-- region: 3451 , number of probes: 250
-- region: 3456 , number of probes: 250
-- region: 3461 , number of probes: 250
-- region: 3466 , number of probes: 250
-- region: 3471 , number of probes: 250
-- region: 3476 , number of probes: 250
-- region: 3481 , number of probes: 250
-- region: 3486 , number of probes: 250
-- region: 3491 , number of probes: 250
-- region: 3496 , number of probes: 250
-- region: 3501 , number of probes: 250
-- region: 3506 , number of probes: 250
-- region: 3511 , number of probes: 250
-- region: 3516 , number of probes: 250
-- region: 3521 , number of probes: 250
-- region: 3526 , number of probes: 250
-- region: 3531 , number of probes: 250
-- region: 3536 , number of probes: 250
-- region: 3541 , number of probes: 250
-- region: 3546 , number of probes: 250
-- region: 3551 , number of probes: 250
-- region: 3556 , number of probes: 250
-- region: 3561 , number of probes: 250


<a id='4.4'></a>
## 4.4 extra round of check

In [150]:
# Extra round of consistency checks on `kept_pbs` before the library is saved.
# NOTE(review): `kept_pbs` is presumably the probe set kept after the BLAST screen in 4.3 -- confirm.
# Judging from the cell output, the `check` helpers print their own progress logs,
# so the order of the statements below determines the order of the printed report.

# primer usage check against the two primers in `primers`; the printed flag is the helper's result
primer_check = check._check_primer_usage(kept_pbs, primers[0], primers[1])
print(primer_check)
# per-region size table plus a flag; min_size=15 is the threshold handed to the helper
# (presumably the minimum number of probes per region -- TODO confirm in `check`)
reg_size_dic, len_check = check._check_region_size(kept_pbs,min_size=15)
print(len_check)
# region -> readouts table plus a flag; total_readout_num=2 matches the two readout
# names listed per region in the cell output
reg_readout_dic, reg2readout_check = check._check_region_to_readouts(kept_pbs, readout_dict, total_readout_num=2)
print(reg2readout_check)

# reverse direction: readout -> regions table, built from reg_readout_dic computed above
readout_reg_dic, readout2reg_check = check._check_readout_to_region(reg_readout_dic, kept_pbs, readout_dict,probe_readout_num=2)
print(readout2reg_check)

# internal map of the kept probes; the cell output shows it is a 17-mer map that the
# helper saves as an .npz file under library_folder
int_map = check._construct_internal_map(kept_pbs, library_folder)

# readout occurrence check; consumes the tables from the previous steps and the internal map
readout_count_dic, readout_count_check = check._check_readout_in_probes(readout_reg_dic, reg_size_dic, int_map, readout_dict)
print(readout_count_check)

# internal (probe-vs-probe) screening using int_map; returns the records that are kept
# and the number removed (0 in the recorded run)
kept_records, removed_count = check._check_between_probes(kept_pbs, int_map)


-- Checking primer usage, total probes: 275184
True
gene: 1 -> 250
gene: 6 -> 250
gene: 11 -> 250
gene: 16 -> 250
gene: 21 -> 250
gene: 26 -> 250
gene: 31 -> 250
gene: 36 -> 250
gene: 41 -> 250
gene: 46 -> 250
gene: 51 -> 250
gene: 56 -> 250
gene: 61 -> 250
gene: 66 -> 250
gene: 71 -> 250
gene: 76 -> 250
gene: 81 -> 250
gene: 86 -> 250
gene: 91 -> 250
gene: 96 -> 250
gene: 101 -> 250
gene: 106 -> 250
gene: 111 -> 250
gene: 116 -> 250
gene: 121 -> 250
gene: 126 -> 250
gene: 131 -> 250
gene: 136 -> 250
gene: 141 -> 250
gene: 146 -> 250
gene: 151 -> 250
gene: 156 -> 250
gene: 161 -> 250
gene: 166 -> 250
gene: 171 -> 250
gene: 176 -> 250
gene: 181 -> 250
gene: 186 -> 250
gene: 191 -> 250
gene: 196 -> 250
gene: 201 -> 250
gene: 206 -> 250
gene: 211 -> 250
gene: 216 -> 250
gene: 221 -> 250
gene: 226 -> 250
gene: 231 -> 250
gene: 236 -> 250
gene: 241 -> 250
gene: 246 -> 250
gene: 251 -> 250
gene: 256 -> 250
gene: 261 -> 250
gene: 266 -> 250
gene: 271 -> 250
gene: 276 -> 250
gene: 281 -> 250
g

1 -> ['Stv_3', 'Stv_3']
6 -> ['Stv_32', 'Stv_32']
11 -> ['Stv_91', 'Stv_91']
16 -> ['Stv_4', 'Stv_4']
21 -> ['Stv_33', 'Stv_33']
26 -> ['Stv_92', 'Stv_92']
31 -> ['Stv_5', 'Stv_5']
36 -> ['Stv_35', 'Stv_35']
41 -> ['Stv_94', 'Stv_94']
46 -> ['Stv_6', 'Stv_6']
51 -> ['Stv_36', 'Stv_36']
56 -> ['Stv_95', 'Stv_95']
61 -> ['Stv_7', 'Stv_7']
66 -> ['Stv_37', 'Stv_37']
71 -> ['Stv_99', 'Stv_99']
76 -> ['Stv_8', 'Stv_8']
81 -> ['Stv_39', 'Stv_39']
86 -> ['Stv_100', 'Stv_100']
91 -> ['Stv_9', 'Stv_9']
96 -> ['Stv_40', 'Stv_40']
101 -> ['Stv_101', 'Stv_101']
106 -> ['Stv_10', 'Stv_10']
111 -> ['Stv_42', 'Stv_42']
116 -> ['Stv_104', 'Stv_104']
121 -> ['Stv_11', 'Stv_11']
126 -> ['Stv_44', 'Stv_44']
131 -> ['Stv_105', 'Stv_105']
136 -> ['Stv_12', 'Stv_12']
141 -> ['Stv_45', 'Stv_45']
146 -> ['Stv_106', 'Stv_106']
151 -> ['Stv_13', 'Stv_13']
156 -> ['Stv_46', 'Stv_46']
161 -> ['Stv_107', 'Stv_107']
166 -> ['Stv_14', 'Stv_14']
171 -> ['Stv_48', 'Stv_48']
176 -> ['Stv_109', 'Stv_109']
181 -> ['Stv_1

-- extracting probes from >region:1, region:6, region:11, region:16, region:21, region:26, region:31, region:36, region:41, region:46, region:51, region:56, region:61, region:66, region:71, region:76, region:81, region:86, region:91, region:96, region:101, region:106, region:111, region:116, region:121, region:126, region:131, region:136, region:141, region:146, region:151, region:156, region:161, region:166, region:171, region:176, region:181, region:186, region:191, region:196, region:201, region:206, region:211, region:216, region:221, region:226, region:231, region:236, region:241, region:246, region:251, region:256, region:261, region:266, region:271, region:276, region:281, region:286, region:291, region:296, region:301, region:306, region:311, region:316, region:321, region:326, region:331, region:336, region:341, region:346, region:351, region:356, region:361, region:366, region:371, region:376, region:381, region:386, region:391, region:396, region:401, region:406, region:411,

region:3376, region:3381, region:3386, region:3391, region:3396, region:3401, region:3406, region:3411, region:3416, region:3421, region:3426, region:3431, region:3436, region:3441, region:3446, region:3451, region:3456, region:3461, region:3466, region:3471, region:3476, region:3481, region:3486, region:3491, region:3496, region:3501, region:3506, region:3511, region:3516, region:3521, region:3526, region:3531, region:3536, region:3541, region:3546, region:3551, region:3556, region:3561, region:3566, region:3571, region:3576, region:3581, region:3586, region:3591, region:3596, region:3601, region:3606, region:3611, region:3616, region:3621, region:3626, region:3631, region:3636, region:3641, region:3646, region:3651, region:3656, region:3661, region:3666, region:3671, region:3676, region:3681, region:3686, region:3691, region:3696, region:3701, region:3706, region:3711, region:3716, region:3721, region:3726, region:3731, region:3736, region:3741, region:3746, region:3751, region:3756,

-- saving internal 17-mer map to file:X:\Libraries\CTP-07\chr2\probe_table_17.npz
True
-- total probes removed by internal screening: 0


FileNotFoundError: [Errno 2] No such file or directory: 'X:\\Libraries\\CTP-07\\chr2\\final_probes\\extra_filtered_full_probes.fasta'

In [160]:
# number of probe records returned by check._check_between_probes
len(kept_records)

275184

<a id='4.5'></a>
## 4.5 trim 1 base from each end so that probes fit within 120 nt (for ordering purposes)

In [155]:
from copy import copy


def _drop_terminal_bases(_record):
    """Return a shallow copy of `_record` whose sequence lacks its first and last base."""
    _trimmed = copy(_record)
    # [1:-1] removes exactly one base from each end; the seq attribute of the
    # input record is not reassigned, only the copy's
    _trimmed.seq = _trimmed.seq[1:-1]
    return _trimmed


# trimmed counterpart of kept_records, in the same order
trim_kept_records = [_drop_terminal_bases(_r) for _r in kept_records]

<a id='5'></a>
# 5. Save probes

In [159]:
# Folder that receives the final probe files.
final_probe_folder = os.path.join(library_folder, 'final_probes')
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race
# of a separate os.path.exists() test
os.makedirs(final_probe_folder, exist_ok=True)

# save the trimmed records in FASTA format; the path is built from final_probe_folder
# so the folder name is defined in exactly one place
with open(os.path.join(final_probe_folder, 'extra_filtered_full_probes.fasta'), 'w') as output_handle:
    SeqIO.write(trim_kept_records, output_handle, "fasta")

In [171]:
import csv

# Export the trimmed probes as a two-column table (record id, sequence string)
# into final_probe_folder; no header row is written.
_csv_path = os.path.join(final_probe_folder, 'extra_filtered_full_probes.csv')
with open(_csv_path, 'w', newline='') as _file:
    _writer = csv.writer(_file, dialect='excel')
    # one row per record, in the order of trim_kept_records
    _writer.writerows([_r.id, str(_r.seq)] for _r in trim_kept_records)