forked from liangclab/HERA
-
Notifications
You must be signed in to change notification settings - Fork 1
/
12-PacbioAlignmentLinker.pl
135 lines (109 loc) · 7.24 KB
/
12-PacbioAlignmentLinker.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/perl
#Author: huilong du
#Note: Finding the proper overlap for constructing the graph
use warnings;
use strict;
# Usage: 12-PacbioAlignmentLinker.pl <alignment_table> <MinOverhangLength> <MinExtend>
#
# Reads a whitespace-separated alignment table with the columns
#   qName tName qStrand tStrand score percentSimilarity
#   tStart tEnd tLength qStart qEnd qLength nCells
# (query = Pacbio read, target = Illumina contig) and prints one tab-separated
# record to STDOUT for every alignment in which the read runs past one end of
# the contig while the contig runs past the opposite end of the read.
#
# Printed columns:
#   tag, target strand flag (0|1), percentSimilarity, Final_Score, tName,
#   qName, tag, qStart, qEnd, qLength, tStart, tEnd, tLength,
#   unaligned read length, unaligned contig length
#
# MinOverhangLength : an overhang no longer than this is treated as "flush".
# MinExtend         : an overhang must be at least this long to count as a
#                     usable extension.
my $inputfile         = shift;
my $MinOverhangLength = shift;
my $MinExtend         = shift;

# Fail early with a usage message instead of emitting a stream of
# "uninitialized value" warnings from every comparison further down.
die "Usage: $0 <alignment_table> <MinOverhangLength> <MinExtend>\n"
    unless defined $inputfile
    && defined $MinOverhangLength
    && defined $MinExtend;

# Three-argument open with a lexical handle: the file name can no longer be
# misread as an open mode, and the handle is scoped to this script body.
open my $in, '<', $inputfile
    or die "Cannot open '$inputfile' for reading: $!\n";

LINE:
while ( my $line = <$in> ) {
    chomp $line;

    # Header row (any line mentioning the nCells column name).
    next LINE if $line =~ m/nCells/;

    # split ' ' ignores leading whitespace, so an indented row does not shift
    # every column by one the way split /\s+/ would.
    my @Columns = split ' ', $line;
    next LINE unless @Columns;    # blank line

    # Twelve columns (indices 0..11) are read below; a truncated row would
    # otherwise be classified using undefined values.
    if ( @Columns < 12 ) {
        warn sprintf(
            "Skipping line %d of '%s': expected at least 12 columns, found %d\n",
            $., $inputfile, scalar @Columns );
        next LINE;
    }

    my (
        $qName,  $tName, $qStrand, $tStrand,
        $score,  $percentSimilarity,
        $tStart, $tEnd,  $tLength,
        $qStart, $qEnd,  $qLength
    ) = @Columns[ 0 .. 11 ];

    # Only rows with qStrand 0 and tStrand 0 or 1 are classified; every other
    # combination produces no output.
    # NOTE(review): presumably the aligner always reports the query on
    # strand 0 -- confirm against the producer of this table.
    next LINE unless $qStrand == 0;
    next LINE unless $tStrand == 0 || $tStrand == 1;
    my $same_strand = ( $tStrand == 0 );

    # Unaligned sequence on each side of the aligned region.
    my $RefLeftOverhang  = $tStart;
    my $RefRightOverhang = $tLength - $tEnd;
    my $QryLeftOverhang  = $qStart;
    my $QryRightOverhang = $qLength - $qEnd;

    # Containment: one sequence is flush at both ends while the other
    # overhangs at both ends. Such alignments are not reported.
    my $contig_inside_read =
           $RefLeftOverhang  <= $MinOverhangLength
        && $RefRightOverhang <= $MinOverhangLength
        && $QryLeftOverhang  >= $MinOverhangLength
        && $QryRightOverhang >= $MinOverhangLength;
    my $read_inside_contig =
           $RefLeftOverhang  >= $MinOverhangLength
        && $RefRightOverhang >= $MinOverhangLength
        && $QryLeftOverhang  <= $MinOverhangLength
        && $QryRightOverhang <= $MinOverhangLength;
    next LINE if $contig_inside_read || $read_inside_contig;

    # Alignment starts at the contig's first bases and ends at the read's
    # last bases: the read has unaligned sequence before tStart, the contig
    # has unaligned sequence after tEnd.
    my $read_past_contig_start =
           $RefLeftOverhang  <= $MinOverhangLength
        && $RefRightOverhang >= $MinExtend
        && $QryLeftOverhang  >= $MinExtend
        && $QryRightOverhang <= $MinOverhangLength;

    # Mirror image: alignment ends at the contig's last bases and starts at
    # the read's first bases.
    my $read_past_contig_end =
           $RefLeftOverhang  >= $MinExtend
        && $RefRightOverhang <= $MinOverhangLength
        && $QryLeftOverhang  <= $MinOverhangLength
        && $QryRightOverhang >= $MinExtend;

    # Both geometries can hold at once when MinExtend < MinOverhangLength.
    # The tie is broken per strand: "start" wins for tStrand 0, "end" wins
    # for tStrand 1.
    my %matched = (
        start => $read_past_contig_start,
        end   => $read_past_contig_end,
    );
    my @priority = $same_strand ? qw(start end) : qw(end start);
    my ($geometry) = grep { $matched{$_} } @priority;
    next LINE unless defined $geometry;

    my ( $first_tag, $second_tag, $penalty, @extension );
    if ( $geometry eq 'start' ) {
        $first_tag  = $same_strand ? 'left' : 'right';
        $second_tag = 'right';

        # Penalise the two "flush" sides by the bases they fail to align.
        $penalty   = ( $QryRightOverhang + $RefLeftOverhang ) / 2;
        @extension = ( $QryLeftOverhang, $RefRightOverhang );
    }
    else {
        $first_tag  = $same_strand ? 'right' : 'left';
        $second_tag = 'left';
        $penalty    = ( $RefRightOverhang + $QryLeftOverhang ) / 2;
        @extension  = ( $QryRightOverhang, $RefLeftOverhang );
    }

    # The raw score is negative in the input; its magnitude is scaled down
    # by 100 before the overhang penalty is subtracted.
    my $Final_Score = int( abs($score) / 100 - $penalty );

    print join(
        "\t",
        $first_tag, ( $same_strand ? 0 : 1 ),
        $percentSimilarity, $Final_Score,
        $tName,  $qName, $second_tag,
        $qStart, $qEnd,  $qLength,
        $tStart, $tEnd,  $tLength,
        @extension
        ),
        "\n";
}
close $in;