-
Notifications
You must be signed in to change notification settings - Fork 67
/
SmarterPairedEndAligner.h
148 lines (106 loc) · 4.76 KB
/
SmarterPairedEndAligner.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*++
Module Name:
SmarterPairedEndAligner.h
Abstract:
A more sophisticated paired-end aligner.
Authors:
Matei Zaharia, February, 2012
Environment:
User mode service.
Revision History:
--*/
#pragma once
#include "PairedEndAligner.h"
#include "BaseAligner.h"
#include "FixedSizeMap.h"
#include "FixedSizeVector.h"
#include "BigAlloc.h"
class SmarterPairedEndAligner : public PairedEndAligner
{
public:
SmarterPairedEndAligner(
GenomeIndex *index_,
unsigned maxReadSize_,
unsigned confDiff_,
unsigned maxHits_,
unsigned maxK_,
unsigned maxSeeds_,
unsigned minSpacing_, // Minimum distance to allow between the two ends.
unsigned maxSpacing_, // Maximum distance to allow between the two ends.
unsigned adaptiveConfDiffThreshold_); // Increase confDiff if this many seeds in the read have multiple hits.
virtual ~SmarterPairedEndAligner();
virtual void align(
Read *read0,
Read *read1,
PairedAlignmentResult *result);
void *operator new(size_t size) {return BigAlloc(size);}
void operator delete(void *ptr) {BigDealloc(ptr);}
private:
static const int BUCKET_SIZE = 16;
static const int INFINITE_SCORE = 0x7FFF;
static const int MAX_BUCKETS = 32 * 1024;
static const int MAX_READ_SIZE = 10000;
static const int MAX_SEED_SIZE = 32;
char complement[256];
int wrapOffset[MAX_SEED_SIZE];
GenomeIndex *index;
int seedLen;
unsigned maxReadSize;
unsigned confDiff;
unsigned maxHits;
unsigned maxK;
unsigned maxSeeds;
unsigned minSpacing;
unsigned maxSpacing;
unsigned adaptiveConfDiffThreshold;
BaseAligner *singleAligner;
BaseAligner *mateAligner;
LandauVishkin lv;
struct Bucket {
unsigned found; // Bit vector for sub-locations matched
unsigned scored; // Bit vector for sub-locations scored
unsigned score; // Best score for any element in the bucket
double matchProbability; // Match probability of the element represented by score
unsigned short bestOffset; // Offset that gave us the best score (if any)
unsigned short seedHits; // Number of seeds that hit this bucket
unsigned short disjointSeedHits; // Number of disjoint seeds that hit this bucket
unsigned short minPairScore; // Lower bound on the bucket's pair score (if not known)
AlignmentResult mateStatus; // If we've searched for a mate nearby, this is the result
int mateScore; // Score of the mate found nearby, if any
unsigned mateLocation; // Location of the mate found nearby, if any
inline bool allScored() { return scored == found; }
void *operator new[](size_t size) {return BigAlloc(size);}
void operator delete[](void *ptr) {BigDealloc(ptr);}
};
struct Candidate {
char read;
char isRC;
short seedHits;
unsigned bucketLoc;
Bucket *bucket;
Candidate(char read_, char isRC_, unsigned bucketLoc_, Bucket *bucket_, short seedHits_)
: read(read_), isRC(isRC_), bucketLoc(bucketLoc_), bucket(bucket_), seedHits(seedHits_) {}
Candidate() {}
};
Bucket *buckets;
int bucketsUsed;
FixedSizeMap<unsigned, Bucket*> bucketTable[2][2]; // [read][isRC]
FixedSizeVector<unsigned> bucketLocations[2][2]; // [read][isRC]
FixedSizeVector<Candidate> candidates;
void alignTogether(Read *reads[2], PairedAlignmentResult *result, int lowerBound[2]);
void clearState();
Bucket *newBucket();
Bucket *getBucket(int read, int isRC, unsigned location);
void computeRC(Read *read, char *outputBuf);
void scoreBucket(Bucket *bucket, int readId, bool isRC, unsigned location,
const char *readData, const char *qualityString, int readLen, int scoreLimit, double *matchProbability);
void scoreBucketMate(Bucket *bucket, int readId, bool isRC, unsigned location, Read *mate, int scoreLimit, int *mateMapq);
// Absolute difference between two unsigned values.
inline unsigned distance(unsigned a, unsigned b) { return (a > b) ? a - b : b - a; }
// Get the confDiff value to use given # of popular seeds in each [read][isRC] orientation.
int getConfDiff(int seedsTried, int popularSeeds[2][2], int seedHits[2][2]);
// Sort candidates in decreasing order of seed hits.
static inline bool compareCandidates(const Candidate &c1, const Candidate &c2) {
return c1.seedHits > c2.seedHits;
}
};