Skip to content

Commit

Permalink
working on CIGAR fix
Browse files Browse the repository at this point in the history
  • Loading branch information
gymreklab committed Feb 11, 2019
1 parent 1da2bb8 commit cc9b152
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 109 deletions.
137 changes: 30 additions & 107 deletions src/realignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -665,114 +665,37 @@ bool cigar_realignment(BamAlignment& aln,
return true;
}

// get only cigar score spanning the STR
const int& str_start_in_cigar = str_pos-aln.Position();
// position into the segment
int pos = 0;
// base pairs spanned by this cigar item
int bp = 0;
// type of the cigar item
char cigar_type;
// index into the cigar score
size_t cigar_index = 0;
// diff to go until end of this segment
int diff = 0;
// temp cigar list to store when removing flanks
std::vector<CigarOp> new_cigar_list;
// list with only cigars for the STR region
std::vector<CigarOp> str_cigar_list;
// Diff in bp from ref STR
int diff_from_ref = 0;

// get rid of left flanking region
while (pos <= str_start_in_cigar &&
cigar_index < cigar_list.size()) {
bp = cigar_list.at(cigar_index).Length;
cigar_type = cigar_list.at(cigar_index).Type;
// If match or del, increment position
if (cigar_type == 'M' || cigar_type == 'D' || cigar_type == 'S') pos += bp;
// bp to go until we hit STR
diff = pos - str_start_in_cigar;
if (diff >= 0) {
size_t cigar_index_to_include = cigar_index;
// If left adjacent cigar is not M or S, include it
if (diff == 0 && (cigar_list.at(cigar_index).Type == 'M' ||
cigar_list.at(cigar_index).Type == 'S')) {
cigar_index_to_include += 1;
} else {
diff -= cigar_list.at(cigar_index).Length;
}
size_t newsize = cigar_list.size() - cigar_index_to_include;
new_cigar_list.resize(newsize);
copy(cigar_list.begin() + cigar_index_to_include,
cigar_list.end(),
new_cigar_list.begin());
break;
}
cigar_index += 1;
}
// Update STR cigar taking away left flank
str_cigar_list = new_cigar_list;
new_cigar_list.clear();

// get rid of right flank cigars
// start at beginning of STR list
cigar_index = 0;
// Pos from end of the STR region
pos = diff;
int total_str_len = static_cast<int>(ms_length);
while (pos < total_str_len) {
if (cigar_index >= str_cigar_list.size()) {
return false;
}
bp = str_cigar_list.at(cigar_index).Length;
cigar_type = str_cigar_list.at(cigar_index).Type;
if (cigar_type == 'M' || cigar_type == 'D' || cigar_type == 'S')
pos += bp;
// Difference between our position and the end of the STR
diff = pos-total_str_len;
if (diff >= 0) {
size_t cigar_index_to_include = cigar_index;
// If right adjacent is not M or S, include it
if (cigar_index < str_cigar_list.size() - 1) {
const char& next_type = str_cigar_list.
at(cigar_index+1).Type;
if (next_type != 'M' && next_type != 'S' && diff == 0) {
cigar_index_to_include += 1;
}
}
new_cigar_list.resize(cigar_index_to_include + 1);
copy(str_cigar_list.begin(),
str_cigar_list.begin() + cigar_index_to_include + 1,
new_cigar_list.begin());
break;
}
cigar_index += 1;
// get from copy
BamAlignment copy_aln(aln);
int32_t min_read_start = str_pos-MIN_DIST_FROM_END;
int32_t max_read_stop = str_end+MIN_DIST_FROM_END;
copy_aln.TrimAlignment(min_read_start, max_read_stop);

// set diff from ref
std::vector<CigarOp> str_cigar_list = copy_aln.CigarData();
int diff_from_ref = 0;
for (size_t i = 0; i < str_cigar_list.size(); i++) {
if (str_cigar_list.at(i).Type == 'I') {
diff_from_ref += str_cigar_list.at(i).Length;
}
if (str_cigar_list.at(i).Type == 'D') {
diff_from_ref -= str_cigar_list.at(i).Length;
}
str_cigar_list.clear();
str_cigar_list = new_cigar_list;
}

// set diff from ref
diff_from_ref = 0;
for (size_t i = 0; i < str_cigar_list.size(); i++) {
if (str_cigar_list.at(i).Type == 'I') {
diff_from_ref += str_cigar_list.at(i).Length;
}
if (str_cigar_list.at(i).Type == 'D') {
diff_from_ref -= str_cigar_list.at(i).Length;
}
}
// Skeptical if not multiple of the repeat unit
if (diff_from_ref % period != 0) {
return false;
}
// std::cerr << aln.Name() << " " << aln.QueryBases() << " " << diff_from_ref << std::endl;

// Set outputs
*nCopy = (int32_t)(str_end-str_pos + 1 + diff_from_ref)/period;
*score = 10000; // Score not meaningful here
*start_pos = aln.Position();
*end_pos = aln.Position() + aln.QueryBases().size() + diff_from_ref;
*fm_start = FM_COMPLETE;
*fm_end = FM_COMPLETE;
return true;

// Skeptical if not multiple of the repeat unit
if (diff_from_ref % period != 0) {
return false;
}
// Set outputs
*nCopy = (int32_t)(str_end-str_pos + 1 + diff_from_ref)/period;
*score = 10000; // Score not meaningful here
*start_pos = aln.Position();
*end_pos = aln.Position() + aln.QueryBases().size() + diff_from_ref;
*fm_start = FM_COMPLETE;
*fm_end = FM_COMPLETE;
return true;
}
4 changes: 3 additions & 1 deletion test.bed
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
chr4 3076604 3076660 3 CAG CAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAGCAG 7
chr1 768117 768161 5 GTTTT GTTTTGTTTTGTTTTGTTTTGTTTTGTTTTGTTTTGTTTTGTTTT 0
chr1 948423 948431 3 ACA ACAACAACA 7
chr1 1587341 1587360 4 AAAC AAACAAACAAACAAACAAAC 0
4 changes: 3 additions & 1 deletion test.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,6 @@
##FORMAT=<ID=STDERR,Number=2,Type=Float,Description="Bootstrap standard error of each allele">
##FORMAT=<ID=QEXP,Number=3,Type=Float,Description="Prob. of no expansion, 1 expanded allele, both expanded alleles">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
chr4 3076604 . cagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcag cagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcag,cagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcagcag . . END=3076660;RU=cag;PERIOD=3;REF=19;GRID=13,22;STUTTERUP=0.0364653;STUTTERDOWN=0.0428387;STUTTERP=0.818913;EXPTHRESH=-1 GT:DP:Q:REPCN:REPCI:RC:ML:INS:STDERR:QEXP 1/2:54:0.999992:16,18:16-18,18-19:10,22,0,21:412.793:321.31,74.0015:0.459943,0.21692:-1,-1,-1
chr1 768117 . gttttgttttgttttgttttgttttgttttgttttgttttgtttt gttttgttttgttttgttttgttttgttttgttttgttttgttttgtttt . . END=768161;RU=gtttt;PERIOD=5;REF=9;GRID=7,13;STUTTERUP=0.0364653;STUTTERDOWN=0.0428387;STUTTERP=0.818913;EXPTHRESH=-1 GT:DP:Q:REPCN:REPCI:RC:ML:INS:STDERR:QEXP 1/1:97:0.999558:10,10:10-10,10-10:13,33,0,51:710.166:323.282,72.4711:0,0:-1,-1,-1
chr1 948423 . acaacaaca acaacaacaaca . . END=948431;RU=aca;PERIOD=3;REF=3;GRID=1,7;STUTTERUP=0.0364653;STUTTERDOWN=0.0428387;STUTTERP=0.818913;EXPTHRESH=-1 GT:DP:Q:REPCN:REPCI:RC:ML:INS:STDERR:QEXP 1/1:85:1:4,4:4-4,4-4:46,26,0,13:518.074:323.282,72.4711:0,0:-1,-1,-1
chr1 1587341 . aaacaaacaaacaaacaaac aaacaaacaaacaaacaaacaaac . . END=1587360;RU=aaac;PERIOD=4;REF=5;GRID=3,9;STUTTERUP=0.0364653;STUTTERDOWN=0.0428387;STUTTERP=0.818913;EXPTHRESH=-1 GT:DP:Q:REPCN:REPCI:RC:ML:INS:STDERR:QEXP 1/1:103:1:6,6:6-6,6-6:31,47,0,25:727.843:323.282,72.4711:0,0:-1,-1,-1

0 comments on commit cc9b152

Please sign in to comment.