-
Notifications
You must be signed in to change notification settings - Fork 67
/
VCF.h
138 lines (131 loc) · 4.6 KB
/
VCF.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#pragma once
#include <queue>
#include "Mutation.h"
#include "Compat.h"
#include "Genome.h"
namespace sim {
//
// Write a VCF file.
//
// Variant Call Format version 4.0: <http://www.1000genomes.org/node/101>.
//
// Polymorphisms fed to this class must be in order and nonoverlapping, with nonempty alleles.
//
class VCFWriter {
public:
explicit VCFWriter(FILE *i_output) : output(i_output) {
fprintf(output,
"##fileformat=VCFv4.0\n"
"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n"
"##source=JessePrototype\n"
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\n");
}
~VCFWriter() {
fclose(output);
}
//
// Write a nontrivial haploid or diploid mutation.
// Internally, this function converts from 0-based to 1-based chromosome coordinates.
//
template<class AlleleChange>
void addLine(const Locus &locus, const AlleleChange &alleleChange) {
addLine(locus, alleleChange.getRef(), alleleChange.getAlt());
}
void addLine(const Locus &locus, const Allele &ref, const Allele &alt) {
if (alt != ref) {
startLine(locus, ref);
endLine(alt, "1");
}
}
void addLine(const Locus &locus, const Allele &ref, const Gtype >ype);
private:
FILE *output;
void startLine(const Locus &locus, const Allele &ref) {
fprintf(output, "%s\t%u\t.\t%s\t", locus.getChrom().c_str(), locus.getPos() + 1,
ref.c_str());
}
void endLine(const std::string &alleles, const std::string >ype) {
static const char *boilerplate = "200\tPASS\t.\tGT"; // Huge Phred score.
fprintf(output, "%s\t%s\t%s\n", alleles.c_str(), boilerplate, gtype.c_str());
_ASSERT(!ferror(output));
}
};
//
// Hide the fact that VCF alleles can't be empty.
//
template<class AlleleChange> class VCFBuffer {
public:
VCFBuffer(const Genome &i_genome, VCFWriter *i_vcfWriter) :
genome(i_genome), vcfWriter(i_vcfWriter) {}
~VCFBuffer() {
flush();
}
//
// Add a mutation at 'locus'.
//
// 'locus' must be valid (0-based) in the genome used to construct this 'VCFBuffer'.
// Each allele must be an ACGT string, possibly empty.
// Mutations must be added in order, without overlapping.
//
void addMutation(const Locus &locus, const AlleleChange &alleleChange) {
_ASSERT(!alleleChange.isInsertion() || !alleleChange.isDeletion());
Line line(locus, alleleChange);
if (!alleleChange.isInsertion() && !alleleChange.isDeletion()) {
pushLine(line);
} else {
if (alleleChange.isInsertion()) { // In wgsim's stdout, an insertion follows the base.
pushLine(trivialLine(locus, genome).merge(line));
} else if (alleleChange.isDeletion()) {
if (!locus.getPos())
fprintf(stderr,
"WARNING: Jesse made me ignore deletion at chromosome start.\n");
else if (!buffer.empty() && buffer.back().justBefore(locus)) {
buffer.back().merge(line);
} else {
Locus shiftedLocus(locus.getChrom(), locus.getPos() - 1);
pushLine(trivialLine(shiftedLocus, genome).merge(line));
}
}
}
}
//
// Write any unwritten VCF lines.
//
void flush() {
while (!buffer.empty()) {
writeLine(buffer.front());
buffer.pop();
}
}
private:
struct Line {
Line(const Locus &i_locus, const AlleleChange &i_alleleChange) :
locus(i_locus), alleleChange(i_alleleChange) {}
Locus locus;
AlleleChange alleleChange;
bool justBefore(const Locus &nextLocus) const {
return locus.getChrom() == nextLocus.getChrom() &&
locus.getPos() + alleleChange.numReplaced() == nextLocus.getPos();
}
const Line &merge(const Line &that) {
alleleChange.merge(that.alleleChange);
return *this;
}
};
static Line trivialLine(const Locus &locus, const Genome &genome) {
return Line(locus, AlleleChange::trivial(locus, genome));
}
void writeLine(const Line &line) {
vcfWriter->addLine(line.locus, line.alleleChange);
}
void pushLine(const Line &line) {
flush();
buffer.push(line);
}
std::queue<Line> buffer;
const Genome &genome;
VCFWriter *vcfWriter;
};
typedef VCFBuffer<AlleleChange> HaploidVCFBuffer;
typedef VCFBuffer<DiploidAlleleChange> DiploidVCFBuffer;
}