-
Notifications
You must be signed in to change notification settings - Fork 16
/
Arcs.h
172 lines (148 loc) · 5.01 KB
/
Arcs.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#ifndef ARCS_H
#define ARCS_H 1
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/graph/undirected_graph.hpp>
#include <boost/graph/graphviz.hpp>

#include "Common/Uncompress.h"
#include "DataLayer/FastaReader.h"
// NOTE(review): a .cpp file is included from this header, so its definitions are
// compiled into every translation unit that includes Arcs.h - confirm this is intentional.
#include "DataLayer/FastaReader.cpp"
namespace ARCS {
/** value to use for 'd' in ABySS dist.gv */
enum DistMode {
    DIST_MEDIAN = 0,
    DIST_UPPER
};

/**
 * Parameters controlling an ARCS run.
 *
 * Every non-string member carries its default value at the point of
 * declaration; the string members start out empty.
 */
struct ArcsParams {
    std::string file;
    std::string fofName;
    int seq_id = 98;
    int min_reads = 5;
    /** enable/disable distance estimation on graph edges */
    bool dist_est = false;
    /** bin size when computing distance estimates */
    unsigned dist_bin_size = 20;
    /** output path for intra-contig distance/barcode samples (TSV) */
    std::string dist_samples_tsv;
    /** output path for inter-contig distance estimates (TSV) */
    std::string dist_tsv;
    /** chooses median or upper bound for `d` in ABySS dist.gv */
    DistMode dist_mode = DIST_MEDIAN;
    int min_links = 0;
    int min_size = 500;
    std::string base_name;
    std::string dist_graph_name;
    std::string tsv_name;
    std::string barcode_counts_name;
    unsigned gap = 100;
    int min_mult = 50;
    int max_mult = 10000;
    int max_degree = 0;
    int end_length = 0;
    float error_percent = 0.05f;
    int verbose = 0;

    /** Builds a parameter set holding the defaults listed above. */
    ArcsParams() = default;
};
/* ScafMap: key = pair(scaffold id, bool), value = count (# times an index maps to that scaffold end); bool: true = head, false = tail */
using ScafMap = std::map<std::pair<std::string, bool>, int>;
using ScafMapConstIt = ScafMap::const_iterator;
/* IndexMap: key = index sequence, value = ScafMap */
using IndexMap = std::unordered_map<std::string, ScafMap>;
/* PairMap: key = pair(first < second) of scaf sequence id, value = vector of unsigned link counts
 * (NOTE(review): the meaning of each vector slot is not visible here - presumably one per orientation; confirm) */
using PairMap = std::map<std::pair<std::string, std::string>, std::vector<unsigned>>;
/** A contig end: (FASTA ID, head?) */
using CI = std::pair<std::string, bool>;
/** a pair of contig IDs */
using ContigPair = std::pair<std::string, std::string>;
/** maps contig FASTA ID to contig length (bp) */
using ContigToLength = std::unordered_map<std::string, int>;
using ContigToLengthIt = ContigToLength::const_iterator;
/** Properties bundled with each vertex of the graph (first template argument of Graph). */
struct VertexProperties {
/** vertex identifier, emitted as the `id` attribute by VertexPropertyWriter
 * (presumably the contig/scaffold FASTA ID - confirm against the code that fills it) */
std::string id;
};
/**
 * Properties bundled with each edge of the graph.
 *
 * Orientation: 0-HH, 1-HT, 2-TH, 3-TT
 *
 * A freshly constructed edge carries sentinel distances (minDist = INT_MIN,
 * dist = maxDist = INT_MAX) and a jaccard of -1; EdgePropertyWriter treats
 * minDist == INT_MIN as "no distance estimate present".
 */
struct EdgeProperties {
    int orientation = 0;
    int weight = 0;
    int minDist = std::numeric_limits<int>::min();
    int dist = std::numeric_limits<int>::max();
    int maxDist = std::numeric_limits<int>::max();
    float jaccard = -1.0f;

    /** Builds an edge holding the sentinel defaults declared above. */
    EdgeProperties() = default;
};
template <class GraphT>
struct EdgePropertyWriter
{
typedef typename boost::graph_traits<GraphT>::edge_descriptor E;
typedef typename boost::edge_property<GraphT>::type EP;
GraphT& m_g;
EdgePropertyWriter(GraphT& g) : m_g(g) {}
void operator()(std::ostream& out, const E& e) const
{
EP ep = m_g[e];
out << '['
<< "label=" << ep.orientation << ", "
<< "weight=" << ep.weight;
if (ep.minDist != std::numeric_limits<int>::min()) {
assert(ep.dist != std::numeric_limits<int>::max());
assert(ep.maxDist != std::numeric_limits<int>::max());
assert(ep.jaccard >= 0.0f);
out << ", "
<< "d=" << ep.dist << ", "
<< "maxd=" << ep.maxDist;
}
out << ']';
}
};
/**
 * Functor that writes the attribute list of one graph vertex to a stream,
 * in the form ` [id=<id>]`.
 *
 * @tparam GraphT graph type whose vertex bundle exposes an `id` member
 */
template <class GraphT>
struct VertexPropertyWriter
{
    using V = typename boost::graph_traits<GraphT>::vertex_descriptor;
    using VP = typename boost::vertex_property<GraphT>::type;

    /** graph whose vertex properties are read; held by reference */
    GraphT& m_g;

    VertexPropertyWriter(GraphT& g) : m_g(g) {}

    /**
     * Writes the `id` attribute of vertex `v` to `out`.
     *
     * @param out destination stream
     * @param v   descriptor of the vertex to describe
     */
    void operator()(std::ostream& out, const V& v) const
    {
        const auto& vertex = m_g[v];
        out << " [id=";
        out << vertex.id;
        out << "]";
    }
};
/** Undirected graph carrying VertexProperties on vertices and EdgeProperties on edges. */
using Graph = boost::undirected_graph<VertexProperties, EdgeProperties>;
/** maps a string key to a vertex descriptor of Graph (presumably keyed by vertex id - confirm) */
using VidVdesMap = std::unordered_map<std::string, Graph::vertex_descriptor>;
/** vertex descriptor type of Graph */
using VertexDes = boost::graph_traits<ARCS::Graph>::vertex_descriptor;
}
#endif