-
Notifications
You must be signed in to change notification settings - Fork 67
/
SimilarityMap.h
54 lines (41 loc) · 1.62 KB
/
SimilarityMap.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#pragma once
#include "options.h"
#include "BigAlloc.h"
#include "Compat.h"
#include "RegionCluster.h"
#include "Genome.h"
//
// Holds a precomputed index of similar regions within a genome. The regions are grouped into
// "clusters" of substrings of a given length, so that each location in the genome is part of
// a unique cluster, or part of no cluster if it is not similar to any other substring. Each
// cluster's ID is the location of the first substring inside it. The special cluster ID
// 0xFFFFFFFF (SimilarityMap::NO_CLUSTER) means "no cluster".
//
class SimilarityMap {
public:
    //
    // Sentinel cluster ID meaning "no cluster".
    //
    static const unsigned NO_CLUSTER = 0xFFFFFFFF;

    //
    // Per-location record: the ID of a cluster together with a pointer to the
    // cluster object itself.
    //
    struct ClusterInfo {
        unsigned clusterId;
        RegionCluster *cluster;

        //
        // Arrays of ClusterInfo are allocated and released through BigAlloc / BigDealloc
        // rather than the default array allocator.
        //
        void *operator new[](size_t size) { return BigAlloc(size); }
        void operator delete[](void *ptr) { BigDealloc(ptr); }
    };

    //
    // The constructor, destructor and addCluster are defined outside this header.
    // NOTE(review): the class holds a raw ClusterInfo pointer and declares a destructor but
    // no copy constructor or copy assignment; if the destructor releases clusterInfo, copying
    // a SimilarityMap would be unsafe -- confirm against the implementation.
    //
    SimilarityMap(unsigned genomeLength_, unsigned numClusters_, int mergeDistance_);
    ~SimilarityMap();

    void addCluster(unsigned clusterId, RegionCluster *cluster);

    //
    // Return the cluster info for a particular genome location. The entry is returned by
    // value and the index is not range-checked here.
    //
    ClusterInfo getClusterInfo(unsigned location) const { return clusterInfo[location]; }

    //
    // Read-only accessors. They are const-qualified so they can be used through a
    // const SimilarityMap reference or pointer.
    //
    unsigned getNumClusters() const { return numClusters; }

    //
    // NOTE(review): mergeDistance is stored as int but returned as unsigned, so a negative
    // stored value would be converted to a large unsigned one. The return type is kept for
    // compatibility with existing callers.
    //
    unsigned getMergeDistance() const { return mergeDistance; }

    //
    // Load a similarity map from a file created by SimFinder.
    //
    static SimilarityMap *load(const char *filename, const Genome *genome, bool computeMemberInfo = true);

private:
    unsigned genomeLength;
    unsigned numClusters;
    int mergeDistance;
    ClusterInfo *clusterInfo;   // indexed by genome location
};