-
Notifications
You must be signed in to change notification settings - Fork 15
/
unifrac_task.hpp
577 lines (481 loc) · 25.5 KB
/
unifrac_task.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
/*
* BSD 3-Clause License
*
* Copyright (c) 2016-2021, UniFrac development team.
* All rights reserved.
*
* See LICENSE file for more details
*/
#include "task_parameters.hpp"
#include <math.h>
#include <vector>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef __UNIFRAC_TASKS
#define __UNIFRAC_TASKS 1
#ifdef _OPENACC
#define SUCMP_NM su_acc
#ifndef SMALLGPU
// defaultt on larger alignment, which improves performance on GPUs like V100
#define UNIFRAC_BLOCK 64
#else
// smaller GPUs prefer smaller allignment
#define UNIFRAC_BLOCK 32
#endif
#else
#define SUCMP_NM su_cpu
// CPUs don't need such a big alignment
#define UNIFRAC_BLOCK 16
#endif
namespace SUCMP_NM {
// Note: This adds a copy, which is suboptimal
// But was the easiest way to get a contiguous buffer
// And it does allow for fp32 compute, when desired
template<class TFloat>
class UnifracTaskVector {
private:
std::vector<double*> &dm_stripes;
const su::task_parameters* const task_p;
public:
const unsigned int start_idx;
const unsigned int n_samples;
const uint64_t n_samples_r;
TFloat* const buf;
UnifracTaskVector(std::vector<double*> &_dm_stripes, const su::task_parameters* _task_p)
: dm_stripes(_dm_stripes), task_p(_task_p)
, start_idx(task_p->start), n_samples(task_p->n_samples)
, n_samples_r(((n_samples + UNIFRAC_BLOCK-1)/UNIFRAC_BLOCK)*UNIFRAC_BLOCK) // round up
, buf((dm_stripes[start_idx]==NULL) ? NULL : new TFloat[n_samples_r*(task_p->stop-start_idx)]) // dm_stripes could be null, in which case keep it null
{
TFloat* const ibuf = buf;
if (ibuf != NULL) {
#ifdef _OPENACC
const uint64_t bufels = n_samples_r * (task_p->stop-start_idx);
#endif
for(unsigned int stripe=start_idx; stripe < task_p->stop; stripe++) {
double * dm_stripe = dm_stripes[stripe];
TFloat * buf_stripe = this->operator[](stripe);
for(unsigned int j=0; j<n_samples; j++) {
// Note: We could probably just initialize to zero
buf_stripe[j] = dm_stripe[j];
}
for(unsigned int j=n_samples; j<n_samples_r; j++) {
// Avoid NaNs
buf_stripe[j] = 0.0;
}
}
#ifdef _OPENACC
#pragma acc enter data copyin(ibuf[:bufels])
#endif
}
}
TFloat * operator[](unsigned int idx) { return buf+((idx-start_idx)*n_samples_r);}
const TFloat * operator[](unsigned int idx) const { return buf+((idx-start_idx)*n_samples_r);}
~UnifracTaskVector()
{
TFloat* const ibuf = buf;
if (ibuf != NULL) {
#ifdef _OPENACC
const uint64_t bufels = n_samples_r * (task_p->stop-start_idx);
#pragma acc exit data copyout(ibuf[:bufels])
#endif
for(unsigned int stripe=start_idx; stripe < task_p->stop; stripe++) {
double * dm_stripe = dm_stripes[stripe];
TFloat * buf_stripe = this->operator[](stripe);
for(unsigned int j=0; j<n_samples; j++) {
dm_stripe[j] = buf_stripe[j];
}
}
delete [] buf;
}
}
private:
UnifracTaskVector() = delete;
UnifracTaskVector operator=(const UnifracTaskVector&other) const = delete;
};
// Base task class to be shared by all tasks
template<class TFloat, class TEmb>
class UnifracTaskBase {
public:
UnifracTaskVector<TFloat> dm_stripes;
UnifracTaskVector<TFloat> dm_stripes_total;
const su::task_parameters* task_p;
const unsigned int max_embs;
TEmb * embedded_proportions;
#ifdef _OPENACC
protected:
// alternate buffer only needed in async environments, like openacc
TEmb * embedded_proportions_alt; // used as temp
public:
#endif
UnifracTaskBase(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total, unsigned int _max_embs, const su::task_parameters* _task_p)
: dm_stripes(_dm_stripes,_task_p), dm_stripes_total(_dm_stripes_total,_task_p), task_p(_task_p)
, max_embs(_max_embs)
, embedded_proportions(initialize_embedded(dm_stripes.n_samples_r,_max_embs))
#ifdef _OPENACC
, embedded_proportions_alt(initialize_embedded(dm_stripes.n_samples_r,_max_embs))
#endif
{}
/* remove
// Note: not const, since they share a mutable state
UnifracTaskBase(UnifracTaskBase &baseObj)
: dm_stripes(baseObj.dm_stripes), dm_stripes_total(baseObj.dm_stripes_total), task_p(baseObj.task_p) {}
*/
virtual ~UnifracTaskBase()
{
#ifdef _OPENACC
const uint64_t n_samples_r = dm_stripes.n_samples_r;
const uint64_t bsize = n_samples_r * get_emb_els(max_embs);
#pragma acc exit data delete(embedded_proportions_alt[:bsize])
#pragma acc exit data delete(embedded_proportions[:bsize])
free(embedded_proportions_alt);
#endif
free(embedded_proportions);
}
void sync_embedded_proportions(unsigned int filled_embs)
{
#ifdef _OPENACC
const uint64_t n_samples_r = dm_stripes.n_samples_r;
const uint64_t bsize = n_samples_r * get_emb_els(filled_embs);
#pragma acc update device(embedded_proportions[:bsize])
#endif
}
static unsigned int get_emb_els(unsigned int max_embs);
static TEmb *initialize_embedded(const uint64_t n_samples_r, unsigned int max_embs) {
uint64_t bsize = n_samples_r * get_emb_els(max_embs);
TEmb* buf = NULL;
int err = posix_memalign((void **)&buf, 4096, sizeof(TEmb) * bsize);
if(buf == NULL || err != 0) {
fprintf(stderr, "Failed to allocate %zd bytes, err %d; [%s]:%d\n",
sizeof(TEmb) * bsize, err, __FILE__, __LINE__);
exit(EXIT_FAILURE);
}
#pragma acc enter data create(buf[:bsize])
return buf;
}
void embed_proportions_range(const TFloat* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb);
void embed_proportions(const TFloat* __restrict__ in, unsigned int emb) {embed_proportions_range(in,0,dm_stripes.n_samples,emb);}
//
// ===== Internal, do not use directly =======
//
// Just copy from one buffer to another
// May convert between fp formats in the process (if TOut!=double)
template<class TOut> void embed_proportions_range_straight(TOut* __restrict__ out, const TFloat* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) const
{
const unsigned int n_samples = dm_stripes.n_samples;
const uint64_t n_samples_r = dm_stripes.n_samples_r;
const uint64_t offset = emb * n_samples_r;
for(unsigned int i = start; i < end; i++) {
out[offset + i] = in[i-start];
}
if (end==n_samples) {
// avoid NaNs
for(unsigned int i = n_samples; i < n_samples_r; i++) {
out[offset + i] = 0.0;
}
}
}
// packed bool
// Compute (in[:]>0) on each element, and store only the boolean bit.
// The output values are stored in a multi-byte format, one bit per emb index,
// so it will likely take multiple passes to store all the values
//
// Note: assumes we are processing emb in increasing order, starting from 0
template<class TOut> void embed_proportions_range_bool(TOut* __restrict__ out, const TFloat* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) const
{
const unsigned int n_packed = sizeof(TOut)*8;// e.g. 32 for unit32_t
const unsigned int n_samples = dm_stripes.n_samples;
const uint64_t n_samples_r = dm_stripes.n_samples_r;
// The output values are stored in a multi-byte format, one bit per emb index
// Compute the element to store the bit into, as well as whichbit in that element
unsigned int emb_block = emb/n_packed; // beginning of the element block
unsigned int emb_bit = emb%n_packed; // bit inside the elements
const uint64_t offset = emb_block * n_samples_r;
if (emb_bit==0) {
// assign for emb_bit==0, so it clears the other bits
// assumes we processing emb in increasing order, starting from 0
for(unsigned int i = start; i < end; i++) {
out[offset + i] = (in[i-start] > 0);
}
if (end==n_samples) {
// avoid NaNs
for(unsigned int i = n_samples; i < n_samples_r; i++) {
out[offset + i] = 0;
}
}
} else {
// just update my bit
for(unsigned int i = start; i < end; i++) {
out[offset + i] |= (TOut(in[i-start] > 0) << emb_bit);
}
// the rest of the els are already OK
}
}
};
// straight embeded_proportions
template<> inline void UnifracTaskBase<double,double>::embed_proportions_range(const double* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) {embed_proportions_range_straight(embedded_proportions,in,start,end,emb);}
template<> inline void UnifracTaskBase<double,float>::embed_proportions_range(const double* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) {embed_proportions_range_straight(embedded_proportions,in,start,end,emb);}
template<> inline void UnifracTaskBase<float,float>::embed_proportions_range(const float* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) {embed_proportions_range_straight(embedded_proportions,in,start,end,emb);}
template<> inline unsigned int UnifracTaskBase<double,double>::get_emb_els(unsigned int max_embs) {return max_embs;}
template<> inline unsigned int UnifracTaskBase<double,float>::get_emb_els(unsigned int max_embs) {return max_embs;}
template<> inline unsigned int UnifracTaskBase<float,float>::get_emb_els(unsigned int max_embs) {return max_embs;}
//packed bool embeded_proportions
template<> inline void UnifracTaskBase<double,uint32_t>::embed_proportions_range(const double* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) {embed_proportions_range_bool(embedded_proportions,in,start,end,emb);}
template<> inline void UnifracTaskBase<float,uint32_t>::embed_proportions_range(const float* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) {embed_proportions_range_bool(embedded_proportions,in,start,end,emb);}
template<> inline unsigned int UnifracTaskBase<double,uint32_t>::get_emb_els(unsigned int max_embs) {return (max_embs+31)/32;}
template<> inline unsigned int UnifracTaskBase<float,uint32_t>::get_emb_els(unsigned int max_embs) {return (max_embs+31)/32;}
template<> inline void UnifracTaskBase<double,uint64_t>::embed_proportions_range(const double* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) {embed_proportions_range_bool(embedded_proportions,in,start,end,emb);}
template<> inline void UnifracTaskBase<float,uint64_t>::embed_proportions_range(const float* __restrict__ in, unsigned int start, unsigned int end, unsigned int emb) {embed_proportions_range_bool(embedded_proportions,in,start,end,emb);}
template<> inline unsigned int UnifracTaskBase<double,uint64_t>::get_emb_els(unsigned int max_embs) {return (max_embs+63)/64;}
template<> inline unsigned int UnifracTaskBase<float,uint64_t>::get_emb_els(unsigned int max_embs) {return (max_embs+63)/64;}
/* void unifrac tasks
*
* all methods utilize the same function signature. that signature is as follows:
*
* dm_stripes vector<double> the stripes of the distance matrix being accumulated
* into for unique branch length
* dm_stripes vector<double> the stripes of the distance matrix being accumulated
* into for total branch length (e.g., to normalize unweighted unifrac)
* embedded_proportions <double*> the proportions vector for a sample, or rather
* the counts vector normalized to 1. this vector is embedded as it is
* duplicated: if A, B and C are proportions for features A, B, and C, the
* vector will look like [A B C A B C].
* length <double> the branch length of the current node to its parent.
* task_p <task_parameters*> task specific parameters.
*/
template<class TFloat, class TEmb>
class UnifracTask : public UnifracTaskBase<TFloat,TEmb> {
protected:
// Use one cache line on CPU
// On GPU, shaing a cache line is actually a good thing
static const unsigned int step_size = 16*4/sizeof(TFloat);
#ifdef _OPENACC
// Use as big vector size as we can, to maximize cache line reuse
static const unsigned int acc_vector_size = 2048;
#endif
public:
UnifracTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total, unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracTaskBase<TFloat,TEmb>(_dm_stripes, _dm_stripes_total, _max_embs, _task_p) {}
/* delete
UnifracTask(UnifracTaskBase<TFloat> &baseObj, const TEmb * _embedded_proportions, unsigned int _max_embs)
: UnifracTaskBase<TFloat>(baseObj)
, embedded_proportions(_embedded_proportions), max_embs(_max_embs) {}
*/
virtual ~UnifracTask() {}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) = 0;
protected:
static const unsigned int RECOMMENDED_MAX_EMBS_STRAIGHT = 128-16; // a little less to leave a bit of space of maxed-out L1
// packed uses 32x less memory,so this should be 32x larger than straight... but there are additional structures, so use half of that
static const unsigned int RECOMMENDED_MAX_EMBS_BOOL = 64*32;
};
template<class TFloat>
class UnifracUnnormalizedWeightedTask : public UnifracTask<TFloat,TFloat> {
public:
static const unsigned int RECOMMENDED_MAX_EMBS = UnifracTask<TFloat,TFloat>::RECOMMENDED_MAX_EMBS_STRAIGHT;
UnifracUnnormalizedWeightedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total, unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_max_embs,_task_p)
{
const unsigned int n_samples = this->task_p->n_samples;
zcheck = NULL;
sums = NULL;
posix_memalign((void **)&zcheck, 4096, sizeof(bool) * n_samples);
posix_memalign((void **)&sums , 4096, sizeof(TFloat) * n_samples);
#pragma acc enter data create(zcheck[:n_samples],sums[:n_samples])
}
virtual ~UnifracUnnormalizedWeightedTask()
{
#ifdef _OPENACC
const unsigned int n_samples = this->task_p->n_samples;
#pragma acc exit data delete(sums[:n_samples],zcheck[:n_samples])
#endif
free(sums);
free(zcheck);
}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
protected:
// temp buffers
bool *zcheck;
TFloat *sums;
};
template<class TFloat>
class UnifracNormalizedWeightedTask : public UnifracTask<TFloat,TFloat> {
public:
static const unsigned int RECOMMENDED_MAX_EMBS = UnifracTask<TFloat,TFloat>::RECOMMENDED_MAX_EMBS_STRAIGHT;
UnifracNormalizedWeightedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total, unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_max_embs,_task_p)
{
const unsigned int n_samples = this->task_p->n_samples;
zcheck = NULL;
sums = NULL;
posix_memalign((void **)&zcheck, 4096, sizeof(bool) * n_samples);
posix_memalign((void **)&sums , 4096, sizeof(TFloat) * n_samples);
#pragma acc enter data create(zcheck[:n_samples],sums[:n_samples])
}
virtual ~UnifracNormalizedWeightedTask()
{
#ifdef _OPENACC
const unsigned int n_samples = this->task_p->n_samples;
#pragma acc exit data delete(sums[:n_samples],zcheck[:n_samples])
#endif
free(sums);
free(zcheck);
}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
protected:
// temp buffers
bool *zcheck;
TFloat *sums;
};
template<class TFloat>
class UnifracUnweightedTask : public UnifracTask<TFloat,uint64_t> {
public:
static const unsigned int RECOMMENDED_MAX_EMBS = UnifracTask<TFloat,uint64_t>::RECOMMENDED_MAX_EMBS_BOOL;
// Note: _max_emb MUST be multiple of 64
UnifracUnweightedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total, unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracTask<TFloat, uint64_t>(_dm_stripes,_dm_stripes_total,_max_embs,_task_p)
{
const unsigned int bsize = _max_embs*(0x400/32);
sums = NULL;
posix_memalign((void **)&sums, 4096, sizeof(TFloat) * bsize);
#pragma acc enter data create(sums[:bsize])
}
virtual ~UnifracUnweightedTask()
{
#ifdef _OPENACC
const unsigned int bsize = this->max_embs*(0x400/32);
#pragma acc exit data delete(sums[:bsize])
#endif
free(sums);
}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
private:
TFloat *sums; // temp buffer
};
template<class TFloat>
class UnifracGeneralizedTask : public UnifracTask<TFloat,TFloat> {
public:
static const unsigned int RECOMMENDED_MAX_EMBS = UnifracTask<TFloat,TFloat>::RECOMMENDED_MAX_EMBS_STRAIGHT;
UnifracGeneralizedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total, unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_max_embs,_task_p) {}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
};
/* void unifrac_vaw tasks
*
* all methods utilize the same function signature. that signature is as follows:
*
* dm_stripes vector<double> the stripes of the distance matrix being accumulated
* into for unique branch length
* dm_stripes vector<double> the stripes of the distance matrix being accumulated
* into for total branch length (e.g., to normalize unweighted unifrac)
* embedded_proportions <double*> the proportions vector for a sample, or rather
* the counts vector normalized to 1. this vector is embedded as it is
* duplicated: if A, B and C are proportions for features A, B, and C, the
* vector will look like [A B C A B C].
* embedded_counts <double*> the counts vector embedded in the same way and order as
* embedded_proportions. the values of this array are unnormalized feature
* counts for the subtree.
* sample_total_counts <double*> the total unnormalized feature counts for all samples
* embedded in the same way and order as embedded_proportions.
* length <double> the branch length of the current node to its parent.
* task_p <task_parameters*> task specific parameters.
*/
template<class TFloat, class TEmb>
class UnifracVawTask : public UnifracTaskBase<TFloat,TEmb> {
protected:
#ifdef _OPENACC
// The parallel nature of GPUs needs a largish step
#ifndef SMALLGPU
// default to larger step, which makes a big difference for bigger GPUs like V100
static const unsigned int step_size = 32;
// keep the vector size just big enough to keep the used emb array inside the 32k buffer
static const unsigned int acc_vector_size = 32*32*8/sizeof(TFloat);
#else
// smaller GPUs prefer a slightly smaller step
static const unsigned int step_size = 16;
// keep the vector size just big enough to keep the used emb array inside the 32k buffer
static const unsigned int acc_vector_size = 32*32*8/sizeof(TFloat);
#endif
#else
// The serial nature of CPU cores prefers a small step
static const unsigned int step_size = 4;
#endif
public:
TFloat * const embedded_counts;
const TFloat * const sample_total_counts;
static const unsigned int RECOMMENDED_MAX_EMBS = 128;
UnifracVawTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total,
const TFloat * _sample_total_counts,
unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracTaskBase<TFloat,TEmb>(_dm_stripes, _dm_stripes_total, _max_embs, _task_p)
, embedded_counts(UnifracTaskBase<TFloat,TFloat>::initialize_embedded(this->dm_stripes.n_samples_r,_max_embs)), sample_total_counts(_sample_total_counts) {}
/* delete
UnifracVawTask(UnifracTaskBase<TFloat> &baseObj,
const TEmb * _embedded_proportions, const TFloat * _sample_total_counts, unsigned int _max_embs)
: UnifracTaskBase<TFloat>(baseObj)
, embedded_proportions(_embedded_proportions), embedded_counts(initialize_embedded<TFloat>()), sample_total_counts(_sample_total_counts), max_embs(_max_embs) {}
*/
virtual ~UnifracVawTask() {}
void sync_embedded_counts(unsigned int filled_embs)
{
#ifdef _OPENACC
const uint64_t n_samples_r = this->dm_stripes.n_samples_r;
const uint64_t bsize = n_samples_r * filled_embs;
#pragma acc update device(embedded_counts[:bsize])
#endif
}
void sync_embedded(unsigned int filled_embs) { this->sync_embedded_proportions(filled_embs); this->sync_embedded_counts(filled_embs);}
void embed_range(const TFloat* __restrict__ in_proportions, const TFloat* __restrict__ in_counts, unsigned int start, unsigned int end, unsigned int emb) {
this->embed_proportions_range(in_proportions,start,end,emb);
this->embed_proportions_range_straight(this->embedded_counts,in_counts,start,end,emb);
}
void embed(const TFloat* __restrict__ in_proportions, const double* __restrict__ in_counts, unsigned int emb) { embed_range(in_proportions,in_counts,0,this->dm_stripes.n_samples,emb);}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) = 0;
};
template<class TFloat>
class UnifracVawUnnormalizedWeightedTask : public UnifracVawTask<TFloat,TFloat> {
public:
UnifracVawUnnormalizedWeightedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total,
const TFloat * _sample_total_counts,
unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracVawTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
};
template<class TFloat>
class UnifracVawNormalizedWeightedTask : public UnifracVawTask<TFloat,TFloat> {
public:
UnifracVawNormalizedWeightedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total,
const TFloat * _sample_total_counts,
unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracVawTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
};
template<class TFloat>
class UnifracVawUnweightedTask : public UnifracVawTask<TFloat,uint32_t> {
public:
UnifracVawUnweightedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total,
const TFloat * _sample_total_counts,
unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracVawTask<TFloat,uint32_t>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
};
template<class TFloat>
class UnifracVawGeneralizedTask : public UnifracVawTask<TFloat,TFloat> {
public:
UnifracVawGeneralizedTask(std::vector<double*> &_dm_stripes, std::vector<double*> &_dm_stripes_total,
const TFloat * _sample_total_counts,
unsigned int _max_embs, const su::task_parameters* _task_p)
: UnifracVawTask<TFloat,TFloat>(_dm_stripes,_dm_stripes_total,_sample_total_counts,_max_embs,_task_p) {}
virtual void run(unsigned int filled_embs, const TFloat * __restrict__ length) {_run(filled_embs, length);}
void _run(unsigned int filled_embs, const TFloat * __restrict__ length);
};
}
#endif