Skip to content

Commit

Permalink
fixed bug in bcf_ip2g
Browse files Browse the repository at this point in the history
  • Loading branch information
atks committed Mar 21, 2017
1 parent dd05d75 commit e3678c2
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 85 deletions.
40 changes: 36 additions & 4 deletions hts_utils.cpp
Expand Up @@ -925,6 +925,11 @@ uint32_t bcf_ag2p(uint32_t no_alleles, uint32_t no_genotypes)

/**
* Gets genotype from genotype index and ploidy.
*
* The genotype index is computed by a summation of a series which is
* monotonically decreasing. This allows you to compute the inverse function
* from index to the ordered genotypes by using a "water rapids algorithm" with
* decreasing height of each mini water fall.
*/
std::vector<int32_t> bcf_ip2g(int32_t genotype_index, uint32_t no_ploidy)
{
Expand All @@ -933,22 +938,23 @@ std::vector<int32_t> bcf_ip2g(int32_t genotype_index, uint32_t no_ploidy)
int32_t pth = no_ploidy;
int32_t max_allele_index = genotype_index;
int32_t leftover_genotype_index = genotype_index;

while (pth>0)
{
int32_t old_pth = pth;

for (int32_t allele_index=0; allele_index <= max_allele_index; ++allele_index)
{
int32_t i = choose(pth+allele_index-1, pth);

if (i>=leftover_genotype_index)
if (i>=leftover_genotype_index || allele_index==max_allele_index)
{
if (i>leftover_genotype_index) --allele_index;
leftover_genotype_index -= choose(pth+allele_index-1, pth);
--pth;
max_allele_index = allele_index;
genotype[pth] = allele_index;
genotype[pth] = allele_index;
break;

}
}
}
Expand Down Expand Up @@ -980,6 +986,32 @@ uint32_t bcf_g2i(int32_t* g, uint32_t n)
}
}

/**
* Gets index of a genotype of n ploidy.
*/
uint32_t bcf_g2i(std::vector<int32_t>& g)
{
int32_t n = g.size();

if (n==1)
{
return g[0];
}
if (n==2)
{
return g[0] + (((g[1]+1)*(g[1]))>>1);
}
else
{
uint32_t index = 0;
for (uint32_t i=0; i<n; ++i)
{
index += bcf_ap2g(g[i], i+1);
}
return index;
}
}

/**
* Gets a string representation of a variant.
*/
Expand Down
5 changes: 5 additions & 0 deletions hts_utils.h
Expand Up @@ -348,6 +348,11 @@ std::vector<int32_t> bcf_ip2g(int32_t genotype_index, uint32_t no_ploidy);
*/
uint32_t bcf_g2i(int32_t* g, uint32_t p);

/**
* Gets index of a genotype of n ploidy.
*/
uint32_t bcf_g2i(std::vector<int32_t>& g);

/**
* Gets index of a diploid genotype.
*/
Expand Down
193 changes: 112 additions & 81 deletions test.cpp
Expand Up @@ -75,83 +75,83 @@ class Igor : Program

};

/**
* n choose r.
*/
uint32_t choose(uint32_t n, uint32_t r)
{
if (r>n)
{
return 0;
}
else if (r==n)
{
return 1;
}
else if (r==0)
{
return 1;
}
else
{
if (r>(n>>1))
{
r = n-r;
}

uint32_t num = n;
uint32_t denum = 1;

for (uint32_t i=1; i<r; ++i)
{
num *= n-i;
denum *= i+1;
}

return num/denum;
}
}


/**
* Gets number of genotypes from number of alleles and ploidy.
*/
uint32_t bcf_ap2g(uint32_t no_allele, uint32_t no_ploidy)
{
return choose(no_ploidy+no_allele-1, no_allele-1);
}

/**
* Gets number of genotypes from number of alleles and ploidy.
*/
uint32_t bcf_g2i(std::string genotype)
{
uint32_t index = 0;
for (uint32_t i=0; i<genotype.size(); ++i)
{
uint32_t allele = genotype.at(i)-65;
index += bcf_ap2g(allele, i+1);
}
return index;
}

void print_genotypes(uint32_t A, uint32_t P, std::string genotype)
{
if (genotype.size()==P)
{
std::cerr << no << ") " << genotype << " " << bcf_g2i(genotype) << "\n";
++no;
}
else
{
for (uint32_t a=0; a<A; ++a)
{
std::string s(1,(char)(a+65));
s.append(genotype);
print_genotypes(a+1, P, s);
}
}
}
// /**
// * n choose r.
// */
// uint32_t choose(uint32_t n, uint32_t r)
// {
// if (r>n)
// {
// return 0;
// }
// else if (r==n)
// {
// return 1;
// }
// else if (r==0)
// {
// return 1;
// }
// else
// {
// if (r>(n>>1))
// {
// r = n-r;
// }
//
// uint32_t num = n;
// uint32_t denum = 1;
//
// for (uint32_t i=1; i<r; ++i)
// {
// num *= n-i;
// denum *= i+1;
// }
//
// return num/denum;
// }
// }
//
//
// /**
// * Gets number of genotypes from number of alleles and ploidy.
// */
// uint32_t bcf_ap2g(uint32_t no_allele, uint32_t no_ploidy)
// {
// return choose(no_ploidy+no_allele-1, no_allele-1);
// }
//
// /**
// * Gets number of genotypes from number of alleles and ploidy.
// */
// uint32_t bcf_g2i(std::string genotype)
// {
// uint32_t index = 0;
// for (uint32_t i=0; i<genotype.size(); ++i)
// {
// uint32_t allele = genotype.at(i)-65;
// index += bcf_ap2g(allele, i+1);
// }
// return index;
// }
//
// void print_genotypes(uint32_t A, uint32_t P, std::string genotype)
// {
// if (genotype.size()==P)
// {
// std::cerr << no << ") " << genotype << " " << bcf_g2i(genotype) << "\n";
// ++no;
// }
// else
// {
// for (uint32_t a=0; a<A; ++a)
// {
// std::string s(1,(char)(a+65));
// s.append(genotype);
// print_genotypes(a+1, P, s);
// }
// }
// }

/**
* Computes composition and entropy ofrepeat tract.
Expand Down Expand Up @@ -274,9 +274,7 @@ class Igor : Program
}

}




void test(int argc, char ** argv)
{
version = "0.5";
Expand Down Expand Up @@ -601,6 +599,39 @@ class Igor : Program
};


void test_ip2g(int argc, char ** argv)
{
printf("allele\tploidy\tindex\tverified\n");

for (int32_t no_allele=2; no_allele<=6; ++no_allele)
{
for (int32_t no_ploidy=2; no_ploidy<=6; ++no_ploidy)
{
int32_t max_index = bcf_ap2g(no_allele, no_ploidy);

for (int32_t index=0; index<=max_index; ++index)
{
fprintf(stderr, "%d\t%d\t%d\t", no_allele, no_ploidy, index);


std::vector<int32_t> genotypes = bcf_ip2g(index, no_ploidy);

for (int32_t j=0; j<no_ploidy; ++j)
{
if (j) printf("/");
printf("%d", genotypes[j]);
}

uint32_t i = bcf_g2i(genotypes);

std::string v = (i==index) ? "correct" : "wrong";

printf("\t%s\n", v.c_str());

}
}
}
};
~Igor() {};

private:
Expand All @@ -613,7 +644,7 @@ void test(int argc, char ** argv)
Igor igor(argc, argv);


igor.test(argc, argv);
igor.test_ip2g(argc, argv);
// printf ("std::isnan(0.0) : %d\n",std::isnan(0.0));
// printf ("std::isnan(1.0/0.0) : %d\n",std::isnan(1.0/0.0));
// printf ("std::isnan(-1.0/0.0) : %d\n",std::isnan(-1.0/0.0));
Expand Down

0 comments on commit e3678c2

Please sign in to comment.