Skip to content

Commit

Permalink
better error reporting, fix segfault when reporting SNP outside of chromosome range
Browse files Browse the repository at this point in the history
  • Loading branch information
gmcvicker committed Sep 24, 2016
1 parent b0c5c99 commit 422ef94
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 22 deletions.
6 changes: 3 additions & 3 deletions snp2h5/snp2h5.c
Original file line number Diff line number Diff line change
Expand Up @@ -1095,9 +1095,9 @@ void parse_vcf(Arguments *args, Chromosome *all_chroms, int n_chrom,
* to point to row in matrices / SNP table
*/
if(snp.pos > chrom->len || snp.pos < 1) {
my_err("%s:%d: SNP position (%ld) is outside of "
"chromomosome %s range:1-%ld", __FILE__, __LINE__,
snp.pos, chrom->len);
my_err("%s:%d: SNP %s position (%ld) is outside of "
"chromomosome %s range: 1-%ld", __FILE__, __LINE__,
snp.name, snp.pos, chrom->name, chrom->len);
}

if(snp_index) {
Expand Down
56 changes: 37 additions & 19 deletions snp2h5/vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ VCFInfo *vcf_info_new() {
vcf_info->buf = my_malloc(vcf_info->buf_size);

vcf_info->n_sample = 0;
vcf_info->cur_line = 0;
vcf_info->sample_names = NULL;

return vcf_info;
Expand Down Expand Up @@ -71,6 +72,7 @@ void vcf_read_header(gzFile vcf_fh, VCFInfo *vcf_info) {

while(util_gzgetline(vcf_fh, &vcf_info->buf, &vcf_info->buf_size) != -1) {
line = vcf_info->buf;
vcf_info->cur_line += 1;

if(util_str_starts_with(line, "##")) {
/* header line */
Expand Down Expand Up @@ -226,8 +228,8 @@ void vcf_parse_haplotypes(VCFInfo *vcf_info, char *haplotypes,
}

if((n_haps + 2) > expect_haps) {
my_err("%s:%d: more genotypes per line than expected",
__FILE__, __LINE__);
my_err("%s:%d: more genotypes per line than expected (line: %ld)",
__FILE__, __LINE__, vcf_info->cur_line);
}
haplotypes[n_haps] = hap1;
haplotypes[n_haps+1] = hap2;
Expand All @@ -241,7 +243,8 @@ void vcf_parse_haplotypes(VCFInfo *vcf_info, char *haplotypes,

if(n_haps != expect_haps) {
my_err("%s:%d: expected %ld genotype values per line, but got "
"%ld", __FILE__, __LINE__, expect_haps, n_haps);
"%ld (line: %ld)", __FILE__, __LINE__,
expect_haps, n_haps, vcf_info->cur_line);
}
}

Expand Down Expand Up @@ -285,7 +288,8 @@ void vcf_parse_gl(VCFInfo *vcf_info, float *geno_probs, char *cur, long gl_idx)
like_homo_ref = like_het = like_homo_alt = -0.477;
} else {
my_err("%s:%d: failed to parse genotype likelihoods from "
"string '%s'", __FILE__, __LINE__, inner_tok);
"string '%s' (line: %ld)", __FILE__, __LINE__,
inner_tok, vcf_info->cur_line);
}
}

Expand All @@ -295,8 +299,9 @@ void vcf_parse_gl(VCFInfo *vcf_info, float *geno_probs, char *cur, long gl_idx)
prob_homo_alt = pow(10.0, like_homo_alt);

if((n_geno_probs + 3) > expect_geno_probs) {
my_err("%s:%d: more genotype likelihoods per line than expected",
__FILE__, __LINE__);
my_err("%s:%d: more genotype likelihoods per line "
"than expected (line: %ld)",
__FILE__, __LINE__, vcf_info->cur_line);
}

/* most of time probs sum to 1.0, but sometimes they do not
Expand All @@ -322,7 +327,8 @@ void vcf_parse_gl(VCFInfo *vcf_info, float *geno_probs, char *cur, long gl_idx)

if(n_geno_probs != expect_geno_probs) {
my_err("%s:%d: expected %ld genotype likelihoods per line, but got "
"%ld", __FILE__, __LINE__, expect_geno_probs, n_geno_probs);
"%ld (line: %ld)", __FILE__, __LINE__, expect_geno_probs,
n_geno_probs, vcf_info->cur_line);
}
}

Expand Down Expand Up @@ -363,7 +369,8 @@ void vcf_parse_gp(VCFInfo *vcf_info, float *geno_probs, char *cur, long gp_idx)
prob_homo_ref = prob_het = prob_homo_alt = 0.333;
} else {
my_err("%s:%d: failed to parse genotype probabilities from "
"string '%s'", __FILE__, __LINE__, inner_tok);
"string '%s' (line: %ld)",
__FILE__, __LINE__, inner_tok, vcf_info->cur_line);
}
}

Expand All @@ -387,7 +394,8 @@ void vcf_parse_gp(VCFInfo *vcf_info, float *geno_probs, char *cur, long gp_idx)

if(n_geno_probs != expect_geno_probs) {
my_err("%s:%d: expected %ld genotype probabilities per line, but got "
"%ld", __FILE__, __LINE__, expect_geno_probs, n_geno_probs);
"%ld (line: %ld)", __FILE__, __LINE__, expect_geno_probs,
n_geno_probs, vcf_info->cur_line);
}
}

Expand Down Expand Up @@ -450,14 +458,16 @@ int vcf_read_line(gzFile vcf_fh, VCFInfo *vcf_info, SNP *snp,
if(util_gzgetline(vcf_fh, &vcf_info->buf, &vcf_info->buf_size) == -1) {
return -1;
}

vcf_info->cur_line += 1;

cur = vcf_info->buf;
tok_num = 0;

/* chrom */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n",
n_fix_header, vcf_info->cur_line);
}

/* we don't bother to store chromosome since we store
Expand All @@ -469,21 +479,24 @@ int vcf_read_line(gzFile vcf_fh, VCFInfo *vcf_info, SNP *snp,
/* pos */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n",
n_fix_header, vcf_info->cur_line);
}
snp->pos = util_parse_long(token);

/* ID */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n",
n_fix_header, vcf_info->cur_line);
}
util_strncpy(snp->name, token, sizeof(snp->name));

/* ref */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n", n_fix_header,
vcf_info->cur_line);
}
ref_len = util_strncpy(snp->allele1, token, sizeof(snp->allele1));

Expand All @@ -500,7 +513,8 @@ int vcf_read_line(gzFile vcf_fh, VCFInfo *vcf_info, SNP *snp,
/* alt */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n",
n_fix_header, vcf_info->cur_line);
}
alt_len = util_strncpy(snp->allele2, token, sizeof(snp->allele2));

Expand All @@ -514,30 +528,34 @@ int vcf_read_line(gzFile vcf_fh, VCFInfo *vcf_info, SNP *snp,
/* qual */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n",
n_fix_header, vcf_info->cur_line);
}
util_strncpy(vcf_info->qual, token, sizeof(vcf_info->qual));

/* filter */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n", n_fix_header,
vcf_info->cur_line);
}
util_strncpy(vcf_info->filter, token, sizeof(vcf_info->filter));


/* info */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n",
n_fix_header, vcf_info->cur_line);
}
util_strncpy(vcf_info->info, token, sizeof(vcf_info->info));


/* format */
token = strsep(&cur, delim);
if(token == NULL) {
my_err("expected at least %d tokens per line\n", n_fix_header);
my_err("expected at least %d tokens per line (line: %ld)\n",
n_fix_header, vcf_info->cur_line);
}
util_strncpy(vcf_info->format, token, sizeof(vcf_info->format));

Expand Down
3 changes: 3 additions & 0 deletions snp2h5/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ typedef struct {

char **sample_names;

/* keeps track of line number for error reporting */
size_t cur_line;

/* used for reading lines */
size_t buf_size;
char *buf;
Expand Down

0 comments on commit 422ef94

Please sign in to comment.