Skip to content
Browse files

make vcf format valid for pyvcf

  • Loading branch information...
1 parent 3994df9 commit 98b05565cc410253237717a4039e78b377018f97 @andrewjpage committed Jan 22, 2013
Showing with 22 additions and 17 deletions.
  1. +16 −12 run_gubbins.py
  2. +6 −5 src/vcf.c
View
28 run_gubbins.py
@@ -290,18 +290,18 @@ def reinsert_gaps_into_fasta_file(input_fasta_filename, input_vcf_file, output_f
# interleave gap only and snp bases
input_handle = open(input_fasta_filename, "rU")
alignments = AlignIO.parse(input_handle, "fasta")
- for record in alignments:
- if record.id in sample_names:
- continue
- gap_index = 0
- for input_base in record.seq:
- while gap_position[gap_index] == 1 and gap_index < length(gap_position):
- gap_position[gap_index] = '-'
- gap_index+=1
- gap_position[gap_index] = input_base
- record.seq(''.join(gap_position))
- gapped_alignments.append(record)
-
+ for alignment in alignments:
+ for record in alignment:
+ if record.id in sample_names:
+ continue
+ gap_index = 0
+ for input_base in record.seq:
+ while gap_position[gap_index] == 1 and gap_index < length(gap_position):
+ gap_position[gap_index] = '-'
+ gap_index+=1
+ gap_position[gap_index] = input_base
+ record.seq(''.join(gap_position))
+ gapped_alignments.append(record)
output_handle = open(output_fasta_filename, "a")
AlignIO.write(MultipleSeqAlignment(gapped_alignments), output_handle, "fasta")
@@ -383,6 +383,10 @@ def is_exe(fpath):
if (args.tree_builder == "fasttree" or args.tree_builder == "hybrid") and which(FASTTREE_EXEC) is None:
print "FastTree is not in your path"
sys.exit()
+
+if(not os.path.exists(args.alignment_filename)):
+ print "Cannot access the input alignment file. Check its been entered correctly"
+ sys.exit()
current_time = ''
if args.use_time_stamp > 0:
View
11 src/vcf.c
@@ -54,7 +54,8 @@ void output_vcf_header( FILE * vcf_file_pointer, char ** sequence_names, int num
{
int i;
fprintf( vcf_file_pointer, "##fileformat=VCFv4.1\n" );
- fprintf( vcf_file_pointer, "##INFO=<ID=AB,Number=1,Type=String,Description=\"Alt Base\">\n" );
+ fprintf( vcf_file_pointer, "##FORMAT=<ID=AB,Number=1,Type=String,Description=\"Alt Base\">\n" );
+
fprintf( vcf_file_pointer, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" );
for(i=0; i<number_of_samples; i++)
@@ -101,12 +102,12 @@ void output_vcf_row(FILE * vcf_file_pointer, char * bases_for_snp, int snp_locat
// FILTER
fprintf( vcf_file_pointer, ".\t");
- // FORMAT
- fprintf( vcf_file_pointer, "AB\t");
-
// INFO
fprintf( vcf_file_pointer, ".\t");
+ // FORMAT
+ fprintf( vcf_file_pointer, "AB\t");
+
// Bases for each sample
output_vcf_row_samples_bases(vcf_file_pointer, reference_base, bases_for_snp, number_of_samples,internal_nodes );
@@ -166,7 +167,7 @@ void output_vcf_row_samples_bases(FILE * vcf_file_pointer, char reference_base,
}
if((bases_for_snp[i] == reference_base))
{
- fprintf( vcf_file_pointer, "." );
+ fprintf( vcf_file_pointer, "%c", (char) reference_base );
}
else
{

0 comments on commit 98b0556

Please sign in to comment.
Something went wrong with that request. Please try again.