Skip to content

Commit

Permalink
PacBio Assembly Bax2Bam Generate Commands
Browse files Browse the repository at this point in the history
allow using fof of bax files
  • Loading branch information
ebelter committed Aug 16, 2018
1 parent 3cb78dd commit dc40e51
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 33 deletions.
76 changes: 52 additions & 24 deletions lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use strict;
use warnings 'FATAL';

use IO::File;
use List::Util;
use Pacbio::Run;
use Pacbio::Run::AnalysisFactoryForRsii;
use Path::Class;

class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
Expand All @@ -14,10 +16,10 @@ class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
is => 'Text',
doc => 'Command to fill in with BAX files and logging files. BAX files will be appended to the command. Use %LOG for the log file base name for each cell. Example: bsub -o /my-logging-dir/%LOG bam2bax',
},
run_directories => {
bax_sources => {
is => 'Text',
is_many => 1,
doc => 'Pacbio run directories',
doc => 'Pacbio run directories OR FOF of bax files.',
},
},
has_optional_input => {
Expand All @@ -39,24 +41,27 @@ class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
},
},
has_optional_transient => {
_bam_output_directory => { is => 'Path::Class::Dir', },
_runs => { is => 'ARRAY', },
_analyses => { is => 'ARRAY', },
_commands_fh => { is => 'IO::Handle', },
_bam_output_directory => { is => 'Text', },
},
doc => 'insert missing primary contigs from haplotigs',
};

sub __init__ {
my ($self) = @_;

my @runs;
for my $directory ( $self->run_directories ) {
push @runs, Pacbio::Run->new(
directory => Path::Class::dir($directory),
machine_type => 'rsii',
);
my @bax_sources = $self->bax_sources;
if ( @bax_sources == 1 and -f $bax_sources[0] ) {
$self->_resolve_analyses_from_bax_fof($bax_sources[0]);
}
$self->_runs(\@runs);
elsif ( List::Util::all { -d $_ } @bax_sources ) {
$self->_resolve_analyses_from_runs(@bax_sources);
}
else {
$self->fatal_message('Can not handle mix of run directories and bax FOFs! %s', join("\n", @bax_sources));
}
$self->fatal_message("No analyses found in bax sources!\n%s", join("\n", @bax_sources)) if not @{$self->_analyses};

my $commands_file = $self->commands_file;
if ( $commands_file and $commands_file ne '-' and -s $commands_file ) {
Expand All @@ -77,26 +82,49 @@ sub __init__ {

}

sub execute {
my ($self) = @_;
# Resolve the analyses to convert from a file-of-filenames (FOF) that lists
# one bax file path per line.
#
# Params:
#   $bax_fof - path to the FOF.
#
# Each listed bax file is mapped to the directory two levels above it
# (file -> parent -> parent), which is handed to
# Pacbio::Run::AnalysisFactoryForRsii->build_from_analysis_directory. The
# resulting analyses are stored, in first-seen order, in the _analyses
# property.
#
# Reports through fatal_message (presumably throws - confirm against the
# base class) when the FOF cannot be opened.
sub _resolve_analyses_from_bax_fof {
    my ($self, $bax_fof) = @_;

    my $fh = IO::File->new($bax_fof, 'r');
    $self->fatal_message('Failed to open bax FOF! %s: %s', $bax_fof, $!) if not $fh;

    # Several bax files share one analysis directory; keep each directory
    # once. Path::Class objects are de-duplicated by their string form.
    my (%seen, @analysis_directories);
    while ( defined( my $bax_file = $fh->getline ) ) {
        chomp $bax_file;
        next if $bax_file !~ /\S/; # a blank line would resolve to a bogus directory
        my $analysis_directory = file($bax_file)->parent->parent;
        next if $seen{"$analysis_directory"}++;
        push @analysis_directories, $analysis_directory;
    }
    $fh->close;

    my @analyses = map { Pacbio::Run::AnalysisFactoryForRsii->build_from_analysis_directory($_) } @analysis_directories;
    $self->_analyses(\@analyses);
}

# Resolve the analyses to convert from one or more RSII run directories.
#
# Params:
#   @run_directories - run directory paths. When none are passed, the
#                      command's bax_sources are used instead.
#
# A Pacbio::Run is built for every directory and asked for the analyses whose
# sample matches the library_name pattern. Runs that yield nothing are
# reported with a warning and skipped. All analyses found are stored in the
# _analyses property.
sub _resolve_analyses_from_runs {
    my ($self, @run_directories) = @_;

    @run_directories = $self->bax_sources if not @run_directories;

    # NOTE(review): library_name is interpolated as a regex, not a literal -
    # confirm that callers rely on pattern semantics before quoting it.
    my $library_name = $self->library_name;
    my $regex = qr/$library_name/;

    my @analyses;
    for my $directory ( @run_directories ) {
        my $run = Pacbio::Run->new(
            directory => Path::Class::dir($directory),
            machine_type => 'rsii',
        );
        # analyses_for_sample appears to return an array ref, or a false
        # value when nothing matches.
        my $run_analyses = $run->analyses_for_sample($regex);
        if ( not $run_analyses ) {
            $self->warning_message("No analyses found for library name %s on run %s", $library_name, $run->__name__);
            next;
        }
        push @analyses, @$run_analyses;
    }
    $self->_analyses(\@analyses);
}

sub execute {
my ($self) = @_;

$self->__init__;

for my $analysis ( @{$self->_analyses} ) {
my $bam = $self->_bam_output_for_analysis($analysis);
next if $bam and -s $bam;
my $cmd = $self->_bax_to_bam_command_for_analysis($analysis);
$self->_commands_fh->print("$cmd\n");
}

1;
Expand Down
6 changes: 3 additions & 3 deletions lib/Pacbio/Run/AnalysisFactoryForRsii.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ sub build {
my ($class, $directory) = @_;

die "No run directory given." if not $directory;
die "Run directory given does not exist!" if not -d "$directory";
die "Run directory given does not exist! $directory" if not -d "$directory";

my (@analyses);
find(
Expand Down Expand Up @@ -45,7 +45,7 @@ sub build_from_analysis_directory {
my ($class, $directory) = @_;

die "No analysis directory given." if not $directory;
die "Analysis directory given does not exist!" if not -d "$directory";
die "Analysis directory given does not exist! $directory" if not -d "$directory";

my ($metadata_xml_file, @analysis_files);
find(
Expand All @@ -71,7 +71,7 @@ sub build_from_analysis_directory {
metadata_xml_file => file($metadata_xml_file),
%$xml_info,
);
$analysis->add_analysis_files(@analysis_files);
$analysis->add_analysis_files( map { file($_) } @analysis_files );
$analysis;
}

Expand Down
42 changes: 36 additions & 6 deletions t/Pacbio-Command-Assembly-BaxToBam-GenerateCommands.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ use warnings 'FATAL';

use TenxTestEnv;

use File::Temp;
use File::Slurp;
use Test::Exception;
use Test::More tests => 4;
use Test::More tests => 5;

my %test = ( class => 'Pacbio::Command::Assembly::BaxToBam::GenerateCommands', );
subtest 'setup' => sub{
Expand All @@ -24,11 +25,11 @@ subtest 'setup' => sub{

};

subtest 'execute' => sub{
subtest 'execute with runs' => sub{
plan tests => 4;

my $cmd = $test{class}->create(
run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
);
ok($cmd, 'create command');
Expand All @@ -45,12 +46,12 @@ subtest 'execute' => sub{

};

subtest 'execute with library name' => sub{
subtest 'execute with runs and library name' => sub{
plan tests => 4;

my $library_name = 'EEAI';
my $cmd = $test{class}->create(
run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
library_name => $library_name,
);
Expand All @@ -68,11 +69,40 @@ subtest 'execute with library name' => sub{

};

# Run the command with a single FOF of bax files as its only bax source and
# compare the commands it prints against the expected fixture.
subtest 'execute with bax fof' => sub{
    plan tests => 4;

    # Fixture files carry a %TDD placeholder in place of the test data
    # directory; both the FOF and the expected output get the same value.
    my $base_test_data_dir = TenxTestEnv::test_data_directory();

    my $bax_fof_contents = File::Slurp::slurp($test{data_dir}->file('bax.fof')->stringify);
    $bax_fof_contents =~ s/\%TDD/$base_test_data_dir/g;

    # tempfile() keeps the file when its name is requested; UNLINK => 1
    # removes it at program exit so test runs do not accumulate temp files.
    my ($fh, $bax_fof) = File::Temp::tempfile(UNLINK => 1);
    $fh->print($bax_fof_contents);
    $fh->close or die "Failed to close $bax_fof: $!";

    my $cmd = $test{class}->create(
        bax_sources => [ $bax_fof ],
        bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
    );
    ok($cmd, 'create command');

    # Capture what the command prints to STDOUT for the duration of this subtest.
    my $output;
    open local(*STDOUT), '>', \$output or die $!;
    lives_ok(sub{ $cmd->execute }, 'execute');
    ok($cmd->result, 'command result');

    my $expected_output = File::Slurp::slurp( $test{data_dir}->file('expected.bax-fof.out')->stringify );
    $expected_output =~ s/\%TDD/$base_test_data_dir/g;
    is($output, $expected_output, 'output commands matches');

};

subtest 'execute with some bams completed' => sub{
plan tests => 4;

my $cmd = $test{class}->create(
run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
bam_output_directory => $test{data_dir}->stringify,
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.1.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.2.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.3.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.1.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.2.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.3.bax.h5
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bsub -q long -o 6U00E3_B02.out bam2bax %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.1.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.2.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.3.bax.h5
bsub -q long -o 6U00E3_E01.out bam2bax %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.1.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.2.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.3.bax.h5

0 comments on commit dc40e51

Please sign in to comment.