diff --git a/lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm b/lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm
index e634ffb..a090084 100644
--- a/lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm
+++ b/lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm
@@ -4,7 +4,9 @@ use strict;
 use warnings 'FATAL';
 
 use IO::File;
+use List::Util;
 use Pacbio::Run;
+use Pacbio::Run::AnalysisFactoryForRsii;
 use Path::Class;
 
 class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
@@ -14,10 +16,10 @@ class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
             is => 'Text',
             doc => 'Command to fill in with BAX files and logging files. BAX files will be appended to the command. Use %LOG for the log file base name for each cell. Example: bsub -o /my-logging-dir/%LOG bam2bax',
         },
-        run_directories => {
+        bax_sources => {
             is => 'Text',
             is_many => 1,
-            doc => 'Pacbio run directories',
+            doc => 'Pacbio run directories OR FOF of bax files.',
         },
     },
     has_optional_input => {
@@ -39,9 +41,9 @@ class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
         },
     },
     has_optional_transient => {
-        _bam_output_directory => { is => 'Path::Class::Dir', },
-        _runs => { is => 'ARRAY', },
+        _analyses => { is => 'ARRAY', },
         _commands_fh => { is => 'IO::Handle', },
+        _bam_output_directory => { is => 'Text', },
     },
     doc => 'insert missing primary contigs from haplotigs',
 };
@@ -49,14 +51,17 @@ class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
 sub __init__ {
     my ($self) = @_;
 
-    my @runs;
-    for my $directory ( $self->run_directories ) {
-        push @runs, Pacbio::Run->new(
-            directory => Path::Class::dir($directory),
-            machine_type => 'rsii',
-        );
+    my @bax_sources = $self->bax_sources;
+    if ( @bax_sources == 1 and -f $bax_sources[0] ) {
+        $self->_resolve_analyses_from_bax_fof($bax_sources[0]);
     }
-    $self->_runs(\@runs);
+    elsif ( List::Util::all { -d $_ } @bax_sources ) {
+        $self->_resolve_analyses_from_runs(@bax_sources);
+    }
+    else {
+        $self->fatal_message('Can not handle mix of run directories and bax FOFs! %s', join("\n", @bax_sources));
+    }
+    $self->fatal_message("No analyses found in bax sources!\n%s", join("\n", @bax_sources)) if not @{$self->_analyses};
 
     my $commands_file = $self->commands_file;
     if ( $commands_file and $commands_file ne '-' and -s $commands_file ) {
@@ -77,26 +82,54 @@ sub __init__ {
 
 }
 
-sub execute {
-    my ($self) = @_;
+# Resolve analyses from a FOF (file of file names) listing one bax file per
+# line; blank lines are skipped. The analysis directory is taken to be two
+# levels above each bax file, and each distinct directory is built once.
+sub _resolve_analyses_from_bax_fof {
+    my ($self, $bax_fof) = @_;
 
-    $self->__init__;
+    my $fh = IO::File->new($bax_fof, 'r');
+    $self->fatal_message('Failed to open bax FOF!') if not $fh;
+    my @analysis_directories = List::Util::uniq( map { chomp; file($_)->parent->parent } grep { /\S/ } $fh->getlines );
+    $fh->close;
+
+    my @analyses = map { Pacbio::Run::AnalysisFactoryForRsii->build_from_analysis_directory($_) } @analysis_directories;
+    $self->_analyses(\@analyses);
+}
+
+# Resolve analyses from the given run directories, keeping the analyses each
+# run returns for the library name; runs with none are warned about and skipped.
+sub _resolve_analyses_from_runs {
+    my ($self, @runs) = @_;
 
     my $library_name = $self->library_name;
     my $regex = qr/$library_name/;
-
-    for my $run ( @{$self->_runs} ) {
-        my $analyses = $run->analyses_for_sample($regex);
-        if ( not $analyses ) {
-            $self->warning_message("No analyses found for library name %s on run %", $library_name, $run->__name__);
+    my @analyses;
+    for my $directory ( @runs ) {
+        my $run = Pacbio::Run->new(
+            directory => Path::Class::dir($directory),
+            machine_type => 'rsii',
+        );
+        my $run_analyses = $run->analyses_for_sample($regex);
+        if ( not $run_analyses ) {
+            $self->warning_message("No analyses found for library name %s on run %s", $library_name, $run->__name__);
             next;
         }
-        for my $analysis ( @$analyses ) {
-            my $bam = $self->_bam_output_for_analysis($analysis);
-            next if $bam and -s $bam;
-            my $cmd = $self->_bax_to_bam_command_for_analysis($analysis);
-            $self->_commands_fh->print("$cmd\n");
-        }
+        push @analyses, @$run_analyses;
+    }
+    $self->_analyses(\@analyses);
+}
+
+sub execute {
+    my ($self) = @_;
+
+    $self->__init__;
+
+    for my $analysis ( @{$self->_analyses} ) {
+        my $bam = $self->_bam_output_for_analysis($analysis);
+        next if $bam and -s $bam;
+        my $cmd = $self->_bax_to_bam_command_for_analysis($analysis);
+        $self->_commands_fh->print("$cmd\n");
     }
 
     1;
diff --git a/lib/Pacbio/Run/AnalysisFactoryForRsii.pm b/lib/Pacbio/Run/AnalysisFactoryForRsii.pm
index 5c255fe..87f6134 100644
--- a/lib/Pacbio/Run/AnalysisFactoryForRsii.pm
+++ b/lib/Pacbio/Run/AnalysisFactoryForRsii.pm
@@ -14,7 +14,7 @@ sub build {
     my ($class, $directory) = @_;
 
     die "No run directory given." if not $directory;
-    die "Run directory given does not exist!" if not -d "$directory";
+    die "Run directory given does not exist! $directory" if not -d "$directory";
 
     my (@analyses);
     find(
@@ -45,7 +45,7 @@ sub build_from_analysis_directory {
     my ($class, $directory) = @_;
 
     die "No analysis directory given." if not $directory;
-    die "Analysis directory given does not exist!" if not -d "$directory";
+    die "Analysis directory given does not exist! $directory" if not -d "$directory";
 
     my ($metadata_xml_file, @analysis_files);
     find(
@@ -71,7 +71,7 @@ sub build_from_analysis_directory {
         metadata_xml_file => file($metadata_xml_file),
         %$xml_info,
     );
-    $analysis->add_analysis_files(@analysis_files);
+    $analysis->add_analysis_files( map { file($_) } @analysis_files );
 
     $analysis;
 }
diff --git a/t/Pacbio-Command-Assembly-BaxToBam-GenerateCommands.t b/t/Pacbio-Command-Assembly-BaxToBam-GenerateCommands.t
index e159408..17a73a3 100644
--- a/t/Pacbio-Command-Assembly-BaxToBam-GenerateCommands.t
+++ b/t/Pacbio-Command-Assembly-BaxToBam-GenerateCommands.t
@@ -5,9 +5,10 @@ use warnings 'FATAL';
 
 use TenxTestEnv;
 
+use File::Temp;
 use File::Slurp;
 use Test::Exception;
-use Test::More tests => 4;
+use Test::More tests => 5;
 
 my %test = ( class => 'Pacbio::Command::Assembly::BaxToBam::GenerateCommands', );
 subtest 'setup' => sub{
@@ -24,11 +25,11 @@ subtest 'setup' => sub{
 
 };
 
-subtest 'execute' => sub{
+subtest 'execute with runs' => sub{
     plan tests => 4;
 
     my $cmd = $test{class}->create(
-        run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
+        bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
         bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
     );
     ok($cmd, 'create command');
@@ -45,12 +46,12 @@ subtest 'execute' => sub{
 
 };
 
-subtest 'execute with library name' => sub{
+subtest 'execute with runs and library name' => sub{
     plan tests => 4;
 
     my $library_name = 'EEAI';
     my $cmd = $test{class}->create(
-        run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
+        bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
         bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
         library_name => $library_name,
     );
@@ -68,11 +69,40 @@ subtest 'execute with library name' => sub{
 
 };
 
+subtest 'execute with bax fof' => sub{
+    plan tests => 4;
+
+    my $bax_fof_contents = File::Slurp::slurp($test{data_dir}->file('bax.fof')->stringify);
+    my $base_test_data_dir = TenxTestEnv::test_data_directory();
+    $bax_fof_contents =~ s/\%TDD/$base_test_data_dir/g;
+
+    my ($fh, $bax_fof) = File::Temp::tempfile(UNLINK => 1);
+    $fh->print($bax_fof_contents);
+    $fh->close;
+
+    my $cmd = $test{class}->create(
+        bax_sources => [ $bax_fof ],
+        bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
+    );
+    ok($cmd, 'create command');
+
+    my $output;
+    open local(*STDOUT), '>', \$output or die $!;
+    lives_ok(sub{ $cmd->execute }, 'execute');
+    ok($cmd->result, 'command result');
+
+    my $expected_output = File::Slurp::slurp( $test{data_dir}->file('expected.bax-fof.out')->stringify );
+    $base_test_data_dir = TenxTestEnv::test_data_directory();
+    $expected_output =~ s/\%TDD/$base_test_data_dir/g;
+    is($output, $expected_output, 'output commands matches');
+
+};
+
 subtest 'execute with some bams completed' => sub{
     plan tests => 4;
 
     my $cmd = $test{class}->create(
-        run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
+        bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
         bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
         bam_output_directory => $test{data_dir}->stringify,
     );
diff --git a/t/data/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/bax.fof b/t/data/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/bax.fof
new file mode 100644
index 0000000..8e56fa3
--- /dev/null
+++ b/t/data/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/bax.fof
@@ -0,0 +1,6 @@
+%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.1.bax.h5
+%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.2.bax.h5
+%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.3.bax.h5
+%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.1.bax.h5
+%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.2.bax.h5
+%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.3.bax.h5
diff --git a/t/data/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/expected.bax-fof.out b/t/data/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/expected.bax-fof.out
new file mode 100644
index 0000000..0f4c3af
--- /dev/null
+++ b/t/data/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/expected.bax-fof.out
@@ -0,0 +1,2 @@
+bsub -q long -o 6U00E3_B02.out bam2bax %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.1.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.2.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.3.bax.h5
+bsub -q long -o 6U00E3_E01.out bam2bax %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.1.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.2.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.3.bax.h5