Skip to content

Commit

Permalink
Merge dc40e51 into 7175dbf
Browse files Browse the repository at this point in the history
  • Loading branch information
ebelter committed Aug 16, 2018
2 parents 7175dbf + dc40e51 commit c00e0dc
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 40 deletions.
76 changes: 52 additions & 24 deletions lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use strict;
use warnings 'FATAL';

use IO::File;
use List::Util;
use Pacbio::Run;
use Pacbio::Run::AnalysisFactoryForRsii;
use Path::Class;

class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
Expand All @@ -14,10 +16,10 @@ class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
is => 'Text',
doc => 'Command to fill in with BAX files and logging files. BAX files will be appended to the command. Use %LOG for the log file base name for each cell. Example: bsub -o /my-logging-dir/%LOG bam2bax',
},
run_directories => {
bax_sources => {
is => 'Text',
is_many => 1,
doc => 'Pacbio run directories',
doc => 'Pacbio run directories OR FOF of bax files.',
},
},
has_optional_input => {
Expand All @@ -39,24 +41,27 @@ class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
},
},
has_optional_transient => {
_bam_output_directory => { is => 'Path::Class::Dir', },
_runs => { is => 'ARRAY', },
_analyses => { is => 'ARRAY', },
_commands_fh => { is => 'IO::Handle', },
_bam_output_directory => { is => 'Text', },
},
doc => 'insert missing primary contigs from haplotigs',
};

sub __init__ {
my ($self) = @_;

my @runs;
for my $directory ( $self->run_directories ) {
push @runs, Pacbio::Run->new(
directory => Path::Class::dir($directory),
machine_type => 'rsii',
);
my @bax_sources = $self->bax_sources;
if ( @bax_sources == 1 and -f $bax_sources[0] ) {
$self->_resolve_analyses_from_bax_fof($bax_sources[0]);
}
$self->_runs(\@runs);
elsif ( List::Util::all { -d $_ } @bax_sources ) {
$self->_resolve_analyses_from_runs(@bax_sources);
}
else {
$self->fatal_message('Can not handle mix of run directories and bax FOFs! %s', join("\n", @bax_sources));
}
$self->fatal_message("No analyses found in bax sources!\n%s", join("\n", @bax_sources)) if not @{$self->_analyses};

my $commands_file = $self->commands_file;
if ( $commands_file and $commands_file ne '-' and -s $commands_file ) {
Expand All @@ -77,26 +82,49 @@ sub __init__ {

}

sub execute {
my ($self) = @_;
# Resolve analyses from a file-of-filenames (FOF) listing bax files, one path
# per line, and store them (as an array ref) in the _analyses property.
#
# The analysis directory of each listed file is taken to be the grandparent of
# that file (<analysis dir>/<sub dir>/<file>). Each distinct analysis directory
# is built once, in first-seen order.
#
# Params:  $bax_fof - path of the FOF to read
# Returns: whatever the _analyses setter returns
# Raises:  reports through fatal_message when the FOF cannot be opened; any
#          error from build_from_analysis_directory propagates
#
# NOTE: this method must not call __init__ - __init__ is what dispatches here,
# so doing so would recurse without end.
sub _resolve_analyses_from_bax_fof {
    my ($self, $bax_fof) = @_;

    my $fh = IO::File->new($bax_fof, 'r');
    $self->fatal_message('Failed to open bax FOF! %s: %s', $bax_fof, $!) if not $fh;

    # De-duplicate on the stringified directory with a plain hash; this keeps
    # the method free of any list-utility module the file does not load.
    my (%seen, @analysis_directories);
    while ( my $line = $fh->getline ) {
        chomp $line;
        next if $line !~ /\S/;    # a blank line names no bax file
        my $analysis_directory = file($line)->parent->parent;
        next if $seen{"$analysis_directory"}++;
        push @analysis_directories, $analysis_directory;
    }
    $fh->close;

    my @analyses = map { Pacbio::Run::AnalysisFactoryForRsii->build_from_analysis_directory($_) } @analysis_directories;
    $self->_analyses(\@analyses);
}

# Resolve analyses from Pacbio run directories and store them (as an array
# ref) in the _analyses property.
#
# Every directory is wrapped in a Pacbio::Run with machine type 'rsii' and
# asked for the analyses whose sample matches the library name. Runs that
# return nothing are reported with warning_message and skipped.
#
# Params:  @runs - run directory paths; when empty, the bax_sources property
#                  is used instead
# Returns: whatever the _analyses setter returns
sub _resolve_analyses_from_runs {
    my ($self, @runs) = @_;

    my @run_directories = @runs ? @runs : $self->bax_sources;

    # The library name is interpolated as a regex pattern, not a literal string.
    my $library_name = $self->library_name;
    my $regex = qr/$library_name/;

    my @analyses;
    for my $directory ( @run_directories ) {
        my $run = Pacbio::Run->new(
            directory => Path::Class::dir($directory),
            machine_type => 'rsii',
        );
        my $run_analyses = $run->analyses_for_sample($regex);
        if ( not $run_analyses ) {
            # Two %s conversions, one per argument, so the run name is printed too.
            $self->warning_message("No analyses found for library name %s on run %s", $library_name, $run->__name__);
            next;
        }
        push @analyses, @$run_analyses;
    }
    $self->_analyses(\@analyses);
}

sub execute {
my ($self) = @_;

$self->__init__;

for my $analysis ( @{$self->_analyses} ) {
my $bam = $self->_bam_output_for_analysis($analysis);
next if $bam and -s $bam;
my $cmd = $self->_bax_to_bam_command_for_analysis($analysis);
$self->_commands_fh->print("$cmd\n");
}

1;
Expand Down
36 changes: 35 additions & 1 deletion lib/Pacbio/Run/AnalysisFactoryForRsii.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ sub build {
my ($class, $directory) = @_;

die "No run directory given." if not $directory;
die "Run directory given does not exist!" if not -d "$directory";
die "Run directory given does not exist! $directory" if not -d "$directory";

my (@analyses);
find(
Expand All @@ -41,6 +41,40 @@ sub build {
\@analyses;
}

# Build a single Pacbio::Run::Analysis from one analysis directory.
#
# The entries of the directory are searched recursively for exactly one file
# whose name ends in "metadata.xml", and for every file ending in ".h5" that
# sits under a path containing "Analysis_Results".
#
# Params:  $directory - the analysis directory, as a plain path string or any
#                       object that stringifies to the path
# Returns: the Pacbio::Run::Analysis, created from the metadata XML info and
#          with the found analysis files added
# Raises:  dies when no directory is given, when it does not exist, when more
#          than one metadata XML is found, or when the metadata XML or the
#          analysis files are missing
sub build_from_analysis_directory {
    my ($class, $directory) = @_;

    die "No analysis directory given." if not $directory;
    die "Analysis directory given does not exist! $directory" if not -d "$directory";

    # Build the "<directory>/*" pattern from the stringified path so that plain
    # strings work as well as path objects, and expand it with bsd_glob, which
    # unlike the core glob does not split the pattern on whitespace.
    require File::Glob;
    require File::Spec;
    my $children_pattern = File::Spec->catfile("$directory", '*');

    my ($metadata_xml_file, @analysis_files);
    find(
        {
            wanted => sub{
                if ( /metadata\.xml$/) {
                    die "Found more than one metadata XML in $directory" if $metadata_xml_file;
                    $metadata_xml_file = $File::Find::name;
                }
                elsif ( $File::Find::dir =~ /Analysis_Results/ and /\.h5$/ ) {
                    push @analysis_files, $File::Find::name;
                }
            },
        },
        File::Glob::bsd_glob($children_pattern),
    );

    die "Failed to find analysis metadata xml in directory: $directory" if !$metadata_xml_file;
    die "Failed to find analysis files in directory: $directory" if !@analysis_files;

    my $xml_info = _load_xml($metadata_xml_file);
    my $analysis = Pacbio::Run::Analysis->new(
        metadata_xml_file => file($metadata_xml_file),
        %$xml_info,
    );
    $analysis->add_analysis_files( map { file($_) } @analysis_files );
    $analysis;
}

sub _load_xml {
my ($xml_file) = @_;

Expand Down
42 changes: 36 additions & 6 deletions t/Pacbio-Command-Assembly-BaxToBam-GenerateCommands.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ use warnings 'FATAL';

use TenxTestEnv;

use File::Temp;
use File::Slurp;
use Test::Exception;
use Test::More tests => 4;
use Test::More tests => 5;

my %test = ( class => 'Pacbio::Command::Assembly::BaxToBam::GenerateCommands', );
subtest 'setup' => sub{
Expand All @@ -24,11 +25,11 @@ subtest 'setup' => sub{

};

subtest 'execute' => sub{
subtest 'execute with runs' => sub{
plan tests => 4;

my $cmd = $test{class}->create(
run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
);
ok($cmd, 'create command');
Expand All @@ -45,12 +46,12 @@ subtest 'execute' => sub{

};

subtest 'execute with library name' => sub{
subtest 'execute with runs and library name' => sub{
plan tests => 4;

my $library_name = 'EEAI';
my $cmd = $test{class}->create(
run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
library_name => $library_name,
);
Expand All @@ -68,11 +69,40 @@ subtest 'execute with library name' => sub{

};

# Runs the command with a single bax FOF as the bax source and compares the
# commands printed to STDOUT with the expected fixture. Both fixtures carry a
# %TDD placeholder that is replaced with the base test data directory.
subtest 'execute with bax fof' => sub{
    plan tests => 4;

    my $base_test_data_dir = TenxTestEnv::test_data_directory();

    my $bax_fof_contents = File::Slurp::slurp($test{data_dir}->file('bax.fof')->stringify);
    $bax_fof_contents =~ s/\%TDD/$base_test_data_dir/g;

    # UNLINK => 1: when the file name is requested, tempfile() otherwise
    # leaves the file behind after the test exits.
    my ($fh, $bax_fof) = File::Temp::tempfile(UNLINK => 1);
    $fh->print($bax_fof_contents);
    $fh->close;

    my $cmd = $test{class}->create(
        bax_sources => [ $bax_fof ],
        bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
    );
    ok($cmd, 'create command');

    # Capture STDOUT in memory for the duration of this subtest.
    my $output;
    open local(*STDOUT), '>', \$output or die $!;
    lives_ok(sub{ $cmd->execute }, 'execute');
    ok($cmd->result, 'command result');

    my $expected_output = File::Slurp::slurp( $test{data_dir}->file('expected.bax-fof.out')->stringify );
    $expected_output =~ s/\%TDD/$base_test_data_dir/g;
    is($output, $expected_output, 'output commands matches');

};

subtest 'execute with some bams completed' => sub{
plan tests => 4;

my $cmd = $test{class}->create(
run_directories => [ $test{data_dir}->file('6U00E3')->stringify, ],
bax_sources => [ $test{data_dir}->file('6U00E3')->stringify, ],
bam_to_bax_command => 'bsub -q long -o %LOG bam2bax',
bam_output_directory => $test{data_dir}->stringify,
);
Expand Down
50 changes: 41 additions & 9 deletions t/Pacbio-Run-AnalysisFactoryForRsii.t
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,61 @@ use warnings 'FATAL';
use TenxTestEnv;

use Path::Class;
use Test::More tests => 1;
use Test::More tests => 3;
use Test::Exception;

my %setup = ( class => 'Pacbio::Run::AnalysisFactoryForRsii', );
subtest 'new' => sub{
plan tests => 11;
# Shared fixtures for the subtests that follow: loads the class under test,
# records the example run directory in $setup{run_dir}, and records the four
# analysis files expected for the A01_1 analysis in $setup{A01_1_analysis_files}.
# Exactly three tests run here, matching the plan; the build() checks belong
# to the 'build' subtest.
subtest 'setup' => sub{
    plan tests => 3;

    use_ok($setup{class}) or die;

    $setup{run_dir} = TenxTestEnv::test_data_directory_for_class('Pacbio::Run')->subdir('6U00FA');
    ok(-d $setup{run_dir}->stringify, 'run data dir exists');

    # Three numbered bax files plus the bas file.
    my $subdir = $setup{run_dir}->subdir('A01_1')->subdir('Analysis_Results');
    my @afiles = map {
        $subdir->file( sprintf('m160819_231415_00116_c101036512550000001823251411171640_s1_p0.%d.bax.h5', $_) )
    } (qw/ 1 2 3 /);
    push @afiles, $subdir->file('m160819_231415_00116_c101036512550000001823251411171640_s1_p0.bas.h5');
    $setup{A01_1_analysis_files} = \@afiles;
    is(scalar @{$setup{A01_1_analysis_files}}, 4, 'run analysis files');

};

# Checks build(): its two argument failures, then the number of analyses built
# from the example run directory and the attributes of the first one. All
# paths derive from $setup{run_dir}, which the 'setup' subtest fills in.
subtest 'build' => sub{
    plan tests => 10;

    throws_ok(sub{ $setup{class}->build; }, qr/No run directory given/, 'fails w/o directory');
    throws_ok(sub{ $setup{class}->build('blah'); }, qr/Run directory given does not exist/, 'fails w/ non existing directory');

    my $analyses = $setup{class}->build($setup{run_dir});
    is(scalar @$analyses, 10, 'built the correct number of analyses');
    is($analyses->[0]->metadata_xml_file, $setup{run_dir}->subdir('A01_1')->file('m160819_231415_00116_c101036512550000001823251411171640_s1_p0.metadata.xml'), 'metadata_xml_file');
    is($analyses->[0]->sample_name, 'NA19434_4808o3_lib1_50pM', 'sample_name');
    is($analyses->[0]->library_name, 'NA19434_4808o3_lib1_50pM_A1', 'library_name');
    is($analyses->[0]->plate_id, '6U00FA', 'plate_id');
    is($analyses->[0]->version, '2.3.0.3.154799', 'version');
    is($analyses->[0]->well, 'A01', 'well');
    is_deeply($analyses->[0]->analysis_files, $setup{A01_1_analysis_files}, 'analysis files');

};

# Checks build_from_analysis_directory(): its three failure modes first, then
# the attributes of the analysis built from the A01_1 directory of the run.
subtest 'build from analysis directory' => sub{
    plan tests => 9;

    my $factory = $setup{class};

    throws_ok(
        sub{ $factory->build_from_analysis_directory; },
        qr/No analysis directory given/,
        'fails w/o directory',
    );
    throws_ok(
        sub{ $factory->build_from_analysis_directory('blah'); },
        qr/Analysis directory given does not exist/,
        'fails w/ non existing directory',
    );
    # The Analysis_Results sub directory is expected to hold no metadata XML.
    throws_ok(
        sub{ $factory->build_from_analysis_directory($setup{run_dir}->subdir('A01_1')->subdir('Analysis_Results')); },
        qr/Failed to find analysis metadata xml in/,
        'fails w/ when no analysis found',
    );

    my $well_dir = $setup{run_dir}->subdir('A01_1');
    my $built = $factory->build_from_analysis_directory($well_dir);
    is($built->metadata_xml_file, $well_dir->file('m160819_231415_00116_c101036512550000001823251411171640_s1_p0.metadata.xml'), 'metadata_xml_file');

    # Accessor name => expected value; the accessor name doubles as the test name.
    my @expectations = (
        [ sample_name  => 'NA19434_4808o3_lib1_50pM' ],
        [ library_name => 'NA19434_4808o3_lib1_50pM_A1' ],
        [ plate_id     => '6U00FA' ],
        [ version      => '2.3.0.3.154799' ],
        [ well         => 'A01' ],
    );
    for my $expectation ( @expectations ) {
        my ($accessor, $value) = @$expectation;
        is($built->$accessor, $value, $accessor);
    }

};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.1.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.2.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.3.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.1.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.2.bax.h5
%TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.3.bax.h5
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bsub -q long -o 6U00E3_B02.out bam2bax %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.1.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.2.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/B02_1/Analysis_Results/m160613_070741_00116_c100976392550000001823226708101601_s1_p0.3.bax.h5
bsub -q long -o 6U00E3_E01.out bam2bax %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.1.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.2.bax.h5 %TDD/Pacbio-Command-Assembly-BaxToBam-GenerateCommands/6U00E3/E01_1/Analysis_Results/m160611_231419_00116_c100976122550000001823226708101634_s1_p0.3.bax.h5

0 comments on commit c00e0dc

Please sign in to comment.