Skip to content

Commit

Permalink
Merge 66ba8d8 into b06842b
Browse files Browse the repository at this point in the history
  • Loading branch information
ebelter committed Aug 13, 2018
2 parents b06842b + 66ba8d8 commit dbada3e
Show file tree
Hide file tree
Showing 944 changed files with 6,365 additions and 0 deletions.
1 change: 1 addition & 0 deletions cpanfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
requires 'Bio::Seq';
requires 'Bio::SeqIO';
requires 'Class::Accessor';
requires 'DBD::mysql';
requires 'Date::Format';
requires 'Devel::Cover';
Expand Down
11 changes: 11 additions & 0 deletions lib/Pacbio/Command/Assembly/BaxToBam.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package Pacbio::Command::Assembly::BaxToBam;

use strict;
use warnings 'FATAL';

class Pacbio::Command::Assembly::BaxToBam {
is => 'Command::Tree',
doc => 'helpers for bax2bam',
};

1;
123 changes: 123 additions & 0 deletions lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package Pacbio::Command::Assembly::BaxToBam::GenerateCommands;

use strict;
use warnings 'FATAL';

use IO::File;
use Pacbio::Run;
use Path::Class;

class Pacbio::Command::Assembly::BaxToBam::GenerateCommands {
is => 'Command::V2',
has_input => {
bam_to_bax_command => {
is => 'Text',
doc => 'Command to fill in with BAX files and logging files. BAX files will be appended to the command. Use %LOG for the log file base name for each cell. Example: bsub -o /my-logging-dir/%LOG bam2bax',
},
run_directories => {
is => 'Text',
is_many => 1,
doc => 'Pacbio run directories',
},
},
has_optional_input => {
bam_output_directory => {
is => 'Text',
doc => 'Give the bam output directory to check if the bam already exists. If so, the bax2bam command for that cell will not be printed.',
},
},
has_optional_output => {
commands_file => {
is => 'Text',
default_value => '-',
doc => 'Output file to print commands. Defaults to STDOUT.',
},
},
has_optional_transient => {
_bam_output_directory => { is => 'Path::Class::Dir', },
_runs => { is => 'ARRAY', },
_commands_fh => { is => 'IO::Handle', },
},
doc => 'insert missing primary contigs from haplotigs',
};

sub __init__ {
my ($self) = @_;

my @runs;
for my $directory ( $self->run_directories ) {
push @runs, Pacbio::Run->new(
directory => Path::Class::dir($directory),
machine_type => 'rsii',
);
}
$self->_runs(\@runs);

my $commands_file = $self->commands_file;
if ( $commands_file and $commands_file ne '-' and -s $commands_file ) {
$self->fatal_message("Output commands file exists: $commands_file. Please change detination, or remove it.");
}

$self->_commands_fh(
( $commands_file eq '-' )
? 'STDOUT'
: IO::File->new($commands_file, 'r')
);

my $bam_output_directory = $self->bam_output_directory;
if ( $bam_output_directory ) {
$self->fatal_message('Given bam output directory, but it does not exist!') if not -d $bam_output_directory;
$self->_bam_output_directory( Path::Class::dir($bam_output_directory) );
}

}

sub execute {
my ($self) = @_;

$self->__init__;
for my $run ( @{$self->_runs} ) {
my $analyses = $run->analyses;
for my $analysis ( @$analyses ) {
my $bam = $self->_bam_output_for_analysis($analysis);
next if $bam and -s $bam;
my $cmd = $self->_bax_to_bam_command_for_analysis($analysis);
$self->_commands_fh->print("$cmd\n");
}
}

1;
}

sub _bam_output_for_analysis {
my ($self, $analysis) = @_;
return if not $self->bam_output_directory;
# m151026_060206_00116_c100928752550000001823208204291687_s1_p0.subreads.bam
# m151026_060206_00116_c100928752550000001823208204291687_s1_p0.bax.h5
my @bax_files = map { $_->stringify } grep { "$_" =~ /\.bax\.h5$/ } @{$analysis->analysis_files};
my @bax_basenames = List::Util::uniq(
map { my $bn = $_->basename; my ($t) = split(/\./, $bn, 2); $t; } grep { "$_" =~ /\.bax\.h5$/ } @{$analysis->analysis_files}
);
$self->fatal_message('Expected one BAX file basename: %s', join(' ', @bax_basenames)) if @bax_basenames != 1;
$self->_bam_output_directory->file( join('.', $bax_basenames[0], 'subreads', 'bam') );
}

sub _bax_to_bam_command_for_analysis {
my ($self, $analysis) = @_;

my @bax_files = map { $_->stringify } grep { "$_" =~ /\.bax\.h5$/ } @{$analysis->analysis_files};
$self->fatal_message('Incorrect number (%d) of BAX files for run analysis: %s', scalar(@bax_files), $analysis->alias) if not @bax_files or @bax_files != 3;

# bsub -o /dir/%LOG bax2bam -o %OUT_BAM BAX_FILES
my $cmd = $self->bam_to_bax_command;

my $log_rex = qr/%LOG/;
if ( $cmd =~ /$log_rex/ ) {
my $log = join('.', $analysis->alias, 'out');
$cmd =~ s/$log_rex/$log/;
}

join(' ', $cmd, join(' ', @bax_files));
}

1;
65 changes: 65 additions & 0 deletions lib/Pacbio/Run.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package Pacbio::Run;

use strict;
use warnings 'FATAL';

use base 'Class::Accessor';
__PACKAGE__->mk_accessors(qw/ _analyses directory machine_type /);

use List::MoreUtils;

use Pacbio::Run::AnalysisFactoryForRsii;
use Pacbio::Run::AnalysisFactoryForSequel;

sub valid_machine_types { (qw/ rsii sequel /) }

sub __name__ { join(' ', map { $_[0]->$_ } (qw/ directory machine_type /)) }

sub new {
my ($class, %params) = @_;

my $self = bless \%params, $class;

die "No directory given!" if not $self->directory;
die "Directory given does not exist: ".$self->directory if not -d $self->directory->stringify;
die "No machine_type given!" if not $self->machine_type;
die "Invalid machine_type given: ".$self->machine_type if not List::MoreUtils::any { $self->machine_type eq $_ } $self->valid_machine_types;

$self;
}

sub analyses_for_sample {
my ($self, $sample_name_regex) = @_;
die "No sample name regex given!" if not $sample_name_regex;

my $analyses = $self->analyses;
my @sample_analyses;
for my $analysis ( @$analyses ) {
push @sample_analyses, $analysis if $analysis->sample_name =~ $sample_name_regex;
}

return if not @sample_analyses;
\@sample_analyses;
}

sub analyses_count {
my ($self) = @_;
my $analyses = $self->analyses;
( $analyses ? scalar(@$analyses) : 0 );
}

sub analyses {
my ($self) = @_;
return $self->_analyses if $self->_analyses;
my $analyses;
if ( $self->machine_type eq 'rsii' ) {
$analyses = Pacbio::Run::AnalysisFactoryForRsii->build($self->directory)
}
else {
$analyses = Pacbio::Run::AnalysisFactoryForSequel->build($self->directory)
}
return if not $analyses;
$self->_analyses($analyses);
}

1;
32 changes: 32 additions & 0 deletions lib/Pacbio/Run/Analysis.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package Pacbio::Run::Analysis;

use strict;
use warnings 'FATAL';

use base 'Class::Accessor';
__PACKAGE__->mk_accessors(qw/ metadata_xml_file sample_name library_name plate_id version well analysis_files /);

sub __name__ { join(' ', map { $_[0]->$_ } (qw/ plate_id well library_name /)); }

sub new {
my ($class, %params) = @_;
$params{analysis_files} = [] if not $params{analysis_files};
bless \%params, $class;
}

sub add_analysis_files {
my ($self, @files) = @_;
die "No analysis file given!" if not @files;
my $analysis_files = $self->analysis_files;
push @$analysis_files, @files;
$self->analysis_files;
}

sub alias { # PLATE_WELL was: 6U00E3_1020_A01_1
my $self = shift;
my $alias = join('_', $self->plate_id, $self->well);
$alias =~ s/ /_/g;
$alias;
}

1;
92 changes: 92 additions & 0 deletions lib/Pacbio/Run/AnalysisFactoryForRsii.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package Pacbio::Run::AnalysisFactoryForRsii;

use strict;
use warnings 'FATAL';

use Data::Dumper 'Dumper';
use File::Find 'find';
use List::Util;
use Path::Class;
use Pacbio::Run::Analysis;
use XML::LibXML;

sub build {
my ($class, $directory) = @_;

die "No run directory given." if not $directory;
die "Run directory given does not exist!" if not -d "$directory";

my (@analyses);
find(
{
wanted => sub{
if ( /metadata\.xml$/) {
my $xml_info = _load_xml( $File::Find::name );
my $analysis = Pacbio::Run::Analysis->new(
metadata_xml_file => file( $File::Find::name ),
%$xml_info,
);
push @analyses, $analysis;
}
elsif ( $File::Find::dir =~ /Analysis_Results/ and /\.h5$/ ) {
die "No analyses created to add analysis files!" if not @analyses;
$analyses[$#analyses]->add_analysis_files( file($File::Find::name) );
}
},
},
glob($directory->file('*')->stringify),
);

return if not @analyses;
\@analyses;
}

sub _load_xml {
my ($xml_file) = @_;

my $dom = XML::LibXML->load_xml(location => "$xml_file");
my $metadata_node = $dom->firstChild;
if ( not $metadata_node ) {
die "No metadata node found in $xml_file";
}

my $sample_node = List::Util::first { $_->nodeName eq 'Sample' } $metadata_node->childNodes;
if ( not $sample_node ) {
die "No sample node found!";
}

my $library_name = _load_from_parent_node($sample_node, 'Name');
my $well = _load_from_parent_node($sample_node, 'WellName');

my $sample_name = $library_name;
my $well_without_zeros = $well;
$well_without_zeros =~ s/0//g;
$sample_name =~ s/_$well_without_zeros$//;

{
sample_name => $sample_name,
library_name => $library_name,
plate_id => _load_from_parent_node($sample_node, 'PlateId'),
version => _load_from_parent_node($metadata_node, 'InstCtrlVer'),
well => $well,
};
}

sub _load_from_parent_node {
my ($parent_node, $node_name) = @_;
die "No parent node given!" if not $parent_node;
die "No node name node given!" if not $node_name;

my $node = List::Util::first { $_->nodeName eq $node_name } $parent_node->childNodes;
if ( not $node ) {
die "No $node_name node found!";
}

my $version = $node->to_literal;
if ( not $version ) {
die "No info found in $node_name node!";
}
$version;
}

1;

0 comments on commit dbada3e

Please sign in to comment.