-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
944 changed files
with
6,365 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package Pacbio::Command::Assembly::BaxToBam; | ||
|
||
use strict; | ||
use warnings 'FATAL'; | ||
|
||
class Pacbio::Command::Assembly::BaxToBam { | ||
is => 'Command::Tree', | ||
doc => 'helpers for bax2bam', | ||
}; | ||
|
||
1; |
123 changes: 123 additions & 0 deletions
123
lib/Pacbio/Command/Assembly/BaxToBam/GenerateCommands.pm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package Pacbio::Command::Assembly::BaxToBam::GenerateCommands; | ||
|
||
use strict; | ||
use warnings 'FATAL'; | ||
|
||
use IO::File; | ||
use Pacbio::Run; | ||
use Path::Class; | ||
|
||
class Pacbio::Command::Assembly::BaxToBam::GenerateCommands { | ||
is => 'Command::V2', | ||
has_input => { | ||
bam_to_bax_command => { | ||
is => 'Text', | ||
doc => 'Command to fill in with BAX files and logging files. BAX files will be appended to the command. Use %LOG for the log file base name for each cell. Example: bsub -o /my-logging-dir/%LOG bam2bax', | ||
}, | ||
run_directories => { | ||
is => 'Text', | ||
is_many => 1, | ||
doc => 'Pacbio run directories', | ||
}, | ||
}, | ||
has_optional_input => { | ||
bam_output_directory => { | ||
is => 'Text', | ||
doc => 'Give the bam output directory to check if the bam already exists. If so, the bax2bam command for that cell will not be printed.', | ||
}, | ||
}, | ||
has_optional_output => { | ||
commands_file => { | ||
is => 'Text', | ||
default_value => '-', | ||
doc => 'Output file to print commands. Defaults to STDOUT.', | ||
}, | ||
}, | ||
has_optional_transient => { | ||
_bam_output_directory => { is => 'Path::Class::Dir', }, | ||
_runs => { is => 'ARRAY', }, | ||
_commands_fh => { is => 'IO::Handle', }, | ||
}, | ||
doc => 'insert missing primary contigs from haplotigs', | ||
}; | ||
|
||
sub __init__ { | ||
my ($self) = @_; | ||
|
||
my @runs; | ||
for my $directory ( $self->run_directories ) { | ||
push @runs, Pacbio::Run->new( | ||
directory => Path::Class::dir($directory), | ||
machine_type => 'rsii', | ||
); | ||
} | ||
$self->_runs(\@runs); | ||
|
||
my $commands_file = $self->commands_file; | ||
if ( $commands_file and $commands_file ne '-' and -s $commands_file ) { | ||
$self->fatal_message("Output commands file exists: $commands_file. Please change detination, or remove it."); | ||
} | ||
|
||
$self->_commands_fh( | ||
( $commands_file eq '-' ) | ||
? 'STDOUT' | ||
: IO::File->new($commands_file, 'r') | ||
); | ||
|
||
my $bam_output_directory = $self->bam_output_directory; | ||
if ( $bam_output_directory ) { | ||
$self->fatal_message('Given bam output directory, but it does not exist!') if not -d $bam_output_directory; | ||
$self->_bam_output_directory( Path::Class::dir($bam_output_directory) ); | ||
} | ||
|
||
} | ||
|
||
sub execute { | ||
my ($self) = @_; | ||
|
||
$self->__init__; | ||
for my $run ( @{$self->_runs} ) { | ||
my $analyses = $run->analyses; | ||
for my $analysis ( @$analyses ) { | ||
my $bam = $self->_bam_output_for_analysis($analysis); | ||
next if $bam and -s $bam; | ||
my $cmd = $self->_bax_to_bam_command_for_analysis($analysis); | ||
$self->_commands_fh->print("$cmd\n"); | ||
} | ||
} | ||
|
||
1; | ||
} | ||
|
||
sub _bam_output_for_analysis { | ||
my ($self, $analysis) = @_; | ||
return if not $self->bam_output_directory; | ||
# m151026_060206_00116_c100928752550000001823208204291687_s1_p0.subreads.bam | ||
# m151026_060206_00116_c100928752550000001823208204291687_s1_p0.bax.h5 | ||
my @bax_files = map { $_->stringify } grep { "$_" =~ /\.bax\.h5$/ } @{$analysis->analysis_files}; | ||
my @bax_basenames = List::Util::uniq( | ||
map { my $bn = $_->basename; my ($t) = split(/\./, $bn, 2); $t; } grep { "$_" =~ /\.bax\.h5$/ } @{$analysis->analysis_files} | ||
); | ||
$self->fatal_message('Expected one BAX file basename: %s', join(' ', @bax_basenames)) if @bax_basenames != 1; | ||
$self->_bam_output_directory->file( join('.', $bax_basenames[0], 'subreads', 'bam') ); | ||
} | ||
|
||
sub _bax_to_bam_command_for_analysis { | ||
my ($self, $analysis) = @_; | ||
|
||
my @bax_files = map { $_->stringify } grep { "$_" =~ /\.bax\.h5$/ } @{$analysis->analysis_files}; | ||
$self->fatal_message('Incorrect number (%d) of BAX files for run analysis: %s', scalar(@bax_files), $analysis->alias) if not @bax_files or @bax_files != 3; | ||
|
||
# bsub -o /dir/%LOG bax2bam -o %OUT_BAM BAX_FILES | ||
my $cmd = $self->bam_to_bax_command; | ||
|
||
my $log_rex = qr/%LOG/; | ||
if ( $cmd =~ /$log_rex/ ) { | ||
my $log = join('.', $analysis->alias, 'out'); | ||
$cmd =~ s/$log_rex/$log/; | ||
} | ||
|
||
join(' ', $cmd, join(' ', @bax_files)); | ||
} | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package Pacbio::Run; | ||
|
||
use strict; | ||
use warnings 'FATAL'; | ||
|
||
use base 'Class::Accessor'; | ||
__PACKAGE__->mk_accessors(qw/ _analyses directory machine_type /); | ||
|
||
use List::MoreUtils; | ||
|
||
use Pacbio::Run::AnalysisFactoryForRsii; | ||
use Pacbio::Run::AnalysisFactoryForSequel; | ||
|
||
sub valid_machine_types { (qw/ rsii sequel /) } | ||
|
||
sub __name__ { join(' ', map { $_[0]->$_ } (qw/ directory machine_type /)) } | ||
|
||
sub new { | ||
my ($class, %params) = @_; | ||
|
||
my $self = bless \%params, $class; | ||
|
||
die "No directory given!" if not $self->directory; | ||
die "Directory given does not exist: ".$self->directory if not -d $self->directory->stringify; | ||
die "No machine_type given!" if not $self->machine_type; | ||
die "Invalid machine_type given: ".$self->machine_type if not List::MoreUtils::any { $self->machine_type eq $_ } $self->valid_machine_types; | ||
|
||
$self; | ||
} | ||
|
||
sub analyses_for_sample { | ||
my ($self, $sample_name_regex) = @_; | ||
die "No sample name regex given!" if not $sample_name_regex; | ||
|
||
my $analyses = $self->analyses; | ||
my @sample_analyses; | ||
for my $analysis ( @$analyses ) { | ||
push @sample_analyses, $analysis if $analysis->sample_name =~ $sample_name_regex; | ||
} | ||
|
||
return if not @sample_analyses; | ||
\@sample_analyses; | ||
} | ||
|
||
sub analyses_count { | ||
my ($self) = @_; | ||
my $analyses = $self->analyses; | ||
( $analyses ? scalar(@$analyses) : 0 ); | ||
} | ||
|
||
sub analyses { | ||
my ($self) = @_; | ||
return $self->_analyses if $self->_analyses; | ||
my $analyses; | ||
if ( $self->machine_type eq 'rsii' ) { | ||
$analyses = Pacbio::Run::AnalysisFactoryForRsii->build($self->directory) | ||
} | ||
else { | ||
$analyses = Pacbio::Run::AnalysisFactoryForSequel->build($self->directory) | ||
} | ||
return if not $analyses; | ||
$self->_analyses($analyses); | ||
} | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package Pacbio::Run::Analysis; | ||
|
||
use strict; | ||
use warnings 'FATAL'; | ||
|
||
use base 'Class::Accessor'; | ||
__PACKAGE__->mk_accessors(qw/ metadata_xml_file sample_name library_name plate_id version well analysis_files /); | ||
|
||
sub __name__ { join(' ', map { $_[0]->$_ } (qw/ plate_id well library_name /)); } | ||
|
||
sub new { | ||
my ($class, %params) = @_; | ||
$params{analysis_files} = [] if not $params{analysis_files}; | ||
bless \%params, $class; | ||
} | ||
|
||
sub add_analysis_files { | ||
my ($self, @files) = @_; | ||
die "No analysis file given!" if not @files; | ||
my $analysis_files = $self->analysis_files; | ||
push @$analysis_files, @files; | ||
$self->analysis_files; | ||
} | ||
|
||
sub alias { # PLATE_WELL was: 6U00E3_1020_A01_1 | ||
my $self = shift; | ||
my $alias = join('_', $self->plate_id, $self->well); | ||
$alias =~ s/ /_/g; | ||
$alias; | ||
} | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
package Pacbio::Run::AnalysisFactoryForRsii; | ||
|
||
use strict; | ||
use warnings 'FATAL'; | ||
|
||
use Data::Dumper 'Dumper'; | ||
use File::Find 'find'; | ||
use List::Util; | ||
use Path::Class; | ||
use Pacbio::Run::Analysis; | ||
use XML::LibXML; | ||
|
||
sub build { | ||
my ($class, $directory) = @_; | ||
|
||
die "No run directory given." if not $directory; | ||
die "Run directory given does not exist!" if not -d "$directory"; | ||
|
||
my (@analyses); | ||
find( | ||
{ | ||
wanted => sub{ | ||
if ( /metadata\.xml$/) { | ||
my $xml_info = _load_xml( $File::Find::name ); | ||
my $analysis = Pacbio::Run::Analysis->new( | ||
metadata_xml_file => file( $File::Find::name ), | ||
%$xml_info, | ||
); | ||
push @analyses, $analysis; | ||
} | ||
elsif ( $File::Find::dir =~ /Analysis_Results/ and /\.h5$/ ) { | ||
die "No analyses created to add analysis files!" if not @analyses; | ||
$analyses[$#analyses]->add_analysis_files( file($File::Find::name) ); | ||
} | ||
}, | ||
}, | ||
glob($directory->file('*')->stringify), | ||
); | ||
|
||
return if not @analyses; | ||
\@analyses; | ||
} | ||
|
||
sub _load_xml { | ||
my ($xml_file) = @_; | ||
|
||
my $dom = XML::LibXML->load_xml(location => "$xml_file"); | ||
my $metadata_node = $dom->firstChild; | ||
if ( not $metadata_node ) { | ||
die "No metadata node found in $xml_file"; | ||
} | ||
|
||
my $sample_node = List::Util::first { $_->nodeName eq 'Sample' } $metadata_node->childNodes; | ||
if ( not $sample_node ) { | ||
die "No sample node found!"; | ||
} | ||
|
||
my $library_name = _load_from_parent_node($sample_node, 'Name'); | ||
my $well = _load_from_parent_node($sample_node, 'WellName'); | ||
|
||
my $sample_name = $library_name; | ||
my $well_without_zeros = $well; | ||
$well_without_zeros =~ s/0//g; | ||
$sample_name =~ s/_$well_without_zeros$//; | ||
|
||
{ | ||
sample_name => $sample_name, | ||
library_name => $library_name, | ||
plate_id => _load_from_parent_node($sample_node, 'PlateId'), | ||
version => _load_from_parent_node($metadata_node, 'InstCtrlVer'), | ||
well => $well, | ||
}; | ||
} | ||
|
||
sub _load_from_parent_node { | ||
my ($parent_node, $node_name) = @_; | ||
die "No parent node given!" if not $parent_node; | ||
die "No node name node given!" if not $node_name; | ||
|
||
my $node = List::Util::first { $_->nodeName eq $node_name } $parent_node->childNodes; | ||
if ( not $node ) { | ||
die "No $node_name node found!"; | ||
} | ||
|
||
my $version = $node->to_literal; | ||
if ( not $version ) { | ||
die "No info found in $node_name node!"; | ||
} | ||
$version; | ||
} | ||
|
||
1; |
Oops, something went wrong.