From 12d27f8108733678b06370a590b60abd627f58f1 Mon Sep 17 00:00:00 2001
From: Eddie Belter
Date: Thu, 19 Jul 2018 21:40:11 +0000
Subject: [PATCH] Tenx Reads Upload To Cloud

commands to upload fastqs to the cloud
---
 lib/Tenx/Reads/Command/UploadToCloud.pm       | 15 ++++
 lib/Tenx/Reads/Command/UploadToCloud/Base.pm  | 74 +++++++++++++++++++
 .../Reads/Command/UploadToCloud/Mkfastq.pm    | 46 ++++++++++++
 .../Reads/Command/UploadToCloud/Sample.pm     | 49 ++++++++++++
 t/Tenx-Reads-Command-UploadToCloud-Base.t     | 12 +++
 t/Tenx-Reads-Command-UploadToCloud-Mkfastq.t  | 53 +++++++++++++
 t/Tenx-Reads-Command-UploadToCloud-Sample.t   | 50 +++++++++++++
 t/Tenx-Reads-Command-UploadToCloud.t          | 11 +++
 8 files changed, 310 insertions(+)
 create mode 100644 lib/Tenx/Reads/Command/UploadToCloud.pm
 create mode 100644 lib/Tenx/Reads/Command/UploadToCloud/Base.pm
 create mode 100644 lib/Tenx/Reads/Command/UploadToCloud/Mkfastq.pm
 create mode 100644 lib/Tenx/Reads/Command/UploadToCloud/Sample.pm
 create mode 100644 t/Tenx-Reads-Command-UploadToCloud-Base.t
 create mode 100644 t/Tenx-Reads-Command-UploadToCloud-Mkfastq.t
 create mode 100644 t/Tenx-Reads-Command-UploadToCloud-Sample.t
 create mode 100644 t/Tenx-Reads-Command-UploadToCloud.t

diff --git a/lib/Tenx/Reads/Command/UploadToCloud.pm b/lib/Tenx/Reads/Command/UploadToCloud.pm
new file mode 100644
index 0000000..2fa15f6
--- /dev/null
+++ b/lib/Tenx/Reads/Command/UploadToCloud.pm
@@ -0,0 +1,15 @@
+package Tenx::Reads::Command::UploadToCloud;
+
+use strict;
+use warnings 'FATAL';
+
+# Command tree that groups the sub-commands uploading fastqs to the cloud.
+class Tenx::Reads::Command::UploadToCloud {
+    is => 'Command::Tree',
+    doc => 'upload fastqs to the cloud',
+};
+
+# The detailed help text is the class doc string.
+sub help_detail { $_[0]->__meta__->doc }
+
+1;
diff --git a/lib/Tenx/Reads/Command/UploadToCloud/Base.pm b/lib/Tenx/Reads/Command/UploadToCloud/Base.pm
new file mode 100644
index 0000000..2b9c290
--- /dev/null
+++ b/lib/Tenx/Reads/Command/UploadToCloud/Base.pm
@@ -0,0 +1,74 @@
+package Tenx::Reads::Command::UploadToCloud::Base;
+
+use strict;
+use warnings 'FATAL';
+
+use IPC::Open3;
+use Symbol 'gensym';
+
+# Abstract base class of the upload commands. Declares the shared inputs and
+# builds and runs the gsutil command that syncs a local directory to cloud_url.
+class Tenx::Reads::Command::UploadToCloud::Base {
+    is => 'Command::V2',
+    is_abstract => 1,
+    has_input => {
+        directory => {
+            is => 'Text',
+            shell_args_position => 1,
+        },
+        cloud_url => {
+            is => 'Text',
+            shell_args_position => 2,
+            doc => 'Cloud URL to put reads.',
+        },
+    },
+    has_optional_input => {
+        dry_run => {
+            is => 'Boolean',
+            doc => 'Do not run commands, just print to STDOUT.',
+        },
+    },
+    doc => 'upload fastqs to the cloud',
+};
+
+# The detailed help text is the class doc string.
+sub help_detail { $_[0]->__meta__->doc }
+
+# Build the upload command for the local directory $ldir.
+# Returns an array ref of command words (program first, then its arguments).
+sub get_upload_command {
+    my ($self, $ldir) = @_;
+    return [ 'gsutil', '-m', 'rsync', '-R', $ldir, $self->cloud_url ];
+}
+
+# Upload the local directory $ldir to cloud_url.
+# With dry_run set, the command is only reported and nothing is returned.
+# Otherwise the command is run, a failure is reported as a warning, and 1 is
+# returned.
+sub run_command {
+    my ($self, $ldir) = @_;
+
+    my $cmd = $self->get_upload_command($ldir);
+    if ( $self->dry_run ) {
+        $self->status_message( join(' ', @$cmd) );
+        return;
+    }
+
+    $self->status_message( join(' ', 'RUNNING', @$cmd) );
+    # A program that cannot be started is reported through the return value
+    # below, so keep the "Can't exec" warning from being raised as FATAL.
+    no warnings 'exec';
+    my $rv = system(@$cmd);
+    if ( $rv == -1 ) {
+        my $error = "$!"; # copy now, later calls may reset $!
+        $self->warning_message('Failed to start command: %s', $error);
+    }
+    elsif ( $rv != 0 ) {
+        # Best effort: report the failure but keep going, so that the
+        # remaining uploads are still attempted.
+        $self->warning_message('Command failed with wait status %s', $rv);
+    }
+    return 1;
+}
+
+1;
diff --git a/lib/Tenx/Reads/Command/UploadToCloud/Mkfastq.pm b/lib/Tenx/Reads/Command/UploadToCloud/Mkfastq.pm
new file mode 100644
index 0000000..2c2c82b
--- /dev/null
+++ b/lib/Tenx/Reads/Command/UploadToCloud/Mkfastq.pm
@@ -0,0 +1,46 @@
+package Tenx::Reads::Command::UploadToCloud::Mkfastq;
+
+use strict;
+use warnings 'FATAL';
+
+use Path::Class;
+use List::MoreUtils;
+
+# Upload the fastqs of every sample (or only the white listed ones) found in
+# a mkfastq output directory.
+class Tenx::Reads::Command::UploadToCloud::Mkfastq {
+    is => 'Tenx::Reads::Command::UploadToCloud::Base',
+    has_optional_input => {
+        white_list => {
+            is => 'Text',
+            is_many => 1,
+            doc => 'Only upload the samples in this list.',
+        },
+    },
+    doc => 'upload fastqs from mkfastq directory to GCP object store',
+};
+__PACKAGE__->__meta__->property_meta_for_name('directory')->doc('Mkfastq output directory. Must include outs subdir with input samplesheet.');
+
+# Run one upload per sample of the mkfastq run found in the directory.
+# Dies when the run has no samples. Returns 1.
+sub execute {
+    my $self = shift;
+    $self->status_message('Upload mkfastq directory to GCP object store...');
+
+    my $samplesheet = Tenx::Reads::MkfastqRun->create( $self->directory );
+    my @sample_names = $samplesheet->sample_names;
+    $self->fatal_message("Failed to find samples in %s", $self->directory) if not @sample_names;
+
+    my @white_list = $self->white_list;
+    for my $sample_name ( @sample_names ) {
+        # With a white list, skip every sample that is NOT in it.
+        next if @white_list and List::MoreUtils::none { $sample_name eq $_ } @white_list;
+        my $sample_directory = $samplesheet->fastq_directory_for_sample_name($sample_name);
+        $self->run_command($sample_directory);
+    }
+
+    $self->status_message('Done.');
+    return 1;
+}
+
+1;
diff --git a/lib/Tenx/Reads/Command/UploadToCloud/Sample.pm b/lib/Tenx/Reads/Command/UploadToCloud/Sample.pm
new file mode 100644
index 0000000..e3185c3
--- /dev/null
+++ b/lib/Tenx/Reads/Command/UploadToCloud/Sample.pm
@@ -0,0 +1,49 @@
+package Tenx::Reads::Command::UploadToCloud::Sample;
+
+use strict;
+use warnings 'FATAL';
+
+use Path::Class;
+
+# Upload the fastqs in a single sample directory.
+class Tenx::Reads::Command::UploadToCloud::Sample {
+    is => 'Tenx::Reads::Command::UploadToCloud::Base',
+    has_optional_input => {
+        sample_name => {
+            is => 'Text',
+            doc => 'Sample name. If not given, the base name of the directory will be used.',
+        },
+    },
+    doc => 'upload sample fastqs to GCP object store',
+};
+# NOTE(review): this text describes a mkfastq directory, while execute uploads
+# the given directory itself - confirm the wording for this command.
+__PACKAGE__->__meta__->property_meta_for_name('directory')->doc('Mkfastq output directory. Must include outs subdir with input samplesheet.');
+
+# Check the directory, resolve the sample name and upload the directory.
+# Dies when the directory does not exist. Returns 1.
+sub execute {
+    my ($self) = @_;
+    $self->status_message('Upload fastqs to GCP object store...');
+
+    $self->fatal_message('Directory does not exist: %s', $self->directory) if not -d $self->directory;
+    $self->status_message('Directory: %s', $self->directory);
+
+    $self->_resolve_sample_name;
+    $self->status_message('Sample name: %s', $self->sample_name);
+
+    $self->run_command($self->directory);
+
+    $self->status_message('Done.');
+    return 1;
+}
+
+# Default sample_name to the base name of the directory when it was not given.
+sub _resolve_sample_name {
+    my ($self) = @_;
+    return if $self->sample_name;
+    $self->sample_name( dir($self->directory)->basename );
+    $self->fatal_message('Could not get sample name from directory: %s', $self->directory) if not $self->sample_name;
+}
+
+1;
diff --git a/t/Tenx-Reads-Command-UploadToCloud-Base.t b/t/Tenx-Reads-Command-UploadToCloud-Base.t
new file mode 100644
index 0000000..d2cd759
--- /dev/null
+++ b/t/Tenx-Reads-Command-UploadToCloud-Base.t
@@ -0,0 +1,12 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings 'FATAL';
+
+use TenxTestEnv;
+
+use Test::More tests => 1;
+
+use_ok('Tenx::Reads::Command::UploadToCloud::Base') or die;
+
+done_testing();
diff --git a/t/Tenx-Reads-Command-UploadToCloud-Mkfastq.t b/t/Tenx-Reads-Command-UploadToCloud-Mkfastq.t
new file mode 100644
index 0000000..8b24a71
--- /dev/null
+++ b/t/Tenx-Reads-Command-UploadToCloud-Mkfastq.t
@@ -0,0 +1,53 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use TenxTestEnv;
+
+use Path::Class;
+use Test::Exception;
+use Test::More tests => 3;
+
+my %test;
+subtest 'setup' => sub{
+    plan tests => 1;
+
+    %test = (
+        pkg => 'Tenx::Reads::Command::UploadToCloud::Mkfastq',
+    );
+    use_ok($test{pkg}) or die;
+
+    $test{data_dir} = TenxTestEnv::test_data_directory_for_class('Tenx::Reads');
+    $test{expected_sample_names} = [qw/ M_FA-1CNTRL-Control_10x M_FA-2PD1-aPD1_10x M_FA-3CTLA4-aCTLA4_10x M_FA-4PDCTLA-aPD1-aCTLA4_10x /];
+
+};
+
+subtest 'create' => sub{
+    plan tests => 2;
+
+    my $cmd;
+    lives_ok(
+        sub{ $cmd = $test{pkg}->execute(
+                directory => $test{data_dir}->subdir('sample-sheet')->stringify,
+                cloud_url => 'gs://bucket',
+                dry_run => 1, # never call gsutil from a test
+                ); },
+        'execute',
+    );
+    ok($cmd->result, 'execute successful');
+
+};
+
+subtest 'fails' => sub{
+    plan tests => 1;
+
+    throws_ok(
+        sub{ $test{pkg}->execute(directory => $test{data_dir}->subdir('no-sample')->stringify, cloud_url => 'gs://bucket'); },
+        qr/No sample name found/,
+        'fails ',
+    );
+
+};
+
+done_testing();
diff --git a/t/Tenx-Reads-Command-UploadToCloud-Sample.t b/t/Tenx-Reads-Command-UploadToCloud-Sample.t
new file mode 100644
index 0000000..27c36cb
--- /dev/null
+++ b/t/Tenx-Reads-Command-UploadToCloud-Sample.t
@@ -0,0 +1,50 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use TenxTestEnv;
+
+use Test::Exception;
+use Test::More tests => 3;
+
+my %test;
+subtest 'setup' => sub{
+    plan tests => 1;
+
+    %test = (
+        class => 'Tenx::Reads::Command::UploadToCloud::Sample',
+        sample_name => 'TESTSAMPLE',
+    );
+    use_ok($test{class}) or die;
+
+    $test{data_dir} = TenxTestEnv::test_data_directory_for_class('Tenx::Reads');
+
+};
+
+subtest 'execute' => sub{
+    plan tests => 3;
+
+    my $cmd = $test{class}->create(
+        directory => $test{data_dir}->subdir('sample-sheet')->subdir('M_FA-3CTLA4-aCTLA4_10x')->stringify,
+        cloud_url => 'gs://bucket',
+        dry_run => 1, # never call gsutil from a test
+    );
+    lives_ok( sub{ $cmd->execute; }, 'execute');
+    ok($cmd->result, 'execute successful');
+    is($cmd->sample_name, 'M_FA-3CTLA4-aCTLA4_10x', 'resolved sample name');
+
+};
+
+subtest 'fails' => sub{
+    plan tests => 1;
+
+    throws_ok(
+        sub{ $test{class}->execute(directory => '/blah', cloud_url => 'gs://bucket'); },
+        qr/Directory does not exist/,
+        'fails w/ non existing directory',
+    );
+
+};
+
+done_testing();
diff --git a/t/Tenx-Reads-Command-UploadToCloud.t b/t/Tenx-Reads-Command-UploadToCloud.t
new file mode 100644
index 0000000..8b33e13
--- /dev/null
+++ b/t/Tenx-Reads-Command-UploadToCloud.t
@@ -0,0 +1,11 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings 'FATAL';
+
+use TenxTestEnv;
+use Test::More tests => 1;
+
+use_ok('Tenx::Reads::Command::UploadToCloud') or die;
+
+done_testing();