Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,14 @@ jobs:
steps:
- uses: actions/checkout@v4

# The GitHub Actions runner runs as UID 1001, but UID 1000 is baked into the
# docker image. We could adjust this via an env var if needed, but opening up
# permissions on these directories should work.
- name: Adjust permissions for versitygw directories
run: chmod 777 var/vgw var/metadata_vgw

- name: Build docker image
run: docker compose build

- name: Run tests
run: docker compose run test-and-cover
env:
Expand Down
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ ENV PERL5LIB="/extlib/lib/perl5:$FEED_HOME/lib"
COPY ./src/validateCache.cpp /usr/src/validateCache.cpp
RUN /usr/bin/g++ -o /usr/local/bin/validate-cache /usr/src/validateCache.cpp -lxerces-c

ENV GNUPGHOME=/tmp/gnupg
RUN mkdir $GNUPGHOME
RUN chown $UID:$GID $GNUPGHOME
RUN chmod 700 $GNUPGHOME

USER $UID:$GID

WORKDIR $FEED_HOME
Expand All @@ -87,8 +92,6 @@ RUN mkdir -p /tmp/stage/grin
RUN mkdir -p /tmp/prep/toingest /tmp/prep/failed /tmp/prep/ingested /tmp/prep/logs /tmp/prep/toingest/emma

RUN mkdir $FEED_HOME/bin $FEED_HOME/src $FEED_HOME/.gnupg
RUN chown $UID:$GID $FEED_HOME/.gnupg
RUN chmod 700 $FEED_HOME/.gnupg

COPY . $FEED_HOME

Expand Down
25 changes: 15 additions & 10 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ services:
command: prove
depends_on:
mariadb: *healthy
minio: *healthy
pushgateway: *healthy
rabbitmq: *healthy
versitygw: *healthy

# Note: for permissions purposes this does NOT bind in the local development
# environment, so local changes after running docker compose build will NOT
Expand All @@ -36,6 +36,7 @@ services:
build: .
volumes:
- ./clamav:/var/lib/clamav
- ./var/vgw:/usr/local/feed/var/vgw
environment:
- HTFEED_CONFIG=/usr/local/feed/etc/config_test.yml
- FEED_HOME=/usr/local/feed
Expand All @@ -53,9 +54,9 @@ services:
command: cover -test -report Coveralls -make 'prove; exit $?'
depends_on:
mariadb: *healthy
minio: *healthy
pushgateway: *healthy
rabbitmq: *healthy
versitygw: *healthy

ingest:
build: .
Expand Down Expand Up @@ -119,17 +120,21 @@ services:
<<: *healthcheck-defaults
test: ["CMD", "healthcheck.sh", "--su-mysql", "--connect", "--innodb_initialized"]

# S3 compatible object storage
minio:
image: minio/minio
# S3 -> filesystem gateway
versitygw:
user: "1000:1000"
image: versity/versitygw
restart: always
environment:
MINIO_ACCESS_KEY: TESTINGACCESSKEY
MINIO_SECRET_KEY: testingsecretkey
command: server /data
ROOT_ACCESS_KEY: TESTINGACCESSKEY
ROOT_SECRET_KEY: testingsecretkey
volumes:
- ./var/vgw:/usr/local/feed/var/vgw
- ./var/metadata_vgw:/usr/local/feed/var/metadata_vgw
command: --health /health posix --sidecar /usr/local/feed/var/metadata_vgw /usr/local/feed/var/vgw
healthcheck:
<<: *healthcheck-defaults
test: timeout 5s mc ready local
test: [ "CMD", "wget", "--quiet", "--tries=1", "-O", "/dev/null", "http://127.0.0.1:7070/health" ]

pushgateway:
image: prom/pushgateway
Expand All @@ -152,4 +157,4 @@ volumes:
repository_link:
repository_obj:
backups:
rclone:
vgw_sidecar:
5 changes: 2 additions & 3 deletions etc/config_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ emma:
namespace: test
packagetype: emma
bucket: emma-test-bucket
awscli: ['aws', '--endpoint-url', 'http://minio:9000']
awscli: ['aws', '--endpoint-url', 'http://versitygw:7070']

rabbitmq:
host: rabbitmq
Expand All @@ -34,11 +34,10 @@ rabbitmq:
queue: testqueue
priority_levels: 3

test_awscli: ['aws', '--endpoint-url', 'http://minio:9000']
awscli: ['aws', '--endpoint-url', 'http://versitygw:7070']

pushgateway: http://pushgateway:9091


# To configure in production

handle:
Expand Down
14 changes: 14 additions & 0 deletions etc/ingest.sql
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,20 @@ CREATE TABLE IF NOT EXISTS `feed_backups` (
KEY `feed_backups_version` (`version`)
);

CREATE TABLE IF NOT EXISTS `feed_storage` (
`namespace` varchar(10) NOT NULL,
`id` varchar(32) NOT NULL,
`storage_name` varchar(32) NOT NULL,
`zip_size` bigint(20) DEFAULT NULL,
`mets_size` bigint(20) DEFAULT NULL,
`saved_md5sum` char(32) DEFAULT NULL,
`deposit_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`lastchecked` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`lastmd5check` timestamp NULL DEFAULT NULL,
`md5check_ok` tinyint(1) DEFAULT NULL,
PRIMARY KEY (`namespace`, `id`, `storage_name`)
);

CREATE TABLE IF NOT EXISTS `feed_audit_detail` (
`namespace` varchar(10) NOT NULL,
`id` varchar(30) NOT NULL,
Expand Down
1 change: 1 addition & 0 deletions lib/HTFeed/Stage/Collate.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use Carp qw(croak);
use HTFeed::Config qw(get_config);
use HTFeed::Storage::LinkedPairtree;
use HTFeed::Storage::LocalPairtree;
use HTFeed::Storage::PairtreeObjectStore;
use HTFeed::Storage::ObjectStore;
use HTFeed::Storage::PrefixedVersions;
use Log::Log4perl qw(get_logger);
Expand Down
26 changes: 20 additions & 6 deletions lib/HTFeed/Storage/ObjectStore.pm
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ sub mets_key {
return $self->object_path . ".mets.xml";
}

sub zip_size {
my $self = shift;
return $self->{filesize}{$self->zip_key},
}

sub mets_size {
my $self = shift;
return $self->{filesize}{$self->mets_key},
}

sub zip_filename {
my $self = shift;

Expand Down Expand Up @@ -208,15 +218,19 @@ sub record_audit {
$self->record_backup;
}

sub saved_md5sum {
my $self = shift;

my $b64_checksum = $self->{checksums}{$self->zip_key};
my $hex_checksum = unpack("H*", decode_base64($b64_checksum));
}

sub record_backup {
my $self = shift;

get_logger->trace(" starting record_backup");
my $dbh = HTFeed::DBTools::get_dbh();

my $b64_checksum = $self->{checksums}{$self->zip_key};
my $hex_checksum = unpack("H*", decode_base64($b64_checksum));

my $stmt = join(
" ",
"INSERT INTO feed_backups",
Expand All @@ -232,9 +246,9 @@ sub record_backup {
$self->audit_path,
$self->{timestamp},
$self->{name},
$self->{filesize}{$self->zip_key},
$self->{filesize}{$self->object_path . '.mets.xml'},
$hex_checksum
$self->zip_size,
$self->mets_size,
$self->saved_md5sum
);

get_logger->trace(" finished record_backup");
Expand Down
60 changes: 60 additions & 0 deletions lib/HTFeed/Storage/PairtreeObjectStore.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package HTFeed::Storage::PairtreeObjectStore;

# Stores using the S3 protocol but with pairtree paths

use HTFeed::Storage::ObjectStore;
use base qw(HTFeed::Storage::ObjectStore);

use HTFeed::DBTools qw(get_dbh);
use File::Pairtree qw(id2ppath s2ppchars);

sub object_path {
my $self = shift;

return sprintf(
'%s/%s%s/',
$self->{namespace},
id2ppath($self->{objid}),
s2ppchars($self->{objid})
);
}

sub zip_key {
my $self = shift;

return $self->object_path . $self->{volume}->get_pt_objid() . $self->zip_suffix;

}

sub mets_key {
my $self = shift;

return $self->object_path . $self->{volume}->get_mets_filename;
}

sub record_audit {
my $self = shift;

my $stmt =
"insert into feed_storage (namespace, id, storage_name, zip_size, mets_size, saved_md5sum, deposit_time, lastchecked, lastmd5check, md5check_ok) \
values(?,?,?,?,?,?,CURRENT_TIMESTAMP,CURRENT_TIMESTAMP,CURRENT_TIMESTAMP,1) \
ON DUPLICATE KEY UPDATE zip_size=?, mets_size=?, saved_md5sum=?, deposit_time=CURRENT_TIMESTAMP, lastchecked = CURRENT_TIMESTAMP,lastmd5check = CURRENT_TIMESTAMP, md5check_ok = 1";

my $storage_name = $self->{name};
my $saved_md5sum = $self->saved_md5sum;

my $zip_size = $self->zip_size;
my $mets_size = $self->mets_size;

my $sth = get_dbh()->prepare($stmt);
my $res = $sth->execute(
$self->{namespace}, $self->{objid}, $storage_name,
$zipsize, $metssize, $saved_md5sum,
# duplicate parameters for duplicate key update
$zipsize, $metssize, $saved_md5sum
);

return $res;
}

1;
51 changes: 45 additions & 6 deletions t/collate.t
Original file line number Diff line number Diff line change
Expand Up @@ -177,27 +177,60 @@ describe "HTFeed::Collate" => sub {

local our ($bucket, $s3);
my $old_storage_classes;
my %s3s;

before all => sub {
foreach my $suffix (qw(ptobj1 ptobj2 backup)) {
$s3s{$suffix} = HTFeed::Storage::S3->new(
bucket => "$bucket-$suffix",
awscli => get_config('awscli')
);
$s3s{$suffix}->mb;
}
};

after all => sub {
foreach my $s3 (values(%s3s)) {
$s3->rm('/',"--recursive");
$s3->rb;
}
};

before each => sub {
$old_storage_classes = get_config('storage_classes');
my $new_storage_classes = {
# simulating isilon
'linkedpairtree-test' =>
{
class => 'HTFeed::Storage::LinkedPairtree',
obj_dir => $tmpdirs->{obj_dir},
link_dir => $tmpdirs->{link_dir}
},
# simulating truenas (site 1)
'pairtreeobjectstore-ptobj1' => {
class => 'HTFeed::Storage::PairtreeObjectStore',
bucket => $s3s{ptobj1}->{bucket},
awscli => $s3s{ptobj1}->{awscli},
},
# simulating truenas (site 2)
'pairtreeobjectstore-ptobj2' => {
class => 'HTFeed::Storage::PairtreeObjectStore',
bucket => $s3s{ptobj2}->{bucket},
awscli => $s3s{ptobj2}->{awscli},
},
# simulating data den
'prefixedversions-test' =>
{
class => 'HTFeed::Storage::PrefixedVersions',
obj_dir => $tmpdirs->{backup_obj_dir},
encryption_key => $tmpdirs->test_home . "/fixtures/encryption_key"
},
# simulating glacier deep archive
'objectstore-test' =>
{
class => 'HTFeed::Storage::ObjectStore',
bucket => $s3->{bucket},
awscli => $s3->{awscli},
bucket => $s3s{backup}->{bucket},
awscli => $s3s{backup}->{awscli},
encryption_key => $tmpdirs->test_home . "/fixtures/encryption_key"
}
};
Expand All @@ -223,16 +256,22 @@ describe "HTFeed::Collate" => sub {
is(scalar(@{$s3_backup}),1,'records a backup for object store');

my $timestamp = $versioned_backup->[0][0];
ok(-e "$tmpdirs->{obj_dir}/test/pairtree_root/te/st/test/test.mets.xml",'copies mets to local storage');
ok(-e "$tmpdirs->{obj_dir}/test/pairtree_root/te/st/test/test.zip",'copies zip to local storage');

my $pt_path = "test/pairtree_root/te/st/test";
ok(-e "$tmpdirs->{obj_dir}/$pt_path/test.mets.xml",'copies mets to local storage');
ok(-e "$tmpdirs->{obj_dir}/$pt_path/test.zip",'copies zip to local storage');

ok(-e "$tmpdirs->{backup_obj_dir}/test/tes/test.$timestamp.zip.gpg","copies the encrypted zip to backup storage");
ok(-e "$tmpdirs->{backup_obj_dir}/test/tes/test.$timestamp.mets.xml","copies the mets backup storage");

my $s3_timestamp = $s3_backup->[0][0];

ok($s3->s3_has("test.test.$s3_timestamp.zip.gpg"));
ok($s3->s3_has("test.test.$s3_timestamp.mets.xml"));
ok($s3s{ptobj1}->s3_has("$pt_path/test.mets.xml"));
ok($s3s{ptobj1}->s3_has("$pt_path/test.zip"));
ok($s3s{ptobj2}->s3_has("$pt_path/test.mets.xml"));
ok($s3s{ptobj2}->s3_has("$pt_path/test.zip"));
ok($s3s{backup}->s3_has("test.test.$s3_timestamp.zip.gpg"));
ok($s3s{backup}->s3_has("test.test.$s3_timestamp.mets.xml"));

ok(! -e "$tmpdirs->{zip}/test/00000001.jp2","cleans up the extracted zip files");
ok(! -e "$tmpdirs->{zip}/test","cleans up the zip file tmpdir");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package HTFeed::Namespace::Test;
package HTFeed::Namespace::ClassTest;

use warnings;
use strict;
Expand Down
4 changes: 3 additions & 1 deletion t/lib/HTFeed/Test/Class.pm
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ use HTFeed::Config qw(get_config);
use File::Path qw(remove_tree);

# Return the class under test, assuming the test class name is the class under
# test plus a trailing "::Test" (or, more generally, "::SomethingTest") component.

sub testing_class{
my $self = shift;
my $class = ref $self;
$class =~ s/::Test$//;
$class =~ s/::\w*Test$//;
return $class;
}

Expand Down
3 changes: 2 additions & 1 deletion t/lib/HTFeed/Test/Support.pm
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ my @test_classes;
my $libDir = "$FindBin::Bin/lib/";
# get the path to each test classes
find(sub{
if (-f and $_ =~ /^Test\.pm$/ ){
if (-f and $_ =~ /Test\.pm$/ ){
my $name = $File::Find::name;
$name =~ s/$libDir//;
return if $name =~ /AbstractTest\.pm$/;
push @test_classes, $name;
}
}, $libDir
Expand Down
2 changes: 1 addition & 1 deletion t/s3_helper.pl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
$bucket = "bucket" . sprintf("%08d",rand(1000000));
$s3 = HTFeed::Storage::S3->new(
bucket => $bucket,
awscli => get_config('test_awscli')
awscli => get_config('awscli')
);
$ENV{AWS_MAX_ATTEMPTS} = 1;

Expand Down
Loading