Skip to content

Commit

Permalink
Merge 2cc0595 into cc1a544
Browse files Browse the repository at this point in the history
  • Loading branch information
aelkiss committed Nov 11, 2022
2 parents cc1a544 + 2cc0595 commit f4a4a24
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 11 deletions.
13 changes: 7 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
FROM perl:5.34

RUN apt-get update && apt-get install -y \
bsd-mailx \
msmtp \
netcat \
pigz

RUN cpanm -n \
Data::Dumper \
DBD::mysql \
Expand All @@ -9,6 +15,7 @@ RUN cpanm -n \
Devel::Cover::Report::Coveralls \
Exporter \
File::Slurp \
https://github.com/hathitrust/progress_tracker.git@v0.9.0 \
JSON::XS \
LWP::Simple \
MARC \
Expand All @@ -24,12 +31,6 @@ RUN cpanm -n \
XML::LibXSLT \
YAML

RUN apt-get update && apt-get install -y \
bsd-mailx \
msmtp \
netcat \
pigz

ENV ROOTDIR /usr/src/app

COPY . $ROOTDIR
Expand Down
3 changes: 1 addition & 2 deletions config/defaults
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ export REPORTS=${REPORTS:-$DATA_ROOT/reports}

# formerly /aleph-prep/zephir-data
export ZEPHIR_DATA=${ZEPHIR_DATA:-$DATA_ROOT/zephir}
export RIGHTS_DATA=${RIGHTS_DATA:-$DATA_ROOT/zephir}
export FEDDOCS_HOME=${FEDDOCS_HOME:-$DATA_ROOT/govdocs}

# Location of oai output
Expand All @@ -26,6 +25,6 @@ export RIGHTS_DIR=${RIGHTS_DIR:-$DATA_ROOT/rights}
export CATALOG_PREP=${CATALOG_PREP:-$DATA_ROOT/catalog_prep}
# Default lives under $DATA_ROOT, like the other directory settings here.
# The previous default expanded $DATAROOT (no underscore), which is not set
# anywhere in the visible part of this file, so it resolved to
# "/catalog_archive" at the filesystem root.
export CATALOG_ARCHIVE=${CATALOG_ARCHIVE:-$DATA_ROOT/catalog_archive}

for dir in $TMPDIR $DATA_ROOT $ZEPHIR_DATA $RIGHTS_DATA $DATADIR_OAI $INGEST_BIBRECORDS $RIGHTS_DIR $CATALOG_PREP $CATALOG_ARCHIVE;
for dir in $TMPDIR $DATA_ROOT $ZEPHIR_DATA $DATADIR_OAI $INGEST_BIBRECORDS $RIGHTS_DIR $CATALOG_PREP $CATALOG_ARCHIVE;
do mkdir -pv $dir;
done
9 changes: 8 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ services:
- .:/usr/src/app
depends_on:
- mariadb
- pushgateway
environment:
DB_CONNECTION_STRING: "mysql2://ht_rights:ht_rights@mariadb/ht"
PUSHGATEWAY: "http://pushgateway:9091"
command:
- /bin/bash

Expand All @@ -19,8 +21,10 @@ services:
- .:/usr/src/app
depends_on:
- mariadb
- pushgateway
environment:
# List-form entries are plain KEY=VALUE strings: everything after the "=" is
# the value, so wrapping it in double quotes would make the quotes part of
# the value. Keep these unquoted so they match the mapping-form values used
# by the other service.
- DB_CONNECTION_STRING=mysql2://ht_rights:ht_rights@mariadb/ht
- PUSHGATEWAY=http://pushgateway:9091
# pass through info needed by coveralls uploader
- GITHUB_TOKEN
- GITHUB_RUN_ID
Expand All @@ -29,7 +33,7 @@ services:
- GITHUB_SHA
- GITHUB_REF
- GITHUB_ACTIONS
command: bin/wait-for mariadb:3306 -- prove
command: bin/wait-for mariadb:3306 pushgateway:9091 -- prove

mariadb:
image: ghcr.io/hathitrust/db-image:latest
Expand All @@ -39,3 +43,6 @@ services:
MYSQL_DATABASE: ht
MYSQL_USER: ht_rights
MYSQL_PASSWORD: ht_rights

pushgateway:
image: prom/pushgateway
6 changes: 6 additions & 0 deletions postZephir.pm
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ use MARC::File::XML(BinaryEncoding => 'utf8');
use bib_rights;
use rightsDB;

use ProgressTracker;

my $tracker = ProgressTracker->new(report_interval => 10000);

# build mapping of collection to sysnum prefixes (coll => sdr_prefix)
my $sdrnum_prefix_map = load_prefix_map("$ENV{ROOTDIR}/data/sdr_num_prefix_map.tsv");
Expand Down Expand Up @@ -235,6 +238,7 @@ sub main {
my $bib_line;

RECORD:while($bib_line = <IN> ) {
$tracker->inc();
$exit and do {
print OUT_RPT "exitting due to signal\n";
last RECORD;
Expand Down Expand Up @@ -488,6 +492,7 @@ sub main {
$outcnt_json++;
}

# TODO push all these metrics in prometheus
print OUT_RPT "-----------------------------------------------\n";
print OUT_RPT "$bibcnt bib records read\n";
#print OUT_RPT "$dup_cid duplicate bib records for cid skipped\n";
Expand Down Expand Up @@ -532,6 +537,7 @@ sub main {

print OUT_RPT "DONE\n";
print STDERR "DONE\n";
$tracker->finalize;
}

# args: bib record, bib key (001)
Expand Down
2 changes: 1 addition & 1 deletion run_process_zephir_incremental.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ echo "`date`: dump the rights db to a dbm file"
$ROOTDIR/bld_rights_db.pl -x $RIGHTS_DBM

echo "`date`: processing file $ZEPHIR_VUFIND_EXPORT"
$ROOTDIR/postZephir.pm -i $ZEPHIR_VUFIND_EXPORT -o ${BASENAME} -r ${BASENAME}.rights -d -f $RIGHTS_DBM > ${BASENAME}_stderr
JOB_NAME="run_process_zephir_incremental.sh" $ROOTDIR/postZephir.pm -i $ZEPHIR_VUFIND_EXPORT -o ${BASENAME} -r ${BASENAME}.rights -d -f $RIGHTS_DBM > ${BASENAME}_stderr
tail -50 ${BASENAME}_rpt.txt

zcat $ZEPHIR_VUFIND_DELETE > ${BASENAME}_zephir_delete.txt
Expand Down
3 changes: 2 additions & 1 deletion run_zephir_full_daily.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ file_list=`ls zephir_full_daily_??`

for file in $file_list; do
echo "`date`: processing file $file"
`$ROOTDIR/postZephir.pm -z 1 -i $file -o ${file}_out -r ${file}.rights -d -f $RIGHTS_DBM &> ${file}_stderr &`
# TODO: wait to finalize until all of these have run?
# Start the job directly in the background with JOB_APP/JOB_NAME as an
# environment prefix. Putting the prefix in front of a backtick substitution
# does not work: the substitution is executed during word expansion, before
# the assignments are applied, and the assignments then only set unexported
# shell variables, so the program inside the backticks never sees them.
JOB_APP="run_zephir_full_daily" JOB_NAME="$file" $ROOTDIR/postZephir.pm -z 1 -i $file -o ${file}_out -r ${file}.rights -d -f $RIGHTS_DBM &> ${file}_stderr &
done

# wait loop: check last line of each rpt file
Expand Down

0 comments on commit f4a4a24

Please sign in to comment.