Skip to content

Commit

Permalink
Merge pull request #1495
Browse files Browse the repository at this point in the history
stored: fix incoherent meta data when concurrently writing to the same volume
  • Loading branch information
BareosBot committed Aug 1, 2023
2 parents 35c99ec + a9a0351 commit 136248f
Show file tree
Hide file tree
Showing 16 changed files with 172 additions and 69 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -107,6 +107,7 @@ and since Bareos version 20 this project adheres to [Semantic Versioning](https:
- cats: fix creates, grants and drops postgresql [PR #1502]
- stored: fix blocksize warning [PR #1503]
- status storage: fix wrong data displayed about waiting jobs [PR #1476]
- stored: fix incoherent meta data when concurrently writing to the same volume [PR #1495]

### Documentation
- add explanation about binary version numbers [PR #1354]
Expand Down Expand Up @@ -204,6 +205,7 @@ and since Bareos version 20 this project adheres to [Semantic Versioning](https:
[PR #1488]: https://github.com/bareos/bareos/pull/1488
[PR #1490]: https://github.com/bareos/bareos/pull/1490
[PR #1493]: https://github.com/bareos/bareos/pull/1493
[PR #1495]: https://github.com/bareos/bareos/pull/1495
[PR #1502]: https://github.com/bareos/bareos/pull/1502
[PR #1503]: https://github.com/bareos/bareos/pull/1503
[PR #1507]: https://github.com/bareos/bareos/pull/1507
Expand Down
1 change: 1 addition & 0 deletions core/src/cats/cats.h
Expand Up @@ -614,6 +614,7 @@ class BareosDb : public BareosDbQueryEnum {
bool CreateClientRecord(JobControlRecord* jcr, ClientDbRecord* cr);
bool CreateFilesetRecord(JobControlRecord* jcr, FileSetDbRecord* fsr);
bool CreatePoolRecord(JobControlRecord* jcr, PoolDbRecord* pool_dbr);
int DeleteNullJobmediaRecords(JobControlRecord* jcr, std::uint32_t jobid);
bool CreateJobmediaRecord(JobControlRecord* jcr, JobMediaDbRecord* jr);
bool CreateCounterRecord(JobControlRecord* jcr, CounterDbRecord* cr);
bool CreateDeviceRecord(JobControlRecord* jcr, DeviceDbRecord* dr);
Expand Down
24 changes: 21 additions & 3 deletions core/src/cats/sql_delete.cc
Expand Up @@ -125,6 +125,26 @@ static int DeleteHandler(void* ctx, int, char** row)
return 0;
}

/**
* Delete all NullJobMedia records for this job
* Returns: -1 on failure
* n>=0 on success,
* where n is the number of rows affected
*/
int BareosDb::DeleteNullJobmediaRecords(JobControlRecord* jcr,
std::uint32_t jobid)
{
DbLocker _{this};

Mmsg(cmd,
"DELETE FROM jobmedia WHERE jobid=%u AND firstindex=0 AND lastindex=0",
jobid);
Dmsg1(200, "DeleteNullJobmediaRecords: %s\n", cmd);

int numrows = DELETE_DB(jcr, cmd);

return numrows;
}

/**
* This routine will purge (delete) all records
Expand Down Expand Up @@ -195,9 +215,7 @@ bool BareosDb::DeleteMediaRecord(JobControlRecord* jcr, MediaDbRecord* mr)
}

Mmsg(cmd, "DELETE FROM Media WHERE MediaId=%d", mr->MediaId);
SqlQuery(cmd);

return true;
return DELETE_DB(jcr, cmd) != -1;
}

void BareosDb::PurgeFiles(const char* jobids)
Expand Down
12 changes: 12 additions & 0 deletions core/src/dird/catreq.cc
Expand Up @@ -74,6 +74,8 @@ static char Update_filelist[] = "Catreq Job=%127s UpdateFileList\n";

static char Update_jobrecord[]
= "Catreq Job=%127s UpdateJobRecord JobFiles=%lu JobBytes=%llu\n";
static char Delete_nulljobmediarecord[]
= "CatReq Job=%127s DeleteNullJobmediaRecords jobid=%u\n";

// Responses sent to Storage daemon
static char OK_media[]
Expand All @@ -84,6 +86,7 @@ static char OK_media[]
" VolWriteTime=%s EndFile=%u EndBlock=%u LabelType=%d"
" MediaId=%s EncryptionKey=%s MinBlocksize=%d MaxBlocksize=%d\n";
static char OK_create[] = "1000 OK CreateJobMedia\n";
static char OK_delete[] = "1000 OK DeleteNullJobmediaRecords\n";

static int SendVolumeInfoToStorageDaemon(JobControlRecord* jcr,
BareosSocket* sd,
Expand Down Expand Up @@ -122,6 +125,7 @@ void CatalogRequest(JobControlRecord* jcr, BareosSocket* bs)
uint64_t MediaId;
utime_t VolFirstWritten;
utime_t VolLastWritten;
std::uint32_t jobid;

// Request to find next appendable Volume for this Job
Dmsg1(100, "catreq %s", bs->msg);
Expand Down Expand Up @@ -380,6 +384,14 @@ void CatalogRequest(JobControlRecord* jcr, BareosSocket* bs)
jcr->db->strerror());
bs->fsend(_("1992 Update job record error\n"));
}
} else if (sscanf(bs->msg, Delete_nulljobmediarecord, &Job, &jobid) == 2) {
int numrows = jcr->db->DeleteNullJobmediaRecords(jcr, jobid);
Dmsg1(400, "Deleted %d rows.\n", numrows);
if (numrows == -1) {
bs->fsend(_("1992 DeleteNullJobMediaRecords error\n"));
} else {
bs->fsend(OK_delete);
}
} else {
omsg = GetMemory(bs->message_length + 1);
PmStrcpy(omsg, bs->msg);
Expand Down
6 changes: 6 additions & 0 deletions core/src/stored/acquire.cc
Expand Up @@ -559,6 +559,12 @@ bool ReleaseDevice(DeviceControlRecord* dcr)
* already been done, which means we skip them here. */
dev->num_writers--;
Dmsg1(100, "There are %d writers in ReleaseDevice\n", dev->num_writers);
if (dcr->VolFirstIndex) {
// Send a new jobmedia record if we have written to a volume
// take note that the volume we wrote to is not necessarily the volume
// that is currently inside the device.
dcr->DirCreateJobmediaRecord(false);
}
if (dev->IsLabeled()) {
Dmsg2(200, "dir_create_jobmedia. Release vol=%s dev=%s\n",
dev->getVolCatName(), dev->print_name());
Expand Down
5 changes: 5 additions & 0 deletions core/src/stored/append.cc
Expand Up @@ -26,6 +26,7 @@
*/

#include "stored/append.h"
#include "stored/askdir.h"
#include "stored/stored.h"
#include "stored/acquire.h"
#include "stored/checkpoint_handler.h"
Expand Down Expand Up @@ -412,6 +413,10 @@ bool DoAppendData(JobControlRecord* jcr, BareosSocket* bs, const char* what)
// Release the device -- and send final Vol info to DIR and unlock it.
ReleaseDevice(jcr->sd_impl->dcr);

if (!DeleteNullJobmediaRecords(jcr)) {
Jmsg(jcr, M_WARNING, 0, _("Could not delete placeholder media records.\n"));
}

/* Don't use time_t for job_elapsed as time_t can be 32 or 64 bits,
* and the subsequent Jmsg() editing will break */
job_elapsed = time(NULL) - jcr->run_time;
Expand Down
21 changes: 20 additions & 1 deletion core/src/stored/askdir.cc
Expand Up @@ -28,6 +28,8 @@
*/

#include "include/bareos.h"
#include "stored/askdir.h"

#include "stored/stored.h"
#include "stored/stored_globals.h"

Expand Down Expand Up @@ -641,6 +643,24 @@ bool DeviceControlRecord::DirAskSysopToMountVolume(int /*mode*/)
return true;
}

bool DeleteNullJobmediaRecords(JobControlRecord* jcr)
{
Dmsg0(100, "Deleting null jobmedia records\n");
BareosSocket* dir = jcr->dir_bsock;
const char* delete_null_records
= "CatReq Job=%s DeleteNullJobmediaRecords jobid=%u";
dir->fsend(delete_null_records, jcr->Job, jcr->JobId);
if (dir->recv() <= 0) {
Dmsg0(100, "DeleteNullJobmediaRecords error BnetRecv\n");
Mmsg(jcr->errmsg,
_("Network error on BnetRecv in DeleteNullJobmediaRecords.\n"));
return false;
}
Dmsg1(100, ">dird %s", dir->msg);
if (strncmp(dir->msg, "1000", 4) == 0) { return true; }
return false;
}

bool DeviceControlRecord::DirGetVolumeInfo(enum get_vol_info_rw)
{
Dmsg0(100, "Fake DirGetVolumeInfo\n");
Expand All @@ -653,5 +673,4 @@ DeviceControlRecord* DeviceControlRecord::get_new_spooling_dcr()
{
return new StorageDaemonDeviceControlRecord;
}

} /* namespace storagedaemon */
32 changes: 32 additions & 0 deletions core/src/stored/askdir.h
@@ -0,0 +1,32 @@
/*
BAREOS® - Backup Archiving REcovery Open Sourced
Copyright (C) 2023-2023 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
License as published by the Free Software Foundation and included
in the file LICENSE.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
*/
#ifndef BAREOS_STORED_ASKDIR_H_
#define BAREOS_STORED_ASKDIR_H_

#include "lib/jcr.h"

namespace storagedaemon {
// deletes all null jobmedia records from the current job (jcr->job)
// a null jobmedia record is a record with firstindex = 0 and lastindex = 0
bool DeleteNullJobmediaRecords(JobControlRecord* jcr);
} // namespace storagedaemon

#endif // BAREOS_STORED_ASKDIR_H_
28 changes: 22 additions & 6 deletions core/src/stored/block.cc
Expand Up @@ -790,6 +790,27 @@ bool DeviceControlRecord::WriteBlockToDev()
return false;
}

bool block_seek = dev->GetSeekMode() == SeekMode::FILE_BLOCK;

// if this is the first write to this volume (from this job) create a null
// jobmedia entry to prevent the volume from getting recycled.
dcr->VolMediaId = dev->VolCatInfo.VolMediaId;
if (dcr->VolFirstIndex == 0 && block->FirstIndex > 0) {
dcr->WroteVol = true;
ASSERT(dcr->DirCreateJobmediaRecord(true));
dcr->VolFirstIndex = block->FirstIndex;
uint64_t addr = dev->file_addr;

if (block_seek) {
dcr->StartBlock = dev->block_num;
dcr->StartFile = dev->file;
} else {
dcr->StartBlock = (uint32_t)addr;
dcr->StartFile = (uint32_t)(addr >> 32);
}
}
if (block->LastIndex > 0) { dcr->VolLastIndex = block->LastIndex; }

// We successfully wrote the block, now do housekeeping
Dmsg2(1300, "VolCatBytes=%d newVolCatBytes=%d\n",
(int)dev->VolCatInfo.VolCatBytes,
Expand All @@ -802,7 +823,7 @@ bool DeviceControlRecord::WriteBlockToDev()
block->BlockNumber++;

// Update dcr values
if (dev->IsTape()) {
if (block_seek) {
dcr->EndBlock = dev->EndBlock;
dcr->EndFile = dev->EndFile;
dev->block_num++;
Expand All @@ -814,11 +835,6 @@ bool DeviceControlRecord::WriteBlockToDev()
dev->block_num = dcr->EndBlock;
dev->file = dcr->EndFile;
}
dcr->VolMediaId = dev->VolCatInfo.VolMediaId;
if (dcr->VolFirstIndex == 0 && block->FirstIndex > 0) {
dcr->VolFirstIndex = block->FirstIndex;
}
if (block->LastIndex > 0) { dcr->VolLastIndex = block->LastIndex; }
dcr->WroteVol = true;
dev->file_addr += wlen; /* update file address */
dev->file_size += wlen;
Expand Down
18 changes: 2 additions & 16 deletions core/src/stored/label.cc
Expand Up @@ -616,22 +616,8 @@ bool WriteSessionLabel(DeviceControlRecord* dcr, int label)

rec = new_record();
Dmsg1(130, "session_label record=%x\n", rec);
switch (label) {
case SOS_LABEL:
SetStartVolPosition(dcr);
break;
case EOS_LABEL:
if (dev->IsTape()) {
dcr->EndBlock = dev->EndBlock;
dcr->EndFile = dev->EndFile;
} else {
dcr->EndBlock = (uint32_t)dev->file_addr;
dcr->EndFile = (uint32_t)(dev->file_addr >> 32);
}
break;
default:
Jmsg1(jcr, M_ABORT, 0, _("Bad Volume session label = %d\n"), label);
break;
if (label != SOS_LABEL && label != EOS_LABEL) {
Jmsg1(jcr, M_ABORT, 0, _("Bad Volume session label = %d\n"), label);
}
CreateSessionLabel(dcr, rec, label);
rec->FileIndex = label;
Expand Down
4 changes: 2 additions & 2 deletions systemtests/scripts/diff.pl.in
Expand Up @@ -85,7 +85,7 @@ foreach my $f (keys %src_attr)
{
if (!defined $dst_attr{$f}) {
$ret++;
print "diff.pl ERROR: Can't find $f in dst\n";
print "diff.pl ERROR: Can't find $f in (dst) $dst\n";

} else {
compare($src_attr{$f}, $dst_attr{$f});
Expand All @@ -97,7 +97,7 @@ foreach my $f (keys %src_attr)
foreach my $f (keys %dst_attr)
{
$ret++;
print "diff.pl ERROR: Can't find $f in src\n";
print "diff.pl ERROR: Can't find $f in (src) $src\n";
}

if ($ret) {
Expand Down
55 changes: 24 additions & 31 deletions systemtests/tests/block-size/testrunner
Expand Up @@ -45,8 +45,6 @@ setup_data
VolumeName=TestVolume001
# VolumePath=storage/${VolumeName}
DEFAULT_BLOCK_SIZE=64512
LABEL_BLOCK_SIZE=${DEFAULT_BLOCK_SIZE}


bls_blocks()
{
Expand All @@ -55,37 +53,24 @@ bls_blocks()
${BAREOS_BLS_BINARY} -c ${configs} ${DEVICE} -V"${VOLUME}" -k -d 250
}

get_block_and_data_size()
{
# block 1 and 2 are label blocks.
# blocks >= 3 are data blocks.
# Parameter 3 is the block of interest. Default is block 1.
DEVICE="$1"
VOLUME="${2:-*}"
LINE=${3:-1}
bls_blocks "$DEVICE" "$VOLUME" | sed -r -n -e "s/^bls .* Exit read_block (read_len=[0-9]+) (block_len=[0-9]*)/\1 \2/p" | sed "${LINE}q;d"
}

get_block_size()
{
# block 1 and 2 are label blocks.
# blocks >= 3 are data blocks.
# Parameter 3 is the block of interest. Default is block 1.
DEVICE="$1"
VOLUME="${2:-*}"
LINE=${3:-1}
bls_blocks "$DEVICE" "$VOLUME" | sed -r -n -e "s/^bls .* Exit read_block read_len=([0-9]+) block_len=[0-9]*/\1/p" | sed "${LINE}q;d"
# Parameter 2 is the block of interest. Default is block 1.
DATA="$1"
LINE=${2:-1}
echo "$DATA" | sed -r -n -e "s/^bls .* Exit read_block read_len=([0-9]+) block_len=[0-9]*/\1/p" | sed "${LINE}q;d"
}

get_data_size()
{
# block 1 and 2 are label blocks.
# blocks >= 3 are data blocks.
# Parameter 3 is the block of interest. Default is block 1.
DEVICE="$1"
VOLUME="${2:-*}"
LINE=${3:-1}
bls_blocks "$DEVICE" "$VOLUME" | sed -r -n -e "s/^bls .* Exit read_block read_len=[0-9]+ block_len=([0-9]*)/\1/p" | sed "${LINE}q;d"
# Parameter 2 is the block of interest. Default is block 1.
DATA="$1"
LINE=${2:-1}
echo "$DATA" | sed -r -n -e "s/^bls .* Exit read_block read_len=[0-9]+ block_len=([0-9]*)/\1/p" | sed "${LINE}q;d"
}


Expand Down Expand Up @@ -148,27 +133,35 @@ if [ ! -d ${BackupDirectory} ]; then
set_error "Directory ${BackupDirectory} does not exists any more."
fi

blocksize_label=$(get_block_size File1 ${VolumeName} 1)
file_out=$(bls_blocks File1 ${VolumeName})
blocksize_label=$(get_block_size "${file_out}" 1)
print_debug "File Label block size: $blocksize_label"
if [ $blocksize_label != $DEFAULT_BLOCK_SIZE ]; then
if [ "$blocksize_label" != $DEFAULT_BLOCK_SIZE ]; then
set_error "Wrong label block size: expected: $DEFAULT_BLOCK_SIZE, got: $blocksize_label"
fi

blocksize_data=$(get_block_size File1 ${VolumeName} 3)
blocksize_data=$(get_block_size "${file_out}" 3)
print_debug "File Data block size (block 3): $blocksize_data"
if [ $blocksize_data != $DEFAULT_BLOCK_SIZE ]; then
if [ "$blocksize_data" != $DEFAULT_BLOCK_SIZE ]; then
set_error "Wrong data block size (block 3): expected: $DEFAULT_BLOCK_SIZE, got: $blocksize_data"
fi

blocksize_label=$(get_block_size tapedrive0-0 "*" 1)
tape_out=$(bls_blocks tapedrive0-0 "*")
blocksize_label=$(get_block_size "${tape_out}" 1)
if [[ -z "$blocksize_data" ]]; then
set_error "Could not read the label blocksize"
fi
print_debug "Tape Label block size: $blocksize_label"
if [ $blocksize_label != $DEFAULT_BLOCK_SIZE ]; then
if [ "$blocksize_label" != $DEFAULT_BLOCK_SIZE ]; then
set_error "Wrong label block size: expected: $DEFAULT_BLOCK_SIZE, got: $blocksize_label"
fi

blocksize_data=$(get_block_size tapedrive0-0 "*" 3)
blocksize_data=$(get_block_size "${tape_out}" 3)
if [[ -z "$blocksize_data" ]]; then
set_error "Could not read the data blocksize"
fi
print_debug "Tape Data block size (block 3): $blocksize_data"
if [ $blocksize_data -le $DEFAULT_BLOCK_SIZE ]; then
if [ "$blocksize_data" -lt $DEFAULT_BLOCK_SIZE ]; then
set_error "Wrong data block size (block 3): expected: $DEFAULT_BLOCK_SIZE, got: $blocksize_data"
fi

Expand Down

0 comments on commit 136248f

Please sign in to comment.