Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stored: fix authentication race condition / deadlock #1732

Merged
merged 13 commits into from Mar 18, 2024
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -66,6 +66,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- plugins: fix cancel handling crash [PR #1595]
- Fix bareos_tasks plugin for pgsql [PR #1659]
- core: Fix compile errors on GCC 14 [PR #1687]
- stored: fix authentication race condition / deadlock [PR #1732]

[PR #1538]: https://github.com/bareos/bareos/pull/1538
[PR #1581]: https://github.com/bareos/bareos/pull/1581
Expand Down Expand Up @@ -119,4 +120,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[PR #1718]: https://github.com/bareos/bareos/pull/1718
[PR #1719]: https://github.com/bareos/bareos/pull/1719
[PR #1728]: https://github.com/bareos/bareos/pull/1728
[PR #1732]: https://github.com/bareos/bareos/pull/1732
[unreleased]: https://github.com/bareos/bareos/tree/master
3 changes: 2 additions & 1 deletion core/scripts/bareos-ctl-dir.in
Expand Up @@ -55,7 +55,8 @@ case "$1" in
stop)
if [ -x "${BAREOS_DIRECTOR_BINARY}" ]; then
printf "Stopping the Bareos Director daemon: "
killproc "${BAREOS_DIRECTOR_BINARY}" "${BAREOS_DIRECTOR_PORT}"
shift # shift away the stop
killproc "${BAREOS_DIRECTOR_BINARY}" "${BAREOS_DIRECTOR_PORT}" "${@}"
fi
;;

Expand Down
3 changes: 2 additions & 1 deletion core/scripts/bareos-ctl-fd.in
Expand Up @@ -54,7 +54,8 @@ case "$1" in
stop)
if [ -x ${BAREOS_FILEDAEMON_BINARY} ]; then
printf "Stopping the Bareos File daemon: "
killproc ${BAREOS_FILEDAEMON_BINARY} ${BAREOS_FD_PORT}
shift # shift away the stop
killproc ${BAREOS_FILEDAEMON_BINARY} ${BAREOS_FD_PORT} "${@}"
fi
;;

Expand Down
3 changes: 2 additions & 1 deletion core/scripts/bareos-ctl-sd.in
Expand Up @@ -53,7 +53,8 @@ case "$1" in
stop)
if [ -x ${BAREOS_STORAGEDAEMON_BINARY} ]; then
printf "Stopping the Bareos Storage daemon: "
killproc ${BAREOS_STORAGEDAEMON_BINARY} ${BAREOS_SD_PORT}
shift # shift away the stop
killproc ${BAREOS_STORAGEDAEMON_BINARY} ${BAREOS_SD_PORT} "${@}"
fi
;;

Expand Down
7 changes: 4 additions & 3 deletions core/src/dird/director_jcr_impl.h
Expand Up @@ -3,7 +3,7 @@

Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
Copyright (C) 2011-2012 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG

This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand All @@ -28,6 +28,8 @@
#include "dird/client_connection_handshake_mode.h"
#include "dird/job_trigger.h"

#include <condition_variable>

typedef struct s_tree_root TREE_ROOT;

class ConfigResourcesContainer;
Expand Down Expand Up @@ -105,7 +107,7 @@ struct DirectorJcrImpl {
std::shared_ptr<ConfigResourcesContainer> job_config_resources_container_;
pthread_t SD_msg_chan{}; /**< Message channel thread id */
bool SD_msg_chan_started{}; /**< Message channel thread started */
pthread_cond_t term_wait = PTHREAD_COND_INITIALIZER; /**< Wait for job termination */
std::condition_variable term_wait{}; /**< Wait for job termination */
pthread_cond_t nextrun_ready = PTHREAD_COND_INITIALIZER; /**< Wait for job next run to become ready */
Resources res; /**< Resources assigned */
TREE_ROOT* restore_tree_root{}; /**< Selected files to restore (some protocols need this info) */
Expand Down Expand Up @@ -151,7 +153,6 @@ struct DirectorJcrImpl {
int32_t max_concurrent_jobs{}; /**< Maximum concurrent jobs */
bool spool_data{}; /**< Spool data in SD */
bool acquired_resource_locks{}; /**< Set if resource locks acquired */
bool term_wait_inited{}; /**< Set when cond var inited */
bool nextrun_ready_inited{}; /**< Set when cond var inited */
bool fn_printed{}; /**< Printed filename */
bool needs_sd{}; /**< Set if SD needed by Job */
Expand Down
57 changes: 21 additions & 36 deletions core/src/dird/job.cc
Expand Up @@ -140,39 +140,30 @@ bool SetupJob(JobControlRecord* jcr, bool suppress_output)
{
int errstat;

jcr->lock();
{
std::unique_lock l(jcr->mutex_guard());

// See if we should suppress all output.
if (!suppress_output) {
InitMsg(jcr, jcr->dir_impl->res.messages, job_code_callback_director);
} else {
jcr->suppress_output = true;
}
// See if we should suppress all output.
if (!suppress_output) {
InitMsg(jcr, jcr->dir_impl->res.messages, job_code_callback_director);
} else {
jcr->suppress_output = true;
}

// Initialize termination condition variable
if ((errstat = pthread_cond_init(&jcr->dir_impl->term_wait, NULL)) != 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0, T_("Unable to init job cond variable: ERR=%s\n"),
be.bstrerror(errstat));
jcr->unlock();
goto bail_out;
}
jcr->dir_impl->term_wait_inited = true;
// Initialize nextrun ready condition variable
if ((errstat = pthread_cond_init(&jcr->dir_impl->nextrun_ready, NULL))
!= 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0,
T_("Unable to init job nextrun cond variable: ERR=%s\n"),
be.bstrerror(errstat));
goto bail_out;
}
jcr->dir_impl->nextrun_ready_inited = true;

// Initialize nextrun ready condition variable
if ((errstat = pthread_cond_init(&jcr->dir_impl->nextrun_ready, NULL)) != 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0,
T_("Unable to init job nextrun cond variable: ERR=%s\n"),
be.bstrerror(errstat));
jcr->unlock();
goto bail_out;
CreateUniqueJobName(jcr, jcr->dir_impl->res.job->resource_name_);
jcr->setJobStatusWithPriorityCheck(JS_Created);
}
jcr->dir_impl->nextrun_ready_inited = true;

CreateUniqueJobName(jcr, jcr->dir_impl->res.job->resource_name_);
jcr->setJobStatusWithPriorityCheck(JS_Created);
jcr->unlock();

// Open database
Dmsg0(100, "Open database\n");
Expand Down Expand Up @@ -654,13 +645,12 @@ static void* job_thread(void* arg)

void SdMsgThreadSendSignal(JobControlRecord* jcr, int sig)
{
jcr->lock();
std::unique_lock l(jcr->mutex_guard());
if (!jcr->dir_impl->sd_msg_thread_done && jcr->dir_impl->SD_msg_chan_started
&& !pthread_equal(jcr->dir_impl->SD_msg_chan, pthread_self())) {
Dmsg1(800, "Send kill to SD msg chan jid=%d\n", jcr->JobId);
pthread_kill(jcr->dir_impl->SD_msg_chan, sig);
}
jcr->unlock();
}

/**
Expand Down Expand Up @@ -1528,11 +1518,6 @@ void DirdFreeJcr(JobControlRecord* jcr)

DirdFreeJcrPointers(jcr);

if (jcr->dir_impl->term_wait_inited) {
pthread_cond_destroy(&jcr->dir_impl->term_wait);
jcr->dir_impl->term_wait_inited = false;
}

if (jcr->dir_impl->nextrun_ready_inited) {
pthread_cond_destroy(&jcr->dir_impl->nextrun_ready);
jcr->dir_impl->nextrun_ready_inited = false;
Expand Down
13 changes: 1 addition & 12 deletions core/src/dird/jobq.cc
Expand Up @@ -3,7 +3,7 @@

Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
Copyright (C) 2011-2012 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG

This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -206,17 +206,6 @@ int JobqAdd(jobq_t* jq, JobControlRecord* jcr)
pthread_t id;
wait_pkt* sched_pkt;

if (!jcr->dir_impl->term_wait_inited) {
// Initialize termination condition variable
if ((status = pthread_cond_init(&jcr->dir_impl->term_wait, NULL)) != 0) {
BErrNo be;
Jmsg1(jcr, M_FATAL, 0, T_("Unable to init job cond variable: ERR=%s\n"),
be.bstrerror(status));
return status;
}
jcr->dir_impl->term_wait_inited = true;
}

Dmsg3(2300, "JobqAdd jobid=%d jcr=0x%x UseCount=%d\n", jcr->JobId, jcr,
jcr->UseCount());
if (jq->valid != JOBQ_VALID) {
Expand Down
43 changes: 20 additions & 23 deletions core/src/dird/msgchan.cc
Expand Up @@ -3,7 +3,7 @@

Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
Copyright (C) 2011-2016 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG

This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -50,8 +50,6 @@

namespace directordaemon {

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* Commands sent to Storage daemon */
static char jobcmd[]
= "JobId=%s job=%s job_name=%s client_name=%s "
Expand Down Expand Up @@ -436,14 +434,17 @@ extern "C" void MsgThreadCleanup(void* arg)
JobControlRecord* jcr = (JobControlRecord*)arg;

jcr->db->EndTransaction(jcr); /* Terminate any open transaction */
jcr->lock();
jcr->dir_impl->sd_msg_thread_done = true;
jcr->dir_impl->SD_msg_chan_started = false;
jcr->unlock();
pthread_cond_broadcast(
&jcr->dir_impl->nextrun_ready); /* wakeup any waiting threads */

{
std::unique_lock l(jcr->mutex_guard());

jcr->dir_impl->sd_msg_thread_done = true;
jcr->dir_impl->SD_msg_chan_started = false;
}

pthread_cond_broadcast(
&jcr->dir_impl->term_wait); /* wakeup any waiting threads */
&jcr->dir_impl->nextrun_ready); /* wakeup any waiting threads */
jcr->dir_impl->term_wait.notify_all(); /* wakeup any waiting threads */
Dmsg2(100, "=== End msg_thread. JobId=%d usecnt=%d\n", jcr->JobId,
jcr->UseCount());
jcr->db->ThreadCleanup(); /* remove thread specific data */
Expand Down Expand Up @@ -519,29 +520,25 @@ void WaitForStorageDaemonTermination(JobControlRecord* jcr)
{
int cancel_count = 0;
/* Now wait for Storage daemon to Terminate our message thread */
while (!jcr->dir_impl->sd_msg_thread_done) {
struct timeval tv;
struct timezone tz;
struct timespec timeout;
std::unique_lock l(jcr->mutex_guard());

gettimeofday(&tv, &tz);
timeout.tv_nsec = 0;
timeout.tv_sec = tv.tv_sec + 5; /* wait 5 seconds */
/* Give SD 30 seconds to clean up after cancel */
auto timeout = std::chrono::system_clock::now() + std::chrono::seconds(30);
while (!jcr->dir_impl->sd_msg_thread_done) {
Dmsg0(400, "I'm waiting for message thread termination.\n");
lock_mutex(mutex);
pthread_cond_timedwait(&jcr->dir_impl->term_wait, &mutex, &timeout);
unlock_mutex(mutex);
if (jcr->IsJobCanceled()) {
if (jcr->dir_impl->term_wait.wait_until(l, timeout)
== std::cv_status::timeout) {
break;
} else if (jcr->IsJobCanceled()) {
if (jcr->dir_impl->SD_msg_chan_started) {
jcr->store_bsock->SetTimedOut();
jcr->store_bsock->SetTerminated();
SdMsgThreadSendSignal(jcr, TIMEOUT_SIGNAL);
}
cancel_count++;
}
/* Give SD 30 seconds to clean up after cancel */
if (cancel_count == 6) { break; }
}

jcr->setJobStatusWithPriorityCheck(JS_Terminated);
}

Expand Down
9 changes: 5 additions & 4 deletions core/src/dird/ndmp_fhdb_common.cc
@@ -1,7 +1,7 @@
/*
BAREOS® - Backup Archiving REcovery Open Sourced

Copyright (C) 2015-2023 Bareos GmbH & Co. KG
Copyright (C) 2015-2024 Bareos GmbH & Co. KG

This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -41,9 +41,10 @@ extern "C" int BndmpFhdbAddFile(struct ndmlog* ixlog,
{
NIS* nis = (NIS*)ixlog->ctx;

nis->jcr->lock();
nis->jcr->JobFiles++;
nis->jcr->unlock();
{
std::unique_lock l(nis->jcr->mutex_guard());
nis->jcr->JobFiles++;
}

if (nis->save_filehist) {
int8_t FileType = 0;
Expand Down
7 changes: 4 additions & 3 deletions core/src/dird/ndmp_fhdb_lmdb.cc
Expand Up @@ -159,9 +159,10 @@ extern "C" int bndmp_fhdb_lmdb_add_node(struct ndmlog* ixlog,
NIS* nis;
nis = (NIS*)ixlog->ctx;

nis->jcr->lock();
nis->jcr->JobFiles++;
nis->jcr->unlock();
{
std::unique_lock l(nis->jcr->mutex_guard());
nis->jcr->JobFiles++;
}

if (nis->save_filehist) {
int result;
Expand Down
9 changes: 5 additions & 4 deletions core/src/dird/ndmp_fhdb_mem.cc
Expand Up @@ -2,7 +2,7 @@
BAREOS® - Backup Archiving REcovery Open Sourced

Copyright (C) 2015-2015 Planets Communications B.V.
Copyright (C) 2015-2023 Bareos GmbH & Co. KG
Copyright (C) 2015-2024 Bareos GmbH & Co. KG

This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -576,9 +576,10 @@ extern "C" int bndmp_fhdb_mem_add_node(struct ndmlog* ixlog,
{
NIS* nis = (NIS*)ixlog->ctx;

nis->jcr->lock();
nis->jcr->JobFiles++;
nis->jcr->unlock();
{
std::unique_lock l(nis->jcr->mutex_guard());
nis->jcr->JobFiles++;
}

if (nis->save_filehist) {
int attr_size;
Expand Down
9 changes: 5 additions & 4 deletions core/src/dird/next_vol.cc
Expand Up @@ -3,7 +3,7 @@

Copyright (C) 2001-2012 Free Software Foundation Europe e.V.
Copyright (C) 2011-2016 Planets Communications B.V.
Copyright (C) 2013-2023 Bareos GmbH & Co. KG
Copyright (C) 2013-2024 Bareos GmbH & Co. KG

This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -357,7 +357,8 @@ void CheckIfVolumeValidOrRecyclable(JobControlRecord* jcr,
}
}

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
// Only one thread at a time can pull from the scratch pool
static pthread_mutex_t scratch_volume_mutex = PTHREAD_MUTEX_INITIALIZER;

bool GetScratchVolume(JobControlRecord* jcr,
bool InChanger,
Expand All @@ -370,7 +371,7 @@ bool GetScratchVolume(JobControlRecord* jcr,
bool found = false;

// Only one thread at a time can pull from the scratch pool
lock_mutex(mutex);
lock_mutex(scratch_volume_mutex);

/* Get Pool record for Scratch Pool
* choose between ScratchPoolId and Scratch
Expand Down Expand Up @@ -447,7 +448,7 @@ bool GetScratchVolume(JobControlRecord* jcr,
}

bail_out:
unlock_mutex(mutex);
unlock_mutex(scratch_volume_mutex);
return ok;
}
} /* namespace directordaemon */
11 changes: 6 additions & 5 deletions core/src/filed/backup.cc
Expand Up @@ -1597,11 +1597,12 @@ bool EncodeAndSendAttributes(JobControlRecord* jcr,
Dmsg3(300, "File %s\nattribs=%s\nattribsEx=%s\n", ff_pkt->fname,
attribs.c_str(), attribsEx);

jcr->lock();
jcr->JobFiles++; /* increment number of files sent */
ff_pkt->FileIndex = jcr->JobFiles; /* return FileIndex */
PmStrcpy(jcr->fd_impl->last_fname, ff_pkt->fname);
jcr->unlock();
{
std::unique_lock l(jcr->mutex_guard());
jcr->JobFiles++; /* increment number of files sent */
ff_pkt->FileIndex = jcr->JobFiles; /* return FileIndex */
PmStrcpy(jcr->fd_impl->last_fname, ff_pkt->fname);
}

// Debug code: check if we must hangup
if (hangup && (jcr->JobFiles > (uint32_t)hangup)) {
Expand Down