Skip to content

Commit

Permalink
Merge pull request #1595
Browse files Browse the repository at this point in the history
plugins: fix cancel handling crash
  • Loading branch information
BareosBot committed Dec 22, 2023
2 parents c3eb763 + aa43c0b commit 57b632d
Show file tree
Hide file tree
Showing 16 changed files with 761 additions and 346 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- dird: fix `purge oldest volume` [PR #1628]
- Fix continuation on colons in plugin baseclass [PR #1637]
- plugins: fix cancel handling crash [PR #1595]

[PR #1581]: https://github.com/bareos/bareos/pull/1581
[PR #1587]: https://github.com/bareos/bareos/pull/1587
[PR #1589]: https://github.com/bareos/bareos/pull/1589
[PR #1592]: https://github.com/bareos/bareos/pull/1592
[PR #1593]: https://github.com/bareos/bareos/pull/1593
[PR #1595]: https://github.com/bareos/bareos/pull/1595
[PR #1598]: https://github.com/bareos/bareos/pull/1598
[PR #1605]: https://github.com/bareos/bareos/pull/1605
[PR #1606]: https://github.com/bareos/bareos/pull/1606
Expand Down
28 changes: 15 additions & 13 deletions core/scripts/btraceback.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,21 @@ MDB="@MDB@"
# Try to generate a core dump for postmortem analyzing.
#
POSTMORTEM="NONE"
if [ ! -z "${GCORE}" -a -x "${GCORE}" ]; then
POSTMORTEM="${WD}/${PNAME}.core"
${GCORE} -o ${POSTMORTEM} $2 > /dev/null 2>&1
POSTMORTEM="${POSTMORTEM}.$2"
echo "Created ${POSTMORTEM} for doing postmortem debugging" > ${WD}/bareos.$2.traceback
else
which gcore > /dev/null 2>&1 && GCORE="`which gcore`" || GCORE=''
if [ ! -z "${GCORE}" -a -x "${GCORE}" ]; then
POSTMORTEM="${WD}/${PNAME}.core"
${GCORE} -o ${POSTMORTEM} $2 > /dev/null 2>&1
POSTMORTEM="${POSTMORTEM}.$2"
echo "Created ${POSTMORTEM} for doing postmortem debugging" > ${WD}/bareos.$2.traceback
fi
if [ -n "${DUMPCORE+x}" ]; then
if [ ! -z "${GCORE}" -a -x "${GCORE}" ]; then
POSTMORTEM="${WD}/${PNAME}.core"
${GCORE} -o ${POSTMORTEM} $2 > /dev/null 2>&1
POSTMORTEM="${POSTMORTEM}.$2"
echo "Created ${POSTMORTEM} for doing postmortem debugging" > ${WD}/bareos.$2.traceback
else
which gcore > /dev/null 2>&1 && GCORE="`which gcore`" || GCORE=''
if [ ! -z "${GCORE}" -a -x "${GCORE}" ]; then
POSTMORTEM="${WD}/${PNAME}.core"
${GCORE} -o ${POSTMORTEM} $2 > /dev/null 2>&1
POSTMORTEM="${POSTMORTEM}.$2"
echo "Created ${POSTMORTEM} for doing postmortem debugging" > ${WD}/bareos.$2.traceback
fi
fi
fi

#
Expand Down
16 changes: 10 additions & 6 deletions core/src/filed/dir_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ void* process_director_commands(JobControlRecord* jcr, BareosSocket* dir)

// Inform Director that we are done
dir->signal(BNET_TERMINATE);
jcr->EnterFinish();

FreePlugins(jcr); /* release instantiated plugins */
FreeAndNullPoolMemory(jcr->fd_impl->job_metadata);
Expand Down Expand Up @@ -732,13 +733,16 @@ static bool CancelCmd(JobControlRecord* jcr)
if (!(cjcr = get_jcr_by_full_name(Job))) {
dir->fsend(T_("2901 Job %s not found.\n"), Job);
} else {
GeneratePluginEvent(cjcr, bEventCancelCommand, nullptr);
cjcr->setJobStatusWithPriorityCheck(JS_Canceled);
if (cjcr->store_bsock) {
cjcr->store_bsock->SetTimedOut();
cjcr->store_bsock->SetTerminated();
if (cjcr->PrepareCancel()) {
GeneratePluginEvent(cjcr, bEventCancelCommand, nullptr);
cjcr->setJobStatusWithPriorityCheck(JS_Canceled);
if (cjcr->store_bsock) {
cjcr->store_bsock->SetTimedOut();
cjcr->store_bsock->SetTerminated();
}
cjcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
cjcr->CancelFinished();
}
cjcr->MyThreadSendSignal(TIMEOUT_SIGNAL);
FreeJcr(cjcr);
dir->fsend(T_("2001 Job %s marked to be canceled.\n"), Job);
}
Expand Down
18 changes: 18 additions & 0 deletions core/src/include/jcr.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,25 @@ class JobControlRecord {
int32_t JobLevel_{}; /**< Job level */
int32_t Protocol_{}; /**< Backup Protocol */
bool my_thread_killable_{};
enum class cancel_status : int8_t {
None,
InProcess,
Finished,
};
std::atomic<cancel_status> canceled_status{cancel_status::None};
public:
/* tells the jcr that it will get canceled soon. This can happen only once.
* Returns true if you may cancel it (in that case you _must_ call
* CancelFinished() when done); otherwise false is returned (in which case
* you _may not_ call CancelFinished()) */
bool PrepareCancel();
void CancelFinished();

/* Once called, this function stops other threads from canceling this job;
* If this job is currently getting canceled, then it wait until
* that is done, i.e. until CancelFinished() is called. */
void EnterFinish();

JobControlRecord();
~JobControlRecord();
JobControlRecord(const JobControlRecord &other) = delete;
Expand Down
7 changes: 6 additions & 1 deletion core/src/lib/address_conf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,12 @@ bool CheckIfFamilyEnabled(IpFamily family)
FamilyName(family), be.bstrerror());
return false;
}
close(fd);

#ifdef HAVE_WIN32
::closesocket(fd);
#else
::close(fd);
#endif
return true;
}

Expand Down
43 changes: 43 additions & 0 deletions core/src/lib/jcr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@
#include "lib/watchdog.h"

#include <algorithm>
#include <thread>
#include <atomic>
#include <chrono>

const int debuglevel = 3400;

Expand Down Expand Up @@ -1071,3 +1074,43 @@ void DbgPrintJcr(FILE* fp)

fprintf(fp, "dumping of jcrs finished. number of dumped = %zu\n", num_dumped);
}

bool JobControlRecord::PrepareCancel()
{
auto expected = cancel_status::None;
return canceled_status.compare_exchange_strong(expected,
cancel_status::InProcess);
}

void JobControlRecord::CancelFinished()
{
auto expected = cancel_status::InProcess;
ASSERT(canceled_status.compare_exchange_strong(expected,
cancel_status::Finished));
}

void JobControlRecord::EnterFinish()
{
// We want to wait until cancelled_status is set to Finished.
// We are only allowed to change this ourselves if its currently
// set to None, otherwise we have to wait since another thread
// is currently canceling this job.
for (;;) {
auto current_status = cancel_status::None;
if (
// first check if we can set it from None to Finished ourselves
!canceled_status.compare_exchange_weak(current_status,
cancel_status::Finished)
// if we could not change the status then current_status
// is now the actual current canceled status, so check that it was not
// already set to Finished
&& current_status != cancel_status::Finished) {
// neither we nor the cancelling thread set cancelled_status to
// Finished, so lets wait
std::this_thread::sleep_for(std::chrono::milliseconds(30));
} else {
// Somebody changed it to Finished, so it is safe to return now.
break;
}
}
}
9 changes: 9 additions & 0 deletions core/src/plugins/filed/python/module/bareosfd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,15 @@ static inline bool PyIoPacketToNative(PyIoPacket* pIoPkt, io_pkt* io)

if (!(buf = PyByteArray_AsString(pIoPkt->buf))) { return false; }
memcpy(io->buf, buf, io->status);
} else if (PyBytes_Check(pIoPkt->buf)) {
char* buf;

if (PyBytes_Size(pIoPkt->buf) > io->count || io->status > io->count) {
return false;
}

if (!(buf = PyBytes_AsString(pIoPkt->buf))) { return false; }
memcpy(io->buf, buf, io->status);
}
}

Expand Down
6 changes: 3 additions & 3 deletions core/src/plugins/filed/python/plugin_private_context.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
BAREOS® - Backup Archiving REcovery Open Sourced
Copyright (C) 2020-2022 Bareos GmbH & Co. KG
Copyright (C) 2020-2023 Bareos GmbH & Co. KG
This program is Free Software; you can redistribute it and/or
modify it under the terms of version three of the GNU Affero General Public
Expand Down Expand Up @@ -35,8 +35,8 @@ struct plugin_private_context {
char* link; // Target symlink points to
char* object_name; // Restore Object Name
char* object; // Restore Object Content
PyThreadState*
interpreter; // Python interpreter for this instance of the plugin
PyInterpreterState*
interp; // Python interpreter for this instance of the plugin
PyObject* pModule; // Python Module entry point
PyObject* pyModuleFunctionsDict; // Python Dictionary
};
Expand Down

0 comments on commit 57b632d

Please sign in to comment.