Skip to content

Commit

Permalink
Merge pull request #2474 from htcondor/V23_8-HTCONDOR-2445-cgroup-v2-…
Browse files Browse the repository at this point in the history
…under-starter-cgroup

HTCONDOR-2445 cgroup v2 under starter cgroup
  • Loading branch information
GregThain committed May 15, 2024
2 parents 4ac47f1 + 96c8b60 commit c458df5
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 14 deletions.
7 changes: 6 additions & 1 deletion docs/version-history/feature-versions-23-x.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ Release Notes:

New Features:


- Added an ``-edit`` option to the *condor_qusers* tool. This option allows
and administrator to add custom attributes to a User classad in the *condor_schedd*.
:jira:`2381`
Expand All @@ -34,6 +33,12 @@ New Features:
cool-down expression.
:jira:`2134`

- V2 cgroups created for jobs will now be in the cgroup tree the daemons
are born in. This tree is marked as Delegated in the systemd unit file,
so that HTCondor is the sole manipulator of these trees, following the
systemd "one writer" cgroup rule.
:jira:`2445`

- New config parameter :macro:`CGROUP_LOW_MEMORY_LIMIT` allows an administrator
of a Linux cgroup v2 system to set the "memory.low" setting in a job's cgroup
to encourage cacheable memory pages to be reclaimed faster.
Expand Down
83 changes: 73 additions & 10 deletions src/condor_starter.V6.1/vanilla_proc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,15 @@
#include "condor_debug.h"
#include "condor_daemon_core.h"
#include "condor_attributes.h"
#include "exit.h"
#include "vanilla_proc.h"
#include "condor_uid.h"
#include "starter.h"
#include "condor_config.h"
#include "domain_tools.h"
#include "classad_helpers.h"
#include "filesystem_remap.h"
#include "directory.h"
#include "env.h"
#include "subsystem_info.h"
#include "selector.h"
#include "singularity.h"
#include "has_sysadmin_cap.h"
#include "starter_util.h"
Expand Down Expand Up @@ -217,19 +215,65 @@ static bool cgroup_v2_is_writeable(const std::string &relative_cgroup) {
return can_switch_ids() && cgroup_controller_is_writeable("", relative_cgroup);
}

static bool cgroup_is_writeable(const std::string &relative_cgroup) {
dprintf(D_ALWAYS, "Checking to see if %s is a writeable cgroup\n", relative_cgroup.c_str());
static std::string current_parent_cgroup() {
TemporaryPrivSentry sentry(PRIV_ROOT);
std::string cgroup;

int fd = open("/proc/self/cgroup", 0666);
if (fd < 0) {
dprintf(D_ALWAYS, "Cannot open /proc/self/cgroup: %s\n", strerror(errno));
return cgroup; // empty cgroup is invalid
}

struct stat statbuf;
char buf[1024];
int r = read(fd, buf, sizeof(buf)-1);
if (r < 0) {
dprintf(D_ALWAYS, "Cannot read /proc/self/cgroup: %s\n", strerror(errno));
close(fd);
return cgroup;
}
buf[r] = '\0';
cgroup = buf;
close(fd);

if (cgroup.starts_with("0::")) {
cgroup = cgroup.substr(3,cgroup.size() - 4); // 4 because of newline at end
} else {
dprintf(D_ALWAYS, "Unknown prefix for /proc/self/cgroup: %s\n", cgroup.c_str());
cgroup = "";
}

size_t lastSlash = cgroup.rfind('/');
if (lastSlash == std::string::npos) {
// This can happen if you are at the root of a cgroup namespace. Not sure how to handle
dprintf(D_ALWAYS, "Cgroup %s has no internal directory to chdir .. to...\n", cgroup.c_str());
cgroup = "";
} else {
cgroup.erase(lastSlash); // Remove trailing slash
}
return cgroup;
}

static bool hasCgroupV2() {
struct stat statbuf{};
// Should be readable by everyone
if (stat("/sys/fs/cgroup/cgroup.procs", &statbuf) == 0) {
// This means we're on cgroups v2
return true;
}
// V1.
return false;
}

static bool cgroup_is_writeable(const std::string &relative_cgroup) {
dprintf(D_ALWAYS, "Checking to see if %s is a writeable cgroup\n", relative_cgroup.c_str());
// Should be readable by everyone
if (hasCgroupV2()) {
// This means we're on cgroups v2
return cgroup_v2_is_writeable(relative_cgroup);
} else {
// V1.
return cgroup_v1_is_writeable(relative_cgroup);
}
// V1.
return cgroup_v1_is_writeable(relative_cgroup);
}
#endif

Expand Down Expand Up @@ -303,9 +347,28 @@ VanillaProc::StartJob()
create_cgroup = false;
}

if (cgroup_base.empty()) {
create_cgroup = false;
}

// For v2, let's put the job into the current cgroup hierarchy
// Because of the "no process in interior cgroups" rule, this means
// we create a new child of our parent. (a sibling, if you will).
if (hasCgroupV2()) {
std::string current = current_parent_cgroup();
cgroup_base = current + '/' + cgroup_base;

// remove leading / from cgroup_name. cgroupv2 code hates that
if (cgroup_base.starts_with('/')) {
cgroup_base = cgroup_base.substr(1, cgroup_base.size() - 1);
}
replace_str(cgroup_base, "//", "/");
}

if (create_cgroup && cgroup_is_writeable(cgroup_base)) {
std::string cgroup_uniq;
std::string starter_name, execute_str;
std::string starter_name;
std::string execute_str;
param(execute_str, "EXECUTE", "EXECUTE_UNKNOWN");
// Note: Starter is a global variable from os_proc.cpp
Starter->jic->machClassAd()->LookupString(ATTR_NAME, starter_name);
Expand Down
8 changes: 5 additions & 3 deletions src/condor_utils/proc_family_direct_cgroup_v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ static bool killCgroupTree(const std::string &cgroup_name) {
FILE *f = fopen(kill_path.c_str(), "r");
if (!f) {
// Could be it just doesn't exist
dprintf(D_FULLDEBUG, "trimCgroupTree: cannot open %s: %d %s\n", kill_path.c_str(), errno, strerror(errno));
if (errno != ENOENT) {
dprintf(D_ALWAYS, "trimCgroupTree: cannot open %s: %d %s\n", kill_path.c_str(), errno, strerror(errno));
}
} else {
fprintf(f, "%c", '1');
fclose(f);
Expand Down Expand Up @@ -353,7 +355,7 @@ ProcFamilyDirectCgroupV2::cgroupify_process(const std::string &cgroup_name, pid_

void
ProcFamilyDirectCgroupV2::assign_cgroup_for_pid(pid_t pid, const std::string &cgroup_name) {
auto [it, success] = cgroup_map.insert(std::make_pair(pid, cgroup_name));
auto [it, success] = cgroup_map.emplace(pid, cgroup_name);
if (!success) {
EXCEPT("Couldn't insert into cgroup map, duplicate?");
}
Expand Down Expand Up @@ -388,7 +390,7 @@ ProcFamilyDirectCgroupV2::get_usage(pid_t pid, ProcFamilyUsage& usage, bool /*fu
return true;
}

std::string cgroup_name = cgroup_map[pid];
const std::string cgroup_name = cgroup_map[pid];

// Initialize the ones we don't set to -1 to mean "don't know".
usage.block_reads = usage.block_writes = usage.block_read_bytes = usage.block_write_bytes = usage.m_instructions = -1;
Expand Down

0 comments on commit c458df5

Please sign in to comment.