Skip to content

Commit b32f4cd

Browse files
sjp38gregkh
authored andcommitted
mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values
commit 64a140a upstream. Patch series "mm/damon/modules: detect and use fresh status", v3. DAMON modules including DAMON_RECLAIM, DAMON_LRU_SORT and DAMON_STAT commonly expose the kdamond running status via their parameters. Under certain scenarios including wrong user inputs and memory allocation failures, those parameter values can be stale. It can confuse users. For DAMON_RECLAIM and DAMON_LRU_SORT, it even makes the kdamond unable to be restarted before the system reboot. The problem comes from the fact that there are multiple events for the status changes and it is difficult to follow up all the scenarios. Fix the issue by detecting and using the status on demand, instead of using a cached status that is difficult to be updated. Patches 1-3 fix the bugs in DAMON_RECLAIM, DAMON_LRU_SORT and DAMON_STAT in the order. This patch (of 3): DAMON_RECLAIM updates 'enabled' and 'kdamond_pid' parameter values, which represents the running status of its kdamond, when the user explicitly requests start/stop of the kdamond. The kdamond can, however, be stopped in events other than the explicit user request in the following three events. 1. ctx->regions_score_histogram allocation failure at beginning of the execution, 2. damon_commit_ctx() failure due to invalid user input, and 3. damon_commit_ctx() failure due to its internal allocation failures. Hence, if the kdamond is stopped by the above three events, the values of the status parameters can be stale. Users could show the stale values and be confused. This is already bad, but the real consequence is worse. DAMON_RECLAIM avoids unnecessary damon_start() and damon_stop() calls based on the 'enabled' parameter value. And the update of 'enabled' parameter value depends on the damon_start() and damon_stop() call results. Hence, once the kdamond has stopped by the unintentional events, the user cannot restart the kdamond before the system reboot. For example, the issue can be reproduced via below steps. # cd /sys/module/damon_reclaim/parameters # # # start DAMON_RECLAIM # echo Y > enabled # ps -ef | grep kdamond root 806 2 0 17:53 ? 00:00:00 [kdamond.0] root 808 803 0 17:53 pts/4 00:00:00 grep kdamond # # # commit wrong input to stop kdamond withou explicit stop request # echo 3 > addr_unit # echo Y > commit_inputs bash: echo: write error: Invalid argument # # # confirm kdamond is stopped # ps -ef | grep kdamond root 811 803 0 17:53 pts/4 00:00:00 grep kdamond # # # users casn now show stable status # cat enabled Y # cat kdamond_pid 806 # # # even after fixing the wrong parameter, # # kdamond cannot be restarted. # echo 1 > addr_unit # echo Y > enabled # ps -ef | grep kdamond root 815 803 0 17:54 pts/4 00:00:00 grep kdamond The problem will only rarely happen in real and common setups for the following reasons. The allocation failures are unlikely in such setups since those allocations are arguably too small to fail. Also sane users on real production environments may not commit wrong input parameters. But once it happens, the consequence is quite bad. And the bug is a bug. The issue stems from the fact that there are multiple events that can change the status, and following all the events is challenging. Dynamically detect and use the fresh status for the parameters when those are requested. Link: https://lore.kernel.org/20260419161003.79176-1-sj@kernel.org Link: https://lore.kernel.org/20260419161003.79176-2-sj@kernel.org Fixes: e035c28 ("mm/damon/reclaim: support online inputs update") Co-developed-by: Liew Rui Yan <aethernet65535@gmail.com> Signed-off-by: Liew Rui Yan <aethernet65535@gmail.com> Signed-off-by: SeongJae Park <sj@kernel.org> Cc: <stable@vger.kernel.org> # 5.19.x Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: SeongJae Park <sj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 8e73175 commit b32f4cd

1 file changed

Lines changed: 56 additions & 32 deletions

File tree

mm/damon/reclaim.c

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,6 @@ module_param(monitor_region_end, ulong, 0600);
107107
static bool skip_anon __read_mostly;
108108
module_param(skip_anon, bool, 0600);
109109

110-
/*
111-
* PID of the DAMON thread
112-
*
113-
* If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
114-
* Else, -1.
115-
*/
116-
static int kdamond_pid __read_mostly = -1;
117-
module_param(kdamond_pid, int, 0400);
118-
119110
static struct damos_stat damon_reclaim_stat;
120111
DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_reclaim_stat,
121112
reclaim_tried_regions, reclaimed_regions, quota_exceeds);
@@ -203,60 +194,93 @@ static int damon_reclaim_turn(bool on)
203194
{
204195
int err;
205196

206-
if (!on) {
207-
err = damon_stop(&ctx, 1);
208-
if (!err)
209-
kdamond_pid = -1;
210-
return err;
211-
}
197+
if (!on)
198+
return damon_stop(&ctx, 1);
212199

213200
err = damon_reclaim_apply_parameters();
214201
if (err)
215202
return err;
216203

217-
err = damon_start(&ctx, 1, true);
218-
if (err)
219-
return err;
220-
kdamond_pid = ctx->kdamond->pid;
221-
return 0;
204+
return damon_start(&ctx, 1, true);
205+
}
206+
207+
static bool damon_reclaim_enabled(void)
208+
{
209+
if (!ctx)
210+
return false;
211+
return damon_is_running(ctx);
222212
}
223213

224214
static int damon_reclaim_enabled_store(const char *val,
225215
const struct kernel_param *kp)
226216
{
227-
bool is_enabled = enabled;
228-
bool enable;
229217
int err;
230218

231-
err = kstrtobool(val, &enable);
219+
err = kstrtobool(val, &enabled);
232220
if (err)
233221
return err;
234222

235-
if (is_enabled == enable)
223+
if (damon_reclaim_enabled() == enabled)
236224
return 0;
237225

238226
/* Called before init function. The function will handle this. */
239227
if (!ctx)
240-
goto set_param_out;
228+
return 0;
241229

242-
err = damon_reclaim_turn(enable);
243-
if (err)
244-
return err;
230+
return damon_reclaim_turn(enabled);
231+
}
245232

246-
set_param_out:
247-
enabled = enable;
248-
return err;
233+
static int damon_reclaim_enabled_load(char *buffer,
234+
const struct kernel_param *kp)
235+
{
236+
return sprintf(buffer, "%c\n", damon_reclaim_enabled() ? 'Y' : 'N');
249237
}
250238

251239
static const struct kernel_param_ops enabled_param_ops = {
252240
.set = damon_reclaim_enabled_store,
253-
.get = param_get_bool,
241+
.get = damon_reclaim_enabled_load,
254242
};
255243

256244
module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
257245
MODULE_PARM_DESC(enabled,
258246
"Enable or disable DAMON_RECLAIM (default: disabled)");
259247

248+
static int damon_reclaim_kdamond_pid_store(const char *val,
249+
const struct kernel_param *kp)
250+
{
251+
/*
252+
* kdamond_pid is read-only, but kernel command line could write it.
253+
* Do nothing here.
254+
*/
255+
return 0;
256+
}
257+
258+
static int damon_reclaim_kdamond_pid_load(char *buffer,
259+
const struct kernel_param *kp)
260+
{
261+
int kdamond_pid = -1;
262+
263+
if (ctx) {
264+
kdamond_pid = damon_kdamond_pid(ctx);
265+
if (kdamond_pid < 0)
266+
kdamond_pid = -1;
267+
}
268+
return sprintf(buffer, "%d\n", kdamond_pid);
269+
}
270+
271+
static const struct kernel_param_ops kdamond_pid_param_ops = {
272+
.set = damon_reclaim_kdamond_pid_store,
273+
.get = damon_reclaim_kdamond_pid_load,
274+
};
275+
276+
/*
277+
* PID of the DAMON thread
278+
*
279+
* If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
280+
* Else, -1.
281+
*/
282+
module_param_cb(kdamond_pid, &kdamond_pid_param_ops, NULL, 0400);
283+
260284
static int damon_reclaim_handle_commit_inputs(void)
261285
{
262286
int err;

0 commit comments

Comments
 (0)