Skip to content

Commit 783822e

Browse files
committed
mnt_idmapping: decouple from namespaces
There's no reason we need to couple mnt idmapping to namespaces in the way we currently do. Copy the idmapping when an idmapped mount is created and don't take any reference on the namespace at all. We also can't easily refcount struct uid_gid_map because it needs to stay the size of a cacheline otherwise we risk performance regressions (Ignoring for a second that right now struct uid_gid_map isn't actually 64 byte but 72 but that's a fix for another patch series.). Link: https://lore.kernel.org/r/20231122-vfs-mnt_idmap-v1-3-dae4abdde5bd@kernel.org Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 90fbd8b commit 783822e

File tree

3 files changed

+106
-17
lines changed

3 files changed

+106
-17
lines changed

fs/mnt_idmapping.c

Lines changed: 91 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,16 @@
99

1010
#include "internal.h"
1111

12+
/*
13+
* Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
14+
* never from raw values. These are just internal helpers.
15+
*/
16+
#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
17+
#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }
18+
1219
struct mnt_idmap {
13-
struct user_namespace *owner;
20+
struct uid_gid_map uid_map;
21+
struct uid_gid_map gid_map;
1422
refcount_t count;
1523
};
1624

@@ -20,7 +28,6 @@ struct mnt_idmap {
2028
* mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
2129
*/
2230
struct mnt_idmap nop_mnt_idmap = {
23-
.owner = &init_user_ns,
2431
.count = REFCOUNT_INIT(1),
2532
};
2633
EXPORT_SYMBOL_GPL(nop_mnt_idmap);
@@ -65,7 +72,6 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
6572
kuid_t kuid)
6673
{
6774
uid_t uid;
68-
struct user_namespace *mnt_userns = idmap->owner;
6975

7076
if (idmap == &nop_mnt_idmap)
7177
return VFSUIDT_INIT(kuid);
@@ -75,7 +81,7 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
7581
uid = from_kuid(fs_userns, kuid);
7682
if (uid == (uid_t)-1)
7783
return INVALID_VFSUID;
78-
return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
84+
return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
7985
}
8086
EXPORT_SYMBOL_GPL(make_vfsuid);
8187

@@ -103,7 +109,6 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
103109
struct user_namespace *fs_userns, kgid_t kgid)
104110
{
105111
gid_t gid;
106-
struct user_namespace *mnt_userns = idmap->owner;
107112

108113
if (idmap == &nop_mnt_idmap)
109114
return VFSGIDT_INIT(kgid);
@@ -113,7 +118,7 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
113118
gid = from_kgid(fs_userns, kgid);
114119
if (gid == (gid_t)-1)
115120
return INVALID_VFSGID;
116-
return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
121+
return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid));
117122
}
118123
EXPORT_SYMBOL_GPL(make_vfsgid);
119124

@@ -132,11 +137,10 @@ kuid_t from_vfsuid(struct mnt_idmap *idmap,
132137
struct user_namespace *fs_userns, vfsuid_t vfsuid)
133138
{
134139
uid_t uid;
135-
struct user_namespace *mnt_userns = idmap->owner;
136140

137141
if (idmap == &nop_mnt_idmap)
138142
return AS_KUIDT(vfsuid);
139-
uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
143+
uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
140144
if (uid == (uid_t)-1)
141145
return INVALID_UID;
142146
if (initial_idmapping(fs_userns))
@@ -160,11 +164,10 @@ kgid_t from_vfsgid(struct mnt_idmap *idmap,
160164
struct user_namespace *fs_userns, vfsgid_t vfsgid)
161165
{
162166
gid_t gid;
163-
struct user_namespace *mnt_userns = idmap->owner;
164167

165168
if (idmap == &nop_mnt_idmap)
166169
return AS_KGIDT(vfsgid);
167-
gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
170+
gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
168171
if (gid == (gid_t)-1)
169172
return INVALID_GID;
170173
if (initial_idmapping(fs_userns))
@@ -195,16 +198,91 @@ int vfsgid_in_group_p(vfsgid_t vfsgid)
195198
#endif
196199
EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
197200

201+
static int copy_mnt_idmap(struct uid_gid_map *map_from,
202+
struct uid_gid_map *map_to)
203+
{
204+
struct uid_gid_extent *forward, *reverse;
205+
u32 nr_extents = READ_ONCE(map_from->nr_extents);
206+
/* Pairs with smp_wmb() when writing the idmapping. */
207+
smp_rmb();
208+
209+
/*
210+
* Don't blindly copy @map_to into @map_from if nr_extents is
211+
* smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS. Since we
212+
* read @nr_extents someone could have written an idmapping and
213+
* then we might end up with inconsistent data. So just don't do
214+
* anything at all.
215+
*/
216+
if (nr_extents == 0)
217+
return 0;
218+
219+
/*
220+
* Here we know that nr_extents is greater than zero which means
221+
* a map has been written. Since idmappings can't be changed
222+
* once they have been written we know that we can safely copy
223+
* from @map_to into @map_from.
224+
*/
225+
226+
if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
227+
*map_to = *map_from;
228+
return 0;
229+
}
230+
231+
forward = kmemdup(map_from->forward,
232+
nr_extents * sizeof(struct uid_gid_extent),
233+
GFP_KERNEL_ACCOUNT);
234+
if (!forward)
235+
return -ENOMEM;
236+
237+
reverse = kmemdup(map_from->reverse,
238+
nr_extents * sizeof(struct uid_gid_extent),
239+
GFP_KERNEL_ACCOUNT);
240+
if (!reverse) {
241+
kfree(forward);
242+
return -ENOMEM;
243+
}
244+
245+
/*
246+
* The idmapping isn't exposed anywhere so we don't need to care
247+
* about ordering between extent pointers and @nr_extents
248+
* initialization.
249+
*/
250+
map_to->forward = forward;
251+
map_to->reverse = reverse;
252+
map_to->nr_extents = nr_extents;
253+
return 0;
254+
}
255+
256+
static void free_mnt_idmap(struct mnt_idmap *idmap)
257+
{
258+
if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
259+
kfree(idmap->uid_map.forward);
260+
kfree(idmap->uid_map.reverse);
261+
}
262+
if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
263+
kfree(idmap->gid_map.forward);
264+
kfree(idmap->gid_map.reverse);
265+
}
266+
kfree(idmap);
267+
}
268+
198269
struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
199270
{
200271
struct mnt_idmap *idmap;
272+
int ret;
201273

202274
idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
203275
if (!idmap)
204276
return ERR_PTR(-ENOMEM);
205277

206-
idmap->owner = get_user_ns(mnt_userns);
207278
refcount_set(&idmap->count, 1);
279+
ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map);
280+
if (!ret)
281+
ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map);
282+
if (ret) {
283+
free_mnt_idmap(idmap);
284+
idmap = ERR_PTR(ret);
285+
}
208286
return idmap;
209287
}
210288

@@ -234,9 +312,7 @@ EXPORT_SYMBOL_GPL(mnt_idmap_get);
234312
*/
235313
void mnt_idmap_put(struct mnt_idmap *idmap)
236314
{
237-
if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
238-
put_user_ns(idmap->owner);
239-
kfree(idmap);
240-
}
315+
if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count))
316+
free_mnt_idmap(idmap);
241317
}
242318
EXPORT_SYMBOL_GPL(mnt_idmap_put);

include/linux/uidgid.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
struct user_namespace;
1919
extern struct user_namespace init_user_ns;
20+
struct uid_gid_map;
2021

2122
typedef struct {
2223
uid_t val;
@@ -138,6 +139,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
138139
return from_kgid(ns, gid) != (gid_t) -1;
139140
}
140141

142+
u32 map_id_down(struct uid_gid_map *map, u32 id);
143+
u32 map_id_up(struct uid_gid_map *map, u32 id);
144+
141145
#else
142146

143147
static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
@@ -186,6 +190,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
186190
return gid_valid(gid);
187191
}
188192

193+
static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
194+
{
195+
return id;
196+
}
197+
198+
static inline u32 map_id_up(struct uid_gid_map *map, u32 id)
199+
{
200+
return id;
201+
}
189202
#endif /* CONFIG_USER_NS */
190203

191204
#endif /* _LINUX_UIDGID_H */

kernel/user_namespace.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
332332
return id;
333333
}
334334

335-
static u32 map_id_down(struct uid_gid_map *map, u32 id)
335+
u32 map_id_down(struct uid_gid_map *map, u32 id)
336336
{
337337
return map_id_range_down(map, id, 1);
338338
}
@@ -375,7 +375,7 @@ map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
375375
sizeof(struct uid_gid_extent), cmp_map_id);
376376
}
377377

378-
static u32 map_id_up(struct uid_gid_map *map, u32 id)
378+
u32 map_id_up(struct uid_gid_map *map, u32 id)
379379
{
380380
struct uid_gid_extent *extent;
381381
unsigned extents = map->nr_extents;

0 commit comments

Comments
 (0)