Skip to content

Commit 1303afd

Browse files
Gnurougregkh
authored andcommitted
gpu: nova-core: create falcon firmware DMA objects lazily
[ Upstream commit bc9de9e ] When DMA was the only loading option for falcon firmwares, we decided to store them in DMA objects as soon as they were loaded from disk and patch them in-place to avoid having to do an extra copy. This decision complicates the PIO loading patch considerably, and actually does not even stand on its own when put into perspective with the fact that it requires 8 unsafe statements in the code that wouldn't exist if we stored the firmware into a `KVVec` and copied it into a DMA object at the last minute. The cost of the copy is, as can be expected, imperceptible at runtime. Thus, switch to a lazy DMA object creation model and simplify our code a bit. This will also have the nice side-effect of being more fit for PIO loading. Reviewed-by: Eliot Courtney <ecourtney@nvidia.com> Acked-by: Danilo Krummrich <dakr@kernel.org> Link: https://patch.msgid.link/20260306-turing_prep-v11-1-8f0042c5d026@nvidia.com [acourbot@nvidia.com: add TODO item to switch back to a coherent allocation when it becomes convenient to do so.] Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> Stable-dep-of: 17d7c97 ("gpu: nova-core: firmware: fix and explain v2 header offsets computations") Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 4b7204b commit 1303afd

5 files changed

Lines changed: 108 additions & 127 deletions

File tree

drivers/gpu/nova-core/falcon.rs

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,21 @@
22

33
//! Falcon microprocessor base support
44
5-
use core::ops::Deref;
6-
75
use hal::FalconHal;
86

97
use kernel::{
10-
device,
8+
device::{
9+
self,
10+
Device, //
11+
},
1112
dma::{
1213
DmaAddress,
1314
DmaMask, //
1415
},
1516
io::poll::read_poll_timeout,
1617
prelude::*,
1718
sync::aref::ARef,
18-
time::{
19-
Delta, //
20-
},
19+
time::Delta,
2120
};
2221

2322
use crate::{
@@ -351,6 +350,9 @@ pub(crate) struct FalconBromParams {
351350

352351
/// Trait for providing load parameters of falcon firmwares.
353352
pub(crate) trait FalconLoadParams {
353+
/// Returns the firmware data as a slice of bytes.
354+
fn as_slice(&self) -> &[u8];
355+
354356
/// Returns the load parameters for Secure `IMEM`.
355357
fn imem_sec_load_params(&self) -> FalconLoadTarget;
356358

@@ -370,9 +372,8 @@ pub(crate) trait FalconLoadParams {
370372

371373
/// Trait for a falcon firmware.
372374
///
373-
/// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA
374-
/// object.
375-
pub(crate) trait FalconFirmware: FalconLoadParams + Deref<Target = DmaObject> {
375+
/// A falcon firmware can be loaded on a given engine.
376+
pub(crate) trait FalconFirmware: FalconLoadParams {
376377
/// Engine on which this firmware is to be loaded.
377378
type Target: FalconEngine;
378379
}
@@ -415,10 +416,10 @@ impl<E: FalconEngine + 'static> Falcon<E> {
415416
/// `target_mem`.
416417
///
417418
/// `sec` is set if the loaded firmware is expected to run in secure mode.
418-
fn dma_wr<F: FalconFirmware<Target = E>>(
419+
fn dma_wr(
419420
&self,
420421
bar: &Bar0,
421-
fw: &F,
422+
dma_obj: &DmaObject,
422423
target_mem: FalconMem,
423424
load_offsets: FalconLoadTarget,
424425
) -> Result {
@@ -430,11 +431,11 @@ impl<E: FalconEngine + 'static> Falcon<E> {
430431
// For DMEM we can fold the start offset into the DMA handle.
431432
let (src_start, dma_start) = match target_mem {
432433
FalconMem::ImemSecure | FalconMem::ImemNonSecure => {
433-
(load_offsets.src_start, fw.dma_handle())
434+
(load_offsets.src_start, dma_obj.dma_handle())
434435
}
435436
FalconMem::Dmem => (
436437
0,
437-
fw.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?,
438+
dma_obj.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?,
438439
),
439440
};
440441
if dma_start % DmaAddress::from(DMA_LEN) > 0 {
@@ -466,7 +467,7 @@ impl<E: FalconEngine + 'static> Falcon<E> {
466467
dev_err!(self.dev, "DMA transfer length overflow\n");
467468
return Err(EOVERFLOW);
468469
}
469-
Some(upper_bound) if usize::from_safe_cast(upper_bound) > fw.size() => {
470+
Some(upper_bound) if usize::from_safe_cast(upper_bound) > dma_obj.size() => {
470471
dev_err!(self.dev, "DMA transfer goes beyond range of DMA object\n");
471472
return Err(EINVAL);
472473
}
@@ -515,22 +516,35 @@ impl<E: FalconEngine + 'static> Falcon<E> {
515516
}
516517

517518
/// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
518-
fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result {
519+
fn dma_load<F: FalconFirmware<Target = E>>(
520+
&self,
521+
dev: &Device<device::Bound>,
522+
bar: &Bar0,
523+
fw: &F,
524+
) -> Result {
519525
// The Non-Secure section only exists on firmware used by Turing and GA100, and
520526
// those platforms do not use DMA.
521527
if fw.imem_ns_load_params().is_some() {
522528
debug_assert!(false);
523529
return Err(EINVAL);
524530
}
525531

532+
// Create DMA object with firmware content as the source of the DMA engine.
533+
let dma_obj = DmaObject::from_data(dev, fw.as_slice())?;
534+
526535
self.dma_reset(bar);
527536
regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 0, |v| {
528537
v.set_target(FalconFbifTarget::CoherentSysmem)
529538
.set_mem_type(FalconFbifMemType::Physical)
530539
});
531540

532-
self.dma_wr(bar, fw, FalconMem::ImemSecure, fw.imem_sec_load_params())?;
533-
self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params())?;
541+
self.dma_wr(
542+
bar,
543+
&dma_obj,
544+
FalconMem::ImemSecure,
545+
fw.imem_sec_load_params(),
546+
)?;
547+
self.dma_wr(bar, &dma_obj, FalconMem::Dmem, fw.dmem_load_params())?;
534548

535549
self.hal.program_brom(self, bar, &fw.brom_params())?;
536550

@@ -641,9 +655,14 @@ impl<E: FalconEngine + 'static> Falcon<E> {
641655
}
642656

643657
// Load a firmware image into Falcon memory
644-
pub(crate) fn load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result {
658+
pub(crate) fn load<F: FalconFirmware<Target = E>>(
659+
&self,
660+
dev: &Device<device::Bound>,
661+
bar: &Bar0,
662+
fw: &F,
663+
) -> Result {
645664
match self.hal.load_method() {
646-
LoadMethod::Dma => self.dma_load(bar, fw),
665+
LoadMethod::Dma => self.dma_load(dev, bar, fw),
647666
LoadMethod::Pio => Err(ENOTSUPP),
648667
}
649668
}

drivers/gpu/nova-core/firmware.rs

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ use kernel::{
1515
};
1616

1717
use crate::{
18-
dma::DmaObject,
1918
falcon::{
2019
FalconFirmware,
2120
FalconLoadTarget, //
@@ -292,51 +291,52 @@ impl SignedState for Unsigned {}
292291
struct Signed;
293292
impl SignedState for Signed {}
294293

295-
/// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon.
294+
/// Microcode to be loaded into a specific falcon.
296295
///
297296
/// This is module-local and meant for sub-modules to use internally.
298297
///
299298
/// After construction, a firmware is [`Unsigned`], and must generally be patched with a signature
300299
/// before it can be loaded (with an exception for development hardware). The
301300
/// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the
302301
/// firmware to its [`Signed`] state.
303-
struct FirmwareDmaObject<F: FalconFirmware, S: SignedState>(DmaObject, PhantomData<(F, S)>);
302+
// TODO: Consider replacing this with a coherent memory object once `CoherentAllocation` supports
303+
// temporary CPU-exclusive access to the object without unsafe methods.
304+
struct FirmwareObject<F: FalconFirmware, S: SignedState>(KVVec<u8>, PhantomData<(F, S)>);
304305

305306
/// Trait for signatures to be patched directly into a given firmware.
306307
///
307308
/// This is module-local and meant for sub-modules to use internally.
308309
trait FirmwareSignature<F: FalconFirmware>: AsRef<[u8]> {}
309310

310-
impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> {
311-
/// Patches the firmware at offset `sig_base_img` with `signature`.
311+
impl<F: FalconFirmware> FirmwareObject<F, Unsigned> {
312+
/// Patches the firmware at offset `signature_start` with `signature`.
312313
fn patch_signature<S: FirmwareSignature<F>>(
313314
mut self,
314315
signature: &S,
315-
sig_base_img: usize,
316-
) -> Result<FirmwareDmaObject<F, Signed>> {
316+
signature_start: usize,
317+
) -> Result<FirmwareObject<F, Signed>> {
317318
let signature_bytes = signature.as_ref();
318-
if sig_base_img + signature_bytes.len() > self.0.size() {
319-
return Err(EINVAL);
320-
}
321-
322-
// SAFETY: We are the only user of this object, so there cannot be any race.
323-
let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) };
319+
let signature_end = signature_start
320+
.checked_add(signature_bytes.len())
321+
.ok_or(EOVERFLOW)?;
322+
let dst = self
323+
.0
324+
.get_mut(signature_start..signature_end)
325+
.ok_or(EINVAL)?;
324326

325-
// SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap.
326-
unsafe {
327-
core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len())
328-
};
327+
// PANIC: `dst` and `signature_bytes` have the same length.
328+
dst.copy_from_slice(signature_bytes);
329329

330-
Ok(FirmwareDmaObject(self.0, PhantomData))
330+
Ok(FirmwareObject(self.0, PhantomData))
331331
}
332332

333333
/// Mark the firmware as signed without patching it.
334334
///
335335
/// This method is used to explicitly confirm that we do not need to sign the firmware, while
336336
/// allowing us to continue as if it was. This is typically only needed for development
337337
/// hardware.
338-
fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> {
339-
FirmwareDmaObject(self.0, PhantomData)
338+
fn no_patch_signature(self) -> FirmwareObject<F, Signed> {
339+
FirmwareObject(self.0, PhantomData)
340340
}
341341
}
342342

drivers/gpu/nova-core/firmware/booter.rs

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@
44
//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon
55
//! (and optionally unload it through a separate firmware image).
66
7-
use core::{
8-
marker::PhantomData,
9-
ops::Deref, //
10-
};
7+
use core::marker::PhantomData;
118

129
use kernel::{
1310
device,
@@ -16,7 +13,6 @@ use kernel::{
1613
};
1714

1815
use crate::{
19-
dma::DmaObject,
2016
driver::Bar0,
2117
falcon::{
2218
sec2::Sec2,
@@ -28,7 +24,7 @@ use crate::{
2824
},
2925
firmware::{
3026
BinFirmware,
31-
FirmwareDmaObject,
27+
FirmwareObject,
3228
FirmwareSignature,
3329
Signed,
3430
Unsigned, //
@@ -261,12 +257,15 @@ pub(crate) struct BooterFirmware {
261257
// BROM falcon parameters.
262258
brom_params: FalconBromParams,
263259
// Device-mapped firmware image.
264-
ucode: FirmwareDmaObject<Self, Signed>,
260+
ucode: FirmwareObject<Self, Signed>,
265261
}
266262

267-
impl FirmwareDmaObject<BooterFirmware, Unsigned> {
268-
fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> {
269-
DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData))
263+
impl FirmwareObject<BooterFirmware, Unsigned> {
264+
fn new_booter(data: &[u8]) -> Result<Self> {
265+
let mut ucode = KVVec::new();
266+
ucode.extend_from_slice(data, GFP_KERNEL)?;
267+
268+
Ok(Self(ucode, PhantomData))
270269
}
271270
}
272271

@@ -320,7 +319,7 @@ impl BooterFirmware {
320319
let ucode = bin_fw
321320
.data()
322321
.ok_or(EINVAL)
323-
.and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?;
322+
.and_then(FirmwareObject::<Self, _>::new_booter)?;
324323

325324
let ucode_signed = {
326325
let mut signatures = hs_fw.signatures_iter()?.peekable();
@@ -392,6 +391,10 @@ impl BooterFirmware {
392391
}
393392

394393
impl FalconLoadParams for BooterFirmware {
394+
fn as_slice(&self) -> &[u8] {
395+
self.ucode.0.as_slice()
396+
}
397+
395398
fn imem_sec_load_params(&self) -> FalconLoadTarget {
396399
self.imem_sec_load_target.clone()
397400
}
@@ -417,14 +420,6 @@ impl FalconLoadParams for BooterFirmware {
417420
}
418421
}
419422

420-
impl Deref for BooterFirmware {
421-
type Target = DmaObject;
422-
423-
fn deref(&self) -> &Self::Target {
424-
&self.ucode.0
425-
}
426-
}
427-
428423
impl FalconFirmware for BooterFirmware {
429424
type Target = Sec2;
430425
}

0 commit comments

Comments
 (0)