Skip to content

Commit 8544374

Browse files
xiaogang-chen-amd authored and alexdeucher committed
drm/amdkfd: Have kfd driver use same PASID values from graphic driver
The current kfd driver allocates its own PASID value for each kfd process and uses it to locate the vm in the interrupt handler or to map between a kfd process and its vm. That design does not work when a physical gpu device has multiple spatial partitions, e.g. an adev in CPX mode. This patch makes the kfd driver use the same pasid values that the graphics driver generates, which are allocated one per vm. These pasid values are passed to fw/hardware. The interrupt handler does not need to change even though more pasid values are in use. Also, pasid values in log messages are replaced by the user process pid: pasid values are not exposed to users, whereas users see their process pids, which have meaning in user space.

Signed-off-by: Xiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent ca44922 commit 8544374

16 files changed

+196
-181
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE {
4747
};
4848

4949
struct amdgpu_device;
50+
struct kfd_process_device;
5051
struct amdgpu_reset_context;
5152

5253
enum kfd_mem_attachment_type {
@@ -299,8 +300,6 @@ bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
299300
(&((struct amdgpu_fpriv *) \
300301
((struct drm_file *)(drm_priv))->driver_priv)->vm)
301302

302-
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
303-
struct amdgpu_vm *avm, u32 pasid);
304303
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
305304
struct amdgpu_vm *avm,
306305
void **process_info,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,27 +1529,6 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
15291529
amdgpu_bo_unreserve(bo);
15301530
}
15311531

1532-
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
1533-
struct amdgpu_vm *avm, u32 pasid)
1534-
1535-
{
1536-
int ret;
1537-
1538-
/* Free the original amdgpu allocated pasid,
1539-
* will be replaced with kfd allocated pasid.
1540-
*/
1541-
if (avm->pasid) {
1542-
amdgpu_pasid_free(avm->pasid);
1543-
amdgpu_vm_set_pasid(adev, avm, 0);
1544-
}
1545-
1546-
ret = amdgpu_vm_set_pasid(adev, avm, pasid);
1547-
if (ret)
1548-
return ret;
1549-
1550-
return 0;
1551-
}
1552-
15531532
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
15541533
struct amdgpu_vm *avm,
15551534
void **process_info,

drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,20 +107,30 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
107107
kfd_signal_hw_exception_event(pasid);
108108
else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
109109
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
110+
struct kfd_process_device *pdd = NULL;
110111
struct kfd_vm_fault_info info;
112+
struct kfd_process *p;
111113

112114
kfd_smi_event_update_vmfault(dev, pasid);
113-
kfd_dqm_evict_pasid(dev->dqm, pasid);
115+
p = kfd_lookup_process_by_pasid(pasid, &pdd);
116+
if (!pdd)
117+
return;
118+
119+
kfd_evict_process_device(pdd);
114120

115121
memset(&info, 0, sizeof(info));
116122
amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
117-
if (!info.page_addr && !info.status)
123+
if (!info.page_addr && !info.status) {
124+
kfd_unref_process(p);
118125
return;
126+
}
119127

120128
if (info.vmid == vmid)
121-
kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
129+
kfd_signal_vm_fault_event(pdd, &info, NULL);
122130
else
123-
kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
131+
kfd_signal_vm_fault_event(pdd, &info, NULL);
132+
133+
kfd_unref_process(p);
124134
}
125135
}
126136

drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ static int kfd_open(struct inode *inode, struct file *filep)
155155
/* filep now owns the reference returned by kfd_create_process */
156156
filep->private_data = process;
157157

158-
dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
159-
process->pasid, process->is_32bit_user_mode);
158+
dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
159+
process->lead_thread->pid, process->is_32bit_user_mode);
160160

161161
return 0;
162162
}
@@ -361,8 +361,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
361361
goto err_acquire_queue_buf;
362362
}
363363

364-
pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
365-
p->pasid,
364+
pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
365+
p->lead_thread->pid,
366366
dev->id);
367367

368368
err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
@@ -415,9 +415,9 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
415415
int retval;
416416
struct kfd_ioctl_destroy_queue_args *args = data;
417417

418-
pr_debug("Destroying queue id %d for pasid 0x%x\n",
418+
pr_debug("Destroying queue id %d for process pid %d\n",
419419
args->queue_id,
420-
p->pasid);
420+
p->lead_thread->pid);
421421

422422
mutex_lock(&p->mutex);
423423

@@ -468,8 +468,8 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
468468
properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
469469
properties.priority = args->queue_priority;
470470

471-
pr_debug("Updating queue id %d for pasid 0x%x\n",
472-
args->queue_id, p->pasid);
471+
pr_debug("Updating queue id %d for process pid %d\n",
472+
args->queue_id, p->lead_thread->pid);
473473

474474
mutex_lock(&p->mutex);
475475

@@ -695,7 +695,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
695695
struct kfd_process_device_apertures *pAperture;
696696
int i;
697697

698-
dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
698+
dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);
699699

700700
args->num_of_nodes = 0;
701701

@@ -747,7 +747,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
747747
int ret;
748748
int i;
749749

750-
dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
750+
dev_dbg(kfd_device, "get apertures for process pid %d",
751+
p->lead_thread->pid);
751752

752753
if (args->num_of_nodes == 0) {
753754
/* Return number of nodes, so that user space can alloacate
@@ -3365,12 +3366,12 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
33653366

33663367
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
33673368

3368-
pr_debug("pasid 0x%x mapping mmio page\n"
3369+
pr_debug("process pid %d mapping mmio page\n"
33693370
" target user address == 0x%08llX\n"
33703371
" physical address == 0x%08llX\n"
33713372
" vm_flags == 0x%04lX\n"
33723373
" size == 0x%04lX\n",
3373-
process->pasid, (unsigned long long) vma->vm_start,
3374+
process->lead_thread->pid, (unsigned long long) vma->vm_start,
33743375
address, vma->vm_flags, PAGE_SIZE);
33753376

33763377
return io_remap_pfn_range(vma,

drivers/gpu/drm/amd/amdkfd/kfd_debug.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
204204
size_t exception_data_size)
205205
{
206206
struct kfd_process *p;
207+
struct kfd_process_device *pdd = NULL;
207208
bool signaled_to_debugger_or_runtime = false;
208209

209-
p = kfd_lookup_process_by_pasid(pasid);
210+
p = kfd_lookup_process_by_pasid(pasid, &pdd);
210211

211-
if (!p)
212+
if (!pdd)
212213
return false;
213214

214215
if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
@@ -238,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
238239

239240
mutex_unlock(&p->mutex);
240241
} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
241-
kfd_dqm_evict_pasid(dev->dqm, p->pasid);
242-
kfd_signal_vm_fault_event(dev, p->pasid, NULL,
243-
exception_data);
242+
kfd_evict_process_device(pdd);
243+
kfd_signal_vm_fault_event(pdd, NULL, exception_data);
244244

245245
signaled_to_debugger_or_runtime = true;
246246
}
@@ -276,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
276276
data = (struct kfd_hsa_memory_exception_data *)
277277
pdd->vm_fault_exc_data;
278278

279-
kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
280-
kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
279+
kfd_evict_process_device(pdd);
280+
kfd_signal_vm_fault_event(pdd, NULL, data);
281281
error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
282282
}
283283

drivers/gpu/drm/amd/amdkfd/kfd_device.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1565,7 +1565,7 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr
15651565
u32 cam_index;
15661566

15671567
if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) {
1568-
p = kfd_lookup_process_by_pasid(entry->pasid);
1568+
p = kfd_lookup_process_by_pasid(entry->pasid, NULL);
15691569
if (!p)
15701570
return true;
15711571

0 commit comments

Comments
 (0)