// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_umsch_mm.h"
#include "umsch_mm_v4_0.h"

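/*
 * Copy a prebuilt packet of @ndws dwords into the UMSCH ring buffer and
 * commit it so the scheduler firmware picks it up.
 */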
int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws)
{
	struct amdgpu_ring *ring = &umsch->ring;

	if (amdgpu_ring_alloc(ring, ndws))
		return -ENOMEM;

	amdgpu_ring_write_multiple(ring, pkt, ndws);
	amdgpu_ring_commit(ring);

	return 0;
}

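/*
 * Poll for the last emitted fence on the UMSCH ring, up to the device
 * timeout, and report -ETIMEDOUT if it never signals.
 */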
int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch)
{
	struct amdgpu_ring *ring = &umsch->ring;
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, adev->usec_timeout);
	if (r < 1) {
		dev_err(adev->dev, "ring umsch timeout, emitted fence %u\n",
			ring->fence_drv.sync_seq);
		return -ETIMEDOUT;
	}

	return 0;
}

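/*
 * Ring read/write pointer accessors. The write pointer is published either
 * through the doorbell or through the ring buffer wptr register, depending
 * on whether the ring was configured to use a doorbell.
 */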
static void umsch_mm_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
	else
		WREG32(umsch->rb_wptr, ring->wptr << 2);
}

static u64 umsch_mm_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
	struct amdgpu_device *adev = ring->adev;

	return RREG32(umsch->rb_rptr);
}

static u64 umsch_mm_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
	struct amdgpu_device *adev = ring->adev;

	return RREG32(umsch->rb_wptr);
}

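/* Minimal ring backend for the UMSCH ring: 32-bit pointers, no IB emit. */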
static const struct amdgpu_ring_funcs umsch_v4_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_UMSCH_MM,
	.align_mask = 0,
	.nop = 0,
	.support_64bit_ptrs = false,
	.get_rptr = umsch_mm_ring_get_rptr,
	.get_wptr = umsch_mm_ring_get_wptr,
	.set_wptr = umsch_mm_ring_set_wptr,
	.insert_nop = amdgpu_ring_insert_nop,
};

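/* Set up the UMSCH ring itself: MMHUB VM hub, no doorbell, no GPU scheduler. */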
int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch)
{
	struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm);
	struct amdgpu_ring *ring = &umsch->ring;

	ring->vm_hub = AMDGPU_MMHUB0(0);
	ring->use_doorbell = 0;
	ring->no_scheduler = true;
	ring->doorbell_index = (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1) + 6;

	snprintf(ring->name, sizeof(ring->name), "umsch");

	return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
}

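/*
 * Request the UMSCH firmware image and cache the ucode/data sizes and the
 * start addresses advertised in its header for later loading.
 */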
int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
{
	const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
	struct amdgpu_device *adev = umsch->ring.adev;
	const char *fw_name = NULL;
	int r;

	switch (adev->ip_versions[VCN_HWIP][0]) {
	case IP_VERSION(4, 0, 5):
		fw_name = "amdgpu/umsch_mm_4_0_0.bin";
		break;
	default:
		/* don't pass a NULL fw_name to amdgpu_ucode_request() */
		return -EINVAL;
	}

	r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name);
	if (r) {
		release_firmware(adev->umsch_mm.fw);
		adev->umsch_mm.fw = NULL;
		return r;
	}

	umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data;

	adev->umsch_mm.ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
	adev->umsch_mm.data_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);

	adev->umsch_mm.irq_start_addr =
		le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_lo) |
		((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_hi)) << 32);
	adev->umsch_mm.uc_start_addr =
		le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_hi)) << 32);
	adev->umsch_mm.data_start_addr =
		le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_hi)) << 32);

	return 0;
}

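/* Copy the firmware ucode segment into a pinned, CPU-mapped VRAM buffer object. */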
int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch)
{
	const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
	struct amdgpu_device *adev = umsch->ring.adev;
	const __le32 *fw_data;
	uint32_t fw_size;
	int r;

	umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
		       adev->umsch_mm.fw->data;

	fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
		  le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_offset_bytes));
	fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      4 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->umsch_mm.ucode_fw_obj,
				      &adev->umsch_mm.ucode_fw_gpu_addr,
				      (void **)&adev->umsch_mm.ucode_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create umsch_mm fw ucode bo\n", r);
		return r;
	}

	memcpy(adev->umsch_mm.ucode_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->umsch_mm.ucode_fw_obj);
	amdgpu_bo_unreserve(adev->umsch_mm.ucode_fw_obj);
	return 0;
}

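/* Copy the firmware data segment into a pinned, CPU-mapped VRAM buffer object. */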
int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
{
	const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
	struct amdgpu_device *adev = umsch->ring.adev;
	const __le32 *fw_data;
	uint32_t fw_size;
	int r;

	umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
		       adev->umsch_mm.fw->data;

	fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
		  le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes));
	fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_size,
				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->umsch_mm.data_fw_obj,
				      &adev->umsch_mm.data_fw_gpu_addr,
				      (void **)&adev->umsch_mm.data_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create umsch_mm fw data bo\n", r);
		return r;
	}

	memcpy(adev->umsch_mm.data_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->umsch_mm.data_fw_obj);
	amdgpu_bo_unreserve(adev->umsch_mm.data_fw_obj);
	return 0;
}

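/*
 * Pick one doorbell index per context priority level, starting just past
 * the last statically assigned doorbell, rounded up to a 1024-entry boundary.
 */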
static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
{
	uint32_t umsch_mm_agdb_start;
	int i;

	umsch_mm_agdb_start = adev->doorbell_index.max_assignment + 1;
	umsch_mm_agdb_start = roundup(umsch_mm_agdb_start, 1024);
	umsch_mm_agdb_start += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1);

	for (i = 0; i < CONTEXT_PRIORITY_NUM_LEVELS; i++)
		adev->umsch_mm.agdb_index[i] = umsch_mm_agdb_start + i;
}

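/*
 * Common software init: reserve a writeback slot used for the scheduler
 * context address, set up the umsch mutex and the per-priority doorbell
 * indices.
 */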
static int umsch_mm_init(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_wb_get(adev, &adev->umsch_mm.wb_index);
	if (r) {
		dev_err(adev->dev, "failed to alloc wb for umsch: %d\n", r);
		return r;
	}

	adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
					  (adev->umsch_mm.wb_index * 4);

	mutex_init(&adev->umsch_mm.mutex_hidden);

	umsch_mm_agdb_index_init(adev);

	return 0;
}

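/*
 * amd_ip_funcs callbacks. early_init picks the version-specific backend,
 * sw_init/sw_fini manage software state and firmware, and hw_init/hw_fini
 * load the microcode (for direct load), start or stop the ring and set up
 * or tear down the hardware resources.
 */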
static int umsch_mm_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->ip_versions[VCN_HWIP][0]) {
	case IP_VERSION(4, 0, 5):
		umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
		break;
	default:
		return -EINVAL;
	}

	adev->umsch_mm.ring.funcs = &umsch_v4_0_ring_funcs;
	umsch_mm_set_regs(&adev->umsch_mm);

	return 0;
}

static int umsch_mm_late_init(void *handle)
{
	return 0;
}

static int umsch_mm_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = umsch_mm_init(adev);
	if (r)
		return r;

	r = umsch_mm_ring_init(&adev->umsch_mm);
	if (r)
		return r;

	r = umsch_mm_init_microcode(&adev->umsch_mm);
	if (r)
		return r;

	return 0;
}

static int umsch_mm_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	release_firmware(adev->umsch_mm.fw);
	adev->umsch_mm.fw = NULL;

	amdgpu_ring_fini(&adev->umsch_mm.ring);

	mutex_destroy(&adev->umsch_mm.mutex_hidden);
	amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);

	return 0;
}

static int umsch_mm_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = umsch_mm_load_microcode(&adev->umsch_mm);
		if (r)
			return r;
	}

	umsch_mm_ring_start(&adev->umsch_mm);

	r = umsch_mm_set_hw_resources(&adev->umsch_mm);
	if (r)
		return r;

	return 0;
}

static int umsch_mm_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	umsch_mm_ring_stop(&adev->umsch_mm);

	amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
			      &adev->umsch_mm.data_fw_gpu_addr,
			      (void **)&adev->umsch_mm.data_fw_ptr);

	amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
			      &adev->umsch_mm.ucode_fw_gpu_addr,
			      (void **)&adev->umsch_mm.ucode_fw_ptr);
	return 0;
}

static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
	.name = "umsch_mm_v4_0",
	.early_init = umsch_mm_early_init,
	.late_init = umsch_mm_late_init,
	.sw_init = umsch_mm_sw_init,
	.sw_fini = umsch_mm_sw_fini,
	.hw_init = umsch_mm_hw_init,
	.hw_fini = umsch_mm_hw_fini,
};

const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_UMSCH_MM,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &umsch_mm_v4_0_ip_funcs,
};