@@ -2034,6 +2034,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMA_TILING_CONFIG, gb_addr_config);
 
 	tmp = gb_addr_config & NUM_PIPES_MASK;
 	tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends,
@@ -2403,8 +2404,12 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
 					 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 		cayman_cp_int_cntl_setup(rdev, 1, 0);
 		cayman_cp_int_cntl_setup(rdev, 2, 0);
+		tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+		WREG32(CAYMAN_DMA1_CNTL, tmp);
 	} else
 		WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+	WREG32(DMA_CNTL, tmp);
 	WREG32(GRBM_INT_CNTL, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
 	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -2457,6 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	u32 grbm_int_cntl = 0;
 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
 	u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
+	u32 dma_cntl, dma_cntl1 = 0;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2484,6 +2490,8 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
 	afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
 
+	dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+
 	if (rdev->family >= CHIP_CAYMAN) {
 		/* enable CP interrupts on all rings */
 		if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
@@ -2506,6 +2514,19 @@ int evergreen_irq_set(struct radeon_device *rdev)
 		}
 	}
 
+	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+		DRM_DEBUG("r600_irq_set: sw int dma\n");
+		dma_cntl |= TRAP_ENABLE;
+	}
+
+	if (rdev->family >= CHIP_CAYMAN) {
+		dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+		if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+			DRM_DEBUG("r600_irq_set: sw int dma1\n");
+			dma_cntl1 |= TRAP_ENABLE;
+		}
+	}
+
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2591,6 +2612,12 @@ int evergreen_irq_set(struct radeon_device *rdev)
 		cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2);
 	} else
 		WREG32(CP_INT_CNTL, cp_int_cntl);
+
+	WREG32(DMA_CNTL, dma_cntl);
+
+	if (rdev->family >= CHIP_CAYMAN)
+		WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
+
 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
 	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3126,9 +3153,19 @@ int evergreen_irq_process(struct radeon_device *rdev)
 			} else
 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
+		case 224: /* DMA trap event */
+			DRM_DEBUG("IH: DMA trap\n");
+			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
+		case 244: /* DMA trap event */
+			if (rdev->family >= CHIP_CAYMAN) {
+				DRM_DEBUG("IH: DMA1 trap\n");
+				radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+			}
+			break;
 		default:
 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
 			break;
@@ -3154,6 +3191,143 @@ int evergreen_irq_process(struct radeon_device *rdev)
 	return IRQ_HANDLED;
 }
 
+/**
+ * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write the fence seq number,
+ * and a DMA trap packet to generate an interrupt if needed
+ * (evergreen-SI).
+ */
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+				   struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
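+	/* DMA packets carry 40-bit GPU addresses: the low 32 bits (dword
+	 * aligned) and the upper 8 bits are written as separate dwords below.
+	 */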
+	/* write the fence */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+	radeon_ring_write(ring, fence->seq);
+	/* generate an interrupt */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+	/* flush HDP */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | HDP_MEM_COHERENCY_FLUSH_CNTL);
+	radeon_ring_write(ring, 1);
+}
+
+/**
+ * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (evergreen).
+ */
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+				   struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
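+		/* have the engine write the ring position just past this IB
+		 * packet into the next_rptr writeback slot; the value computed
+		 * here accounts for the NOP padding and the 3-dword
+		 * INDIRECT_BUFFER packet emitted below.
+		 */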
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * evergreen_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU pages using the DMA engine (evergreen-cayman).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int evergreen_copy_dma(struct radeon_device *rdev,
+		       uint64_t src_offset, uint64_t dst_offset,
+		       unsigned num_gpu_pages,
+		       struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
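+	/* each copy packet below is 5 dwords; the extra 11 dwords leave room
+	 * for the optional semaphore sync and the fence/trap packets.
+	 */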
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFFF)
+			cur_size_in_dw = 0xFFFFF;
+		size_in_dw -= cur_size_in_dw;
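+		/* a COPY packet is 5 dwords: header with the dword count, then
+		 * dst/src low addresses (dword aligned) and their upper 8 bits.
+		 */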
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
 static int evergreen_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -3217,6 +3391,12 @@ static int evergreen_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -3231,10 +3411,21 @@ static int evergreen_startup(struct radeon_device *rdev)
 			     0, 0xfffff, RADEON_CP_PACKET2);
 	if (r)
 		return r;
+
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
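+	/* the DMA ring keeps a copy of its read pointer in the writeback page
+	 * at R600_WB_DMA_RPTR_OFFSET and is driven through the DMA_RB_RPTR and
+	 * DMA_RB_WPTR registers.
+	 */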
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR, DMA_RB_WPTR,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	if (r)
+		return r;
+
 	r = evergreen_cp_load_microcode(rdev);
 	if (r)
 		return r;
 	r = evergreen_cp_resume(rdev);
+	if (r)
+		return r;
+	r = r600_dma_resume(rdev);
 	if (r)
 		return r;
 
@@ -3283,11 +3474,9 @@ int evergreen_resume(struct radeon_device *rdev)
 
 int evergreen_suspend(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-
 	r600_audio_fini(rdev);
 	r700_cp_stop(rdev);
-	ring->ready = false;
+	r600_dma_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
@@ -3364,6 +3553,9 @@ int evergreen_init(struct radeon_device *rdev)
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -3376,6 +3568,7 @@ int evergreen_init(struct radeon_device *rdev)
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
+		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -3403,6 +3596,7 @@ void evergreen_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
+	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);