@@ -1544,6 +1544,181 @@ void xe_migrate_wait(struct xe_migrate *m)
         dma_fence_wait(m->fence, false);
 }

+static u32 pte_update_cmd_size(u64 size)
+{
+        u32 num_dword;
+        u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+
+        XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+        /*
+         * MI_STORE_DATA_IMM is used to update the page table. Each
+         * instruction can update at most 0x1ff PTE entries. To update
+         * n (n <= 0x1ff) PTE entries we need:
+         * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc.)
+         * 2 dwords for the page table's physical location
+         * 2*n dwords for the PTE values (each PTE entry is 2 dwords)
+         */
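+        /*
+         * Worked example (assuming a 4 KiB XE_PAGE_SIZE): a 512 KiB update
+         * covers 128 PTEs, which fits in a single MI_STORE_DATA_IMM, so the
+         * batch needs (1 + 2) * 1 + 2 * 128 = 259 dwords.
+         */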
+        num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff);
+        num_dword += entries * 2;
+
+        return num_dword;
+}
+
+static void build_pt_update_batch_sram(struct xe_migrate *m,
+                                       struct xe_bb *bb, u32 pt_offset,
+                                       dma_addr_t *sram_addr, u32 size)
+{
+        u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+        u32 ptes;
+        int i = 0;
+
+        ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
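+        /*
+         * Emit one MI_STORE_DATA_IMM per chunk of up to 0x1ff PTEs; each
+         * chunk advances pt_offset by 8 bytes per 64-bit PTE written.
+         */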
+        while (ptes) {
+                u32 chunk = min(0x1ffU, ptes);
+
+                bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
+                bb->cs[bb->len++] = pt_offset;
+                bb->cs[bb->len++] = 0;
+
+                pt_offset += chunk * 8;
+                ptes -= chunk;
+
+                while (chunk--) {
+                        u64 addr = sram_addr[i++] & PAGE_MASK;
+
+                        xe_tile_assert(m->tile, addr);
+                        addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+                                                                 addr, pat_index,
+                                                                 0, false, 0);
+                        bb->cs[bb->len++] = lower_32_bits(addr);
+                        bb->cs[bb->len++] = upper_32_bits(addr);
+                }
+        }
+}
+
+enum xe_migrate_copy_dir {
+        XE_MIGRATE_COPY_TO_VRAM,
+        XE_MIGRATE_COPY_TO_SRAM,
+};
+
+static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
+                                         unsigned long npages,
+                                         dma_addr_t *sram_addr, u64 vram_addr,
+                                         const enum xe_migrate_copy_dir dir)
+{
+        struct xe_gt *gt = m->tile->primary_gt;
+        struct xe_device *xe = gt_to_xe(gt);
+        struct dma_fence *fence = NULL;
+        u32 batch_size = 2;
+        u64 src_L0_ofs, dst_L0_ofs;
+        u64 round_update_size;
+        struct xe_sched_job *job;
+        struct xe_bb *bb;
+        u32 update_idx, pt_slot = 0;
+        int err;
+
+        if (npages * PAGE_SIZE > MAX_PREEMPTDISABLE_TRANSFER)
+                return ERR_PTR(-EINVAL);
+
+        round_update_size = npages * PAGE_SIZE;
+        batch_size += pte_update_cmd_size(round_update_size);
+        batch_size += EMIT_COPY_DW;
+
+        bb = xe_bb_new(gt, batch_size, true);
+        if (IS_ERR(bb)) {
+                err = PTR_ERR(bb);
+                return ERR_PTR(err);
+        }
+
+        build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+                                   sram_addr, round_update_size);
+
+        if (dir == XE_MIGRATE_COPY_TO_VRAM) {
+                src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+                dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+        } else {
+                src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
+                dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0);
+        }
+
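+        /*
+         * The migration job runs two batches back to back: the page-table
+         * updates above (closed by MI_BATCH_BUFFER_END), then the copy
+         * emitted from update_idx onwards.
+         */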
+        bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+        update_idx = bb->len;
+
+        emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, round_update_size,
+                  XE_PAGE_SIZE);
+
+        job = xe_bb_create_migration_job(m->q, bb,
+                                         xe_migrate_batch_base(m, true),
+                                         update_idx);
+        if (IS_ERR(job)) {
+                err = PTR_ERR(job);
+                goto err;
+        }
+
+        xe_sched_job_add_migrate_flush(job, 0);
+
+        mutex_lock(&m->job_mutex);
+        xe_sched_job_arm(job);
+        fence = dma_fence_get(&job->drm.s_fence->finished);
+        xe_sched_job_push(job);
+
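+        /* Cache the fence so a later xe_migrate_wait() also covers this job. */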
+        dma_fence_put(m->fence);
+        m->fence = dma_fence_get(fence);
+        mutex_unlock(&m->job_mutex);
+
+        xe_bb_free(bb, fence);
+
+        return fence;
+
+err:
+        xe_bb_free(bb, NULL);
+
+        return ERR_PTR(err);
+}
+
+/**
+ * xe_migrate_to_vram() - Migrate to VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Array of DMA addresses (source of migrate)
+ * @dst_addr: Device physical address of VRAM (destination of migrate)
+ *
+ * Copy from an array of DMA addresses to a VRAM device physical address.
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
+                                     unsigned long npages,
+                                     dma_addr_t *src_addr,
+                                     u64 dst_addr)
+{
+        return xe_migrate_vram(m, npages, src_addr, dst_addr,
+                               XE_MIGRATE_COPY_TO_VRAM);
+}
+
+/**
+ * xe_migrate_from_vram() - Migrate from VRAM
+ * @m: The migration context.
+ * @npages: Number of pages to migrate.
+ * @src_addr: Device physical address of VRAM (source of migrate)
+ * @dst_addr: Array of DMA addresses (destination of migrate)
+ *
+ * Copy from a VRAM device physical address to an array of DMA addresses.
+ *
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
+ * failure
+ */
+struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
+                                       unsigned long npages,
+                                       u64 src_addr,
+                                       dma_addr_t *dst_addr)
+{
+        return xe_migrate_vram(m, npages, dst_addr, src_addr,
+                               XE_MIGRATE_COPY_TO_SRAM);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_migrate.c"
 #endif
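
A minimal caller-side sketch of the new helper (hypothetical code, not part of the patch): it assumes dma_addrs[] already holds DMA-mapped page addresses and vram_dpa is a valid VRAM device physical address.

        struct dma_fence *fence;

        fence = xe_migrate_to_vram(m, npages, dma_addrs, vram_dpa);
        if (IS_ERR(fence))
                return PTR_ERR(fence);

        /* The copy is complete once the returned fence signals. */
        dma_fence_wait(fence, false);
        dma_fence_put(fence);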