swiotlb: use bitmap to track free slots
Currently, each slot tracks the number of contiguous free slots starting
from itself, which makes it quick to check whether an allocation request
can be satisfied from a given index. But maintaining this information
adds overhead: whenever a slot is allocated or freed, the preceding
slots may need to be updated because their counts of contiguous free
slots change, and these updates touch memory scattered across multiple
cachelines.
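
For illustration, the current bookkeeping can be sketched as a small
standalone C model (a simplification with invented names, not the kernel
code; IO_TLB_SEGSIZE boundary handling and locking are omitted).
Releasing a range has to walk backwards through the preceding free slots
to extend their counts:

    /* Simplified model: slot[i].list holds the number of contiguous
     * free slots starting at i, and 0 means "allocated". */
    struct old_slot {
            unsigned int list;
    };

    /* Release slots [index, index + nslots) and merge with neighbours.
     * Assumes the array extends past index + nslots (no bounds checks). */
    void old_release(struct old_slot *slots, int index, int nslots)
    {
            /* continue the free run that follows the released range */
            unsigned int count = slots[index + nslots].list;
            int i;

            for (i = index + nslots - 1; i >= index; i--)
                    slots[i].list = ++count;

            /* extend the runs of the free slots preceding the range;
             * each of these writes may land on a different cacheline */
            for (i = index - 1; i >= 0 && slots[i].list; i--)
                    slots[i].list = ++count;
    }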

To reduce the overhead of maintaining the number of contiguous free
entries, use a global bitmap to track free slots: each bit indicates
whether the corresponding slot is free. The number of contiguous free
slots can then be determined by counting consecutive 1s in the bitmap.
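
A corresponding sketch of the bitmap variant (again simplified, with
invented helper names; the patch itself uses the kernel's __set_bit(),
__clear_bit() and find_next_zero_bit() rather than open-coded loops):

    #include <stdbool.h>

    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    /* 1 in bit X means the slot indexed by X is free. */
    static bool slot_is_free(const unsigned long *bitmap, unsigned int i)
    {
            return bitmap[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG));
    }

    /* [index, index + nslots) is free iff the first 0 bit at or above
     * index is not below index + nslots; the kernel checks this with
     * find_next_zero_bit(bitmap, index + nslots, index) == index + nslots. */
    bool range_is_free(const unsigned long *bitmap,
                       unsigned int index, unsigned int nslots)
    {
            unsigned int i;

            for (i = index; i < index + nslots; i++)
                    if (!slot_is_free(bitmap, i))
                            return false;
            return true;
    }

    /* Allocation clears the bits, freeing sets them again; no slots
     * outside the range are touched. */
    void mark_range(unsigned long *bitmap, unsigned int index,
                    unsigned int nslots, bool is_free)
    {
            unsigned int i;

            for (i = index; i < index + nslots; i++) {
                    unsigned long mask = 1UL << (i % BITS_PER_LONG);

                    if (is_free)
                            bitmap[i / BITS_PER_LONG] |= mask;
                    else
                            bitmap[i / BITS_PER_LONG] &= ~mask;
            }
    }

Freeing thus only writes the bits covering the released range, with no
backward walk over preceding slots.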

Tests show that the average cost of freeing slots drops by 120 cycles
while the average cost of allocation increases by 20 cycles; overall,
a pair of allocation and free saves 100 cycles.

Signed-off-by: Chao Gao <chao.gao@intel.com>
gaochaointel authored and kiryl committed Jan 10, 2022
1 parent 95bf288 commit 64f4667
Showing 2 changed files with 32 additions and 36 deletions.
6 changes: 3 additions & 3 deletions include/linux/swiotlb.h
@@ -77,8 +77,6 @@ extern enum swiotlb_force swiotlb_force;
  *		@end. For default swiotlb, this is command line adjustable via
  *		setup_io_tlb_npages.
  * @used:	The number of used IO TLB block.
- * @list:	The free list describing the number of free entries available
- *		from each index.
  * @index:	The index to start searching in the next round.
  * @orig_addr:	The original address corresponding to a mapped entry.
  * @alloc_size:	Size of the allocated buffer.
@@ -88,6 +86,8 @@ extern enum swiotlb_force swiotlb_force;
  * @late_alloc:	%true if allocated using the page allocator
  * @force_bounce: %true if swiotlb bouncing is forced
  * @for_alloc:	%true if the pool is used for memory allocation
+ * @bitmap:	The bitmap used to track free entries. 1 in bit X means the slot
+ *		indexed by X is free.
  */
 struct io_tlb_mem {
 	phys_addr_t start;
@@ -103,8 +103,8 @@ struct io_tlb_mem {
 	struct io_tlb_slot {
 		phys_addr_t orig_addr;
 		size_t alloc_size;
-		unsigned int list;
 	} *slots;
+	unsigned long *bitmap;
 };
 extern struct io_tlb_mem io_tlb_default_mem;

62 changes: 29 additions & 33 deletions kernel/dma/swiotlb.c
@@ -191,7 +191,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 
 	spin_lock_init(&mem->lock);
 	for (i = 0; i < mem->nslabs; i++) {
-		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
+		__set_bit(i, mem->bitmap);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
 	}
@@ -216,6 +216,11 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
+	mem->bitmap = memblock_alloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), SMP_CACHE_BYTES);
+	if (!mem->bitmap)
+		panic("%s: Failed to allocate %lu bytes align=0x%x\n",
+		      __func__, DIV_ROUND_UP(nslabs, BITS_PER_BYTE), SMP_CACHE_BYTES);
+
 	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
 
 	if (verbose)
@@ -312,10 +317,14 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (WARN_ON_ONCE(mem->nslabs))
 		return -ENOMEM;
 
+	mem->bitmap = kzalloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), GFP_KERNEL);
 	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 		get_order(array_size(sizeof(*mem->slots), nslabs)));
-	if (!mem->slots)
+	if (!mem->slots || !mem->bitmap) {
+		kfree(mem->bitmap);
+		kfree(mem->slots);
 		return -ENOMEM;
+	}
 
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
@@ -468,7 +477,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	unsigned int iotlb_align_mask =
 		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
 	unsigned int nslots = nr_slots(alloc_size), stride;
-	unsigned int index, wrap, count = 0, i;
+	unsigned int index, wrap, i;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned long flags;
 
@@ -497,6 +506,12 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 			continue;
 		}
 
+		/* Start from the next segment if no enough free entries */
+		if (io_tlb_offset(index) + nslots > IO_TLB_SEGSIZE) {
+			index = wrap_index(mem, round_up(index, IO_TLB_SEGSIZE));
+			continue;
+		}
+
 		/*
 		 * If we find a slot that indicates we have 'nslots' number of
 		 * contiguous buffers, we allocate the buffers from that slot
@@ -505,7 +520,8 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 		if (!iommu_is_span_boundary(index, nslots,
 					    nr_slots(tbl_dma_addr),
 					    max_slots)) {
-			if (mem->slots[index].list >= nslots)
+			if (find_next_zero_bit(mem->bitmap, index + nslots, index) ==
+			    index + nslots)
 				goto found;
 		}
 		index = wrap_index(mem, index + stride);
@@ -517,14 +533,10 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 
 found:
 	for (i = index; i < index + nslots; i++) {
-		mem->slots[i].list = 0;
+		__clear_bit(i, mem->bitmap);
 		mem->slots[i].alloc_size =
 			alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
 	}
-	for (i = index - 1;
-	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
-	     mem->slots[i].list; i--)
-		mem->slots[i].list = ++count;
 
 	/*
 	 * Update the indices to avoid searching in the next round.
@@ -593,38 +605,19 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
 	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
 	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-	int count, i;
+	int i;
 
-	/*
-	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contiguous entries available.
-	 * While returning the entries to the free list, we merge the entries
-	 * with slots below and above the pool being returned.
-	 */
 	spin_lock_irqsave(&mem->lock, flags);
-	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
-		count = mem->slots[index + nslots].list;
-	else
-		count = 0;
-
 	/*
-	 * Step 1: return the slots to the free list, merging the slots with
-	 * superceeding slots
+	 * Return the slots to swiotlb, updating bitmap to indicate
+	 * corresponding entries are free.
 	 */
 	for (i = index + nslots - 1; i >= index; i--) {
-		mem->slots[i].list = ++count;
+		__set_bit(i, mem->bitmap);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
 	}
 
-	/*
-	 * Step 2: merge the returned slots with the preceding slots, if
-	 * available (non zero)
-	 */
-	for (i = index - 1;
-	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
-	     i--)
-		mem->slots[i].list = ++count;
 	mem->used -= nslots;
 	spin_unlock_irqrestore(&mem->lock, flags);
 }
@@ -798,9 +791,12 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 		if (!mem)
 			return -ENOMEM;
 
+		mem->bitmap = kzalloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), GFP_KERNEL);
 		mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs),
 				     GFP_KERNEL);
-		if (!mem->slots) {
+		if (!mem->slots || !mem->bitmap) {
+			kfree(mem->slots);
+			kfree(mem->bitmap);
 			kfree(mem);
 			return -ENOMEM;
 		}
