Skip to content

Commit

Permalink
drm/i915: Move GEM activity tracking into a common struct reservation…
Browse files Browse the repository at this point in the history
…_object

In preparation to support many distinct timelines, we need to expand the
activity tracking on the GEM object to handle more than just a request
per engine. We already use the struct reservation_object on the dma-buf
to handle many fence contexts, so integrating that into the GEM object
itself is the preferred solution. (For example, we can now share the same
reservation_object between every consumer/producer using this buffer and
skip the manual import/export via dma-buf.)

v2: Reimplement busy-ioctl (by walking the reservation object), postpone
the ABI change for another day. Similarly use the reservation object to
find the last_write request (if active and from i915) for choosing
display CS flips.

Caveats:

 * busy-ioctl: busy-ioctl only reports on the native fences, it will not
warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is
being rendered to by external fences. It also will not report the same
busy state as wait-ioctl (or polling on the dma-buf) in the same
circumstances. On the plus side, it does retain reporting of which
*i915* engines are engaged with this object.

 * non-blocking atomic modesets take a step backwards as the wait for
render completion blocks the ioctl. This is fixed in a subsequent
patch to use a fence instead for awaiting on the rendering, see
"drm/i915: Restore nonblocking awaits for modesetting"

 * dynamic array manipulation for shared-fences in reservation is slower
than the previous lockless static assignment (e.g. gem_exec_lut_handle
runtime on ivb goes from 42s to 66s), mainly due to atomic operations
(maintaining the fence refcounts).

 * loss of object-level retirement callbacks, emulated by VMA retirement
tracking.

 * minor loss of object-level last activity information from debugfs,
could be replaced with per-vma information if desired

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk
  • Loading branch information
ickle committed Oct 28, 2016
1 parent f0cd518 commit d07f0e5
Show file tree
Hide file tree
Showing 15 changed files with 212 additions and 533 deletions.
15 changes: 3 additions & 12 deletions drivers/gpu/drm/i915/i915_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
struct i915_vma *vma;
unsigned int frontbuffer_bits;
int pin_count = 0;
enum intel_engine_id id;

lockdep_assert_held(&obj->base.dev->struct_mutex);

seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ",
seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
&obj->base,
get_active_flag(obj),
get_pin_flag(obj),
Expand All @@ -149,14 +148,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
get_pin_mapped_flag(obj),
obj->base.size / 1024,
obj->base.read_domains,
obj->base.write_domain);
for_each_engine(engine, dev_priv, id)
seq_printf(m, "%x ",
i915_gem_active_get_seqno(&obj->last_read[id],
&obj->base.dev->struct_mutex));
seq_printf(m, "] %x %s%s%s",
i915_gem_active_get_seqno(&obj->last_write,
&obj->base.dev->struct_mutex),
obj->base.write_domain,
i915_cache_level_str(dev_priv, obj->cache_level),
obj->mm.dirty ? " dirty" : "",
obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
Expand Down Expand Up @@ -187,8 +179,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
if (obj->stolen)
seq_printf(m, " (stolen: %08llx)", obj->stolen->start);

engine = i915_gem_active_get_engine(&obj->last_write,
&dev_priv->drm.struct_mutex);
engine = i915_gem_object_last_write_engine(obj);
if (engine)
seq_printf(m, " (%s)", engine->name);

Expand Down
62 changes: 24 additions & 38 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include <linux/intel-iommu.h>
#include <linux/kref.h>
#include <linux/pm_qos.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>

#include <drm/drmP.h>
Expand Down Expand Up @@ -2246,21 +2247,12 @@ struct drm_i915_gem_object {
struct list_head batch_pool_link;

unsigned long flags;
/**
* This is set if the object is on the active lists (has pending
* rendering and so a non-zero seqno), and is not set if it i s on
* inactive (ready to be unbound) list.
*/
#define I915_BO_ACTIVE_SHIFT 0
#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
#define __I915_BO_ACTIVE(bo) \
((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)

/**
* Have we taken a reference for the object for incomplete GPU
* activity?
*/
#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
#define I915_BO_ACTIVE_REF 0

/*
* Is the object to be mapped as read-only to the GPU
Expand All @@ -2281,6 +2273,7 @@ struct drm_i915_gem_object {

/** Count of VMA actually bound by this object */
unsigned int bind_count;
unsigned int active_count;
unsigned int pin_display;

struct {
Expand Down Expand Up @@ -2320,8 +2313,7 @@ struct drm_i915_gem_object {
* read request. This allows for the CPU to read from an active
* buffer by only waiting for the write to complete.
*/
struct i915_gem_active last_read[I915_NUM_ENGINES];
struct i915_gem_active last_write;
struct reservation_object *resv;

/** References from framebuffers, locks out tiling changes. */
unsigned long framebuffer_references;
Expand All @@ -2340,6 +2332,8 @@ struct drm_i915_gem_object {

/** for phys allocated objects */
struct drm_dma_handle *phys_handle;

struct reservation_object __builtin_resv;
};

static inline struct drm_i915_gem_object *
Expand Down Expand Up @@ -2425,35 +2419,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
}

static inline unsigned long
i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
{
return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
}

static inline bool
i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
{
return i915_gem_object_get_active(obj);
}

static inline void
i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
{
obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
}

static inline void
i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
{
obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
}

static inline bool
i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
int engine)
{
return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
return obj->active_count;
}

static inline bool
Expand Down Expand Up @@ -2496,6 +2465,23 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
return obj->tiling_and_stride & STRIDE_MASK;
}

static inline struct intel_engine_cs *
i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
{
struct intel_engine_cs *engine = NULL;
struct dma_fence *fence;

rcu_read_lock();
fence = reservation_object_get_excl_rcu(obj->resv);
rcu_read_unlock();

if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
engine = to_request(fence)->engine;
dma_fence_put(fence);

return engine;
}

static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
{
i915_gem_object_get(vma->obj);
Expand Down
Loading

0 comments on commit d07f0e5

Please sign in to comment.