Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a giant UBO to optimize performance in 2D [OpenGL3] #66861

Merged
merged 1 commit into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1925,6 +1925,9 @@
<member name="rendering/gl_compatibility/driver.windows" type="String" setter="" getter="" default="&quot;opengl3&quot;">
Windows override for [member rendering/gl_compatibility/driver].
</member>
<member name="rendering/gl_compatibility/item_buffer_size" type="int" setter="" getter="" default="16384">
Maximum number of canvas item commands that can be drawn in a single viewport update. If more render commands are issued, they will be ignored. Decreasing this limit may improve performance on bandwidth-limited devices. Increase this limit if you find that not all objects are being drawn in a frame.
</member>
<member name="rendering/global_illumination/gi/use_half_resolution" type="bool" setter="" getter="" default="false">
If [code]true[/code], renders [VoxelGI] and SDFGI ([member Environment.sdfgi_enabled]) buffers at halved resolution (e.g. 960×540 when the viewport size is 1920×1080). This improves performance significantly when VoxelGI or SDFGI is enabled, at the cost of artifacts that may be visible on polygon edges. The loss in quality becomes less noticeable as the viewport resolution increases. [LightmapGI] rendering is not affected by this setting.
[b]Note:[/b] This property is only read when the project starts. To set half-resolution GI at run-time, call [method RenderingServer.gi_set_use_half_resolution] instead.
Expand Down
1,042 changes: 617 additions & 425 deletions drivers/gles3/rasterizer_canvas_gles3.cpp

Large diffs are not rendered by default.

111 changes: 72 additions & 39 deletions drivers/gles3/rasterizer_canvas_gles3.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,23 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender {
uint32_t pad2;
};

// GL object names and draw parameters stored for one polygon. Entries are kept
// in the polygon registry keyed by PolygonID.
struct PolygonBuffers {
	// GL object names. 0 is OpenGL's reserved "no object" name, so a
	// default-constructed entry never carries an indeterminate handle and is
	// safe to hand to glDelete* (which ignores 0).
	GLuint vertex_buffer = 0;
	GLuint vertex_array = 0;
	GLuint index_buffer = 0;
	// Element count used when drawing.
	// NOTE(review): presumably indices when index_buffer is set, vertices otherwise — confirm in request_polygon().
	int count = 0;
	// NOTE(review): presumably true when no per-vertex color array was supplied, in which case `color` is used — confirm in request_polygon().
	bool color_disabled = false;
	Color color;
};

// Registry of polygon GL buffers owned by this canvas renderer.
struct {
// Maps each PolygonID to its PolygonBuffers entry.
HashMap<PolygonID, PolygonBuffers> polygons;
// Starts at 0. NOTE(review): presumably the most recently issued ID — confirm in request_polygon().
PolygonID last_id = 0;
} polygon_buffers;

RendererCanvasRender::PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
void free_polygon(PolygonID p_polygon) override;

struct InstanceData {
float world[6];
float color_texture_pixel_size[2];
Expand Down Expand Up @@ -156,42 +173,71 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender {
GLuint canvas_quad_vertices;
GLuint canvas_quad_array;

GLuint indexed_quad_buffer;
GLuint indexed_quad_array;

GLuint particle_quad_vertices;
GLuint particle_quad_array;

GLuint ninepatch_vertices;
GLuint ninepatch_elements;

RID canvas_shader_default_version;

uint32_t max_lights_per_render;
uint32_t max_lights_per_item;
uint32_t max_instances_per_batch = 512;
uint32_t max_instances_per_ubo = 16384;
uint32_t max_instance_buffer_size = 16384 * 128;
} data;

struct Batch {
// Position in the UBO measured in bytes
uint32_t start = 0;
uint32_t instance_count = 0;

RID tex = RID();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does RID() self initialize in 4.x?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe `RID tex = RID();` is the same as `RID tex;`, if that is what you are asking.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah so the point is that usually we wouldn't explicitly initialize it in the header, like Vector3 or String. But it's not a big deal :)

RS::CanvasItemTextureFilter filter = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX;
RS::CanvasItemTextureRepeat repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX;

GLES3::CanvasShaderData::BlendMode blend_mode = GLES3::CanvasShaderData::BLEND_MODE_MIX;
Color blend_color = Color(1.0, 1.0, 1.0, 1.0);

Item *clip = nullptr;

RID material = RID();
GLES3::CanvasMaterialData *material_data = nullptr;
CanvasShaderGLES3::ShaderVariant shader_variant = CanvasShaderGLES3::MODE_QUAD;

const Item::Command *command = nullptr;
Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch.
uint32_t primitive_points = 0;
};

// One GL buffer plus the bookkeeping stored alongside it.
// NOTE(review): used as the element type of canvas_instance_data_buffers, so presumably one instance-data UBO per entry — confirm.
struct DataBuffer {
// GL buffer object name; defaults to 0.
GLuint ubo = 0;
// -3 converts to UINT64_MAX - 2 in this unsigned field.
// NOTE(review): presumably chosen so a new buffer never looks recently used — confirm against the frame comparison in the .cpp.
uint64_t last_frame_used = -3;
// Value-initialized sync handle.
// NOTE(review): presumably signals when the GPU has finished reading `ubo` — confirm.
GLsync fence = GLsync();
};

struct State {
GLuint canvas_state_buffer;
LocalVector<GLuint> canvas_instance_data_buffers;
LocalVector<GLsync> fences;
LocalVector<DataBuffer> canvas_instance_data_buffers;
LocalVector<Batch> canvas_instance_batches;
uint32_t current_buffer = 0;
uint32_t current_buffer_index = 0;
uint32_t current_batch_index = 0;

InstanceData *instance_data_array = nullptr;
bool canvas_texscreen_used;
RID canvas_shader_current_version;
RID canvas_shader_default_version;

RID current_tex = RID();
Size2 current_pixel_size = Size2();
RID current_normal = RID();
RID current_specular = RID();
GLES3::Texture *current_tex_ptr;
RID current_shader_version = RID();
RS::PrimitiveType current_primitive = RS::PRIMITIVE_MAX;
uint32_t current_primitive_points = 0;
Item::Command::Type current_command = Item::Command::TYPE_RECT;
RS::CanvasItemTextureFilter current_filter_mode = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX;
RS::CanvasItemTextureRepeat current_repeat_mode = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX;

bool transparent_render_target = false;

double time = 0.0;

uint32_t max_lights_per_render;
uint32_t max_lights_per_item;
uint32_t max_instances_per_batch;

RS::CanvasItemTextureFilter default_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT;
RS::CanvasItemTextureRepeat default_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT;
} state;
Expand Down Expand Up @@ -229,31 +275,18 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender {
bool free(RID p_rid) override;
void update() override;

void _bind_canvas_texture(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, uint32_t &r_index);

// GL object names and draw parameters stored for one polygon. Entries are kept
// in the polygon registry keyed by PolygonID.
struct PolygonBuffers {
	// GL object names. 0 is OpenGL's reserved "no object" name, so a
	// default-constructed entry never carries an indeterminate handle and is
	// safe to hand to glDelete* (which ignores 0).
	GLuint vertex_buffer = 0;
	GLuint vertex_array = 0;
	GLuint index_buffer = 0;
	// Element count used when drawing.
	// NOTE(review): presumably indices when index_buffer is set, vertices otherwise — confirm in request_polygon().
	int count = 0;
	// NOTE(review): presumably true when no per-vertex color array was supplied, in which case `color` is used — confirm in request_polygon().
	bool color_disabled = false;
	Color color;
};

// Registry of polygon GL buffers owned by this canvas renderer.
struct {
// Maps each PolygonID to its PolygonBuffers entry.
HashMap<PolygonID, PolygonBuffers> polygons;
// Starts at 0. NOTE(review): presumably the most recently issued ID — confirm in request_polygon().
PolygonID last_id = 0;
} polygon_buffers;

RendererCanvasRender::PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
void free_polygon(PolygonID p_polygon) override;
void _bind_canvas_texture(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat);
void _prepare_canvas_texture(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, uint32_t &r_index, Size2 &r_texpixel_size);

void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) override;
void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool p_to_backbuffer = false);
void _render_item(RID p_render_target, const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, Light *p_lights, uint32_t &r_index, GLES3::CanvasShaderData::BlendMode p_blend_mode, GLES3::CanvasShaderData::BlendMode &r_last_blend_mode, Color &r_last_blend_color);
void _render_batch(uint32_t &p_max_index);
void _bind_instance_data_buffer(uint32_t p_max_index);
void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, uint32_t &r_last_index, bool p_to_backbuffer = false);
void _record_item_commands(const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch);
void _render_batch(Light *p_lights, uint32_t p_index);
void _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant);
void _new_batch(bool &r_batch_broken, uint32_t &r_index);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken);
void _allocate_instance_data_buffer();
void _align_instance_data_buffer(uint32_t &r_index);

void set_time(double p_time);

Expand Down
5 changes: 3 additions & 2 deletions drivers/gles3/rasterizer_gles3.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ class RasterizerGLES3 : public RendererCompositor {
low_end = true;
}

uint64_t get_frame_number() const { return frame; }
double get_frame_delta_time() const { return delta; }
_ALWAYS_INLINE_ uint64_t get_frame_number() const { return frame; }
_ALWAYS_INLINE_ double get_frame_delta_time() const { return delta; }
_ALWAYS_INLINE_ double get_total_time() const { return time_total; }

RasterizerGLES3();
~RasterizerGLES3();
Expand Down
17 changes: 8 additions & 9 deletions drivers/gles3/shaders/canvas.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,18 @@ out vec2 pixel_size_interp;

void main() {
vec4 instance_custom = vec4(0.0);
draw_data_instance = gl_InstanceID;
#ifdef USE_PRIMITIVE

//weird bug,
//this works
#ifdef USE_PRIMITIVE
draw_data_instance = gl_InstanceID;
vec2 vertex;
vec2 uv;
vec4 color;

if (gl_VertexID == 0) {
if (gl_VertexID % 3 == 0) {
vertex = draw_data[draw_data_instance].point_a;
uv = draw_data[draw_data_instance].uv_a;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_a_rg), unpackHalf2x16(draw_data[draw_data_instance].color_a_ba));
} else if (gl_VertexID == 1) {
} else if (gl_VertexID % 3 == 1) {
vertex = draw_data[draw_data_instance].point_b;
uv = draw_data[draw_data_instance].uv_b;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_b_rg), unpackHalf2x16(draw_data[draw_data_instance].color_b_ba));
Expand All @@ -86,6 +84,7 @@ void main() {
vec4 bone_weights = vec4(0.0);

#elif defined(USE_ATTRIBUTES)
draw_data_instance = gl_InstanceID;
#ifdef USE_INSTANCING
draw_data_instance = 0;
#endif
Expand All @@ -103,9 +102,9 @@ void main() {
#endif

#else

vec2 vertex_base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0));
vec2 vertex_base = vertex_base_arr[gl_VertexID];
draw_data_instance = gl_VertexID / 6;
vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0));
vec2 vertex_base = vertex_base_arr[gl_VertexID % 6];

vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy);
vec4 color = draw_data[draw_data_instance].modulation;
Expand Down
2 changes: 2 additions & 0 deletions drivers/gles3/storage/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ Config::Config() {
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size);

glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_offset_alignment);

// The software skeleton path should be used if either float textures are not supported,
// OR max_vertex_texture_image_units is zero.
use_skeleton_software = (float_texture_supported == false) || (max_vertex_texture_image_units == 0);
Expand Down
2 changes: 2 additions & 0 deletions drivers/gles3/storage/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class Config {
int max_renderable_lights = 0;
int max_lights_per_object = 0;

int uniform_buffer_offset_alignment = 0;

// TODO implement wireframe in OpenGL
// bool generate_wireframes;

Expand Down
2 changes: 2 additions & 0 deletions drivers/gles3/storage/texture_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1533,9 +1533,11 @@ void TextureStorage::render_target_do_clear_request(RID p_render_target) {
if (!rt->clear_requested) {
return;
}
glBindFramebuffer(GL_FRAMEBUFFER, rt->fbo);

glClearBufferfv(GL_COLOR, 0, rt->clear_color.components);
rt->clear_requested = false;
glBindFramebuffer(GL_FRAMEBUFFER, system_fbo);
}

void TextureStorage::render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale) {
Expand Down
5 changes: 0 additions & 5 deletions drivers/gles3/storage/texture_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,6 @@ struct CanvasTexture {

RS::CanvasItemTextureFilter texture_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT;
RS::CanvasItemTextureRepeat texture_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT;

Size2i size_cache = Size2i(1, 1);
bool use_normal_cache = false;
bool use_specular_cache = false;
bool cleared_cache = true;
};

/* CANVAS SHADOW */
Expand Down
3 changes: 3 additions & 0 deletions servers/rendering/dummy/rasterizer_dummy.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class RasterizerDummy : public RendererCompositor {
private:
uint64_t frame = 1;
double delta = 0;
double time = 0.0;

protected:
RasterizerCanvasDummy canvas;
Expand Down Expand Up @@ -82,6 +83,7 @@ class RasterizerDummy : public RendererCompositor {
// Advances the dummy rasterizer's bookkeeping by one frame: accumulates the
// total time, records the latest step, and bumps the frame counter.
void begin_frame(double frame_step) override {
	time += frame_step;
	delta = frame_step;
	++frame;
}

void prepare_for_blitting_render_targets() override {}
Expand All @@ -106,6 +108,7 @@ class RasterizerDummy : public RendererCompositor {

// Returns the current value of the frame counter (`frame`).
uint64_t get_frame_number() const override { return frame; }
// Returns the stored frame step (`delta`), as last set in begin_frame().
double get_frame_delta_time() const override { return delta; }
// Returns the accumulated time (`time`), which begin_frame() increases by each frame step.
double get_total_time() const override { return time; }

RasterizerDummy() {}
~RasterizerDummy() {}
Expand Down
1 change: 1 addition & 0 deletions servers/rendering/renderer_compositor.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class RendererCompositor {
virtual void finalize() = 0;
virtual uint64_t get_frame_number() const = 0;
virtual double get_frame_delta_time() const = 0;
virtual double get_total_time() const = 0;

static bool is_low_end() { return low_end; };
virtual bool is_xr_enabled() const;
Expand Down
4 changes: 4 additions & 0 deletions servers/rendering_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2875,6 +2875,10 @@ void RenderingServer::init() {
GLOBAL_DEF("rendering/rendering_device/staging_buffer/texture_upload_region_size_px", 64);
GLOBAL_DEF("rendering/rendering_device/descriptor_pools/max_descriptors_per_pool", 64);

// Number of commands that can be drawn per frame.
GLOBAL_DEF_RST("rendering/gl_compatibility/item_buffer_size", 16384);
ProjectSettings::get_singleton()->set_custom_property_info("rendering/gl_compatibility/item_buffer_size", PropertyInfo(Variant::INT, "rendering/gl_compatibility/item_buffer_size", PROPERTY_HINT_RANGE, "1024,1048576,1"));
akien-mga marked this conversation as resolved.
Show resolved Hide resolved

GLOBAL_DEF("rendering/shader_compiler/shader_cache/enabled", true);
GLOBAL_DEF("rendering/shader_compiler/shader_cache/compress", true);
GLOBAL_DEF("rendering/shader_compiler/shader_cache/use_zstd_compression", true);
Expand Down