Skip to content

Commit

Permalink
- decided to restrict the 2.0 beta to OpenGL 4.x with GL_ARB_buffer_s…
Browse files Browse the repository at this point in the history
…torage extension and removed all code for supporting older versions.

Sadly, anything else makes no sense.
All the recently made changes live or die, depending on this extension's presence.
Without it, there are major performance issues with the buffer uploads. All of the traditional buffer upload methods are without exception horrendously slow, especially in the context of a Doom engine where frequent small updates are required.
It could be solved with a complete restructuring of the engine, of course, but that's hardly worth the effort, considering it's only for legacy hardware whose market share will inevitably shrink considerably over the next years.
And even then, under the best circumstances I'd still get the same performance as the old immediate mode renderer in GZDoom 1.x and still couldn't implement the additions I'd like to make.

So, since I need to keep GZDoom 1.x around anyway for older GL 2.x hardware, it may as well serve for 3.x hardware, too. It's certainly less work than constantly trying to find workarounds for the older hardware's limitations that cost more time than working on future-proofing the engine.

This new, trimmed down 4.x renderer runs on a core profile configuration and uses persistently mapped buffers for nearly everything that is getting transferred to the GPU. (The global uniforms are still being used as such but they'll be phased out after the first beta release.)
  • Loading branch information
Christoph Oelckers committed Aug 1, 2014
1 parent 7967082 commit a8e9c18
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 324 deletions.
169 changes: 14 additions & 155 deletions src/gl/data/gl_vertexbuffer.cpp
Expand Up @@ -90,27 +90,11 @@ void FVertexBuffer::BindVBO()
FFlatVertexBuffer::FFlatVertexBuffer()
: FVertexBuffer()
{
if (gl.flags & RFL_BUFFER_STORAGE)
{
unsigned int bytesize = BUFFER_SIZE * sizeof(FFlatVertex);
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBufferStorage(GL_ARRAY_BUFFER, bytesize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
map = (FFlatVertex*)glMapBufferRange(GL_ARRAY_BUFFER, 0, bytesize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
mNumReserved = mIndex = mCurIndex = 0;
}
else
{
vbo_shadowdata.Reserve(BUFFER_SIZE);
map = &vbo_shadowdata[0];

for (int i = 0; i < 20; i++)
{
map[i].Set(0, 0, 0, 100001.f, i);
}
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBufferData(GL_ARRAY_BUFFER, BUFFER_SIZE * sizeof(FFlatVertex), map, GL_STREAM_DRAW);
mNumReserved = mIndex = mCurIndex = 20;
}
unsigned int bytesize = BUFFER_SIZE * sizeof(FFlatVertex);
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBufferStorage(GL_ARRAY_BUFFER, bytesize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
map = (FFlatVertex*)glMapBufferRange(GL_ARRAY_BUFFER, 0, bytesize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
mNumReserved = mIndex = mCurIndex = 0;

glBindVertexArray(vao_id);
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
Expand All @@ -128,101 +112,6 @@ FFlatVertexBuffer::~FFlatVertexBuffer()
glBindBuffer(GL_ARRAY_BUFFER, 0);
}

//==========================================================================
//
// Renders the buffer's contents with immediate mode functions
// This is here so that the immediate mode fallback does not need
// to double all rendering code and can instead reuse the buffer-based version
//
//==========================================================================

// Console variable that selects which fallback path ImmRenderBuffer() takes;
// the switch in that function handles the values 0 through 3.
// The handler body computes a clamped copy of the current value:
//   - negative values become 0,
//   - 0 becomes 1 when RFL_COREPROFILE is set in gl.flags,
//   - values above 3 become 3.
// NOTE(review): in the lines shown here the clamped result 'newself' is never
// assigned back to 'self', so the clamp appears to have no effect - confirm.
CUSTOM_CVAR(Int, gl_rendermethod, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
{
int newself = self;
if (newself < 0) newself = 0;
if (newself == 0 && (gl.flags & RFL_COREPROFILE)) newself = 1;
if (newself > 3) newself = 3;
}

// Draws 'count' vertices starting at index 'offset' of the CPU-side 'map' array,
// using the upload method selected by the gl_rendermethod CVAR.
//   primtype - primitive type handed to glBegin / glDrawArrays
//   offset   - index of the first vertex to draw
//   count    - number of vertices to draw
void FFlatVertexBuffer::ImmRenderBuffer(unsigned int primtype, unsigned int offset, unsigned int count)
{
// this will only get called if we can't acquire a persistently mapped buffer.
// Any of the provided methods are rather shitty, with immediate mode being the most reliable across different hardware.
// Still, allow this to be set per CVAR, just in case. Fortunately for newer hardware all this nonsense is not needed anymore.
switch (gl_rendermethod)
{
case 0:
// trusty old immediate mode
// This path is compiled out when CORE_PROFILE is defined. When it is compiled in
// but RFL_COREPROFILE is set at run time, the 'if' is skipped and control falls
// through into case 1.
#ifndef CORE_PROFILE
if (!(gl.flags & RFL_COREPROFILE))
{
glBegin(primtype);
for (unsigned int i = 0; i < count; i++)
{
glVertexAttrib2fv(VATTR_TEXCOORD, &map[offset + i].u);
glVertexAttrib3fv(VATTR_VERTEX, &map[offset + i].x);
}
glEnd();
break;
}
#endif
case 1:
// uniform array
// Vertices are passed as a float uniform array, 5 floats per vertex,
// with at most 20 vertices submitted per draw call.
if (count > 20)
{
int start = offset;
// Copy of the first vertex, taken before 'map' is modified below.
FFlatVertex ff = map[offset];
while (count > 20)
{

if (primtype == GL_TRIANGLE_FAN)
{
// split up the fan into multiple sub-fans
// The fan's first vertex is written into the current slot so that every
// sub-fan starts with it; each pass advances by 18 vertices.
map[offset] = map[start];
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, 20 * 5, &map[offset].x);
glDrawArrays(primtype, 0, 20);
offset += 18;
count -= 18;
}
else
{
// we only have triangle fans of this size so don't bother with strips and triangles here.
// Note: this 'break' leaves the while loop only; the upload and draw below
// still run with the full remaining count.
break;
}
}
// Draw what is left, again starting with the fan's first vertex.
map[offset] = map[start];
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x);
glDrawArrays(primtype, 0, count);
// NOTE(review): 'ff' was copied from map[start], so this writes the first vertex
// rather than the previous contents of map[offset], and the slots overwritten
// inside the loop are not restored - confirm this is intended.
map[offset] = ff;
}
else
{
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x);
glDrawArrays(primtype, 0, count);
}
break;

case 2:
// glBufferSubData
// Copies the requested range from vbo_shadowdata into the buffer object, then draws it.
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBufferSubData(GL_ARRAY_BUFFER, offset * sizeof(FFlatVertex), count * sizeof(FFlatVertex), &vbo_shadowdata[offset]);
glDrawArrays(primtype, offset, count);
break;

case 3:
// glMapBufferRange
// Maps only the requested range for writing, copies the data from vbo_shadowdata,
// unmaps and draws. Nothing is drawn if the mapping returns NULL.
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
void *p = glMapBufferRange(GL_ARRAY_BUFFER, offset * sizeof(FFlatVertex), count * sizeof(FFlatVertex), GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT);
if (p != NULL)
{
memcpy(p, &vbo_shadowdata[offset], count * sizeof(FFlatVertex));
glUnmapBuffer(GL_ARRAY_BUFFER);
glDrawArrays(primtype, offset, count);
}
break;
}
}

//==========================================================================
//
// Initialize a single vertex
Expand Down Expand Up @@ -390,11 +279,6 @@ void FFlatVertexBuffer::UpdatePlaneVertices(sector_t *sec, int plane)
if (plane == sector_t::floor && sec->transdoor) vt->z -= 1;
mapvt->z = vt->z;
}
if (!(gl.flags & RFL_BUFFER_STORAGE))
{
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBufferSubData(GL_ARRAY_BUFFER, startvt * sizeof(FFlatVertex), countvt * sizeof(FFlatVertex), &vbo_shadowdata[startvt]);
}
}

//==========================================================================
Expand All @@ -405,32 +289,10 @@ void FFlatVertexBuffer::UpdatePlaneVertices(sector_t *sec, int plane)

// Rebuilds the flat vertex data: resizes vbo_shadowdata to mNumReserved entries,
// calls CreateFlatVBO(), sets mCurIndex and mIndex to the resulting array size
// and copies the array into 'map'.
// NOTE(review): this listing comes from a diff shown without +/- markers. The
// if / else-if block testing RFL_NOBUFFER looks like the version the commit
// removes, and the four statements after it like its replacement - confirm
// against the repository.
void FFlatVertexBuffer::CreateVBO()
{
if (!(gl.flags & RFL_NOBUFFER))
{
vbo_shadowdata.Resize(mNumReserved);
CreateFlatVBO();
mCurIndex = mIndex = vbo_shadowdata.Size();
if (gl.flags & RFL_BUFFER_STORAGE)
{
// Write the whole shadow array into 'map'.
memcpy(map, &vbo_shadowdata[0], vbo_shadowdata.Size() * sizeof(FFlatVertex));
}
else
{
// Upload only the entries that follow the first mNumReserved ones.
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBufferSubData(GL_ARRAY_BUFFER, mNumReserved * sizeof(FFlatVertex), (mIndex - mNumReserved) * sizeof(FFlatVertex), &vbo_shadowdata[mNumReserved]);
}
}
else if (sectors)
{
// set all VBO info to invalid values so that we can save some checks in the rendering code
for(int i=0;i<numsectors;i++)
{
sectors[i].vboindex[3] = sectors[i].vboindex[2] =
sectors[i].vboindex[1] = sectors[i].vboindex[0] = -1;
sectors[i].vboheight[1] = sectors[i].vboheight[0] = FIXED_MIN;
}
}

vbo_shadowdata.Resize(mNumReserved);
CreateFlatVBO();
mCurIndex = mIndex = vbo_shadowdata.Size();
memcpy(map, &vbo_shadowdata[0], vbo_shadowdata.Size() * sizeof(FFlatVertex));
}

//==========================================================================
Expand Down Expand Up @@ -462,12 +324,9 @@ void FFlatVertexBuffer::CheckPlanes(sector_t *sector)

// Calls CheckPlanes() for the given sector, for the sector returned by
// GetHeightSec() when that is not NULL, and for the model sector of every
// entry in sector->e->XFloor.ffloors.
// NOTE(review): this listing comes from a diff shown without +/- markers. The
// block guarded by RFL_NOBUFFER looks like the version the commit removes, and
// the unguarded statements after it like its replacement - confirm against the
// repository.
void FFlatVertexBuffer::CheckUpdate(sector_t *sector)
{
if (!(gl.flags & RFL_NOBUFFER))
{
CheckPlanes(sector);
sector_t *hs = sector->GetHeightSec();
if (hs != NULL) CheckPlanes(hs);
for(unsigned i = 0; i < sector->e->XFloor.ffloors.Size(); i++)
CheckPlanes(sector->e->XFloor.ffloors[i]->model);
}
CheckPlanes(sector);
sector_t *hs = sector->GetHeightSec();
if (hs != NULL) CheckPlanes(hs);
for(unsigned i = 0; i < sector->e->XFloor.ffloors.Size(); i++)
CheckPlanes(sector->e->XFloor.ffloors[i]->model);
}
9 changes: 1 addition & 8 deletions src/gl/data/gl_vertexbuffer.h
Expand Up @@ -83,14 +83,7 @@ class FFlatVertexBuffer : public FVertexBuffer
// Draws 'count' vertices starting at 'offset' with the given primitive type.
// The draw is bracketed by drawcalls.Clock() / drawcalls.Unclock().
// NOTE(review): this listing comes from a diff shown without +/- markers. The
// if / else on RFL_BUFFER_STORAGE looks like the version the commit removes, and
// the single glDrawArrays call after it like its replacement - confirm against
// the repository.
void RenderArray(unsigned int primtype, unsigned int offset, unsigned int count)
{
drawcalls.Clock();
if (gl.flags & RFL_BUFFER_STORAGE)
{
glDrawArrays(primtype, offset, count);
}
else
{
// Without RFL_BUFFER_STORAGE the draw is delegated to ImmRenderBuffer().
ImmRenderBuffer(primtype, offset, count);
}
glDrawArrays(primtype, offset, count);
drawcalls.Unclock();
}

Expand Down
39 changes: 5 additions & 34 deletions src/gl/dynlights/gl_lightbuffer.cpp
Expand Up @@ -64,18 +64,10 @@ FLightBuffer::FLightBuffer()
glGenBuffers(1, &mBufferId);
glBindBuffer(mBufferType, mBufferId);
unsigned int bytesize = BUFFER_SIZE * 4 * sizeof(float);
if (gl.flags & RFL_BUFFER_STORAGE)
{
glBufferStorage(mBufferType, bytesize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
void *map = glMapBufferRange(mBufferType, 0, bytesize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
mBufferPointer = (float*)map;
glBindBufferBase(mBufferType, LIGHTBUF_BINDINGPOINT, mBufferId);
}
else
{
glBufferData(mBufferType, bytesize, NULL, GL_STREAM_DRAW);
mBufferPointer = NULL;
}
glBufferStorage(mBufferType, bytesize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
void *map = glMapBufferRange(mBufferType, 0, bytesize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
mBufferPointer = (float*)map;
glBindBufferBase(mBufferType, LIGHTBUF_BINDINGPOINT, mBufferId);

Clear();
mLastMappedIndex = UINT_MAX;
Expand Down Expand Up @@ -109,15 +101,7 @@ int FLightBuffer::UploadLights(FDynLightData &data)

float *copyptr;

if (mBufferPointer != NULL)
{
copyptr = mBufferPointer + mIndex * 4;
}
else
{
unsigned int pos = mBufferArray.Reserve(totalsize * 4);
copyptr = &mBufferArray[pos];
}
copyptr = mBufferPointer + mIndex * 4;

float parmcnt[] = { 0, size0, size0 + size1, size0 + size1 + size2 };

Expand All @@ -126,12 +110,6 @@ int FLightBuffer::UploadLights(FDynLightData &data)
memcpy(&copyptr[4 + 4*size0], &data.arrays[1][0], 4 * size1*sizeof(float));
memcpy(&copyptr[4 + 4*(size0 + size1)], &data.arrays[2][0], 4 * size2*sizeof(float));

if (mBufferPointer == NULL) // if we can't persistently map the buffer we need to upload it after all lights have been added.
{
glBindBuffer(mBufferType, mBufferId);
glBufferSubData(mBufferType, mIndex, totalsize * 4 * sizeof(float), copyptr);
}

unsigned int bufferindex = mIndex;
mIndex += totalsize;
draw_dlight += (totalsize-1) / 2;
Expand All @@ -140,13 +118,6 @@ int FLightBuffer::UploadLights(FDynLightData &data)

// The only active statement is the call to Clear(); the upload code below is
// commented out.
// NOTE(review): this listing comes from a diff shown without +/- markers; the
// commented-out block may be part of what the commit removes - confirm against
// the repository.
void FLightBuffer::Finish()
{
/*
if (!(gl.flags & RFL_BUFFER_STORAGE)) // if we can't persistently map the buffer we need to upload it after all lights have been added.
{
glBindBuffer(mBufferType, mBufferId);
glBufferSubData(mBufferType, 0, mBufferArray.Size() * sizeof(float), &mBufferArray[0]);
}
*/
Clear();
}

Expand Down
12 changes: 3 additions & 9 deletions src/gl/shaders/gl_shader.cpp
Expand Up @@ -94,17 +94,11 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *

if (lightbuffertype == GL_UNIFORM_BUFFER)
{
vp_comb.Format("#version 130\n#extension GL_ARB_uniform_buffer_object : require\n#define NUM_UBO_LIGHTS %d\n", lightbuffersize);
vp_comb.Format("#version 330 core\n#extension GL_ARB_uniform_buffer_object : require\n#define NUM_UBO_LIGHTS %d\n", lightbuffersize);
}
else
{
vp_comb = "#version 400 compatibility\n#extension GL_ARB_shader_storage_buffer_object : require\n#define SHADER_STORAGE_LIGHTS\n";
}

if (!(gl.flags & RFL_BUFFER_STORAGE))
{
// we only want the uniform array hack in the shader if we actually need it.
vp_comb << "#define UNIFORM_VB\n";
vp_comb = "#version 400 core\n#extension GL_ARB_shader_storage_buffer_object : require\n#define SHADER_STORAGE_LIGHTS\n";
}

vp_comb << defines << i_data.GetString().GetChars();
Expand Down Expand Up @@ -306,7 +300,7 @@ FShader *FShaderManager::Compile (const char *ShaderName, const char *ShaderPath
void FShader::ApplyMatrices(VSMatrix *proj, VSMatrix *view)
{

if (gl.flags & RFL_SEPARATE_SHADER_OBJECTS)
if (gl.flags & RFL_SEPARATE_SHADER_OBJECTS) // this check is just for safety. All supported hardware reports this extension as being present.
{
glProgramUniformMatrix4fv(hShader, projectionmatrix_index, 1, false, proj->get());
glProgramUniformMatrix4fv(hShader, viewmatrix_index, 1, false, view->get());
Expand Down
15 changes: 2 additions & 13 deletions src/gl/system/gl_interface.cpp
Expand Up @@ -52,8 +52,6 @@ RenderContext gl;

int occlusion_type=0;

CVAR(Bool, gl_persistent_avail, false, CVAR_NOSET);

//==========================================================================
//
//
Expand Down Expand Up @@ -118,11 +116,10 @@ void gl_LoadExtensions()
if (version == NULL) version = (const char*)glGetString(GL_VERSION);
else Printf("Emulating OpenGL v %s\n", version);


// Don't even start if it's lower than 3.0
if (strcmp(version, "3.0") < 0)
if (strcmp(version, "3.3") < 0 || !CheckExtension("GL_ARB_buffer_storage"))
{
I_FatalError("Unsupported OpenGL version.\nAt least GL 3.0 is required to run " GAMENAME ".\n");
I_FatalError("Unsupported OpenGL version.\nAt least OpenGL 3.3 and the »GL_ARB_buffer_storage« extension is required to run " GAMENAME ".\n");
}

// add 0.01 to account for roundoff errors making the number a tad smaller than the actual version
Expand All @@ -134,16 +131,8 @@ void gl_LoadExtensions()
if (CheckExtension("GL_ARB_texture_compression")) gl.flags|=RFL_TEXTURE_COMPRESSION;
if (CheckExtension("GL_EXT_texture_compression_s3tc")) gl.flags|=RFL_TEXTURE_COMPRESSION_S3TC;
if (CheckExtension("GL_ARB_shader_storage_buffer_object")) gl.flags |= RFL_SHADER_STORAGE_BUFFER;
if (CheckExtension("GL_ARB_buffer_storage") && !Args->CheckParm("-nopersistentbuffers"))
{
gl.flags |= RFL_BUFFER_STORAGE; // the cmdline option is for testing the fallback implementation on newer hardware.
gl_persistent_avail = true;
}
if (CheckExtension("GL_ARB_separate_shader_objects")) gl.flags |= RFL_SEPARATE_SHADER_OBJECTS;
if (!CheckExtension("GL_ARB_compatibility")) gl.flags |= RFL_COREPROFILE;

if (!(gl.flags & (RFL_COREPROFILE|RFL_BUFFER_STORAGE)) && !strstr(gl.vendorstring, "NVIDIA Corporation")) gl.flags |= RFL_NOBUFFER;

glGetIntegerv(GL_MAX_TEXTURE_SIZE,&gl.max_texturesize);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
}
Expand Down
3 changes: 0 additions & 3 deletions src/gl/system/gl_interface.h
Expand Up @@ -10,11 +10,8 @@ enum RenderFlags
RFL_TEXTURE_COMPRESSION_S3TC=2,

RFL_SEPARATE_SHADER_OBJECTS = 4, // we need this extension for glProgramUniform. On hardware not supporting it we need some rather clumsy workarounds
RFL_BUFFER_STORAGE = 8, // allows persistently mapped buffers, which are the only efficient way to actually use a dynamic vertex buffer. If this isn't present, a workaround with uniform arrays is used.
RFL_SHADER_STORAGE_BUFFER = 16, // to be used later for a parameter buffer
RFL_BASEINDEX = 32, // currently unused
RFL_COREPROFILE = 64,
RFL_NOBUFFER = 128, // the static buffer makes no sense on GL 3.x AMD and Intel hardware, as long as compatibility mode is on
};

enum TexMode
Expand Down

0 comments on commit a8e9c18

Please sign in to comment.