346 changes: 221 additions & 125 deletions Source/Core/VideoCommon/Src/PixelShaderGen.cpp

Large diffs are not rendered by default.

17 changes: 15 additions & 2 deletions Source/Core/VideoCommon/Src/PixelShaderGen.h
Expand Up @@ -28,8 +28,8 @@
#define I_INDTEXSCALE "cindscale"
#define I_INDTEXMTX "cindmtx"
#define I_FOG "cfog"
#define I_PLIGHTS "cLights"
#define I_PMATERIALS "cmtrl"
#define I_PLIGHTS "cPLights"
#define I_PMATERIALS "cPmtrl"

#define C_COLORMATRIX 0 // 0
#define C_COLORS 0 // 0
Expand All @@ -47,6 +47,19 @@
#define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 115

// Pixel-shader uniform lookup table: one s_svar entry per shader constant,
// giving the uniform's name (I_* define), its base constant register (C_*
// define) and a size (presumably the number of consecutive registers the
// constant occupies -- TODO confirm the unit).
// Annoying sure, can be removed once we get up to GLSL ~1.3
const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
{I_KCOLORS, C_KCOLORS, 4 },
{I_ALPHA, C_ALPHA, 1 },
{I_TEXDIMS, C_TEXDIMS, 8 },
{I_ZBIAS , C_ZBIAS, 2 },
{I_INDTEXSCALE , C_INDTEXSCALE, 2 },
{I_INDTEXMTX, C_INDTEXMTX, 6 },
{I_FOG, C_FOG, 3 },
{I_PLIGHTS, C_PLIGHTS, 40 },
{I_PMATERIALS, C_PMATERIALS, 4 },
};

// DO NOT make anything in this class virtual.
template<bool safe>
class _PIXELSHADERUID
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoCommon/Src/PixelShaderManager.cpp
Expand Up @@ -85,6 +85,8 @@ void PixelShaderManager::Shutdown()

void PixelShaderManager::SetConstants()
{
if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
Dirty();
for (int i = 0; i < 2; ++i)
{
if (s_nColorsChanged[i])
Expand Down
4 changes: 0 additions & 4 deletions Source/Core/VideoCommon/Src/TextureCacheBase.cpp
Expand Up @@ -831,9 +831,5 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat

entry->frameCount = frameCount;

g_renderer->ResetAPIState(); // reset any game specific settings

entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat);

g_renderer->RestoreAPIState();
}
440 changes: 236 additions & 204 deletions Source/Core/VideoCommon/Src/TextureConversionShader.cpp

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Source/Core/VideoCommon/Src/VertexLoader.cpp
Expand Up @@ -44,8 +44,10 @@
#include "XFMemory.h"
extern float GC_ALIGNED16(g_fProjectionMatrix[16]);
#ifndef _M_GENERIC
#ifndef __APPLE__
#define USE_JIT
#endif
#endif

#define COMPILED_CODE_SIZE 4096

Expand Down
270 changes: 177 additions & 93 deletions Source/Core/VideoCommon/Src/VertexShaderGen.cpp

Large diffs are not rendered by default.

34 changes: 29 additions & 5 deletions Source/Core/VideoCommon/Src/VertexShaderGen.h
Expand Up @@ -21,9 +21,24 @@
#include "XFMemory.h"
#include "VideoCommon.h"

#define SHADER_POSMTX_ATTRIB 1
#define SHADER_NORM1_ATTRIB 6
#define SHADER_NORM2_ATTRIB 7
// TODO should be reordered
#define SHADER_POSITION_ATTRIB 0
#define SHADER_POSMTX_ATTRIB 1
#define SHADER_NORM0_ATTRIB 2
#define SHADER_NORM1_ATTRIB 3
#define SHADER_NORM2_ATTRIB 4
#define SHADER_COLOR0_ATTRIB 5
#define SHADER_COLOR1_ATTRIB 6

#define SHADER_TEXTURE0_ATTRIB 8
#define SHADER_TEXTURE1_ATTRIB 9
#define SHADER_TEXTURE2_ATTRIB 10
#define SHADER_TEXTURE3_ATTRIB 11
#define SHADER_TEXTURE4_ATTRIB 12
#define SHADER_TEXTURE5_ATTRIB 13
#define SHADER_TEXTURE6_ATTRIB 14
#define SHADER_TEXTURE7_ATTRIB 15



// shader variables
Expand All @@ -46,8 +61,17 @@
#define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64)
#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32)
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 4)

#define C_VENVCONST_END (C_DEPTHPARAMS + 1)
// Vertex-shader uniform lookup table: one s_svar entry per shader constant
// (uniform name I_*, base constant register C_*, size).
// The sizes of the matrix entries equal the spacing between the C_* register
// defines (e.g. C_NORMALMATRICES is C_TRANSFORMMATRICES + 64), so `size`
// appears to count constant registers -- TODO confirm the unit.
const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
{I_PROJECTION , C_PROJECTION, 4 },
{I_MATERIALS, C_MATERIALS, 4 },
{I_LIGHTS, C_LIGHTS, 40 },
{I_TEXMATRICES, C_TEXMATRICES, 24 },
{I_TRANSFORMMATRICES , C_TRANSFORMMATRICES, 64 },
{I_NORMALMATRICES , C_NORMALMATRICES, 32 },
{I_POSTTRANSFORMMATRICES, C_POSTTRANSFORMMATRICES, 64 },
{I_DEPTHPARAMS, C_DEPTHPARAMS, 1 },
};
template<bool safe>
class _VERTEXSHADERUID
{
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoCommon/Src/VertexShaderManager.cpp
Expand Up @@ -194,6 +194,8 @@ void VertexShaderManager::Dirty()
// TODO: A cleaner way to control the matrices without making a mess in the parameters field
void VertexShaderManager::SetConstants()
{
if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
Dirty();
if (nTransformMatricesChanged[0] >= 0)
{
int startn = nTransformMatricesChanged[0] / 4;
Expand Down
9 changes: 7 additions & 2 deletions Source/Core/VideoCommon/Src/VideoCommon.h
Expand Up @@ -102,8 +102,7 @@ typedef enum
API_D3D9_SM20 = 4,
API_D3D9 = 6,
API_D3D11 = 8,
API_GLSL = 16,
API_NONE = 32
API_NONE = 16
} API_TYPE;

inline u32 RGBA8ToRGBA6ToRGBA8(u32 src)
Expand Down Expand Up @@ -149,5 +148,11 @@ inline unsigned int GetPow2(unsigned int val)
++ret;
return ret;
}
// Describes one named shader constant. Used as the element type of the
// PSVar_Loc / VSVar_Loc lookup tables, which aggregate-initialise the fields
// in declaration order: { name, reg, size }.
struct s_svar
{
// Uniform name string (the tables fill it from the I_* defines).
const char *name;
// Base constant register (the tables fill it from the C_* defines).
const unsigned int reg;
// Extent of the constant; presumably a register count -- TODO confirm unit.
const unsigned int size;
};

#endif // _VIDEOCOMMON_H
5 changes: 4 additions & 1 deletion Source/Core/VideoCommon/Src/VideoConfig.cpp
Expand Up @@ -76,6 +76,7 @@ void VideoConfig::Load(const char *ini_file)
iniFile.Get("Settings", "AnaglyphStereoSeparation", &iAnaglyphStereoSeparation, 200);
iniFile.Get("Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle, 0);
iniFile.Get("Settings", "EnablePixelLighting", &bEnablePixelLighting, 0);
iniFile.Get("Settings", "HackedBufferUpload", &bHackedBufferUpload, 0);

iniFile.Get("Settings", "MSAA", &iMultisampleMode, 0);
iniFile.Get("Settings", "EFBScale", &iEFBScale, 2); // native
Expand Down Expand Up @@ -134,6 +135,7 @@ void VideoConfig::GameIniLoad(const char *ini_file)
iniFile.GetIfExists("Video_Settings", "AnaglyphStereoSeparation", &iAnaglyphStereoSeparation);
iniFile.GetIfExists("Video_Settings", "AnaglyphFocalAngle", &iAnaglyphFocalAngle);
iniFile.GetIfExists("Video_Settings", "EnablePixelLighting", &bEnablePixelLighting);
iniFile.GetIfExists("Video_Settings", "HackedBufferUpload", &bHackedBufferUpload);
iniFile.GetIfExists("Video_Settings", "MSAA", &iMultisampleMode);
iniFile.GetIfExists("Video_Settings", "EFBScale", &iEFBScale); // integral
iniFile.GetIfExists("Video_Settings", "DstAlphaPass", &bDstAlphaPass);
Expand Down Expand Up @@ -172,6 +174,7 @@ void VideoConfig::VerifyValidity()
if (!backend_info.bSupports3DVision) b3DVision = false;
if (!backend_info.bSupportsFormatReinterpretation) bEFBEmulateFormatChanges = false;
if (!backend_info.bSupportsPixelLighting) bEnablePixelLighting = false;
if (backend_info.APIType != API_OPENGL) backend_info.bSupportsGLSLUBO = false;
}

void VideoConfig::Save(const char *ini_file)
Expand Down Expand Up @@ -202,7 +205,7 @@ void VideoConfig::Save(const char *ini_file)
iniFile.Set("Settings", "AnaglyphStereoSeparation", iAnaglyphStereoSeparation);
iniFile.Set("Settings", "AnaglyphFocalAngle", iAnaglyphFocalAngle);
iniFile.Set("Settings", "EnablePixelLighting", bEnablePixelLighting);

iniFile.Set("Settings", "HackedBufferUpload", bHackedBufferUpload);

iniFile.Set("Settings", "ShowEFBCopyRegions", bShowEFBCopyRegions);
iniFile.Set("Settings", "MSAA", iMultisampleMode);
Expand Down
7 changes: 7 additions & 0 deletions Source/Core/VideoCommon/Src/VideoConfig.h
Expand Up @@ -133,6 +133,7 @@ struct VideoConfig
bool bZTPSpeedHack; // The Legend of Zelda: Twilight Princess
bool bUseBBox;
bool bEnablePixelLighting;
bool bHackedBufferUpload;

int iLog; // CONF_ bits
int iSaveTargetId; // TODO: Should be dropped
Expand Down Expand Up @@ -162,6 +163,12 @@ struct VideoConfig
bool bSupportsDualSourceBlend; // only supported by D3D11 and OpenGL
bool bSupportsFormatReinterpretation;
bool bSupportsPixelLighting;

bool bSupportsGLSLUBO;
bool bSupportsGLSLCache;
bool bSupportsGLPinnedMemory;
bool bSupportsGLSync;
bool bSupportsGLBaseVertex;
} backend_info;

// Utility
Expand Down
4 changes: 4 additions & 0 deletions Source/Plugins/Plugin_VideoDX11/Src/FramebufferManager.cpp
Expand Up @@ -208,6 +208,8 @@ void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight)

void XFBSource::CopyEFB(float Gamma)
{
g_renderer->ResetAPIState(); // reset any game specific settings

// Copy EFB data to XFB and restore render target again
const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)texWidth, (float)texHeight);

Expand All @@ -222,6 +224,8 @@ void XFBSource::CopyEFB(float Gamma)

D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(),
FramebufferManager::GetEFBDepthTexture()->GetDSV());

g_renderer->RestoreAPIState();
}

} // namespace DX11
100 changes: 50 additions & 50 deletions Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp
Expand Up @@ -41,39 +41,39 @@ namespace DX11
// TODO: Find sensible values for these two
const UINT IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 8;
const UINT VBUFFER_SIZE = VertexManager::MAXVBUFFERSIZE;
const UINT MAXVBUFFER_COUNT = 2;
const UINT MAX_VBUFFER_COUNT = 2;

void VertexManager::CreateDeviceObjects()
{
D3D11_BUFFER_DESC bufdesc = CD3D11_BUFFER_DESC(IBUFFER_SIZE,
D3D11_BIND_INDEX_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE);

m_vertexDrawOffset = 0;
m_triangleDrawIndex = 0;
m_lineDrawIndex = 0;
m_pointDrawIndex = 0;
m_indexBuffers = new PID3D11Buffer[MAXVBUFFER_COUNT];
m_vertexBuffers = new PID3D11Buffer[MAXVBUFFER_COUNT];
for (m_activeIndexBuffer = 0; m_activeIndexBuffer < MAXVBUFFER_COUNT; m_activeIndexBuffer++)
m_vertex_draw_offset = 0;
m_triangle_draw_index = 0;
m_line_draw_index = 0;
m_point_draw_index = 0;
m_index_buffers = new PID3D11Buffer[MAX_VBUFFER_COUNT];
m_vertex_buffers = new PID3D11Buffer[MAX_VBUFFER_COUNT];
for (m_current_index_buffer = 0; m_current_index_buffer < MAX_VBUFFER_COUNT; m_current_index_buffer++)
{
m_indexBuffers[m_activeIndexBuffer] = NULL;
CHECK(SUCCEEDED(D3D::device->CreateBuffer(&bufdesc, NULL, &m_indexBuffers[m_activeIndexBuffer])),
m_index_buffers[m_current_index_buffer] = NULL;
CHECK(SUCCEEDED(D3D::device->CreateBuffer(&bufdesc, NULL, &m_index_buffers[m_current_index_buffer])),
"Failed to create index buffer.");
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_indexBuffers[m_activeIndexBuffer], "index buffer of VertexManager");
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_index_buffers[m_current_index_buffer], "index buffer of VertexManager");
}
bufdesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
bufdesc.ByteWidth = VBUFFER_SIZE;
for (m_activeVertexBuffer = 0; m_activeVertexBuffer < MAXVBUFFER_COUNT; m_activeVertexBuffer++)
for (m_current_vertex_buffer = 0; m_current_vertex_buffer < MAX_VBUFFER_COUNT; m_current_vertex_buffer++)
{
m_vertexBuffers[m_activeVertexBuffer] = NULL;
CHECK(SUCCEEDED(D3D::device->CreateBuffer(&bufdesc, NULL, &m_vertexBuffers[m_activeVertexBuffer])),
m_vertex_buffers[m_current_vertex_buffer] = NULL;
CHECK(SUCCEEDED(D3D::device->CreateBuffer(&bufdesc, NULL, &m_vertex_buffers[m_current_vertex_buffer])),
"Failed to create vertex buffer.");
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_vertexBuffers[m_activeVertexBuffer], "Vertex buffer of VertexManager");
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_vertex_buffers[m_current_vertex_buffer], "Vertex buffer of VertexManager");
}
m_activeVertexBuffer = 0;
m_activeIndexBuffer = 0;
m_indexBufferCursor = IBUFFER_SIZE;
m_vertexBufferCursor = VBUFFER_SIZE;
m_current_vertex_buffer = 0;
m_current_index_buffer = 0;
m_index_buffer_cursor = IBUFFER_SIZE;
m_vertex_buffer_cursor = VBUFFER_SIZE;
m_lineShader.Init();
m_pointShader.Init();
}
Expand All @@ -82,10 +82,10 @@ void VertexManager::DestroyDeviceObjects()
{
m_pointShader.Shutdown();
m_lineShader.Shutdown();
for (m_activeVertexBuffer = 0; m_activeVertexBuffer < MAXVBUFFER_COUNT; m_activeVertexBuffer++)
for (m_current_vertex_buffer = 0; m_current_vertex_buffer < MAX_VBUFFER_COUNT; m_current_vertex_buffer++)
{
SAFE_RELEASE(m_vertexBuffers[m_activeVertexBuffer]);
SAFE_RELEASE(m_indexBuffers[m_activeVertexBuffer]);
SAFE_RELEASE(m_vertex_buffers[m_current_vertex_buffer]);
SAFE_RELEASE(m_index_buffers[m_current_vertex_buffer]);
}

}
Expand All @@ -100,47 +100,47 @@ VertexManager::~VertexManager()
DestroyDeviceObjects();
}

void VertexManager::LoadBuffers()
void VertexManager::PrepareDrawBuffers()
{
D3D11_MAPPED_SUBRESOURCE map;

UINT vSize = UINT(s_pCurBufferPointer - s_pBaseBufferPointer);
D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE;
if (m_vertexBufferCursor + vSize >= VBUFFER_SIZE)
if (m_vertex_buffer_cursor + vSize >= VBUFFER_SIZE)
{
// Wrap around
m_activeVertexBuffer = (m_activeVertexBuffer + 1) % MAXVBUFFER_COUNT;
m_vertexBufferCursor = 0;
m_current_vertex_buffer = (m_current_vertex_buffer + 1) % MAX_VBUFFER_COUNT;
m_vertex_buffer_cursor = 0;
MapType = D3D11_MAP_WRITE_DISCARD;
}

D3D::context->Map(m_vertexBuffers[m_activeVertexBuffer], 0, MapType, 0, &map);
D3D::context->Map(m_vertex_buffers[m_current_vertex_buffer], 0, MapType, 0, &map);

memcpy((u8*)map.pData + m_vertexBufferCursor, s_pBaseBufferPointer, vSize);
D3D::context->Unmap(m_vertexBuffers[m_activeVertexBuffer], 0);
m_vertexDrawOffset = m_vertexBufferCursor;
m_vertexBufferCursor += vSize;
memcpy((u8*)map.pData + m_vertex_buffer_cursor, s_pBaseBufferPointer, vSize);
D3D::context->Unmap(m_vertex_buffers[m_current_vertex_buffer], 0);
m_vertex_draw_offset = m_vertex_buffer_cursor;
m_vertex_buffer_cursor += vSize;

UINT iCount = IndexGenerator::GetTriangleindexLen() +
IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen();
MapType = D3D11_MAP_WRITE_NO_OVERWRITE;
if (m_indexBufferCursor + iCount >= (IBUFFER_SIZE / sizeof(u16)))
if (m_index_buffer_cursor + iCount >= (IBUFFER_SIZE / sizeof(u16)))
{
// Wrap around
m_activeIndexBuffer = (m_activeIndexBuffer + 1) % MAXVBUFFER_COUNT;
m_indexBufferCursor = 0;
m_current_index_buffer = (m_current_index_buffer + 1) % MAX_VBUFFER_COUNT;
m_index_buffer_cursor = 0;
MapType = D3D11_MAP_WRITE_DISCARD;
}
D3D::context->Map(m_indexBuffers[m_activeIndexBuffer], 0, MapType, 0, &map);

m_triangleDrawIndex = m_indexBufferCursor;
m_lineDrawIndex = m_triangleDrawIndex + IndexGenerator::GetTriangleindexLen();
m_pointDrawIndex = m_lineDrawIndex + IndexGenerator::GetLineindexLen();
memcpy((u16*)map.pData + m_triangleDrawIndex, GetTriangleIndexBuffer(), sizeof(u16) * IndexGenerator::GetTriangleindexLen());
memcpy((u16*)map.pData + m_lineDrawIndex, GetLineIndexBuffer(), sizeof(u16) * IndexGenerator::GetLineindexLen());
memcpy((u16*)map.pData + m_pointDrawIndex, GetPointIndexBuffer(), sizeof(u16) * IndexGenerator::GetPointindexLen());
D3D::context->Unmap(m_indexBuffers[m_activeIndexBuffer], 0);
m_indexBufferCursor += iCount;
D3D::context->Map(m_index_buffers[m_current_index_buffer], 0, MapType, 0, &map);

m_triangle_draw_index = m_index_buffer_cursor;
m_line_draw_index = m_triangle_draw_index + IndexGenerator::GetTriangleindexLen();
m_point_draw_index = m_line_draw_index + IndexGenerator::GetLineindexLen();
memcpy((u16*)map.pData + m_triangle_draw_index, GetTriangleIndexBuffer(), sizeof(u16) * IndexGenerator::GetTriangleindexLen());
memcpy((u16*)map.pData + m_line_draw_index, GetLineIndexBuffer(), sizeof(u16) * IndexGenerator::GetLineindexLen());
memcpy((u16*)map.pData + m_point_draw_index, GetPointIndexBuffer(), sizeof(u16) * IndexGenerator::GetPointindexLen());
D3D::context->Unmap(m_index_buffers[m_current_index_buffer], 0);
m_index_buffer_cursor += iCount;
}

static const float LINE_PT_TEX_OFFSETS[8] = {
Expand All @@ -149,13 +149,13 @@ static const float LINE_PT_TEX_OFFSETS[8] = {

void VertexManager::Draw(UINT stride)
{
D3D::context->IASetVertexBuffers(0, 1, &m_vertexBuffers[m_activeVertexBuffer], &stride, &m_vertexDrawOffset);
D3D::context->IASetIndexBuffer(m_indexBuffers[m_activeIndexBuffer], DXGI_FORMAT_R16_UINT, 0);
D3D::context->IASetVertexBuffers(0, 1, &m_vertex_buffers[m_current_vertex_buffer], &stride, &m_vertex_draw_offset);
D3D::context->IASetIndexBuffer(m_index_buffers[m_current_index_buffer], DXGI_FORMAT_R16_UINT, 0);

if (IndexGenerator::GetNumTriangles() > 0)
{
D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
D3D::context->DrawIndexed(IndexGenerator::GetTriangleindexLen(), m_triangleDrawIndex, 0);
D3D::context->DrawIndexed(IndexGenerator::GetTriangleindexLen(), m_triangle_draw_index, 0);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
// Disable culling for lines and points
Expand All @@ -177,7 +177,7 @@ void VertexManager::Draw(UINT stride)
texOffset, vpWidth, vpHeight, texOffsetEnable))
{
D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_LINELIST);
D3D::context->DrawIndexed(IndexGenerator::GetLineindexLen(), m_lineDrawIndex, 0);
D3D::context->DrawIndexed(IndexGenerator::GetLineindexLen(), m_line_draw_index, 0);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);

D3D::context->GSSetShader(NULL, NULL, 0);
Expand All @@ -199,7 +199,7 @@ void VertexManager::Draw(UINT stride)
texOffset, vpWidth, vpHeight, texOffsetEnable))
{
D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
D3D::context->DrawIndexed(IndexGenerator::GetPointindexLen(), m_pointDrawIndex, 0);
D3D::context->DrawIndexed(IndexGenerator::GetPointindexLen(), m_point_draw_index, 0);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);

D3D::context->GSSetShader(NULL, NULL, 0);
Expand Down Expand Up @@ -265,7 +265,7 @@ void VertexManager::vFlush()
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
return;
}
LoadBuffers();
PrepareDrawBuffers();
unsigned int stride = g_nativeVertexFmt->GetVertexStride();
g_nativeVertexFmt->SetupVertexPointers();
g_renderer->ApplyState(useDstAlpha);
Expand Down
24 changes: 12 additions & 12 deletions Source/Plugins/Plugin_VideoDX11/Src/VertexManager.h
Expand Up @@ -37,22 +37,22 @@ class VertexManager : public ::VertexManager

private:

void LoadBuffers();
void Draw(UINT stride);
void PrepareDrawBuffers();
void Draw(u32 stride);
// temp
void vFlush();

UINT m_indexBufferCursor;
UINT m_vertexBufferCursor;
UINT m_vertexDrawOffset;
UINT m_triangleDrawIndex;
UINT m_lineDrawIndex;
UINT m_pointDrawIndex;
UINT m_activeVertexBuffer;
UINT m_activeIndexBuffer;
u32 m_vertex_buffer_cursor;
u32 m_vertex_draw_offset;
u32 m_index_buffer_cursor;
u32 m_current_vertex_buffer;
u32 m_current_index_buffer;
u32 m_triangle_draw_index;
u32 m_line_draw_index;
u32 m_point_draw_index;
typedef ID3D11Buffer* PID3D11Buffer;
PID3D11Buffer* m_indexBuffers;
PID3D11Buffer* m_vertexBuffers;
PID3D11Buffer* m_index_buffers;
PID3D11Buffer* m_vertex_buffers;

LineGeometryShader m_lineShader;
PointGeometryShader m_pointShader;
Expand Down
1 change: 1 addition & 0 deletions Source/Plugins/Plugin_VideoDX11/Src/VideoBackend.h
Expand Up @@ -15,6 +15,7 @@ class VideoBackend : public VideoBackendHardware
std::string GetName();

void Video_Prepare();
void Video_Cleanup();

void ShowConfig(void* parent);

Expand Down
4 changes: 4 additions & 0 deletions Source/Plugins/Plugin_VideoDX11/Src/main.cpp
Expand Up @@ -208,6 +208,7 @@ void VideoBackend::Shutdown()
{
s_BackendInitialized = false;

// TODO: should be in Video_Cleanup
if (g_renderer)
{
s_efbAccessRequested = FALSE;
Expand Down Expand Up @@ -236,4 +237,7 @@ void VideoBackend::Shutdown()
}
}

// Backend cleanup hook. Intentionally empty for now: the teardown work is
// still done in VideoBackend::Shutdown(), which carries a TODO to move it here.
void VideoBackend::Video_Cleanup()
{
}

}
226 changes: 172 additions & 54 deletions Source/Plugins/Plugin_VideoDX9/Src/D3DBase.cpp

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions Source/Plugins/Plugin_VideoDX9/Src/D3DBase.h
Expand Up @@ -100,12 +100,23 @@ void ChangeSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value);

void RefreshVertexDeclaration();
void SetVertexDeclaration(LPDIRECT3DVERTEXDECLARATION9 decl);
void ChangeVertexDeclaration(LPDIRECT3DVERTEXDECLARATION9 decl);

void RefreshVertexShader();
void SetVertexShader(LPDIRECT3DVERTEXSHADER9 shader);
void ChangeVertexShader(LPDIRECT3DVERTEXSHADER9 shader);

void RefreshPixelShader();
void SetPixelShader(LPDIRECT3DPIXELSHADER9 shader);
void ChangePixelShader(LPDIRECT3DPIXELSHADER9 shader);

void SetStreamSource(UINT StreamNumber,IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride);
void ChangeStreamSource(UINT StreamNumber,IDirect3DVertexBuffer9* pStreamData,UINT OffsetInBytes,UINT Stride);
void RefreshStreamSource(UINT StreamNumber);

void SetIndices(LPDIRECT3DINDEXBUFFER9 pIndexData);
void ChangeIndices(LPDIRECT3DINDEXBUFFER9 pIndexData);
void RefreshIndices();

void ApplyCachedState();

Expand Down
30 changes: 18 additions & 12 deletions Source/Plugins/Plugin_VideoDX9/Src/D3DUtil.cpp
Expand Up @@ -198,6 +198,8 @@ const int TS[6][2] =
void RestoreShaders()
{
D3D::SetTexture(0, 0);
D3D::RefreshStreamSource(0);
D3D::RefreshIndices();
D3D::RefreshVertexDeclaration();
D3D::RefreshPixelShader();
D3D::RefreshVertexShader();
Expand All @@ -217,9 +219,9 @@ void CD3DFont::SetRenderStates()
{
D3D::SetTexture(0, m_pTexture);

dev->SetPixelShader(0);
dev->SetVertexShader(0);

D3D::ChangePixelShader(0);
D3D::ChangeVertexShader(0);
D3D::ChangeVertexDeclaration(0);
dev->SetFVF(D3DFVF_FONT2DVERTEX);

for (int i = 0; i < 6; i++)
Expand All @@ -236,7 +238,7 @@ int CD3DFont::DrawTextScaled(float x, float y, float fXScale, float fYScale, flo
return 0;

SetRenderStates();
dev->SetStreamSource(0, m_pVB, 0, sizeof(FONT2DVERTEX));
D3D::ChangeStreamSource(0, m_pVB, 0, sizeof(FONT2DVERTEX));

float vpWidth = 1;
float vpHeight = 1;
Expand Down Expand Up @@ -389,9 +391,10 @@ void drawShadedTexQuad(IDirect3DTexture9 *texture,
{ 1.0f - dw,-1.0f + dh, 0.0f,1.0f, u2, v2, sw, sh, g},
{ 1.0f - dw, 1.0f + dh, 0.0f,1.0f, u2, v1, sw, sh, g}
};
dev->SetVertexShader(Vshader);
dev->SetPixelShader(PShader);
D3D::ChangeVertexShader(Vshader);
D3D::ChangePixelShader(PShader);
D3D::SetTexture(0, texture);
D3D::ChangeVertexDeclaration(0);
dev->SetFVF(D3DFVF_XYZW | D3DFVF_TEX3 | D3DFVF_TEXCOORDSIZE1(2));
dev->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, coords, sizeof(Q2DVertex));
RestoreShaders();
Expand Down Expand Up @@ -424,9 +427,10 @@ void drawShadedTexSubQuad(IDirect3DTexture9 *texture,
{ rDest->right - dw , rDest->top + dh, 1.0f,1.0f, u2, v2, sw, sh, g},
{ rDest->right - dw , rDest->bottom + dh, 1.0f,1.0f, u2, v1, sw, sh, g}
};
dev->SetVertexShader(Vshader);
dev->SetPixelShader(PShader);
D3D::ChangeVertexShader(Vshader);
D3D::ChangePixelShader(PShader);
D3D::SetTexture(0, texture);
D3D::ChangeVertexDeclaration(0);
dev->SetFVF(D3DFVF_XYZW | D3DFVF_TEX3 | D3DFVF_TEXCOORDSIZE1(2));
dev->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, coords, sizeof(Q2DVertex));
RestoreShaders();
Expand All @@ -442,8 +446,9 @@ void drawColorQuad(u32 Color, float x1, float y1, float x2, float y2)
{ x1, y1, 0.f, 1.f, Color },
{ x2, y1, 0.f, 1.f, Color },
};
dev->SetVertexShader(VertexShaderCache::GetClearVertexShader());
dev->SetPixelShader(PixelShaderCache::GetClearProgram());
D3D::ChangeVertexShader(VertexShaderCache::GetClearVertexShader());
D3D::ChangePixelShader(PixelShaderCache::GetClearProgram());
D3D::ChangeVertexDeclaration(0);
dev->SetFVF(D3DFVF_XYZW | D3DFVF_DIFFUSE);
dev->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coords, sizeof(CQVertex));
RestoreShaders();
Expand All @@ -457,8 +462,9 @@ void drawClearQuad(u32 Color,float z,IDirect3DPixelShader9 *PShader,IDirect3DVer
{ 1.0f, -1.0f, z, 1.0f, Color},
{-1.0f, -1.0f, z, 1.0f, Color}
};
dev->SetVertexShader(Vshader);
dev->SetPixelShader(PShader);
D3D::ChangeVertexShader(Vshader);
D3D::ChangePixelShader(PShader);
D3D::ChangeVertexDeclaration(0);
dev->SetFVF(D3DFVF_XYZW | D3DFVF_DIFFUSE);
dev->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coords, sizeof(Q2DVertex));
RestoreShaders();
Expand Down
4 changes: 4 additions & 0 deletions Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.cpp
Expand Up @@ -187,6 +187,8 @@ void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, c

void XFBSource::CopyEFB(float Gamma)
{
g_renderer->ResetAPIState(); // reset any game specific settings

// Copy EFB data to XFB and restore render target again
LPDIRECT3DSURFACE9 Rendersurf = NULL;
texture->GetSurfaceLevel(0, &Rendersurf);
Expand Down Expand Up @@ -229,6 +231,8 @@ void XFBSource::CopyEFB(float Gamma)
D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface());

Rendersurf->Release();

g_renderer->RestoreAPIState();
}

} // namespace DX9
12 changes: 12 additions & 0 deletions Source/Plugins/Plugin_VideoDX9/Src/Render.cpp
Expand Up @@ -67,6 +67,7 @@ static int s_fps = 0;
static u32 s_blendMode;
static u32 s_LastAA;
static bool IS_AMD;
static float m_fMaxPointSize;

static char *st;

Expand Down Expand Up @@ -187,6 +188,9 @@ Renderer::Renderer()
D3D::BeginFrame();
D3D::SetRenderState(D3DRS_SCISSORTESTENABLE, true);
D3D::dev->CreateOffscreenPlainSurface(s_backbuffer_width,s_backbuffer_height, D3DFMT_X8R8G8B8, D3DPOOL_SYSTEMMEM, &ScreenShootMEMSurface, NULL );
D3D::SetRenderState(D3DRS_POINTSCALEENABLE,false);
m_fMaxPointSize = D3D::GetCaps().MaxPointSize;

}

Renderer::~Renderer()
Expand Down Expand Up @@ -1280,7 +1284,15 @@ void Renderer::SetLineWidth()
// We can't change line width in D3D unless we use ID3DXLine
float fratio = xfregs.viewport.wd != 0 ? Renderer::EFBToScaledXf(1.f) : 1.0f;
float psize = bpmem.lineptwidth.linesize * fratio / 6.0f;
// Little hack to compensate for scaling problems in DX9; must be removed once scaling is fixed.
psize *= 2.0f;
if (psize > m_fMaxPointSize)
{
psize = m_fMaxPointSize;
}
D3D::SetRenderState(D3DRS_POINTSIZE, *((DWORD*)&psize));
D3D::SetRenderState(D3DRS_POINTSIZE_MIN, *((DWORD*)&psize));
D3D::SetRenderState(D3DRS_POINTSIZE_MAX, *((DWORD*)&psize));
}

void Renderer::SetSamplerState(int stage, int texindex)
Expand Down
4 changes: 4 additions & 0 deletions Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp
Expand Up @@ -82,6 +82,8 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat)
{
g_renderer->ResetAPIState(); // reset any game specific settings

const LPDIRECT3DTEXTURE9 read_texture = (srcFormat == PIXELFMT_Z24) ?
FramebufferManager::GetEFBDepthTexture() :
FramebufferManager::GetEFBColorTexture();
Expand Down Expand Up @@ -179,6 +181,8 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
D3D::SetTexture(0, NULL);
D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface());
D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface());

g_renderer->RestoreAPIState();
}

TextureCache::TCacheEntryBase* TextureCache::CreateTexture(unsigned int width, unsigned int height,
Expand Down
229 changes: 129 additions & 100 deletions Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
Expand Up @@ -44,7 +44,7 @@ namespace DX9
// These are the initially requested sizes for the buffers, expressed in elements
const u32 IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 8;
const u32 VBUFFER_SIZE = VertexManager::MAXVBUFFERSIZE;
const u32 MAXVBUFFER_COUNT = 2;
const u32 MAX_VBUFFER_COUNT = 2;

inline void DumpBadShaders()
{
Expand All @@ -67,88 +67,88 @@ inline void DumpBadShaders()

void VertexManager::CreateDeviceObjects()
{
NumVBuffers = 0;
VBuffers = NULL;
IBuffers = NULL;
m_buffers_count = 0;
m_vertex_buffers = NULL;
m_index_buffers = NULL;
D3DCAPS9 DeviceCaps = D3D::GetCaps();
u32 devicevMaxBufferSize = DeviceCaps.MaxPrimitiveCount * 3 * DeviceCaps.MaxStreamStride;
//Calculate Device Dependant size
CurrentVBufferSize = (VBUFFER_SIZE > devicevMaxBufferSize) ? devicevMaxBufferSize : VBUFFER_SIZE;
CurrentIBufferSize = (IBUFFER_SIZE > DeviceCaps.MaxVertexIndex) ? DeviceCaps.MaxVertexIndex : IBUFFER_SIZE;
m_vertex_buffer_size = (VBUFFER_SIZE > devicevMaxBufferSize) ? devicevMaxBufferSize : VBUFFER_SIZE;
m_index_buffer_size = (IBUFFER_SIZE > DeviceCaps.MaxVertexIndex) ? DeviceCaps.MaxVertexIndex : IBUFFER_SIZE;
//if device caps are not enough for Vbuffer fall back to vertex arrays
if (CurrentIBufferSize < MAXIBUFFERSIZE || CurrentVBufferSize < MAXVBUFFERSIZE) return;
if (m_index_buffer_size < MAXIBUFFERSIZE || m_vertex_buffer_size < MAXVBUFFERSIZE) return;

VBuffers = new LPDIRECT3DVERTEXBUFFER9[MAXVBUFFER_COUNT];
IBuffers = new LPDIRECT3DINDEXBUFFER9[MAXVBUFFER_COUNT];
m_vertex_buffers = new LPDIRECT3DVERTEXBUFFER9[MAX_VBUFFER_COUNT];
m_index_buffers = new LPDIRECT3DINDEXBUFFER9[MAX_VBUFFER_COUNT];

bool Fail = false;
for (CurrentVBuffer = 0; CurrentVBuffer < MAXVBUFFER_COUNT; CurrentVBuffer++)
for (m_current_vertex_buffer = 0; m_current_vertex_buffer < MAX_VBUFFER_COUNT; m_current_vertex_buffer++)
{
VBuffers[CurrentVBuffer] = NULL;
IBuffers[CurrentVBuffer] = NULL;
m_vertex_buffers[m_current_vertex_buffer] = NULL;
m_index_buffers[m_current_vertex_buffer] = NULL;
}
for (CurrentVBuffer = 0; CurrentVBuffer < MAXVBUFFER_COUNT; CurrentVBuffer++)
for (m_current_vertex_buffer = 0; m_current_vertex_buffer < MAX_VBUFFER_COUNT; m_current_vertex_buffer++)
{
if(FAILED( D3D::dev->CreateVertexBuffer( CurrentVBufferSize, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &VBuffers[CurrentVBuffer], NULL ) ) )
if(FAILED( D3D::dev->CreateVertexBuffer( m_vertex_buffer_size, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vertex_buffers[m_current_vertex_buffer], NULL ) ) )
{
Fail = true;
break;
}
if( FAILED( D3D::dev->CreateIndexBuffer( CurrentIBufferSize * sizeof(u16), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &IBuffers[CurrentVBuffer], NULL ) ) )
if( FAILED( D3D::dev->CreateIndexBuffer( m_index_buffer_size * sizeof(u16), D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &m_index_buffers[m_current_vertex_buffer], NULL ) ) )
{
Fail = true;
return;
}
}
NumVBuffers = CurrentVBuffer;
CurrentVBuffer = 0;
CurrentIBuffer = 0;
CurrentIBufferIndex = CurrentIBufferSize;
CurrentVBufferIndex = CurrentVBufferSize;

m_buffers_count = m_current_vertex_buffer;
m_current_vertex_buffer = 0;
m_current_index_buffer = 0;
m_index_buffer_cursor = m_index_buffer_size;
m_vertex_buffer_cursor = m_vertex_buffer_size;
m_current_stride = 0;
if (Fail)
{
NumVBuffers--;
if (NumVBuffers < 2)
m_buffers_count--;
if (m_buffers_count < 2)
{
//Error creating Vertex buffers. clean and fall to Vertex arrays
NumVBuffers = MAXVBUFFER_COUNT;
m_buffers_count = MAX_VBUFFER_COUNT;
DestroyDeviceObjects();
}
}
}
// Unbinds stream 0 and the index buffer, releases every D3D9 vertex/index
// buffer held in the two arrays, then frees the arrays themselves.
//
// Safe to call when the arrays were never allocated (CreateDeviceObjects()
// returns early with both pointers NULL when the device caps are too small)
// and safe to call more than once.
// NOTE(review): this does not touch m_buffers_count; callers that keep using
// the manager afterwards presumably need to zero it themselves -- confirm.
void VertexManager::DestroyDeviceObjects()
{
	D3D::SetStreamSource( 0, NULL, 0, 0);
	D3D::SetIndices(NULL);

	for (unsigned int i = 0; i < MAX_VBUFFER_COUNT; i++)
	{
		if (m_vertex_buffers && m_vertex_buffers[i])
		{
			m_vertex_buffers[i]->Release();
			m_vertex_buffers[i] = NULL;
		}

		// Guard the array pointer as well: it is NULL when creation was
		// skipped or after a previous destroy.
		if (m_index_buffers && m_index_buffers[i])
		{
			m_index_buffers[i]->Release();
			m_index_buffers[i] = NULL;
		}
	}

	// delete[] on a null pointer is a no-op, so no extra checks are needed.
	delete [] m_vertex_buffers;
	delete [] m_index_buffers;
	m_vertex_buffers = NULL;
	m_index_buffers = NULL;
}

void VertexManager::PrepareVBuffers(int stride)
void VertexManager::PrepareDrawBuffers(u32 stride)
{
if (!NumVBuffers)
if (!m_buffers_count)
{
return;
}
Expand All @@ -158,34 +158,33 @@ void VertexManager::PrepareVBuffers(int stride)
int TdataSize = IndexGenerator::GetTriangleindexLen();
int LDataSize = IndexGenerator::GetLineindexLen();
int PDataSize = IndexGenerator::GetPointindexLen();
int IndexDataSize = TdataSize + LDataSize + PDataSize;
int IndexDataSize = TdataSize + LDataSize;
DWORD LockMode = D3DLOCK_NOOVERWRITE;

if (CurrentVBufferIndex > CurrentVBufferSize - datasize)
m_vertex_buffer_cursor--;
m_vertex_buffer_cursor = m_vertex_buffer_cursor - (m_vertex_buffer_cursor % stride) + stride;
if (m_vertex_buffer_cursor > m_vertex_buffer_size - datasize)
{
LockMode = D3DLOCK_DISCARD;
CurrentVBufferIndex = 0;
CurrentVBuffer = (CurrentVBuffer + 1) % NumVBuffers;
}

if(FAILED(VBuffers[CurrentVBuffer]->Lock(CurrentVBufferIndex, datasize,(VOID**)(&pVertices), LockMode)))
m_vertex_buffer_cursor = 0;
m_current_vertex_buffer = (m_current_vertex_buffer + 1) % m_buffers_count;
}
if(FAILED(m_vertex_buffers[m_current_vertex_buffer]->Lock(m_vertex_buffer_cursor, datasize,(VOID**)(&pVertices), LockMode)))
{
DestroyDeviceObjects();
return;
}
memcpy(pVertices, s_pBaseBufferPointer, datasize);
VBuffers[CurrentVBuffer]->Unlock();
m_vertex_buffers[m_current_vertex_buffer]->Unlock();

LockMode = D3DLOCK_NOOVERWRITE;

if (CurrentIBufferIndex > CurrentIBufferSize - IndexDataSize)
if (m_index_buffer_cursor > m_index_buffer_size - IndexDataSize)
{
LockMode = D3DLOCK_DISCARD;
CurrentIBufferIndex = 0;
CurrentIBuffer = (CurrentIBuffer + 1) % NumVBuffers;
}
m_index_buffer_cursor = 0;
m_current_index_buffer = (m_current_index_buffer + 1) % m_buffers_count;
}

if(FAILED(IBuffers[CurrentIBuffer]->Lock(CurrentIBufferIndex * sizeof(u16), IndexDataSize * sizeof(u16), (VOID**)(&pIndices), LockMode )))
if(FAILED(m_index_buffers[m_current_index_buffer]->Lock(m_index_buffer_cursor * sizeof(u16), IndexDataSize * sizeof(u16), (VOID**)(&pIndices), LockMode )))
{
DestroyDeviceObjects();
return;
Expand All @@ -200,72 +199,88 @@ void VertexManager::PrepareVBuffers(int stride)
memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16));
pIndices += LDataSize;
}
if(PDataSize)
{
memcpy(pIndices, GetPointIndexBuffer(), PDataSize * sizeof(u16));
m_index_buffers[m_current_index_buffer]->Unlock();
if(m_current_stride != stride || m_vertex_buffer_cursor == 0)
{
m_current_stride = stride;
D3D::SetStreamSource( 0, m_vertex_buffers[m_current_vertex_buffer], 0, stride);
}
IBuffers[CurrentIBuffer]->Unlock();
D3D::dev->SetStreamSource( 0, VBuffers[CurrentVBuffer], CurrentVBufferIndex, stride);
if(CurrentIBufferIndex == 0)
if (m_index_buffer_cursor == 0)
{
D3D::dev->SetIndices(IBuffers[CurrentIBuffer]);
D3D::SetIndices(m_index_buffers[m_current_index_buffer]);
}

}

void VertexManager::DrawVB(int stride)
void VertexManager::DrawVertexBuffer(int stride)
{
if (IndexGenerator::GetNumTriangles() > 0)
int triangles = IndexGenerator::GetNumTriangles();
int lines = IndexGenerator::GetNumLines();
int points = IndexGenerator::GetNumPoints();
int numverts = IndexGenerator::GetNumVerts();
int StartIndex = m_index_buffer_cursor;
int basevertex = m_vertex_buffer_cursor / stride;
if (triangles > 0)
{
if (FAILED(D3D::dev->DrawIndexedPrimitive(
D3DPT_TRIANGLELIST,
0,
D3DPT_TRIANGLELIST,
basevertex,
0,
IndexGenerator::GetNumVerts(),
CurrentIBufferIndex,
IndexGenerator::GetNumTriangles())))
numverts,
StartIndex,
triangles)))
{
DumpBadShaders();
}
StartIndex += IndexGenerator::GetTriangleindexLen();
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumLines() > 0)
if (lines > 0)
{
if (FAILED(D3D::dev->DrawIndexedPrimitive(
D3DPT_LINELIST,
0,
basevertex,
0,
IndexGenerator::GetNumVerts(),
CurrentIBufferIndex + IndexGenerator::GetTriangleindexLen(),
numverts,
StartIndex,
IndexGenerator::GetNumLines())))
{
DumpBadShaders();
}
StartIndex += IndexGenerator::GetLineindexLen();
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumPoints() > 0)
if (points > 0)
{
if (FAILED(D3D::dev->DrawIndexedPrimitive(
D3DPT_POINTLIST,
0,
0,
IndexGenerator::GetNumVerts(),
CurrentIBufferIndex + IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen(),
IndexGenerator::GetNumPoints())))
//DrawIndexedPrimitive does not support point list so we have to draw the points one by one
for (int i = 0; i < points; i++)
{
DumpBadShaders();
if (FAILED(D3D::dev->DrawPrimitive(
D3DPT_POINTLIST,
basevertex + GetPointIndexBuffer()[i],
1)))
{
DumpBadShaders();
}
INCSTAT(stats.thisFrame.numDrawCalls);
}
INCSTAT(stats.thisFrame.numIndexedDrawCalls);


}

}

void VertexManager::DrawVA(int stride)
{
if (IndexGenerator::GetNumTriangles() > 0)
void VertexManager::DrawVertexArray(int stride)
{
int triangles = IndexGenerator::GetNumTriangles();
int lines = IndexGenerator::GetNumLines();
int points = IndexGenerator::GetNumPoints();
int numverts = IndexGenerator::GetNumVerts();
if (triangles > 0)
{
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_TRIANGLELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(),
0, numverts, triangles,
GetTriangleIndexBuffer(),
D3DFMT_INDEX16,
s_pBaseBufferPointer,
Expand All @@ -275,11 +290,11 @@ void VertexManager::DrawVA(int stride)
}
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumLines() > 0)
if (lines > 0)
{
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_LINELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(),
0, numverts, lines,
GetLineIndexBuffer(),
D3DFMT_INDEX16,
s_pBaseBufferPointer,
Expand All @@ -289,11 +304,11 @@ void VertexManager::DrawVA(int stride)
}
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumPoints() > 0)
if (points > 0)
{
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_POINTLIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(),
0, numverts, points,
GetPointIndexBuffer(),
D3DFMT_INDEX16,
s_pBaseBufferPointer,
Expand Down Expand Up @@ -345,7 +360,7 @@ void VertexManager::vFlush()
// set global constants
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
int stride = g_nativeVertexFmt->GetVertexStride();
u32 stride = g_nativeVertexFmt->GetVertexStride();
if (!PixelShaderCache::SetShader(DSTALPHA_NONE,g_nativeVertexFmt->m_components))
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
Expand All @@ -357,9 +372,16 @@ void VertexManager::vFlush()
goto shader_fail;

}
PrepareVBuffers(stride);
PrepareDrawBuffers(stride);
g_nativeVertexFmt->SetupVertexPointers();
if(NumVBuffers){ DrawVB(stride);} else { DrawVA(stride);}
if(m_buffers_count)
{
DrawVertexBuffer(stride);
}
else
{
DrawVertexArray(stride);
}

bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;
Expand All @@ -372,16 +394,23 @@ void VertexManager::vFlush()
}
// update alpha only
g_renderer->ApplyState(true);
if(NumVBuffers){ DrawVB(stride);} else { DrawVA(stride);}
if(m_buffers_count)
{
DrawVertexBuffer(stride);
}
else
{
DrawVertexArray(stride);
}
g_renderer->RestoreState();
}
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);

shader_fail:
if(NumVBuffers)
if(m_buffers_count)
{
CurrentIBufferIndex += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen();
CurrentVBufferIndex += IndexGenerator::GetNumVerts() * stride;
m_index_buffer_cursor += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen();
m_vertex_buffer_cursor += IndexGenerator::GetNumVerts() * stride;
}
}

Expand Down
25 changes: 13 additions & 12 deletions Source/Plugins/Plugin_VideoDX9/Src/VertexManager.h
Expand Up @@ -34,18 +34,19 @@ class VertexManager : public ::VertexManager
void CreateDeviceObjects();
void DestroyDeviceObjects();
private:
u32 CurrentVBufferIndex;
u32 CurrentVBufferSize;
u32 CurrentIBufferIndex;
u32 CurrentIBufferSize;
u32 NumVBuffers;
u32 CurrentVBuffer;
u32 CurrentIBuffer;
LPDIRECT3DVERTEXBUFFER9 *VBuffers;
LPDIRECT3DINDEXBUFFER9 *IBuffers;
void PrepareVBuffers(int stride);
void DrawVB(int stride);
void DrawVA(int stride);
u32 m_vertex_buffer_cursor;
u32 m_vertex_buffer_size;
u32 m_index_buffer_cursor;
u32 m_index_buffer_size;
u32 m_buffers_count;
u32 m_current_vertex_buffer;
u32 m_current_stride;
u32 m_current_index_buffer;
LPDIRECT3DVERTEXBUFFER9 *m_vertex_buffers;
LPDIRECT3DINDEXBUFFER9 *m_index_buffers;
void PrepareDrawBuffers(u32 stride);
void DrawVertexBuffer(int stride);
void DrawVertexArray(int stride);
// temp
void vFlush();
};
Expand Down
1 change: 1 addition & 0 deletions Source/Plugins/Plugin_VideoDX9/Src/VideoBackend.h
Expand Up @@ -15,6 +15,7 @@ class VideoBackend : public VideoBackendHardware
std::string GetName();

void Video_Prepare();
void Video_Cleanup();

void ShowConfig(void* parent);

Expand Down
4 changes: 4 additions & 0 deletions Source/Plugins/Plugin_VideoDX9/Src/main.cpp
Expand Up @@ -191,6 +191,7 @@ void VideoBackend::Shutdown()
{
s_BackendInitialized = false;

// TODO: should be in Video_Cleanup
if (g_renderer)
{
s_efbAccessRequested = FALSE;
Expand Down Expand Up @@ -219,4 +220,7 @@ void VideoBackend::Shutdown()
D3D::Shutdown();
}

void VideoBackend::Video_Cleanup() {
}

}
9 changes: 3 additions & 6 deletions Source/Plugins/Plugin_VideoOGL/CMakeLists.txt
Expand Up @@ -5,8 +5,11 @@ set(SRCS Src/FramebufferManager.cpp
Src/PerfQuery.cpp
Src/PixelShaderCache.cpp
Src/PostProcessing.cpp
Src/ProgramShaderCache.cpp
Src/RasterFont.cpp
Src/Render.cpp
Src/SamplerCache.cpp
Src/StreamBuffer.cpp
Src/TextureCache.cpp
Src/TextureConverter.cpp
Src/VertexShaderCache.cpp
Expand Down Expand Up @@ -34,12 +37,6 @@ if(wxWidgets_FOUND)
set(LIBS ${LIBS} ${wxWidgets_LIBRARIES})
endif(wxWidgets_FOUND)

if(WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
set(LIBS ${LIBS} Cg CgGL)
elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
list(APPEND LIBS "${CMAKE_SOURCE_DIR}/Externals/Cg/Cg.framework")
endif()

if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(LIBS ${LIBS} clrun)
endif()
Expand Down
32 changes: 18 additions & 14 deletions Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj
Expand Up @@ -121,8 +121,8 @@
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>opengl32.lib;cg.lib;cgGL.lib;glu32.lib;glew32s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;..\..\..\Externals\Cg;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opengl32.lib;glu32.lib;glew32s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<OutputFile>..\..\..\Binary\$(PlatformName)\Plugins\$(TargetName)$(TargetExt)</OutputFile>
</Link>
<Lib />
Expand All @@ -133,8 +133,8 @@
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>opengl32.lib;cg.lib;cgGL.lib;glu32.lib;glew64s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;..\..\..\Externals\Cg64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opengl32.lib;glu32.lib;glew64s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<OutputFile>..\..\..\Binary\$(PlatformName)\Plugins\$(TargetName)$(TargetExt)</OutputFile>
</Link>
<Lib />
Expand All @@ -147,8 +147,8 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>opengl32.lib;cg.lib;cgGL.lib;glu32.lib;glew32s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;..\..\..\Externals\Cg;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opengl32.lib;glu32.lib;glew32s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<OutputFile>..\..\..\Binary\$(PlatformName)\Plugins\$(TargetName)$(TargetExt)</OutputFile>
</Link>
<Lib />
Expand All @@ -161,8 +161,8 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>opengl32.lib;cg.lib;cgGL.lib;glu32.lib;glew32s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;..\..\..\Externals\Cg;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opengl32.lib;glu32.lib;glew32s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<OutputFile>..\..\..\Binary\$(PlatformName)\Plugins\$(TargetName)$(TargetExt)</OutputFile>
</Link>
<Lib />
Expand All @@ -175,8 +175,8 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>opengl32.lib;cg.lib;cgGL.lib;glu32.lib;glew64s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;..\..\..\Externals\Cg64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opengl32.lib;glu32.lib;glew64s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<OutputFile>..\..\..\Binary\$(PlatformName)\Plugins\$(TargetName)$(TargetExt)</OutputFile>
</Link>
<Lib />
Expand All @@ -189,8 +189,8 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>opengl32.lib;cg.lib;cgGL.lib;glu32.lib;glew64s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;..\..\..\Externals\Cg64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opengl32.lib;glu32.lib;glew64s.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>..\..\..\Externals\GLew;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<OutputFile>..\..\..\Binary\$(PlatformName)\Plugins\$(TargetName)$(TargetExt)</OutputFile>
</Link>
<Lib />
Expand All @@ -203,8 +203,11 @@
<ClCompile Include="Src\PerfQuery.cpp" />
<ClCompile Include="Src\PixelShaderCache.cpp" />
<ClCompile Include="Src\PostProcessing.cpp" />
<ClCompile Include="Src\ProgramShaderCache.cpp" />
<ClCompile Include="Src\RasterFont.cpp" />
<ClCompile Include="Src\Render.cpp" />
<ClCompile Include="Src\SamplerCache.cpp" />
<ClCompile Include="Src\StreamBuffer.cpp" />
<ClCompile Include="Src\stdafx.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
Expand All @@ -224,15 +227,16 @@
<ClInclude Include="Src\GLUtil.h" />
<ClInclude Include="Src\main.h" />
<ClInclude Include="Src\PerfQuery.h" />
<ClInclude Include="Src\PixelShaderCache.h" />
<ClInclude Include="Src\PostProcessing.h" />
<ClInclude Include="Src\ProgramShaderCache.h" />
<ClInclude Include="Src\RasterFont.h" />
<ClInclude Include="Src\Render.h" />
<ClInclude Include="Src\SamplerCache.h" />
<ClInclude Include="Src\StreamBuffer.h" />
<ClInclude Include="Src\stdafx.h" />
<ClInclude Include="Src\TextureCache.h" />
<ClInclude Include="Src\TextureConverter.h" />
<ClInclude Include="Src\VertexManager.h" />
<ClInclude Include="Src\VertexShaderCache.h" />
<ClInclude Include="Src\VideoBackend.h" />
</ItemGroup>
<ItemGroup>
Expand Down
15 changes: 9 additions & 6 deletions Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj.filters
Expand Up @@ -27,9 +27,15 @@
<ClCompile Include="Src\PostProcessing.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Src\ProgramShaderCache.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Src\Render.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Src\StreamBuffer.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Src\TextureCache.cpp">
<Filter>Render</Filter>
</ClCompile>
Expand Down Expand Up @@ -60,10 +66,10 @@
<ClInclude Include="Src\FramebufferManager.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\PixelShaderCache.h">
<ClInclude Include="Src\PostProcessing.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\PostProcessing.h">
<ClInclude Include="Src\ProgramShaderCache.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\Render.h">
Expand All @@ -72,9 +78,6 @@
<ClInclude Include="Src\TextureCache.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\VertexShaderCache.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\PerfQuery.h">
<Filter>Render</Filter>
</ClInclude>
Expand All @@ -96,4 +99,4 @@
<UniqueIdentifier>{aaa16061-dca9-4155-be44-f77538e839fc}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>
199 changes: 76 additions & 123 deletions Source/Plugins/Plugin_VideoOGL/Src/FramebufferManager.cpp
Expand Up @@ -17,6 +17,7 @@

#include "Globals.h"
#include "FramebufferManager.h"
#include "VertexShaderGen.h"

#include "TextureConverter.h"
#include "Render.h"
Expand All @@ -25,8 +26,6 @@
namespace OGL
{

extern bool s_bHaveFramebufferBlit; // comes from Render.cpp. ugly.

int FramebufferManager::m_targetWidth;
int FramebufferManager::m_targetHeight;
int FramebufferManager::m_msaaSamples;
Expand All @@ -41,7 +40,7 @@ GLuint FramebufferManager::m_resolvedFramebuffer;
GLuint FramebufferManager::m_resolvedColorTexture;
GLuint FramebufferManager::m_resolvedDepthTexture;

GLuint FramebufferManager::m_xfbFramebuffer; // Only used in MSAA mode
GLuint FramebufferManager::m_xfbFramebuffer;

FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, int msaaCoverageSamples)
{
Expand All @@ -52,7 +51,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
m_resolvedColorTexture = 0;
m_resolvedDepthTexture = 0;
m_xfbFramebuffer = 0;

m_targetWidth = targetWidth;
m_targetHeight = targetHeight;

Expand All @@ -72,7 +71,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms

// Create EFB target.

glGenFramebuffersEXT(1, &m_efbFramebuffer);
glGenFramebuffers(1, &m_efbFramebuffer);

if (m_msaaSamples <= 1)
{
Expand All @@ -83,20 +82,20 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
m_efbColor = glObj[0];
m_efbDepth = glObj[1];

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbColor);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE, m_efbColor);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_efbDepth);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE, m_efbDepth);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
glBindTexture(GL_TEXTURE_RECTANGLE, 0);

// Bind target textures to the EFB framebuffer.

glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_efbFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer);

glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, m_efbColor, 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, m_efbDepth, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, m_efbColor, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_RECTANGLE, m_efbDepth, 0);

GL_REPORT_FBO_ERROR();
}
Expand All @@ -109,67 +108,67 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
// Create EFB target renderbuffers.

GLuint glObj[2];
glGenRenderbuffersEXT(2, glObj);
glGenRenderbuffers(2, glObj);
m_efbColor = glObj[0];
m_efbDepth = glObj[1];

glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, m_efbColor);
glBindRenderbuffer(GL_RENDERBUFFER, m_efbColor);
if (m_msaaCoverageSamples)
glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, m_msaaCoverageSamples, m_msaaSamples, GL_RGBA8, m_targetWidth, m_targetHeight);
glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER, m_msaaCoverageSamples, m_msaaSamples, GL_RGBA8, m_targetWidth, m_targetHeight);
else
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, m_msaaSamples, GL_RGBA8, m_targetWidth, m_targetHeight);
glRenderbufferStorageMultisample(GL_RENDERBUFFER, m_msaaSamples, GL_RGBA8, m_targetWidth, m_targetHeight);

glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, m_efbDepth);
glBindRenderbuffer(GL_RENDERBUFFER, m_efbDepth);
if (m_msaaCoverageSamples)
glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER_EXT, m_msaaCoverageSamples, m_msaaSamples, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight);
glRenderbufferStorageMultisampleCoverageNV(GL_RENDERBUFFER, m_msaaCoverageSamples, m_msaaSamples, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight);
else
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, m_msaaSamples, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight);
glRenderbufferStorageMultisample(GL_RENDERBUFFER, m_msaaSamples, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight);

glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0);
glBindRenderbuffer(GL_RENDERBUFFER, 0);

// Bind target renderbuffers to EFB framebuffer.

glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_efbFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer);

glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, m_efbColor);
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, m_efbDepth);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, m_efbColor);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, m_efbDepth);

GL_REPORT_FBO_ERROR();

// Create resolved targets for transferring multisampled EFB to texture.

glGenFramebuffersEXT(1, &m_resolvedFramebuffer);
glGenFramebuffers(1, &m_resolvedFramebuffer);

glGenTextures(2, glObj);
m_resolvedColorTexture = glObj[0];
m_resolvedDepthTexture = glObj[1];

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedColorTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE, m_resolvedColorTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA8, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, m_resolvedDepthTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_RECTANGLE, m_resolvedDepthTexture);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_BYTE, NULL);

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
glBindTexture(GL_TEXTURE_RECTANGLE, 0);

// Bind resolved textures to resolved framebuffer.

glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_resolvedFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer);

glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, m_resolvedColorTexture, 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_RECTANGLE_ARB, m_resolvedDepthTexture, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, m_resolvedColorTexture, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_RECTANGLE, m_resolvedDepthTexture, 0);

GL_REPORT_FBO_ERROR();

// Return to EFB framebuffer.

glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_efbFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer);
}

// Create XFB framebuffer; targets will be created elsewhere.

glGenFramebuffersEXT(1, &m_xfbFramebuffer);

glGenFramebuffers(1, &m_xfbFramebuffer);
// EFB framebuffer is currently bound, make sure to clear its alpha value to 1.f
glViewport(0, 0, m_targetWidth, m_targetHeight);
glScissor(0, 0, m_targetWidth, m_targetHeight);
Expand All @@ -180,7 +179,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms

FramebufferManager::~FramebufferManager()
{
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
glBindFramebuffer(GL_FRAMEBUFFER, 0);

GLuint glObj[3];

Expand All @@ -189,7 +188,7 @@ FramebufferManager::~FramebufferManager()
glObj[0] = m_efbFramebuffer;
glObj[1] = m_resolvedFramebuffer;
glObj[2] = m_xfbFramebuffer;
glDeleteFramebuffersEXT(3, glObj);
glDeleteFramebuffers(3, glObj);
m_efbFramebuffer = 0;
m_xfbFramebuffer = 0;

Expand All @@ -204,7 +203,7 @@ FramebufferManager::~FramebufferManager()
if (m_msaaSamples <= 1)
glDeleteTextures(2, glObj);
else
glDeleteRenderbuffersEXT(2, glObj);
glDeleteRenderbuffers(2, glObj);
m_efbColor = 0;
m_efbDepth = 0;
}
Expand All @@ -224,16 +223,16 @@ GLuint FramebufferManager::GetEFBColorTexture(const EFBRectangle& sourceRc)
targetRc.ClampLL(0, 0, m_targetWidth, m_targetHeight);

// Resolve.
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, m_efbFramebuffer);
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, m_resolvedFramebuffer);
glBlitFramebufferEXT(
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer);
glBlitFramebuffer(
targetRc.left, targetRc.top, targetRc.right, targetRc.bottom,
targetRc.left, targetRc.top, targetRc.right, targetRc.bottom,
GL_COLOR_BUFFER_BIT, GL_NEAREST
);

// Return to EFB.
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_efbFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer);

return m_resolvedColorTexture;
}
Expand All @@ -254,16 +253,16 @@ GLuint FramebufferManager::GetEFBDepthTexture(const EFBRectangle& sourceRc)
targetRc.ClampLL(0, 0, m_targetWidth, m_targetHeight);

// Resolve.
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, m_efbFramebuffer);
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, m_resolvedFramebuffer);
glBlitFramebufferEXT(
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer);
glBlitFramebuffer(
targetRc.left, targetRc.top, targetRc.right, targetRc.bottom,
targetRc.left, targetRc.top, targetRc.right, targetRc.bottom,
GL_DEPTH_BUFFER_BIT, GL_NEAREST
);

// Return to EFB.
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_efbFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer);

return m_resolvedDepthTexture;
}
Expand All @@ -284,7 +283,7 @@ void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, c

void FramebufferManager::SetFramebuffer(GLuint fb)
{
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb != 0 ? fb : GetEFBFramebuffer());
glBindFramebuffer(GL_FRAMEBUFFER, fb != 0 ? fb : GetEFBFramebuffer());
}

// Apply AA if enabled
Expand All @@ -298,106 +297,60 @@ GLuint FramebufferManager::ResolveAndGetDepthTarget(const EFBRectangle &source_r
return GetEFBDepthTexture(source_rect);
}

XFBSource::~XFBSource()
{
glDeleteRenderbuffers(1, &renderbuf);
}


void XFBSource::Draw(const MathUtil::Rectangle<float> &sourcerc,
const MathUtil::Rectangle<float> &drawrc, int width, int height) const
{
// Texture map xfbSource->texture onto the main buffer

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);

glBegin(GL_QUADS);
glTexCoord2f(sourcerc.left, sourcerc.bottom);
glMultiTexCoord2fARB(GL_TEXTURE1, 0, 0);
glVertex2f(drawrc.left, drawrc.bottom);

glTexCoord2f(sourcerc.left, sourcerc.top);
glMultiTexCoord2fARB(GL_TEXTURE1, 0, 1);
glVertex2f(drawrc.left, drawrc.top);

glTexCoord2f(sourcerc.right, sourcerc.top);
glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1);
glVertex2f(drawrc.right, drawrc.top);

glTexCoord2f(sourcerc.right, sourcerc.bottom);
glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0);
glVertex2f(drawrc.right, drawrc.bottom);
glEnd();
glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuf);
glBlitFramebuffer(sourcerc.left, sourcerc.bottom, sourcerc.right, sourcerc.top,
drawrc.left, drawrc.bottom, drawrc.right, drawrc.top,
GL_COLOR_BUFFER_BIT, GL_LINEAR);

GL_REPORT_ERRORD();
}

void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
TextureConverter::DecodeToTexture(xfbAddr, fbWidth, fbHeight, texture);
TextureConverter::DecodeToTexture(xfbAddr, fbWidth, fbHeight, renderbuf);
}

void XFBSource::CopyEFB(float Gamma)
{
// Copy EFB data to XFB and restore render target again
glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer());
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferManager::GetXFBFramebuffer());

#if 0
if (m_msaaSamples <= 1)
#else
if (!s_bHaveFramebufferBlit)
#endif
{
// Just copy the EFB directly.

glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, FramebufferManager::GetEFBFramebuffer());

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);
glCopyTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, 0, 0, texWidth, texHeight, 0);
// Bind texture.
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuf);
GL_REPORT_FBO_ERROR();

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
}
else
{
// OpenGL cannot copy directly from a multisampled framebuffer, so use
// EXT_framebuffer_blit.

glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, FramebufferManager::GetEFBFramebuffer());
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, FramebufferManager::GetXFBFramebuffer());

// Bind texture.
glFramebufferTexture2DEXT(GL_DRAW_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, texture, 0);
GL_REPORT_FBO_ERROR();

glBlitFramebufferEXT(
0, 0, texWidth, texHeight,
0, 0, texWidth, texHeight,
GL_COLOR_BUFFER_BIT, GL_NEAREST
);
glBlitFramebuffer(
0, 0, texWidth, texHeight,
0, 0, texWidth, texHeight,
GL_COLOR_BUFFER_BIT, GL_NEAREST
);

// Unbind texture.
glFramebufferTexture2DEXT(GL_DRAW_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0);
// Return to EFB.
FramebufferManager::SetFramebuffer(0);

// Return to EFB.
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, FramebufferManager::GetEFBFramebuffer());
}
}

XFBSourceBase* FramebufferManager::CreateXFBSource(unsigned int target_width, unsigned int target_height)
{
GLuint texture;

glGenTextures(1, &texture);
GLuint renderbuf;

#if 0// XXX: Some video drivers don't handle glCopyTexImage2D correctly, so use EXT_framebuffer_blit whenever possible.
if (m_msaaSamples > 1)
#else
if (s_bHaveFramebufferBlit)
#endif
{
// In MSAA mode, allocate the texture image here. In non-MSAA mode,
// the image will be allocated by glCopyTexImage2D (later).

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 4, target_width, target_height, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);

glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
}
glGenRenderbuffers(1, &renderbuf);

glBindRenderbuffer(GL_RENDERBUFFER, renderbuf);
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, target_width, target_height);

return new XFBSource(texture);
return new XFBSource(renderbuf);
}

void FramebufferManager::GetTargetSize(unsigned int *width, unsigned int *height, const EFBRectangle& sourceRc)
Expand Down
6 changes: 3 additions & 3 deletions Source/Plugins/Plugin_VideoOGL/Src/FramebufferManager.h
Expand Up @@ -57,15 +57,15 @@ namespace OGL {

struct XFBSource : public XFBSourceBase
{
XFBSource(GLuint tex) : texture(tex) {}
~XFBSource() { glDeleteTextures(1, &texture); }
XFBSource(GLuint rbuf) : renderbuf(rbuf) {}
~XFBSource();

void CopyEFB(float Gamma);
void DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight);
void Draw(const MathUtil::Rectangle<float> &sourcerc,
const MathUtil::Rectangle<float> &drawrc, int width, int height) const;

const GLuint texture;
const GLuint renderbuf;
};

class FramebufferManager : public FramebufferManagerBase
Expand Down
40 changes: 16 additions & 24 deletions Source/Plugins/Plugin_VideoOGL/Src/GLUtil.cpp
Expand Up @@ -55,8 +55,6 @@ void InitInterface()
GLInterface = new cInterfaceBase;
#elif defined(USE_EGL) && USE_EGL
GLInterface = new cInterfaceEGL;
#elif defined(USE_WX) && USE_WX
GLInterface = new cInterfaceWX;
#elif defined(__APPLE__)
GLInterface = new cInterfaceAGL;
#elif defined(_WIN32)
Expand All @@ -72,14 +70,14 @@ GLuint OpenGL_CompileProgram ( const char* vertexShader, const char* fragmentSha
GLuint vertexShaderID = glCreateShader(GL_VERTEX_SHADER);
GLuint fragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
GLuint programID = glCreateProgram();
GLint Result = GL_FALSE;
char stringBuffer[1024];
GLsizei stringBufferUsage = 0;

// compile vertex shader
glShaderSource(vertexShaderID, 1, &vertexShader, NULL);
glCompileShader(vertexShaderID);
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL)
GLint Result = GL_FALSE;
char stringBuffer[1024];
GLsizei stringBufferUsage = 0;
glGetShaderiv(vertexShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderInfoLog(vertexShaderID, 1024, &stringBufferUsage, stringBuffer);
if(Result && stringBufferUsage) {
Expand Down Expand Up @@ -159,32 +157,26 @@ void OpenGL_ReportARBProgramError()
bool OpenGL_ReportFBOError(const char *function, const char *file, int line)
{
#ifndef USE_GLES
unsigned int fbo_status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
if (fbo_status != GL_FRAMEBUFFER_COMPLETE_EXT)
unsigned int fbo_status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
if (fbo_status != GL_FRAMEBUFFER_COMPLETE)
{
const char *error = "-";
const char *error = "unknown error";
switch (fbo_status)
{
case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT:
error = "INCOMPLETE_ATTACHMENT_EXT";
break;
case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT:
error = "INCOMPLETE_MISSING_ATTACHMENT_EXT";
break;
case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT:
error = "INCOMPLETE_DIMENSIONS_EXT";
case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
error = "INCOMPLETE_ATTACHMENT";
break;
case GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT:
error = "INCOMPLETE_FORMATS_EXT";
case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
error = "INCOMPLETE_MISSING_ATTACHMENT";
break;
case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT:
error = "INCOMPLETE_DRAW_BUFFER_EXT";
case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER:
error = "INCOMPLETE_DRAW_BUFFER";
break;
case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT:
error = "INCOMPLETE_READ_BUFFER_EXT";
case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER:
error = "INCOMPLETE_READ_BUFFER";
break;
case GL_FRAMEBUFFER_UNSUPPORTED_EXT:
error = "UNSUPPORTED_EXT";
case GL_FRAMEBUFFER_UNSUPPORTED:
error = "UNSUPPORTED";
break;
}
ERROR_LOG(VIDEO, "%s:%d: (%s) OpenGL FBO error - %s\n",
Expand Down
24 changes: 10 additions & 14 deletions Source/Plugins/Plugin_VideoOGL/Src/GLUtil.h
Expand Up @@ -33,7 +33,7 @@
#define TEX2D GL_TEXTURE_2D
#define PREC "highp"
#define TEXTYPE "sampler2D"
#define TEXFUNC "texture2D"
#define TEXFUNC "texture"
#else
#define TEX2D GL_TEXTURE_RECTANGLE_ARB
#define PREC
Expand Down Expand Up @@ -69,20 +69,16 @@ bool OpenGL_ReportFBOError(const char *function, const char *file, int line);
#define GL_REPORT_PROGRAM_ERROR() (void)0
#endif

#if (defined __APPLE__ || defined __linux__ || defined _WIN32) && !(defined _M_ARM)
#include <Cg/cg.h>
#include <Cg/cgGL.h>
#define HAVE_CG 1
extern CGcontext g_cgcontext;
extern CGprofile g_cgvProf, g_cgfProf;
// this should be removed in future, but as long as glsl is unstable, we should really read these messages
#if defined(_DEBUG) || defined(DEBUGFAST)
#define DEBUG_GLSL 1
#else
#define DEBUG_GLSL 0
#endif

// XXX: Dual-source blending in OpenGL does not work correctly yet. To make it
// work, we may need to use glBindFragDataLocation. To use that, we need to
// use GLSL shaders across the whole pipeline. Yikes!
//#define USE_DUAL_SOURCE_BLEND

// TODO: should be removed if we use glsl a lot
#define DEBUG_GLSL
// Isn't defined if we aren't using GLEW 1.6
#ifndef GL_ONE_MINUS_SRC1_ALPHA
#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB
#endif

#endif // _GLINIT_H_
240 changes: 33 additions & 207 deletions Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp
Expand Up @@ -19,54 +19,16 @@
#include "x64Emitter.h"
#include "x64ABI.h"
#include "MemoryUtil.h"
#include "ProgramShaderCache.h"
#include "VertexShaderGen.h"

#include "CPMemory.h"
#include "NativeVertexFormat.h"
#include "VertexManager.h"

#define COMPILED_CODE_SIZE 4096

// TODO: this guy is never initialized
u32 s_prevcomponents; // previous state set
/*
#ifdef _WIN32
#ifdef _M_IX86
#define USE_JIT
#endif
#endif
*/
// Note the use of CallCdeclFunction3I etc.
// This is a horrible hack that is necessary because in 64-bit mode, Opengl32.dll is based way, way above the 32-bit
// address space that is within reach of a CALL, and just doing &fn gives us these high uncallable addresses. So we
// want to grab the function pointers from the import table instead.

// This problem does not apply to glew functions, only core opengl32 functions.

// Here's some global state. We only use this to keep track of what we've sent to the OpenGL state
// machine.

#ifdef USE_JIT
DECLARE_IMPORT(glNormalPointer);
DECLARE_IMPORT(glVertexPointer);
DECLARE_IMPORT(glColorPointer);
DECLARE_IMPORT(glTexCoordPointer);
#endif

class GLVertexFormat : public NativeVertexFormat
{
u8 *m_compiledCode;
PortableVertexDeclaration vtx_decl;

public:
GLVertexFormat();
~GLVertexFormat();

virtual void Initialize(const PortableVertexDeclaration &_vtx_decl);
virtual void SetupVertexPointers();
virtual void EnableComponents(u32 components);
};

namespace OGL
{

Expand All @@ -75,23 +37,14 @@ NativeVertexFormat* VertexManager::CreateNativeVertexFormat()
return new GLVertexFormat();
}

}

GLVertexFormat::GLVertexFormat()
{
#ifdef USE_JIT
m_compiledCode = (u8 *)AllocateExecutableMemory(COMPILED_CODE_SIZE, false);
if (m_compiledCode)
memset(m_compiledCode, 0, COMPILED_CODE_SIZE);
#endif

}

GLVertexFormat::~GLVertexFormat()
{
#ifdef USE_JIT
FreeMemoryPages(m_compiledCode, COMPILED_CODE_SIZE);
m_compiledCode = 0;
#endif
glDeleteVertexArrays(1, &VAO);
}

inline GLuint VarToGL(VarType t)
Expand All @@ -104,183 +57,56 @@ inline GLuint VarToGL(VarType t)

void GLVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl)
{
s_prevcomponents = 0;

vertex_stride = _vtx_decl.stride;
using namespace Gen;
this->vtx_decl = _vtx_decl;
vertex_stride = vtx_decl.stride;

// We will not allow vertex components causing uneven strides.
if (_vtx_decl.stride & 3)
PanicAlert("Uneven vertex stride: %i", _vtx_decl.stride);

#ifdef USE_JIT
Gen::XEmitter emit(m_compiledCode);
// Alright, we have our vertex declaration. Compile some crazy code to set it quickly using GL.
emit.ABI_EmitPrologue(6);
if (vertex_stride & 3)
PanicAlert("Uneven vertex stride: %i", vertex_stride);

emit.CallCdeclFunction4_I(glVertexPointer, 3, GL_FLOAT, _vtx_decl.stride, 0);

if (_vtx_decl.num_normals >= 1)
{
emit.CallCdeclFunction3_I(glNormalPointer, VarToGL(_vtx_decl.normal_gl_type), _vtx_decl.stride, _vtx_decl.normal_offset[0]);
if (_vtx_decl.num_normals == 3) {
emit.CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_NORM1_ATTRIB, _vtx_decl.normal_gl_size, VarToGL(_vtx_decl.normal_gl_type), GL_TRUE, _vtx_decl.stride, _vtx_decl.normal_offset[1]);
emit.CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_NORM2_ATTRIB, _vtx_decl.normal_gl_size, VarToGL(_vtx_decl.normal_gl_type), GL_TRUE, _vtx_decl.stride, _vtx_decl.normal_offset[2]);
}
}

for (int i = 0; i < 2; i++)
{
if (_vtx_decl.color_offset[i] != -1)
{
if (i == 0)
emit.CallCdeclFunction4_I(glColorPointer, 4, GL_UNSIGNED_BYTE, _vtx_decl.stride, _vtx_decl.color_offset[i]);
else
emit.CallCdeclFunction4((void *)glSecondaryColorPointer, 4, GL_UNSIGNED_BYTE, _vtx_decl.stride, _vtx_decl.color_offset[i]);
}
}

for (int i = 0; i < 8; i++)
{
if (_vtx_decl.texcoord_offset[i] != -1)
{
int id = GL_TEXTURE0 + i;
#ifdef _M_X64
#ifdef _MSC_VER
emit.MOV(32, R(RCX), Imm32(id));
#else
emit.MOV(32, R(RDI), Imm32(id));
#endif
#else
emit.ABI_AlignStack(1 * 4);
emit.PUSH(32, Imm32(id));
#endif
emit.CALL((void *)glClientActiveTexture);
#ifndef _M_X64
#ifdef _WIN32
// don't inc stack on windows, stdcall
#else
emit.ABI_RestoreStack(1 * 4);
#endif
#endif
emit.CallCdeclFunction4_I(
glTexCoordPointer, _vtx_decl.texcoord_size[i], VarToGL(_vtx_decl.texcoord_gl_type[i]),
_vtx_decl.stride, _vtx_decl.texcoord_offset[i]);
}
}

if (_vtx_decl.posmtx_offset != -1)
emit.CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, _vtx_decl.stride, _vtx_decl.posmtx_offset);

emit.ABI_EmitEpilogue(6);

if (emit.GetCodePtr() - (u8*)m_compiledCode > COMPILED_CODE_SIZE)
Crash();

#endif
this->vtx_decl = _vtx_decl;
}
VertexManager *vm = (OGL::VertexManager*)g_vertex_manager;

glGenVertexArrays(1, &VAO);
glBindVertexArray(VAO);

// the element buffer is bound directly to the vao, so we must set it for every vao
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vm->m_index_buffers);
glBindBuffer(GL_ARRAY_BUFFER, vm->m_vertex_buffers);

void GLVertexFormat::SetupVertexPointers() {
// Cast a pointer to compiled code to a pointer to a function taking no parameters, through a (void *) cast first to
// get around type checking errors, and call it.
#ifdef USE_JIT
((void (*)())(void*)m_compiledCode)();
#else
glVertexPointer(3, GL_FLOAT, vtx_decl.stride, VertexManager::s_pBaseBufferPointer);
if (vtx_decl.num_normals >= 1) {
glNormalPointer(VarToGL(vtx_decl.normal_gl_type), vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.normal_offset[0]));
if (vtx_decl.num_normals == 3) {
glVertexAttribPointer(SHADER_NORM1_ATTRIB, vtx_decl.normal_gl_size, VarToGL(vtx_decl.normal_gl_type), GL_TRUE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.normal_offset[1]));
glVertexAttribPointer(SHADER_NORM2_ATTRIB, vtx_decl.normal_gl_size, VarToGL(vtx_decl.normal_gl_type), GL_TRUE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.normal_offset[2]));
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 3, GL_FLOAT, GL_FALSE, vtx_decl.stride, (u8*)NULL);

for (int i = 0; i < 3; i++) {
if (vtx_decl.num_normals > i) {
glEnableVertexAttribArray(SHADER_NORM0_ATTRIB+i);
glVertexAttribPointer(SHADER_NORM0_ATTRIB+i, vtx_decl.normal_gl_size, VarToGL(vtx_decl.normal_gl_type), GL_TRUE, vtx_decl.stride, (u8*)NULL + vtx_decl.normal_offset[i]);
}
}

for (int i = 0; i < 2; i++) {
if (vtx_decl.color_offset[i] != -1) {
if (i == 0)
glColorPointer(4, GL_UNSIGNED_BYTE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.color_offset[i]));
else {
glSecondaryColorPointer(4, GL_UNSIGNED_BYTE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.color_offset[i]));
}
glEnableVertexAttribArray(SHADER_COLOR0_ATTRIB+i);
glVertexAttribPointer(SHADER_COLOR0_ATTRIB+i, 4, GL_UNSIGNED_BYTE, GL_TRUE, vtx_decl.stride, (u8*)NULL + vtx_decl.color_offset[i]);
}
}

for (int i = 0; i < 8; i++) {
if (vtx_decl.texcoord_offset[i] != -1) {
int id = GL_TEXTURE0 + i;
glClientActiveTexture(id);
glTexCoordPointer(vtx_decl.texcoord_size[i], VarToGL(vtx_decl.texcoord_gl_type[i]),
vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.texcoord_offset[i]));
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB+i);
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB+i, vtx_decl.texcoord_size[i], VarToGL(vtx_decl.texcoord_gl_type[i]),
GL_FALSE, vtx_decl.stride, (u8*)NULL + vtx_decl.texcoord_offset[i]);
}
}

if (vtx_decl.posmtx_offset != -1) {
glVertexAttribPointer(SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.posmtx_offset));
glEnableVertexAttribArray(SHADER_POSMTX_ATTRIB);
glVertexAttribIPointer(SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, vtx_decl.stride, (u8*)NULL + vtx_decl.posmtx_offset);
}
#endif
}

void GLVertexFormat::EnableComponents(u32 components)
{
if (s_prevcomponents != components)
{
VertexManager::Flush();

// matrices
if ((components & VB_HAS_POSMTXIDX) != (s_prevcomponents & VB_HAS_POSMTXIDX))
{
if (components & VB_HAS_POSMTXIDX)
glEnableVertexAttribArray(SHADER_POSMTX_ATTRIB);
else
glDisableVertexAttribArray(SHADER_POSMTX_ATTRIB);
}

// normals
if ((components & VB_HAS_NRM0) != (s_prevcomponents & VB_HAS_NRM0))
{
if (components & VB_HAS_NRM0)
glEnableClientState(GL_NORMAL_ARRAY);
else
glDisableClientState(GL_NORMAL_ARRAY);
}
if ((components & VB_HAS_NRM1) != (s_prevcomponents & VB_HAS_NRM1))
{
if (components & VB_HAS_NRM1) {
glEnableVertexAttribArray(SHADER_NORM1_ATTRIB);
glEnableVertexAttribArray(SHADER_NORM2_ATTRIB);
}
else {
glDisableVertexAttribArray(SHADER_NORM1_ATTRIB);
glDisableVertexAttribArray(SHADER_NORM2_ATTRIB);
}
}

// color
for (int i = 0; i < 2; ++i)
{
if ((components & (VB_HAS_COL0 << i)) != (s_prevcomponents & (VB_HAS_COL0 << i)))
{
if (components & (VB_HAS_COL0 << i))
glEnableClientState(i ? GL_SECONDARY_COLOR_ARRAY : GL_COLOR_ARRAY);
else
glDisableClientState(i ? GL_SECONDARY_COLOR_ARRAY : GL_COLOR_ARRAY);
}
}
vm->m_last_vao = VAO;
}

// tex
for (int i = 0; i < 8; ++i)
{
if ((components & (VB_HAS_UV0 << i)) != (s_prevcomponents & (VB_HAS_UV0 << i)))
{
glClientActiveTexture(GL_TEXTURE0 + i);
if (components & (VB_HAS_UV0 << i))
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
else
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
}
}
void GLVertexFormat::SetupVertexPointers() {
}

s_prevcomponents = components;
}
}
359 changes: 50 additions & 309 deletions Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp
Expand Up @@ -27,347 +27,88 @@
#include "Common.h"
#include "Render.h"
#include "VertexShaderGen.h"
#include "PixelShaderCache.h"
#include "ProgramShaderCache.h"
#include "PixelShaderManager.h"
#include "OnScreenDisplay.h"
#include "StringUtil.h"
#include "FileUtil.h"
#include "Debugger.h"

namespace OGL
{

static int s_nMaxPixelInstructions;
static GLuint s_ColorMatrixProgram = 0;
static GLuint s_DepthMatrixProgram = 0;
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
PIXELSHADERUID PixelShaderCache::s_curuid;
bool PixelShaderCache::s_displayCompileAlert;
GLuint PixelShaderCache::CurrentShader;
bool PixelShaderCache::ShaderEnabled;

PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry = NULL;
PIXELSHADERUID PixelShaderCache::last_uid;

GLuint PixelShaderCache::GetDepthMatrixProgram()
{
return s_DepthMatrixProgram;
}

GLuint PixelShaderCache::GetColorMatrixProgram()
void SetPSConstant4fvByName(const char * name, unsigned int offset, const float *f, const unsigned int count = 1)
{
return s_ColorMatrixProgram;
}

void PixelShaderCache::Init()
{
glEnable(GL_FRAGMENT_PROGRAM_ARB);
ShaderEnabled = true;
CurrentShader = 0;
last_entry = NULL;
GL_REPORT_ERRORD();

s_displayCompileAlert = true;

glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, (GLint *)&s_nMaxPixelInstructions);

if(s_nMaxPixelInstructions == 0) // Some combination of drivers and hardware returns zero for some reason.
s_nMaxPixelInstructions = 4096;
if (strstr((const char*)glGetString(GL_VENDOR), "Humper") != NULL) s_nMaxPixelInstructions = 4096;
#if CG_VERSION_NUM == 2100
if (strstr((const char*)glGetString(GL_VENDOR), "ATI") != NULL)
ProgramShaderCache::PCacheEntry tmp = ProgramShaderCache::GetShaderProgram();
for (int a = 0; a < NUM_UNIFORMS; ++a)
{
s_nMaxPixelInstructions = 4096;
}
#endif

int maxinst, maxattribs;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB, (GLint *)&maxinst);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, (GLint *)&maxattribs);
INFO_LOG(VIDEO, "pixel max_alu=%d, max_inst=%d, max_attrib=%d", s_nMaxPixelInstructions, maxinst, maxattribs);

char pmatrixprog[2048];
sprintf(pmatrixprog, "!!ARBfp1.0"
"TEMP R0;\n"
"TEMP R1;\n"
"PARAM K0 = { 0.5, 0.5, 0.5, 0.5};\n"
"TEX R0, fragment.texcoord[0], texture[0], RECT;\n"
"MUL R0, R0, program.env[%d];\n"
"ADD R0, R0, K0;\n"
"FLR R0, R0;\n"
"MUL R0, R0, program.env[%d];\n"
"DP4 R1.x, R0, program.env[%d];\n"
"DP4 R1.y, R0, program.env[%d];\n"
"DP4 R1.z, R0, program.env[%d];\n"
"DP4 R1.w, R0, program.env[%d];\n"
"ADD result.color, R1, program.env[%d];\n"
"END\n",C_COLORMATRIX+5,C_COLORMATRIX+6, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4);
glGenProgramsARB(1, &s_ColorMatrixProgram);
SetCurrentShader(s_ColorMatrixProgram);
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog);

GLenum err = GL_REPORT_ERROR();
if (err != GL_NO_ERROR) {
ERROR_LOG(VIDEO, "Failed to create color matrix fragment program");
glDeleteProgramsARB(1, &s_ColorMatrixProgram);
s_ColorMatrixProgram = 0;
}

sprintf(pmatrixprog, "!!ARBfp1.0\n"
"TEMP R0;\n"
"TEMP R1;\n"
"TEMP R2;\n"
//16777215/16777216*256, 1/255, 256, 0
"PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n"
"PARAM K1 = { 15.0, 0.066666666666, 0.0, 0.0};\n"
//sample the depth value
"TEX R2, fragment.texcoord[0], texture[0], RECT;\n"

//scale from [0*16777216..1*16777216] to
//[0*16777215..1*16777215], multiply by 256
"MUL R0, R2.x, K0.x;\n" // *16777215/16777216*256

//It is easy to get bad results due to low precision
//here, for example converting like this:
//MUL R0,R0,{ 65536, 256, 1, 16777216 }
//FRC R0,R0
//gives {?, 128/255, 254/255, ?} for depth value 254/255
//on some gpus

"FLR R0.x,R0;\n" //bits 31..24

"SUB R0.yzw,R0,R0.x;\n" //subtract bits 31..24 from rest
"MUL R0.yzw,R0,K0.z;\n" // *256
"FLR R0.y,R0;\n" //bits 23..16

"SUB R0.zw,R0,R0.y;\n" //subtract bits 23..16 from rest
"MUL R0.zw,R0,K0.z;\n" // *256
"FLR R0.z,R0;\n" //bits 15..8

"MOV R0.w,R0.x;\n" //duplicate bit 31..24

"MUL R0,R0,K0.y;\n" // /255

"MUL R0.w,R0,K1.x;\n" // *15
"FLR R0.w,R0;\n" //bits 31..28
"MUL R0.w,R0,K1.y;\n" // /15

"DP4 R1.x, R0, program.env[%d];\n"
"DP4 R1.y, R0, program.env[%d];\n"
"DP4 R1.z, R0, program.env[%d];\n"
"DP4 R1.w, R0, program.env[%d];\n"
"ADD result.color, R1, program.env[%d];\n"
"END\n", C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4);
glGenProgramsARB(1, &s_DepthMatrixProgram);
SetCurrentShader(s_DepthMatrixProgram);
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog);

err = GL_REPORT_ERROR();
if (err != GL_NO_ERROR) {
ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program");
glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0;
}

}

void PixelShaderCache::Shutdown()
{
glDeleteProgramsARB(1, &s_ColorMatrixProgram);
s_ColorMatrixProgram = 0;
glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0;
PSCache::iterator iter = PixelShaders.begin();
for (; iter != PixelShaders.end(); iter++)
iter->second.Destroy();
PixelShaders.clear();
}

FRAGMENTSHADER* PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode, components);

// Check if the shader is already set
if (last_entry)
{
if (uid == last_uid)
if (!strcmp(name, UniformNames[a]))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(API_OPENGL, last_entry->safe_uid, last_entry->shader.strprog, dstAlphaMode, components);
return &last_entry->shader;
if (tmp.shader.UniformLocations[a] == -1)
return;
else
{
glUniform4fv(tmp.shader.UniformLocations[a] + offset, count, f);
return;
}
}
}

last_uid = uid;

PSCache::iterator iter = PixelShaders.find(uid);
if (iter != PixelShaders.end())
{
PSCacheEntry &entry = iter->second;
last_entry = &entry;

GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(API_OPENGL, entry.safe_uid, entry.shader.strprog, dstAlphaMode, components);
return &last_entry->shader;
}

// Make an entry in the table
PSCacheEntry& newentry = PixelShaders[uid];
last_entry = &newentry;
const char *code = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components);

if (g_ActiveConfig.bEnableShaderDebugging && code)
{
GetSafePixelShaderId(&newentry.safe_uid, dstAlphaMode, components);
newentry.shader.strprog = code;
}

#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
static int counter = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);

SaveData(szTemp, code);
}
#endif

if (!code || !CompilePixelShader(newentry.shader, code)) {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return NULL;
}

INCSTAT(stats.numPixelShadersCreated);
SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return &last_entry->shader;
}

bool PixelShaderCache::CompilePixelShader(FRAGMENTSHADER& ps, const char* pstrprogram)
// Renderer functions
void Renderer::SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{
GLenum err = GL_REPORT_ERROR();
if (err != GL_NO_ERROR)
{
ERROR_LOG(VIDEO, "glError %08x before PS!", err);
}

#if defined HAVE_CG && HAVE_CG
char stropt[128];
sprintf(stropt, "MaxLocalParams=224,NumInstructionSlots=%d", s_nMaxPixelInstructions);
const char *opts[] = {"-profileopts", stropt, "-O2", "-q", NULL};
CGprogram tempprog = cgCreateProgram(g_cgcontext, CG_SOURCE, pstrprogram, g_cgfProf, "main", opts);
float const f[4] = {f1, f2, f3, f4};

// handle errors
if (!cgIsProgram(tempprog))
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
cgDestroyProgram(tempprog);

static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%sbad_ps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << pstrprogram;
file.close();

PanicAlert("Failed to compile pixel shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%d):\n%s",
szTemp,
g_cgfProf,
cgGetLastListing(g_cgcontext));

return false;
ProgramShaderCache::SetMultiPSConstant4fv(const_number, f, 1);
return;
}

// handle warnings
if (cgGetError() != CG_NO_ERROR)
for (unsigned int a = 0; a < 10; ++a)
{
WARN_LOG(VIDEO, "Warnings on compile ps %s:", cgGetLastListing(g_cgcontext));
WARN_LOG(VIDEO, "%s", pstrprogram);
if (const_number >= PSVar_Loc[a].reg && const_number < (PSVar_Loc[a].reg + PSVar_Loc[a].size))
{
unsigned int offset = const_number - PSVar_Loc[a].reg;
SetPSConstant4fvByName(PSVar_Loc[a].name, offset, f);
return;
}
}
}

// This looks evil - we modify the program through the const char * we got from cgGetProgramString!
// It SHOULD not have any nasty side effects though - but you never know...
char *pcompiledprog = (char*)cgGetProgramString(tempprog, CG_COMPILED_PROGRAM);
char *plocal = strstr(pcompiledprog, "program.local");
while (plocal != NULL)
void Renderer::SetPSConstant4fv(unsigned int const_number, const float *f)
{
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
const char *penv = " program.env";
memcpy(plocal, penv, 13);
plocal = strstr(plocal+13, "program.local");
ProgramShaderCache::SetMultiPSConstant4fv(const_number, f, 1);
return;
}

glGenProgramsARB(1, &ps.glprogid);
SetCurrentShader(ps.glprogid);
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pcompiledprog), pcompiledprog);

err = GL_REPORT_ERROR();
if (err != GL_NO_ERROR)
for (unsigned int a = 0; a < 10; ++a)
{
GLint error_pos, native_limit;
glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native_limit);
// Error occur
if (error_pos != -1) {
const char *program_error = (const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB);
char line[256];
strncpy(line, (const char *)pcompiledprog + error_pos, 255);
line[255] = 0;
ERROR_LOG(VIDEO, "Error at %i: %s", error_pos, program_error);
ERROR_LOG(VIDEO, "Line dump: \n%s", line);
} else if (native_limit != -1) {
ERROR_LOG(VIDEO, "Hit limit? %i", native_limit);
// TODO
if (const_number >= PSVar_Loc[a].reg && const_number < (PSVar_Loc[a].reg + PSVar_Loc[a].size))
{
unsigned int offset = const_number - PSVar_Loc[a].reg;
SetPSConstant4fvByName(PSVar_Loc[a].name, offset, f);
return;
}
ERROR_LOG(VIDEO, "%s", pstrprogram);
ERROR_LOG(VIDEO, "%s", pcompiledprog);
}

cgDestroyProgram(tempprog);
#endif

return true;
}

//Disable Fragment programs and reset the selected Program
void PixelShaderCache::DisableShader()
{
if(ShaderEnabled)
{
glDisable(GL_FRAGMENT_PROGRAM_ARB);
ShaderEnabled = false;
}
}

//bind a program if is diferent from the binded oone
void PixelShaderCache::SetCurrentShader(GLuint Shader)
void Renderer::SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{
if(!ShaderEnabled)
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
glEnable(GL_FRAGMENT_PROGRAM_ARB);
ShaderEnabled = true;
ProgramShaderCache::SetMultiPSConstant4fv(const_number, f, count);
return;
}
if(CurrentShader != Shader)
for (unsigned int a = 0; a < 10; ++a)
{
if(Shader != 0)
CurrentShader = Shader;
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, CurrentShader);
if (const_number >= PSVar_Loc[a].reg && const_number < (PSVar_Loc[a].reg + PSVar_Loc[a].size))
{
unsigned int offset = const_number - PSVar_Loc[a].reg;
SetPSConstant4fvByName(PSVar_Loc[a].name, offset, f, count);
return;
}
}
}

void Renderer::SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{
float f[4] = { f1, f2, f3, f4 };
glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f);
}

void Renderer::SetPSConstant4fv(unsigned int const_number, const float *f)
{
glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f);
}

void Renderer::SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{
for (unsigned int i = 0; i < count; i++,f+=4)
glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number + i, f);
}

} // namespace OGL
91 changes: 0 additions & 91 deletions Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.h

This file was deleted.

167 changes: 131 additions & 36 deletions Source/Plugins/Plugin_VideoOGL/Src/PostProcessing.cpp
Expand Up @@ -20,7 +20,8 @@
#include "VideoConfig.h"
#include "GLUtil.h"
#include "PostProcessing.h"
#include "PixelShaderCache.h"
#include "ProgramShaderCache.h"
#include "FramebufferManager.h"

namespace OGL
{
Expand All @@ -29,62 +30,156 @@ namespace PostProcessing
{

static std::string s_currentShader;
static FRAGMENTSHADER s_shader;
static SHADER s_shader;
static bool s_enable;

static u32 s_width;
static u32 s_height;
static GLuint s_fbo;
static GLuint s_texture;
static GLuint s_vao;
static GLuint s_vbo;

static GLuint s_uniform_resolution;

static char s_vertex_shader[] =
"in vec2 rawpos;\n"
"in vec2 tex0;\n"
"out vec2 uv0;\n"
"void main(void) {\n"
" gl_Position = vec4(rawpos,0,1);\n"
" uv0 = tex0;\n"
"}\n";

void Init()
{
s_currentShader = "";
s_enable = 0;
s_width = 0;
s_height = 0;

glGenFramebuffers(1, &s_fbo);
glGenTextures(1, &s_texture);
glBindTexture(GL_TEXTURE_2D, s_texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); // disable mipmaps
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindFramebuffer(GL_FRAMEBUFFER, s_fbo);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_texture, 0);
FramebufferManager::SetFramebuffer(0);

glGenBuffers(1, &s_vbo);
glBindBuffer(GL_ARRAY_BUFFER, s_vbo);
GLfloat vertices[] = {
-1.f, -1.f, 0.f, 0.f,
-1.f, 1.f, 0.f, 1.f,
1.f, -1.f, 1.f, 0.f,
1.f, 1.f, 1.f, 1.f
};
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);

glGenVertexArrays(1, &s_vao);
glBindVertexArray( s_vao );
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, NULL);
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB);
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2);
}

void Shutdown()
{
s_shader.Destroy();

glDeleteFramebuffers(1, &s_vbo);
glDeleteTextures(1, &s_texture);

glDeleteBuffers(1, &s_vbo);
glDeleteVertexArrays(1, &s_vao);
}

// Clears the remembered shader name. ApplyShader() compares this name with
// the configured one, so a cleared name no longer matches a non-empty
// configured shader.
void ReloadShader()
{
	s_currentShader.clear();
}

bool ApplyShader()
void BindTargetFramebuffer ()
{
if (s_currentShader != File::GetUserPath(D_SHADERS_IDX) + g_ActiveConfig.sPostProcessingShader + ".txt")
{
// Set immediately to prevent endless recompiles on failure.
if (!g_ActiveConfig.sPostProcessingShader.empty())
s_currentShader = File::GetUserPath(D_SHADERS_IDX) + g_ActiveConfig.sPostProcessingShader + ".txt";
else
s_currentShader.clear();

s_shader.Destroy();

if (!s_currentShader.empty())
{
std::string code;
if (File::ReadFileToString(true, s_currentShader.c_str(), code))
{
if (!PixelShaderCache::CompilePixelShader(s_shader, code.c_str()))
{
ERROR_LOG(VIDEO, "Failed to compile post-processing shader %s", s_currentShader.c_str());
}
}
else
{
ERROR_LOG(VIDEO, "Failed to load post-processing shader %s - does not exist?", s_currentShader.c_str());
}
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, s_enable ? s_fbo : 0);
}

// Draws the quad set up in s_vao, textured with s_texture, into framebuffer 0
// using the post-processing shader. Does nothing while post-processing is
// disabled (s_enable == false).
void BlitToScreen()
{
	if (!s_enable)
		return;

	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
	glViewport(0, 0, s_width, s_height);

	glBindVertexArray(s_vao);
	s_shader.Bind();

	// resolution = (width, height, 1/width, 1/height)
	const float w = (float)s_width;
	const float h = (float)s_height;
	glUniform4f(s_uniform_resolution, w, h, 1.0f/w, 1.0f/h);

	// The source texture is bound on texture unit 9 for the draw.
	glActiveTexture(GL_TEXTURE0+9);
	glBindTexture(GL_TEXTURE_2D, s_texture);
	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
	glBindTexture(GL_TEXTURE_2D, 0);

	/* Disabled alternative kept for reference:
	glBindFramebuffer(GL_READ_FRAMEBUFFER, s_fbo);
	glBlitFramebuffer(rc.left, rc.bottom, rc.right, rc.top,
		rc.left, rc.bottom, rc.right, rc.top,
		GL_COLOR_BUFFER_BIT, GL_NEAREST);*/
}

// Refreshes the active shader via ApplyShader(), then, if post-processing is
// enabled and the requested size differs from the cached one, reallocates the
// render-target texture at the new size.
void Update ( u32 width, u32 height )
{
	ApplyShader();

	// Nothing to resize when disabled or when the size is unchanged.
	const bool same_size = (width == s_width) && (height == s_height);
	if (!s_enable || same_size)
		return;

	s_width = width;
	s_height = height;

	// Re-allocate the framebuffer's colour texture (done on texture unit 9).
	glActiveTexture(GL_TEXTURE0+9);
	glBindTexture(GL_TEXTURE_2D, s_texture);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
	glBindTexture(GL_TEXTURE_2D, 0);
}

if (s_shader.glprogid != 0)
{
PixelShaderCache::SetCurrentShader(s_shader.glprogid);
return true;
void ApplyShader()
{
// shader didn't change
if (s_currentShader == g_ActiveConfig.sPostProcessingShader) return;
s_currentShader = g_ActiveConfig.sPostProcessingShader;
s_enable = false;
s_shader.Destroy();

// shader disabled
if (g_ActiveConfig.sPostProcessingShader == "") return;

// so need to compile shader

// loading shader code
std::string code;
std::string path = File::GetUserPath(D_SHADERS_IDX) + g_ActiveConfig.sPostProcessingShader + ".txt";
if(!File::ReadFileToString(true, path.c_str(), code)) {
ERROR_LOG(VIDEO, "post-processing shader not found: %s", path.c_str());
return;
}
else
{
PixelShaderCache::DisableShader();
return false;

// and compile it
if (!ProgramShaderCache::CompileShader(s_shader, s_vertex_shader, code.c_str())) {
ERROR_LOG(VIDEO, "Failed to compile post-processing shader %s", s_currentShader.c_str());
return;
}

// read uniform locations
s_uniform_resolution = glGetUniformLocation(s_shader.glprogid, "resolution");

// successful
s_enable = true;
}

} // namespace
Expand Down
8 changes: 6 additions & 2 deletions Source/Plugins/Plugin_VideoOGL/Src/PostProcessing.h
Expand Up @@ -30,9 +30,13 @@ namespace PostProcessing
void Init();
void Shutdown();

void BindTargetFramebuffer();
void BlitToScreen();
void Update(u32 width, u32 height);

void ReloadShader();
// Returns false if no shader was applied.
bool ApplyShader();

void ApplyShader();

} // namespace

Expand Down