359 changes: 92 additions & 267 deletions Source/Core/VideoCommon/Src/IndexGenerator.cpp

Large diffs are not rendered by default.

71 changes: 38 additions & 33 deletions Source/Core/VideoCommon/Src/IndexGenerator.h
Expand Up @@ -25,53 +25,58 @@
// Index generator for triangle, line and point primitives. Every member is
// static: Start() receives three u16 buffers and the getters report the
// primitive counters and the pointer distance covered in each buffer.
// NOTE(review): this span comes from a rendered diff (see the "changes:" line
// above it), so several declarations appear twice with different types
// (int vs u32) and the Add* helpers are listed as both public and private.
// Presumably only one variant of each belongs in the real header - confirm
// against the repository before relying on either.
class IndexGenerator
{
public:
//Init
// Init
// Receives the buffers for triangle, line and point indices (in that order).
static void Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr);
//Triangles
static void AddList(int numVerts);
static void AddStrip(int numVerts);
static void AddFan(int numVerts);
static void AddQuads(int numVerts);
//Lines
static void AddLineList(int numVerts);
static void AddLineStrip(int numVerts);
//Points
static void AddPoints(int numVerts);
//Interface
// These int getters also set the `used` flag as a side effect.
static int GetNumTriangles() {used = true; return numT;}
static int GetNumLines() {used = true;return numL;}
static int GetNumPoints() {used = true;return numP;}
static int GetNumVerts() {return index;} //returns numprimitives
// Sum of the three per-category add counters.
static int GetNumAdds() {return Tadds + Ladds + Padds;}
// Distance, in u16 elements, from each base pointer to its current pointer.
static int GetTriangleindexLen() {return (int)(Tptr - BASETptr);}
static int GetLineindexLen() {return (int)(Lptr - BASELptr);}
static int GetPointindexLen() {return (int)(Pptr - BASEPptr);}


// Single entry point taking a primitive type and a vertex count.
// Presumably dispatches to the private Add* helpers below - the
// implementation is not visible here, TODO confirm.
static void AddIndices(int primitive, u32 numVertices);

// Interface
// Plain accessors for the primitive counters (no side effects).
static u32 GetNumTriangles() {return numT;}
static u32 GetNumLines() {return numL;}
static u32 GetNumPoints() {return numP;}

// returns numprimitives
// NOTE(review): the value returned is the `index` member - confirm what it counts.
static u32 GetNumVerts() {return index;}

// Distance, in u16 elements, from each base pointer to its current pointer.
static u32 GetTriangleindexLen() {return (u32)(Tptr - BASETptr);}
static u32 GetLineindexLen() {return (u32)(Lptr - BASELptr);}
static u32 GetPointindexLen() {return (u32)(Pptr - BASEPptr);}
/*
enum IndexPrimitiveType
{
Prim_None = 0,
Prim_List,
Prim_Strip,
Prim_Fan
} ;
};
*/
private:
// Triangles
static void AddList(u32 numVerts);
static void AddStrip(u32 numVerts);
static void AddFan(u32 numVerts);
static void AddQuads(u32 numVerts);

// Lines
static void AddLineList(u32 numVerts);
static void AddLineStrip(u32 numVerts);

// Points
static void AddPoints(u32 numVerts);

// Takes the three vertex indices of one triangle.
static void WriteTriangle(u32 index1, u32 index2, u32 index3);

// One current/base pointer pair per primitive category
// (T = triangles, L = lines, P = points); the Get*indexLen getters
// subtract the BASE pointer from the matching current pointer.
static u16 *Tptr;
static u16 *BASETptr;
static u16 *Lptr;
static u16 *BASELptr;
static u16 *Pptr;
static u16 *BASEPptr;
static int numT;
static int numL;
static int numP;
static int index;
static int Tadds;
static int Ladds;
static int Padds;
// NOTE(review): IndexPrimitiveType is only declared inside the comment block
// above in this span - these two members presumably belong to the old variant.
static IndexPrimitiveType LastTPrimitive;
static IndexPrimitiveType LastLPrimitive;
static bool used;

// TODO: redundant variables
static u32 numT;
static u32 numL;
static u32 numP;
static u32 index;
};

#endif // _INDEXGENERATOR_H
246 changes: 78 additions & 168 deletions Source/Core/VideoCommon/Src/VertexLoader.cpp
Expand Up @@ -73,6 +73,10 @@ int colElements[2];
float posScale;
float tcScale[8];

// bbox must read the vertex position, so while bounding-box tracking is active
// UpdateBoundingBoxPrepare() points VertexManager::s_pCurBufferPointer at this
// scratch buffer and the position loader converts into it.
// Only the first three floats (x, y, z) are consumed afterwards, but the buffer
// holds four: the SSSE3 indexed-float position loader stores a full 16-byte
// __m128i through the write pointer, which would overrun a 12-byte buffer.
static float s_bbox_vertex_buffer[4];
// Real vertex-buffer write position saved while the pointer is redirected.
static u8 *s_bbox_pCurBufferPointer_orig;

static const float fractionTable[32] = {
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
Expand All @@ -95,23 +99,38 @@ void LOADERDECL PosMtx_ReadDirect_UByte()

void LOADERDECL PosMtx_Write()
{
*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
*VertexManager::s_pCurBufferPointer++ = 0;
*VertexManager::s_pCurBufferPointer++ = 0;
*VertexManager::s_pCurBufferPointer++ = 0;
DataWrite<u8>(s_curposmtx);
DataWrite<u8>(0);
DataWrite<u8>(0);
DataWrite<u8>(0);
}

// Pipeline stage placed in front of the position loader when bounding boxes are
// enabled. While PixelEngine::bbox_active is set it saves the real vertex write
// pointer and redirects writes into s_bbox_vertex_buffer; otherwise it is a no-op.
void LOADERDECL UpdateBoundingBoxPrepare()
{
	if (PixelEngine::bbox_active)
	{
		// This is a big hack: by swapping the output pointer, the same position
		// converting function can be used as without bbox, and we still get a
		// copy of the vertex position.
		s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer;
		VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer;
	}
}

void LOADERDECL UpdateBoundingBox()
{
if (!PixelEngine::bbox_active)
return;

// reset videodata pointer
VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig;

// copy vertex pointers
memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12);
VertexManager::s_pCurBufferPointer += 12;

// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
// memory like we might have been with a D3D vertex buffer, this would have been a bad idea.
float *data = (float *)(VertexManager::s_pCurBufferPointer - 12);
// We must transform the just loaded point by the current world and projection matrix - in software.
// Then convert to screen space and update the bounding box.
float p[3] = {data[0], data[1], data[2]};
float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]};

const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
const float *proj_matrix = &g_fProjectionMatrix[0];
Expand Down Expand Up @@ -149,24 +168,22 @@ void LOADERDECL TexMtx_ReadDirect_UByte()

void LOADERDECL TexMtx_Write_Float()
{
*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
VertexManager::s_pCurBufferPointer += 4;
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
}

void LOADERDECL TexMtx_Write_Float2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
VertexManager::s_pCurBufferPointer += 8;
DataWrite(0.f);
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
}

void LOADERDECL TexMtx_Write_Float4()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
((float*)VertexManager::s_pCurBufferPointer)[1] = 0;
((float*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
((float*)VertexManager::s_pCurBufferPointer)[3] = 0; // Just to fill out with 0.
VertexManager::s_pCurBufferPointer += 16;
DataWrite(0.f);
DataWrite(0.f);
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
// Just to fill out with 0.
DataWrite(0.f);
}

VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
Expand Down Expand Up @@ -274,15 +291,16 @@ void VertexLoader::CompileVertexTranslator()
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }

// Write vertex position loader
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
if(g_ActiveConfig.bUseBBox) {
WriteCall(UpdateBoundingBoxPrepare);
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
WriteCall(UpdateBoundingBox);
} else {
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
}
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
nat_offset += 12;

// OK, so we just got a point. Let's go back and read it for the bounding box.

if(g_ActiveConfig.bUseBBox)
WriteCall(UpdateBoundingBox);

// Normals
vtx_decl.num_normals = 0;
if (m_VtxDesc.Normal != NOT_PRESENT)
Expand Down Expand Up @@ -507,7 +525,8 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
#endif
}
#endif
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)

int VertexLoader::SetupRunVertices(int vtx_attr_group, int primitive, int const count)
{
m_numLoadedVertices += count;

Expand All @@ -526,7 +545,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{
// if cull mode is none, ignore triangles and quads
DataSkip(count * m_VertexSize);
return;
return 0;
}

m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
Expand All @@ -550,157 +569,48 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;

// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
int granularity = 1;
switch (primitive) {
case 3: // strip .. hm, weird
case 4: // fan
if (VertexManager::GetRemainingSize() < 3 * native_stride)
VertexManager::Flush();
break;
case 6: // line strip
if (VertexManager::GetRemainingSize() < 2 * native_stride)
VertexManager::Flush();
break;
case 0: granularity = 4; break; // quads
case 2: granularity = 3; break; // tris
case 5: granularity = 2; break; // lines
}

int startv = 0, extraverts = 0;
int v = 0;

//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
while (v < count)
{
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
//if (remainingVerts2 - v + startv < remainingVerts)
//remainingVerts = remainingVerts2 - v + startv;
if (remainingVerts < granularity) {
INCSTAT(stats.thisFrame.numBufferSplits);
// This buffer full - break current primitive and flush, to switch to the next buffer.
u8* plastptr = VertexManager::s_pCurBufferPointer;
if (v - startv > 0)
VertexManager::AddVertices(primitive, v - startv + extraverts);
VertexManager::Flush();
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
// Why does this need to be so complicated?
switch (primitive) {
case 3: // triangle strip, copy last two vertices
// a little trick since we have to keep track of signs
if (v & 1) {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
VertexManager::s_pCurBufferPointer += native_stride*3;
extraverts = 3;
}
else {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
VertexManager::s_pCurBufferPointer += native_stride*2;
extraverts = 2;
}
break;
case 4: // tri fan, copy first and last vert
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 2;
break;
case 6: // line strip
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 1;
break;
default:
extraverts = 0;
break;
}
startv = v;
}
int remainingPrims = remainingVerts / granularity;
remainingVerts = remainingPrims * granularity;
if (count - v < remainingVerts)
remainingVerts = count - v;

#ifdef USE_JIT
if (remainingVerts > 0) {
loop_counter = remainingVerts;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < remainingVerts; s++)
{
tcIndex = 0;
colIndex = 0;
s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i]();
PRIM_LOG("\n");
}
#endif
v += remainingVerts;
}

if (startv < count)
VertexManager::AddVertices(primitive, count - startv + extraverts);
VertexManager::PrepareForAdditionalData(primitive, count, native_stride);

return count;
}




void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count)
{
m_numLoadedVertices += count;
auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count);
ConvertVertices(new_count);
VertexManager::AddVertices(primitive, new_count);
}

// Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
{
// We really must flush here. It's possible that the native representations
// of the two vtx formats are the same, but we have no way to easily check that
// now.
VertexManager::Flush();
// Also move the Set() here?
void VertexLoader::ConvertVertices ( int count )
{
#ifdef USE_JIT
if (count > 0) {
loop_counter = count;
((void (*)())(void*)m_compiledCode)();
}
g_nativeVertexFmt = m_NativeFmt;

if (bpmem.genMode.cullmode == 3 && primitive < 5)
#else
for (int s = 0; s < count; s++)
{
// if cull mode is none, ignore triangles and quads
DataSkip(count * m_VertexSize);
return;
tcIndex = 0;
colIndex = 0;
s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i]();
PRIM_LOG("\n");
}

m_NativeFmt->EnableComponents(m_NativeFmt->m_components);

// Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;

pVtxAttr = &m_VtxAttr;
posScale = fractionTable[m_VtxAttr.PosFrac];
if (m_NativeFmt->m_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++)
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;

if(VertexManager::GetRemainingSize() < native_stride * count)
VertexManager::Flush();
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);
VertexManager::s_pCurBufferPointer += native_stride * count;
DataSkip(count * m_VertexSize);
VertexManager::AddVertices(primitive, count);
#endif
}


// Variant of RunVertices for vertices that are supplied ready-made in `Data`:
// after the shared setup, native_stride bytes per vertex are copied verbatim
// into the vertex buffer instead of running the conversion pipeline.
// SetupRunVertices() decides how many vertices are actually processed.
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int const count, u8* Data)
{
	auto const vertices = SetupRunVertices(vtx_attr_group, primitive, count);
	auto const bytes = native_stride * vertices;

	// Append the supplied vertices and move the write pointer past them.
	memcpy_gc(VertexManager::s_pCurBufferPointer, Data, bytes);
	VertexManager::s_pCurBufferPointer += bytes;

	// The matching source data is skipped rather than decoded.
	DataSkip(vertices * m_VertexSize);

	VertexManager::AddVertices(primitive, vertices);
}

void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2)
{
Expand Down
3 changes: 3 additions & 0 deletions Source/Core/VideoCommon/Src/VertexLoader.h
Expand Up @@ -88,6 +88,8 @@ class VertexLoader
~VertexLoader();

int GetVertexSize() const {return m_VertexSize;}

int SetupRunVertices(int vtx_attr_group, int primitive, int const count);
void RunVertices(int vtx_attr_group, int primitive, int count);
void RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data);

Expand Down Expand Up @@ -124,6 +126,7 @@ class VertexLoader
void SetVAT(u32 _group0, u32 _group1, u32 _group2);

void CompileVertexTranslator();
void ConvertVertices(int count);

void WriteCall(TPipelineFunction);

Expand Down
95 changes: 38 additions & 57 deletions Source/Core/VideoCommon/Src/VertexLoader_Color.cpp
Expand Up @@ -15,9 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/

#ifndef _VERTEXLOADERCOLOR_H
#define _VERTEXLOADERCOLOR_H

#include "Common.h"
#include "VideoCommon.h"
#include "LookUpTables.h"
Expand All @@ -37,8 +34,7 @@ extern int colElements[2];

__forceinline void _SetCol(u32 val)
{
*(u32*)VertexManager::s_pCurBufferPointer = val;
VertexManager::s_pCurBufferPointer += 4;
DataWrite(val);
colIndex++;
}

Expand Down Expand Up @@ -132,80 +128,65 @@ void LOADERDECL Color_ReadDirect_32b_8888()
_SetCol(col);
}



void LOADERDECL Color_ReadIndex8_16b_565()
template <typename I>
void Color_ReadIndex_16b_565()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
_SetCol565(val);
}
void LOADERDECL Color_ReadIndex8_24b_888()

template <typename I>
void Color_ReadIndex_24b_888()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex8_32b_888x()

template <typename I>
void Color_ReadIndex_32b_888x()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex8_16b_4444()

template <typename I>
void Color_ReadIndex_16b_4444()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
_SetCol4444(val);
}
void LOADERDECL Color_ReadIndex8_24b_6666()

template <typename I>
void Color_ReadIndex_24b_6666()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData);
_SetCol6666(val);
}
void LOADERDECL Color_ReadIndex8_32b_8888()
{
u8 Index = DataReadU8();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}
void LOADERDECL Color_ReadIndex16_16b_565()
{
u16 Index = DataReadU16();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
_SetCol565(val);
}
void LOADERDECL Color_ReadIndex16_24b_888()
{
u16 Index = DataReadU16();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex16_32b_888x()
{
u16 Index = DataReadU16();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex16_16b_4444()
{
u16 Index = DataReadU16();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
_SetCol4444(val);
}
void LOADERDECL Color_ReadIndex16_24b_6666()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData);
_SetCol6666(val);
}
void LOADERDECL Color_ReadIndex16_32b_8888()

template <typename I>
void Color_ReadIndex_32b_8888()
{
u16 Index = DataReadU16();
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}
#endif

// Non-template LOADERDECL entry points for the indexed colour readers.
// Each one forwards to the matching Color_ReadIndex_* template, fixing the
// index type that is read from the stream.

// --- 8-bit indices (u8) ---
void LOADERDECL Color_ReadIndex8_16b_565()
{
	Color_ReadIndex_16b_565<u8>();
}

void LOADERDECL Color_ReadIndex8_24b_888()
{
	Color_ReadIndex_24b_888<u8>();
}

void LOADERDECL Color_ReadIndex8_32b_888x()
{
	Color_ReadIndex_32b_888x<u8>();
}

void LOADERDECL Color_ReadIndex8_16b_4444()
{
	Color_ReadIndex_16b_4444<u8>();
}

void LOADERDECL Color_ReadIndex8_24b_6666()
{
	Color_ReadIndex_24b_6666<u8>();
}

void LOADERDECL Color_ReadIndex8_32b_8888()
{
	Color_ReadIndex_32b_8888<u8>();
}

// --- 16-bit indices (u16) ---
void LOADERDECL Color_ReadIndex16_16b_565()
{
	Color_ReadIndex_16b_565<u16>();
}

void LOADERDECL Color_ReadIndex16_24b_888()
{
	Color_ReadIndex_24b_888<u16>();
}

void LOADERDECL Color_ReadIndex16_32b_888x()
{
	Color_ReadIndex_32b_888x<u16>();
}

void LOADERDECL Color_ReadIndex16_16b_4444()
{
	Color_ReadIndex_16b_4444<u16>();
}

void LOADERDECL Color_ReadIndex16_24b_6666()
{
	Color_ReadIndex_24b_6666<u16>();
}

void LOADERDECL Color_ReadIndex16_32b_8888()
{
	Color_ReadIndex_32b_8888<u16>();
}
495 changes: 136 additions & 359 deletions Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp

Large diffs are not rendered by default.

43 changes: 9 additions & 34 deletions Source/Core/VideoCommon/Src/VertexLoader_Normal.h
Expand Up @@ -70,45 +70,20 @@ class VertexLoader_Normal
NUM_NRM_INDICES
};

struct Set {
Set() {}
Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {}
struct Set
{
template <typename T>
void operator=(const T&)
{
gc_size = T::size;
function = T::function;
}

int gc_size;
TPipelineFunction function;
// int pc_size;
};

static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];

// direct
static void LOADERDECL Normal_DirectByte();
static void LOADERDECL Normal_DirectShort();
static void LOADERDECL Normal_DirectFloat();
static void LOADERDECL Normal_DirectByte3();
static void LOADERDECL Normal_DirectShort3();
static void LOADERDECL Normal_DirectFloat3();

// index8
static void LOADERDECL Normal_Index8_Byte();
static void LOADERDECL Normal_Index8_Short();
static void LOADERDECL Normal_Index8_Float();
static void LOADERDECL Normal_Index8_Byte3_Indices1();
static void LOADERDECL Normal_Index8_Short3_Indices1();
static void LOADERDECL Normal_Index8_Float3_Indices1();
static void LOADERDECL Normal_Index8_Byte3_Indices3();
static void LOADERDECL Normal_Index8_Short3_Indices3();
static void LOADERDECL Normal_Index8_Float3_Indices3();

// index16
static void LOADERDECL Normal_Index16_Byte();
static void LOADERDECL Normal_Index16_Short();
static void LOADERDECL Normal_Index16_Float();
static void LOADERDECL Normal_Index16_Byte3_Indices1();
static void LOADERDECL Normal_Index16_Short3_Indices1();
static void LOADERDECL Normal_Index16_Float3_Indices1();
static void LOADERDECL Normal_Index16_Byte3_Indices3();
static void LOADERDECL Normal_Index16_Short3_Indices3();
static void LOADERDECL Normal_Index16_Float3_Indices3();
};

#endif
250 changes: 56 additions & 194 deletions Source/Core/VideoCommon/Src/VertexLoader_Position.cpp
Expand Up @@ -15,6 +15,8 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/

#include <limits>

#include "Common.h"
#include "VideoCommon.h"
#include "VertexLoader.h"
Expand Down Expand Up @@ -71,189 +73,65 @@ MOVUPS(MOffset(EDI, 0), XMM0);
*/

// ==============================================================================
// Direct
// ==============================================================================

template <class T, bool three>
void Pos_ReadDirect()
template <typename T>
float PosScale(T val)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead<T>() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead<T>() * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead<T>() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
return val * posScale;
}

void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); }
void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); }
void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); }
void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect<s16, true>(); }
void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect<u8, false>(); }
void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect<s8, false>(); }
void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect<u16, false>(); }
void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect<s16, false>(); }

// Direct (non-indexed) read of a three-component float position.
// The components are moved as raw 32-bit words obtained from DataReadU32(),
// so no floating point operations are involved. Advances the vertex write
// pointer by 12 bytes.
void LOADERDECL Pos_ReadDirect_Float3()
{
	u32* const out = (u32 *)VertexManager::s_pCurBufferPointer;

	// x, y, z are read from the stream in order.
	for (int component = 0; component < 3; ++component)
		out[component] = DataReadU32();

	LOG_VTX();
	VertexManager::s_pCurBufferPointer += 12;
}
// Specialization of PosScale for float input: the value is returned unchanged,
// i.e. it is not multiplied by posScale.
template <>
float PosScale(float val)
{
	return val;
}

void LOADERDECL Pos_ReadDirect_Float2()
template <typename T, int N>
void LOADERDECL Pos_ReadDirect()
{
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0;
static_assert(N <= 3, "N > 3 is not sane!");

for (int i = 0; i < 3; ++i)
DataWrite(i<N ? PosScale(DataRead<T>()) : 0.f);

LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}


template<class T, bool three,int MaxSize>
inline void Pos_ReadIndex_Byte(int Index)
{
if(Index < MaxSize)
{
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}

template<class T, bool three,int MaxSize>
inline void Pos_ReadIndex_Short(int Index)
{
if(Index < MaxSize)
{
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}

template<bool three,int MaxSize>
void Pos_ReadIndex_Float(int Index)
template <typename I, typename T, int N>
void LOADERDECL Pos_ReadIndex()
{
if(Index < MaxSize)
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
static_assert(N <= 3, "N > 3 is not sane!");

auto const index = DataRead<I>();
if (index < std::numeric_limits<I>::max())
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
if (three)
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));

for (int i = 0; i < 3; ++i)
DataWrite(i<N ? PosScale(Common::FromBigEndian(data[i])) : 0.f);

LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}

#if _M_SSE >= 0x301
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);

template<bool three,int MaxSize>
void Pos_ReadIndex_Float_SSSE3(int Index)
template <typename I, bool three>
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
{
if(Index < MaxSize)
auto const index = DataRead<I>();
if (index < std::numeric_limits<I>::max())
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
VertexManager::s_pCurBufferPointer += sizeof(float) * 3;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}
#endif

// Explicitly instantiate these functions to decrease the possibility of
// symbol binding problems when (only) calling them from JIT compiled code.
template void Pos_ReadDirect<u8, true>();
template void Pos_ReadDirect<s8, true>();
template void Pos_ReadDirect<u16, true>();
template void Pos_ReadDirect<s16, true>();
template void Pos_ReadDirect<u8, false>();
template void Pos_ReadDirect<s8, false>();
template void Pos_ReadDirect<u16, false>();
template void Pos_ReadDirect<s16, false>();
template void Pos_ReadIndex_Byte<u8, true, 255>(int Index);
template void Pos_ReadIndex_Byte<s8, true, 255>(int Index);
template void Pos_ReadIndex_Short<u16, true, 255>(int Index);
template void Pos_ReadIndex_Short<s16, true, 255>(int Index);
template void Pos_ReadIndex_Float<true, 255>(int Index);
template void Pos_ReadIndex_Byte<u8, false, 255>(int Index);
template void Pos_ReadIndex_Byte<s8, false, 255>(int Index);
template void Pos_ReadIndex_Short<u16, false, 255>(int Index);
template void Pos_ReadIndex_Short<s16, false, 255>(int Index);
template void Pos_ReadIndex_Float<false, 255>(int Index);
template void Pos_ReadIndex_Byte<u8, true, 65535>(int Index);
template void Pos_ReadIndex_Byte<s8, true, 65535>(int Index);
template void Pos_ReadIndex_Short<u16, true, 65535>(int Index);
template void Pos_ReadIndex_Short<s16, true, 65535>(int Index);
template void Pos_ReadIndex_Float<true, 65535>(int Index);
template void Pos_ReadIndex_Byte<u8, false, 65535>(int Index);
template void Pos_ReadIndex_Byte<s8, false, 65535>(int Index);
template void Pos_ReadIndex_Short<u16, false, 65535>(int Index);
template void Pos_ReadIndex_Short<s16, false, 65535>(int Index);
template void Pos_ReadIndex_Float<false, 65535>(int Index);

// ==============================================================================
// Index 8
// ==============================================================================
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false, 255> (DataReadU8());}

// ==============================================================================
// Index 16
// ==============================================================================
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false, 65535> (DataReadU16());}

#if _M_SSE >= 0x301
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 65535> (DataReadU16());}
#endif

// Lookup table of position loader functions, dimensioned [4][8][2].
// Judging by the entries: the first index selects the group (0 = all NULL,
// 1 = Pos_ReadDirect*, 2 = 8-bit indexed, 3 = 16-bit indexed), the second
// index the component format (u8, s8, u16, s16, float in that order) and the
// third index the component count (0 -> "2" variant, 1 -> "3" variant).
// NOTE(review): each non-NULL group below lists ten rows (named wrappers
// followed by template instantiations) although only eight are declared, and
// one line is not C++ at all - this looks like unresolved diff residue;
// confirm against the real file before relying on it.
static TPipelineFunction tableReadPosition[4][8][2] = {
{
{NULL, NULL,},
Expand All @@ -263,56 +141,40 @@ static TPipelineFunction tableReadPosition[4][8][2] = {
{NULL, NULL,},
},
{
{Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3,},
{Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3,},
{Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3,},
{Pos_ReadDirect_Short2, Pos_ReadDirect_Short3,},
{Pos_ReadDirect_Float2, Pos_ReadDirect_Float3,},
{Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>,},
{Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>,},
{Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>,},
{Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>,},
{Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>,},
},
{
{Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3,},
{Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3,},
{Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3,},
{Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3,},
{Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3,},
{Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>,},
{Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>,},
{Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>,},
{Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>,},
{Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>,},
},
{
{Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3,},
{Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3,},
{Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3,},
{Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3,},
{Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3,},
{Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>,},
{Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>,},
{Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>,},
{Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>,},
{Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>,},
},
};

// Size table with the same [4][8][2] shape as tableReadPosition.
// The values are: 0 for the first group; component size times component
// count for the second group (1*2, 1*3, 2*2, 2*3, 4*2, 4*3); a constant 1 for
// the third group and a constant 2 for the fourth.
// Presumably this is the number of input bytes consumed per vertex by the
// matching loader - TODO confirm against the code that reads this table.
// NOTE(review): every group lists its five rows twice (expanded, then once
// more on a single line), which is more than the eight rows declared - this
// looks like unresolved diff residue; confirm against the real file.
static int tableReadPositionVertexSize[4][8][2] = {
{
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
},
{
{2, 3,},
{2, 3,},
{4, 6,},
{4, 6,},
{8, 12,},
{2, 3,}, {2, 3,}, {4, 6,}, {4, 6,}, {8, 12,},
},
{
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
},
{
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
},
};

Expand All @@ -322,10 +184,10 @@ void VertexLoader_Position::Init(void) {
#if _M_SSE >= 0x301

if (cpu_info.bSSSE3) {
tableReadPosition[2][4][0] = Pos_ReadIndex8_Float2_SSSE3;
tableReadPosition[2][4][1] = Pos_ReadIndex8_Float3_SSSE3;
tableReadPosition[3][4][0] = Pos_ReadIndex16_Float2_SSSE3;
tableReadPosition[3][4][1] = Pos_ReadIndex16_Float3_SSSE3;
tableReadPosition[2][4][0] = Pos_ReadIndex_Float_SSSE3<u8, false>;
tableReadPosition[2][4][1] = Pos_ReadIndex_Float_SSSE3<u8, true>;
tableReadPosition[3][4][0] = Pos_ReadIndex_Float_SSSE3<u16, false>;
tableReadPosition[3][4][1] = Pos_ReadIndex_Float_SSSE3<u16, true>;
}

#endif
Expand Down
409 changes: 83 additions & 326 deletions Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp

Large diffs are not rendered by default.

178 changes: 64 additions & 114 deletions Source/Core/VideoCommon/Src/VertexManagerBase.cpp
Expand Up @@ -12,171 +12,120 @@
#include "BPStructs.h"

#include "VertexManagerBase.h"
#include "MainBase.h"
#include "VideoConfig.h"

// Global vertex manager instance; VertexManager::Flush() works through it.
VertexManager *g_vertex_manager;

// Storage for VertexManager's static data members.
// Current write position and start of the vertex buffer.
u8 *VertexManager::s_pCurBufferPointer;
u8 *VertexManager::s_pBaseBufferPointer;

// NOTE(review): the raw-pointer buffers and the Flushed flag below coexist
// with s_pEndBufferPointer and with std::vector members of the same names in
// the class declaration - looks like two revisions merged; confirm which set
// is current.
u8 *VertexManager::LocalVBuffer;
u16 *VertexManager::TIBuffer;
u16 *VertexManager::LIBuffer;
u16 *VertexManager::PIBuffer;

bool VertexManager::Flushed;
// One past the last usable byte of the vertex buffer (see GetRemainingSize()).
u8 *VertexManager::s_pEndBufferPointer;

// Sets up the vertex buffer and the three index buffers, then points the
// base/current/end pointers and the IndexGenerator at them.
// NOTE(review): the body contains both a std::vector style setup (resize,
// &LocalVBuffer[0], size()) and a raw new[] setup of the same members; the
// two cannot both be valid for one member type - looks like two revisions
// merged; confirm which one is intended.
VertexManager::VertexManager()
{
Flushed = false;
// Vector variant: size the vertex storage and derive base/current/end from it.
LocalVBuffer.resize(MAXVBUFFERSIZE);
s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0];
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();

// Raw-pointer variant of the same vertex buffer setup.
LocalVBuffer = new u8[MAXVBUFFERSIZE];
s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer;
// Vector variant: triangle, line and point index storage.
TIBuffer.resize(MAXIBUFFERSIZE);
LIBuffer.resize(MAXIBUFFERSIZE);
PIBuffer.resize(MAXIBUFFERSIZE);

// Raw-pointer variant of the same index buffer setup.
TIBuffer = new u16[MAXIBUFFERSIZE];
LIBuffer = new u16[MAXIBUFFERSIZE];
PIBuffer = new u16[MAXIBUFFERSIZE];

IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
// ResetBuffer() rewinds the write pointer and restarts the IndexGenerator.
ResetBuffer();
}

// Destructor with an intentionally empty body: nothing is released by hand here.
VertexManager::~VertexManager()
{
}

// Rewinds the vertex write pointer to the start of the buffer and restarts
// the IndexGenerator on the triangle, line and point index buffers.
void VertexManager::ResetBuffer()
{
// NOTE(review): two consecutive assignments to s_pCurBufferPointer; only the
// second takes effect. Looks like an old and a new line merged - confirm.
s_pCurBufferPointer = LocalVBuffer;
s_pCurBufferPointer = s_pBaseBufferPointer;
IndexGenerator::Start(GetTriangleIndexBuffer(), GetLineIndexBuffer(), GetPointIndexBuffer());
}

// NOTE(review): a destructor header and the GetRemainingSize() header are
// stacked on a single body here, which cannot compile as written - looks
// like unresolved diff residue; confirm which lines belong to which function.
VertexManager::~VertexManager()
// Returns the number of bytes between the current write pointer and
// s_pEndBufferPointer.
u32 VertexManager::GetRemainingSize()
{
// Release of the raw new[] buffers (destructor-style cleanup).
delete[] LocalVBuffer;

delete[] TIBuffer;
delete[] LIBuffer;
delete[] PIBuffer;

// TODO: necessary??
ResetBuffer();
// Free space left in the vertex buffer, in bytes.
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}

// NOTE(review): this span interleaves an AddIndices() body (a function-pointer
// table indexed by primitive) with PrepareForAdditionalData(); it cannot
// compile as written - looks like unresolved diff residue; confirm against
// the real file.
void VertexManager::AddIndices(int primitive, int numVertices)
{
//switch (primitive)
//{
//case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break;
//case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break;
//case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break;
//case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break;
//case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break;
//case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break;
//case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break;
//}

static void (*const primitive_table[])(int) =
// PrepareForAdditionalData: flushes first when `count` vertices of `stride`
// bytes each, or `count` indices for `primitive`, would not fit in what is
// left of the buffers.
void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
{
u32 const needed_vertex_bytes = count * stride;

if (needed_vertex_bytes > GetRemainingSize() || count > GetRemainingIndices(primitive))
{
IndexGenerator::AddQuads,
NULL,
IndexGenerator::AddList,
IndexGenerator::AddStrip,
IndexGenerator::AddFan,
IndexGenerator::AddLineList,
IndexGenerator::AddLineStrip,
IndexGenerator::AddPoints,
};

primitive_table[primitive](numVertices);
Flush();

// Still too small after flushing: the request can never fit, so only log it.
if (needed_vertex_bytes > GetRemainingSize())
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
"Increase MAXVBUFFERSIZE or we need primitive breaking afterall.");
if (count > GetRemainingIndices(primitive))
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
"Increase MAXIBUFFERSIZE or we need primitive breaking afterall.");
}
}

// NOTE(review): two function headers (an int GetRemainingSize() and
// IsFlushed()) share this body and it holds two return statements - looks
// like unresolved diff residue; confirm against the real file.
int VertexManager::GetRemainingSize()
// IsFlushed: true when nothing has been written since the buffer was reset,
// i.e. the current write pointer still equals the base pointer.
bool VertexManager::IsFlushed() const
{
return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer);
return s_pBaseBufferPointer == s_pCurBufferPointer;
}

// Converts the free space in the relevant index buffer (MAXIBUFFERSIZE minus
// the IndexGenerator's current length) into a count for the given primitive
// type; unknown primitive values yield 0.
// NOTE(review): two headers (GetRemainingVertices / GetRemainingIndices) are
// stacked here and several cases carry a second, unreachable return after a
// return/break - looks like unresolved diff residue; confirm which formulas
// are current.
int VertexManager::GetRemainingVertices(int primitive)
u32 VertexManager::GetRemainingIndices(int primitive)
{
switch (primitive)
{
// Triangle-type primitives share the triangle index buffer.
case GX_DRAW_QUADS:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 6 * 4;
case GX_DRAW_TRIANGLES:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen());
case GX_DRAW_TRIANGLE_STRIP:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;
case GX_DRAW_TRIANGLE_FAN:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3;
break;
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;

// Line-type primitives share the line index buffer.
case GX_DRAW_LINES:
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen());
case GX_DRAW_LINE_STRIP:
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2;
break;
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2 + 1;

case GX_DRAW_POINTS:
return (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen());
break;

default:
return 0;
break;
}
}

// Records `numVertices` vertices of the given primitive type: updates the
// frame statistics and has the IndexGenerator emit the matching indices.
// NOTE(review): two headers (int and u32 numVertices) are stacked here, and
// the body mixes a per-primitive flush check / Flushed flag / AddIndices()
// path with a direct IndexGenerator::AddIndices() call - looks like
// unresolved diff residue; confirm against the real file.
void VertexManager::AddVertices(int primitive, int numVertices)
void VertexManager::AddVertices(int primitive, u32 numVertices)
{
// With the u32 signature this can only be true for numVertices == 0.
if (numVertices <= 0)
return;

// Flush up front when the relevant index buffer has less room left than
// the per-vertex bound used for this primitive class (3, 2 or 1 per vertex).
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN:
if (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen() < 3 * numVertices)
Flush();
break;

case GX_DRAW_LINES:
case GX_DRAW_LINE_STRIP:
if (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen() < 2 * numVertices)
Flush();
break;

case GX_DRAW_POINTS:
if (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen() < numVertices)
Flush();
break;

default:
return;
break;
}

// After a flush the IndexGenerator is restarted before new indices are added.
if (Flushed)
{
IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
Flushed = false;
}

ADDSTAT(stats.thisFrame.numPrims, numVertices);
INCSTAT(stats.thisFrame.numPrimitiveJoins);
AddIndices(primitive, numVertices);

IndexGenerator::AddIndices(primitive, numVertices);
}

// Hands any pending vertex data to the active backend and rewinds the buffers.
// Does nothing when the global vertex manager reports that it is already flushed.
void VertexManager::Flush()
{
	if (!g_vertex_manager->IsFlushed())
	{
		// Loading a savestate invalidates BP, so let the backend check for that first.
		g_video_backend->CheckInvalidState();
		VideoFifo_CheckEFBAccess();

		// Backend-specific flush, then reset the shared buffers for the next batch.
		g_vertex_manager->vFlush();
		g_vertex_manager->ResetBuffer();
	}
}

// TODO: need to merge more stuff into VideoCommon to use this
#if (0)
void VertexManager::Flush()
{
if (LocalVBuffer == s_pCurBufferPointer || Flushed)
return;

Flushed = true;

VideoFifo_CheckEFBAccess();

#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens,
xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op,
Expand Down Expand Up @@ -249,9 +198,9 @@ void VertexManager::Flush()

// finally bind
if (false == PixelShaderCache::SetShader(false, g_nativeVertexFmt->m_components))
goto shader_fail;
return;
if (false == VertexShaderCache::SetShader(g_nativeVertexFmt->m_components))
goto shader_fail;
return;

const int stride = g_nativeVertexFmt->GetVertexStride();
//if (g_nativeVertexFmt)
Expand All @@ -265,7 +214,7 @@ void VertexManager::Flush()
if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
{
if (false == PixelShaderCache::SetShader(true, g_nativeVertexFmt->m_components))
goto shader_fail;
return;

g_vertex_manager->Draw(stride, true);
}
Expand Down Expand Up @@ -301,9 +250,6 @@ void VertexManager::Flush()
}
#endif
++g_Config.iSaveTargetId;

shader_fail:
ResetBuffer();
}
#endif

Expand All @@ -314,12 +260,16 @@ void VertexManager::DoState(PointerWrap& p)

// Serializes the vertex/index buffers and the current write pointer through
// the PointerWrap `p` (savestate support shared by all backends).
// NOTE(review): the body contains both a DoArray()/Flushed based sequence and
// a p.Do()/s_pBaseBufferPointer based sequence for the same buffers - looks
// like two revisions merged; confirm which one is current.
void VertexManager::DoStateShared(PointerWrap& p)
{
// Raw-array variant: write pointer relative to LocalVBuffer, then the arrays.
p.DoPointer(s_pCurBufferPointer, LocalVBuffer);
p.DoArray(LocalVBuffer, MAXVBUFFERSIZE);
p.DoArray(TIBuffer, MAXIBUFFERSIZE);
p.DoArray(LIBuffer, MAXIBUFFERSIZE);
p.DoArray(PIBuffer, MAXIBUFFERSIZE);

if (p.GetMode() == PointerWrap::MODE_READ)
Flushed = false;
// It seems we half-assume to be flushed here
// We update s_pCurBufferPointer yet don't worry about IndexGenerator's outdated pointers
// and maybe other things are overlooked

// Container variant: the buffers go through p.Do() as whole objects.
p.Do(LocalVBuffer);
p.Do(TIBuffer);
p.Do(LIBuffer);
p.Do(PIBuffer);

// Re-derive base/end from the buffer before handling the write pointer
// relative to the base.
s_pBaseBufferPointer = &LocalVBuffer[0];
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
p.DoPointer(s_pCurBufferPointer, s_pBaseBufferPointer);
}
70 changes: 34 additions & 36 deletions Source/Core/VideoCommon/Src/VertexManagerBase.h
Expand Up @@ -2,72 +2,70 @@
#ifndef _VERTEXMANAGERBASE_H
#define _VERTEXMANAGERBASE_H

#include <vector>

class NativeVertexFormat;
class PointerWrap;

// Base class for the backend vertex managers: owns the CPU-side vertex and
// index buffers, tracks the write position and exposes the static entry
// points (AddVertices, Flush, ...) used while loading vertices. Backends
// implement vFlush() and CreateNativeVertexFormat().
// NOTE(review): several members appear twice with different signatures or
// types (e.g. the enum and the static const versions of MAXVBUFFERSIZE /
// MAXIBUFFERSIZE, static raw-pointer buffers and std::vector buffers, two
// destructor declarations) - looks like two revisions merged; confirm which
// declarations are current.
class VertexManager
{
private:
// What are the actual values?
static const u32 SMALLEST_POSSIBLE_VERTEX = 1;
static const u32 LARGEST_POSSIBLE_VERTEX = 188;

// (u16)-1, i.e. 65535 assuming u16 is a 16-bit unsigned type.
static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1;

public:

enum
{
// values from OGL backend
//MAXVBUFFERSIZE = 0x1FFFF,
//MAXIBUFFERSIZE = 0xFFFF,

// values from DX9 backend
//MAXVBUFFERSIZE = 0x50000,
//MAXIBUFFERSIZE = 0xFFFF,

// values from DX11 backend
MAXVBUFFERSIZE = 0x50000,
MAXIBUFFERSIZE = 0xFFFF,
};
// Vertex buffer size in bytes: largest vertex times the maximum count above.
static const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX;

// We may convert triangle-fans to triangle-lists, almost 3x as many indices.
static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3;

VertexManager();
virtual ~VertexManager(); // needs to be virtual for DX11's dtor
// needs to be virtual for DX11's dtor
virtual ~VertexManager();

static void AddVertices(int _primitive, int _numVertices);
static void AddVertices(int _primitive, u32 _numVertices);

// TODO: protected?
// Current write position, start and end of the vertex buffer.
static u8 *s_pCurBufferPointer;
static u8 *s_pBaseBufferPointer;
static u8 *s_pEndBufferPointer;

static int GetRemainingSize();
static int GetRemainingVertices(int primitive);
static u32 GetRemainingSize();
static void PrepareForAdditionalData(int primitive, u32 count, u32 stride);
static u32 GetRemainingIndices(int primitive);

// No-op when already flushed; otherwise calls vFlush() and resets the buffers.
static void Flush();

virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0;

static u16* GetTriangleIndexBuffer() { return TIBuffer; }
static u16* GetLineIndexBuffer() { return LIBuffer; }
static u16* GetPointIndexBuffer() { return PIBuffer; }
static u8* GetVertexBuffer() { return LocalVBuffer; }

static void DoState(PointerWrap& p);
virtual void CreateDeviceObjects(){};
virtual void DestroyDeviceObjects(){};

protected:
// TODO: make private after Flush() is merged
static void ResetBuffer();

static u8 *LocalVBuffer;
static u16 *TIBuffer;
static u16 *LIBuffer;
static u16 *PIBuffer;

static bool Flushed;
// Accessors returning the start of each index buffer / the vertex buffer.
u16* GetTriangleIndexBuffer() { return &TIBuffer[0]; }
u16* GetLineIndexBuffer() { return &LIBuffer[0]; }
u16* GetPointIndexBuffer() { return &PIBuffer[0]; }
u8* GetVertexBuffer() { return &s_pBaseBufferPointer[0]; }

// Default savestate hook simply forwards to the shared implementation.
virtual void vDoState(PointerWrap& p) { DoStateShared(p); }
void DoStateShared(PointerWrap& p);

private:
static void AddIndices(int primitive, int numVertices);
// True when the write pointer is still at the start of the vertex buffer.
bool IsFlushed() const;

void ResetBuffer();

//virtual void Draw(u32 stride, bool alphapass) = 0;
// temp
// Backend-specific part of Flush().
virtual void vFlush() = 0;


// Backing storage for the vertex data and the triangle/line/point indices.
std::vector<u8> LocalVBuffer;
std::vector<u16> TIBuffer;
std::vector<u16> LIBuffer;
std::vector<u16> PIBuffer;
};

extern VertexManager *g_vertex_manager;
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoCommon/Src/VideoCommon.h
Expand Up @@ -90,8 +90,8 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
#define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__)
#endif


// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]);
// warning: mapping buffer should be disabled to use this
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);

#define LOG_VTX()

Expand Down
3 changes: 1 addition & 2 deletions Source/Core/VideoCommon/Src/x64DLCache.cpp
Expand Up @@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices);
u8* EndAddress = VertexManager::s_pCurBufferPointer;
u32 Vdatasize = (u32)(EndAddress - StartAddress);
u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress);
if (Vdatasize > 0)
{
// Compile
Expand Down
23 changes: 8 additions & 15 deletions Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp
Expand Up @@ -104,7 +104,7 @@ void VertexManager::LoadBuffers()
{
D3D11_MAPPED_SUBRESOURCE map;

UINT vSize = UINT(s_pCurBufferPointer - LocalVBuffer);
UINT vSize = UINT(s_pCurBufferPointer - s_pBaseBufferPointer);
D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE;
if (m_vertexBufferCursor + vSize >= VBUFFER_SIZE)
{
Expand All @@ -116,7 +116,7 @@ void VertexManager::LoadBuffers()

D3D::context->Map(m_vertexBuffers[m_activeVertexBuffer], 0, MapType, 0, &map);

memcpy((u8*)map.pData + m_vertexBufferCursor, LocalVBuffer, vSize);
memcpy((u8*)map.pData + m_vertexBufferCursor, s_pBaseBufferPointer, vSize);
D3D::context->Unmap(m_vertexBuffers[m_activeVertexBuffer], 0);
m_vertexDrawOffset = m_vertexBufferCursor;
m_vertexBufferCursor += vSize;
Expand All @@ -136,9 +136,9 @@ void VertexManager::LoadBuffers()
m_triangleDrawIndex = m_indexBufferCursor;
m_lineDrawIndex = m_triangleDrawIndex + IndexGenerator::GetTriangleindexLen();
m_pointDrawIndex = m_lineDrawIndex + IndexGenerator::GetLineindexLen();
memcpy((u16*)map.pData + m_triangleDrawIndex, TIBuffer, sizeof(u16) * IndexGenerator::GetTriangleindexLen());
memcpy((u16*)map.pData + m_lineDrawIndex, LIBuffer, sizeof(u16) * IndexGenerator::GetLineindexLen());
memcpy((u16*)map.pData + m_pointDrawIndex, PIBuffer, sizeof(u16) * IndexGenerator::GetPointindexLen());
memcpy((u16*)map.pData + m_triangleDrawIndex, GetTriangleIndexBuffer(), sizeof(u16) * IndexGenerator::GetTriangleindexLen());
memcpy((u16*)map.pData + m_lineDrawIndex, GetLineIndexBuffer(), sizeof(u16) * IndexGenerator::GetLineindexLen());
memcpy((u16*)map.pData + m_pointDrawIndex, GetPointIndexBuffer(), sizeof(u16) * IndexGenerator::GetPointindexLen());
D3D::context->Unmap(m_indexBuffers[m_activeIndexBuffer], 0);
m_indexBufferCursor += iCount;
}
Expand Down Expand Up @@ -208,13 +208,9 @@ void VertexManager::Draw(UINT stride)
if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
((DX11::Renderer*)g_renderer)->RestoreCull();
}

void VertexManager::vFlush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();

u32 usedtextures = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
Expand Down Expand Up @@ -262,12 +258,12 @@ void VertexManager::vFlush()
g_nativeVertexFmt->m_components))
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
goto shader_fail;
return;
}
if (!VertexShaderCache::SetShader(g_nativeVertexFmt->m_components))
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
goto shader_fail;
return;
}
LoadBuffers();
unsigned int stride = g_nativeVertexFmt->GetVertexStride();
Expand All @@ -281,9 +277,6 @@ void VertexManager::vFlush()
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);

g_renderer->RestoreState();

shader_fail:
ResetBuffer();
}

} // namespace
26 changes: 10 additions & 16 deletions Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp
Expand Up @@ -173,7 +173,7 @@ void VertexManager::PrepareVBuffers(int stride)
DestroyDeviceObjects();
return;
}
memcpy(pVertices, LocalVBuffer, datasize);
memcpy(pVertices, s_pBaseBufferPointer, datasize);
VBuffers[CurrentVBuffer]->Unlock();

LockMode = D3DLOCK_NOOVERWRITE;
Expand All @@ -192,17 +192,17 @@ void VertexManager::PrepareVBuffers(int stride)
}
if(TdataSize)
{
memcpy(pIndices, TIBuffer, TdataSize * sizeof(u16));
memcpy(pIndices, GetTriangleIndexBuffer(), TdataSize * sizeof(u16));
pIndices += TdataSize;
}
if(LDataSize)
{
memcpy(pIndices, LIBuffer, LDataSize * sizeof(u16));
memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16));
pIndices += LDataSize;
}
if(PDataSize)
{
memcpy(pIndices, PIBuffer, PDataSize * sizeof(u16));
memcpy(pIndices, GetPointIndexBuffer(), PDataSize * sizeof(u16));
}
IBuffers[CurrentIBuffer]->Unlock();
D3D::dev->SetStreamSource( 0, VBuffers[CurrentVBuffer], CurrentVBufferIndex, stride);
Expand Down Expand Up @@ -266,9 +266,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_TRIANGLELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(),
TIBuffer,
GetTriangleIndexBuffer(),
D3DFMT_INDEX16,
LocalVBuffer,
s_pBaseBufferPointer,
stride)))
{
DumpBadShaders();
Expand All @@ -280,9 +280,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_LINELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(),
LIBuffer,
GetLineIndexBuffer(),
D3DFMT_INDEX16,
LocalVBuffer,
s_pBaseBufferPointer,
stride)))
{
DumpBadShaders();
Expand All @@ -294,9 +294,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_POINTLIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(),
PIBuffer,
GetPointIndexBuffer(),
D3DFMT_INDEX16,
LocalVBuffer,
s_pBaseBufferPointer,
stride)))
{
DumpBadShaders();
Expand All @@ -307,11 +307,6 @@ void VertexManager::DrawVA(int stride)

void VertexManager::vFlush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed = true;
VideoFifo_CheckEFBAccess();

u32 usedtextures = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
Expand Down Expand Up @@ -388,7 +383,6 @@ void VertexManager::vFlush()
CurrentIBufferIndex += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen();
CurrentVBufferIndex += IndexGenerator::GetNumVerts() * stride;
}
ResetBuffer();
}

}
14 changes: 4 additions & 10 deletions Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp
Expand Up @@ -84,27 +84,23 @@ void VertexManager::Draw()
{
if (IndexGenerator::GetNumTriangles() > 0)
{
glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, TIBuffer);
glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, GetTriangleIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumLines() > 0)
{
glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, LIBuffer);
glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, GetLineIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumPoints() > 0)
{
glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, PIBuffer);
glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, GetPointIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
}

void VertexManager::vFlush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGen.numTexGens,
xfregs.numChan.numColorChans, xfregs.dualTexTrans.enabled, bpmem.ztex2.op,
Expand Down Expand Up @@ -136,7 +132,7 @@ void VertexManager::vFlush()
(void)GL_REPORT_ERROR();

//glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
//glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW);
//glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
GL_REPORT_ERRORD();

// setup the pointers
Expand Down Expand Up @@ -244,8 +240,6 @@ void VertexManager::vFlush()
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);

//s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
s_pCurBufferPointer = LocalVBuffer;
IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer);

#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
Expand Down