Skip to content

Commit

Permalink
Merge pull request #8687 from unknownbrackets/ind32
Browse files Browse the repository at this point in the history
Add support for 32-bit indices
  • Loading branch information
hrydgard committed Apr 10, 2016
2 parents d85dce0 + 1e0051a commit 8ab9bcc
Show file tree
Hide file tree
Showing 14 changed files with 180 additions and 96 deletions.
17 changes: 13 additions & 4 deletions GPU/Common/DrawEngineCommon.cpp
Expand Up @@ -201,23 +201,32 @@ bool DrawEngineCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugV
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
const u8 *inds = Memory::GetPointer(gstate_c.indexAddr);
const u16 *inds16 = (const u16 *)inds;
const u32 *inds32 = (const u32 *)inds;

if (inds) {
GetIndexBounds(inds, count, gstate.vertType, &indexLowerBound, &indexUpperBound);
indices.resize(count);
switch (gstate.vertType & GE_VTYPE_IDX_MASK) {
case GE_VTYPE_IDX_8BIT:
for (int i = 0; i < count; ++i) {
indices[i] = inds[i];
}
break;
case GE_VTYPE_IDX_16BIT:
for (int i = 0; i < count; ++i) {
indices[i] = inds16[i];
}
break;
case GE_VTYPE_IDX_8BIT:
case GE_VTYPE_IDX_32BIT:
WARN_LOG_REPORT_ONCE(simpleIndexes32, G3D, "SimpleVertices: Decoding 32-bit indexes");
for (int i = 0; i < count; ++i) {
indices[i] = inds[i];
// These aren't documented and should be rare. Let's bounds check each one.
if (inds32[i] != (u16)inds32[i]) {
ERROR_LOG_REPORT_ONCE(simpleIndexes32Bounds, G3D, "SimpleVertices: Index outside 16-bit range");
}
indices[i] = (u16)inds32[i];
}
break;
default:
return false;
}
} else {
indices.clear();
Expand Down
10 changes: 10 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Expand Up @@ -62,6 +62,16 @@ class DrawEngineCommon {

VertexDecoder *GetVertexDecoder(u32 vtype);

// Maps the index-format bits of a vertex type to the width of one index:
// 2 for 16-bit indices, 4 for 32-bit indices, and 1 for every other value
// of the masked field.
inline int IndexSize(u32 vtype) const {
	switch (vtype & GE_VTYPE_IDX_MASK) {
	case GE_VTYPE_IDX_16BIT:
		return 2;
	case GE_VTYPE_IDX_32BIT:
		return 4;
	default:
		// Anything that is not explicitly 16-bit or 32-bit is treated as one unit wide.
		return 1;
	}
}

// Vertex collector buffers
u8 *decoded;
u16 *decIndex;
Expand Down
81 changes: 37 additions & 44 deletions GPU/Common/IndexGenerator.cpp
Expand Up @@ -112,7 +112,7 @@ void IndexGenerator::AddStrip(int numVerts) {
inds_ = outInds;
index_ += numVerts;
if (numTris > 0)
count_ += numTris * 3;
count_ += numTris * 3;
// This is so we can detect one single strip by just looking at seenPrims_.
if (!seenPrims_) {
seenPrims_ = 1 << GE_PRIM_TRIANGLE_STRIP;
Expand Down Expand Up @@ -187,10 +187,9 @@ void IndexGenerator::AddRectangles(int numVerts) {
seenPrims_ |= 1 << GE_PRIM_RECTANGLES;
}

template <class IType, class ITypeLE, int flag>
void IndexGenerator::TranslatePoints(int numInds, const IType *_inds, int indexOffset) {
template <class ITypeLE, int flag>
void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
const ITypeLE *inds = (const IType *)_inds;
u16 *outInds = inds_;
for (int i = 0; i < numInds; i++)
*outInds++ = indexOffset + inds[i];
Expand All @@ -200,10 +199,9 @@ void IndexGenerator::TranslatePoints(int numInds, const IType *_inds, int indexO
seenPrims_ |= (1 << GE_PRIM_POINTS) | flag;
}

template <class IType, class ITypeLE, int flag>
void IndexGenerator::TranslateLineList(int numInds, const IType *_inds, int indexOffset) {
template <class ITypeLE, int flag>
void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
const ITypeLE *inds = (ITypeLE*)_inds;
u16 *outInds = inds_;
numInds = numInds & ~1;
for (int i = 0; i < numInds; i += 2) {
Expand All @@ -216,10 +214,9 @@ void IndexGenerator::TranslateLineList(int numInds, const IType *_inds, int inde
seenPrims_ |= (1 << GE_PRIM_LINES) | flag;
}

template <class IType, class ITypeLE, int flag>
void IndexGenerator::TranslateLineStrip(int numInds, const IType *_inds, int indexOffset) {
template <class ITypeLE, int flag>
void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
const ITypeLE *inds = (const ITypeLE*)_inds;
int numLines = numInds - 1;
u16 *outInds = inds_;
for (int i = 0; i < numLines; i++) {
Expand All @@ -232,9 +229,8 @@ void IndexGenerator::TranslateLineStrip(int numInds, const IType *_inds, int ind
seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | flag;
}

template <class IType, class ITypeLE, int flag>
void IndexGenerator::TranslateList(int numInds, const IType *_inds, int indexOffset) {
const ITypeLE *inds = (const IType *)_inds;
template <class ITypeLE, int flag>
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
u16 *outInds = inds_;
int numTris = numInds / 3; // Round to whole triangles
Expand All @@ -250,9 +246,8 @@ void IndexGenerator::TranslateList(int numInds, const IType *_inds, int indexOff
seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | flag;
}

template <class IType, class ITypeLE, int flag>
void IndexGenerator::TranslateStrip(int numInds, const IType *_inds, int indexOffset) {
const IType *inds = (ITypeLE*)_inds;
template <class ITypeLE, int flag>
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset) {
int wind = 1;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
Expand All @@ -269,9 +264,8 @@ void IndexGenerator::TranslateStrip(int numInds, const IType *_inds, int indexOf
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | flag;
}

template <class IType, class ITypeLE, int flag>
void IndexGenerator::TranslateFan(int numInds, const IType *_inds, int indexOffset) {
const ITypeLE *inds = (IType*)_inds;
template <class ITypeLE, int flag>
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset) {
if (numInds <= 0) return;
indexOffset = index_ - indexOffset;
int numTris = numInds - 2;
Expand All @@ -287,10 +281,9 @@ void IndexGenerator::TranslateFan(int numInds, const IType *_inds, int indexOffs
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | flag;
}

template <class IType, class ITypeLE, int flag>
inline void IndexGenerator::TranslateRectangles(int numInds, const IType *_inds, int indexOffset) {
template <class ITypeLE, int flag>
inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
const ITypeLE *inds = (const ITypeLE*)_inds;
u16 *outInds = inds_;
//rectangles always need 2 vertices, disregard the last one if there's an odd number
numInds = numInds & ~1;
Expand All @@ -307,36 +300,36 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const IType *_inds,
// Could template this too, but would have to define in header.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u8, u8, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8, u8, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8, u8, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8, u8, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, u8, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, u8, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8, u8, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
case GE_PRIM_POINTS: TranslatePoints<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break; // Same
}
}

void IndexGenerator::TranslatePrim(int prim, int numInds, const u16 *inds, int indexOffset) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u16, u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16, u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16, u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16, u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16, u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16, u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16, u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
case GE_PRIM_POINTS: TranslatePoints<u16_le, SEEN_INDEX16>(numInds, (const u16_le *)inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le, SEEN_INDEX16>(numInds, (const u16_le *)inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le, SEEN_INDEX16>(numInds, (const u16_le *)inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, (const u16_le *)inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, (const u16_le *)inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, (const u16_le *)inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le, SEEN_INDEX16>(numInds, (const u16_le *)inds, indexOffset); break; // Same
}
}

void IndexGenerator::TranslatePrim(int prim, int numInds, const u32 *inds, int indexOffset) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u32, u32_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32, u32_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32, u32_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32, u32_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32, u32_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32, u32_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32, u32_le, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
case GE_PRIM_POINTS: TranslatePoints<u32_le, SEEN_INDEX32>(numInds, (const u32_le *)inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le, SEEN_INDEX32>(numInds, (const u32_le *)inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le, SEEN_INDEX32>(numInds, (const u32_le *)inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, (const u32_le *)inds, indexOffset); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, (const u32_le *)inds, indexOffset); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, (const u32_le *)inds, indexOffset); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le, SEEN_INDEX32>(numInds, (const u32_le *)inds, indexOffset); break; // Same
}
}
}
35 changes: 18 additions & 17 deletions GPU/Common/IndexGenerator.h
Expand Up @@ -66,26 +66,27 @@ class IndexGenerator {
void AddRectangles(int numVerts);

// These translate already indexed lists
template <class IType, class ITypeLE, int flag>
void TranslatePoints(int numVerts, const IType *inds, int indexOffset);
template <class IType, class ITypeLE, int flag>
void TranslateList(int numVerts, const IType *_inds, int indexOffset);
template <class IType, class ITypeLE, int flag>
inline void TranslateLineList(int numVerts, const IType *inds, int indexOffset);
template <class IType, class ITypeLE, int flag>
inline void TranslateLineStrip(int numVerts, const IType *inds, int indexOffset);

template <class IType, class ITypeLE, int flag>
void TranslateStrip(int numVerts, const IType *inds, int indexOffset);
template <class IType, class ITypeLE, int flag>
void TranslateFan(int numVerts, const IType *inds, int indexOffset);

template <class IType, class ITypeLE, int flag>
inline void TranslateRectangles(int numVerts, const IType *inds, int indexOffset);
template <class ITypeLE, int flag>
void TranslatePoints(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
inline void TranslateLineList(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset);

template <class ITypeLE, int flag>
void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset);
template <class ITypeLE, int flag>
void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset);

template <class ITypeLE, int flag>
inline void TranslateRectangles(int numVerts, const ITypeLE *inds, int indexOffset);

enum {
SEEN_INDEX8 = 1 << 16,
SEEN_INDEX16 = 1 << 17
SEEN_INDEX16 = 1 << 17,
SEEN_INDEX32 = 1 << 18,
};

u16 *indsBase_;
Expand Down
44 changes: 33 additions & 11 deletions GPU/Common/SplineCommon.cpp
Expand Up @@ -791,10 +791,12 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
u16 index_lower_bound = 0;
u16 index_upper_bound = count_u * count_v - 1;
bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
const u8* indices8 = (const u8*)indices;
const u16* indices16 = (const u16*)indices;
bool indices_32bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_32BIT;
const u8 *indices8 = (const u8 *)indices;
const u16 *indices16 = (const u16 *)indices;
const u32 *indices32 = (const u32 *)indices;
if (indices)
GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound);
GetIndexBounds(indices, count_u * count_v, vertType, &index_lower_bound, &index_upper_bound);

// Simplify away bones and morph before proceeding
SimpleVertex *simplified_control_points = (SimpleVertex *)(decoded + 65536 * 12);
Expand All @@ -815,10 +817,19 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi

// Make an array of pointers to the control points, to get rid of indices.
for (int idx = 0; idx < count_u * count_v; idx++) {
if (indices)
points[idx] = simplified_control_points + (indices_16bit ? indices16[idx] : indices8[idx]);
else
if (indices) {
u32 ind;
if (indices_32bit) {
ind = indices32[idx];
} else if (indices_16bit) {
ind = indices16[idx];
} else {
ind = indices8[idx];
}
points[idx] = simplified_control_points + ind;
} else {
points[idx] = simplified_control_points + idx;
}
}

int count = 0;
Expand Down Expand Up @@ -876,8 +887,10 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
u16 index_lower_bound = 0;
u16 index_upper_bound = count_u * count_v - 1;
bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
const u8* indices8 = (const u8*)indices;
const u16* indices16 = (const u16*)indices;
bool indices_32bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_32BIT;
const u8 *indices8 = (const u8 *)indices;
const u16 *indices16 = (const u16 *)indices;
const u32 *indices32 = (const u32 *)indices;
if (indices)
GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound);

Expand Down Expand Up @@ -905,10 +918,19 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
BezierPatch& patch = patches[patch_u + patch_v * num_patches_u];
for (int point = 0; point < 16; ++point) {
int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u;
if (indices)
patch.points[point] = simplified_control_points + (indices_16bit ? indices16[idx] : indices8[idx]);
else
if (indices) {
u32 ind;
if (indices_32bit) {
ind = indices32[idx];
} else if (indices_16bit) {
ind = indices16[idx];
} else {
ind = indices8[idx];
}
patch.points[point] = simplified_control_points + ind;
} else {
patch.points[point] = simplified_control_points + idx;
}
}
patch.u_index = patch_u * 3;
patch.v_index = patch_v * 3;
Expand Down
16 changes: 15 additions & 1 deletion GPU/Common/VertexDecoderCommon.cpp
Expand Up @@ -95,14 +95,28 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo
lowerBound = value;
}
} else if (idx == GE_VTYPE_IDX_16BIT) {
const u16 *ind16 = (const u16*)inds;
const u16 *ind16 = (const u16 *)inds;
for (int i = 0; i < count; i++) {
u16 value = ind16[i];
if (value > upperBound)
upperBound = value;
if (value < lowerBound)
lowerBound = value;
}
} else if (idx == GE_VTYPE_IDX_32BIT) {
WARN_LOG_REPORT_ONCE(indexBounds32, G3D, "GetIndexBounds: Decoding 32-bit indexes");
const u32 *ind32 = (const u32 *)inds;
for (int i = 0; i < count; i++) {
u16 value = (u16)ind32[i];
// These aren't documented and should be rare. Let's bounds check each one.
if (ind32[i] != value) {
ERROR_LOG_REPORT_ONCE(indexBounds32Bounds, G3D, "GetIndexBounds: Index outside 16-bit range");
}
if (value > upperBound)
upperBound = value;
if (value < lowerBound)
lowerBound = value;
}
} else {
lowerBound = 0;
upperBound = count - 1;
Expand Down
4 changes: 2 additions & 2 deletions GPU/Directx9/DrawEngineDX9.cpp
Expand Up @@ -462,7 +462,7 @@ inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) {
u32 DrawEngineDX9::ComputeMiniHash() {
u32 fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;
const int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
const int indexSize = IndexSize(dec_->VertexType());

int step;
if (numDrawCalls < 3) {
Expand Down Expand Up @@ -501,7 +501,7 @@ void DrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) {
ReliableHashType DrawEngineDX9::ComputeHash() {
ReliableHashType fullhash = 0;
const int vertexSize = dec_->GetDecVtxFmt().stride;
const int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
const int indexSize = IndexSize(dec_->VertexType());

// TODO: Add some caps both for numDrawCalls and num verts to check?
// It is really very expensive to check all the vertex data so often.
Expand Down
2 changes: 2 additions & 0 deletions GPU/Directx9/GPU_DX9.cpp
Expand Up @@ -840,6 +840,8 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
int indexSize = 1;
if ((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT)
indexSize = 2;
else if ((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_32BIT)
indexSize = 4;
gstate_c.indexAddr += count * indexSize;
} else {
gstate_c.vertexAddr += bytesRead;
Expand Down

0 comments on commit 8ab9bcc

Please sign in to comment.