diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 8903137d0e8e..95d6d5ad2eb4 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -520,7 +520,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, dc.verts, indexLowerBound, indexUpperBound); decodedVerts += indexUpperBound - indexLowerBound + 1; - indexGen.AddPrim(dc.prim, dc.vertexCount); + + bool clockwise = true; + if (dc.cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != dc.cullMode) { + clockwise = false; + } + indexGen.AddPrim(dc.prim, dc.vertexCount, clockwise); } else { // It's fairly common that games issue long sequences of PRIM calls, with differing // inds pointer but the same base vertex pointer. We'd like to reuse vertices between @@ -544,17 +549,29 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { switch (dc.indexType) { case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound); + bool clockwise = true; + if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) { + clockwise = false; + } + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound, clockwise); } break; case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound); + bool clockwise = true; + if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) { + clockwise = false; + } + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16_le *)drawCalls[j].inds, indexLowerBound, clockwise); } break; case GE_VTYPE_IDX_32BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound); + bool clockwise = true; + if (drawCalls[j].cullMode != -1 && gstate.isCullEnabled() && gstate.getCullMode() != drawCalls[j].cullMode) { + clockwise = false; + } + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u32_le *)drawCalls[j].inds, indexLowerBound, clockwise); } break; } @@ -659,7 +676,7 @@ ReliableHashType DrawEngineCommon::ComputeHash() { } // vertTypeID is the vertex type but with the UVGen mode smashed into the top bits. -void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) { +void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { DispatchFlush(); } @@ -697,6 +714,7 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, dc.prim = prim; dc.vertexCount = vertexCount; dc.uvScale = gstate_c.uv; + dc.cullMode = cullMode; if (inds) { GetIndexBounds(inds, vertexCount, vertTypeID, &dc.indexLowerBound, &dc.indexUpperBound); diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 45d9a185abdc..7e7f09443be2 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -67,12 +67,12 @@ class DrawEngineCommon { // is different. Should probably refactor that. // Note that vertTypeID should be computed using GetVertTypeID(). virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead) { - SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, bytesRead); + SubmitPrim(verts, inds, prim, vertexCount, vertTypeID, -1, bytesRead); } bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead); - void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int *bytesRead); + void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead); void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead); void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead); @@ -143,6 +143,7 @@ class DrawEngineCommon { u16 indexLowerBound; u16 indexUpperBound; UVScale uvScale; + int cullMode; }; enum { MAX_DEFERRED_DRAW_CALLS = 128 }; diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 46f076cee173..9c1b13d42db8 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -36,14 +36,14 @@ void IndexGenerator::Setup(u16 *inds) { Reset(); } -void IndexGenerator::AddPrim(int prim, int vertexCount) { +void IndexGenerator::AddPrim(int prim, int vertexCount, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: AddPoints(vertexCount); break; case GE_PRIM_LINES: AddLineList(vertexCount); break; case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break; - case GE_PRIM_TRIANGLES: AddList(vertexCount); break; - case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount); break; - case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount); break; + case GE_PRIM_TRIANGLES: AddList(vertexCount, clockwise); break; + case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, clockwise); break; case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break; // Same } } @@ -61,13 +61,15 @@ void IndexGenerator::AddPoints(int numVerts) { seenPrims_ |= 1 << GE_PRIM_POINTS; } -void IndexGenerator::AddList(int numVerts) { +void IndexGenerator::AddList(int numVerts, bool clockwise) { u16 *outInds = inds_; const int startIndex = index_; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numVerts; i += 3) { *outInds++ = startIndex + i; - *outInds++ = startIndex + i + 1; - *outInds++ = startIndex + i + 2; + *outInds++ = startIndex + i + v1; + *outInds++ = startIndex + i + v2; } inds_ = outInds; // ignore overflow verts @@ -77,8 +79,8 @@ void IndexGenerator::AddList(int numVerts) { seenPrims_ |= 1 << GE_PRIM_TRIANGLES; } -void IndexGenerator::AddStrip(int numVerts) { - int wind = 1; +void IndexGenerator::AddStrip(int numVerts, bool clockwise) { + int wind = clockwise ? 1 : 2; const int numTris = numVerts - 2; u16 *outInds = inds_; int ibase = index_; @@ -105,14 +107,16 @@ void IndexGenerator::AddStrip(int numVerts) { } } -void IndexGenerator::AddFan(int numVerts) { +void IndexGenerator::AddFan(int numVerts, bool clockwise) { const int numTris = numVerts - 2; u16 *outInds = inds_; const int startIndex = index_; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numTris; i++) { *outInds++ = startIndex; - *outInds++ = startIndex + i + 1; - *outInds++ = startIndex + i + 2; + *outInds++ = startIndex + i + v1; + *outInds++ = startIndex + i + v2; } inds_ = outInds; index_ += numVerts; @@ -210,7 +214,7 @@ void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int in } template -void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset) { +void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { indexOffset = index_ - indexOffset; // We only bother doing this minor optimization in triangle list, since it's by far the most // common operation that can benefit. @@ -222,10 +226,12 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf u16 *outInds = inds_; int numTris = numInds / 3; // Round to whole triangles numInds = numTris * 3; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numInds; i += 3) { *outInds++ = indexOffset + inds[i]; - *outInds++ = indexOffset + inds[i + 1]; - *outInds++ = indexOffset + inds[i + 2]; + *outInds++ = indexOffset + inds[i + v1]; + *outInds++ = indexOffset + inds[i + v2]; } inds_ = outInds; count_ += numInds; @@ -235,8 +241,8 @@ void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOf } template -void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset) { - int wind = 1; +void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { + int wind = clockwise ? 1 : 2; indexOffset = index_ - indexOffset; int numTris = numInds - 2; u16 *outInds = inds_; @@ -253,15 +259,17 @@ void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexO } template -void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset) { +void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) { if (numInds <= 0) return; indexOffset = index_ - indexOffset; int numTris = numInds - 2; u16 *outInds = inds_; + const int v1 = clockwise ? 1 : 2; + const int v2 = clockwise ? 2 : 1; for (int i = 0; i < numTris; i++) { *outInds++ = indexOffset + inds[0]; - *outInds++ = indexOffset + inds[i + 1]; - *outInds++ = indexOffset + inds[i + 2]; + *outInds++ = indexOffset + inds[i + v1]; + *outInds++ = indexOffset + inds[i + v2]; } inds_ = outInds; count_ += numTris * 3; @@ -286,38 +294,38 @@ inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds } // Could template this too, but would have to define in header. -void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } -void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } -void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset) { +void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) { switch (prim) { case GE_PRIM_POINTS: TranslatePoints(numInds, inds, indexOffset); break; case GE_PRIM_LINES: TranslateLineList(numInds, inds, indexOffset); break; case GE_PRIM_LINE_STRIP: TranslateLineStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset); break; - case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset); break; + case GE_PRIM_TRIANGLES: TranslateList(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_STRIP: TranslateStrip(numInds, inds, indexOffset, clockwise); break; + case GE_PRIM_TRIANGLE_FAN: TranslateFan(numInds, inds, indexOffset, clockwise); break; case GE_PRIM_RECTANGLES: TranslateRectangles(numInds, inds, indexOffset); break; // Same } } diff --git a/GPU/Common/IndexGenerator.h b/GPU/Common/IndexGenerator.h index 23185c7215ec..6fb24930eef4 100644 --- a/GPU/Common/IndexGenerator.h +++ b/GPU/Common/IndexGenerator.h @@ -49,10 +49,10 @@ class IndexGenerator { GEPrimitiveType Prim() const { return prim_; } - void AddPrim(int prim, int vertexCount); - void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset); - void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset); - void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset); + void AddPrim(int prim, int vertexCount, bool clockwise = true); + void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise); + void TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise); + void TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise); void Advance(int numVerts) { index_ += numVerts; @@ -75,9 +75,9 @@ class IndexGenerator { // Points (why index these? code simplicity) void AddPoints(int numVerts); // Triangles - void AddList(int numVerts); - void AddStrip(int numVerts); - void AddFan(int numVerts); + void AddList(int numVerts, bool clockwise); + void AddStrip(int numVerts, bool clockwise); + void AddFan(int numVerts, bool clockwise); // Lines void AddLineList(int numVerts); void AddLineStrip(int numVerts); @@ -88,16 +88,16 @@ class IndexGenerator { template void TranslatePoints(int numVerts, const ITypeLE *inds, int indexOffset); template - void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset); + void TranslateList(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); template inline void TranslateLineList(int numVerts, const ITypeLE *inds, int indexOffset); template inline void TranslateLineStrip(int numVerts, const ITypeLE *inds, int indexOffset); template - void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset); + void TranslateStrip(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); template - void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset); + void TranslateFan(int numVerts, const ITypeLE *inds, int indexOffset, bool clockwise); template inline void TranslateRectangles(int numVerts, const ITypeLE *inds, int indexOffset); diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 262ed460a783..1ab2977c8086 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1530,8 +1530,11 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { int bytesRead = 0; UpdateUVScaleOffset(); + // cull mode + int cullMode = gstate.isCullEnabled() ? gstate.getCullMode() : -1; + uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode()); - drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, &bytesRead); + drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead); // After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed). // Some games rely on this, they don't bother reloading VADDR and IADDR. // The VADDR/IADDR registers are NOT updated. @@ -1577,7 +1580,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { inds = Memory::GetPointerUnchecked(gstate_c.indexAddr); } - drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, &bytesRead); + if (newPrim != GE_PRIM_TRIANGLE_STRIP && cullMode != -1 && cullMode != gstate.getCullMode()) { + DEBUG_LOG(G3D, "flush cull mode before prim: %d", newPrim); + drawEngineCommon_->DispatchFlush(); + gstate.cmdmem[GE_CMD_CULL] ^= 1; + gstate_c.Dirty(DIRTY_RASTER_STATE); + } + + drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, vertTypeID, cullMode, &bytesRead); AdvanceVerts(vertexType, count, bytesRead); totalVertCount += count; break; @@ -1604,6 +1614,10 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { case GE_CMD_BASE: gstate.cmdmem[GE_CMD_BASE] = data; break; + case GE_CMD_CULL: + // flip face by indices for GE_PRIM_TRIANGLE_STRIP + cullMode = data & 1; + break; case GE_CMD_NOP: case GE_CMD_NOP_FF: break; @@ -1618,6 +1632,14 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { gstate.cmdmem[GE_CMD_TEXSCALEV] = data; gstate_c.uv.vScale = getFloat24(data); break; + case GE_CMD_TEXOFFSETU: + gstate.cmdmem[GE_CMD_TEXOFFSETU] = data; + gstate_c.uv.uOff = getFloat24(data); + break; + case GE_CMD_TEXOFFSETV: + gstate.cmdmem[GE_CMD_TEXOFFSETV] = data; + gstate_c.uv.vOff = getFloat24(data); + break; case GE_CMD_TEXLEVEL: // Same Gran Turismo hack from Execute_TexLevel if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) { @@ -1654,6 +1676,12 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { if (cmdCount > 0) { UpdatePC(currentList->pc, currentList->pc + cmdCount * 4); currentList->pc += cmdCount * 4; + // flush back cull mode + if (cullMode != -1 && cullMode != gstate.getCullMode()) { + drawEngineCommon_->DispatchFlush(); + gstate.cmdmem[GE_CMD_CULL] ^= 1; + gstate_c.Dirty(DIRTY_RASTER_STATE); + } } gpuStats.vertexGPUCycles += vertexCost_ * totalVertCount;