@@ -20,28 +20,92 @@ namespace Rasterizer
{
// NOTE(review): presumably the edge length, in pixels, of the square pixel blocks processed by
// BuildBlock -- the constant is not referenced in the visible code, so confirm against its users.
static constexpr int BLOCK_SIZE = 2 ;
// Per-triangle setup data shared by every Slope built for the same triangle.
//
// Holds the integer reference pixel (x0, y0), the offset of that pixel from vertex 0 in screen
// space (plus a fixed sub-pixel adjustment), and the screen-space x/y deltas of vertices 1 and 2
// relative to vertex 0.
struct SlopeContext
{
  // v0, v1, v2: the triangle's vertices; only their screenPosition x/y components are read.
  // x0, y0:     integer pixel coordinates used as the reference point for interpolation.
  SlopeContext(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2,
               s32 x0, s32 y0)
      : x0(x0), y0(y0)
  {
    // Shift the sample position by slightly less than half a pixel.
    const float adjust = 0.495f;

    const auto& p0 = v0->screenPosition;
    const auto& p1 = v1->screenPosition;
    const auto& p2 = v2->screenPosition;

    // Distance from vertex 0 to the (adjusted) reference pixel.
    xOff = (static_cast<float>(x0) - p0.x) + adjust;
    yOff = (static_cast<float>(y0) - p0.y) + adjust;

    // Edge vectors of the triangle, both anchored at vertex 0.
    dx10 = p1.x - p0.x;
    dx20 = p2.x - p0.x;
    dy10 = p1.y - p0.y;
    dy20 = p2.y - p0.y;
  }

  // Integer reference pixel.
  s32 x0;
  s32 y0;
  // Reference pixel minus vertex 0 position, plus the sub-pixel adjustment.
  float xOff;
  float yOff;
  // Screen-space deltas: vertex 1 minus vertex 0, and vertex 2 minus vertex 0.
  float dx10;
  float dx20;
  float dy10;
  float dy20;
};
struct Slope
{
Slope () = default ;
Slope (float f0, float f1, float f2, const SlopeContext& ctx) : f0(f0)
{
float delta_20 = f2 - f0;
float delta_10 = f1 - f0;
// x2 - x0 y1 - y0 x1 - x0 y2 - y0
float a = delta_20 * ctx.dy10 - delta_10 * ctx.dy20 ;
float b = ctx.dx20 * delta_10 - ctx.dx10 * delta_20;
float c = ctx.dx20 * ctx.dy10 - ctx.dx10 * ctx.dy20 ;
dfdx = a / c;
dfdy = b / c;
x0 = ctx.x0 ;
y0 = ctx.y0 ;
xOff = ctx.xOff ;
yOff = ctx.yOff ;
}
// These default values are used in the unlikely case that zfreeze is enabled when drawing the
// first primitive.
// TODO: This is just a guess!
float dfdx = 0 .0f ;
float dfdy = 0 .0f ;
float f0 = 1 .0f ;
// Both an s32 value and a float value are used to minimize rounding error
// TODO: is this really needed?
s32 x0 = 0 ;
s32 y0 = 0 ;
float xOff = 0 .0f ;
float yOff = 0 .0f ;
float GetValue (s32 x, s32 y) const
{
float dx = xOff + (float )(x - x0);
float dy = yOff + (float )(y - y0 );
return f0 + (dfdx * dx) + (dfdy * dy);
}
};
// File-level rasterizer state.

// Plane for screen-space z. UpdateZSlope leaves it unchanged while bpmem.genMode.zfreeze is
// set, so it can hold a value computed for an earlier primitive.
static Slope ZSlope;
// Plane for 1 / projectedPosition.w, set in DrawTriangleFrontFace.
static Slope WSlope;
// Color planes, indexed [color channel][component]; set in DrawTriangleFrontFace.
static Slope ColorSlopes[2 ][4 ];
// Texture coordinate planes, indexed [texgen][component]; the interpolated values are the
// vertex texCoords multiplied by 1 / w.
static Slope TexSlopes[8 ][3 ];
// Reference pixel and offset written by InitTriangle and read by Draw and BuildBlock.
static s32 vertex0X;
static s32 vertex0Y;
static float vertexOffsetX;
static float vertexOffsetY;
// TEV instance; initialized in Init and invoked at the end of Draw.
static Tev tev;
// Per-pixel InvW and Uv values filled in by BuildBlock.
static RasterBlock rasterBlock;
// Initializes the rasterizer state: runs tev.Init() and resets the z reference plane.
void Init()
{
  tev.Init();

  // The other slopes are set for each primitive drawn, but zfreeze means that the z slope may be
  // read before any primitive has updated it. Reset it to Slope's (untested) defaults
  // (dfdx = dfdy = 0, f0 = 1) for the unlikely case that zfreeze is enabled when drawing the
  // first primitive.
  // TODO: This is just a guess!
  ZSlope = Slope();
}
// Returns approximation of log2(f) in s28.4
@@ -75,12 +139,9 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
{
INCSTAT (g_stats.this_frame .rasterized_pixels );
float dx = vertexOffsetX + (float )(x - vertex0X);
float dy = vertexOffsetY + (float )(y - vertex0Y);
s32 z = (s32)std::clamp<float >(ZSlope.GetValue (dx, dy), 0 .0f , 16777215 .0f );
s32 z = (s32)std::clamp<float >(ZSlope.GetValue (x, y), 0 .0f , 16777215 .0f );
if (bpmem.UseEarlyDepthTest () && g_ActiveConfig. bZComploc )
if (bpmem.UseEarlyDepthTest ())
{
// TODO: Test if perf regs are incremented even if test is disabled
EfbInterface::IncPerfCounterQuadCount (PQ_ZCOMP_INPUT_ZCOMPLOC);
@@ -104,7 +165,7 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
{
for (int comp = 0 ; comp < 4 ; comp++)
{
u16 color = (u16)ColorSlopes[i][comp].GetValue (dx, dy );
u16 color = (u16)ColorSlopes[i][comp].GetValue (x, y );
// clamp color value to 0
u16 mask = ~(color >> 8 );
@@ -136,31 +197,6 @@ static void Draw(s32 x, s32 y, s32 xi, s32 yi)
tev.Draw ();
}
// Stores the reference pixel (xi, yi) for the current triangle together with its offset from
// the vertex position (X1, Y1).
static void InitTriangle(float X1, float Y1, s32 xi, s32 yi)
{
  // Shift the sample position by slightly less than half a pixel.
  constexpr float sub_pixel_adjust = 0.495f;

  vertexOffsetX = (static_cast<float>(xi) - X1) + sub_pixel_adjust;
  vertexOffsetY = (static_cast<float>(yi) - Y1) + sub_pixel_adjust;

  vertex0X = xi;
  vertex0Y = yi;
}
// Fills *slope with the plane through the values f1, f2 and f3.
//
// DX31, DX12, DY12 and DY31 are the x/y deltas between vertices, passed in by the caller
// (3 minus 1, and 1 minus 2). f1 becomes the plane's base value f0.
static void InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, float DX12,
                      float DY12, float DY31)
{
  const float diff31 = f3 - f1;
  const float diff21 = f2 - f1;

  const float numeratorX = diff31 * -DY12 - diff21 * DY31;
  const float numeratorY = DX31 * diff21 + DX12 * diff31;
  const float denominator = -DX12 * DY31 - DX31 * -DY12;

  Slope& result = *slope;
  result.f0 = f1;
  result.dfdx = -numeratorX / denominator;
  result.dfdy = -numeratorY / denominator;
}
static inline void CalculateLOD (s32* lodp, bool * linear, u32 texmap, u32 texcoord)
{
auto texUnit = bpmem.tex .GetUnit (texmap);
@@ -220,22 +256,22 @@ static void BuildBlock(s32 blockX, s32 blockY)
{
RasterBlockPixel& pixel = rasterBlock.Pixel [xi][yi];
float dx = vertexOffsetX + ( float )( xi + blockX - vertex0X) ;
float dy = vertexOffsetY + ( float )( yi + blockY - vertex0Y) ;
s32 x = xi + blockX;
s32 y = yi + blockY;
float invW = 1 .0f / WSlope.GetValue (dx, dy );
float invW = 1 .0f / WSlope.GetValue (x, y );
pixel.InvW = invW;
// tex coords
for (unsigned int i = 0 ; i < bpmem.genMode .numtexgens ; i++)
{
float projection = invW;
float q = TexSlopes[i][2 ].GetValue (dx, dy ) * invW;
float q = TexSlopes[i][2 ].GetValue (x, y ) * invW;
if (q != 0 .0f )
projection = invW / q;
pixel.Uv [i][0 ] = TexSlopes[i][0 ].GetValue (dx, dy ) * projection;
pixel.Uv [i][1 ] = TexSlopes[i][1 ].GetValue (dx, dy ) * projection;
pixel.Uv [i][0 ] = TexSlopes[i][0 ].GetValue (x, y ) * projection;
pixel.Uv [i][1 ] = TexSlopes[i][1 ].GetValue (x, y ) * projection;
}
}
}
@@ -265,11 +301,27 @@ static void BuildBlock(s32 blockX, s32 blockY)
}
}
// Recomputes ZSlope from the triangle's screen-space z values, unless zfreeze is enabled, in
// which case the existing ZSlope is left as it is.
void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1,
                  const OutputVertexData* v2)
{
  if (bpmem.genMode.zfreeze)
    return;

  // Vertex 0 in fixed point with 4 fractional bits (scaled by 16).
  // NOTE(review): the -9 bias presumably matches the hardware adjustment used for the
  // fixed-point coordinates in DrawTriangleFrontFace -- confirm.
  const s32 fixed_x = iround(16.0f * v0->screenPosition[0]) - 9;
  const s32 fixed_y = iround(16.0f * v0->screenPosition[1]) - 9;

  // Round up to the next whole pixel.
  const s32 pixel_x = (fixed_x + 0xF) >> 4;
  const s32 pixel_y = (fixed_y + 0xF) >> 4;

  const SlopeContext ctx(v0, v1, v2, pixel_x, pixel_y);
  ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx);
}
void DrawTriangleFrontFace (const OutputVertexData* v0, const OutputVertexData* v1,
const OutputVertexData* v2)
{
INCSTAT (g_stats.this_frame .num_triangles_drawn );
// The zslope should be updated now, even if the triangle is rejected by the scissor test, as
// zfreeze depends on it
UpdateZSlope (v0, v1, v2);
// adapted from http://devmaster.net/posts/6145/advanced-rasterization
// 28.4 fixed-point coordinates, rounded to nearest and adjusted to match hardware output
@@ -334,42 +386,26 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v
if (minx >= maxx || miny >= maxy)
return ;
// Setup slopes
float fltx1 = v0->screenPosition .x ;
float flty1 = v0->screenPosition .y ;
float fltdx31 = v2->screenPosition .x - fltx1;
float fltdx12 = fltx1 - v1->screenPosition .x ;
float fltdy12 = flty1 - v1->screenPosition .y ;
float fltdy31 = v2->screenPosition .y - flty1;
InitTriangle (fltx1, flty1, (X1 + 0xF ) >> 4 , (Y1 + 0xF ) >> 4 );
// Set up the remaining slopes
const SlopeContext ctx (v0, v1, v2, (X1 + 0xF ) >> 4 , (Y1 + 0xF ) >> 4 );
float w[3 ] = {1 .0f / v0->projectedPosition .w , 1 .0f / v1->projectedPosition .w ,
1 .0f / v2->projectedPosition .w };
InitSlope (&WSlope, w[0 ], w[1 ], w[2 ], fltdx31, fltdx12, fltdy12, fltdy31);
// TODO: The zfreeze emulation is not quite correct, yet!
// Many things might prevent us from reaching this line (culling, clipping, scissoring).
// However, the zslope is always guaranteed to be calculated unless all vertices are trivially
// rejected during clipping!
// We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring
// tests fail.
if (!bpmem.genMode .zfreeze || !g_ActiveConfig.bZFreeze )
InitSlope (&ZSlope, v0->screenPosition [2 ], v1->screenPosition [2 ], v2->screenPosition [2 ], fltdx31,
fltdx12, fltdy12, fltdy31);
WSlope = Slope (w[0 ], w[1 ], w[2 ], ctx);
for (unsigned int i = 0 ; i < bpmem.genMode .numcolchans ; i++)
{
for (int comp = 0 ; comp < 4 ; comp++)
InitSlope (&ColorSlopes[i][comp], v0->color [i][comp], v1->color [i][comp], v2->color [i][comp],
fltdx31, fltdx12, fltdy12, fltdy31);
ColorSlopes[i][comp] = Slope (v0->color [i][comp], v1->color [i][comp], v2->color [i][comp], ctx);
}
for (unsigned int i = 0 ; i < bpmem.genMode .numtexgens ; i++)
{
for (int comp = 0 ; comp < 3 ; comp++)
InitSlope (&TexSlopes[i][comp], v0->texCoords [i][comp] * w[0 ], v1->texCoords [i][comp] * w[1 ],
v2->texCoords [i][comp] * w[2 ], fltdx31, fltdx12, fltdy12, fltdy31);
{
TexSlopes[i][comp] = Slope (v0->texCoords [i][comp] * w[0 ], v1->texCoords [i][comp] * w[1 ],
v2->texCoords [i][comp] * w[2 ], ctx);
}
}
// Half-edge constants