Skip to content

Commit

Permalink
Always clamp in ToRGB[A]?().
Browse files Browse the repository at this point in the history
Previously we only clamped in the SSE path; it is better to be consistent.  This may also
be slightly faster.
  • Loading branch information
unknownbrackets committed Oct 31, 2014
1 parent 0df4afc commit eee3ac7
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 21 deletions.
4 changes: 2 additions & 2 deletions GPU/Common/SoftwareTransformCommon.cpp
Expand Up @@ -272,9 +272,9 @@ void SoftwareTransform(
c1[j] = litColor1[j];
}
} else {
// Summed color into c0
// Summed color into c0 (will clamp in ToRGBA().)
for (int j = 0; j < 4; j++) {
c0[j] = ((c0[j] + litColor1[j]) > 1.0f) ? 1.0f : (c0[j] + litColor1[j]);
c0[j] += litColor1[j];
}
}
} else {
Expand Down
6 changes: 3 additions & 3 deletions GPU/Common/TransformCommon.cpp
Expand Up @@ -174,9 +174,9 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[
}
}

// 4?
// The colors must eventually be clamped, but we expect the caller to do that.
for (int i = 0; i < 4; i++) {
colorOut0[i] = lightSum0[i] > 1.0f ? 1.0f : lightSum0[i];
colorOut1[i] = lightSum1[i] > 1.0f ? 1.0f : lightSum1[i];
colorOut0[i] = lightSum0[i];
colorOut1[i] = lightSum1[i];
}
}
18 changes: 9 additions & 9 deletions GPU/Math3D.h
Expand Up @@ -967,9 +967,9 @@ __forceinline unsigned int Vec3<float>::ToRGB() const
__m128i c16 = _mm_packs_epi32(c, c);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
#else
return ((unsigned int)(r()*255.f) << 0) |
((unsigned int)(g()*255.f) << 8) |
((unsigned int)(b()*255.f) << 16);
return (clamp_u8((int)(r() * 255.f)) << 0) |
(clamp_u8((int)(g() * 255.f)) << 8) |
(clamp_u8((int)(b() * 255.f)) << 16);
#endif
}

Expand All @@ -980,7 +980,7 @@ __forceinline unsigned int Vec3<int>::ToRGB() const
__m128i c16 = _mm_packs_epi32(ivec, ivec);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
#else
return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16);
return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16);
#endif
}

Expand Down Expand Up @@ -1021,10 +1021,10 @@ __forceinline unsigned int Vec4<float>::ToRGBA() const
__m128i c16 = _mm_packs_epi32(c, c);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
#else
return ((unsigned int)(r()*255.f) << 0) |
((unsigned int)(g()*255.f) << 8) |
((unsigned int)(b()*255.f) << 16) |
((unsigned int)(a()*255.f) << 24);
return (clamp_u8((int)(r() * 255.f)) << 0) |
(clamp_u8((int)(g() * 255.f)) << 8) |
(clamp_u8((int)(b() * 255.f)) << 16) |
(clamp_u8((int)(a() * 255.f)) << 24);
#endif
}

Expand All @@ -1035,7 +1035,7 @@ __forceinline unsigned int Vec4<int>::ToRGBA() const
__m128i c16 = _mm_packs_epi32(ivec, ivec);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
#else
return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16) | ((a()&0xFF)<<24);
return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16) | (clamp_u8(a()) << 24);
#endif
}

Expand Down
8 changes: 1 addition & 7 deletions GPU/Software/Rasterizer.cpp
Expand Up @@ -993,20 +993,14 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, const Vec4<int> &colo

if (gstate.isAlphaBlendEnabled() && !clearMode) {
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
#if defined(_M_SSE)
// ToRGBA() on SSE automatically clamps.
// ToRGBA() always automatically clamps.
new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
new_color |= stencil << 24;
#else
new_color = Vec4<int>(AlphaBlendingResult(prim_color, dst).Clamp(0, 255), stencil).ToRGBA();
#endif
} else {
#if defined(_M_SSE)
new_color = Vec3<int>(prim_color.ivec).ToRGB();
new_color |= stencil << 24;
#else
if (!clearMode)
prim_color = prim_color.Clamp(0, 255);
new_color = Vec4<int>(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA();
#endif
}
Expand Down

0 comments on commit eee3ac7

Please sign in to comment.