Expand Up
@@ -15,6 +15,8 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include < limits>
#include " Common.h"
#include " VideoCommon.h"
#include " VertexLoader.h"
Expand Down
Expand Up
@@ -71,189 +73,65 @@ MOVUPS(MOffset(EDI, 0), XMM0);
*/
// ==============================================================================
// Direct
// ==============================================================================
template <class T , bool three>
void Pos_ReadDirect ()
template <typename T>
float PosScale (T val)
{
((float *)VertexManager::s_pCurBufferPointer)[0 ] = (float )(T)DataRead<T>() * posScale;
((float *)VertexManager::s_pCurBufferPointer)[1 ] = (float )(T)DataRead<T>() * posScale;
if (three)
((float *)VertexManager::s_pCurBufferPointer)[2 ] = (float )(T)DataRead<T>() * posScale;
else
((float *)VertexManager::s_pCurBufferPointer)[2 ] = 0 .0f ;
LOG_VTX ();
VertexManager::s_pCurBufferPointer += 12 ;
return val * posScale;
}
void LOADERDECL Pos_ReadDirect_UByte3 () { Pos_ReadDirect<u8, true >(); }
void LOADERDECL Pos_ReadDirect_Byte3 () { Pos_ReadDirect<s8, true >(); }
void LOADERDECL Pos_ReadDirect_UShort3 () { Pos_ReadDirect<u16, true >(); }
void LOADERDECL Pos_ReadDirect_Short3 () { Pos_ReadDirect<s16, true >(); }
void LOADERDECL Pos_ReadDirect_UByte2 () { Pos_ReadDirect<u8, false >(); }
void LOADERDECL Pos_ReadDirect_Byte2 () { Pos_ReadDirect<s8, false >(); }
void LOADERDECL Pos_ReadDirect_UShort2 () { Pos_ReadDirect<u16, false >(); }
void LOADERDECL Pos_ReadDirect_Short2 () { Pos_ReadDirect<s16, false >(); }
void LOADERDECL Pos_ReadDirect_Float3 ()
{
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0 ] = DataReadU32 ();
((u32 *)VertexManager::s_pCurBufferPointer)[1 ] = DataReadU32 ();
((u32 *)VertexManager::s_pCurBufferPointer)[2 ] = DataReadU32 ();
LOG_VTX ();
VertexManager::s_pCurBufferPointer += 12 ;
}
template <>
float PosScale (float val)
{ return val; }
void LOADERDECL Pos_ReadDirect_Float2 ()
template <typename T, int N>
void LOADERDECL Pos_ReadDirect ()
{
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0 ] = DataReadU32 ();
((u32 *)VertexManager::s_pCurBufferPointer)[1 ] = DataReadU32 ();
((u32 *)VertexManager::s_pCurBufferPointer)[2 ] = 0 ;
static_assert (N <= 3 , " N > 3 is not sane!" );
for (int i = 0 ; i < 3 ; ++i)
DataWrite (i<N ? PosScale (DataRead<T>()) : 0 .f );
LOG_VTX ();
VertexManager::s_pCurBufferPointer += 12 ;
}
template <class T , bool three,int MaxSize>
inline void Pos_ReadIndex_Byte (int Index)
{
if (Index < MaxSize)
{
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
((float *)VertexManager::s_pCurBufferPointer)[0 ] = ((float )(T)(pData[0 ])) * posScale;
((float *)VertexManager::s_pCurBufferPointer)[1 ] = ((float )(T)(pData[1 ])) * posScale;
if (three)
((float *)VertexManager::s_pCurBufferPointer)[2 ] = ((float )(T)(pData[2 ])) * posScale;
else
((float *)VertexManager::s_pCurBufferPointer)[2 ] = 0 .0f ;
LOG_VTX ();
VertexManager::s_pCurBufferPointer += 12 ;
}
}
template <class T , bool three,int MaxSize>
inline void Pos_ReadIndex_Short (int Index)
{
if (Index < MaxSize)
{
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
((float *)VertexManager::s_pCurBufferPointer)[0 ] = ((float )(T)Common::swap16 (pData[0 ])) * posScale;
((float *)VertexManager::s_pCurBufferPointer)[1 ] = ((float )(T)Common::swap16 (pData[1 ])) * posScale;
if (three)
((float *)VertexManager::s_pCurBufferPointer)[2 ] = ((float )(T)Common::swap16 (pData[2 ])) * posScale;
else
((float *)VertexManager::s_pCurBufferPointer)[2 ] = 0 .0f ;
LOG_VTX ();
VertexManager::s_pCurBufferPointer += 12 ;
}
}
template < bool three, int MaxSize >
void Pos_ReadIndex_Float ( int Index )
template < typename I, typename T, int N >
void LOADERDECL Pos_ReadIndex ( )
{
if (Index < MaxSize)
static_assert (!std::numeric_limits<I>::is_signed, " Only unsigned I is sane!" );
static_assert (N <= 3 , " N > 3 is not sane!" );
auto const index = DataRead<I>();
if (index < std::numeric_limits<I>::max ())
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
((u32*)VertexManager::s_pCurBufferPointer)[0 ] = Common::swap32 (pData[0 ]);
((u32*)VertexManager::s_pCurBufferPointer)[1 ] = Common::swap32 (pData[1 ]);
if (three)
((u32*)VertexManager::s_pCurBufferPointer)[2 ] = Common::swap32 (pData[2 ]);
else
((float *)VertexManager::s_pCurBufferPointer)[2 ] = 0 .0f ;
auto const data = reinterpret_cast <const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
for (int i = 0 ; i < 3 ; ++i)
DataWrite (i<N ? PosScale (Common::FromBigEndian (data[i])) : 0 .f );
LOG_VTX ();
VertexManager::s_pCurBufferPointer += 12 ;
}
}
#if _M_SSE >= 0x301
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL , 0x08090A0BL , 0x04050607L , 0x00010203L );
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL , 0xFFFFFFFFL , 0x04050607L , 0x00010203L );
template < bool three, int MaxSize >
void Pos_ReadIndex_Float_SSSE3 (int Index )
template < typename I, bool three >
void LOADERDECL Pos_ReadIndex_Float_SSSE3 ()
{
if (Index < MaxSize)
auto const index = DataRead<I>();
if (index < std::numeric_limits<I>::max ())
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
GC_ALIGNED128 (const __m128i a = _mm_loadu_si128 ((__m128i*)pData));
GC_ALIGNED128 (__m128i b = _mm_shuffle_epi8 (a, three ? kMaskSwap32_3 : kMaskSwap32_2 ));
_mm_storeu_si128 ((__m128i*)VertexManager::s_pCurBufferPointer, b);
VertexManager::s_pCurBufferPointer += sizeof (float ) * 3 ;
LOG_VTX ();
VertexManager::s_pCurBufferPointer += 12 ;
}
}
#endif
// Explicitly instantiate these functions to decrease the possibility of
// symbol binding problems when (only) calling them from JIT compiled code.
template void Pos_ReadDirect<u8, true >();
template void Pos_ReadDirect<s8, true >();
template void Pos_ReadDirect<u16, true >();
template void Pos_ReadDirect<s16, true >();
template void Pos_ReadDirect<u8, false >();
template void Pos_ReadDirect<s8, false >();
template void Pos_ReadDirect<u16, false >();
template void Pos_ReadDirect<s16, false >();
template void Pos_ReadIndex_Byte<u8, true , 255 >(int Index);
template void Pos_ReadIndex_Byte<s8, true , 255 >(int Index);
template void Pos_ReadIndex_Short<u16, true , 255 >(int Index);
template void Pos_ReadIndex_Short<s16, true , 255 >(int Index);
template void Pos_ReadIndex_Float<true , 255 >(int Index);
template void Pos_ReadIndex_Byte<u8, false , 255 >(int Index);
template void Pos_ReadIndex_Byte<s8, false , 255 >(int Index);
template void Pos_ReadIndex_Short<u16, false , 255 >(int Index);
template void Pos_ReadIndex_Short<s16, false , 255 >(int Index);
template void Pos_ReadIndex_Float<false , 255 >(int Index);
template void Pos_ReadIndex_Byte<u8, true , 65535 >(int Index);
template void Pos_ReadIndex_Byte<s8, true , 65535 >(int Index);
template void Pos_ReadIndex_Short<u16, true , 65535 >(int Index);
template void Pos_ReadIndex_Short<s16, true , 65535 >(int Index);
template void Pos_ReadIndex_Float<true , 65535 >(int Index);
template void Pos_ReadIndex_Byte<u8, false , 65535 >(int Index);
template void Pos_ReadIndex_Byte<s8, false , 65535 >(int Index);
template void Pos_ReadIndex_Short<u16, false , 65535 >(int Index);
template void Pos_ReadIndex_Short<s16, false , 65535 >(int Index);
template void Pos_ReadIndex_Float<false , 65535 >(int Index);
// ==============================================================================
// Index 8
// ==============================================================================
void LOADERDECL Pos_ReadIndex8_UByte3 () {Pos_ReadIndex_Byte<u8, true , 255 > (DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_Byte3 () {Pos_ReadIndex_Byte<s8, true , 255 > (DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_UShort3 () {Pos_ReadIndex_Short<u16, true , 255 > (DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_Short3 () {Pos_ReadIndex_Short<s16, true , 255 > (DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_Float3 () {Pos_ReadIndex_Float<true , 255 > (DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_UByte2 () {Pos_ReadIndex_Byte<u8, false , 255 >(DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_Byte2 () {Pos_ReadIndex_Byte<s8, false , 255 >(DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_UShort2 () {Pos_ReadIndex_Short<u16, false , 255 >(DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_Short2 () {Pos_ReadIndex_Short<s16, false , 255 >(DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_Float2 () {Pos_ReadIndex_Float<false , 255 > (DataReadU8 ());}
// ==============================================================================
// Index 16
// ==============================================================================
void LOADERDECL Pos_ReadIndex16_UByte3 () {Pos_ReadIndex_Byte<u8, true , 65535 > (DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_Byte3 () {Pos_ReadIndex_Byte<s8, true , 65535 > (DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_UShort3 () {Pos_ReadIndex_Short<u16, true , 65535 > (DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_Short3 () {Pos_ReadIndex_Short<s16, true , 65535 > (DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_Float3 () {Pos_ReadIndex_Float<true , 65535 > (DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_UByte2 () {Pos_ReadIndex_Byte<u8, false , 65535 >(DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_Byte2 () {Pos_ReadIndex_Byte<s8, false , 65535 >(DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_UShort2 () {Pos_ReadIndex_Short<u16, false , 65535 >(DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_Short2 () {Pos_ReadIndex_Short<s16, false , 65535 >(DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_Float2 () {Pos_ReadIndex_Float<false , 65535 > (DataReadU16 ());}
#if _M_SSE >= 0x301
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3 () {Pos_ReadIndex_Float_SSSE3<true , 255 > (DataReadU8 ());}
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3 () {Pos_ReadIndex_Float_SSSE3<false , 255 > (DataReadU8 ());}
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3 () {Pos_ReadIndex_Float_SSSE3<true , 65535 > (DataReadU16 ());}
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3 () {Pos_ReadIndex_Float_SSSE3<false , 65535 > (DataReadU16 ());}
#endif
static TPipelineFunction tableReadPosition[4 ][8 ][2 ] = {
{
{NULL , NULL ,},
Expand All
@@ -263,56 +141,40 @@ static TPipelineFunction tableReadPosition[4][8][2] = {
{NULL , NULL ,},
},
{
{Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3 ,},
{Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3 ,},
{Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3 ,},
{Pos_ReadDirect_Short2, Pos_ReadDirect_Short3 ,},
{Pos_ReadDirect_Float2, Pos_ReadDirect_Float3 ,},
{Pos_ReadDirect<u8, 2 >, Pos_ReadDirect<u8, 3 > ,},
{Pos_ReadDirect<s8, 2 >, Pos_ReadDirect<s8, 3 > ,},
{Pos_ReadDirect<u16, 2 >, Pos_ReadDirect<u16, 3 > ,},
{Pos_ReadDirect<s16, 2 >, Pos_ReadDirect<s16, 3 > ,},
{Pos_ReadDirect< float , 2 >, Pos_ReadDirect< float , 3 > ,},
},
{
{Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3 ,},
{Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3 ,},
{Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3 ,},
{Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3 ,},
{Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3 ,},
{Pos_ReadIndex<u8, u8, 2 >, Pos_ReadIndex<u8, u8, 3 > ,},
{Pos_ReadIndex<u8, s8, 2 >, Pos_ReadIndex<u8, s8, 3 > ,},
{Pos_ReadIndex<u8, u16, 2 >, Pos_ReadIndex<u8, u16, 3 > ,},
{Pos_ReadIndex<u8, s16, 2 >, Pos_ReadIndex<u8, s16, 3 > ,},
{Pos_ReadIndex<u8, float , 2 >, Pos_ReadIndex<u8, float , 3 > ,},
},
{
{Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3 ,},
{Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3 ,},
{Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3 ,},
{Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3 ,},
{Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3 ,},
{Pos_ReadIndex<u16, u8, 2 >, Pos_ReadIndex<u16, u8, 3 > ,},
{Pos_ReadIndex<u16, s8, 2 >, Pos_ReadIndex<u16, s8, 3 > ,},
{Pos_ReadIndex<u16, u16, 2 >, Pos_ReadIndex<u16, u16, 3 > ,},
{Pos_ReadIndex<u16, s16, 2 >, Pos_ReadIndex<u16, s16, 3 > ,},
{Pos_ReadIndex<u16, float , 2 >, Pos_ReadIndex<u16, float , 3 > ,},
},
};
static int tableReadPositionVertexSize[4 ][8 ][2 ] = {
{
{0 , 0 ,},
{0 , 0 ,},
{0 , 0 ,},
{0 , 0 ,},
{0 , 0 ,},
{0 , 0 ,}, {0 , 0 ,}, {0 , 0 ,}, {0 , 0 ,}, {0 , 0 ,},
},
{
{2 , 3 ,},
{2 , 3 ,},
{4 , 6 ,},
{4 , 6 ,},
{8 , 12 ,},
{2 , 3 ,}, {2 , 3 ,}, {4 , 6 ,}, {4 , 6 ,}, {8 , 12 ,},
},
{
{1 , 1 ,},
{1 , 1 ,},
{1 , 1 ,},
{1 , 1 ,},
{1 , 1 ,},
{1 , 1 ,}, {1 , 1 ,}, {1 , 1 ,}, {1 , 1 ,}, {1 , 1 ,},
},
{
{2 , 2 ,},
{2 , 2 ,},
{2 , 2 ,},
{2 , 2 ,},
{2 , 2 ,},
{2 , 2 ,}, {2 , 2 ,}, {2 , 2 ,}, {2 , 2 ,}, {2 , 2 ,},
},
};
Expand All
@@ -322,10 +184,10 @@ void VertexLoader_Position::Init(void) {
#if _M_SSE >= 0x301
if (cpu_info.bSSSE3 ) {
tableReadPosition[2 ][4 ][0 ] = Pos_ReadIndex8_Float2_SSSE3 ;
tableReadPosition[2 ][4 ][1 ] = Pos_ReadIndex8_Float3_SSSE3 ;
tableReadPosition[3 ][4 ][0 ] = Pos_ReadIndex16_Float2_SSSE3 ;
tableReadPosition[3 ][4 ][1 ] = Pos_ReadIndex16_Float3_SSSE3 ;
tableReadPosition[2 ][4 ][0 ] = Pos_ReadIndex_Float_SSSE3<u8, false > ;
tableReadPosition[2 ][4 ][1 ] = Pos_ReadIndex_Float_SSSE3<u8, true > ;
tableReadPosition[3 ][4 ][0 ] = Pos_ReadIndex_Float_SSSE3<u16, false > ;
tableReadPosition[3 ][4 ][1 ] = Pos_ReadIndex_Float_SSSE3<u16, true > ;
}
#endif
Expand Down