133 changes: 100 additions & 33 deletions Source/Core/VideoBackends/D3D/D3DBase.cpp
Expand Up @@ -4,8 +4,13 @@

#include <unordered_map>

//#include <Initguid.h>
//#include <DXGIDebug.h>

#include "Common/CPUDetect.h"
#include "Common/Hash.h"
#include "Common/StringUtil.h"
#include "VideoBackends/D3D/D3DPtr.h"
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DTexture.h"
#include "VideoBackends/D3D/GfxState.h"
Expand All @@ -20,7 +25,9 @@ pD3DCompile PD3DCompile = nullptr;
int d3dcompiler_dll_ref = 0;

CREATEDXGIFACTORY PCreateDXGIFactory = nullptr;
DXGIGETDEBUGINTERFACE PDXGIGetDebugInterface {};
HINSTANCE hDXGIDll = nullptr;
HINSTANCE hDXGIDebugDll = nullptr;
int dxgi_dll_ref = 0;

typedef HRESULT (WINAPI* D3D11CREATEDEVICEANDSWAPCHAIN)(IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, UINT, CONST D3D_FEATURE_LEVEL*, UINT, UINT, CONST DXGI_SWAP_CHAIN_DESC*, IDXGISwapChain**, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext**);
Expand All @@ -33,6 +40,7 @@ namespace D3D
{

ID3D11Device* device = nullptr;
ID3D11Device1* device1 = nullptr;
WrapDeviceContext context;
IDXGISwapChain* swapchain = nullptr;
D3D_FEATURE_LEVEL featlevel;
Expand All @@ -59,6 +67,7 @@ HRESULT LoadDXGI()
if (dxgi_dll_ref++ > 0) return S_OK;

if (hDXGIDll) return S_OK;

hDXGIDll = LoadLibraryA("dxgi.dll");
if (!hDXGIDll)
{
Expand All @@ -69,6 +78,17 @@ HRESULT LoadDXGI()
PCreateDXGIFactory = (CREATEDXGIFACTORY)GetProcAddress(hDXGIDll, "CreateDXGIFactory");
if (PCreateDXGIFactory == nullptr) MessageBoxA(nullptr, "GetProcAddress failed for CreateDXGIFactory!", "Critical error", MB_OK | MB_ICONERROR);

//hDXGIDebugDll = LoadLibraryA("dxgidebug.dll");
//if (!hDXGIDebugDll)
//{
// MessageBoxA(nullptr, "Failed to load dxgidebug.dll", "Critical error", MB_OK | MB_ICONERROR);
// --dxgi_dll_ref;
// return E_FAIL;
//}

//PDXGIGetDebugInterface = (CREATEDXGIFACTORY)GetProcAddress(hDXGIDebugDll, "DXGIGetDebugInterface");
//if (PDXGIGetDebugInterface == nullptr) MessageBoxA(nullptr, "GetProcAddress failed for DXGIGetDebugInterface!", "Critical error", MB_OK | MB_ICONERROR);

return S_OK;
}

Expand Down Expand Up @@ -132,6 +152,7 @@ void UnloadDXGI()
if (hDXGIDll) FreeLibrary(hDXGIDll);
hDXGIDll = nullptr;
PCreateDXGIFactory = nullptr;
PDXGIGetDebugInterface = nullptr;
}

void UnloadD3D()
Expand Down Expand Up @@ -279,7 +300,7 @@ HRESULT Create(HWND wnd)
swap_chain_desc.BufferDesc.Width = xres;
swap_chain_desc.BufferDesc.Height = yres;

#if defined(_DEBUG) || defined(DEBUGFAST)
#if defined(_DEBUG) || defined(DEBUGFAST) || 0
// Creating debug devices can sometimes fail if the user doesn't have the correct
// version of the DirectX SDK. If it does, simply fallback to a non-debug device.
{
Expand Down Expand Up @@ -313,6 +334,9 @@ HRESULT Create(HWND wnd)
SAFE_RELEASE(output);
SAFE_RELEASE(adapter);

//device->QueryInterface( __uuidof(ID3D11Device1), (void**)&device1);
//context->InitContext1();

ID3D11Texture2D* buf;
hr = swapchain->GetBuffer(0, IID_ID3D11Texture2D, (void**)&buf);
if (FAILED(hr))
Expand Down Expand Up @@ -353,6 +377,7 @@ void Close()

ReleaseStates();
SAFE_RELEASE(context);

ULONG references = device->Release();
if (references)
{
Expand All @@ -363,6 +388,13 @@ void Close()
NOTICE_LOG(VIDEO, "Successfully released all device references!");
}
device = nullptr;

//D3D::UniquePtr<IDXGIDebug> debugInterface;
//PDXGIGetDebugInterface(__uuidof(IDXGIDebug), ToAddr(debugInterface) );
//if (debugInterface) {
// //LPGUID guid = (LPGUID)GetProcAddress(hDXGIDebugDll, "DXGI_DEBUG_ALL");
// debugInterface->ReportLiveObjects(DXGI_DEBUG_ALL,DXGI_DEBUG_RLO_ALL);
//}

// unload DLLs
UnloadD3D();
Expand Down Expand Up @@ -390,6 +422,13 @@ const char* PixelShaderVersionString()
else /*if(featlevel == D3D_FEATURE_LEVEL_10_0)*/ return "ps_4_0";
}

// Maps the global `featlevel` to the compute shader target profile string.
// Only 11_0 and 10_1 are matched explicitly; any other value yields "cs_4_0".
const char* ComputeShaderVersionString()
{
	switch (featlevel)
	{
	case D3D_FEATURE_LEVEL_11_0:
		return "cs_5_0";
	case D3D_FEATURE_LEVEL_10_1:
		return "cs_4_1";
	default: // treated like D3D_FEATURE_LEVEL_10_0
		return "cs_4_0";
	}
}

D3DTexture2D* &GetBackBuffer() { return backbuf; }
unsigned int GetBackBufferWidth() { return xres; }
unsigned int GetBackBufferHeight() { return yres; }
Expand Down Expand Up @@ -420,81 +459,109 @@ unsigned int GetMaxTextureSize()
}
}

// Functor hashing a descriptor object of type T.
// When compiled with _M_SSE >= 0x402 and cpu_info reports SSE4.2 at runtime, the value is
// passed to GetCRC32_CT; otherwise its raw bytes (sizeof(val) of them) go through
// GetMurmurHash3 with 0 as the last argument (presumably the seed - confirm against Common/Hash.h).
// NOTE(review): the murmur path hashes the object representation, so T should not contain
// uninitialized members or padding, or equal descriptors may hash differently.
template <typename T>
struct HashDesc
{
	// const-qualified (like PassHash) so the functor can also be called through a const object.
	std::size_t operator()(T const& val) const
	{
#if _M_SSE >= 0x402
		if (cpu_info.bSSE4_2) // sse crc32 version
		{
			return std::size_t(GetCRC32_CT(val));
		}
		else
#endif
		{
			return GetMurmurHash3((u8 const*)&val, sizeof(val), 0);
		}
	}
};

// Identity hasher: returns the key unchanged, for containers whose keys are already hash values.
struct PassHash
{
	std::size_t operator()(std::size_t key) const
	{
		return key;
	}
};

//TODO: Put this in its own file
std::unordered_map<u64, ID3D11BlendState*> bstates_;
std::unordered_map<u64, ID3D11SamplerState*> sstates_;
std::unordered_map<u64, ID3D11RasterizerState*> rstates_;
std::unordered_map<u64, ID3D11DepthStencilState*> dstates_;
std::unordered_map<std::size_t, D3D::UniquePtr<ID3D11BlendState>,PassHash> bstates_;
std::unordered_map<std::size_t, D3D::UniquePtr<ID3D11SamplerState>,PassHash> sstates_;
std::unordered_map<std::size_t, D3D::UniquePtr<ID3D11RasterizerState>,PassHash> rstates_;
std::unordered_map<std::size_t, D3D::UniquePtr<ID3D11DepthStencilState>,PassHash> dstates_;

ID3D11RasterizerState* GetRasterizerState( D3D11_RASTERIZER_DESC const& desc, char const* debugNameOnCreation ) {
auto crc = GetHash64( (u8 const*)&desc, sizeof( desc ), 0 );
ID3D11RasterizerState* GetRasterizerState( PackedD3DRasterisationDesc const& desc, char const* debugNameOnCreation ) {
auto crc = HashDesc<decltype(desc)>{}( desc );
auto it = rstates_.find( crc );
if ( it != rstates_.end() ) {
return it->second;
return it->second.get();
}
ID3D11RasterizerState* state;
auto hr = D3D::device->CreateRasterizerState( &desc, &state );
auto d3ddesc = desc.Unpack();
//d3ddesc.CullMode = D3D11_CULL_NONE;
auto hr = D3D::device->CreateRasterizerState( &d3ddesc, &state );
if ( FAILED( hr ) )
PanicAlert( "Failed to create rasterizer state at %s %d\n", __FILE__, __LINE__ );
D3D::SetDebugObjectName( state, debugNameOnCreation );
rstates_.emplace( crc, state );
rstates_.emplace( crc, D3D::UniquePtr<ID3D11RasterizerState>{state} );
return state;
}

ID3D11BlendState* GetBlendState( D3D11_BLEND_DESC const& desc, char const* debugNameOnCreation ) {
auto crc = GetHash64( (u8 const*)&desc, sizeof( desc ), 0 );


ID3D11BlendState* GetBlendState( PackedD3DBlendDesc const& desc, char const* debugNameOnCreation ) {
auto crc = HashDesc<decltype(desc)>{}( desc );
auto it = bstates_.find( crc );
if ( it != bstates_.end() ) {
return it->second;
return it->second.get();
}
ID3D11BlendState* state;
auto hr = D3D::device->CreateBlendState( &desc, &state );
if ( FAILED( hr ) )
PanicAlert( "Failed to create blend state at %s %d\n", __FILE__, __LINE__ );
D3D::SetDebugObjectName( state, debugNameOnCreation );
bstates_.emplace( crc, state );
return state;
if (device1 && desc.LogicOpEnable) {
auto d3ddesc = desc.Unpack1();
auto hr = D3D::device1->CreateBlendState1( &d3ddesc, (ID3D11BlendState1**)&state );
if ( FAILED( hr ) )
PanicAlert( "Failed to create blend state at %s %d\n", __FILE__, __LINE__ );
D3D::SetDebugObjectName( state, debugNameOnCreation );
bstates_.emplace( crc, D3D::UniquePtr<ID3D11BlendState>{state} );
return state;
} else {
auto d3ddesc = desc.Unpack();
auto hr = D3D::device->CreateBlendState( &d3ddesc, &state );
if ( FAILED( hr ) )
PanicAlert( "Failed to create blend state at %s %d\n", __FILE__, __LINE__ );
D3D::SetDebugObjectName( state, debugNameOnCreation );
bstates_.emplace( crc, D3D::UniquePtr<ID3D11BlendState>{state} );
return state;
}

}

ID3D11DepthStencilState* GetDepthStencilState( D3D11_DEPTH_STENCIL_DESC const& desc, char const* debugNameOnCreation ) {
auto crc = GetHash64( (u8 const*)&desc, sizeof( desc ), 0 );
auto crc = HashDesc<decltype(desc)>{}( desc );
auto it = dstates_.find( crc );
if ( it != dstates_.end() ) {
return it->second;
return it->second.get();
}
ID3D11DepthStencilState* state;
auto hr = D3D::device->CreateDepthStencilState( &desc, &state );
if ( FAILED( hr ) )
PanicAlert( "Failed to create depth stencil state at %s %d\n", __FILE__, __LINE__ );
D3D::SetDebugObjectName( state, debugNameOnCreation );
dstates_.emplace( crc, state );
dstates_.emplace( crc, D3D::UniquePtr<ID3D11DepthStencilState>{state} );
return state;
}

ID3D11SamplerState* GetSamplerState( D3D11_SAMPLER_DESC const& desc, char const* debugNameOnCreation ) {
auto crc = GetHash64( (u8 const*)&desc, sizeof( desc ), 0 );
auto crc = HashDesc<decltype(desc)>{}( desc );
auto it = sstates_.find( crc );
if ( it != sstates_.end() ) {
return it->second;
return it->second.get();
}
ID3D11SamplerState* state;
auto hr = D3D::device->CreateSamplerState( &desc, &state );
if ( FAILED( hr ) )
PanicAlert( "Failed to create sampler state at %s %d\n", __FILE__, __LINE__ );
D3D::SetDebugObjectName( state, debugNameOnCreation );
sstates_.emplace( crc, state );
sstates_.emplace( crc, D3D::UniquePtr<ID3D11SamplerState>{state} );
return state;
}

void ReleaseStates() {
for( auto & state : sstates_ )
state.second->Release();
for( auto & state : dstates_ )
state.second->Release();
for( auto & state : bstates_ )
state.second->Release();
for( auto & state : rstates_ )
state.second->Release();
sstates_.clear();
dstates_.clear();
bstates_.clear();
Expand Down
148 changes: 144 additions & 4 deletions Source/Core/VideoBackends/D3D/D3DBase.h
Expand Up @@ -4,7 +4,7 @@

#pragma once

#include <d3d11.h>
#include <d3d11_1.h>
#include <d3dcompiler.h>
#include <dxgi.h>
#include <vector>
Expand Down Expand Up @@ -40,6 +40,7 @@ HRESULT Create(HWND wnd);
void Close();

extern ID3D11Device* device;
extern ID3D11Device1* device1;
extern WrapDeviceContext context;
extern IDXGISwapChain* swapchain;
extern bool bFrameInProgress;
Expand All @@ -55,12 +56,149 @@ D3DTexture2D* &GetBackBuffer();
const char* PixelShaderVersionString();
const char* GeometryShaderVersionString();
const char* VertexShaderVersionString();
const char* ComputeShaderVersionString();
bool BGRATexturesSupported();

unsigned int GetMaxTextureSize();

ID3D11RasterizerState* GetRasterizerState( D3D11_RASTERIZER_DESC const&, char const* debugNameOnCreation = nullptr);
ID3D11BlendState* GetBlendState( D3D11_BLEND_DESC const&, char const* debugNameOnCreation = nullptr);
// Bit-field representation of a D3D11_RENDER_TARGET_BLEND_DESC.
// The field widths add up to 32 bits (1+5+5+3+5+5+3+5).
struct PackedD3DRenderTargetBlendDesc {
	u32 BlendEnable : 1;
	u32 SrcBlend : 5;
	u32 DestBlend : 5;
	u32 BlendOp : 3;
	u32 SrcBlendAlpha : 5;
	u32 DestBlendAlpha : 5;
	u32 BlendOpAlpha : 3;
	u32 RenderTargetWriteMask : 5;

	// Defaulted: a default-initialized object has indeterminate field values.
	PackedD3DRenderTargetBlendDesc() = default;

	// Packs a D3D descriptor into the bit-fields above.
	PackedD3DRenderTargetBlendDesc( D3D11_RENDER_TARGET_BLEND_DESC const & desc ) :
		// BOOL is an int: normalise to 0/1 first, otherwise a non-zero value with a
		// clear low bit (e.g. 2) would be truncated to "disabled" by the 1-bit field.
		BlendEnable(desc.BlendEnable != FALSE),
		SrcBlend(desc.SrcBlend),
		DestBlend(desc.DestBlend),
		BlendOp(desc.BlendOp),
		SrcBlendAlpha(desc.SrcBlendAlpha),
		DestBlendAlpha(desc.DestBlendAlpha),
		BlendOpAlpha(desc.BlendOpAlpha),
		RenderTargetWriteMask(desc.RenderTargetWriteMask)
	{
	}

	// Expands the packed fields back into a D3D11_RENDER_TARGET_BLEND_DESC.
	// The explicit casts keep the braced initialisation free of narrowing conversions.
	D3D11_RENDER_TARGET_BLEND_DESC Unpack() const {
		return {
			static_cast<BOOL>(BlendEnable),
			D3D11_BLEND(SrcBlend),
			D3D11_BLEND(DestBlend),
			D3D11_BLEND_OP(BlendOp),
			D3D11_BLEND(SrcBlendAlpha),
			D3D11_BLEND(DestBlendAlpha),
			D3D11_BLEND_OP(BlendOpAlpha),
			static_cast<UINT8>(RenderTargetWriteMask)
		};
	}

	// Same as Unpack() but for the D3D 11.1 descriptor. Logic ops are not stored per
	// render target here, so LogicOpEnable is FALSE and LogicOp is D3D11_LOGIC_OP_CLEAR.
	D3D11_RENDER_TARGET_BLEND_DESC1 Unpack1() const {
		return {
			static_cast<BOOL>(BlendEnable),
			FALSE,
			D3D11_BLEND(SrcBlend),
			D3D11_BLEND(DestBlend),
			D3D11_BLEND_OP(BlendOp),
			D3D11_BLEND(SrcBlendAlpha),
			D3D11_BLEND(DestBlendAlpha),
			D3D11_BLEND_OP(BlendOpAlpha),
			D3D11_LOGIC_OP_CLEAR,
			static_cast<UINT8>(RenderTargetWriteMask)
		};
	}

};

// Compact representation of a blend state description: four 8-bit fields followed by
// eight packed render target descriptions. LogicOpEnable / LogicOp have no counterpart in
// D3D11_BLEND_DESC; Unpack1() applies them to every render target of a D3D11_BLEND_DESC1.
struct PackedD3DBlendDesc {
	u32 AlphaToCoverageEnable : 8;
	u32 IndependentBlendEnable : 8;
	u32 LogicOpEnable : 8;
	u32 LogicOp : 8;
	PackedD3DRenderTargetBlendDesc RenderTarget[ 8 ];

	// Defaulted: a default-initialized object has indeterminate field values.
	PackedD3DBlendDesc() = default;

	// Packs a D3D11_BLEND_DESC. The logic-op fields are explicitly cleared: leaving them
	// uninitialized would make later reads of LogicOpEnable (and any byte-wise hash of
	// this object) depend on indeterminate values.
	PackedD3DBlendDesc( D3D11_BLEND_DESC const& desc ) :
		// BOOL is an int: normalise to 0/1 so values >= 256 are not truncated to 0.
		AlphaToCoverageEnable(desc.AlphaToCoverageEnable != FALSE),
		IndependentBlendEnable(desc.IndependentBlendEnable != FALSE),
		LogicOpEnable(0),
		LogicOp(D3D11_LOGIC_OP_CLEAR)
	{
		for( int i = 0; i!=8;++i)
			RenderTarget[i] = desc.RenderTarget[i];
	}

	// Expands into a D3D11_BLEND_DESC; the logic-op fields are not represented there.
	D3D11_BLEND_DESC Unpack() const {
		return {
			static_cast<BOOL>(AlphaToCoverageEnable),
			static_cast<BOOL>(IndependentBlendEnable),
			{
				RenderTarget[0].Unpack(),RenderTarget[1].Unpack(),RenderTarget[2].Unpack(),
				RenderTarget[3].Unpack(),RenderTarget[4].Unpack(),RenderTarget[5].Unpack(),
				RenderTarget[6].Unpack(),RenderTarget[7].Unpack(),
			}
		};
	}

	// Expands into a D3D11_BLEND_DESC1. The shared LogicOpEnable / LogicOp values are
	// copied to every render target; when the logic op is enabled, BlendEnable is forced
	// to FALSE on each target and IndependentBlendEnable is forced to FALSE.
	D3D11_BLEND_DESC1 Unpack1() const {
		D3D11_BLEND_DESC1 result {
			static_cast<BOOL>(AlphaToCoverageEnable),
			static_cast<BOOL>(IndependentBlendEnable),
			{
				RenderTarget[0].Unpack1(),RenderTarget[1].Unpack1(),RenderTarget[2].Unpack1(),
				RenderTarget[3].Unpack1(),RenderTarget[4].Unpack1(),RenderTarget[5].Unpack1(),
				RenderTarget[6].Unpack1(),RenderTarget[7].Unpack1(),
			}
		};
		for(auto & rt : result.RenderTarget) {
			rt.LogicOpEnable = LogicOpEnable;
			rt.LogicOp = D3D11_LOGIC_OP(LogicOp);
			if (rt.LogicOpEnable)
				rt.BlendEnable = FALSE;
		}
		if (LogicOpEnable)
			result.IndependentBlendEnable = FALSE;
		return result;
	}
};


// Compact representation of a D3D11_RASTERIZER_DESC: the enum and BOOL members are stored
// as bit-fields (widths total 32 bits), the depth bias members are kept at full size.
struct PackedD3DRasterisationDesc {
	u32 FillMode : 6;
	u32 CullMode : 6;
	u32 FrontCounterClockwise : 4;
	u32 DepthClipEnable : 4;
	u32 ScissorEnable : 4;
	u32 MultisampleEnable : 4;
	u32 AntialiasedLineEnable : 4;
	INT DepthBias;
	FLOAT DepthBiasClamp;
	FLOAT SlopeScaledDepthBias;

	// Defaulted: a default-initialized object has indeterminate field values.
	PackedD3DRasterisationDesc() = default;

	// Packs a D3D descriptor. BOOL members are normalised to 0/1 first: BOOL is an int,
	// and a non-zero value with clear low bits (e.g. 16) would otherwise be truncated
	// to 0 by the 4-bit fields.
	PackedD3DRasterisationDesc( D3D11_RASTERIZER_DESC const& desc ) :
		FillMode(desc.FillMode), CullMode(desc.CullMode),
		FrontCounterClockwise(desc.FrontCounterClockwise != FALSE),
		DepthClipEnable(desc.DepthClipEnable != FALSE),
		ScissorEnable(desc.ScissorEnable != FALSE),
		MultisampleEnable(desc.MultisampleEnable != FALSE),
		AntialiasedLineEnable(desc.AntialiasedLineEnable != FALSE),
		DepthBias(desc.DepthBias),
		DepthBiasClamp(desc.DepthBiasClamp),SlopeScaledDepthBias(desc.SlopeScaledDepthBias)
	{
	}


	// Expands back into a D3D11_RASTERIZER_DESC (initialisers follow that struct's member
	// order). The explicit casts keep the braced initialisation free of narrowing conversions.
	D3D11_RASTERIZER_DESC Unpack() const {
		return {
			D3D11_FILL_MODE(FillMode), D3D11_CULL_MODE(CullMode),
			static_cast<BOOL>(FrontCounterClockwise), DepthBias,
			DepthBiasClamp, SlopeScaledDepthBias,
			static_cast<BOOL>(DepthClipEnable), static_cast<BOOL>(ScissorEnable),
			static_cast<BOOL>(MultisampleEnable), static_cast<BOOL>(AntialiasedLineEnable),
		};
	}
};


ID3D11RasterizerState* GetRasterizerState( PackedD3DRasterisationDesc const&, char const* debugNameOnCreation = nullptr);
ID3D11BlendState* GetBlendState( PackedD3DBlendDesc const&, char const* debugNameOnCreation = nullptr);
ID3D11DepthStencilState* GetDepthStencilState( D3D11_DEPTH_STENCIL_DESC const&, char const* debugNameOnCreation = nullptr);
ID3D11SamplerState* GetSamplerState( D3D11_SAMPLER_DESC const&, char const* debugNameOnCreation = nullptr);

Expand All @@ -73,7 +211,7 @@ void SetDebugObjectName(T resource, const char* name)
static_assert(std::is_convertible<T, ID3D11DeviceChild*>::value,
"resource must be convertible to ID3D11DeviceChild*");
#if defined(_DEBUG) || defined(DEBUGFAST)
resource->SetPrivateData(WKPDID_D3DDebugObjectName, (UINT)strlen(name), name);
if (name && resource )resource->SetPrivateData(WKPDID_D3DDebugObjectName, (UINT)strlen(name), name);
#endif
}

Expand All @@ -84,6 +222,8 @@ extern CREATEDXGIFACTORY PCreateDXGIFactory;
typedef HRESULT (WINAPI *D3D11CREATEDEVICE)(IDXGIAdapter*, D3D_DRIVER_TYPE, HMODULE, UINT, CONST D3D_FEATURE_LEVEL*, UINT, UINT, ID3D11Device**, D3D_FEATURE_LEVEL*, ID3D11DeviceContext**);
extern D3D11CREATEDEVICE PD3D11CreateDevice;

typedef HRESULT (WINAPI *DXGIGETDEBUGINTERFACE)(REFIID riid, void **ppDebug);

typedef HRESULT (WINAPI *D3DREFLECT)(LPCVOID, SIZE_T, REFIID, void**);
extern D3DREFLECT PD3DReflect;
extern pD3DCompile PD3DCompile;
Expand Down
48 changes: 19 additions & 29 deletions Source/Core/VideoBackends/D3D/D3DBlob.cpp
Expand Up @@ -2,56 +2,46 @@
// Licensed under GPLv2
// Refer to the license.txt file included.

#include <d3d11.h>
#include <d3d11_1.h>

#include "VideoBackends/D3D/D3DBlob.h"

namespace DX11
{

D3DBlob::D3DBlob(unsigned int blob_size, const u8* init_data) : ref(1), size(blob_size), blob(nullptr)
D3DBlob::D3DBlob(unsigned int blob_size, const u8* init_data) :
data_{ new u8[blob_size] },
size_{ blob_size }
{
data = new u8[blob_size];
if (init_data) memcpy(data, init_data, size);
if (init_data)
memcpy(data_.get(), init_data, size_);
}

D3DBlob::D3DBlob(ID3D10Blob* d3dblob) : ref(1)
D3DBlob::D3DBlob(ID3DBlobPtr && d3dblob) :
blob_{ std::move(d3dblob) },
size_{0}
{
blob = d3dblob;
data = (u8*)blob->GetBufferPointer();
size = (unsigned int)blob->GetBufferSize();
d3dblob->AddRef();
if (blob_) {
data_.reset( (u8*)blob_->GetBufferPointer());
size_ = blob_->GetBufferSize();
}
}

D3DBlob::~D3DBlob()
{
if (blob) blob->Release();
else delete[] data;
}

void D3DBlob::AddRef()
{
++ref;
}

unsigned int D3DBlob::Release()
{
if (--ref == 0)
{
delete this;
return 0;
if (blob_) {
data_.release();
}
return ref;
}

unsigned int D3DBlob::Size()
size_t D3DBlob::Size() const
{
return size;
return size_;
}

u8* D3DBlob::Data()
u8 const * D3DBlob::Data() const
{
return data;
return data_.get();
}

} // namespace DX11
59 changes: 47 additions & 12 deletions Source/Core/VideoBackends/D3D/D3DBlob.h
Expand Up @@ -5,36 +5,71 @@
#pragma once

#include "Common/CommonTypes.h"
#include "VideoBackends/D3D/D3DPtr.h"

struct ID3D10Blob;

namespace DX11
{

using ID3DBlobPtr = D3D::UniquePtr<ID3D10Blob>;
// use this class instead of ID3D10Blob or ID3D11Blob whenever possible
class D3DBlob
{
public:
D3DBlob() = default;
D3DBlob( D3DBlob const & ) = delete;
D3DBlob& operator=( D3DBlob const & ) = delete;
D3DBlob( D3DBlob && b ) : blob_{ b.blob_.release() }, data_{std::move(b.data_)}, size_(b.size_) {
}

D3DBlob& operator=( D3DBlob && b ) {
*this = nullptr;
blob_.reset( b.blob_.release() );
data_.reset( b.data_.release() );
size_ = b.size_;
return *this;
}

D3DBlob& operator=( std::nullptr_t ) {
if (blob_)
data_.release();
blob_.reset();
size_ = 0;
return *this;
}

void* operator new( std::size_t ) = delete;
void operator delete( void* ) = delete;
void* operator new[]( std::size_t ) = delete;
void operator delete[]( void* ) = delete;


// memory will be copied into an own buffer
D3DBlob(unsigned int blob_size, const u8* init_data = nullptr);

// d3dblob will be AddRef'd
D3DBlob(ID3D10Blob* d3dblob);
D3DBlob(ID3DBlobPtr && d3dblob);
D3DBlob& operator=(ID3DBlobPtr && b){
*this = nullptr;
blob_.reset( b.release() );
data_.reset( (u8*)blob_->GetBufferPointer() );
size_ = blob_->GetBufferSize();
return *this;
}

void AddRef();
unsigned int Release();
//void AddRef();
//unsigned int Release();

unsigned int Size();
u8* Data();
size_t Size() const;
u8 const* Data() const;

private:
~D3DBlob();

unsigned int ref;
unsigned int size;

u8* data;
ID3D10Blob* blob;
private:
ID3DBlobPtr blob_;
std::unique_ptr<u8[]> data_; // if blob_ is not nil, then the destructor will release.
size_t size_;

};

} // namespace
138 changes: 128 additions & 10 deletions Source/Core/VideoBackends/D3D/D3DPtr.h
Expand Up @@ -4,26 +4,143 @@

#pragma once

#include <d3d11.h>
#include <d3d11_1.h>
#include <memory>

namespace DX11
{
namespace D3D
{

// use as a deleter in std::unique_ptr
struct IUnknownDeleter {
void operator() ( IUnknown * ptr ) const {
ptr->Release();
// This is a specialized, simpler version of std::unique_ptr dedicated to D3D objects (with AddRef/Release methods).
// The single ownership semantics of std::unique_ptr are used, and the AddRef/Release API is hidden on the
// pointers handed out by get()/operator-> to prevent invalid uses.
// Once VS2014 is available, add noexcept wherever possible.
template <typename T>
struct UniquePtr {
	// http://en.cppreference.com/w/cpp/memory/unique_ptr/unique_ptr
	// Creates an empty pointer.
	UniquePtr() = default;

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/unique_ptr
	// Creates an empty pointer (ptr_ keeps its default member initializer).
	UniquePtr( std::nullptr_t ) {
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/unique_ptr
	// Takes over the reference held by p; no AddRef is performed.
	explicit UniquePtr( T* p ) :
		ptr_ { p } {
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/unique_ptr
	// Copy semantics are unwanted
	UniquePtr( UniquePtr const & ) = delete;

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/unique_ptr
	UniquePtr& operator=( UniquePtr const & ) = delete;

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/unique_ptr
	// Steals the pointer from p, leaving p empty.
	UniquePtr( UniquePtr && p ) : ptr_ { p.release() } {}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/operator%3D
	UniquePtr& operator=( UniquePtr && p ) {
		// the next line is important as it is safe with self assignment
		reset( p.release() );
		return *this;
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/operator%3D
	UniquePtr& operator=( std::nullptr_t ) {
		reset();
		return *this;
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/~unique_ptr
	// Drops the owned reference, if any.
	~UniquePtr() {
		if (ptr_)
			ptr_->Release();
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/release
	// Gives up ownership without calling Release(); the caller becomes responsible for the reference.
	T* release() {
		auto p = ptr_;
		ptr_ = nullptr;
		return p;
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/reset
	// Takes ownership of p (no AddRef) and releases the previously owned object, if any.
	void reset( T* p = nullptr ) {
		// the standard enforces the order of the operations ( first assign, then delete )
		auto old = ptr_;
		ptr_ = p;
		if (old)
			old->Release();
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/reset
	void reset( std::nullptr_t ) {
		reset();
	}

	// this is the ugly part but there is no real clean way to hide the unwanted API:
	// the accessors below return the pointer typed as this derived class, in which
	// Release/AddRef are private (and never defined).
	struct ReleaseAndAddRefHiddenT : public T {
	private:
		virtual ULONG Release(void) override;
		virtual ULONG AddRef(void) override;
	};

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/get
	ReleaseAndAddRefHiddenT* get() const { return static_cast<ReleaseAndAddRefHiddenT*>(ptr_); }

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/operator*
	ReleaseAndAddRefHiddenT* operator->() const { return static_cast<ReleaseAndAddRefHiddenT*>(ptr_); }
	ReleaseAndAddRefHiddenT& operator*() const { return *static_cast<ReleaseAndAddRefHiddenT*>(ptr_); }

	// Because D3D objects are ref counted, we are able to clone if we really need it,
	// still it is better to keep ownership in one place and use naked pointers in the rendering code
	// to prune unnecessary AddRef/Release cycles.
	// Returns a second owner of the same object (or an empty pointer if this one is empty).
	UniquePtr Share() {
		// AddRef must go through the raw T*: on the type returned by get() it is private,
		// and an enclosing class has no special access to a nested class' private members.
		if (ptr_)
			ptr_->AddRef();
		return UniquePtr{ ptr_ };
	}

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/operator_bool
	explicit operator bool() const { return ptr_!=nullptr; }

	// http://en.cppreference.com/w/cpp/memory/unique_ptr/operator_cmp
	friend bool operator!=( UniquePtr const& a, UniquePtr const& b ) { return a.ptr_ != b.ptr_; }
	friend bool operator!=( UniquePtr const& p, std::nullptr_t ) { return p.ptr_ != nullptr; }
	friend bool operator!=( std::nullptr_t, UniquePtr const& p ) { return p.ptr_ != nullptr; }
	friend bool operator==( UniquePtr const& a, UniquePtr const& b ) { return a.ptr_ == b.ptr_; }
	friend bool operator==( UniquePtr const& p, std::nullptr_t ) { return p.ptr_ == nullptr; }
	friend bool operator==( std::nullptr_t, UniquePtr const& p ) { return p.ptr_ == nullptr; }

private:
	T* ptr_{};
};

// a simple alias to std::unique_ptr, add RAII semantic to a D3D pointer.
template <typename T>
using UniquePtr = std::unique_ptr<T,IUnknownDeleter>;
// Alias to UniquePtr of most of the D3D object
using VertexShaderPtr = UniquePtr<ID3D11VertexShader>;
using PixelShaderPtr = UniquePtr<ID3D11PixelShader>;
using GeometryShaderPtr = UniquePtr<ID3D11GeometryShader>;
using ComputeShaderPtr = UniquePtr<ID3D11ComputeShader>;
using BufferPtr = UniquePtr<ID3D11Buffer>;
using SrvPtr = UniquePtr<ID3D11ShaderResourceView>;
using UavPtr = UniquePtr<ID3D11UnorderedAccessView>;
using Texture1dPtr = UniquePtr<ID3D11Texture1D>;
using Texture2dPtr = UniquePtr<ID3D11Texture2D>;
using RtvPtr = UniquePtr<ID3D11RenderTargetView>;
using DsvPtr = UniquePtr<ID3D11DepthStencilView>;

using InputLayoutPtr = UniquePtr<ID3D11InputLayout>;
using BlendStatePtr = UniquePtr<ID3D11BlendState>;

using RasterizerStatePtr = UniquePtr<ID3D11RasterizerState>;
using DepthStencilStatePtr = UniquePtr<ID3D11DepthStencilState>;
using SamplerStatePtr = UniquePtr<ID3D11SamplerState>;



// helper class to pass a UniquePtr to the various CreateSomething( T**/void** result )
// helper class to use a UniquePtr in the various CreateSomething( (T**)/(void**) result )
// use with the following function
template <typename T>
struct ToAddrImpl {
Expand All @@ -32,7 +149,8 @@ struct ToAddrImpl {
ToAddrImpl& operator=( ToAddrImpl const & ) = delete;

~ToAddrImpl() {
ptr_.reset(temp_);
if (temp_)
ptr_.reset(temp_);
}

operator void**() { return (void**)&temp_; }
Expand Down
235 changes: 172 additions & 63 deletions Source/Core/VideoBackends/D3D/D3DShader.cpp
Expand Up @@ -16,29 +16,29 @@ namespace D3D
{

// bytecode->shader
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, unsigned int len)
UniquePtr<ID3D11VertexShader> CreateVertexShaderFromByteCode(const void* bytecode, size_t len)
{
ID3D11VertexShader* v_shader;
HRESULT hr = D3D::device->CreateVertexShader(bytecode, len, nullptr, &v_shader);
UniquePtr<ID3D11VertexShader> v_shader;
HRESULT hr = D3D::device->CreateVertexShader(bytecode, len, nullptr, ToAddr(v_shader) );
if (FAILED(hr))
return nullptr;

return v_shader;
}

// code->bytecode
bool CompileVertexShader(const char* code, unsigned int len, D3DBlob** blob)
bool CompileVertexShader(const char* code, size_t len, D3DBlob& blob)
{
ID3D10Blob* shaderBuffer = nullptr;
ID3D10Blob* errorBuffer = nullptr;
ID3DBlobPtr shaderBuffer;
ID3DBlobPtr errorBuffer;

#if defined(_DEBUG) || defined(DEBUGFAST)
UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY|D3D10_SHADER_DEBUG;
UINT flags = D3DCOMPILE_DEBUG|D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY|D3D10_SHADER_OPTIMIZATION_LEVEL3|D3D10_SHADER_SKIP_VALIDATION;
UINT flags = D3DCOMPILE_OPTIMIZATION_LEVEL3|D3DCOMPILE_SKIP_VALIDATION;
#endif
HRESULT hr = PD3DCompile(code, len, nullptr, nullptr, nullptr, "main", D3D::VertexShaderVersionString(),
flags, 0, &shaderBuffer, &errorBuffer);
flags, 0, ToAddr(shaderBuffer), ToAddr(errorBuffer) );
if (errorBuffer)
{
INFO_LOG(VIDEO, "Vertex shader compiler messages:\n%s\n",
Expand All @@ -60,42 +60,40 @@ bool CompileVertexShader(const char* code, unsigned int len, D3DBlob** blob)
D3D::VertexShaderVersionString(),
(char*)errorBuffer->GetBufferPointer());

*blob = nullptr;
errorBuffer->Release();
blob = nullptr;
}
else
{
*blob = new D3DBlob(shaderBuffer);
shaderBuffer->Release();
blob = std::move(shaderBuffer);
}
return SUCCEEDED(hr);
}

// bytecode->shader
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, unsigned int len)
UniquePtr<ID3D11GeometryShader> CreateGeometryShaderFromByteCode(const void* bytecode, size_t len)
{
ID3D11GeometryShader* g_shader;
HRESULT hr = D3D::device->CreateGeometryShader(bytecode, len, nullptr, &g_shader);
UniquePtr<ID3D11GeometryShader> g_shader;
HRESULT hr = D3D::device->CreateGeometryShader(bytecode, len, nullptr, ToAddr(g_shader) );
if (FAILED(hr))
return nullptr;

return g_shader;
}

// code->bytecode
bool CompileGeometryShader(const char* code, unsigned int len, D3DBlob** blob,
bool CompileGeometryShader(const char* code, size_t len, D3DBlob& blob,
const D3D_SHADER_MACRO* pDefines)
{
ID3D10Blob* shaderBuffer = nullptr;
ID3D10Blob* errorBuffer = nullptr;
ID3DBlobPtr shaderBuffer;
ID3DBlobPtr errorBuffer;

#if defined(_DEBUG) || defined(DEBUGFAST)
UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY|D3D10_SHADER_DEBUG;
UINT flags = D3DCOMPILE_DEBUG|D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY|D3D10_SHADER_OPTIMIZATION_LEVEL3|D3D10_SHADER_SKIP_VALIDATION;
UINT flags = D3DCOMPILE_OPTIMIZATION_LEVEL3|D3DCOMPILE_SKIP_VALIDATION;
#endif
HRESULT hr = PD3DCompile(code, len, nullptr, pDefines, nullptr, "main", D3D::GeometryShaderVersionString(),
flags, 0, &shaderBuffer, &errorBuffer);
flags, 0, ToAddr(shaderBuffer), ToAddr(errorBuffer) );

if (errorBuffer)
{
Expand All @@ -118,31 +116,29 @@ bool CompileGeometryShader(const char* code, unsigned int len, D3DBlob** blob,
D3D::GeometryShaderVersionString(),
(char*)errorBuffer->GetBufferPointer());

*blob = nullptr;
errorBuffer->Release();
blob = nullptr;

}
else
{
*blob = new D3DBlob(shaderBuffer);
shaderBuffer->Release();
blob = std::move(shaderBuffer);
}
return SUCCEEDED(hr);
}

// bytecode->shader
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned int len)
UniquePtr<ID3D11PixelShader> CreatePixelShaderFromByteCode(const void* bytecode, size_t len)
{
ID3D11PixelShader* p_shader;
HRESULT hr = D3D::device->CreatePixelShader(bytecode, len, nullptr, &p_shader);
UniquePtr<ID3D11PixelShader> p_shader;
HRESULT hr = D3D::device->CreatePixelShader(bytecode, len, nullptr, ToAddr(p_shader) );
if (FAILED(hr))
{
PanicAlert("CreatePixelShaderFromByteCode failed at %s %d\n", __FILE__, __LINE__);
p_shader = nullptr;
}
return p_shader;
}

u32 ReflectTextureMask( const void* code, unsigned int len ) {
u32 ReflectTextureMask( const void* code, size_t len ) {
UniquePtr<ID3D11ShaderReflection> reflect;
PD3DReflect( code, len, IID_ID3D11ShaderReflection, ToAddr(reflect) );
if (!reflect)
Expand All @@ -162,19 +158,19 @@ u32 ReflectTextureMask( const void* code, unsigned int len ) {
return mask;
};
// code->bytecode
bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob,
bool CompilePixelShader(const char* code, size_t len, D3DBlob& blob,
const D3D_SHADER_MACRO* pDefines)
{
ID3D10Blob* shaderBuffer = nullptr;
ID3D10Blob* errorBuffer = nullptr;
ID3DBlobPtr shaderBuffer;
ID3DBlobPtr errorBuffer;

#if defined(_DEBUG) || defined(DEBUGFAST)
UINT flags = D3D10_SHADER_DEBUG;
UINT flags = D3DCOMPILE_DEBUG|D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3;
UINT flags = D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
HRESULT hr = PD3DCompile(code, len, nullptr, pDefines, nullptr, "main", D3D::PixelShaderVersionString(),
flags, 0, &shaderBuffer, &errorBuffer);
flags, 0, ToAddr(shaderBuffer), ToAddr(errorBuffer) );

if (errorBuffer)
{
Expand All @@ -197,54 +193,167 @@ bool CompilePixelShader(const char* code, unsigned int len, D3DBlob** blob,
D3D::PixelShaderVersionString(),
(char*)errorBuffer->GetBufferPointer());

*blob = nullptr;
errorBuffer->Release();
blob = nullptr;
}
else
{
*blob = new D3DBlob(shaderBuffer);
shaderBuffer->Release();
blob = std::move(shaderBuffer);
}

return SUCCEEDED(hr);
}

ID3D11VertexShader* CompileAndCreateVertexShader(const char* code,
unsigned int len)
bool CompileComputeShader(const char* code, size_t len, D3DBlob& blob,
const D3D_SHADER_MACRO* pDefines)
{
ID3DBlobPtr shaderBuffer;
ID3DBlobPtr errorBuffer;

#if defined(_DEBUG) || defined(DEBUGFAST)
UINT flags = D3DCOMPILE_DEBUG|D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT flags = D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
HRESULT hr = PD3DCompile(code, len, nullptr, pDefines, nullptr, "main", D3D::ComputeShaderVersionString(),
flags, 0, ToAddr(shaderBuffer), ToAddr(errorBuffer) );

if (errorBuffer)
{
INFO_LOG(VIDEO, "Pixel shader compiler messages:\n%s",
(const char*)errorBuffer->GetBufferPointer());
}

if (FAILED(hr))
{
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%sbad_cs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << code;
file.close();

PanicAlert("Failed to compile pixel shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
szTemp,
D3D::ComputeShaderVersionString(),
(char*)errorBuffer->GetBufferPointer());

blob = nullptr;
}
else
{
blob = std::move(shaderBuffer);
}

return SUCCEEDED(hr);
}
/*
struct ShaderException {
D3DBlob
};
struct CallerSite {
char const * file_;
u32 line_;
char const * func_;
};
#define CALLERSITE CallerSite{__FILE__, __LINE__, __FUNC__ }
D3DBlob CompileShader(ShaderType type, const char* code, size_t len, D3D_SHADER_MACRO const* pDefines)
{
char const *profile{};
switch (type) {
case DX11::D3D::ShaderType::Vertex:
profile = D3D::VertexShaderVersionString();
break;
case DX11::D3D::ShaderType::Pixel:
profile = D3D::PixelShaderVersionString();
break;
case DX11::D3D::ShaderType::Geometry:
profile = D3D::GeometryShaderVersionString();
break;
case DX11::D3D::ShaderType::Compute:
profile = D3D::ComputeShaderVersionString();
break;
default:
return false;
break;
}
#if defined(_DEBUG) || defined(DEBUGFAST)
UINT flags = D3DCOMPILE_DEBUG|D3DCOMPILE_SKIP_OPTIMIZATION;
#else
UINT flags = D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
ID3DBlobPtr shaderBuffer;
ID3DBlobPtr errorBuffer;
HRESULT hr = PD3DCompile(code, len, nullptr, pDefines, nullptr, "main", profile,
flags, 0, ToAddr(shaderBuffer), ToAddr(errorBuffer) );
if (errorBuffer)
{
INFO_LOG(VIDEO, "Pixel shader compiler messages:\n%s",
(const char*)errorBuffer->GetBufferPointer());
}
if (FAILED(hr))
{
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%sbad_cs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << code;
file.close();
PanicAlert("Failed to compile pixel shader!\nThis usually happens when trying to use Dolphin with an outdated GPU or integrated GPU like the Intel GMA series.\n\nIf you're sure this is Dolphin's error anyway, post the contents of %s along with this error message at the forums.\n\nDebug info (%s):\n%s",
szTemp,
D3D::ComputeShaderVersionString(),
(char*)errorBuffer->GetBufferPointer());
blob = nullptr;
}
else
{
blob = std::move(shaderBuffer);
}
return SUCCEEDED(hr);
}
*/


// Compiles HLSL vertex shader source (code/len) and creates the shader object
// from the resulting bytecode.
// Returns a null pointer when CompileVertexShader() reports failure; otherwise
// returns whatever CreateVertexShaderFromByteCode() produces.
UniquePtr<ID3D11VertexShader> CompileAndCreateVertexShader(const char* code,
	size_t len)
{
	// The bytecode is only needed while the shader object is created. The blob
	// is a local value, so no explicit Release() call is made here.
	D3DBlob blob;
	if (CompileVertexShader(code, len, blob))
	{
		return CreateVertexShaderFromByteCode(blob);
	}
	return nullptr;
}

ID3D11GeometryShader* CompileAndCreateGeometryShader(const char* code,
unsigned int len, const D3D_SHADER_MACRO* pDefines)
UniquePtr<ID3D11GeometryShader> CompileAndCreateGeometryShader(const char* code,
size_t len, const D3D_SHADER_MACRO* pDefines)
{
D3DBlob* blob = nullptr;
if (CompileGeometryShader(code, len, &blob, pDefines))
D3DBlob blob;
if (CompileGeometryShader(code, len, blob, pDefines))
{
ID3D11GeometryShader* g_shader = CreateGeometryShaderFromByteCode(blob);
blob->Release();
return g_shader;
return CreateGeometryShaderFromByteCode(blob);
}
return nullptr;
}

ID3D11PixelShader* CompileAndCreatePixelShader(const char* code,
unsigned int len)
UniquePtr<ID3D11PixelShader> CompileAndCreatePixelShader(const char* code,
size_t len)
{
D3DBlob* blob = nullptr;
CompilePixelShader(code, len, &blob);
if (blob)
D3DBlob blob;
if (CompilePixelShader(code, len, blob))
{
ID3D11PixelShader* p_shader = CreatePixelShaderFromByteCode(blob);
blob->Release();
return p_shader;
return CreatePixelShaderFromByteCode(blob);
}
return nullptr;
}
Expand Down
76 changes: 46 additions & 30 deletions Source/Core/VideoBackends/D3D/D3DShader.h
Expand Up @@ -15,40 +15,56 @@ namespace DX11

namespace D3D
{
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, unsigned int len);
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, unsigned int len);
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned int len);
u32 ReflectTextureMask( const void* code, unsigned int len );
enum class ShaderType : u32 {
Vertex,
Pixel,
Geometry,
Compute,
//Hull,
//Domain,
};

VertexShaderPtr CreateVertexShaderFromByteCode(const void* bytecode, size_t len);
GeometryShaderPtr CreateGeometryShaderFromByteCode(const void* bytecode, size_t len);
PixelShaderPtr CreatePixelShaderFromByteCode(const void* bytecode, size_t len);
u32 ReflectTextureMask( const void* code, size_t len );

// The returned bytecode buffers should be Release()d.
bool CompileVertexShader(const char* code, unsigned int len,
D3DBlob** blob);
bool CompileGeometryShader(const char* code, unsigned int len,
D3DBlob** blob, const D3D_SHADER_MACRO* pDefines = nullptr);
bool CompilePixelShader(const char* code, unsigned int len,
D3DBlob** blob, const D3D_SHADER_MACRO* pDefines = nullptr);
bool CompileVertexShader(const char* code, size_t len,
D3DBlob& blob);
bool CompileGeometryShader(const char* code, size_t len,
D3DBlob& blob, const D3D_SHADER_MACRO* pDefines = nullptr);
bool CompilePixelShader(const char* code, size_t len,
D3DBlob& blob, const D3D_SHADER_MACRO* pDefines = nullptr);
bool CompileComputeShader(const char* code, size_t len,
D3DBlob& blob, const D3D_SHADER_MACRO* pDefines = nullptr);

bool CompileShader(ShaderType type, const char* code, size_t len,
D3DBlob& blob, const D3D_SHADER_MACRO* pDefines = nullptr);

// Utility functions
ID3D11VertexShader* CompileAndCreateVertexShader(const char* code,
unsigned int len);
ID3D11GeometryShader* CompileAndCreateGeometryShader(const char* code,
unsigned int len, const D3D_SHADER_MACRO* pDefines = nullptr);
ID3D11PixelShader* CompileAndCreatePixelShader(const char* code,
unsigned int len);

inline ID3D11VertexShader* CreateVertexShaderFromByteCode(D3DBlob* bytecode)
{ return CreateVertexShaderFromByteCode(bytecode->Data(), bytecode->Size()); }
inline ID3D11GeometryShader* CreateGeometryShaderFromByteCode(D3DBlob* bytecode)
{ return CreateGeometryShaderFromByteCode(bytecode->Data(), bytecode->Size()); }
inline ID3D11PixelShader* CreatePixelShaderFromByteCode(D3DBlob* bytecode)
{ return CreatePixelShaderFromByteCode(bytecode->Data(), bytecode->Size()); }

inline ID3D11VertexShader* CompileAndCreateVertexShader(D3DBlob* code)
{ return CompileAndCreateVertexShader((const char*)code->Data(), code->Size()); }
inline ID3D11GeometryShader* CompileAndCreateGeometryShader(D3DBlob* code, const D3D_SHADER_MACRO* pDefines = nullptr)
{ return CompileAndCreateGeometryShader((const char*)code->Data(), code->Size(), pDefines); }
inline ID3D11PixelShader* CompileAndCreatePixelShader(D3DBlob* code)
{ return CompileAndCreatePixelShader((const char*)code->Data(), code->Size()); }
VertexShaderPtr CompileAndCreateVertexShader(const char* code,
size_t len);
GeometryShaderPtr CompileAndCreateGeometryShader(const char* code,
size_t len, const D3D_SHADER_MACRO* pDefines = nullptr);
PixelShaderPtr CompileAndCreatePixelShader(const char* code,
size_t len);
ComputeShaderPtr CompileAndCreateComputeShader(const char* code,
size_t len);

inline VertexShaderPtr CreateVertexShaderFromByteCode(D3DBlob& bytecode)
{ return CreateVertexShaderFromByteCode(bytecode.Data(), bytecode.Size()); }
inline GeometryShaderPtr CreateGeometryShaderFromByteCode(D3DBlob& bytecode)
{ return CreateGeometryShaderFromByteCode(bytecode.Data(), bytecode.Size()); }
inline PixelShaderPtr CreatePixelShaderFromByteCode(D3DBlob& bytecode)
{ return CreatePixelShaderFromByteCode(bytecode.Data(), bytecode.Size()); }

inline VertexShaderPtr CompileAndCreateVertexShader(D3DBlob& code)
{ return CompileAndCreateVertexShader((const char*)code.Data(), code.Size()); }
inline GeometryShaderPtr CompileAndCreateGeometryShader(D3DBlob& code, const D3D_SHADER_MACRO* pDefines = nullptr)
{ return CompileAndCreateGeometryShader((const char*)code.Data(), code.Size(), pDefines); }
inline PixelShaderPtr CompileAndCreatePixelShader(D3DBlob& code)
{ return CompileAndCreatePixelShader((const char*)code.Data(), code.Size()); }
}

} // namespace DX11
2 changes: 1 addition & 1 deletion Source/Core/VideoBackends/D3D/D3DTexture.h
Expand Up @@ -4,7 +4,7 @@

#pragma once

#include <d3d11.h>
#include <d3d11_1.h>

namespace DX11
{
Expand Down
32 changes: 17 additions & 15 deletions Source/Core/VideoBackends/D3D/D3DUtil.cpp
Expand Up @@ -255,24 +255,26 @@ int CD3DFont::Init()
// setup device objects for drawing
m_pshader = D3D::CompileAndCreatePixelShader(fontpixshader, sizeof(fontpixshader));
if (m_pshader == nullptr) PanicAlert("Failed to create pixel shader, %s %d\n", __FILE__, __LINE__);
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_pshader, "pixel shader of a CD3DFont object");
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_pshader.get(), "pixel shader of a CD3DFont object");

D3DBlob* vsbytecode;
D3D::CompileVertexShader(fontvertshader, sizeof(fontvertshader), &vsbytecode);
if (vsbytecode == nullptr) PanicAlert("Failed to compile vertex shader, %s %d\n", __FILE__, __LINE__);
D3DBlob vsbytecode;
if(!D3D::CompileVertexShader(fontvertshader, sizeof(fontvertshader), vsbytecode))
PanicAlert("Failed to compile vertex shader, %s %d\n", __FILE__, __LINE__);
m_vshader = D3D::CreateVertexShaderFromByteCode(vsbytecode);
if (m_vshader == nullptr) PanicAlert("Failed to create vertex shader, %s %d\n", __FILE__, __LINE__);
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_vshader, "vertex shader of a CD3DFont object");
if (!m_vshader)
PanicAlert("Failed to create vertex shader, %s %d\n", __FILE__, __LINE__);
D3D::SetDebugObjectName((ID3D11DeviceChild*)m_vshader.get(), "vertex shader of a CD3DFont object");

const D3D11_INPUT_ELEMENT_DESC desc[] =
{
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
hr = D3D::device->CreateInputLayout(desc, 3, vsbytecode->Data(), vsbytecode->Size(), &m_InputLayout);
if (FAILED(hr)) PanicAlert("Failed to create input layout, %s %d\n", __FILE__, __LINE__);
SAFE_RELEASE(vsbytecode);
hr = D3D::device->CreateInputLayout(desc, 3, vsbytecode.Data(), vsbytecode.Size(), ToAddr(m_InputLayout));
if (FAILED(hr))
PanicAlert("Failed to create input layout, %s %d\n", __FILE__, __LINE__);


D3D11_BLEND_DESC blenddesc;
blenddesc.AlphaToCoverageEnable = FALSE;
Expand Down Expand Up @@ -308,9 +310,9 @@ int CD3DFont::Shutdown()
{
SAFE_RELEASE(m_pVB);
SAFE_RELEASE(m_pTexture);
SAFE_RELEASE(m_InputLayout);
SAFE_RELEASE(m_pshader);
SAFE_RELEASE(m_vshader);
m_InputLayout.reset();
m_pshader.reset();
m_vshader.reset();

SAFE_RELEASE(m_blendstate);
SAFE_RELEASE(m_raststate);
Expand Down Expand Up @@ -348,11 +350,11 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw
D3D::stateman->PushRasterizerState(m_raststate);
D3D::stateman->Apply();

D3D::context->PSSetShader(m_pshader, nullptr, 0);
D3D::context->VSSetShader(m_vshader, nullptr, 0);
D3D::context->PSSetShader(m_pshader.get(), nullptr, 0);
D3D::context->VSSetShader(m_vshader.get(), nullptr, 0);
D3D::context->GSSetShader(nullptr, nullptr, 0);

D3D::context->IASetInputLayout(m_InputLayout);
D3D::context->IASetInputLayout(m_InputLayout.get());
D3D::context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
D3D::context->PSSetShaderResources(0, 1, &m_pTexture);

Expand Down
9 changes: 5 additions & 4 deletions Source/Core/VideoBackends/D3D/D3DUtil.h
Expand Up @@ -4,10 +4,11 @@

#pragma once

#include <d3d11.h>
#include <d3d11_1.h>
#include <string>

#include "Common/MathUtil.h"
#include "VideoBackends/D3D/D3DPtr.h"

namespace DX11
{
Expand All @@ -25,9 +26,9 @@ namespace D3D
{
ID3D11ShaderResourceView* m_pTexture;
ID3D11Buffer* m_pVB;
ID3D11InputLayout* m_InputLayout;
ID3D11PixelShader* m_pshader;
ID3D11VertexShader* m_vshader;
D3D::UniquePtr<ID3D11InputLayout> m_InputLayout;
D3D::UniquePtr<ID3D11PixelShader> m_pshader;
D3D::UniquePtr<ID3D11VertexShader> m_vshader;
ID3D11BlendState* m_blendstate;
ID3D11RasterizerState* m_raststate;
const int m_dwTexWidth;
Expand Down
37 changes: 36 additions & 1 deletion Source/Core/VideoBackends/D3D/D3DWrapDeviceContext.h
Expand Up @@ -4,7 +4,7 @@

#pragma once

#include <d3d11.h>
#include <d3d11_1.h>
#include <array>

namespace DX11
Expand All @@ -16,6 +16,7 @@ namespace D3D
// Wrap a ID3D11DeviceContext to prune redundant state changes.
class WrapDeviceContext {
ID3D11DeviceContext* ctx_ { nullptr };
ID3D11DeviceContext1* ctx1_ { nullptr };

struct Cache {
ID3D11PixelShader * ps_{};
Expand Down Expand Up @@ -81,6 +82,10 @@ class WrapDeviceContext {
// True when a context pointer is held (ctx_ is non-null).
explicit operator bool() const { return ctx_ != nullptr; }
// Implicit conversion that hands out the wrapped context as an ID3D11DeviceChild*.
operator ID3D11DeviceChild* ( ) { return ctx_; }

void InitContext1() {
ctx_->QueryInterface( __uuidof(ID3D11DeviceContext1), (void**)&ctx1_);
}

//
ULONG Release() {
c_ = Cache{}; // in case of restart, as i am a global variable.
Expand Down Expand Up @@ -201,6 +206,10 @@ class WrapDeviceContext {
}
}

// Binds a compute shader. Forwards straight to the wrapped context; the call
// is not checked against the state cache in this method.
void CSSetShader( ID3D11ComputeShader *pShader, ID3D11ClassInstance *const *ppClassInstances, UINT NumClassInstances ){
ctx_->CSSetShader( pShader, ppClassInstances, NumClassInstances );
}

void VSSetConstantBuffers( UINT StartSlot, UINT NumBuffers, ID3D11Buffer *const *ppConstantBuffers ){
NumBuffers+=StartSlot;
bool dirty{};
Expand Down Expand Up @@ -279,6 +288,10 @@ class WrapDeviceContext {
ctx_->ClearRenderTargetView( pRenderTargetView, ColorRGBA );
}

void ClearUnorderedAccessViewUint( ID3D11UnorderedAccessView *pRenderTargetView, const UINT ColorRGBA[ 4 ] ) {
ctx_->ClearUnorderedAccessViewUint( pRenderTargetView, ColorRGBA );
}

void DrawIndexed( UINT IndexCount, UINT StartIndexLocation, INT BaseVertexLocation ) {
ApplyLazyStates();
ctx_->DrawIndexed( IndexCount, StartIndexLocation, BaseVertexLocation );
Expand All @@ -289,6 +302,28 @@ class WrapDeviceContext {
ctx_->Draw( VertexCount, StartVertexLocation );
}

// Binds NumSamplers sampler states to the compute stage starting at StartSlot.
// Forwards straight to the wrapped context.
void CSSetSamplers( UINT StartSlot, UINT NumSamplers, ID3D11SamplerState *const *ppSamplers ){
ctx_->CSSetSamplers(StartSlot,NumSamplers, ppSamplers);
}

// Binds NumViews shader resource views to the compute stage starting at StartSlot.
// Forwards straight to the wrapped context.
void CSSetShaderResources( UINT StartSlot, UINT NumViews, ID3D11ShaderResourceView *const *ppShaderResourceViews ) {
ctx_->CSSetShaderResources(StartSlot,NumViews, ppShaderResourceViews);
}

// Binds Num unordered access views to the compute stage starting at StartSlot.
// The underlying call's initial-counts argument is always passed as nullptr,
// so callers of this wrapper cannot supply append/consume counter values.
void CSSetUnorderedAccessViews( UINT StartSlot, UINT Num, ID3D11UnorderedAccessView *const *ppUavs ){
ctx_->CSSetUnorderedAccessViews(StartSlot,Num, ppUavs,nullptr);
}

// Binds NumBuffers constant buffers to the compute stage starting at StartSlot.
// Forwards straight to the wrapped context.
void CSSetConstantBuffers( UINT StartSlot, UINT NumBuffers, ID3D11Buffer *const *ppConstantBuffers ) {
ctx_->CSSetConstantBuffers(StartSlot,NumBuffers, ppConstantBuffers);
}


// Launches a compute dispatch of X * Y * Z thread groups.
// ApplyLazyStates() runs first, the same way the draw wrappers do before drawing.
void Dispatch( UINT X, UINT Y, UINT Z) {
ApplyLazyStates();
ctx_->Dispatch( X, Y, Z );
}

// Copies CPU data into a subresource. Forwards all arguments unchanged to the
// wrapped context.
void UpdateSubresource( ID3D11Resource *pDstResource, UINT DstSubresource, const D3D11_BOX *pDstBox, const void *pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch ){
ctx_->UpdateSubresource( pDstResource, DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch );
}
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoBackends/D3D/FramebufferManager.h
Expand Up @@ -4,7 +4,7 @@

#pragma once

#include "d3d11.h"
#include "d3d11_1.h"

#include "VideoBackends/D3D/D3DTexture.h"
#include "VideoCommon/FramebufferManagerBase.h"
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/VideoBackends/D3D/LineAndPointGeometryShader.cpp
Expand Up @@ -228,7 +228,7 @@ bool LineAndPointGeometryShader::SetLineShader(u32 components, float lineWidth,
lineWidth, texOffset, vpWidth, vpHeight);

D3D::context->GSSetShader(shader, nullptr, 0);
auto paramBuffer = paramsBuffer_.get();
ID3D11Buffer* paramBuffer = paramsBuffer_.get();
D3D::context->GSSetConstantBuffers(0, 1, &paramBuffer);
return true;
}
Expand All @@ -249,7 +249,7 @@ bool LineAndPointGeometryShader::SetPointShader(u32 components, float pointSize,
pointSize, texOffset, vpWidth, vpHeight);

D3D::context->GSSetShader(shader, nullptr, 0);
auto paramBuffer = paramsBuffer_.get();
ID3D11Buffer* paramBuffer = paramsBuffer_.get();
D3D::context->GSSetConstantBuffers(0, 1, &paramBuffer);

return true;
Expand Down
14 changes: 6 additions & 8 deletions Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp
Expand Up @@ -14,13 +14,11 @@ namespace DX11
class D3DVertexFormat : public NativeVertexFormat
{
D3D11_INPUT_ELEMENT_DESC m_elems[32];
UINT m_num_elems;
UINT m_num_elems{};

ID3D11InputLayout* m_layout;
D3D::InputLayoutPtr m_layout;

public:
D3DVertexFormat() : m_num_elems(0), m_layout(nullptr) {}
~D3DVertexFormat() { SAFE_RELEASE(m_layout); }

void Initialize(const PortableVertexDeclaration &_vtx_decl) override;
void SetupVertexPointers() override;
Expand Down Expand Up @@ -132,13 +130,13 @@ void D3DVertexFormat::SetupVertexPointers()
// CreateInputLayout requires a shader input, but it only looks at the
// signature of the shader, so we don't need to recompute it if the shader
// changes.
D3DBlob* vs_bytecode = DX11::VertexShaderCache::GetActiveShaderBytecode();
auto& vs_bytecode = DX11::VertexShaderCache::GetActiveShaderBytecode();

HRESULT hr = DX11::D3D::device->CreateInputLayout(m_elems, m_num_elems, vs_bytecode->Data(), vs_bytecode->Size(), &m_layout);
HRESULT hr = DX11::D3D::device->CreateInputLayout(m_elems, m_num_elems, vs_bytecode.Data(), vs_bytecode.Size(), ToAddr(m_layout) );
if (FAILED(hr)) PanicAlert("Failed to create input layout, %s %d\n", __FILE__, __LINE__);
DX11::D3D::SetDebugObjectName((ID3D11DeviceChild*)m_layout, "input layout used to emulate the GX pipeline");
DX11::D3D::SetDebugObjectName((ID3D11DeviceChild*)m_layout.get(), "input layout used to emulate the GX pipeline");
}
DX11::D3D::context->IASetInputLayout(m_layout);
DX11::D3D::context->IASetInputLayout(m_layout.get());
}

bool D3DVertexFormat::Equal(NativeVertexFormat const& other) const
Expand Down
536 changes: 536 additions & 0 deletions Source/Core/VideoBackends/D3D/PSTextureDecoder.cpp

Large diffs are not rendered by default.

105 changes: 105 additions & 0 deletions Source/Core/VideoBackends/D3D/PSTextureDecoder.h
@@ -0,0 +1,105 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.

#pragma once

#include <map>
#include "Common/LinearDiskCache.h"
#include "VideoBackends/D3D/D3DPtr.h"
#include "VideoBackends/D3D/TextureCache.h"


namespace DX11
{

class PSTextureDecoder : public TextureDecoder
{

public:

PSTextureDecoder() = default;

void Init() override;
void Shutdown() override;
size_t Decode(u8* dst, u32 srcFmt, u32 w, u32 h, u32 levels, D3DTexture2D& dstTexture, u32 dstFmt) override;
size_t DecodeRGBAFromTMEM( u8 const * ar_src, u8 const * bg_src, u32 width, u32 height, D3DTexture2D& dstTexture) override;
void LoadLut(u32 lutFmt, void* addr, u32 size ) override;
private:

bool m_ready{};

D3D::BufferPtr m_in; // dynamic to fill with the raw texture
//D3D::UniquePtr<ID3D11Buffer> m_outStage;

struct PoolKey {
u32 dstFmt;
u32 w_;
u32 h_;
bool operator<(PoolKey const & o) const {
return dstFmt < o.dstFmt
|| dstFmt == o.dstFmt && w_ < o.w_
|| dstFmt == o.dstFmt && w_ == o.w_ && h_ < o.h_;
}
};

struct PoolValue {
D3D::Texture2dPtr rsc_;
D3D::UavPtr uav_;
PoolValue() = default;
PoolValue( PoolValue && o) : rsc_{std::move(o.rsc_)}, uav_{std::move(o.uav_)} {}
};

using TexturePool = std::map<PoolKey,PoolValue>;
TexturePool pool_;



//D3D::UniquePtr<ID3D11Buffer> m_encodeParams;

// Stuff only used in static-linking mode (SM4.0-compatible)

bool InitStaticMode();
bool SetStaticShader(TextureFormat srcFmt, u32 lutFmt, u32 dstFmt);

typedef unsigned int ComboKey; // Key for a shader combination

ID3D11ComputeShader* InsertShader( ComboKey const &key, u8 const *data, u32 sz);

ComboKey MakeComboKey(TextureFormat srcFmt, u32 lutFmt, u32 dstFmt)
{
u32 rawFmt = (u32(srcFmt)&0xF);
return rawFmt | ((lutFmt&0xF)<<16);
}

typedef std::map<ComboKey, D3D::ComputeShaderPtr> ComboMap;

D3D::BufferPtr rawDataRsc_;
D3D::SrvPtr rawDataSrv_;

D3D::BufferPtr lutRsc_;
D3D::SrvPtr lutSrv_;
u32 lutFmt_{};

D3D::BufferPtr parms_;

ComboMap m_staticShaders;

class ShaderCacheInserter : public LinearDiskCacheReader<ComboKey, u8>
{
public:
void Read(const ComboKey &key, const u8 *value, u32 value_size)
{
//encoder_.InsertShader(key, value, value_size);
}
ShaderCacheInserter(PSTextureDecoder &encoder) : encoder_(encoder) {}
private:
PSTextureDecoder& encoder_;
};
friend ShaderCacheInserter;

LinearDiskCache<ComboKey, u8> m_shaderCache;

};

}
1,784 changes: 836 additions & 948 deletions Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp

Large diffs are not rendered by default.

59 changes: 36 additions & 23 deletions Source/Core/VideoBackends/D3D/PSTextureEncoder.h
Expand Up @@ -4,6 +4,8 @@

#pragma once

#include "Common/LinearDiskCache.h"
#include "VideoBackends/D3D/D3DPtr.h"
#include "VideoBackends/D3D/TextureEncoder.h"

struct ID3D11Texture2D;
Expand All @@ -27,29 +29,23 @@ class PSTextureEncoder : public TextureEncoder

public:

PSTextureEncoder();
PSTextureEncoder() = default;

void Init();
void Shutdown();
size_t Encode(u8* dst, unsigned int dstFormat,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf);

private:

bool m_ready;
bool m_ready{};

D3D::UniquePtr<ID3D11Buffer> m_out;
D3D::UniquePtr<ID3D11Buffer> m_outStage;
D3D::UniquePtr<ID3D11UnorderedAccessView> m_outUav;

ID3D11Texture2D* m_out;
ID3D11RenderTargetView* m_outRTV;
ID3D11Texture2D* m_outStage;
ID3D11Buffer* m_encodeParams;
ID3D11Buffer* m_quad;
ID3D11VertexShader* m_vShader;
ID3D11InputLayout* m_quadLayout;
ID3D11BlendState* m_efbEncodeBlendState;
ID3D11DepthStencilState* m_efbEncodeDepthState;
ID3D11RasterizerState* m_efbEncodeRastState;
ID3D11SamplerState* m_efbSampler;
D3D::UniquePtr<ID3D11Buffer> m_encodeParams;
D3D::UniquePtr<ID3D11SamplerState> m_efbSampler;

// Stuff only used in static-linking mode (SM4.0-compatible)

Expand All @@ -59,26 +55,43 @@ class PSTextureEncoder : public TextureEncoder

typedef unsigned int ComboKey; // Key for a shader combination

ID3D11ComputeShader* InsertShader( ComboKey const &key, u8 const *data, u32 sz);

// Packs the shader-selection parameters into a single key:
//   bit 0       scaleByHalf
//   bit 1       isIntensity
//   bits 2..    srcFormat (shifted left by 2)
//   bits 4..    dstFormat (shifted left by 4)
//   bit 24      model5
// The srcFormat and dstFormat fields only stay disjoint while srcFormat fits in two bits.
ComboKey MakeComboKey(unsigned int dstFormat,
	PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf, bool model5)
{
	return (model5 ? (1<<24) : 0) | (dstFormat << 4) | (static_cast<int>(srcFormat) << 2) | (isIntensity ? (1<<1) : 0)
		| (scaleByHalf ? (1<<0) : 0);
}

typedef std::map<ComboKey, ID3D11PixelShader*> ComboMap;
typedef std::map<ComboKey, D3D::UniquePtr<ID3D11ComputeShader>> ComboMap;

ComboMap m_staticShaders;

// Reader callback type for the shader disk cache: every entry handed to Read()
// is passed on to the owning encoder's InsertShader().
class ShaderCacheInserter : public LinearDiskCacheReader<ComboKey, u8>
{
public:
// Called with one cached entry: its key plus value_size bytes at value.
void Read(const ComboKey &key, const u8 *value, u32 value_size)
{
encoder_.InsertShader(key, value, value_size);
}
// Keeps a reference to the encoder; this object does not own it.
ShaderCacheInserter(PSTextureEncoder &encoder) : encoder_(encoder) {}
private:
PSTextureEncoder& encoder_;
};
friend ShaderCacheInserter;

LinearDiskCache<ComboKey, u8> m_shaderCache;

// Stuff only used for dynamic-linking mode (SM5.0+, available as soon as
// Microsoft fixes their bloody HLSL compiler)

bool InitDynamicMode();
bool SetDynamicShader(unsigned int dstFormat,
PEControl::PixelFormat srcFormat, bool isIntensity, bool scaleByHalf);

ID3D11PixelShader* m_dynamicShader;
ID3D11ClassLinkage* m_classLinkage;
D3D::UniquePtr<ID3D11ComputeShader> m_dynamicShader;
D3D::UniquePtr<ID3D11ClassLinkage> m_classLinkage;

// Interface slots
UINT m_fetchSlot;
Expand All @@ -88,13 +101,13 @@ class PSTextureEncoder : public TextureEncoder

// Class instances
// Fetch: 0 is RGB, 1 is RGBA, 2 is RGB565, 3 is Z
ID3D11ClassInstance* m_fetchClass[4];
D3D::UniquePtr<ID3D11ClassInstance> m_fetchClass[4];
// ScaledFetch: 0 is off, 1 is on
ID3D11ClassInstance* m_scaledFetchClass[2];
D3D::UniquePtr<ID3D11ClassInstance> m_scaledFetchClass[2];
// Intensity: 0 is off, 1 is on
ID3D11ClassInstance* m_intensityClass[2];
D3D::UniquePtr<ID3D11ClassInstance> m_intensityClass[2];
// Generator: one for each dst format, 16 total
ID3D11ClassInstance* m_generatorClass[16];
D3D::UniquePtr<ID3D11ClassInstance> m_generatorClass[16];

std::vector<ID3D11ClassInstance*> m_linkageArray;

Expand Down
169 changes: 105 additions & 64 deletions Source/Core/VideoBackends/D3D/PixelShaderCache.cpp
Expand Up @@ -31,14 +31,15 @@ UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;

LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;

// Utility pixel shaders owned by this translation unit.
// For the two-element arrays, the getters below use slot [0] for the
// non-multisampled shader and slot [1] for the MSAA variant.
D3D::UniquePtr<ID3D11PixelShader> s_ColorMatrixProgram[2];
D3D::UniquePtr<ID3D11PixelShader> s_ColorCopyProgram[2];
D3D::UniquePtr<ID3D11PixelShader> s_DepthMatrixProgram[2];
D3D::UniquePtr<ID3D11PixelShader> s_ClearProgram;
D3D::UniquePtr<ID3D11PixelShader> s_rgba6_to_rgb8[2];
D3D::UniquePtr<ID3D11PixelShader> s_rgb8_to_rgba6[2];
ID3D11Buffer* pscbuf = nullptr;
ID3D11Buffer* pscbuf_alt = nullptr;
// Copy of the constants last uploaded by GetConstantBuffer(); compared against
// the current constants so an unchanged set is not uploaded again.
static PixelShaderConstants s_shadow;

const char clear_program_code[] = {
"void main(\n"
Expand Down Expand Up @@ -111,7 +112,7 @@ const char color_matrix_program_code_msaa[] = {
"}\n"
};

const char depth_matrix_program[] = {
const char depth_matrix_program__[] = {
"sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
Expand All @@ -126,7 +127,23 @@ const char depth_matrix_program[] = {
"}\n"
};

const char depth_matrix_program_msaa[] = {
// HLSL source of the non-multisampled depth copy pixel shader.
// It samples a single-channel depth texture, computes
// 0xFFFFFF - round(depth * 0xFFFFFF), splits that 24-bit value into three
// 8-bit channels, sets .w to the red channel quantized to 1/15 steps, and
// transforms the result with cColMatrix rows 0-3 plus cColMatrix[4].
const char depth_matrix_program[] = R"HLSL(
sampler samp0 : register(s0);
Texture2D<float> Tex0 : register(t0);
uniform float4 cColMatrix[7] : register(c0);
void main( out float4 ocol0 : SV_Target,
in float4 pos : SV_Position,
in float2 uv0 : TEXCOORD0) {
float texcol = Tex0.Sample(samp0,uv0);
int zCoord = 0xFFFFFF-round(texcol * float(0xFFFFFF));
int3 zCoord3 = int3((zCoord&0xFF0000)>>16, (zCoord&0x00FF00)>>8, (zCoord&0xFF) );
float4 oD = float4( float3(zCoord3)/255.f.xxx, 0.f);
oD.w = floor(oD.r*15.f)/15.f;
ocol0 = float4(dot(oD,cColMatrix[0]),dot(oD,cColMatrix[1]),dot(oD,cColMatrix[2]),dot(oD,cColMatrix[3])) + cColMatrix[4];
}
)HLSL";

const char depth_matrix_program_msaa_old[] = {
"sampler samp0 : register(s0);\n"
"Texture2DMS<float4, %d> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
Expand All @@ -146,6 +163,27 @@ const char depth_matrix_program_msaa[] = {
"}\n"
};

// HLSL source template of the MSAA depth copy pixel shader.
// This is a printf-style format string: the single %d is replaced with the
// sample count before compilation, so the text must not contain any other '%'.
// The shader averages all samples of the texel and then applies the same
// 24-bit split and cColMatrix transform as the non-multisampled version.
const char depth_matrix_program_msaa[] = R"HLSL(
sampler samp0 : register(s0);
Texture2DMS<float, %d> Tex0 : register(t0);
uniform float4 cColMatrix[7] : register(c0);
void main( out float4 ocol0 : SV_Target,
in float4 pos : SV_Position,
in float2 uv0 : TEXCOORD0) {
int width, height, samples;
Tex0.GetDimensions(width, height, samples);
float texcol = 0;
for(int i = 0; i < samples; ++i)
texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);
texcol /= samples;
int zCoord = 0xFFFFFF-round(texcol * float(0xFFFFFF));
int3 zCoord3 = int3((zCoord&0xFF0000)>>16, (zCoord&0x00FF00)>>8, (zCoord&0xFF) );
float4 oD = float4( float3(zCoord3)/255.f.xxx, 0.f);
oD.w = floor(oD.r*15.f)/15.f;
ocol0 = float4(dot(oD,cColMatrix[0]),dot(oD,cColMatrix[1]),dot(oD,cColMatrix[2]),dot(oD,cColMatrix[3])) + cColMatrix[4];
}
)HLSL";

const char reint_rgba6_to_rgb8[] = {
"sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n"
Expand Down Expand Up @@ -238,9 +276,9 @@ ID3D11PixelShader* PixelShaderCache::ReinterpRGBA6ToRGB8(bool multisampled)
{
s_rgba6_to_rgb8[0] = D3D::CompileAndCreatePixelShader(reint_rgba6_to_rgb8, sizeof(reint_rgba6_to_rgb8));
CHECK(s_rgba6_to_rgb8[0], "Create RGBA6 to RGB8 pixel shader");
D3D::SetDebugObjectName(s_rgba6_to_rgb8[0], "RGBA6 to RGB8 pixel shader");
D3D::SetDebugObjectName(s_rgba6_to_rgb8[0].get(), "RGBA6 to RGB8 pixel shader");
}
return s_rgba6_to_rgb8[0];
return s_rgba6_to_rgb8[0].get();
}
else if (!s_rgba6_to_rgb8[1])
{
Expand All @@ -251,9 +289,9 @@ ID3D11PixelShader* PixelShaderCache::ReinterpRGBA6ToRGB8(bool multisampled)
s_rgba6_to_rgb8[1] = D3D::CompileAndCreatePixelShader(buf, l);

CHECK(s_rgba6_to_rgb8[1], "Create RGBA6 to RGB8 MSAA pixel shader");
D3D::SetDebugObjectName(s_rgba6_to_rgb8[1], "RGBA6 to RGB8 MSAA pixel shader");
D3D::SetDebugObjectName(s_rgba6_to_rgb8[1].get(), "RGBA6 to RGB8 MSAA pixel shader");
}
return s_rgba6_to_rgb8[1];
return s_rgba6_to_rgb8[1].get();
}

ID3D11PixelShader* PixelShaderCache::ReinterpRGB8ToRGBA6(bool multisampled)
Expand All @@ -264,9 +302,9 @@ ID3D11PixelShader* PixelShaderCache::ReinterpRGB8ToRGBA6(bool multisampled)
{
s_rgb8_to_rgba6[0] = D3D::CompileAndCreatePixelShader(reint_rgb8_to_rgba6, sizeof(reint_rgb8_to_rgba6));
CHECK(s_rgb8_to_rgba6[0], "Create RGB8 to RGBA6 pixel shader");
D3D::SetDebugObjectName(s_rgb8_to_rgba6[0], "RGB8 to RGBA6 pixel shader");
D3D::SetDebugObjectName(s_rgb8_to_rgba6[0].get(), "RGB8 to RGBA6 pixel shader");
}
return s_rgb8_to_rgba6[0];
return s_rgb8_to_rgba6[0].get();
}
else if (!s_rgb8_to_rgba6[1])
{
Expand All @@ -277,62 +315,62 @@ ID3D11PixelShader* PixelShaderCache::ReinterpRGB8ToRGBA6(bool multisampled)
s_rgb8_to_rgba6[1] = D3D::CompileAndCreatePixelShader(buf, l);

CHECK(s_rgb8_to_rgba6[1], "Create RGB8 to RGBA6 MSAA pixel shader");
D3D::SetDebugObjectName(s_rgb8_to_rgba6[1], "RGB8 to RGBA6 MSAA pixel shader");
D3D::SetDebugObjectName(s_rgb8_to_rgba6[1].get(), "RGB8 to RGBA6 MSAA pixel shader");
}
return s_rgb8_to_rgba6[1];
return s_rgb8_to_rgba6[1].get();
}

// Returns the color copy pixel shader as a non-owning pointer.
// The non-MSAA shader is returned when `multisampled` is false or the active
// AA mode has a sample count of 1. Otherwise the MSAA variant is returned,
// compiled on first use for the active sample count.
ID3D11PixelShader* PixelShaderCache::GetColorCopyProgram(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1) return s_ColorCopyProgram[0].get();
	else if (s_ColorCopyProgram[1]) return s_ColorCopyProgram[1].get();
	else
	{
		// create MSAA shader for current AA mode
		char buf[1024];
		int l = sprintf_s(buf, 1024, color_copy_program_code_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(buf, l);
		CHECK(s_ColorCopyProgram[1]!=nullptr, "Create color copy MSAA pixel shader");
		D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[1].get(), "color copy MSAA pixel shader");
		return s_ColorCopyProgram[1].get();
	}
}

// Returns the color matrix pixel shader as a non-owning pointer.
// The non-MSAA shader is returned when `multisampled` is false or the active
// AA mode has a sample count of 1. Otherwise the MSAA variant is returned,
// compiled on first use for the active sample count.
ID3D11PixelShader* PixelShaderCache::GetColorMatrixProgram(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1) return s_ColorMatrixProgram[0].get();
	else if (s_ColorMatrixProgram[1]) return s_ColorMatrixProgram[1].get();
	else
	{
		// create MSAA shader for current AA mode
		char buf[1024];
		int l = sprintf_s(buf, 1024, color_matrix_program_code_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf, l);
		CHECK(s_ColorMatrixProgram[1]!=nullptr, "Create color matrix MSAA pixel shader");
		D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[1].get(), "color matrix MSAA pixel shader");
		return s_ColorMatrixProgram[1].get();
	}
}

// Returns the depth matrix pixel shader as a non-owning pointer.
// The non-MSAA shader is returned when `multisampled` is false or the active
// AA mode has a sample count of 1. Otherwise the MSAA variant is returned,
// compiled on first use for the active sample count.
ID3D11PixelShader* PixelShaderCache::GetDepthMatrixProgram(bool multisampled)
{
	if (!multisampled || D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count == 1) return s_DepthMatrixProgram[0].get();
	else if (s_DepthMatrixProgram[1]) return s_DepthMatrixProgram[1].get();
	else
	{
		// create MSAA shader for current AA mode
		// 2048-byte buffer for the formatted depth MSAA shader source.
		char buf[2048];
		int l = sprintf_s(buf, 2048, depth_matrix_program_msaa, D3D::GetAAMode(g_ActiveConfig.iMultisampleMode).Count);
		s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(buf, l);
		CHECK(s_DepthMatrixProgram[1]!=nullptr, "Create depth matrix MSAA pixel shader");
		D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[1].get(), "depth matrix MSAA pixel shader");
		return s_DepthMatrixProgram[1].get();
	}
}

// Returns the clear pixel shader as a non-owning pointer; s_ClearProgram keeps ownership.
ID3D11PixelShader* PixelShaderCache::GetClearProgram()
{
	return s_ClearProgram.get();
}

ID3D11Buffer* &PixelShaderCache::GetConstantBuffer()
Expand All @@ -343,8 +381,11 @@ ID3D11Buffer* &PixelShaderCache::GetConstantBuffer()
{
PixelShaderManager::dirty = false;
int sz = g_ActiveConfig.bEnablePixelLighting ? sizeof(PixelShaderConstants) : sizeof(PixelShaderNoLightConstants);
D3D::context->UpdateSubresource(buf,0,nullptr, &PixelShaderManager::constants, sz,0);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sz);
if ( memcmp(&s_shadow, &PixelShaderManager::constants, sz )) {
memcpy(&s_shadow, &PixelShaderManager::constants, sz );
D3D::context->UpdateSubresource(buf,0,nullptr, &s_shadow, sz,0);
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sz);
}
}
return buf;
}
Expand Down Expand Up @@ -376,22 +417,22 @@ void PixelShaderCache::Init()
// used when drawing clear quads
s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code, sizeof(clear_program_code));
CHECK(s_ClearProgram!=nullptr, "Create clear pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ClearProgram, "clear pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ClearProgram.get(), "clear pixel shader");

// used when copying/resolving the color buffer
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code, sizeof(color_copy_program_code));
CHECK(s_ColorCopyProgram[0]!=nullptr, "Create color copy pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[0], "color copy pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorCopyProgram[0].get(), "color copy pixel shader");

// used for color conversion
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(color_matrix_program_code, sizeof(color_matrix_program_code));
CHECK(s_ColorMatrixProgram[0]!=nullptr, "Create color matrix pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[0], "color matrix pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_ColorMatrixProgram[0].get(), "color matrix pixel shader");

// used for depth copy
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(depth_matrix_program, sizeof(depth_matrix_program));
CHECK(s_DepthMatrixProgram[0]!=nullptr, "Create depth matrix pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[0], "depth matrix pixel shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)s_DepthMatrixProgram[0].get(), "depth matrix pixel shader");

Clear();

Expand Down Expand Up @@ -427,28 +468,30 @@ void PixelShaderCache::Clear()
// Used in Swap() when AA mode has changed
//
// Releases the MSAA ([1]) variant of every helper pixel shader. The smart
// pointers release the underlying D3D object in reset(), so no explicit
// SAFE_RELEASE is needed (and doing both would release twice).
void PixelShaderCache::InvalidateMSAAShaders()
{
	s_ColorCopyProgram[1].reset();
	s_ColorMatrixProgram[1].reset();
	s_DepthMatrixProgram[1].reset();
	s_rgb8_to_rgba6[1].reset();
	s_rgba6_to_rgb8[1].reset();
}

void PixelShaderCache::Shutdown()
{
SAFE_RELEASE(pscbuf);
SAFE_RELEASE(pscbuf_alt);

SAFE_RELEASE(s_ClearProgram);
for (int i = 0; i < 2; ++i)
{
SAFE_RELEASE(s_ColorCopyProgram[i]);
SAFE_RELEASE(s_ColorMatrixProgram[i]);
SAFE_RELEASE(s_DepthMatrixProgram[i]);
SAFE_RELEASE(s_rgba6_to_rgb8[i]);
SAFE_RELEASE(s_rgb8_to_rgba6[i]);
}

s_ClearProgram.reset();
for( auto & p : s_ColorCopyProgram )
p.reset();
for( auto & p : s_ColorMatrixProgram )
p.reset();
for( auto & p : s_DepthMatrixProgram )
p.reset();
for( auto & p : s_rgba6_to_rgb8 )
p.reset();
for( auto & p : s_rgb8_to_rgba6 )
p.reset();

Clear();
g_ps_disk_cache.Sync();
g_ps_disk_cache.Close();
Expand Down Expand Up @@ -493,19 +536,18 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D, components);

D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code.GetBuffer(), (unsigned int)strlen(code.GetBuffer()), &pbytecode))
D3DBlob bytecode;
if (!D3D::CompilePixelShader(code.GetBuffer(), (unsigned int)strlen(code.GetBuffer()), bytecode))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
}

// Insert the bytecode into the caches
g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());

bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
pbytecode->Release();
g_ps_disk_cache.Append(uid, bytecode.Data(), bytecode.Size());

bool success = InsertByteCode(uid, bytecode.Data(), bytecode.Size());

if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code.GetBuffer();
Expand All @@ -517,19 +559,18 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)

bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen)
{
ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
auto shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
if (shader == nullptr)
return false;

// TODO: Somehow make the debug name a bit more specific
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of PixelShaderCache");
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader.get(), "a pixel shader of PixelShaderCache");

// Make an entry in the table
PSCacheEntry newentry;
newentry.shader = shader;
auto& newentry = PixelShaders[uid];
newentry.shader = std::move(shader);
newentry.mask_ = D3D::ReflectTextureMask(bytecode, bytecodelen);
PixelShaders[uid] = newentry;
last_entry = &PixelShaders[uid];
last_entry = &newentry;

if (!shader) {
// INCSTAT(stats.numPixelShadersFailed);
Expand Down
11 changes: 6 additions & 5 deletions Source/Core/VideoBackends/D3D/PixelShaderCache.h
Expand Up @@ -4,10 +4,11 @@

#pragma once

#include <d3d11.h>
#include <d3d11_1.h>
#include <map>

#include "VideoCommon/PixelShaderGen.h"
#include "VideoBackends/D3D/D3DPtr.h"

enum DSTALPHA_MODE;

Expand All @@ -23,7 +24,7 @@ class PixelShaderCache
static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); // TODO: Should be renamed to LoadShader
static bool InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen);

// Non-owning pointer to the shader of the most recently selected cache entry.
static ID3D11PixelShader* GetActiveShader() { return last_entry->shader.get(); }
static u32 GetActiveMask() { return last_entry->mask_; }
static ID3D11Buffer* &GetConstantBuffer();

Expand All @@ -39,13 +40,13 @@ class PixelShaderCache
private:
// One compiled pixel shader held by the cache.
struct PSCacheEntry
{
	// Owning handle; released by Destroy() or when the entry is destroyed.
	D3D::UniquePtr<ID3D11PixelShader> shader;
	// Texture mask reflected from the bytecode; ApplyState tests bit N of it
	// for sampler stage N.
	u32 mask_ {};

	// Generated shader source, stored only when shader debugging is enabled.
	std::string code;

	PSCacheEntry() {}
	void Destroy() { shader.reset(); }
};

typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
Expand Down
125 changes: 104 additions & 21 deletions Source/Core/VideoBackends/D3D/Render.cpp
Expand Up @@ -59,9 +59,9 @@ static ID3D11Texture2D* s_screenshot_texture = nullptr;
// Pipeline state descriptions accumulated by the Renderer's Set*() functions
// and turned into D3D state objects in ApplyState().
struct
{
	D3D11_SAMPLER_DESC sampdc[8];            // one per texture stage
	D3D::PackedD3DBlendDesc blenddc;         // blend / colour write mask state
	D3D11_DEPTH_STENCIL_DESC depthdc;        // depth test / write state
	D3D::PackedD3DRasterisationDesc rastdc;  // cull, fill and depth-bias state
} gx_state;


Expand Down Expand Up @@ -219,7 +219,7 @@ Renderer::Renderer()
memset(&gx_state.depthdc, 0, sizeof(gx_state.depthdc));
gx_state.depthdc.DepthEnable = TRUE;
gx_state.depthdc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
gx_state.depthdc.DepthFunc = D3D11_COMPARISON_LESS;
gx_state.depthdc.DepthFunc = D3D11_COMPARISON_GREATER;
gx_state.depthdc.StencilEnable = FALSE;
gx_state.depthdc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK;
gx_state.depthdc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK;
Expand Down Expand Up @@ -325,6 +325,22 @@ void Renderer::SetColorMask()
gx_state.blenddc.RenderTarget[0].RenderTargetWriteMask = color_mask;
}

// EFB cache related
// The EFB is divided into square blocks; the tables below hold one entry per block.
static const u32 EFB_CACHE_RECT_SIZE = 64; // Cache 64x64 blocks.
static const u32 EFB_CACHE_WIDTH = (EFB_WIDTH + EFB_CACHE_RECT_SIZE - 1) / EFB_CACHE_RECT_SIZE; // round up
static const u32 EFB_CACHE_HEIGHT = (EFB_HEIGHT + EFB_CACHE_RECT_SIZE - 1) / EFB_CACHE_RECT_SIZE; // round up

// Two 4-bit fields packed into a single byte; raw_ aliases both at once.
// NOTE(review): presumably per-block validity flags for the colour and depth
// caches below -- confirm against the code that reads and writes them.
union ColorAndDepth{
BitField< 0,4,u8> color_;
BitField< 4,4,u8> depth_;
u8 raw_;
};
static_assert(sizeof(ColorAndDepth)==1,"ColorAndDepth must be 8 bits");
// One ColorAndDepth per cache block, zero-initialised.
// NOTE(review): unlike the two vectors below this is not declared static.
std::array<ColorAndDepth, EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT> s_efbCacheValid{};

// NOTE(review): the earlier "2 for PEEK_Z and PEEK_COLOR" remark did not match
// these declarations; each vector holds exactly one element per cache block.
static std::vector<float> s_depthEfbCache(EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT); // one float per block (presumably PEEK_Z data)
static std::vector<u32> s_colorEfbCache(EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT); // one u32 per block (presumably PEEK_COLOR data)

// This function allows the CPU to directly access the EFB.
// There are EFB peeks (which will read the color or depth of a pixel)
// and EFB pokes (which will change the color or depth of a pixel).
Expand Down Expand Up @@ -406,10 +422,13 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)

RestoreAPIState(); // restore game state

float val = 0.f;
// read the data from system memory
D3D::context->Map(read_tex, 0, D3D11_MAP_READ, 0, &map);
if ( S_OK == D3D::context->Map(read_tex, 0, D3D11_MAP_READ, 0, &map) ) {
val = 1.f-*(float*)map.pData; // depth buffer logic is inverted in d3d
D3D::context->Unmap(read_tex, 0);
}

float val = *(float*)map.pData;
u32 ret = 0;
if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
{
Expand All @@ -420,7 +439,7 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
{
ret = ((u32)(val * 0xffffff));
}
D3D::context->Unmap(read_tex, 0);


// TODO: in RE0 this value is often off by one in Video_DX9 (where this code is derived from), which causes lighting to disappear
return ret;
Expand All @@ -432,12 +451,12 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
D3D11_BOX box = CD3D11_BOX(RectToLock.left, RectToLock.top, 0, RectToLock.right, RectToLock.bottom, 1);
D3D::context->CopySubresourceRegion(read_tex, 0, 0, 0, 0, FramebufferManager::GetEFBColorTexture()->GetTex(), 0, &box);

// read the data from system memory
D3D::context->Map(read_tex, 0, D3D11_MAP_READ, 0, &map);
u32 ret = 0;
if (map.pData)
// read the data from system memory
if (S_OK == D3D::context->Map(read_tex, 0, D3D11_MAP_READ, 0, &map) ) {
ret = *(u32*)map.pData;
D3D::context->Unmap(read_tex, 0);
D3D::context->Unmap(read_tex, 0);
}

// check what to do with the alpha channel (GX_PokeAlphaRead)
PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode();
Expand Down Expand Up @@ -516,10 +535,29 @@ void Renderer::SetViewport()
Wd = (X + Wd <= GetTargetWidth()) ? Wd : (GetTargetWidth() - X);
Ht = (Y + Ht <= GetTargetHeight()) ? Ht : (GetTargetHeight() - Y);

auto zfar = xfmem.viewport.farZ;
auto zRange = xfmem.viewport.zRange;

if (zRange<0)
gx_state.rastdc.FrontCounterClockwise = TRUE;
else
gx_state.rastdc.FrontCounterClockwise = FALSE;

D3D11_VIEWPORT vp;
// Some games set invalid values for z-min and z-max so fix them to the max and min allowed and let the shaders do this work
D3D11_VIEWPORT vp = CD3D11_VIEWPORT(X, Y, Wd, Ht,
0.f, // (xfmem.viewport.farZ - xfmem.viewport.zRange) / 16777216.0f;
1.f); // xfmem.viewport.farZ / 16777216.0f;
// To emulate the same depth buffer accuracy, we invert the depth test, so the viewport depth range needs to be 1-far..1-near
vp = CD3D11_VIEWPORT(X, Y, Wd, Ht,
1.f - zfar / 16777216.0f,
1.f - (zfar - zRange) / 16777216.0f
);
//TODO: Remove the clipdistance and viewport transform from vertex shader when the viewport is in 0..1 range
//TODO: when the range overflow, clamp to 0..1 and use scale and bias at the end of the vertex shader, clip planes should not be useful
//TODO: Some games use an inverted near = 0 and far = -1, check what has to be done in that case.
if (VertexShaderManager::ViewportNonStandard()) {
vp.MaxDepth = 1.f;
vp.MinDepth = 0.f;
}

D3D::context->RSSetViewports(1, &vp);
}

Expand All @@ -544,7 +582,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE

// Color is passed in bgra mode so we need to convert it to rgba
u32 rgbaColor = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000);
D3D::drawClearQuad(rgbaColor, (z & 0xFFFFFF) / float(0xFFFFFF), PixelShaderCache::GetClearProgram(), VertexShaderCache::GetClearVertexShader(), VertexShaderCache::GetClearInputLayout());
D3D::drawClearQuad(rgbaColor, (0xFFFFFF-(z & 0xFFFFFF)) / float(0xFFFFFF), PixelShaderCache::GetClearProgram(), VertexShaderCache::GetClearVertexShader(), VertexShaderCache::GetClearInputLayout());

D3D::stateman->PopDepthState();
D3D::stateman->PopBlendState();
Expand Down Expand Up @@ -731,6 +769,7 @@ void formatBufferDump(const u8* in, u8* out, int w, int h, int p)
// This function has the final picture. We adjust the aspect ratio here.
void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbHeight,const EFBRectangle& rc,float Gamma)
{
//NOTICE_LOG(VIDEO,"PRESENT");
if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.RealXFBEnabled()) || !fbWidth || !fbHeight)
{
if (g_ActiveConfig.bDumpFrames && !frame_data.empty())
Expand Down Expand Up @@ -806,7 +845,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbHeight,const EFBRectangl
if (g_ActiveConfig.bUseRealXFB)
{
drawRc.top = 1;
drawRc.bottom = -1;
drawRc.bottom = -1;
drawRc.left = -1;
drawRc.right = 1;
}
Expand Down Expand Up @@ -1053,7 +1092,7 @@ void Renderer::ApplyState(bool bUseDstAlpha) {
gx_state.rastdc.FillMode = (g_ActiveConfig.bWireFrame) ? D3D11_FILL_WIREFRAME : D3D11_FILL_SOLID;
D3D::stateman->PushRasterizerState( D3D::GetRasterizerState(gx_state.rastdc, "rasterizer state used to emulate the GX pipeline") );

auto mask = u32(-1);//PixelShaderCache::GetActiveMask();
auto mask = PixelShaderCache::GetActiveMask();
for (unsigned int stage = 0; stage < 8; stage++) {
if ( (1u<<stage)&mask ) {
if (g_ActiveConfig.iMaxAnisotropy > 0 )
Expand Down Expand Up @@ -1087,7 +1126,7 @@ void Renderer::RestoreState() {

void Renderer::ApplyCullDisable()
{
D3D11_RASTERIZER_DESC rastDesc = gx_state.rastdc;
auto rastDesc = gx_state.rastdc;
rastDesc.CullMode = D3D11_CULL_NONE;
D3D::stateman->PushRasterizerState(D3D::GetRasterizerState(rastDesc));
D3D::stateman->Apply();
Expand All @@ -1113,17 +1152,23 @@ void Renderer::SetGenerationMode()
gx_state.rastdc.CullMode = d3dCullModes[bpmem.genMode.cullmode];
}

int lastFrameCount = 0;
int bibias = 0;
void Renderer::SetDepthMode()
{
if( frameCount != lastFrameCount ){
lastFrameCount = frameCount;
bibias = 0;
}
const D3D11_COMPARISON_FUNC d3dCmpFuncs[8] =
{
D3D11_COMPARISON_NEVER,
D3D11_COMPARISON_LESS,
D3D11_COMPARISON_EQUAL,
D3D11_COMPARISON_LESS_EQUAL,
D3D11_COMPARISON_GREATER,
D3D11_COMPARISON_NOT_EQUAL,
D3D11_COMPARISON_EQUAL,
D3D11_COMPARISON_GREATER_EQUAL,
D3D11_COMPARISON_LESS,
D3D11_COMPARISON_NOT_EQUAL,
D3D11_COMPARISON_LESS_EQUAL,
D3D11_COMPARISON_ALWAYS
};

Expand All @@ -1132,13 +1177,25 @@ void Renderer::SetDepthMode()
gx_state.depthdc.DepthEnable = TRUE;
gx_state.depthdc.DepthWriteMask = bpmem.zmode.updateenable ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
gx_state.depthdc.DepthFunc = d3dCmpFuncs[bpmem.zmode.func];
gx_state.rastdc.DepthClipEnable = FALSE;//xfmem.clipDisable == 0;
}
else
{
// if the test is disabled write is disabled too
gx_state.depthdc.DepthEnable = FALSE;
gx_state.depthdc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO;
gx_state.rastdc.DepthClipEnable = FALSE;
}
if (bpmem.genMode.zfreeze != 0) {
bibias -=2;;
gx_state.rastdc.DepthBias = -2;
gx_state.rastdc.SlopeScaledDepthBias = -2;
gx_state.depthdc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO;
} else {
gx_state.rastdc.DepthBias = 0;
gx_state.rastdc.SlopeScaledDepthBias = 0;
}

}

void Renderer::SetLogicOpMode()
Expand Down Expand Up @@ -1220,15 +1277,41 @@ void Renderer::SetLogicOpMode()
D3D11_BLEND_ONE//15
};

const D3D11_LOGIC_OP d3dLogicOp[16] =
{
D3D11_LOGIC_OP_CLEAR,
D3D11_LOGIC_OP_AND,
D3D11_LOGIC_OP_AND_REVERSE,
D3D11_LOGIC_OP_COPY,
D3D11_LOGIC_OP_AND_INVERTED,
D3D11_LOGIC_OP_NOOP,
D3D11_LOGIC_OP_XOR,
D3D11_LOGIC_OP_OR,
D3D11_LOGIC_OP_NOR,
D3D11_LOGIC_OP_EQUIV,
D3D11_LOGIC_OP_INVERT,
D3D11_LOGIC_OP_OR_REVERSE,
D3D11_LOGIC_OP_COPY_INVERTED,
D3D11_LOGIC_OP_OR_INVERTED,
D3D11_LOGIC_OP_NAND,
D3D11_LOGIC_OP_SET
};

if (bpmem.blendmode.logicopenable)
{

gx_state.blenddc.RenderTarget[0].BlendEnable = true;
SetBlendOp(d3dLogicOps[bpmem.blendmode.logicmode]);
SetSrcBlend(d3dLogicOpSrcFactors[bpmem.blendmode.logicmode]);
SetDestBlend(d3dLogicOpDestFactors[bpmem.blendmode.logicmode]);

//g_Config.backend_info.bSupportsLogicOp
//gx_state.blenddc.LogicOpEnable = TRUE;
//gx_state.blenddc.LogicOp = d3dLogicOp[bpmem.blendmode.logicmode];
}
else
{
gx_state.blenddc.LogicOpEnable = FALSE;
SetBlendMode(true);
}
}
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/VideoBackends/D3D/Television.cpp
Expand Up @@ -100,7 +100,7 @@ void Television::Init()

m_pShader = D3D::CompileAndCreatePixelShader(YUYV_DECODER_PS, sizeof(YUYV_DECODER_PS));
CHECK(m_pShader != nullptr, "compile and create yuyv decoder pixel shader");
D3D::SetDebugObjectName(m_pShader, "yuyv decoder pixel shader");
D3D::SetDebugObjectName(m_pShader.get(), "yuyv decoder pixel shader");

// Create sampler state and set border color
//
Expand All @@ -120,7 +120,7 @@ void Television::Init()

void Television::Shutdown()
{
SAFE_RELEASE(m_pShader);
m_pShader.reset();
SAFE_RELEASE(m_yuyvTextureSRV);
SAFE_RELEASE(m_yuyvTexture);
SAFE_RELEASE(m_samplerState);
Expand Down Expand Up @@ -156,7 +156,7 @@ void Television::Render()
m_yuyvTextureSRV, &sourceRc,
MAX_XFB_WIDTH, MAX_XFB_HEIGHT,
&destRc,
m_pShader,
m_pShader.get(),
VertexShaderCache::GetSimpleVertexShader(),
VertexShaderCache::GetSimpleInputLayout());
}
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoBackends/D3D/Television.h
Expand Up @@ -43,7 +43,7 @@ class Television

ID3D11Texture2D* m_yuyvTexture;
ID3D11ShaderResourceView* m_yuyvTextureSRV;
ID3D11PixelShader* m_pShader;
D3D::UniquePtr<ID3D11PixelShader> m_pShader;
ID3D11SamplerState* m_samplerState;

};
Expand Down
52 changes: 40 additions & 12 deletions Source/Core/VideoBackends/D3D/TextureCache.cpp
Expand Up @@ -8,6 +8,7 @@
#include "VideoBackends/D3D/FramebufferManager.h"
#include "VideoBackends/D3D/PixelShaderCache.h"
#include "VideoBackends/D3D/PSTextureEncoder.h"
#include "VideoBackends/D3D/PSTextureDecoder.h"
#include "VideoBackends/D3D/TextureCache.h"
#include "VideoBackends/D3D/TextureEncoder.h"
#include "VideoBackends/D3D/VertexShaderCache.h"
Expand All @@ -19,6 +20,7 @@ namespace DX11
{

static TextureEncoder* g_encoder = nullptr;
static PSTextureDecoder* g_decoder = nullptr;
const size_t MAX_COPY_BUFFERS = 32;
ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = { 0 };

Expand Down Expand Up @@ -69,18 +71,31 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
// Fills mip level `level` of this entry's texture by running the shared
// PSTextureDecoder on the entry's stored texture format.
//
// This replaces the former CPU-side upload, which was:
//   D3D::ReplaceRGBATexture2D(texture->GetTex(), TextureCache::temp, width, height, expanded_width, level, usage);
// NOTE(review): expanded_width is no longer used on this path; it is kept
// because the signature overrides the base class.
void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height,
	unsigned int expanded_width, unsigned int level)
{
	g_decoder->Decode(nullptr, TextureFormat(format), width, height, level, *texture, 0);
}

TextureCache::TCacheEntryBase* TextureCache::CreateTexture(unsigned int width,
// Fills this entry's texture from two separate source pointers by forwarding
// to the shared PSTextureDecoder.
// NOTE(review): ar_src / bg_src are presumably the AR and BG halves of an
// RGBA8 texture held in TMEM -- confirm against the caller.
// expanded_width and level are part of the overridden signature but are not
// passed on to the decoder.
void TextureCache::TCacheEntry::LoadRGBAFromTMEM( u8 const* ar_src, u8 const* bg_src, unsigned int width, unsigned int height,
	unsigned int expanded_width, unsigned int level)
{
	auto& destination = *texture;
	g_decoder->DecodeRGBAFromTMEM(ar_src, bg_src, width, height, destination);
}

// Forwards a lookup-table load request (format, source address, byte size as
// named by the parameters) to the shared PSTextureDecoder.
void TextureCache::LoadLut(u32 lutFmt, void* addr, u32 size )
{
	g_decoder->LoadLut(lutFmt, addr, size);
}
TextureCache::TCacheEntryBase* TextureCache::CreateTexture(u32 fmt, unsigned int width,
unsigned int height, unsigned int expanded_width,
unsigned int tex_levels, PC_TexFormat pcfmt)
{
D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
D3D11_CPU_ACCESS_FLAG cpu_access = (D3D11_CPU_ACCESS_FLAG)0;
D3D11_SUBRESOURCE_DATA srdata, *data = nullptr;

if (tex_levels == 1)
if (tex_levels == 1 && 0)
{
usage = D3D11_USAGE_DYNAMIC;
cpu_access = D3D11_CPU_ACCESS_WRITE;
Expand All @@ -100,15 +115,18 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(unsigned int width,

TCacheEntry* const entry = new TCacheEntry(new D3DTexture2D(pTexture, D3D11_BIND_SHADER_RESOURCE));
entry->usage = usage;
entry->format = fmt;

// TODO: better debug names
D3D::SetDebugObjectName((ID3D11DeviceChild*)entry->texture->GetTex(), "a texture of the TextureCache");
D3D::SetDebugObjectName((ID3D11DeviceChild*)entry->texture->GetSRV(), "shader resource view of a texture of the TextureCache");

SAFE_RELEASE(pTexture);

if (tex_levels != 1)
entry->Load(width, height, expanded_width, 0);
if (1 || tex_levels != 1) {
//g_decoder->Decode( nullptr, 0, width, height, tex_levels, *(entry->texture),0);
//entry->Load(width, height, expanded_width, 0);
}

return entry;
}
Expand Down Expand Up @@ -166,16 +184,19 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
{
u8* dst = Memory::GetPointer(dstAddr);
size_t encoded_size = g_encoder->Encode(dst, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf);
//NOTICE_LOG(VIDEO,"EFB2RAM : dstp %p dstfmt %d srcfmt %d [%d %d %d %d]", dstAddr, dstFormat, int( srcFormat), srcRect.left, srcRect.top, srcRect.right, srcRect.bottom);

u64 hash = GetHash64(dst, (int)encoded_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
if (!g_ActiveConfig.bCopyEFBToTexture) {
u64 hash = GetHash64(dst, (int)encoded_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);

// Mark texture entries in destination address range dynamic unless caching is enabled and the texture entry is up to date
if (!g_ActiveConfig.bEFBCopyCacheEnable)
TextureCache::MakeRangeDynamic(addr, (u32)encoded_size);
else if (!TextureCache::Find(addr, hash))
TextureCache::MakeRangeDynamic(addr, (u32)encoded_size);
// Mark texture entries in destination address range dynamic unless caching is enabled and the texture entry is up to date
if (!g_ActiveConfig.bEFBCopyCacheEnable)
TextureCache::MakeRangeDynamic(addr, (u32)encoded_size);
else if (!TextureCache::Find(addr, hash))
TextureCache::MakeRangeDynamic(addr, (u32)encoded_size);

this->hash = hash;
this->hash = hash;
}
}
}

Expand All @@ -192,6 +213,9 @@ TextureCache::TextureCache()
// FIXME: Is it safe here?
g_encoder = new PSTextureEncoder;
g_encoder->Init();

g_decoder = new PSTextureDecoder;
g_decoder->Init();
}

TextureCache::~TextureCache()
Expand All @@ -202,6 +226,10 @@ TextureCache::~TextureCache()
g_encoder->Shutdown();
delete g_encoder;
g_encoder = nullptr;

// Tear down the decoder and clear its own pointer (not g_encoder's), so that
// g_decoder does not dangle after the delete.
g_decoder->Shutdown();
delete g_decoder;
g_decoder = nullptr;
}

}
10 changes: 9 additions & 1 deletion Source/Core/VideoBackends/D3D/TextureCache.h
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include "VideoBackends/D3D/D3DTexture.h"
#include "VideoBackends/D3D/TextureEncoder.h"
#include "VideoCommon/TextureCacheBase.h"

namespace DX11
Expand All @@ -28,6 +29,9 @@ class TextureCache : public ::TextureCache

void Load(unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int levels) override;

void LoadRGBAFromTMEM( u8 const* ar_src, u8 const* bg_src, unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int level) override;

void FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
Expand All @@ -38,11 +42,15 @@ class TextureCache : public ::TextureCache
bool Save(const std::string& filename, unsigned int level) override;
};

TCacheEntryBase* CreateTexture(unsigned int width, unsigned int height,
// Declared without the class qualifier: a qualified name is not allowed on an
// in-class member declaration in standard C++ (MSVC accepts it as an extension).
void LoadLut(u32 lutFmt, void* addr, u32 size );
TCacheEntryBase* CreateTexture(u32 fmt, unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int tex_levels, PC_TexFormat pcfmt) override;

TCacheEntryBase* CreateRenderTargetTexture(unsigned int scaled_tex_w, unsigned int scaled_tex_h) override;
u64 EncodeToRamFromTexture(u32 address, void* source_texture, u32 SourceW, u32 SourceH, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source) {return 0;};

//std::unique_ptr<TextureEncoder> encoder_;
//std::unique_ptr<TextureDecoder> decoder_;
};

}
16 changes: 16 additions & 0 deletions Source/Core/VideoBackends/D3D/TextureEncoder.h
Expand Up @@ -117,4 +117,20 @@ class TextureEncoder

};

// Abstract interface for a texture decoder backend, mirroring TextureEncoder.
// NOTE(review): the exact contract of each call (units of the returned size_t,
// whether dst may be null, meaning of dstFmt) is defined by the implementation
// and is not visible from this header -- confirm against PSTextureDecoder.
class TextureDecoder
{
public:
	virtual ~TextureDecoder() = default;

	// Create / release whatever resources the implementation needs.
	virtual void Init() = 0;
	virtual void Shutdown() = 0;

	// Decode a texture of format srcFmt and size w x h into dstTexture.
	virtual size_t Decode(u8* dst, u32 srcFmt, u32 w, u32 h, u32 levels,
	                      D3DTexture2D& dstTexture, u32 dstFmt) = 0;

	// Decode a texture supplied as two separate source pointers into dstTexture.
	virtual size_t DecodeRGBAFromTMEM(u8 const * ar_src, u8 const * bg_src,
	                                  u32 width, u32 height, D3DTexture2D& dstTexture) = 0;

	// Load a lookup table of `size` bytes from `addr` in format `lutFmt`.
	virtual void LoadLut(u32 lutFmt, void* addr, u32 size ) = 0;
};

}
47 changes: 44 additions & 3 deletions Source/Core/VideoBackends/D3D/VertexManager.cpp
Expand Up @@ -66,7 +66,7 @@ void Memcpy16( void* dst_raw, void const *src_raw, unsigned int size ) {

// TODO: Find sensible values for these two
const UINT IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE* sizeof(u16) * 8;
const UINT VBUFFER_SIZE = 1u<<20u;//VertexManager::MAXVBUFFERSIZE;
const UINT VBUFFER_SIZE = 4u<<20u;//VertexManager::MAXVBUFFERSIZE;

void VertexManager::CreateDeviceObjects()
{
Expand Down Expand Up @@ -174,6 +174,7 @@ static const float LINE_PT_TEX_OFFSETS[8] = {

void VertexManager::Draw(UINT stride)
{

u32 zero{};
D3D::context->IASetVertexBuffers(0, 1, &m_buffers[m_currentBuffer], &stride, &zero);
D3D::context->IASetIndexBuffer(m_buffers[m_currentBuffer], DXGI_FORMAT_R16_UINT, 0);
Expand Down Expand Up @@ -238,6 +239,46 @@ void VertexManager::Draw(UINT stride)

void VertexManager::vFlush(bool useDstAlpha)
{
/*
// TODO: Only do this if triangles are being used.
// TODO: Is it nice that we're assuming that first 4 bytes == position? o_o
float vtx[9], out[9];
u8* vtx_ptr = (u8*)&GetVertexBuffer()[GetTriangleIndexBuffer()[IndexGenerator::GetNumTriangles()*3 - 1] * g_nativeVertexFmt->GetVertexStride()];
for (unsigned int i = 0; i < 3; ++i)
{
vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
VertexLoader::TransformVertex(&vtx[i*3], &out[i*3]);
vtx_ptr += g_nativeVertexFmt->GetVertexStride();
}
float fltx1 = out[0];
float flty1 = out[1];
float fltdx31 = out[6] - fltx1;
float fltdx12 = fltx1 - out[3];
float fltdy12 = flty1 - out[4];
float fltdy31 = out[7] - flty1;
float DF31 = vtx[8] - vtx[2];
float DF21 = vtx[5] - vtx[2];
float a = DF31 * -fltdy12 - DF21 * fltdy31;
float b = fltdx31 * DF21 + fltdx12 * DF31;
float c = -fltdx12 * fltdy31 - fltdx31 * -fltdy12;
float slope_dfdx = -a / c;
float slope_dfdy = -b / c;
float slope_f0 = vtx[2];
if (!bpmem.genMode.zfreeze)
PixelShaderManager::SetZSlope(slope_dfdx, slope_dfdy, slope_f0);
*/
if (IndexGenerator::GetIndexLen()== 4149) {
// __debugbreak();
NOTICE_LOG(VIDEO,"");
}

if (!PixelShaderCache::SetShader(
useDstAlpha ? DSTALPHA_DUAL_SOURCE_BLEND : DSTALPHA_NONE,
g_nativeVertexFmt->m_components))
Expand All @@ -257,8 +298,8 @@ void VertexManager::vFlush(bool useDstAlpha)
PrepareDrawBuffers(stride);
g_nativeVertexFmt->SetupVertexPointers();
g_renderer->ApplyState(useDstAlpha);

Draw(stride);
//if (bpmem.genMode.zfreeze == 0)
Draw(stride);

g_renderer->RestoreState();
}
Expand Down
88 changes: 43 additions & 45 deletions Source/Core/VideoBackends/D3D/VertexShaderCache.cpp
Expand Up @@ -23,17 +23,17 @@ const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VertexShaderUid VertexShaderCache::last_uid;
UidChecker<VertexShaderUid,VertexShaderCode> VertexShaderCache::vertex_uid_checker;

// Helper vertex shaders and input layouts created in VertexShaderCache::Init().
// They are owned here; the Get*() accessors hand out non-owning pointers.
static D3D::UniquePtr<ID3D11VertexShader> SimpleVertexShader;
static D3D::UniquePtr<ID3D11VertexShader> ClearVertexShader;
static D3D::UniquePtr<ID3D11InputLayout> SimpleLayout;
static D3D::UniquePtr<ID3D11InputLayout> ClearLayout;

LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;

// Non-owning accessors for the helper shaders / input layouts owned by this file.
ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() { return SimpleVertexShader.get(); }
ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() { return ClearVertexShader.get(); }
ID3D11InputLayout* VertexShaderCache::GetSimpleInputLayout() { return SimpleLayout.get(); }
ID3D11InputLayout* VertexShaderCache::GetClearInputLayout() { return ClearLayout.get(); }

ID3D11Buffer* vscbuf = nullptr;

Expand All @@ -56,17 +56,14 @@ class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid,
public:
// Disk-cache callback: wraps one stored bytecode record in a temporary D3DBlob
// and inserts it into the vertex shader cache exactly once.
void Read(const VertexShaderUid &key, const u8 *value, u32 value_size)
{
	VertexShaderCache::InsertByteCode(key, D3DBlob(value_size, value));
}
};

const char simple_shader_code[] = {
"struct VSOUTPUT\n"
"{\n"
"float4 vPosition : POSITION;\n"
"float4 vPosition : SV_Position;\n"
"float2 vTexCoord : TEXCOORD0;\n"
"float vTexCoord1 : TEXCOORD1;\n"
"};\n"
Expand All @@ -83,7 +80,7 @@ const char simple_shader_code[] = {
const char clear_shader_code[] = {
"struct VSOUTPUT\n"
"{\n"
"float4 vPosition : POSITION;\n"
"float4 vPosition : SV_Position;\n"
"float4 vColor0 : COLOR0;\n"
"};\n"
"VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n"
Expand Down Expand Up @@ -115,22 +112,25 @@ void VertexShaderCache::Init()
CHECK(hr==S_OK, "Create vertex shader constant buffer (size=%u)", cbsize);
D3D::SetDebugObjectName((ID3D11DeviceChild*)vscbuf, "vertex shader constant buffer used to emulate the GX pipeline");

D3DBlob* blob;
D3D::CompileVertexShader(simple_shader_code, sizeof(simple_shader_code), &blob);
D3D::device->CreateInputLayout(simpleelems, 2, blob->Data(), blob->Size(), &SimpleLayout);
D3DBlob blob;
D3D::CompileVertexShader(simple_shader_code, sizeof(simple_shader_code), blob);
D3D::device->CreateInputLayout(simpleelems, 2, blob.Data(), blob.Size(), ToAddr(SimpleLayout));
SimpleVertexShader = D3D::CreateVertexShaderFromByteCode(blob);
if (SimpleLayout == nullptr || SimpleVertexShader == nullptr) PanicAlert("Failed to create simple vertex shader or input layout at %s %d\n", __FILE__, __LINE__);
blob->Release();
D3D::SetDebugObjectName((ID3D11DeviceChild*)SimpleVertexShader, "simple vertex shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)SimpleLayout, "simple input layout");
if (SimpleLayout == nullptr || SimpleVertexShader == nullptr)
PanicAlert("Failed to create simple vertex shader or input layout at %s %d\n", __FILE__, __LINE__);

D3D::SetDebugObjectName((ID3D11DeviceChild*)SimpleVertexShader.get(), "simple vertex shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)SimpleLayout.get(), "simple input layout");

D3D::CompileVertexShader(clear_shader_code, sizeof(clear_shader_code), &blob);
D3D::device->CreateInputLayout(clearelems, 2, blob->Data(), blob->Size(), &ClearLayout);
D3D::CompileVertexShader(clear_shader_code, sizeof(clear_shader_code), blob);
D3D::device->CreateInputLayout(clearelems, 2, blob.Data(), blob.Size(), ToAddr(ClearLayout));
ClearVertexShader = D3D::CreateVertexShaderFromByteCode(blob);
if (ClearLayout == nullptr || ClearVertexShader == nullptr) PanicAlert("Failed to create clear vertex shader or input layout at %s %d\n", __FILE__, __LINE__);
blob->Release();
D3D::SetDebugObjectName((ID3D11DeviceChild*)ClearVertexShader, "clear vertex shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)ClearLayout, "clear input layout");
if (ClearLayout == nullptr || ClearVertexShader == nullptr)
PanicAlert("Failed to create clear vertex shader or input layout at %s %d\n", __FILE__, __LINE__);


D3D::SetDebugObjectName((ID3D11DeviceChild*)ClearVertexShader.get(), "clear vertex shader");
D3D::SetDebugObjectName((ID3D11DeviceChild*)ClearLayout.get(), "clear input layout");

Clear();

Expand Down Expand Up @@ -166,11 +166,11 @@ void VertexShaderCache::Shutdown()
{
SAFE_RELEASE(vscbuf);

SAFE_RELEASE(SimpleVertexShader);
SAFE_RELEASE(ClearVertexShader);
SimpleVertexShader.reset();
ClearVertexShader.reset();

SAFE_RELEASE(SimpleLayout);
SAFE_RELEASE(ClearLayout);
SimpleLayout.reset();
ClearLayout.reset();

Clear();
g_vs_disk_cache.Sync();
Expand Down Expand Up @@ -212,18 +212,17 @@ bool VertexShaderCache::SetShader(u32 components)
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D);

D3DBlob* pbytecode = nullptr;
D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &pbytecode);
D3DBlob bytecode;
;

if (pbytecode == nullptr)
if (!D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), bytecode))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
}
g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
g_vs_disk_cache.Append(uid, bytecode.Data(), u32(bytecode.Size()));

bool success = InsertByteCode(uid, pbytecode);
pbytecode->Release();
bool success = InsertByteCode(uid, std::move(bytecode));

if (g_ActiveConfig.bEnableShaderDebugging && success)
{
Expand All @@ -234,21 +233,20 @@ bool VertexShaderCache::SetShader(u32 components)
return success;
}

bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob)
bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob && bcodeblob)
{
ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
if (shader == nullptr)
auto shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
if (!shader)
return false;

// TODO: Somehow make the debug name a bit more specific
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a vertex shader of VertexShaderCache");
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader.get(), "a vertex shader of VertexShaderCache");

// Make an entry in the table
VSCacheEntry entry;
entry.shader = shader;
entry.SetByteCode(bcodeblob);
VSCacheEntry &entry = vshaders[uid];
entry.shader = std::move(shader);
entry.SetByteCode(std::move(bcodeblob));

vshaders[uid] = entry;
last_entry = &vshaders[uid];

INCSTAT(stats.numVertexShadersCreated);
Expand Down
20 changes: 9 additions & 11 deletions Source/Core/VideoBackends/D3D/VertexShaderCache.h
Expand Up @@ -21,36 +21,34 @@ class VertexShaderCache
static void Shutdown();
static bool SetShader(u32 components); // TODO: Should be renamed to LoadShader

// Accessors for the cache entry selected last (last_entry). Both dereference
// last_entry unconditionally.
// NOTE(review): assumes last_entry is non-null, i.e. a shader was inserted/selected before - confirm at call sites.
// The shader pointer is non-owning; the bytecode is returned by const reference and stays owned by the entry.
static ID3D11VertexShader* GetActiveShader() { return last_entry->shader.get(); }
static D3DBlob const& GetActiveShaderBytecode() { return last_entry->bytecode; }
static ID3D11Buffer* &GetConstantBuffer();

static ID3D11VertexShader* GetSimpleVertexShader();
static ID3D11VertexShader* GetClearVertexShader();
static ID3D11InputLayout* GetSimpleInputLayout();
static ID3D11InputLayout* GetClearInputLayout();

// Creates a vertex shader from the given bytecode and stores shader and bytecode
// in the cache under uid. The blob is consumed (moved from). Returns false when
// the shader object could not be created.
// Declared without the redundant "VertexShaderCache::" qualifier, which is not
// valid on an in-class member declaration in standard C++.
static bool InsertByteCode(const VertexShaderUid &uid, D3DBlob&& bcodeblob);

private:
// One cached vertex shader: the D3D shader object, the bytecode it was created
// from, and (optionally) its source text. Ownership is expressed through the
// member types, so no manual AddRef/Release bookkeeping is needed.
struct VSCacheEntry
{
	D3D::UniquePtr<ID3D11VertexShader> shader;
	D3DBlob bytecode; // needed to initialize the input layout

	std::string code;

	VSCacheEntry() : shader(nullptr), bytecode(nullptr) {}

	// Takes over the given blob; whatever bytecode was stored before is replaced.
	void SetByteCode(D3DBlob&& blob)
	{
		bytecode = std::move(blob);
	}

	// Drops the shader and clears the stored bytecode, returning the entry to
	// the same state the default constructor produces.
	void Destroy()
	{
		shader.reset();
		bytecode = nullptr;
	}
};
typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
Expand Down