Skip to content

Commit

Permalink
Merge pull request #15303 from unknownbrackets/softgpu-verts
Browse files Browse the repository at this point in the history
softgpu: Enqueue batches of prims when binning
  • Loading branch information
hrydgard committed Jan 15, 2022
2 parents bdc62be + 3134bd1 commit a12a196
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 65 deletions.
59 changes: 43 additions & 16 deletions GPU/Software/BinManager.cpp
Expand Up @@ -84,25 +84,36 @@ static inline void DrawBinItem(const BinItem &item, const RasterizerState &state
}
}

class DrawBinItemTask : public Task {
class DrawBinItemsTask : public Task {
public:
DrawBinItemTask(BinWaitable *notify, const BinItem &item, const BinCoords &range, const RasterizerState &state)
: notify_(notify), item_(item), state_(state) {
item_.range = range;
DrawBinItemsTask(BinWaitable *notify, BinQueue<BinItem, 1024> &items, std::atomic<bool> &status, const BinQueue<RasterizerState, 32> &states)
: notify_(notify), items_(items), status_(status), states_(states) {
}

// Reports this task's category as TaskType::CPU_COMPUTE.
TaskType Type() const override {
return TaskType::CPU_COMPUTE;
}

void Run() override {
DrawBinItem(item_, state_);
ProcessItems();
status_ = false;
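// Items pushed after the pass above but before status_ was cleared won't get a new task
// (Drain() skips queues whose status is still set), so do another pass.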
ProcessItems();
notify_->Drain();
}

private:
void ProcessItems() {
while (!items_.Empty()) {
const BinItem item = items_.Pop();
DrawBinItem(item, states_[item.stateIndex]);
}
}

BinWaitable *notify_;
BinItem item_;
const RasterizerState &state_;
BinQueue<BinItem, 1024> &items_;
std::atomic<bool> &status_;
const BinQueue<RasterizerState, 32> &states_;
};

BinManager::BinManager() {
Expand Down Expand Up @@ -143,6 +154,8 @@ void BinManager::UpdateState() {
}

int newMaxTasks = selfRender ? 1 : g_threadManager.GetNumLooperThreads();
if (newMaxTasks > MAX_POSSIBLE_TASKS)
newMaxTasks = MAX_POSSIBLE_TASKS;
if (maxTasks_ != newMaxTasks) {
maxTasks_ = newMaxTasks;
tasksSplit_ = false;
Expand Down Expand Up @@ -246,22 +259,36 @@ void BinManager::Drain() {
tasksSplit_ = true;
}

while (!queue_.Empty()) {
const BinItem item = queue_.Pop();

if (taskRanges_.size() <= 1) {
if (taskRanges_.size() <= 1) {
while (!queue_.Empty()) {
const BinItem item = queue_.Pop();
DrawBinItem(item, states_[item.stateIndex]);
continue;
}
} else {
while (!queue_.Empty()) {
const BinItem item = queue_.Pop();
for (int i = 0; i < (int)taskRanges_.size(); ++i) {
const BinCoords range = taskRanges_[i].Intersect(item.range);
if (range.Invalid())
continue;

// This shouldn't often happen, but if it does, wait for space.
if (taskQueues_[i].Full())
waitable_->Wait();

BinItem subitem = item;
subitem.range = range;
taskQueues_[i].Push(subitem);
}
}

for (int i = 0; i < (int)taskRanges_.size(); ++i) {
const BinCoords &taskRange = taskRanges_[i];
const BinCoords range = taskRange.Intersect(item.range);
if (range.Invalid())
if (taskQueues_[i].Empty() || taskStatus_[i])
continue;

waitable_->Fill();
DrawBinItemTask *task = new DrawBinItemTask(waitable_, item, range, states_[item.stateIndex]);
taskStatus_[i] = true;
DrawBinItemsTask *task = new DrawBinItemsTask(waitable_, taskQueues_[i], taskStatus_[i], states_);
g_threadManager.EnqueueTaskOnThread(i, task, true);
}
}
Expand Down
25 changes: 15 additions & 10 deletions GPU/Software/BinManager.h
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <atomic>
#include "Common/Log.h"
#include "GPU/Software/Rasterizer.h"

Expand Down Expand Up @@ -70,22 +71,22 @@ struct BinQueue {

size_t Push(const T &item) {
_dbg_assert_(size_ < N);
size_++;

size_t i = tail_++;
if (tail_ == N)
tail_ = 0;
if (i + 1 == N)
tail_ -= N;
items_[i] = item;
size_++;
return i;
}

T Pop() {
_dbg_assert_(!Empty());
size_t i = head_++;
if (head_ == N)
head_ = 0;
if (i + 1 == N)
head_ -= N;
T item = items_[i];
size_--;
return items_[i];
return item;
}

size_t Size() const {
Expand All @@ -109,9 +110,9 @@ struct BinQueue {
}

T *items_ = nullptr;
size_t head_;
size_t tail_ ;
size_t size_;
std::atomic<size_t> head_;
std::atomic<size_t> tail_ ;
std::atomic<size_t> size_;
};

class BinManager {
Expand All @@ -135,6 +136,8 @@ class BinManager {
void Flush();

private:
static constexpr int MAX_POSSIBLE_TASKS = 64;

BinQueue<Rasterizer::RasterizerState, 32> states_;
int stateIndex_;
BinCoords scissor_;
Expand All @@ -144,6 +147,8 @@ class BinManager {
int maxTasks_ = 1;
bool tasksSplit_ = false;
std::vector<BinCoords> taskRanges_;
BinQueue<BinItem, 1024> taskQueues_[MAX_POSSIBLE_TASKS];
std::atomic<bool> taskStatus_[MAX_POSSIBLE_TASKS];
BinWaitable *waitable_ = nullptr;

BinCoords Scissor(BinCoords range);
Expand Down
18 changes: 9 additions & 9 deletions GPU/Software/Lighting.cpp
Expand Up @@ -44,23 +44,23 @@ static inline float pspLightPow(float v, float e) {
return v;
}

static inline float GenerateLightCoord(VertexData &vertex, int light) {
static inline float GenerateLightCoord(VertexData &vertex, const WorldCoords &worldnormal, int light) {
// TODO: Should specular lighting affect this, too? It doesn't in GLES.
Vec3<float> L = GetLightVec(gstate.lpos, light);
// In other words, L.Length2() == 0.0f means Dot({0, 0, 1}, worldnormal).
float diffuse_factor = Dot(L.NormalizedOr001(cpu_info.bSSE4_1), vertex.worldnormal);
float diffuse_factor = Dot(L.NormalizedOr001(cpu_info.bSSE4_1), worldnormal);

return (diffuse_factor + 1.0f) / 2.0f;
}

void GenerateLightST(VertexData &vertex) {
void GenerateLightST(VertexData &vertex, const WorldCoords &worldnormal) {
// Always calculate texture coords from lighting results if environment mapping is active
// This should be done even if lighting is disabled altogether.
vertex.texturecoords.s() = GenerateLightCoord(vertex, gstate.getUVLS0());
vertex.texturecoords.t() = GenerateLightCoord(vertex, gstate.getUVLS1());
vertex.texturecoords.s() = GenerateLightCoord(vertex, worldnormal, gstate.getUVLS0());
vertex.texturecoords.t() = GenerateLightCoord(vertex, worldnormal, gstate.getUVLS1());
}

void Process(VertexData& vertex, bool hasColor) {
void Process(VertexData& vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, bool hasColor) {
const int materialupdate = gstate.materialupdate & (hasColor ? 7 : 0);

Vec4<int> mec = Vec4<int>::FromRGBA(gstate.getMaterialEmissive());
Expand All @@ -82,7 +82,7 @@ void Process(VertexData& vertex, bool hasColor) {
// TODO: Should transfer the light positions to world/view space for these calculations?
Vec3<float> L = GetLightVec(gstate.lpos, light);
if (!gstate.isDirectionalLight(light)) {
L -= vertex.worldpos;
L -= worldpos;
}
// TODO: Should this normalize (0, 0, 0) to (0, 0, 1)?
float d = L.NormalizeOr001();
Expand Down Expand Up @@ -124,7 +124,7 @@ void Process(VertexData& vertex, bool hasColor) {
final_color += lambient;

// diffuse lighting
float diffuse_factor = Dot(L, vertex.worldnormal);
float diffuse_factor = Dot(L, worldnormal);
if (gstate.isUsingPoweredDiffuseLight(light)) {
float k = gstate.getMaterialSpecularCoef();
diffuse_factor = pspLightPow(diffuse_factor, k);
Expand All @@ -143,7 +143,7 @@ void Process(VertexData& vertex, bool hasColor) {
if (gstate.isUsingSpecularLight(light) && diffuse_factor >= 0.0f) {
Vec3<float> H = L + Vec3<float>(0.f, 0.f, 1.f);

float specular_factor = Dot(H.NormalizedOr001(cpu_info.bSSE4_1), vertex.worldnormal);
float specular_factor = Dot(H.NormalizedOr001(cpu_info.bSSE4_1), worldnormal);
float k = gstate.getMaterialSpecularCoef();
specular_factor = pspLightPow(specular_factor, k);

Expand Down
4 changes: 2 additions & 2 deletions GPU/Software/Lighting.h
Expand Up @@ -21,7 +21,7 @@

namespace Lighting {

void GenerateLightST(VertexData &vertex);
void Process(VertexData& vertex, bool hasColor);
void GenerateLightST(VertexData &vertex, const WorldCoords &worldnormal);
void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, bool hasColor);

}
31 changes: 16 additions & 15 deletions GPU/Software/TransformUnit.cpp
Expand Up @@ -164,11 +164,12 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
vreader.ReadUV(vertex.texturecoords.AsArray());
}

Vec3<float> normal;
if (vreader.hasNormal()) {
vreader.ReadNrm(vertex.normal.AsArray());
vreader.ReadNrm(normal.AsArray());

if (gstate.areNormalsReversed())
vertex.normal = -vertex.normal;
normal = -normal;
}

if (vertTypeIsSkinningEnabled(gstate.vertType) && !gstate.isModeThrough()) {
Expand All @@ -182,14 +183,14 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
Vec3<float> step = Vec3ByMatrix43(pos, gstate.boneMatrix + i * 12);
tmppos += step * W[i];
if (vreader.hasNormal()) {
step = Norm3ByMatrix43(vertex.normal, gstate.boneMatrix + i * 12);
step = Norm3ByMatrix43(normal, gstate.boneMatrix + i * 12);
tmpnrm += step * W[i];
}
}

pos = tmppos;
if (vreader.hasNormal())
vertex.normal = tmpnrm;
normal = tmpnrm;
}

if (vreader.hasColor0()) {
Expand All @@ -209,9 +210,8 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
}

if (!gstate.isModeThrough()) {
vertex.modelpos = pos;
vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
ModelCoords viewpos = TransformUnit::WorldToView(vertex.worldpos);
WorldCoords worldpos = WorldCoords(TransformUnit::ModelToWorld(pos));
ModelCoords viewpos = TransformUnit::WorldToView(worldpos);
vertex.clippos = ClipCoords(TransformUnit::ViewToClip(viewpos));
if (gstate.isFogEnabled()) {
float fog_end = getFloat24(gstate.fog1);
Expand All @@ -230,31 +230,32 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
}
vertex.screenpos = ClipToScreenInternal(vertex.clippos, &outside_range_flag);

Vec3<float> worldnormal;
if (vreader.hasNormal()) {
vertex.worldnormal = TransformUnit::ModelToWorldNormal(vertex.normal);
vertex.worldnormal.NormalizeOr001();
worldnormal = TransformUnit::ModelToWorldNormal(normal);
worldnormal.NormalizeOr001();
} else {
vertex.worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
worldnormal = Vec3<float>(0.0f, 0.0f, 1.0f);
}

// Time to generate some texture coords. Lighting will handle shade mapping.
if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX) {
Vec3f source;
switch (gstate.getUVProjMode()) {
case GE_PROJMAP_POSITION:
source = vertex.modelpos;
source = pos;
break;

case GE_PROJMAP_UV:
source = Vec3f(vertex.texturecoords, 0.0f);
break;

case GE_PROJMAP_NORMALIZED_NORMAL:
source = vertex.normal.NormalizedOr001(cpu_info.bSSE4_1);
source = normal.NormalizedOr001(cpu_info.bSSE4_1);
break;

case GE_PROJMAP_NORMAL:
source = vertex.normal;
source = normal;
break;

default:
Expand All @@ -268,12 +269,12 @@ VertexData TransformUnit::ReadVertex(VertexReader &vreader, bool &outside_range_
float z_recip = 1.0f / stq.z;
vertex.texturecoords = Vec2f(stq.x * z_recip, stq.y * z_recip);
} else if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
Lighting::GenerateLightST(vertex);
Lighting::GenerateLightST(vertex, worldnormal);
}

PROFILE_THIS_SCOPE("light");
if (gstate.isLightingEnabled())
Lighting::Process(vertex, vreader.hasColor0());
Lighting::Process(vertex, worldpos, worldnormal, vreader.hasColor0());
} else {
vertex.screenpos.x = (int)(pos[0] * 16) + gstate.getOffsetX16();
vertex.screenpos.y = (int)(pos[1] * 16) + gstate.getOffsetY16();
Expand Down
16 changes: 3 additions & 13 deletions GPU/Software/TransformUnit.h
Expand Up @@ -82,31 +82,21 @@ struct DrawingCoords
}
};

struct VertexData
{
void Lerp(float t, const VertexData& a, const VertexData& b)
{
// World coords only needed for lighting, so we don't Lerp those

modelpos = ::Lerp(a.modelpos, b.modelpos, t);
struct VertexData {
void Lerp(float t, const VertexData &a, const VertexData &b) {
clippos = ::Lerp(a.clippos, b.clippos, t);
screenpos = ::Lerp(a.screenpos, b.screenpos, t); // TODO: Should use a LerpInt (?)
// Ignore screenpos because Lerp() is only used before screenpos is calculated.
texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t);
normal = ::Lerp(a.normal, b.normal, t);
fogdepth = ::Lerp(a.fogdepth, b.fogdepth, t);

u16 t_int = (u16)(t*256);
color0 = LerpInt<Vec4<int>,256>(a.color0, b.color0, t_int);
color1 = LerpInt<Vec3<int>,256>(a.color1, b.color1, t_int);
}

ModelCoords modelpos;
WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead
ClipCoords clippos;
ScreenCoords screenpos; // TODO: Shouldn't store this ?
Vec2<float> texturecoords;
Vec3<float> normal;
WorldCoords worldnormal;
Vec4<int> color0;
Vec3<int> color1;
float fogdepth;
Expand Down

0 comments on commit a12a196

Please sign in to comment.