
Merge branch 'master' of github.com:benvanik/xenia

2 parents 3e1ca3b + a95de67 · commit bb9369b128d5506a6d762feec7d15d87921b5a44 · @DrChat committed Jan 10, 2017
@@ -477,13 +477,15 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
// Upload/convert.
bool uploaded = false;
switch (texture_info.dimension) {
+ case Dimension::k1D:
+ uploaded = UploadTexture1D(entry->handle, texture_info);
+ break;
case Dimension::k2D:
uploaded = UploadTexture2D(entry->handle, texture_info);
break;
case Dimension::kCube:
uploaded = UploadTextureCube(entry->handle, texture_info);
break;
- case Dimension::k1D:
case Dimension::k3D:
assert_unhandled_case(texture_info.dimension);
return nullptr;
@@ -706,6 +708,62 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
}
}
+bool TextureCache::UploadTexture1D(GLuint texture,
+ const TextureInfo& texture_info) {
+ SCOPE_profile_cpu_f("gpu");
+ const auto host_address =
+ memory_->TranslatePhysical(texture_info.guest_address);
+
+ const auto& config =
+ texture_configs[uint32_t(texture_info.format_info->format)];
+ if (config.format == GL_INVALID_ENUM) {
+ assert_always("Unhandled texture format");
+ return false;
+ }
+
+ size_t unpack_length = texture_info.output_length;
+ glTextureStorage1D(texture, 1, config.internal_format,
+ texture_info.size_1d.output_width);
+
+ auto allocation = scratch_buffer_->Acquire(unpack_length);
+
+ if (!texture_info.is_tiled) {
+ if (texture_info.size_1d.input_pitch == texture_info.size_1d.output_pitch) {
+ TextureSwap(texture_info.endianness, allocation.host_ptr, host_address,
+ unpack_length);
+ } else {
+ assert_always();
+ }
+ } else {
+ assert_always();
+ }
+ size_t unpack_offset = allocation.offset;
+ scratch_buffer_->Commit(std::move(allocation));
+ // TODO(benvanik): avoid flush on entire buffer by using another texture
+ // buffer.
+ scratch_buffer_->Flush();
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
+ if (texture_info.is_compressed()) {
+ glCompressedTextureSubImage1D(
+ texture, 0, 0, texture_info.size_1d.output_width, config.format,
+ static_cast<GLsizei>(unpack_length),
+ reinterpret_cast<void*>(unpack_offset));
+ } else {
+ // Most of these don't seem to have an effect on compressed images.
+ // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
+ // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
+ // glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
+ glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+ glTextureSubImage1D(texture, 0, 0, texture_info.size_1d.output_width,
+ config.format, config.type,
+ reinterpret_cast<void*>(unpack_offset));
+ }
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ return true;
+}
+
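For context, this unpack path streams the converted texels through a pixel unpack buffer and then points glTextureSubImage1D at a byte offset inside that buffer rather than at client memory. A minimal standalone sketch of the same pattern, assuming a current OpenGL 4.5 context with a loaded function loader (buffer, texture, and size names below are illustrative, not xenia's):

// Sketch only: upload a tightly packed RGBA8 1D texture through a pixel
// unpack buffer, addressing the texel data by offset into the bound buffer.
const GLsizeiptr kStagingSize = 1 << 20;  // illustrative staging size
const GLsizei width = 256;                // illustrative texel width
const GLintptr upload_offset = 0;         // illustrative offset into the PBO

GLuint pbo = 0;
glCreateBuffers(1, &pbo);
glNamedBufferData(pbo, kStagingSize, nullptr, GL_STREAM_DRAW);
auto staging = reinterpret_cast<uint8_t*>(glMapNamedBufferRange(
    pbo, 0, kStagingSize, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
// ... write width * 4 bytes of converted texels at staging + upload_offset ...
glFlushMappedNamedBufferRange(pbo, upload_offset, width * 4);
glUnmapNamedBuffer(pbo);

GLuint tex = 0;
glCreateTextures(GL_TEXTURE_1D, 1, &tex);
glTextureStorage1D(tex, 1, GL_RGBA8, width);

glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
// With a buffer bound to GL_PIXEL_UNPACK_BUFFER, the final pointer argument
// is interpreted as a byte offset into that buffer.
glTextureSubImage1D(tex, 0, 0, width, GL_RGBA, GL_UNSIGNED_BYTE,
                    reinterpret_cast<void*>(upload_offset));
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);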
bool TextureCache::UploadTexture2D(GLuint texture,
const TextureInfo& texture_info) {
SCOPE_profile_cpu_f("gpu");
@@ -96,6 +96,7 @@ class TextureCache {
uint64_t opt_hash = 0);
void EvictTexture(TextureEntry* entry);
+ bool UploadTexture1D(GLuint texture, const TextureInfo& texture_info);
bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
bool UploadTextureCube(GLuint texture, const TextureInfo& texture_info);
@@ -165,7 +165,33 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
void TextureInfo::CalculateTextureSizes1D(const xe_gpu_texture_fetch_t& fetch) {
// ?
- size_1d.width = fetch.size_1d.width;
+ size_1d.logical_width = 1 + fetch.size_1d.width;
+
+ uint32_t block_width =
+ xe::round_up(size_1d.logical_width, format_info->block_width) /
+ format_info->block_width;
+
+ uint32_t tile_width = uint32_t(std::ceil(block_width / 32.0f));
+ size_1d.block_width = tile_width * 32;
+
+ uint32_t bytes_per_block =
+ format_info->block_width * format_info->bits_per_pixel / 8;
+
+ uint32_t byte_pitch = tile_width * 32 * bytes_per_block;
+ if (!is_tiled) {
+ // Each row must be a multiple of 256 in linear textures.
+ byte_pitch = xe::round_up(byte_pitch, 256);
+ }
+
+ size_1d.input_width = tile_width * 32 * format_info->block_width;
+
+ size_1d.output_width = block_width * format_info->block_width;
+
+ size_1d.input_pitch = byte_pitch;
+ size_1d.output_pitch = block_width * bytes_per_block;
+
+ input_length = size_1d.input_pitch;
+ output_length = size_1d.output_pitch;
}
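To make the size arithmetic above concrete, here is a small worked example (illustration only, not part of the commit): for an untiled 8bpp format whose block_width is 1 and a fetch.size_1d.width of 99, the logical width is 100 texels, the tile-aligned block width pads up to 128, and the linear row pitch pads up to 256 bytes.

// Illustration only: mirrors the math in CalculateTextureSizes1D for a
// hypothetical untiled 8bpp format (block_width == 1, bits_per_pixel == 8)
// and fetch.size_1d.width == 99.
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  auto round_up = [](uint32_t value, uint32_t multiple) {
    return (value + multiple - 1) / multiple * multiple;
  };

  const uint32_t format_block_width = 1;  // assumed format property
  const uint32_t bits_per_pixel = 8;      // assumed format property

  uint32_t logical_width = 1 + 99;  // 100 texels
  uint32_t block_width =
      round_up(logical_width, format_block_width) / format_block_width;  // 100
  uint32_t tile_width = uint32_t(std::ceil(block_width / 32.0f));        // 4
  uint32_t bytes_per_block = format_block_width * bits_per_pixel / 8;    // 1

  uint32_t byte_pitch = tile_width * 32 * bytes_per_block;  // 128
  byte_pitch = round_up(byte_pitch, 256);  // linear rows pad to 256 -> 256

  assert(tile_width * 32 == 128);                // size_1d.block_width
  assert(block_width * bytes_per_block == 100);  // output_pitch / output_length
  assert(byte_pitch == 256);                     // input_pitch / input_length
  return 0;
}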
void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
@@ -231,7 +231,12 @@ struct TextureInfo {
union {
struct {
- uint32_t width;
+ uint32_t logical_width;
+ uint32_t block_width;
+ uint32_t input_width;
+ uint32_t input_pitch;
+ uint32_t output_width;
+ uint32_t output_pitch;
} size_1d;
struct {
uint32_t logical_width;
@@ -420,6 +420,11 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
bool uploaded = false;
switch (texture_info.dimension) {
+ case Dimension::k1D: {
+ uploaded = UploadTexture1D(command_buffer, completion_fence, texture,
+ texture_info);
+ } break;
+
case Dimension::k2D: {
uploaded = UploadTexture2D(command_buffer, completion_fence, texture,
texture_info);
@@ -822,6 +827,19 @@ void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
vkBeginCommandBuffer(command_buffer, &begin_info);
}
+void TextureCache::ConvertTexture1D(uint8_t* dest, const TextureInfo& src) {
+ void* host_address = memory_->TranslatePhysical(src.guest_address);
+ if (!src.is_tiled) {
+ if (src.size_1d.input_pitch == src.size_1d.output_pitch) {
+ TextureSwap(src.endianness, dest, host_address, src.output_length);
+ } else {
+ assert_always();
+ }
+ } else {
+ assert_always();
+ }
+}
+
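ConvertTexture1D leaves the actual byte reordering to TextureSwap. As a rough idea of what an endian-swapping copy does for 32-bit-swapped guest data, here is a generic sketch (not xenia's TextureSwap implementation; it assumes 4-byte-aligned buffers and a length that is a multiple of 4):

// Sketch only: copy `length` bytes while byte-swapping every 32-bit word,
// roughly what a swap-8-in-32 conversion does for big-endian guest data.
#include <cstddef>
#include <cstdint>

void CopySwap32(void* dest, const void* src, size_t length) {
  auto dst32 = reinterpret_cast<uint32_t*>(dest);
  auto src32 = reinterpret_cast<const uint32_t*>(src);
  for (size_t i = 0; i < length / 4; ++i) {
    uint32_t v = src32[i];
    dst32[i] = (v >> 24) | ((v >> 8) & 0x0000FF00u) |
               ((v << 8) & 0x00FF0000u) | (v << 24);
  }
}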
void TextureCache::ConvertTexture2D(uint8_t* dest, const TextureInfo& src) {
void* host_address = memory_->TranslatePhysical(src.guest_address);
if (!src.is_tiled) {
@@ -936,6 +954,86 @@ void TextureCache::ConvertTextureCube(uint8_t* dest, const TextureInfo& src) {
}
}
+bool TextureCache::UploadTexture1D(VkCommandBuffer command_buffer,
+ VkFence completion_fence, Texture* dest,
+ const TextureInfo& src) {
+#if FINE_GRAINED_DRAW_SCOPES
+ SCOPE_profile_cpu_f("gpu");
+#endif // FINE_GRAINED_DRAW_SCOPES
+
+ assert_true(src.dimension == Dimension::k1D);
+
+ size_t unpack_length = src.output_length;
+ if (!staging_buffer_.CanAcquire(unpack_length)) {
+ // Need to have unique memory for every upload for at least one frame. If we
+ // run out of memory, we need to flush all queued upload commands to the
+ // GPU.
+ FlushPendingCommands(command_buffer, completion_fence);
+
+ // Uploads have been flushed. Continue.
+ if (!staging_buffer_.CanAcquire(unpack_length)) {
+ // The staging buffer isn't big enough to hold this texture.
+ XELOGE(
+ "TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
+ unpack_length);
+ assert_always();
+ return false;
+ }
+ }
+
+ // Grab some temporary memory for staging.
+ auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
+ assert_not_null(alloc);
+
+ // Upload texture into GPU memory.
+ // TODO: If the GPU supports it, we can submit a compute batch to convert the
+ // texture and copy it to its destination. Otherwise, fallback to conversion
+ // on the CPU.
+ ConvertTexture1D(reinterpret_cast<uint8_t*>(alloc->host_ptr), src);
+ staging_buffer_.Flush(alloc);
+
+ // Transition the texture into a transfer destination layout.
+ VkImageMemoryBarrier barrier;
+ barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+ barrier.pNext = nullptr;
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask =
+ VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT;
+ barrier.oldLayout = dest->image_layout;
+ barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.image = dest->image;
+ barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+ vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+ nullptr, 1, &barrier);
+
+ // Now move the converted texture into the destination.
+ VkBufferImageCopy copy_region;
+ copy_region.bufferOffset = alloc->offset;
+ copy_region.bufferRowLength = src.size_1d.output_width;
+ copy_region.bufferImageHeight = 1;
+ copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+ copy_region.imageOffset = {0, 0, 0};
+ copy_region.imageExtent = {src.size_1d.output_width, 1, 1};
+ vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
+ dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
+ &copy_region);
+
+ // Now transition the texture into a shader readonly source.
+ barrier.srcAccessMask = barrier.dstAccessMask;
+ barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ barrier.oldLayout = barrier.newLayout;
+ barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+ nullptr, 1, &barrier);
+
+ dest->image_layout = barrier.newLayout;
+ return true;
+}
+
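For reference, the two vkCmdPipelineBarrier calls above bracket the buffer-to-image copy with layout transitions: whatever layout the image was in, to TRANSFER_DST_OPTIMAL before the copy, then TRANSFER_DST_OPTIMAL to SHADER_READ_ONLY_OPTIMAL afterwards (the commit uses VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT for both stage masks). The same transition can be packaged as a small standalone helper; this is a sketch with illustrative names, not xenia code:

// Sketch only: record a whole-image layout transition on a color image.
#include <vulkan/vulkan.h>

void TransitionImage(VkCommandBuffer cmd, VkImage image,
                     VkImageLayout old_layout, VkImageLayout new_layout,
                     VkAccessFlags src_access, VkAccessFlags dst_access) {
  VkImageMemoryBarrier barrier = {};
  barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  barrier.srcAccessMask = src_access;
  barrier.dstAccessMask = dst_access;
  barrier.oldLayout = old_layout;
  barrier.newLayout = new_layout;
  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.image = image;
  barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
  // Conservative stage masks; a tuned version would narrow these to the
  // stages that actually produce and consume the data.
  vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                       VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0,
                       nullptr, 1, &barrier);
}

// Illustrative usage mirroring the upload above:
//   TransitionImage(cmd, image, VK_IMAGE_LAYOUT_UNDEFINED,
//                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
//                   0, VK_ACCESS_TRANSFER_WRITE_BIT);
//   ... vkCmdCopyBufferToImage(...) ...
//   TransitionImage(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
//                   VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
//                   VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);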
bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer,
VkFence completion_fence, Texture* dest,
const TextureInfo& src) {
@@ -148,12 +148,16 @@ class TextureCache {
void FlushPendingCommands(VkCommandBuffer command_buffer,
VkFence completion_fence);
+ void ConvertTexture1D(uint8_t* dest, const TextureInfo& src);
void ConvertTexture2D(uint8_t* dest, const TextureInfo& src);
void ConvertTextureCube(uint8_t* dest, const TextureInfo& src);
// Queues commands to upload a texture from system memory, applying any
// conversions necessary. This may flush the command buffer to the GPU if we
// run out of staging memory.
+ bool UploadTexture1D(VkCommandBuffer command_buffer, VkFence completion_fence,
+ Texture* dest, const TextureInfo& src);
+
bool UploadTexture2D(VkCommandBuffer command_buffer, VkFence completion_fence,
Texture* dest, const TextureInfo& src);
@@ -378,6 +378,12 @@ inline void AppendParam(StringBuffer* string_buffer, lpstring_t param) {
string_buffer->AppendFormat("(%s)", param.value().c_str());
}
}
+inline void AppendParam(StringBuffer* string_buffer, lpwstring_t param) {
+ string_buffer->AppendFormat("%.8X", param.guest_address());
+ if (param) {
+ string_buffer->AppendFormat("(%S)", param.value().c_str());
+ }
+}
inline void AppendParam(StringBuffer* string_buffer,
pointer_t<X_OBJECT_ATTRIBUTES> record) {
string_buffer->AppendFormat("%.8X", record.guest_address());
@@ -503,13 +503,36 @@ dword_result_t NetDll_XNetGetEthernetLinkStatus(dword_t caller) { return 0; }
DECLARE_XAM_EXPORT(NetDll_XNetGetEthernetLinkStatus,
ExportTag::kStub | ExportTag::kNetworking);
-dword_result_t NetDll_XNetDnsLookup(lpstring_t address, dword_t evt_handle,
- pointer_t<XNDNS> host_out) {
+dword_result_t NetDll_XNetDnsLookup(dword_t caller, lpstring_t host,
+ dword_t event_handle, lpdword_t pdns) {
+ // TODO(gibbed): actually implement this
+ if (pdns) {
+ auto dns_guest = kernel_memory()->SystemHeapAlloc(sizeof(XNDNS));
+ auto dns = kernel_memory()->TranslateVirtual<XNDNS*>(dns_guest);
+ dns->status = 1; // non-zero = error
+ *pdns = dns_guest;
+ }
+ if (event_handle) {
+ auto ev =
+ kernel_state()->object_table()->LookupObject<XEvent>(event_handle);
+ assert_not_null(ev);
+ ev->Set(0, false);
+ }
return 0;
}
DECLARE_XAM_EXPORT(NetDll_XNetDnsLookup,
ExportTag::kStub | ExportTag::kNetworking);
+dword_result_t NetDll_XNetDnsRelease(dword_t caller, pointer_t<XNDNS> dns) {
+ if (!dns) {
+ return X_STATUS_INVALID_PARAMETER;
+ }
+ kernel_memory()->SystemHeapFree(dns.guest_address());
+ return 0;
+}
+DECLARE_XAM_EXPORT(NetDll_XNetDnsRelease,
+ ExportTag::kStub | ExportTag::kNetworking);
+
SHIM_CALL NetDll_XNetQosServiceLookup_shim(PPCContext* ppc_context,
KernelState* kernel_state) {
uint32_t caller = SHIM_GET_ARG_32(0);
