Skip to content

Commit

Permalink
small refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack Dermody committed Feb 20, 2024
1 parent f858702 commit 64f806e
Show file tree
Hide file tree
Showing 26 changed files with 263 additions and 234 deletions.
1 change: 1 addition & 0 deletions BrightData.Cuda/BrightData.Cuda.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
<PackageReleaseNotes>performance refactor</PackageReleaseNotes>
<Nullable>enable</Nullable>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<TrimmerSingleWarn>false</TrimmerSingleWarn>
<PackageReadmeFile>readme.md</PackageReadmeFile>
</PropertyGroup>

Expand Down
2 changes: 1 addition & 1 deletion BrightData.Cuda/CudaToolkit/Definitions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,7 @@ internal struct CudaMemsetNodeParams
public uint elementSize;
public SizeT width;
public SizeT height;
public static CudaMemsetNodeParams Init<T>(CudaDeviceVariable<T> deviceVariable, uint value) where T : struct
public static CudaMemsetNodeParams Init<T>(CudaDeviceVariable<T> deviceVariable, uint value) where T : unmanaged
{
var para = new CudaMemsetNodeParams {
dst = deviceVariable.DevicePointer,
Expand Down
24 changes: 12 additions & 12 deletions BrightData.Cuda/CudaToolkit/DriverAPINativeMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ public static CuResult CuLaunchKernelEx(ref CuLaunchConfig config, CuFunction f,
conf.NumAttrs = (uint)arraySize;
}

var paramsSize = Marshal.SizeOf(typeof(CuLaunchAttribute));
var paramsSize = Marshal.SizeOf<CuLaunchAttribute>();

if (arraySize > 0) {
conf.Attrs = Marshal.AllocHGlobal(arraySize * paramsSize);
Expand Down Expand Up @@ -774,7 +774,7 @@ public static CuResult CuOccupancyMaxPotentialClusterSize(ref int clusterSize, C
conf.NumAttrs = (uint)arraySize;
}

var paramsSize = Marshal.SizeOf(typeof(CuLaunchAttribute));
var paramsSize = Marshal.SizeOf<CuLaunchAttribute>();

if (arraySize > 0) {
conf.Attrs = Marshal.AllocHGlobal(arraySize * paramsSize);
Expand Down Expand Up @@ -823,7 +823,7 @@ public static CuResult CuOccupancyMaxActiveClusters(ref int numClusters, CuFunct
conf.NumAttrs = (uint)arraySize;
}

var paramsSize = Marshal.SizeOf(typeof(CuLaunchAttribute));
var paramsSize = Marshal.SizeOf<CuLaunchAttribute>();
if (arraySize > 0) {
conf.Attrs = Marshal.AllocHGlobal(arraySize * paramsSize);
}
Expand Down Expand Up @@ -916,7 +916,7 @@ public static CuResult CuGraphAddExternalSemaphoresSignalNode(ref CuGraphNode ph
arraySize = nodeParams.ExtSemArray.Length;
}

var paramsSize = Marshal.SizeOf(typeof(CudaExternalSemaphoreSignalParams));
var paramsSize = Marshal.SizeOf<CudaExternalSemaphoreSignalParams>();
mainPtr = Marshal.AllocHGlobal(2 * IntPtr.Size + sizeof(int));

if (arraySize > 0) {
Expand Down Expand Up @@ -965,7 +965,7 @@ public static CuResult CuGraphExternalSemaphoresSignalNodeGetParams(CuGraphNode

try {
const int arraySize = 0;
var paramsSize = Marshal.SizeOf(typeof(CudaExternalSemaphoreSignalParams));
var paramsSize = Marshal.SizeOf<CudaExternalSemaphoreSignalParams>();
mainPtr = Marshal.AllocHGlobal(2 * IntPtr.Size + sizeof(int));

Marshal.WriteIntPtr(mainPtr + 0, IntPtr.Zero);
Expand Down Expand Up @@ -1020,7 +1020,7 @@ public static CuResult CuGraphExternalSemaphoresSignalNodeSetParams(CuGraphNode
arraySize = nodeParams.ExtSemArray.Length;
}

var paramsSize = Marshal.SizeOf(typeof(CudaExternalSemaphoreSignalParams));
var paramsSize = Marshal.SizeOf<CudaExternalSemaphoreSignalParams>();
mainPtr = Marshal.AllocHGlobal(2 * IntPtr.Size + sizeof(int));

if (arraySize > 0) {
Expand Down Expand Up @@ -1077,7 +1077,7 @@ public static CuResult CuGraphAddExternalSemaphoresWaitNode(ref CuGraphNode phGr
arraySize = nodeParams.ExtSemArray.Length;
}

var paramsSize = Marshal.SizeOf(typeof(CudaExternalSemaphoreWaitParams));
var paramsSize = Marshal.SizeOf<CudaExternalSemaphoreWaitParams>();
mainPtr = Marshal.AllocHGlobal(2 * IntPtr.Size + sizeof(int));

if (arraySize > 0) {
Expand Down Expand Up @@ -1125,7 +1125,7 @@ public static CuResult CuGraphExternalSemaphoresWaitNodeGetParams(CuGraphNode hN

try {
const int arraySize = 0;
var paramsSize = Marshal.SizeOf(typeof(CudaExternalSemaphoreWaitParams));
var paramsSize = Marshal.SizeOf<CudaExternalSemaphoreWaitParams>();
mainPtr = Marshal.AllocHGlobal(2 * IntPtr.Size + sizeof(int));

Marshal.WriteIntPtr(mainPtr + 0, IntPtr.Zero);
Expand Down Expand Up @@ -1179,7 +1179,7 @@ public static CuResult CuGraphExternalSemaphoresWaitNodeSetParams(CuGraphNode hN
arraySize = nodeParams.ExtSemArray.Length;
}

var paramsSize = Marshal.SizeOf(typeof(CudaExternalSemaphoreWaitParams));
var paramsSize = Marshal.SizeOf<CudaExternalSemaphoreWaitParams>();

mainPtr = Marshal.AllocHGlobal(2 * IntPtr.Size + sizeof(int));

Expand Down Expand Up @@ -1231,7 +1231,7 @@ public static CuResult CuGraphBatchMemOpNodeGetParams(CuGraphNode hNode, ref Cud
try {
retVal = cuGraphBatchMemOpNodeGetParamsInternal(hNode, ref parameters);
var arraySize = (int)parameters.Count;
var paramsSize = Marshal.SizeOf(typeof(CuStreamBatchMemOpParams));
var paramsSize = Marshal.SizeOf<CuStreamBatchMemOpParams>();

if (arraySize > 0) {
nodeParamsOut.ParamArray = new CuStreamBatchMemOpParams[arraySize];
Expand Down Expand Up @@ -1270,7 +1270,7 @@ public static CuResult CuGraphBatchMemOpNodeSetParams(CuGraphNode hNode, ref Cud
parameters.Count = (uint)arraySize;
}

var paramsSize = Marshal.SizeOf(typeof(CuStreamBatchMemOpParams));
var paramsSize = Marshal.SizeOf<CuStreamBatchMemOpParams>();
if (arraySize > 0) {
parameters.ParamArray = Marshal.AllocHGlobal(arraySize * paramsSize);
}
Expand Down Expand Up @@ -1307,7 +1307,7 @@ public static CuResult CuGraphMemAllocNodeGetParams(CuGraphNode hNode, ref CudaM
try {
retVal = cuGraphMemAllocNodeGetParamsInternal(hNode, ref parameters);
var arraySize = (int)parameters.accessDescCount;
var paramsSize = Marshal.SizeOf(typeof(CuMemAccessDesc));
var paramsSize = Marshal.SizeOf<CuMemAccessDesc>();

if (arraySize > 0) {
paramsOut.accessDescs = new CuMemAccessDesc[arraySize];
Expand Down
81 changes: 14 additions & 67 deletions BrightData.Cuda/CudaToolkit/Types/CudaDeviceVariable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace BrightData.Cuda.CudaToolkit.Types
{
internal class CudaDeviceVariable<T> : IDisposable where T : struct
internal class CudaDeviceVariable<T> : IDisposable where T : unmanaged
{
readonly CuDevicePtr _devPtr;
readonly SizeT _size = 0;
Expand All @@ -16,7 +16,7 @@ public CudaDeviceVariable(SizeT size)
{
_devPtr = new CuDevicePtr();
_size = size;
_typeSize = (uint)Marshal.SizeOf(typeof(T));
_typeSize = (uint)Marshal.SizeOf<T>();

_res = DriverApiNativeMethods.MemoryManagement.cuMemAlloc_v2(ref _devPtr, _typeSize * size);

Expand All @@ -27,7 +27,7 @@ public CudaDeviceVariable(SizeT size, CuStream stream)
{
_devPtr = new CuDevicePtr();
_size = size;
_typeSize = (uint)Marshal.SizeOf(typeof(T));
_typeSize = (uint)Marshal.SizeOf<T>();

_res = DriverApiNativeMethods.MemoryManagement.cuMemAllocAsync(ref _devPtr, _typeSize * size, stream);

Expand All @@ -45,7 +45,7 @@ public CudaDeviceVariable(CuDevicePtr devPtr, bool isOwner)
_res = DriverApiNativeMethods.MemoryManagement.cuMemGetAddressRange_v2(ref nullPtr, ref _size, devPtr);

if (_res != CuResult.Success) throw new CudaException(_res);
_typeSize = (uint)Marshal.SizeOf(typeof(T));
_typeSize = (uint)Marshal.SizeOf<T>();
var sizeInBytes = _size;
_size = sizeInBytes / _typeSize;
if (sizeInBytes != _size * _typeSize)
Expand All @@ -59,7 +59,7 @@ public CudaDeviceVariable(CuDevicePtr devPtr, SizeT size)
public CudaDeviceVariable(CuDevicePtr devPtr, bool isOwner, SizeT size)
{
_devPtr = devPtr;
_typeSize = (uint)Marshal.SizeOf(typeof(T));
_typeSize = (uint)Marshal.SizeOf<T>();
_size = size / _typeSize;
if (size != _size * _typeSize)
throw new CudaException("Variable size is not a multiple of its type size.");
Expand All @@ -73,7 +73,7 @@ public CudaDeviceVariable(CuModule module, string name)

if (_res != CuResult.Success) throw new CudaException(_res);

_typeSize = Marshal.SizeOf(typeof(T));
_typeSize = Marshal.SizeOf<T>();
_size = sizeInBytes / _typeSize;

if (sizeInBytes != _size * _typeSize)
Expand Down Expand Up @@ -330,48 +330,6 @@ public void CopyToHost(T[] dest, SizeT offsetSrc, SizeT offsetDest, SizeT sizeIn
if (res != CuResult.Success)
throw new CudaException(res);
}
public void CopyToHost(ref T dest)
{
if (_disposed) throw new ObjectDisposedException(ToString());
var aSizeInBytes = _typeSize;
var handle = GCHandle.Alloc(dest, GCHandleType.Pinned);
CuResult res;
try
{
var ptr = handle.AddrOfPinnedObject();
res = DriverApiNativeMethods.SynchronousMemcpyV2.cuMemcpyDtoH_v2(ptr, _devPtr, aSizeInBytes);

dest = (T)(Marshal.PtrToStructure(ptr, typeof(T)) ?? throw new InvalidOperationException());
}
finally
{
handle.Free();
}

if (res != CuResult.Success)
throw new CudaException(res);
}
public void CopyToHost(ref T dest, SizeT offsetSrc)
{
if (_disposed) throw new ObjectDisposedException(ToString());
var aSizeInBytes = _typeSize;
var handle = GCHandle.Alloc(dest, GCHandleType.Pinned);
CuResult res;
try
{
var ptr = handle.AddrOfPinnedObject();
res = DriverApiNativeMethods.SynchronousMemcpyV2.cuMemcpyDtoH_v2(ptr, _devPtr + offsetSrc, aSizeInBytes);

dest = (T)(Marshal.PtrToStructure(ptr, typeof(T)) ?? throw new InvalidOperationException());
}
finally
{
handle.Free();
}

if (res != CuResult.Success)
throw new CudaException(res);
}
public void CopyToHost(IntPtr dest)
{
if (_disposed) throw new ObjectDisposedException(ToString());
Expand Down Expand Up @@ -509,33 +467,22 @@ public CuMemPoolPtrExportData MemPoolExportPointer()
throw new CudaException(_res);
return exportData;
}
public T this[SizeT index]
public unsafe T this[SizeT index]
{
get
{
if (_disposed) throw new ObjectDisposedException(ToString());

if (_disposed)
throw new ObjectDisposedException(ToString());
var position = _devPtr + index * _typeSize;
var dest = new T();

var aSizeInBytes = _typeSize;
var handle = GCHandle.Alloc(dest, GCHandleType.Pinned);
Span<T> local = stackalloc T[1];
CuResult res;
try
{
var ptr = handle.AddrOfPinnedObject();
res = DriverApiNativeMethods.SynchronousMemcpyV2.cuMemcpyDtoH_v2(ptr, position, aSizeInBytes);

dest = (T)(Marshal.PtrToStructure(ptr, typeof(T)) ?? throw new InvalidOperationException());
}
finally
{
handle.Free();
var aSizeInBytes = _typeSize;
fixed (T* ptr = local) {
res = DriverApiNativeMethods.SynchronousMemcpyV2.cuMemcpyDtoH_v2((IntPtr)ptr, position, aSizeInBytes);
}

if (res != CuResult.Success)
throw new CudaException(res);
return dest;
return local[0];
}

set
Expand Down
2 changes: 1 addition & 1 deletion BrightData.Cuda/CudaToolkit/Types/Dim3.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,6 @@ public Dim3(int val) : this((uint)val, 1, 1)

public static Dim3 Min(Dim3 aValue, Dim3 bValue) => new(System.Math.Min(aValue.X, bValue.X), System.Math.Min(aValue.Y, bValue.Y), System.Math.Min(aValue.Z, bValue.Z));
public static Dim3 Max(Dim3 aValue, Dim3 bValue) => new(System.Math.Max(aValue.X, bValue.X), System.Math.Max(aValue.Y, bValue.Y), System.Math.Max(aValue.Z, bValue.Z));
public uint Size => (uint)Marshal.SizeOf(this);
public uint Size => (uint)Marshal.SizeOf<Dim3>();
}
}

0 comments on commit 64f806e

Please sign in to comment.