In [1]:
# Kernel used for this notebook: python3
# If PyCUDA is not installed yet, uncomment the next line and run this cell once.
#!pip install pycuda

In [2]:
import pycuda
import pycuda.driver as drv
# The CUDA driver has to be initialised before any other pycuda.driver call.
drv.init()

device_count = drv.Device.count()
print(f'Detected {device_count} CUDA Capable device(s)')


Detected 2 CUDA Capable device(s)


In [3]:
# Look at the first CUDA device; change `i` to inspect a different one.
i = 0
gpu_device = drv.Device(i)
device_name = gpu_device.name()
print(f'Device {i}: {device_name}')

# compute_capability() yields a (major, minor) pair; fold it into a float
# such as 5.2 so it can be printed and used as a lookup key.
cc_major, cc_minor = gpu_device.compute_capability()
compute_capability = float('%d.%d' % (cc_major, cc_minor))
print(f'\t Compute Capability: {compute_capability}')

# total_memory() is in bytes; integer-divide down to whole megabytes.
bytes_per_megabyte = 1024**2
total_memory_mb = gpu_device.total_memory() // bytes_per_megabyte
print(f'\t Total Memory: {total_memory_mb} megabytes')

Device 0: NVIDIA GeForce GTX TITAN X
	 Compute Capability: 5.2
	 Total Memory: 12209 megabytes


In [4]:
# Snapshot the device's attributes as (attribute, value) pairs, then re-key
# them by the attribute's string name so they can be looked up with plain
# strings such as 'MULTIPROCESSOR_COUNT'.
device_attributes_tuples = tuple(gpu_device.get_attributes().items())
device_attributes = {
    str(attribute): value for attribute, value in device_attributes_tuples
}

# Bare last expression: the notebook renders the whole dictionary.
device_attributes

{'ASYNC_ENGINE_COUNT': 2,
 'CAN_MAP_HOST_MEMORY': 1,
 'CLOCK_RATE': 1240500,
 'COMPUTE_CAPABILITY_MAJOR': 5,
 'COMPUTE_CAPABILITY_MINOR': 2,
 'COMPUTE_MODE': pycuda._driver.compute_mode.DEFAULT,
 'CONCURRENT_KERNELS': 1,
 'ECC_ENABLED': 0,
 'GLOBAL_L1_CACHE_SUPPORTED': 1,
 'GLOBAL_MEMORY_BUS_WIDTH': 384,
 'GPU_OVERLAP': 1,
 'INTEGRATED': 0,
 'KERNEL_EXEC_TIMEOUT': 1,
 'L2_CACHE_SIZE': 3145728,
 'LOCAL_L1_CACHE_SUPPORTED': 1,
 'MANAGED_MEMORY': 1,
 'MAXIMUM_SURFACE1D_LAYERED_LAYERS': 2048,
 'MAXIMUM_SURFACE1D_LAYERED_WIDTH': 16384,
 'MAXIMUM_SURFACE1D_WIDTH': 16384,
 'MAXIMUM_SURFACE2D_HEIGHT': 65536,
 'MAXIMUM_SURFACE2D_LAYERED_HEIGHT': 16384,
 'MAXIMUM_SURFACE2D_LAYERED_LAYERS': 2048,
 'MAXIMUM_SURFACE2D_LAYERED_WIDTH': 16384,
 'MAXIMUM_SURFACE2D_WIDTH': 65536,
 'MAXIMUM_SURFACE3D_DEPTH': 4096,
 'MAXIMUM_SURFACE3D_HEIGHT': 4096,
 'MAXIMUM_SURFACE3D_WIDTH': 4096,
 'MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS': 2046,
 'MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH': 16384,
 'MAXIMUM_SURFACECUBEMAP_WID

In [5]:
# Cores per multiprocessor is not reported by the GPU!
# We must use a lookup table based on compute capability.
# See the following:
# https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities

print(f'compute_capability: {compute_capability}')

# CUDA cores per streaming multiprocessor, keyed by compute capability.
# Keys are floats because `compute_capability` is built as float('<major>.<minor>');
# a float literal such as 5.2 parses to the same value, so the lookup matches.
cuda_cores_by_capability = {
    # Kepler: 192 cores per multiprocessor for every 3.x part.
    3.0: 192, 3.2: 192, 3.5: 192, 3.7: 192,
    # Maxwell (5.1 is kept because the original table listed it).
    5.0: 128, 5.1: 128, 5.2: 128, 5.3: 128,
    # Pascal
    6.0: 64, 6.1: 128, 6.2: 128,
    # Volta / Turing
    7.0: 64, 7.2: 64, 7.5: 64,
    # Ampere / Ada Lovelace
    8.0: 64, 8.6: 128, 8.7: 128, 8.9: 128,
    # Hopper
    9.0: 128,
}

# Fail with an actionable message instead of a bare KeyError when the device
# is newer (or older) than the table above.
if compute_capability not in cuda_cores_by_capability:
    raise KeyError(
        f'No cores-per-multiprocessor entry for compute capability '
        f'{compute_capability}; add it to cuda_cores_by_capability.'
    )

cuda_cores_per_mp = cuda_cores_by_capability[compute_capability]
print(f'CUDA Cores / Multiprocessor: {cuda_cores_per_mp}')

# device_attributes is the string-keyed attribute dictionary built earlier.
num_mp = device_attributes['MULTIPROCESSOR_COUNT']
print(f'number of multiprocessors: {num_mp}')

# Total core count = multiprocessors x cores per multiprocessor.
print(f'CUDA Cores: {num_mp*cuda_cores_per_mp} ')
    


compute_capability: 5.2
CUDA Cores / Multiprocessor: 128
number of multiprocessors: 24
CUDA Cores: 3072 


In [6]:
# Repeat the per-device query for every CUDA device the driver reports.
# Note: i, gpu_device, compute_capability and device_attributes are rebound
# on every pass, so after this cell they describe the last device visited.
for i in range(drv.Device.count()):

    gpu_device = drv.Device(i)
    print('Device {}: {}'.format(i, gpu_device.name()))

    # Fold the (major, minor) pair into a single float such as 5.2.
    cc_major, cc_minor = gpu_device.compute_capability()
    compute_capability = float('%d.%d' % (cc_major, cc_minor))
    print('\t Compute Capability: {}'.format(compute_capability))

    # total_memory() is in bytes; report whole megabytes.
    total_memory_mb = gpu_device.total_memory() // (1024**2)
    print('\t Total Memory: {} megabytes'.format(total_memory_mb))

    # Everything else the original deviceQuery shows comes from the
    # attribute table. Re-key it by the attribute's string name so each
    # value can be indexed with a plain string descriptor.
    device_attributes_tuples = gpu_device.get_attributes().items()
    device_attributes = {
        str(attribute): value for attribute, value in device_attributes_tuples
    }

    for attribute_name, attribute_value in device_attributes.items():
        print('\t {}: {}'.format(attribute_name, attribute_value))

Device 0: NVIDIA GeForce GTX TITAN X
	 Compute Capability: 5.2
	 Total Memory: 12209 megabytes
	 ASYNC_ENGINE_COUNT: 2
	 CAN_MAP_HOST_MEMORY: 1
	 CLOCK_RATE: 1240500
	 COMPUTE_CAPABILITY_MAJOR: 5
	 COMPUTE_CAPABILITY_MINOR: 2
	 COMPUTE_MODE: DEFAULT
	 CONCURRENT_KERNELS: 1
	 ECC_ENABLED: 0
	 GLOBAL_L1_CACHE_SUPPORTED: 1
	 GLOBAL_MEMORY_BUS_WIDTH: 384
	 GPU_OVERLAP: 1
	 INTEGRATED: 0
	 KERNEL_EXEC_TIMEOUT: 1
	 L2_CACHE_SIZE: 3145728
	 LOCAL_L1_CACHE_SUPPORTED: 1
	 MANAGED_MEMORY: 1
	 MAXIMUM_SURFACE1D_LAYERED_LAYERS: 2048
	 MAXIMUM_SURFACE1D_LAYERED_WIDTH: 16384
	 MAXIMUM_SURFACE1D_WIDTH: 16384
	 MAXIMUM_SURFACE2D_HEIGHT: 65536
	 MAXIMUM_SURFACE2D_LAYERED_HEIGHT: 16384
	 MAXIMUM_SURFACE2D_LAYERED_LAYERS: 2048
	 MAXIMUM_SURFACE2D_LAYERED_WIDTH: 16384
	 MAXIMUM_SURFACE2D_WIDTH: 65536
	 MAXIMUM_SURFACE3D_DEPTH: 4096
	 MAXIMUM_SURFACE3D_HEIGHT: 4096
	 MAXIMUM_SURFACE3D_WIDTH: 4096
	 MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: 2046
	 MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: 16384
	 MAXIMUM_SURFACECU