In [1]:
import pycuda
import pycuda.driver as drv
drv.init()

In [2]:
print('Detected {} CUDA Capable device(s) \n'.format(drv.Device.count()))
for i in range(drv.Device.count()):
    gpu_device = drv.Device(i)
    print('Device {}: {}'.format( i, gpu_device.name() ) )
    compute_capability = float( '%d.%d' % gpu_device.compute_capability() )
    print('\t Compute Capability: {}'.format(compute_capability))
    print('\t Total Memory: {} megabytes'.format(gpu_device.total_memory()//(1024**2)))
    
    device_attributes = {}
    for k, v in gpu_device.get_attributes().items():
        device_attributes[str(k)] = v
    
    # Cores per multiprocessor is not reported by the GPU!  
    # We must use a lookup table based on compute capability.
    # See the following:
    # http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
    cuda_cores_per_mp = {5.0:128, 5.1: 128, 5.2 : 128, 6.0 : 64, 6.1 : 128, 6.2 : 128, 7.5: 128}[compute_capability]
    num_mp = device_attributes.pop('MULTIPROCESSOR_COUNT')
    print('\t ({}) Multiprocessors * ({}) CUDA Cores per Multiprocessor = {} CUDA Cores'.format(num_mp, cuda_cores_per_mp, num_mp*cuda_cores_per_mp))

print("Other Attributes:")
for k in device_attributes.keys():
    print('\t {}: {}'.format(k, device_attributes[k]))

Detected 4 CUDA Capable device(s) 

Device 0: NVIDIA GeForce RTX 2080 Ti
	 Compute Capability: 7.5
	 Total Memory: 11019 megabytes
	 (68) Multiprocessors * (128) CUDA Cores per Multiprocessor = 8704 CUDA Cores
Device 1: NVIDIA GeForce RTX 2080 Ti
	 Compute Capability: 7.5
	 Total Memory: 11019 megabytes
	 (68) Multiprocessors * (128) CUDA Cores per Multiprocessor = 8704 CUDA Cores
Device 2: NVIDIA GeForce RTX 2080 Ti
	 Compute Capability: 7.5
	 Total Memory: 11019 megabytes
	 (68) Multiprocessors * (128) CUDA Cores per Multiprocessor = 8704 CUDA Cores
Device 3: NVIDIA GeForce RTX 2080 Ti
	 Compute Capability: 7.5
	 Total Memory: 11016 megabytes
	 (68) Multiprocessors * (128) CUDA Cores per Multiprocessor = 8704 CUDA Cores
Other Attributes:
	 ASYNC_ENGINE_COUNT: 3
	 CAN_MAP_HOST_MEMORY: 1
	 CLOCK_RATE: 1545000
	 COMPUTE_CAPABILITY_MAJOR: 7
	 COMPUTE_CAPABILITY_MINOR: 5
	 COMPUTE_MODE: DEFAULT
	 CONCURRENT_KERNELS: 1
	 ECC_ENABLED: 0
	 GLOBAL_L1_CACHE_SUPPORTED: 1
	 GLOBAL_MEMORY_BUS_WID