In [1]:
import time
import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn

In [4]:
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
ctx

gpu(0)

In [2]:
!nvidia-smi

Sun Jan 12 21:33:29 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 390.116                Driver Version: 390.116                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GT 745M     Off  | 00000000:01:00.0 N/A |                  N/A |
| N/A   58C    P8    N/A /  N/A |    114MiB /  2002MiB |     N/A      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [3]:
mx.cpu(), mx.gpu()

(cpu(0), gpu(0))

In [4]:
x = nd.array([1, 2, 3])

In [5]:
x.context

cpu(0)

In [6]:
# GPU上的存储
x_gpu = nd.array([1,2,3], ctx=mx.gpu(0))

In [7]:
x_uni = nd.random.uniform(shape=(2, 3), ctx=mx.gpu(0))

In [8]:
x_uni


[[0.6686509  0.17409194 0.3850025 ]
 [0.24678314 0.35134333 0.8404298 ]]
<NDArray 2x3 @gpu(0)>

In [9]:
x_gpu_copy = x.copyto(mx.gpu())

In [10]:
x_gpu_copy


[1. 2. 3.]
<NDArray 3 @gpu(0)>

In [11]:
(x_uni + 2).exp() * x_gpu_copy


[[14.420502 17.588388 32.57727 ]
 [ 9.457265 20.99933  51.36937 ]]
<NDArray 2x3 @gpu(0)>

In [12]:
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(ctx=mx.gpu())

In [13]:
net(x_gpu_copy)


[[0.0068339 ]
 [0.01366779]
 [0.02050169]]
<NDArray 3x1 @gpu(0)>

In [14]:
net[0].weight.data()


[[0.0068339]]
<NDArray 1x1 @gpu(0)>

In [27]:
# 试一下大矩阵的乘法
n = 784
cpu_hist = list()
gpu_hist = list()
for m in range(10000000, 100000000, 10000000):
    print("Compute m=", m, end=', ')
    X_cpu = nd.random.uniform(shape=(m, n))
    W_cpu = nd.random.uniform(shape=(n, m))
    X_gpu = nd.random.uniform(shape=(m, n), ctx=mx.gpu(0))
    W_gpu = nd.random.uniform(shape=(n, m), ctx=mx.gpu(0))
    # CPU
    for _ in range(10):
        cpu_time = time.time()
        res = nd.dot(X_cpu, W_cpu)
    avg_time = (time.time() - cpu_time) / 10
    cpu_hist.append(avg_time)
    print("CPU Avg Time: ", avg_time, end=', ')
    # GPU
    gpu_time = time.time()
    for _ in range(10):
        gpu_time = time.time()
        nd.dot(X_gpu, W_gpu)
    avg_time = (time.time() - gpu_time) / 10
    gpu_hist.append(gpu_time)
    print("GPU Avg Time: ", avg_time)

Compute m= 1000000000, CPU Avg Time:  5.078315734863281e-06, GPU Avg Time:  3.1948089599609376e-06
Compute m= 2000000000, CPU Avg Time:  2.9087066650390623e-06, GPU Avg Time:  3.0755996704101563e-06
Compute m= 3000000000, CPU Avg Time:  2.408027648925781e-06, GPU Avg Time:  2.360343933105469e-06
Compute m= 4000000000, CPU Avg Time:  2.1219253540039063e-06, GPU Avg Time:  2.2411346435546876e-06
Compute m= 5000000000, CPU Avg Time:  2.1696090698242187e-06, GPU Avg Time:  2.2172927856445314e-06
Compute m= 6000000000, CPU Avg Time:  2.0742416381835936e-06, GPU Avg Time:  2.288818359375e-06
Compute m= 7000000000, CPU Avg Time:  2.0742416381835936e-06, GPU Avg Time:  2.2649765014648438e-06
Compute m= 8000000000, CPU Avg Time:  2.0742416381835936e-06, GPU Avg Time:  2.2172927856445314e-06
Compute m= 9000000000, CPU Avg Time:  2.0503997802734374e-06, GPU Avg Time:  2.193450927734375e-06
