In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Display the devices TensorFlow reports; later cells place work on '/GPU:0'.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# Seed NumPy's legacy global RNG so the two operands are reproducible.
np.random.seed(2022)
n = 50
# Draw A first, then B, from the same stream; both are n x n float32 matrices.
A, B = (
    np.random.standard_normal((n, n)).astype(np.float32)
    for _ in range(2)
)

In [5]:
# Build both operands and run the product under the '/GPU:0' device scope.
with tf.device('/GPU:0'):
    dA = tf.constant(A, dtype=tf.float32)
    dB = tf.constant(B, dtype=tf.float32)
    # Multiply the tensors created above. Passing the NumPy arrays A and B
    # here instead would leave dA and dB unused.
    dC = tf.matmul(dA, dB)

# Convert the result tensor to a NumPy array so it can be compared with np.dot.
C = dC.numpy()

In [6]:
# First row of the TensorFlow product (C comes from dC.numpy()).
C[0]

array([-4.2651682e+00,  4.4717894e+00,  2.8567140e+00,  1.1422405e+00,
       -6.4879456e+00, -4.8427286e+00, -6.4231067e+00,  4.0089178e+00,
       -4.5662951e+00, -3.9613004e+00,  3.1529098e+00, -2.8642151e+00,
        1.5266349e+01,  8.8474407e+00,  1.3822375e+01, -1.1303354e+01,
        1.7988284e+00,  1.3061624e+00, -9.8149610e-01, -1.2086377e+00,
        1.2391439e+01, -3.9272158e+00, -4.4011540e+00,  5.5740180e+00,
       -4.9696283e+00,  5.6897240e+00, -1.0580479e+01, -1.3111970e+01,
       -2.0447900e+00, -3.0077927e+00, -1.1668243e+00, -6.6540704e+00,
        3.6730709e+00, -5.8680348e+00,  6.0824614e+00,  9.8098640e+00,
       -3.2595110e-01, -4.4762983e+00, -3.1373346e-01, -9.2297792e-03,
        1.8256099e+00, -1.7101532e+00,  4.2715921e+00,  1.1261980e+01,
       -6.8811326e+00, -1.1625242e+00, -7.9430017e+00, -7.8587799e+00,
        4.6561575e-01,  1.0123330e+01], dtype=float32)

In [7]:
# First row of the same product computed with NumPy, as a reference for C[0].
np.dot(A,B)[0]

array([-4.2651687e+00,  4.4717889e+00,  2.8567147e+00,  1.1422411e+00,
       -6.4879460e+00, -4.8427286e+00, -6.4231071e+00,  4.0089183e+00,
       -4.5662951e+00, -3.9613001e+00,  3.1529100e+00, -2.8642142e+00,
        1.5266350e+01,  8.8474398e+00,  1.3822375e+01, -1.1303352e+01,
        1.7988281e+00,  1.3061628e+00, -9.8149627e-01, -1.2086369e+00,
        1.2391439e+01, -3.9272153e+00, -4.4011531e+00,  5.5740185e+00,
       -4.9696288e+00,  5.6897259e+00, -1.0580478e+01, -1.3111972e+01,
       -2.0447896e+00, -3.0077918e+00, -1.1668241e+00, -6.6540699e+00,
        3.6730716e+00, -5.8680353e+00,  6.0824609e+00,  9.8098640e+00,
       -3.2595092e-01, -4.4762979e+00, -3.1373355e-01, -9.2294300e-03,
        1.8256099e+00, -1.7101533e+00,  4.2715926e+00,  1.1261982e+01,
       -6.8811331e+00, -1.1625246e+00, -7.9430013e+00, -7.8587794e+00,
        4.6561626e-01,  1.0123331e+01], dtype=float32)

In [8]:
import numpy as np
import tensorflow as tf
import time

# Benchmark: time `niter` repetitions of an n x n float32 matrix product
# issued under the '/GPU:0' device scope, storing one duration per iteration.
np.random.seed(2022)
n = 2500
A = np.random.randn(n,n).astype(np.float32)
B = np.random.randn(n,n).astype(np.float32)
niter = 10
comp_time_tf = np.zeros(niter)  # per-iteration elapsed seconds

for i in range(niter):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # durations; time.time() follows the wall clock and can jump.
    t1 = time.perf_counter()
    with tf.device('/GPU:0'):
        dA = tf.constant(A, dtype=tf.float32)
        dB = tf.constant(B, dtype=tf.float32)
        dC = tf.matmul(dA, dB)
    # The timed region ends after .numpy(), so each sample covers tensor
    # creation, the multiply, and conversion of the result to a NumPy array.
    C = dC.numpy()
    t2 = time.perf_counter()
    comp_time_tf[i] = t2 - t1

    print('\n ',i+1,'-th iteration, Elapsed Time: ', comp_time_tf[i])



  1 -th iteration, Collapsed Time:  0.05407547950744629

  2 -th iteration, Collapsed Time:  0.0642099380493164

  3 -th iteration, Collapsed Time:  0.0541529655456543

  4 -th iteration, Collapsed Time:  0.05482363700866699

  5 -th iteration, Collapsed Time:  0.035675048828125

  6 -th iteration, Collapsed Time:  0.0596919059753418

  7 -th iteration, Collapsed Time:  0.05087733268737793

  8 -th iteration, Collapsed Time:  0.04907536506652832

  9 -th iteration, Collapsed Time:  0.04673337936401367

  10 -th iteration, Collapsed Time:  0.04918670654296875


In [9]:
# Average per-iteration time in seconds, rounded to four decimal places.
comp_time_tf.mean().round(4)

0.0519