<a href="https://colab.research.google.com/github/deepakantony/colaboratory/blob/master/testing_gpu_acceleration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import time

# NOTE: the alias `ni` is rebound to an iteration count in a later cell;
# the rest of the notebook refers to NumPy as `np`.
import numpy as ni
import numpy as np
import numpy.linalg as LA
import torch
from scipy.linalg import orth

In [0]:
# Pick the CUDA device when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [0]:
# Two random square operands shared by the matmul timing cells below.
A = torch.randn((3000, 3000))
B = torch.randn((3000, 3000))
# Number of multiply-accumulate rounds each timing cell performs.
iters = 500

In [0]:
# Time `iters` rounds of C += A @ B on the GPU. The clock starts before the
# host-to-device copies and stops after the result is copied back, so the
# reported time includes both transfers.
t1 = time.time()
A_gpu = A.to("cuda")
B_gpu = B.to("cuda")
C_gpu = torch.zeros((3000, 3000), device="cuda")
for _ in range(iters):
  C_gpu = C_gpu + torch.mm(A_gpu, B_gpu)
# Bring the accumulated result back to the host before stopping the clock.
out = C_gpu.cpu()
t2 = time.time()
# The norm is printed so it can be compared against the CPU run.
print(LA.norm(out))
print(f"GPU Time: {t2-t1}")

82137250.0
GPU Time: 7.271770477294922


In [0]:
# Time the same `iters` rounds of C += A @ B directly on the CPU tensors.
t1 = time.time()
C = torch.zeros((3000, 3000))
for _ in range(iters):
  C = C + torch.mm(A, B)
out = C
t2 = time.time()
# The norm is printed so it can be compared against the GPU run.
print(LA.norm(out))
print(f"CPU Time: {t2-t1}")

82137250.0
CPU Time: 352.89118480682373


In [0]:
def run_iter_cpu(phi, psi, y, niters):
    """Run the iterative median-thresholding solver with NumPy.

    Starting from ``phi.T @ y``, each iteration applies a correction step
    ``x + phi.T @ (y - phi @ x)``, maps the estimate through ``psi.T``,
    zeroes every coefficient whose magnitude is below the median magnitude,
    maps back through ``psi`` and applies the correction step once more.

    Args:
        phi: array applied to the estimate (``phi @ x``) and, transposed,
            to the residual. Presumably 2-D with as many rows as ``y``.
        psi: array whose transpose produces the coefficients that get
            thresholded; ``psi`` itself maps them back.
        y: array of measurements the estimate is corrected against.
        niters: number of iterations; 0 returns the initial ``phi.T @ y``.

    Returns:
        The final estimate ``x`` as a NumPy array.
    """
    phi_t = phi.T
    psi_t = psi.T

    def correct(estimate):
        # Add the back-projected residual of the current estimate.
        return estimate + np.dot(phi_t, y - np.dot(phi, estimate))

    x = np.dot(phi_t, y)
    for _ in range(niters):
        x = correct(x)
        coeffs = np.dot(psi_t, x)
        magnitudes = np.abs(coeffs)
        # Thresholding: drop coefficients strictly below the median magnitude.
        coeffs[magnitudes < np.median(magnitudes)] = 0
        x = correct(np.dot(psi, coeffs))
    return x

In [0]:
def _median_mean_of_middles(values):
    """Median of all elements, averaging the two middle values for even counts.

    ``torch.median`` returns the lower of the two middle values when the
    element count is even, whereas ``np.median`` returns their mean. This
    helper follows the NumPy convention so both solvers threshold identically.
    """
    ordered = torch.sort(values.flatten()).values
    count = ordered.numel()
    # For an odd count both indices coincide, so this is the middle element.
    return (ordered[(count - 1) // 2] + ordered[count // 2]) / 2


def run_iter_pytorch_gpu(in_phi, in_psi, in_y, niters, device=None):
    """Run the iterative median-thresholding solver with PyTorch.

    Mirrors ``run_iter_cpu``: starting from ``phi.T @ y``, each iteration
    applies a correction step ``x + phi.T @ (y - phi @ x)``, maps the
    estimate through ``psi.T``, zeroes every coefficient whose magnitude is
    below the median magnitude, maps back through ``psi`` and applies the
    correction step again.

    Args:
        in_phi: NumPy array for the measurement operator (used with
            ``torch.mm``, so it must be 2-D).
        in_psi: NumPy array for the transform; must be 2-D.
        in_y: NumPy array of measurements; must be 2-D.
        niters: number of iterations; 0 returns the initial ``phi.T @ y``.
        device: optional torch device (or device string) to compute on.
            When omitted the function requires CUDA and uses it, as before.

    Returns:
        The final estimate as a NumPy array in host memory.

    Raises:
        AssertionError: if ``device`` is omitted and CUDA is unavailable.
    """
    if device is None:
        assert torch.cuda.is_available(), "There is no point running this without CUDA!"
        device = "cuda"
    target = torch.device(device)

    phi = torch.from_numpy(in_phi).to(target)
    psi = torch.from_numpy(in_psi).to(target)
    y = torch.from_numpy(in_y).to(target)

    phi_t = phi.t()
    psi_t = psi.t()

    x = torch.mm(phi_t, y)
    for _ in range(niters):
        x = x + torch.mm(phi_t, y - torch.mm(phi, x))
        xhat = torch.mm(psi_t, x)
        magnitudes = torch.abs(xhat)
        # Use the NumPy median convention so the threshold matches run_iter_cpu.
        t = _median_mean_of_middles(magnitudes)
        xhat[magnitudes < t] = 0  # thresholding
        x = torch.mm(psi, xhat)
        x = x + torch.mm(phi_t, y - torch.mm(phi, x))

    # Copy the result back to host memory and hand it over as a NumPy array.
    x_out = x.cpu().numpy()
    return x_out

In [0]:
# Problem sizes for the solver benchmark; `ni` is the iteration count passed
# to the solvers in the timing cells.
n, ni, b = 256, 1000, 50

N = b ** 2           # b*b
m = N // 4           # a quarter of N
nb = (n * n) // N    # whole multiples of N that fit in n*n
print(f'n = {n}  b = {b}  N = {N}  nb = {nb}  m = {m}  niters {ni}')

n = 256  b = 50  N = 2500  nb = 26  m = 625  niters 1000


In [0]:
# phi: orthogonalise a random N x N matrix, then keep only its first m rows.
phi = orth(np.random.randn(N, N).T).T[:m, :]

# psi: orthonormal basis built from a second random N x N matrix.
psi = orth(np.random.randn(N, N).T)

# Random right-hand side with one column per block.
y = np.random.randn(m, nb)

# Norms of the operators, reported alongside the solver timings.
nphi = LA.norm(phi)
npsi = LA.norm(psi)

In [0]:
# Wall-clock the NumPy solver, then report the elapsed time together with the
# operator norms and the norm of the solution.
t0 = time.time()
x = run_iter_cpu(phi, psi, y, niters=ni)
t1 = time.time()
nx = LA.norm(x)
print(f'CPU Time: {t1-t0:.3f} sec\nnorms phi {nphi:.4e} psi {npsi:.4e}  x {nx:.4e}')

CPU Time: 65.150 sec
norms phi 2.5000e+01 psi 5.0000e+01  x 1.3684e+02


In [0]:
# Wall-clock the PyTorch solver on the same inputs, then report the elapsed
# time together with the operator norms and the norm of the solution.
t0 = time.time()
x = run_iter_pytorch_gpu(phi, psi, y, niters=ni)
t1 = time.time()
nx = LA.norm(x)
print(f'Pytorch GPU Time: {t1-t0:.3f} sec\nnorms phi {nphi:.4e} psi {npsi:.4e}  x {nx:.4e}')

Pytorch GPU Time: 12.640 sec
norms phi 2.5000e+01 psi 5.0000e+01  x 1.3684e+02
