In [1]:
import timeit
import torch
import random

x = torch.ones(50000000, device='mps')
timeit.timeit(lambda: x * random.randint(0,100), number=1)

0.018089125000017248

In [2]:
import timeit
import torch
import random

x = torch.ones(50000000, device='cpu')
timeit.timeit(lambda: x * random.randint(0,100), number=1)

0.055619124999992664

In [3]:
from time import process_time
import torch

def testgpu():
    if torch.backends.mps.is_available():
        mps_device = torch.device("mps")
    t0 = process_time()
    x = torch.ones(n1, device=mps_device)
    y = x + torch.rand(n1, device=mps_device)
    t1 = process_time()
    print(f"Total time with gpu ({n1}): {t1-t0}")
    t0 = process_time()
    x = torch.ones(n2, device=mps_device)
    y = x + torch.rand(n2, device=mps_device)
    t1 = process_time()
    print(f"Total time with gpu ({n2}): {t1-t0}")

def testcpu():
    t0 = process_time()
    x = torch.ones(n1)
    y = x + torch.rand(n1)
    t1 = process_time()
    print(f"Total time with cpu ({n1}): {t1-t0}")
    t0 = process_time()
    x = torch.ones(n2)
    y = x + torch.rand(n2)
    t1 = process_time()
    print(f"Total time with cpu ({n2}): {t1-t0}")

if __name__ == '__main__':
    n1 = 10000
    n2 = 100000000
    testcpu()
    testgpu()

Total time with cpu (10000): 0.0038329999999999753
Total time with cpu (100000000): 0.9997750000000001
Total time with gpu (10000): 0.01377600000000001
Total time with gpu (100000000): 0.012073


In [4]:
import torch
import transformers
import time
import huggingface_hub

print("Pytorch", torch.__version__)

tokenizer = transformers.AutoTokenizer.from_pretrained("gpt2")
tokens = tokenizer("deep learning is", return_tensors="pt")
mps_tokens = tokens.copy().to("mps")

mps_model = transformers.AutoModelForCausalLM.from_pretrained("gpt2").to("mps")
mps_model.eval()
print("Model 1:", mps_model.device)

cpu_model = transformers.AutoModelForCausalLM.from_pretrained("gpt2")
cpu_model.eval()
print("Model 2:", cpu_model.device)

steps = 30
ts = time.monotonic()
for _ in range(steps):
    out = cpu_model(**tokens)
print("CPU %.3f sec" % (time.monotonic() - ts))

ts = time.monotonic()
for _ in range(steps):
    out = mps_model(**mps_tokens)
print("MPS %.3f sec" % (time.monotonic() - ts))

Pytorch 2.2.1
Model 1: mps:0
Model 2: cpu
CPU 1.493 sec
MPS 1.305 sec
