In [230]:
import torch
from timeit import default_timer as timer
import timeit
import torch
import importlib
import src.physics as physics
importlib.reload(physics)
import EINCASMConfig 
importlib.reload(EINCASMConfig)

cfg = EINCASMConfig.Config('config.yaml')

In [235]:
def init_tensors(config, w, h):
    klen = config.kernel.shape[0]
    muscle_radii = torch.randn((klen, w, h), device=config.device, dtype=config.float_dtype)
    radii_deltas = torch.randn((klen, w, h), device=config.device, dtype=config.float_dtype)
    capital = torch.rand((w, h), device=config.device, dtype=config.float_dtype)
    growth_efficiency = torch.rand((w, h), device=config.device, dtype=config.float_dtype)
    open_cells = torch.randint(0,2,(w, h), device=config.device, dtype=torch.bool)

    muscle_masks = physics.generate_muscle_masks(config, open_cells)
    muscle_radii *= muscle_masks
    capital *= open_cells
    
    return muscle_radii, radii_deltas, capital, growth_efficiency, muscle_masks, open_cells

w = 1000
h = 1000
# cfg.set_kernel([[0,0]])
reps = 10

cfg.update_device('mps')



muscle_radii, radii_deltas, capital, growth_efficiency, muscle_masks, open_cells = init_tensors(cfg, w, h)
start = timer()
for _ in range (reps):
    physics.grow_muscle_csa(cfg, muscle_radii, radii_deltas, capital, growth_efficiency, muscle_masks, open_cells)
end = timer()

print(f"mps grow muscle: {(end-start)/reps}")


cfg.update_device('cpu')
torch.mps.empty_cache()

muscle_radii, radii_deltas, capital, growth_efficiency, muscle_masks, open_cells = init_tensors(cfg, w, h)
start = timer()
for i in range (reps):
    physics.grow_muscle_csa(cfg, muscle_radii, radii_deltas, capital, growth_efficiency, muscle_masks, open_cells)
end = timer()

print(f"cpu grow muscle: {(end-start)/reps}")

mps grow muscle: 0.0026886457984801383
cpu grow muscle: 0.023998370801564306


In [92]:
w=10000
h=10000
reps = 100
cfg.update_device('mps')

a = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
b = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
c = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
d = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
a1 = a.clone()
b1 = b.clone()
c1 = c.clone()
d1 = d.clone()

torch.mps.empty_cache()
start = timer()
for _ in range (reps):
    a1 = torch.sign(b1.add(c1))
    torch.mul(torch.sqrt(d1), a1, out=b1)  
end = timer()
print(f"stored: {(end-start)/reps}")

torch.mps.empty_cache()

start = timer()
for _ in range (reps):
    a = torch.sign(b.add(c))
    torch.mul(torch.sqrt(d), a, out=b)  
end = timer()
print(f"unstored: {(end-start)/reps}")


stored: 0.020247973329969682
unstored: 0.02023460999975214


# Interesting: Empty Cache!

Consider the time difference between the identical code snippets

In [104]:
w=10000
h=10000
reps = 10
cfg.update_device('mps')

a = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
b = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
c = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
d = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)


torch.mps.empty_cache()
start = timer()
for _ in range (reps):
    a = torch.sign(b.add(c))
    torch.mul(torch.sqrt(d), a, out=b)  
end = timer()
print(f"stored: {(end-start)/reps}")

torch.mps.empty_cache()
start = timer()
for _ in range (reps):
    a = torch.sign(b.add(c))
    torch.mul(torch.sqrt(d), a, out=b)  
end = timer()
print(f"unstored: {(end-start)/reps}")

stored: 0.0003061457973672077
unstored: 0.00027897910040337593


In [105]:
w=10000
h=10000
reps = 10
cfg.update_device('mps')

a = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
b = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
c = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)
d = torch.randn(w, h, device = cfg.device, dtype = cfg.float_dtype)

start = timer()
for _ in range (reps):
    a = torch.sign(b.add(c))
    torch.mul(torch.sqrt(d), a, out=b)    
end = timer()
print(f"stored: {(end-start)/reps}")

start = timer()
for _ in range (reps):
    a = torch.sign(b.add(c))
    torch.mul(torch.sqrt(d), a, out=b)  
end = timer()
print(f"unstored: {(end-start)/reps}")

stored: 0.00043198749772273005
unstored: 0.015266337498906069


# ...

In [2]:
device = torch.device('mps')

In [4]:
num = 1000
mps_setup = """
import torch
mps_device = torch.device('mps')

x = torch.randn(1000, 1000, device=mps_device)
y = torch.randn(1000, 1000, device=mps_device)
"""

cpu_setup = """
import torch
cpu_device = torch.device('cpu')

x = torch.randn(1000, 1000, device=cpu_device)
y = torch.randn(1000, 1000, device=cpu_device)
"""

mps_code = """
z = torch.where(x>0, x, y)
"""

cpu_code = """
z = torch.where(x>0, x, y)
"""

print('MPS time: ', timeit.timeit(setup=mps_setup, stmt=mps_code, number=num))
print('CPU time: ', timeit.timeit(setup=cpu_setup, stmt=cpu_code, number=num))

MPS time:  0.19086349999997765
CPU time:  0.5777838329959195


In [12]:
import torch
import importlib
import src.physics
import EINCASMConfig as cfg
importlib.reload(cfg)
importlib.reload(src.physics)

<module 'src.physics' from '/Users/aidanbx/CS/EINCASM/src/physics.py'>

In [19]:
times1

[1.4439228330011247, 1.3595647920010379, 1.3719559580058558]

In [4]:
import timeit
setup_code = """
import torch
t = torch.randn(10000)
t2 = torch.randn(10000)
"""

test_code1 = """
t -= t2
"""

test_code2 = """
t.sub_(t2)
"""

times1 = timeit.repeat(setup=setup_code, stmt=test_code1, repeat=3, number=10000)
times2 = timeit.repeat(setup=setup_code, stmt=test_code2, repeat=3, number=10000)

print(f"Average time for 't -= t2': {sum(times1) / len(times1)}")
print(f"Average time for 't.sub_(t2)': {sum(times2) / len(times2)}")


Average time for 't -= t2': 0.014941791669116355
Average time for 't.sub_(t2)': 0.011395708002964966


In [2]:
setup_code = """
import torch
a = torch.randn(10000)
b = torch.randn(10000)
c = torch.randn(10000)
"""

test_code1 = """
a.addcmul_(-b, c)
"""

test_code2 = """
a.sub_(b * c)
"""

test_code3 = """
a -= b * c
"""

times1 = timeit.repeat(setup=setup_code, stmt=test_code1, repeat=3, number=10000)
times2 = timeit.repeat(setup=setup_code, stmt=test_code2, repeat=3, number=10000)
times3 = timeit.repeat(setup=setup_code, stmt=test_code3, repeat=3, number=10000)

print(f"Average time for 'a.addcmul_(-b, c)': {sum(times1) / len(times1)}")
print(f"Average time for 'a.sub_(b * c)': {sum(times2) / len(times2)}")
print(f"Average time for 'a -= b * c': {sum(times3) / len(times3)}")

Average time for 'a.addcmul_(-b, c)': 0.034066513670647204
Average time for 'a.sub_(b * c)': 0.032938444666797295
Average time for 'a -= b * c': 0.033240666661489136


In [3]:
setup_code_unsqueeze = """
import torch
t = torch.randn(10000)
"""

test_code_unsqueeze = """
t.unsqueeze(1)
"""

test_code_none = """
t[:, None]
"""

times_unsqueeze = timeit.repeat(setup=setup_code_unsqueeze, stmt=test_code_unsqueeze, repeat=3, number=10000)
times_none = timeit.repeat(setup=setup_code_unsqueeze, stmt=test_code_none, repeat=3, number=10000)

print(f"Average time for 't.unsqueeze(1)': {sum(times_unsqueeze) / len(times_unsqueeze)}")
print(f"Average time for 't[:, None]': {sum(times_none) / len(times_none)}")


Average time for 't.unsqueeze(1)': 0.005111194666824304
Average time for 't[:, None]': 0.009533514001911195


In [34]:
setup_code_unsqueeze = """
import torch
t = torch.randn(10000)
"""

test_code_where = """
t1 = torch.where(t>0.75, t*t, t)
"""

test_code_control = """
t1 = t.clone()
"""

test_code_all = """
t1 = t.clone()
t1 *= t1
t = torch.where(t>0.75, t1, t)
"""

test_code_batch = """
t1 = t.where(t>0.75, t,)
"""

times_where = timeit.repeat(setup=setup_code_unsqueeze, stmt=test_code_where, repeat=3, number=10000)
times_control = timeit.repeat(setup=setup_code_unsqueeze, stmt=test_code_control, repeat=3, number=10000)
times_all = timeit.repeat(setup=setup_code_unsqueeze, stmt=test_code_all, repeat=3, number=10000)

print(f"Average time for 'torch.where': {sum(times_where) / len(times_where)}")
print(f"Average time for 'control': {sum(times_control) / len(times_control)}")
print(f"Average time for 'all': {sum(times_all) / len(times_all)}")


Average time for 'torch.where': 0.13378423633791195
Average time for 'control': 0.010918666666839272
Average time for 'all': 0.13261576367464536


In [26]:
import torch
t = torch.tensor([1,2,3,4,5])
t1 = t * 1.0

t1.mul_(10)
print(t)

tensor([1, 2, 3, 4, 5])


In [63]:
setup_code = """
import torch
t = torch.randn((10,100,100))
threshold = 2
"""

whole = """
t1 = torch.where(t>2, t, torch.tensor(0.0))
t1*=t1
t1+=t1
t = torch.where(t>2, t1, t)
"""

batch = """
indices = t > threshold
vals = t[indices]
vals *= vals
vals += vals
t[indices] = vals
"""

inplace = """ 
indices = t > threshold
t[indices] *= t[indices]
t[indices] += t[indices]
"""

times_whole = timeit.repeat(setup=setup_code, stmt=whole, repeat=3, number=100)
times_batch = timeit.repeat(setup=setup_code, stmt=batch, repeat=3, number=100)
times_inplace = timeit.repeat(setup=setup_code, stmt=inplace, repeat=3, number=100)

print(f"Average time for 'whole': {sum(times_whole) / len(times_whole)}")
print(f"Average time for 'batch': {sum(times_batch) / len(times_batch)}")
print(f"Average time for 'inplace': {sum(times_inplace) / len(times_inplace)}")
times_whole


Average time for 'whole': 0.029083986339780193
Average time for 'batch': 0.03773087499818454
Average time for 'inplace': 0.10167401400394738


[0.03381441699457355, 0.02547270801733248, 0.027964834007434547]

In [74]:
import torch
kernel = torch.tensor([
    [0, 0],     # ORIGIN
    [-1, 0],    # UP
    [0, 1.0],   # RIGHT
    [1, 0],     # DOWN
    [0, -1]     # LEFT
])

muscle_radii = torch.randn((kernel.shape[0],1000,1000))

open_cells = torch.randint(0,2,(1000,1000), dtype=torch.bool)
muscle_masks = torch.ones_like(muscle_radii, dtype=torch.bool)

for i in range(kernel.shape[0]):
    muscle_masks[i] = torch.roll(open_cells, shifts=tuple(map(int, -kernel[i])), dims=[0, 1])

muscle_masks = muscle_masks&open_cells