### Numpy code

In [2]:
import numpy as np

# Example: Large matrices (adjust size as needed)
n = 7000  # For very large matrices, ensure you have enough RAM
A = np.random.rand(n, n).astype(np.float32)
B = np.random.rand(n, n).astype(np.float32)

C = np.dot(A, B)  # warm-up and Matrix multiplication

%timeit -r 2 -o np.dot(A, B)

print(f"Result shape: {C.shape}")
print(f"Result type: {C.dtype}")


1.06 s ± 5.51 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)
Result shape: (7000, 7000)
Result type: float32


### Código con torch

In [None]:
import torch
# Wrap the NumPy matrices A and B as torch tensors, then move them to the CUDA device.
A_torch = torch.from_numpy(A)
B_torch = torch.from_numpy(B)
A_gpu = A_torch.to('cuda')
B_gpu = B_torch.to('cuda')
    
# Warm-up: one untimed product; its result is kept in C_gpu for the report below.
C_gpu = torch.mm(A_gpu, B_gpu)
torch.cuda.synchronize()  # wait for pending GPU work before the timing starts
    
# Benchmark: each timed iteration runs the product followed by a synchronize.
%timeit -r 2 -o torch.mm(A_gpu, B_gpu); torch.cuda.synchronize()
    
print(f"Result shape: {C_gpu.shape}")
print(f"Result device: {C_gpu.device}")

### Resultados al lanzar en bohr
654 ms ± 1.88 ms per loop (mean ± std. dev. of 2 runs, 1 loop each)

Result shape: (7000, 7000)

Result type: float32

50 ms ± 30.2 μs per loop (mean ± std. dev. of 2 runs, 10 loops each)

Result shape: torch.Size([7000, 7000])

Result device: cuda:0

real    1m8.369s
user    0m43.256s
sys     0m2.466s

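Por lo tanto, vemos que la multiplicación de matrices con PyTorch en GPU es considerablemente más rápida que con NumPy en CPU (50 ms frente a 654 ms, unas 13 veces).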


### Código con NumPy y torch para pi

In [None]:
import sys
import numpy as np

def calc_pi_numpy(a, b):
    """Estimate pi with the Monte Carlo "points inside the unit circle" method.

    Parameters
    ----------
    a, b : numpy.ndarray
        x and y coordinates of the sample points. For the result to
        approximate pi they should be uniform in [-1, 1].

    Returns
    -------
    float
        4 * (number of points with a**2 + b**2 < 1) / (number of points).
    """
    # Squared distance from the origin, computed for all points at once.
    dist_sq = a**2 + b**2

    # Summing the boolean mask counts the points that fall inside the circle.
    hits = np.sum(dist_sq < 1.0)

    # Divide by the number of points actually passed in, not by the notebook
    # global N, so the function is self-contained and correct for any input size.
    return 4 * hits / dist_sq.size

N = 5*10**6
print(N)
#N = int(sys.argv[1])

# 1. Generate all N random coordinates at once
# Creates two arrays, x and y, each with N random numbers from -1 to 1
x = np.random.uniform(-1, 1, N).astype(np.float32)
y = np.random.uniform(-1, 1, N).astype(np.float32)
print(x.dtype)

pi = calc_pi_numpy(x,y)

%timeit -r3 calc_pi_numpy(x,y)

print("\n \t Computing pi with numpy: \n")
print("\t For %d trials, pi = %f\n" % (N,pi))



def calc_pi_pytorch(a, b):
    """Estimate pi by Monte Carlo; PyTorch counterpart of calc_pi_numpy.

    Parameters
    ----------
    a, b : torch.Tensor
        x and y coordinates of the sample points. For the result to
        approximate pi they should be uniform in [-1, 1].

    Returns
    -------
    float
        4 * (number of points with a**2 + b**2 < 1) / (number of points).
    """
    dist_sq = a**2 + b**2  # same expression as the NumPy version
    hits = torch.sum(dist_sq < 1.0)  # torch.sum instead of np.sum
    # .item() turns the one-element count tensor into a Python number.
    # Divide by the total element count: torch.sum counts hits over every
    # element, whereas len(a) would only report the size of the first dimension.
    return 4 * hits.item() / dist_sq.numel()


# Wrap the NumPy samples x and y as torch tensors, then move them to the CUDA device.
x_torch = torch.from_numpy(x)
y_torch = torch.from_numpy(y)

x_gpu = x_torch.to('cuda')
y_gpu = y_torch.to('cuda')
# Untimed first call: produces the estimate reported below.
pi_gpu = calc_pi_pytorch(x_gpu, y_gpu)
torch.cuda.synchronize()  # wait for pending GPU work before the timing starts
    
# Benchmark: each timed iteration runs the estimate followed by a synchronize.
%timeit -r3 calc_pi_pytorch(x_gpu, y_gpu); torch.cuda.synchronize()
    
print("\t For %d trials, pi = %f\n" % (N, pi_gpu))

### Resultados al lanzar en bohr
5000000

float32

8.34 ms ± 71 μs per loop (mean ± std. dev. of 3 runs, 100 loops each)


         Computing pi with numpy:
         

         For 5000000 trials, pi = 3.141784
         

491 μs ± 25.3 ns per loop (mean ± std. dev. of 3 runs, 1,000 loops each)

         For 5000000 trials, pi = 3.141784
         

Vemos que en el caso del cálculo de pi también disminuye considerablemente el tiempo de ejecución (491 μs con PyTorch en GPU frente a 8,34 ms con NumPy, unas 17 veces más rápido).