In [3]:
import torch
from sympy import Matrix, init_printing, cos, sin, rad, N
import random
import time
from concurrent.futures import ProcessPoolExecutor
import timeit
import subprocess

# Initialize pretty printing
init_printing()


In [4]:
print(torch.__version__)

2.2.1+cu121


$$
\mathbf{A} = \begin{pmatrix} {11} & {12} & {13} \\ {21} & {22} & {23} \end{pmatrix}
$$


$$

a \in \mathbb{R}
$$

$$

\mathbf{M} = \begin{pmatrix} 
m_{11} & m_{12} & \cdots & m_{1n} \\ 
m_{21} & m_{22} & \cdots & m_{2n} \\ 
\vdots & \vdots & \ddots & \vdots \\ 
m_{m1} & m_{m2} & \cdots & m_{mn} 
\end{pmatrix}
$$
$$
\mathbf{M}^\top
$$

$$
\mathbf{M}^{-1}
$$

$$
\vec{u} \cdot \vec{v} = u_1v_1 + u_2v_2 + \cdots + u_nv_n

$$

$$
\mathbf{T} = T_{ijk} \quad \text{where } i, j, k \text{ are indices.}
$$

In [5]:

x = torch.rand(5, 3)
print(x)

tensor([[0.1566, 0.9445, 0.8768],
        [0.3462, 0.4169, 0.7194],
        [0.0064, 0.4543, 0.8027],
        [0.6691, 0.1408, 0.3926],
        [0.4141, 0.8113, 0.1571]])


$$
\mathbf{A} = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}
\\
\mathbf{A}^{-1} = \begin{pmatrix}
d & -b \\
-c & a
\end{pmatrix}
$$

$$
\mathbf{AA}^{-1} = \begin{pmatrix}
ad & -bc \\
-cb & ad
\end{pmatrix}
$$

In [6]:
# Define your vector
vector = [1, 2, 3]

# Convert the vector to a LaTeX formatted string
Matrix(vector)


⎡1⎤
⎢ ⎥
⎢2⎥
⎢ ⎥
⎣3⎦

In [7]:
len1 = 8
deg1 = 190
len2 = 3
deg2 = 310

# vector = [
#     N(cos(rad(deg1))*len1 + cos(rad(deg2))*len2), 
#     N(sin(rad(deg1))*len1 + sin(rad(deg2))*len2)]
# vector



In [8]:
def vec(len, deg):
    return len*Matrix([cos(rad(deg)), sin(rad(deg))])

v = vec(8, 190)+vec(3, 310)
N(v)

⎡-5.95009919503805⎤
⎢                 ⎥
⎣-3.68731875069238⎦

In [9]:
def det(m):
    size = len(m)
    det = 1
    for row in range(size):
        swap = row+1
        while m[row][row] == 0:
            if (swap == size):
                return 0
            m[row], m[swap] = m[swap], m[row]
            det*=-1
            swap+=1
        for op in range(row+1, size):
            for col in range(row+1, size):
                m[op][col] = m[op][col] - m[row][col] * m[op][row] / m[row][row]
            m[op][row] = 0
    for i in range(size):
        det*=m[i][i]
    return det

def mult(m1, m2):
    dim = len(m1[0])
    if dim != len(m2):
        raise ValueError("Invalid matrix dimensions")
    rows = len(m1)
    cols = len(m2[0])
    result = [[0 for c in range(cols)] for r in range(rows)]
    for r in range(rows):
        for c in range(cols):
            for d in range(dim):
                result[r][c] += m1[r][d]*m2[d][c]
    return result
    
def rand_matrix(size):
    return [[torch.rand(1).item() for i in range(size)] for j in range(size)]

In [10]:
vec = rand_matrix(10)
ten = torch.tensor(vec).float()
m = Matrix(vec)

cycles = 1000

start = time.time()
a = det(vec)
end = time.time()
print("mine: " + str((end-start)*1000000))

start = time.time()
b = torch.det(ten)
end = time.time()
print("torch: " + str((end-start)*1000000))


print("mine: " + str(a))
print("torch: " + str(b.item()))
N(m)

mine: 97.03636169433594
torch: 515.4609680175781
mine: -0.016750264986841377
torch: -0.016750268638134003


⎡0.752239644527435  0.364496409893036   0.965676724910736   0.62749171257019  
⎢                                                                             
⎢0.553320705890656  0.398569524288177   0.631999671459198  0.848053097724915  
⎢                                                                             
⎢0.829836845397949  0.813796997070313   0.234280228614807  0.0403961539268494 
⎢                                                                             
⎢0.787140071392059  0.690531849861145   0.925010681152344  0.619488894939423  
⎢                                                                             
⎢0.19734126329422   0.593598484992981   0.451926827430725  0.164760231971741  
⎢                                                                             
⎢0.332162022590637  0.938750505447388   0.767078816890717  0.468356072902679  
⎢                                                                             
⎢0.575127899646759  0.382499992847443   0.4933288097

In [11]:




# ts1 = []
# for i in range(cycles):
#     ts1.append(torch.tensor(rand_matrix(9)).float())

# ts2 = []
# for i in range(cycles):
#     ts2.append(torch.tensor(rand_matrix(9)).float())

# start = time.time()
# for i in range(1):
#     m1 = mult(v1, v2)
# end = time.time()
# print("mine: " + str((end-start)*1000000))

# start = time.time()
# for i in range(cycles):
#     m2 = ts1[i] @ ts2[i]
# end = time.time()
# print("torch: " + str((end-start)*1000000))



In [18]:
size = 2048

v1 = rand_matrix(size)
c1 = torch.tensor(v1).float()



# g1 = torch.tensor(v1).float().to("cuda")

v2 = rand_matrix(size)
c2 = torch.tensor(v2).float()
# g2 = torch.tensor(v2).float().to("cuda")

start = time.time()
g1 = c1.to("cuda")
g2 = c2.to("cuda")
end = time.time()
print("gpu: " + str((end-start)*1000000))

# %timeit t1 @ t2
# %timeit mult(v1, v2)

# Warm-up
# for _ in range(100):
#     torch.matmul(torch.rand(500,500).to("mps"), torch.rand(500,500).to("mps"))

# start = time.time()
# r1 = c1 @ c2
# end = time.time()
# print("cpu: " + str((end-start)*1000000))

start = time.time()
r2 = g1 @ g2
end = time.time()
print("gpu: " + str((end-start)*1000000))

start = time.time()
r_cpu = r2.cpu()
end = time.time()
print("transfer: " + str((end-start)*1000000))

# start = time.time()
# Matrix(mult(v1, v2))
# end = time.time()
# print("mine: " + str((end-start)*1000000))




to cuda: 55533.88595581055
gpu: 289.67857360839844
transfer: 6518.125534057617


In [None]:
# Assuming your compiled C program is named 'matmul' and is in the current directory
start = time.time()
result = subprocess.run(['../c/matmul'])
end = time.time()
print("c: " + str((end-start)*1000000))

c: 1141057.9681396484


In [19]:
size = 2048

for i in range(0, 10):
    v1 = rand_matrix(size)
    c1 = torch.tensor(v1).float()
    v2 = rand_matrix(size)
    c2 = torch.tensor(v2).float()

    start = time.time()
    g1 = c1.to("cuda")
    g2 = c2.to("cuda")
    end = time.time()
    print("to cuda: " + str((end-start)*1000000))

    start = time.time()
    r2 = g1 @ g2
    end = time.time()
    print("mult: " + str((end-start)*1000000))

    start = time.time()
    r_cpu = r2.cpu()
    end = time.time()
    print("to cpu: " + str((end-start)*1000000))

to cuda: 55465.2214050293
mult: 56.98204040527344
to cpu: 1293.6592102050781
to cuda: 1890.1824951171875
mult: 56.98204040527344
to cpu: 1243.3528900146484
to cuda: 1909.9712371826172
mult: 67.23403930664062
to cpu: 4726.886749267578
to cuda: 1959.5623016357422
mult: 66.04194641113281
to cpu: 1249.7901916503906
to cuda: 1991.9872283935547
mult: 128.26919555664062
to cpu: 4884.7198486328125
to cuda: 1883.0299377441406
mult: 66.99562072753906
to cpu: 4705.4290771484375
to cuda: 1861.0954284667969
mult: 68.90296936035156
to cpu: 4748.58283996582
to cuda: 1864.1948699951172
mult: 69.14138793945312
to cpu: 1268.148422241211
to cuda: 1924.0379333496094
mult: 68.90296936035156
to cpu: 1243.59130859375
to cuda: 1883.2683563232422
mult: 68.66455078125
to cpu: 1222.6104736328125
