In [1]:
import time
import numpy as np
import numba as nb

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Edge lengths of the square (size x size) matrices used by the compiled runs.
sizes = [256, 1024, 4096]
# The pure-Python benchmark only iterates over the two smallest sizes.
sizes_small = sizes[:2]
# Number of timed repetitions that are averaged for each matrix size.
iterations = 10

## pythonでの行列積速度

In [3]:
def matmul(x, y):
    """Multiply two matrices with a naive triple loop.

    Only ``len()``, integer indexing, list appends and a constant-message
    ``raise`` are used, so the same function can be benchmarked as plain
    Python and also be passed to ``nb.njit``.

    Args:
        x: Left operand, indexed as ``x[i][k]`` (rows of length ``len(y)``).
        y: Right operand, indexed as ``y[k][j]``.

    Returns:
        A list with ``len(x)`` rows, each a list of ``len(y[0])`` elements,
        where element ``[i][j]`` is the sum over ``k`` of
        ``x[i][k] * y[k][j]``. An empty ``x`` yields ``[]``.

    Raises:
        ValueError: If the number of columns of ``x`` (taken from its first
            row) differs from the number of rows of ``y``.
    """
    inner = len(y)
    # Without this check a y with more rows than x has columns would be
    # silently truncated and a wrong product returned.
    if len(x) > 0 and len(x[0]) != inner:
        raise ValueError("matmul: number of columns of x must equal number of rows of y")

    out = []
    for i in range(len(x)):
        out_row = []
        for j in range(len(y[0])):
            out_elem = 0
            for k in range(inner):
                out_elem += x[i][k] * y[k][j]
            out_row.append(out_elem)
        out.append(out_row)
    return out

In [4]:
# Benchmark the pure-Python matmul on nested lists for each small matrix size.
for size in sizes_small:
    # .tolist() turns the float32 arrays into nested Python lists, so the
    # timed code indexes lists rather than ndarrays.
    x = np.random.randn(size, size).astype(np.float32).tolist()
    y = np.random.randn(size, size).astype(np.float32).tolist()

    # Untimed warm-up run (the first run is excluded from the average).
    matmul(x, y)

    d_sec = 0
    for i in range(iterations):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump with wall-clock changes.
        start = time.perf_counter()
        res = matmul(x, y)
        end = time.perf_counter()
        d_sec += (end - start)

    # Mean seconds per run; the value printed below is in milliseconds.
    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec*1000}")

行列サイズ = 256
処理時間 = 1474.1198778152466
行列サイズ = 1024
処理時間 = 112160.70013046265


In [5]:
# for size in sizes_small:
#     x = np.random.randn(size, size).astype(np.float32)
#     y = np.random.randn(size, size).astype(np.float32)

#     d_sec = 0
#     for i in range(iterations+1):
#         start = time.time()
#         res = matmul(x, y)
#         end = time.time()
#         if i != 0:
#             d_sec += (end - start)

#     d_sec /= iterations
#     print(f"行列サイズ = {size}")
#     print(f"処理時間 = {d_sec*1000}")

## numpy (intel MKL)での行列積速度

In [6]:
# np.show_config()

In [7]:
# for size in sizes:
#     x = np.random.randn(size, size).astype(np.float32)
#     y = np.random.randn(size, size).astype(np.float32)

#     d_sec = 0
#     for i in range(iterations):
#         start = time.time()
#         res = np.dot(x, y)
#         end = time.time()
#         d_sec += (end - start)

#     d_sec /= iterations
#     print(f"行列サイズ = {size}")
#     print(f"処理時間 = {d_sec*1000}")


## numba (jit)

In [8]:
# Wrap the pure-Python matmul with Numba's njit (nopython-mode JIT) decorator.
# NOTE(review): compilation presumably happens lazily on the first call, which
# is why the benchmark cell performs a warm-up call before timing — confirm.
matmul_njit = nb.njit(matmul)

In [9]:
# Benchmark the Numba-compiled matmul on float32 ndarrays for each matrix size.
for size in sizes:
    x = np.random.randn(size, size).astype(np.float32)
    y = np.random.randn(size, size).astype(np.float32)

    # Warm-up: an untimed call so that any one-time cost of the first
    # invocation (e.g. JIT compilation) is kept out of the measurement.
    matmul_njit(x, y)

    d_sec = 0
    for i in range(iterations):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump with wall-clock changes.
        start = time.perf_counter()
        res = matmul_njit(x, y)
        end = time.perf_counter()
        d_sec += (end - start)

    # Mean seconds per run; the value printed below is in milliseconds.
    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec*1000}")

行列サイズ = 256
処理時間 = 18.80359649658203
行列サイズ = 1024
処理時間 = 1348.9588499069214
行列サイズ = 4096
処理時間 = 409613.67185115814


## numpy (openblas)での行列積速度

In [10]:
import time
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [11]:
# Edge lengths of the square (size x size) matrices to benchmark.
sizes = [256, 1024, 4096]
# Number of timed runs that are averaged for each matrix size.
iterations = 10

In [12]:
# Print NumPy's build configuration to show which BLAS/LAPACK libraries
# this install is linked against (relevant for the np.dot timings below).
np.show_config()

blas_mkl_info:
  NOT AVAILABLE
blis_info:
  NOT AVAILABLE
openblas_info:
    libraries = ['openblas', 'openblas']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None)]
    runtime_library_dirs = ['/usr/local/lib']
blas_opt_info:
    libraries = ['openblas', 'openblas']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None)]
    runtime_library_dirs = ['/usr/local/lib']
lapack_mkl_info:
  NOT AVAILABLE
openblas_lapack_info:
    libraries = ['openblas', 'openblas']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None)]
    runtime_library_dirs = ['/usr/local/lib']
lapack_opt_info:
    libraries = ['openblas', 'openblas']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None)]
    runtime_library_dirs = ['/usr/local/lib']
Supported SIMD extensions in this NumPy install:
    baseline = SSE,SSE2,SSE3
    found = SSSE3,SSE4

In [14]:
# Benchmark np.dot on float32 ndarrays for each matrix size.
for size in sizes:
    x = np.random.randn(size, size).astype(np.float32)
    y = np.random.randn(size, size).astype(np.float32)

    # Warm-up: one untimed call, consistent with the other benchmarks, so that
    # any one-time setup cost of the first call is not averaged in.
    np.dot(x, y)

    d_sec = 0
    for i in range(iterations):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() can jump with wall-clock changes.
        start = time.perf_counter()
        res = np.dot(x, y)
        end = time.perf_counter()
        d_sec += (end - start)

    # Mean seconds per run; the value printed below is in milliseconds.
    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec*1000}")

行列サイズ = 256
処理時間 = 0.08473396301269531
行列サイズ = 1024
処理時間 = 2.4017333984375
行列サイズ = 4096
処理時間 = 162.8296136856079
