In [1]:
import time
import numpy as np
import numba as nb

In [2]:
sizes = [256, 1024, 4096]
iterations = 10

## pythonでの行列積速度

In [3]:
def matmul(x, y):
    out = []
    for i in range(len(x)):
        out_row = []
        for j in range(len(y[0])):
            out_elem = 0
            for k in range(len(x[0])):
                out_elem += x[i][k] * y[k][j]
            out_row.append(out_elem)
        out.append(out_row)
    return out

In [5]:
for size in sizes:
    x = np.random.randn(size, size).astype(np.float32).tolist()
    y = np.random.randn(size, size).astype(np.float32).tolist()

    d_sec = 0
    for i in range(iterations):
        start = time.time()
        res = matmul(x, y)
        end = time.time()
        d_sec += (end - start)

    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec}")

行列サイズ = 256
処理時間 = 1.4835143089294434
行列サイズ = 1024
処理時間 = 113.07175493240356


In [7]:
for size in sizes:
    x = np.random.randn(size, size).astype(np.float32)
    y = np.random.randn(size, size).astype(np.float32)

    d_sec = 0
    for i in range(iterations):
        start = time.time()
        res = matmul(x, y)
        end = time.time()
        d_sec += (end - start)

    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec}")

行列サイズ = 256
処理時間 = 6.547226977348328


KeyboardInterrupt: 

## numpy (intel MKL)での行列積速度

In [4]:
np.show_config()

blas_mkl_info:
    libraries = ['mkl_rt', 'pthread']
    library_dirs = ['/opt/intel/mkl/lib/intel64']
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    include_dirs = ['/opt/intel/mkl/include']
blas_opt_info:
    libraries = ['mkl_rt', 'pthread']
    library_dirs = ['/opt/intel/mkl/lib/intel64']
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    include_dirs = ['/opt/intel/mkl/include']
lapack_mkl_info:
    libraries = ['mkl_rt', 'pthread']
    library_dirs = ['/opt/intel/mkl/lib/intel64']
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    include_dirs = ['/opt/intel/mkl/include']
lapack_opt_info:
    libraries = ['mkl_rt', 'pthread']
    library_dirs = ['/opt/intel/mkl/lib/intel64']
    define_macros = [('SCIPY_MKL_H', None), ('HAVE_CBLAS', None)]
    include_dirs = ['/opt/intel/mkl/include']
Supported SIMD extensions in this NumPy install:
    baseline = SSE,SSE2,SSE3
    found = SSSE3,SSE41,POPCNT,SSE42,AVX,F16C,FMA3,AVX2
  

In [6]:
for size in sizes:
    x = np.random.randn(size, size).astype(np.float32)
    y = np.random.randn(size, size).astype(np.float32)

    d_sec = 0
    for i in range(iterations):
        start = time.time()
        res = np.dot(x, y)
        end = time.time()
        d_sec += (end - start)

    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec}")


行列サイズ = 256
処理時間 = 0.0029145956039428713
行列サイズ = 1024
処理時間 = 0.002947068214416504
行列サイズ = 4096
処理時間 = 0.1760018825531006


## numba (jit)

In [8]:
matmul_njit = nb.njit(matmul)

In [10]:
for size in sizes:
    x = np.random.randn(size, size).astype(np.float32)
    y = np.random.randn(size, size).astype(np.float32)

    ## warmup
    matmul_njit(x, y)

    d_sec = 0
    for i in range(iterations):
        start = time.time()
        res = matmul_njit(x, y)
        end = time.time()
        d_sec += (end - start)

    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec}")

行列サイズ = 256
処理時間 = 0.014850306510925292
行列サイズ = 1024
処理時間 = 1.31300847530365
行列サイズ = 4096
処理時間 = 405.71670775413514


## numpy (openblas)での行列積速度

In [1]:
import time
import numpy as np

In [4]:
sizes = [256, 1024, 4096]
iterations = 10

In [2]:
np.show_config()

openblas64__info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)]
    runtime_library_dirs = ['/usr/local/lib']
blas_ilp64_opt_info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)]
    runtime_library_dirs = ['/usr/local/lib']
openblas64__lapack_info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None), ('HAVE_LAPACKE', None)]
    runtime_library_dirs = ['/usr/local/lib']
lapack_ilp64_opt_info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None

In [6]:
for size in sizes:
    x = np.random.randn(size, size).astype(np.float32)
    y = np.random.randn(size, size).astype(np.float32)

    d_sec = 0
    for i in range(iterations):
        start = time.time()
        res = np.dot(x, y)
        end = time.time()
        d_sec += (end - start)

    d_sec /= iterations
    print(f"行列サイズ = {size}")
    print(f"処理時間 = {d_sec}")

行列サイズ = 256
処理時間 = 0.0001068115234375
行列サイズ = 1024
処理時間 = 0.0025608062744140623
行列サイズ = 4096
処理時間 = 0.14603085517883302
