In [64]:
import numpy as np
import time

In [65]:
print(f"NumPy Version: {np.version.version}")
print("\nCurrent NumPy Configuration:")
np.show_config()
print("\nCurrent NumPy Runtime Configuration:")
np.show_runtime()

NumPy Version: 1.26.2

Current NumPy Configuration:
Build Dependencies:
  blas:
    detection method: pkgconfig
    found: true
    include directory: /c/opt/64/include
    lib directory: /c/opt/64/lib
    name: openblas64
    openblas configuration: USE_64BITINT=1 DYNAMIC_ARCH=1 DYNAMIC_OLDER= NO_CBLAS=
      NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP= SKYLAKEX MAX_THREADS=2
    pc file directory: C:/opt/64/lib/pkgconfig
    version: 0.3.23.dev
  lapack:
    detection method: internal
    found: true
    include directory: unknown
    lib directory: unknown
    name: dep2306486572304
    openblas configuration: unknown
    pc file directory: unknown
    version: 1.26.2
Compilers:
  c:
    commands: cl
    linker: link
    name: msvc
    version: 19.29.30152
  c++:
    commands: cl
    linker: link
    name: msvc
    version: 19.29.30152
  cython:
    commands: cython
    linker: cython
    name: cython
    version: 3.0.5
Machine Information:
  build:
    cpu: x86_64
    endian: l

In [66]:
# Generate Random Matrices and Vectors
s1 = 250 # Size of matrices for matrix multiplication
s2 = 1000 # Size of matrix and vector for matrix vector multiplication
s3 = 10000000 # Size of vectors for addition, subtraction and dot product

rng = np.random.default_rng()

A = rng.integers(0, 101, (s1, s1)) 
B = rng.integers(0, 101, (s1, s1))
C = rng.integers(0, 101, (s2, s2))
D = rng.integers(0, 101, s2)
E = rng.integers(0, 101, s3)
F = rng.integers(0, 101, s3)

In [67]:
print(f"A: {A.shape}, {A.dtype}\n{A}\n")

A: (250, 250), int64
[[88 21 52 ... 79  4 14]
 [73 12 38 ... 55 61 11]
 [65 65 23 ... 23 28 32]
 ...
 [89 70 61 ... 30  8 40]
 [ 5 25 41 ... 27 56 81]
 [ 4 94 35 ... 53 84 63]]



In [68]:
print(f"B: {B.shape}, {B.dtype}\n{B}\n")

B: (250, 250), int64
[[15 25 38 ... 21 18 52]
 [63 49 88 ... 84  9 22]
 [31 73 84 ...  7 20 30]
 ...
 [39 40 36 ... 58 97 73]
 [62 76 30 ... 52 79 30]
 [ 5 17 98 ... 66 95 64]]



In [69]:
print(f"C: {C.shape}, {C.dtype}\n{C}\n")

C: (1000, 1000), int64
[[17 98 77 ... 13 95 47]
 [19 56  4 ... 61  9 30]
 [ 9 24 42 ... 10 65 88]
 ...
 [70 35 21 ... 49 73 70]
 [29 70 92 ... 26 21 61]
 [39 55 20 ... 86 57 25]]



In [70]:
print(f"D: {D.shape}, {D.dtype}\n{D}\n")

D: (1000,), int64
[ 17  12  43  69  57  75  20  77  83  51  64  11  18  97  49   7  32  58
  15  48  62  41  48   3  44  34  22   0  14  86  34  66   1  63  66   1
  90  92  36  64  81  43  75  15  47  39  52  85   7  37  12  33   4  79
  39   6  24  17  34   3  72  19  76  82  62  76   7  81  27   0  50  67
  69  13  70  23   6  61  21  43  42 100  93  31  62  98  96   7  25  38
  80  54  69  21  65  93  63  19  77  75  69  60  74  26  57  45  63  91
  70  63  57   3  95  64  22  51  94   8  91  74  45  74  61  47  64  35
   3  52 100  32  18  32  95  70  48   5   8  68  94  41  78  65  58  21
   4  23  58  20  28  16  86  67  31  87   6  74  12  43  73  87  19  33
  99  37  16  86  35  12  76  29  39  54  55  76  46  67  97  18  76  41
  40  74  66  25  34  47  63  22  76  23  54  84  13  75  98  93  32  25
  67  43  91   1  91  99  86  84  70  75  46  74  66  79  90  74  43  32
  91  82  91  44  54  25  54  50  58  58   2  99  71  58   0  12  10  27
  87  80  21  27  46   0  40  84 

In [71]:
print(f"E: {E.shape}, {E.dtype}\n{E}\n")

E: (10000000,), int64
[98 21 41 ... 94 91 49]



In [72]:
print(f"F: {F.shape}, {F.dtype}\n{F}\n")

F: (10000000,), int64
[37 78 34 ... 17 19 49]



## Matrix Multiplication

In [73]:
# Sequential Matrix Multiplication
G = np.zeros((s1, s1), "int64")

start = time.time()

for i in range(s1):
    for j in range(s1):
        sum = 0
        for k in range(s1):
            sum += A[i][k] * B[k][j]
        G[i][j] = sum

end = time.time()
smmTotal = end - start

print(f"Matrix multiplication of 2 {s1}x{s1} matrices")
print(f"Time taken: {smmTotal} seconds")

Matrix multiplication of 2 250x250 matrices
Time taken: 5.924459218978882 seconds


In [74]:
# Matrix Multiplication using SIMD
start = time.time()

H = np.matmul(A, B)
# When you import NumPy, it treats the @ operator as the matrix multiplication operator
# H = A @ B is the same as H = np.matmul(A, B)

end = time.time()
pmmTotal = end - start

print(f"NumPy matrix multiplication of 2 {s1}x{s1} matrices using SIMD")
print(f"Time taken: {pmmTotal} seconds")

NumPy matrix multiplication of 2 250x250 matrices using SIMD
Time taken: 0.009009599685668945 seconds


In [75]:
# Verify the results
if np.array_equal(G, H):
    print("Matrix Multiplication results match")
else:
    print("Matrix Multiplication results do not match")

Matrix Multiplication results match


In [76]:
# Calculate Speed Up
SU = smmTotal / pmmTotal
print(f"Speed up using SIMD: {SU}x")

Speed up using SIMD: 657.5718595358437x


## Matrix Vector Multiplication

In [77]:
start = time.time()

I = np.zeros(s2, "int64")

for i in range(s2):
    sum = 0
    for j in range(s2):
        I[i] += C[i][j] * D[j]

end = time.time()
mvsTotal = end - start

print(f"Sequential matrix vector multiplication of a {s2}x{s2} matrix and a {s2} element vector")
print(f"Time taken: {mvsTotal} seconds")

Sequential matrix vector multiplication of a 1000x1000 matrix and a 1000 element vector
Time taken: 0.38127803802490234 seconds


In [78]:
# Matrix Vector Multiplication using SIMD
start = time.time()

J = np.matmul(C, D)

end = time.time()
pmvTotal = end - start

print(f"NumPy matrix vector multiplication of a {s2}x{s2} matrix and a {s2} element vector using SIMD")
print(f"Time taken: {pmvTotal} seconds")

NumPy matrix vector multiplication of a 1000x1000 matrix and a 1000 element vector using SIMD
Time taken: 0.0009996891021728516 seconds


In [79]:
# Verify the results
if np.array_equal(I, J):
    print("Matrix Vector Multiplication results match")
else:
    print("Matrix Vector Multiplication results do not match")

Matrix Vector Multiplication results match


In [80]:
# Calculate Speed Up
SU2 = mvsTotal / pmvTotal
print(f"Speed up using SIMD: {SU2}x")

Speed up using SIMD: 381.3966134032912x


## Element-wise Vector Addition

In [81]:
# Sequential Element-wise Vector Addition
K = np.zeros(s3, "int64")

start = time.time()

for i in range(s3):
    K[i] = E[i] + F[i]

end = time.time()

svaTotal = end - start

print(f"Sequential vector addition of 2 {s3} element vectors")
print(f"Time taken: {svaTotal} seconds")

Sequential vector addition of 2 10000000 element vectors
Time taken: 2.1058056354522705 seconds


In [82]:
# Element-wise Vector Addition using SIMD
start = time.time()

L = np.add(E, F)
# When you import NumPy, it treats the + operator as the element-wise addition operator
# L = E + F is the same as L = np.add(E, F)

end = time.time()
pvaTotal = end - start

print(f"Numpy vector addition of 2 {s3} element vectors using SIMD")
print(f"Time taken: {pvaTotal} seconds")

Numpy vector addition of 2 10000000 element vectors using SIMD
Time taken: 0.03575325012207031 seconds


In [83]:
# Verify the accuracy of the results
if np.array_equal(K, L):
    print("Matrix Addition results match")
else:
    print("Matrix Addition results do not match!")

Matrix Addition results match


In [84]:
# Calculate the speed up
SU3 = svaTotal / pvaTotal
print(f"Speed up using SIMD: {SU3}x")

Speed up using SIMD: 58.89829954654574x


## Element-wise Vector Subtraction

In [85]:
# Sequential Element-wise Vector Subtraction
M = np.zeros(s3, "int64")

start = time.time()

for i in range(s3):
    M[i] = E[i] - F[i]

end = time.time()
svsTotal = end - start

print(f"Sequential vector subtraction of 2 {s3} element vectors")
print(f"Time taken: {svsTotal} seconds")

Sequential vector subtraction of 2 10000000 element vectors
Time taken: 2.0388548374176025 seconds


In [86]:
# Element-wise Vector Subtraction using SIMD
N = np.zeros(s3, "int64")

start = time.time()

N = np.subtract(E, F)
# When you import NumPy, it treats the - operator as the element-wise subtraction operator
# N = E - F is the same as N = np.subtract(E, F)

end = time.time()
pvsTotal = end - start

print(f"Numpy vector subtraction of 2 {s3} element vectors using SIMD")
print(f"Time taken: {pvsTotal} seconds")

Numpy vector subtraction of 2 10000000 element vectors using SIMD
Time taken: 0.027881622314453125 seconds


In [87]:
# Verify the results
if np.array_equal(M, N):
    print("Vector Subtraction results match")
else:
    print("Vector Subtraction results do not match")

Vector Subtraction results match


In [88]:
# Calculate the speed up
SU4 = svsTotal / pvsTotal
print(f"Speed up using SIMD: {SU4}x")

Speed up using SIMD: 73.12540190176495x


## Dot Product

In [89]:
# Sequential Dot Product
start = time.time()

sumDP = 0

for i in range(s3):
    sumDP += E[i] * F[i]

end = time.time()
sdpTotal = end - start

print(f"Sequential dot product of 2 {s3} element vectors")
print(f"Dot Product: {sumDP}")
print(f"Time taken: {sdpTotal} seconds")

Sequential dot product of 2 10000000 element vectors
Dot Product: 25001511041
Time taken: 1.9618752002716064 seconds


In [90]:
# Dot Product using SIMD
start = time.time()

dotProduct = np.dot(E, F)

end = time.time()

pdpTotal = end - start

print(f"NumPy dot product of 2 {s3} element vectors using SIMD")
print(f"Dot product: {dotProduct}")
print(f"Time taken: {pdpTotal} seconds")

NumPy dot product of 2 10000000 element vectors using SIMD
Dot product: 25001511041
Time taken: 0.01227569580078125 seconds


In [91]:
# Verify the results
if sumDP == dotProduct:
    print("Dot Product results match")
else:
    print("Dot Product results do not match")

Dot Product results match


In [92]:
SU5 = sdpTotal / pdpTotal
print(f"Speed up using SIMD: {SU5}x")

Speed up using SIMD: 159.81784105034183x
