In [None]:
import os
import re
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Number of timed repetitions per configuration; the sweep cells divide the
# accumulated times by this value to obtain an average.
RUNS = 1

# Problem sizes handed to the build as RESOLUTION=<value>.
RESOLUTIONS = [1000, 2000, 4000, 8000, 16000]

# Captures the integer millisecond count from the "Time elapsed: ..." line
# that the benchmark commands are expected to write to stdout.
re_pattern = r"Time elapsed: (\d+) milliseconds"

# Best CUDA parameters (block dimensions)

In [58]:
# Candidate CUDA block sizes tried along each axis of the sweep.
# Both names refer to the same list object, so mutating one changes the other.
BLOCKDIM_X = [1, 2, 4, 8, 16, 32]
BLOCKDIM_Y = BLOCKDIM_X

In [None]:
# Averaged timing grids, keyed by the index of the resolution in RESOLUTIONS.
# Each grid has shape (len(BLOCKDIM_X), len(BLOCKDIM_Y)); entry [i, j] is the
# mean elapsed time in milliseconds for BLOCKDIM_X[i] x BLOCKDIM_Y[j], or NaN
# when no run of that configuration produced a parsable timing.
time_matrix_blockdim_by_resolution = {}

# Only the first four resolutions are swept here; the last one is skipped.
for r, resolution in enumerate(RESOLUTIONS[:4]):
    grid_shape = (len(BLOCKDIM_X), len(BLOCKDIM_Y))
    time_sums = np.zeros(grid_shape)
    # Successful measurements per cell, so that a failed build/run is not
    # averaged in as a (winning) time of 0 ms.
    success_counts = np.zeros(grid_shape)

    for run in range(RUNS):
        for i, block_x in enumerate(BLOCKDIM_X):
            for j, block_y in enumerate(BLOCKDIM_Y):
                print(f"{block_x=}, {block_y=}, {resolution=}")
                # NOTE(review): make decides whether to rebuild from file
                # timestamps, not from command-line variables -- confirm the
                # `cuda` target really recompiles for every BLOCKDIM/RESOLUTION.
                command = f"make cuda BLOCKDIM_X={block_x} BLOCKDIM_Y={block_y} RESOLUTION={resolution} && ./release/mandelbrot_cuda"
                result = subprocess.run(command, shell=True, capture_output=True, text=True)
                match = re.search(re_pattern, result.stdout)
                if match:
                    time_elapsed = int(match.group(1))
                    time_sums[i, j] += time_elapsed
                    success_counts[i, j] += 1
                else:
                    print("[DEBUG] No match found in stdout for elapsed time.")
                    print(f"[DEBUG] returncode={result.returncode}, stderr tail: {result.stderr[-500:]!r}")

    # Average over the runs that actually produced a timing; cells without any
    # successful run keep the NaN they were initialised with.
    time_matrix = np.full(grid_shape, np.nan)
    np.divide(time_sums, success_counts, out=time_matrix, where=success_counts > 0)

    time_matrix_blockdim_by_resolution[r] = time_matrix

block_x=1, block_y=1
block_x=1, block_y=2
block_x=1, block_y=4
block_x=1, block_y=8
block_x=1, block_y=16
block_x=1, block_y=32
block_x=2, block_y=1
block_x=2, block_y=2
block_x=2, block_y=4
block_x=2, block_y=8
block_x=2, block_y=16
block_x=2, block_y=32
block_x=4, block_y=1
block_x=4, block_y=2
block_x=4, block_y=4
block_x=4, block_y=8
block_x=4, block_y=16
block_x=4, block_y=32
block_x=8, block_y=1
block_x=8, block_y=2
block_x=8, block_y=4
block_x=8, block_y=8
block_x=8, block_y=16
block_x=8, block_y=32
block_x=16, block_y=1
block_x=16, block_y=2
block_x=16, block_y=4
block_x=16, block_y=8
block_x=16, block_y=16
block_x=16, block_y=32
block_x=32, block_y=1
block_x=32, block_y=2
block_x=32, block_y=4
block_x=32, block_y=8
block_x=32, block_y=16
block_x=32, block_y=32


In [None]:
# Scan every per-resolution timing grid and keep the single fastest entry.
# best_time   -- smallest averaged time seen so far
# best_config -- (block_x, block_y) that produced it, or None if nothing valid
best_time = np.inf
best_config = None

# NOTE(review): raw times are compared across different resolutions, so the
# winner presumably always comes from the cheapest resolution -- confirm that
# this is the intended notion of "best overall".
for r, matrix in time_matrix_blockdim_by_resolution.items():
    # Skip grids with no usable measurement (empty, or every entry is NaN);
    # np.nanargmin would raise on them.
    if matrix.size == 0 or np.isnan(matrix).all():
        continue
    # NaN-aware minimum: missing measurements must never be selected.
    i_min, j_min = np.unravel_index(np.nanargmin(matrix), matrix.shape)
    if matrix[i_min, j_min] < best_time:
        best_time = matrix[i_min, j_min]
        best_config = (BLOCKDIM_X[i_min], BLOCKDIM_Y[j_min])

if best_config is None:
    print("No valid CUDA timing found; cannot determine the best BLOCKDIM.")
else:
    best_block_x, best_block_y = best_config
    print(f"Best overall CUDA BLOCKDIM: block_x = {best_block_x}, block_y = {best_block_y} with time {best_time}")

Best blockdim_x: 4, Best blockdim_y: 8


# Best CPU parameters (thread count and OpenMP schedule)

In [None]:
# Load the Intel oneAPI environment into THIS kernel process.
# A `!source ...` line runs in a throwaway subshell, so any variables it
# exports are gone as soon as the command returns and never reach the
# subprocess.run(...) builds in later cells. Instead, source the script in
# bash, dump the resulting environment, and merge it into os.environ.
ONEAPI_SETVARS = "/opt/intel/oneapi/setvars.sh"

if os.path.isfile(ONEAPI_SETVARS):
    # `;` (not `&&`) so the environment is dumped even if the script returns
    # a non-zero status; `env -0` separates entries with NUL so values that
    # contain newlines survive the round trip.
    env_dump = subprocess.run(
        ["bash", "-c", f'source "{ONEAPI_SETVARS}" >/dev/null 2>&1; env -0'],
        capture_output=True,
        text=True,
    ).stdout
    for entry in env_dump.split("\0"):
        name, sep, value = entry.partition("=")
        if sep and name:
            os.environ[name] = value
else:
    print(f"[DEBUG] {ONEAPI_SETVARS} not found; oneAPI environment not loaded.")

In [77]:
# Wider thread sweep kept for reference:
# NUM_THREADS_LIST = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

# Scheduling policies compared in the CPU sweep (passed to make as OMP_SCHEDULE).
SCHEDULE_LIST = [
    "static",
    "dynamic",
    "guided",
]

# Thread counts compared in the CPU sweep (passed to make as NUM_THREADS).
NUM_THREADS_LIST = [1, 2, 4, 6]

In [78]:
# Sweep every (thread count, schedule) pair RUNS times and average the
# elapsed time reported by the CPU binary.
# best_cpu_param_times[i, j] ends up as the mean time in milliseconds for
# NUM_THREADS_LIST[i] / SCHEDULE_LIST[j], or NaN if no run of that pair
# produced a parsable timing.
cpu_grid_shape = (len(NUM_THREADS_LIST), len(SCHEDULE_LIST))
cpu_time_sums = np.zeros(cpu_grid_shape)
# Successful measurements per cell, so failed runs are not averaged in as 0.
cpu_success_counts = np.zeros(cpu_grid_shape)

for run in range(RUNS):
    print(f"Run {run+1}")
    for i, nthreads in enumerate(NUM_THREADS_LIST):
        for j, schedule in enumerate(SCHEDULE_LIST):
            print(f"""nthreads={nthreads}, schedule={schedule}""")
            # NOTE(review): make decides whether to rebuild from file
            # timestamps, not from command-line variables -- confirm the
            # default target really recompiles for every parameter pair.
            command = f"make NUM_THREADS={nthreads} OMP_SCHEDULE={schedule} && ./release/mandelbrot"
            result = subprocess.run(command, shell=True, capture_output=True, text=True)

            match = re.search(re_pattern, result.stdout)
            if match:
                time_elapsed = int(match.group(1))
                # Accumulate (the previous plain assignment kept only the last
                # run, which the division below then shrank by a factor RUNS).
                cpu_time_sums[i, j] += time_elapsed
                cpu_success_counts[i, j] += 1
            else:
                print("[DEBUG] No match found in stdout for elapsed time.")
                print(f"[DEBUG] returncode={result.returncode}, stderr tail: {result.stderr[-500:]!r}")

# Mean over the runs that produced a timing; untouched cells stay NaN.
best_cpu_param_times = np.full(cpu_grid_shape, np.nan)
np.divide(cpu_time_sums, cpu_success_counts, out=best_cpu_param_times, where=cpu_success_counts > 0)

Run 1
nthreads=1, schedule=static
nthreads=1, schedule=dynamic
nthreads=1, schedule=guided
nthreads=2, schedule=static
nthreads=2, schedule=dynamic
nthreads=2, schedule=guided
nthreads=4, schedule=static
nthreads=4, schedule=dynamic
nthreads=4, schedule=guided
nthreads=6, schedule=static
nthreads=6, schedule=dynamic
nthreads=6, schedule=guided
Run 2
nthreads=1, schedule=static
nthreads=1, schedule=dynamic
nthreads=1, schedule=guided
nthreads=2, schedule=static
nthreads=2, schedule=dynamic
nthreads=2, schedule=guided
nthreads=4, schedule=static
nthreads=4, schedule=dynamic
nthreads=4, schedule=guided
nthreads=6, schedule=static
nthreads=6, schedule=dynamic
nthreads=6, schedule=guided
Run 3
nthreads=1, schedule=static
nthreads=1, schedule=dynamic
nthreads=1, schedule=guided
nthreads=2, schedule=static
nthreads=2, schedule=dynamic
nthreads=2, schedule=guided
nthreads=4, schedule=static
nthreads=4, schedule=dynamic
nthreads=4, schedule=guided
nthreads=6, schedule=static
nthreads=6, schedul

In [79]:
# Report the (thread count, schedule) pair with the smallest averaged time.
# NaN entries (configurations without a valid measurement) are ignored; if the
# grid is empty or entirely NaN there is nothing to report.
if np.isnan(best_cpu_param_times).all():
    print("No valid CPU timing found; cannot determine the best parameters.")
else:
    i_min, j_min = np.unravel_index(np.nanargmin(best_cpu_param_times), best_cpu_param_times.shape)
    print(f"Best parameters: nthreads={NUM_THREADS_LIST[i_min]}, schedule={SCHEDULE_LIST[j_min]}")

Best parameters: nthreads=6, schedule=guided
