In [1]:
import itertools
 
def gera_pemutacoes():
  """Return every ordering of the letters 'ABCDEF' as a 6-character string.

  The strings are returned as a list, in the order in which
  itertools.permutations yields them for the input 'ABCDEF'.
  """
  letras = 'ABCDEF'
  return list(map(''.join, itertools.permutations(letras)))


# Module-level list of all orderings; functions in the other cells read it.
permutacoes = gera_pemutacoes()

In [2]:
def gera_comandos(sequencia):
  """Translate a sequence of loop letters into C `for` headers.

  Each letter 'A'..'F' contributes one line of C (terminated by a newline),
  in the order the letters appear in `sequencia`. 'A', 'B' and 'C' are the
  block-level loops over block_i, block_j and block_k; 'D', 'E' and 'F' are
  the loops over i, j and k inside a block. Any other item contributes
  nothing.

  Returns the concatenated C text (an empty string for an empty sequence).
  """
  lacos = {
    'A': 'for (block_i = 0; block_i < n; block_i += block_size)\n',
    'B': 'for (block_j = 0; block_j < n; block_j += block_size)\n',
    'C': 'for (block_k = 0; block_k < n; block_k += block_size)\n',
    'D': 'for (i = 0; i < block_size; ++i)\n',
    'E': 'for (j = 0; j < block_size; ++j)\n',
    'F': 'for (k = 0; k < block_size; ++k)\n',
  }
  return ''.join(lacos.get(letra, '') for letra in sequencia)

def gera_funcao(sequencia, block_size, algoritmo):
  """Build the C source of one blocked matrix-multiply variant.

  Args:
    sequencia: loop letters handed to gera_comandos; they decide the nesting
      order of the six `for` headers.
    block_size: value written into the generated `int block_size = ...;`.
    algoritmo: suffix of the generated function name, `matrix_dgemm_2_<algoritmo>`.

  Returns:
    The C function definition as a string. The loop headers are emitted
    without braces, so the single `C(...) += A(...) * B(...)` statement is
    the body of the innermost loop.

  NOTE(review): the inner loops always run `block_size` iterations, so the
  generated code appears to assume n is a multiple of block_size - confirm.
  """
  lacos = gera_comandos(sequencia)
  return f'''
void matrix_dgemm_2_{algoritmo}(int n, double *restrict _C, double *restrict _A, double *restrict _B)
{{
#define A(i, j) _A[n * (i) + (j)]
#define B(i, j) _B[n * (i) + (j)]
#define C(i, j) _C[n * (i) + (j)]

  int block_size = {block_size};
  int block_i, block_j, block_k;
  int i, j, k;

{lacos}

  C(block_i + i, block_j + j) += A(block_i + i, block_k + k) * B(block_k + k, block_j + j);
  
#undef A
#undef B
#undef C
}} 

'''

def gera_matrix_h(algoritmo):
  """Return the C prototype line for `matrix_dgemm_2_<algoritmo>`.

  The line ends with ';' and a newline.
  """
  nome = f'matrix_dgemm_2_{algoritmo}'
  parametros = 'int n, double *restrict _C, double *restrict _A, double *restrict _B'
  return 'void ' + nome + '(' + parametros + ');\n'

In [5]:
def gera_codigo():
  """Write every generated C variant to 'codigo_gerado.c'.

  Algorithm numbers are assigned by iterating block sizes in the outer loop
  and `permutacoes` in the inner loop, starting at 0. The file contains, in
  this order:
    1. one function definition per (block size, ordering) pair,
    2. one prototype per algorithm number,
    3. the `matrix_which_dgemm_2` dispatcher.

  The file is created in the current working directory and overwritten if
  it already exists. Returns None.
  """
  block_sizes = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
  combinacoes = [
    (block_size, permutacao)
    for block_size in block_sizes
    for permutacao in permutacoes
  ]

  # Collect the pieces in a list and join once; repeated `+=` on a string
  # that grows to thousands of functions copies it over and over.
  partes = [
    gera_funcao(permutacao, block_size, algoritmo)
    for algoritmo, (block_size, permutacao) in enumerate(combinacoes)
  ]
  partes.extend(gera_matrix_h(algoritmo) for algoritmo in range(len(combinacoes)))
  partes.append(gera_matrix_which_dgemm_2())

  # The `with` block closes the file; no explicit close() is needed.
  with open('codigo_gerado.c', 'w', encoding="utf-8") as file:
    file.write(''.join(partes))

# Regenerate codigo_gerado.c in the current working directory.
# NOTE(review): gera_codigo() calls gera_matrix_which_dgemm_2, whose defining
# cell appears further down in this notebook; that cell has to run first.
gera_codigo()

In [38]:
# Preview the prototype generated for algorithm 1.
# NOTE(review): the recorded output below has no trailing '\n', while
# gera_matrix_h appends one - the output looks stale; re-run this cell.
gera_matrix_h(1)

'void matrix_dgemm_2_1(int n, double *restrict _C, double *restrict _A, double *restrict _B);'

In [4]:
def gera_cases():
  """Return the `case` labels of the generated C dispatcher, as one string.

  One `case <k>:` block is produced for every algorithm number k from 0 up
  to (number of block sizes * len(permutacoes)) - 1. Each block calls
  `matrix_dgemm_2_<k>(n, _C, _A, _B)`, sets `ret = true` and breaks.
  """
  block_sizes = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
  total = len(block_sizes) * len(permutacoes)

  # {0} is the algorithm number; the surrounding whitespace is part of the
  # emitted text.
  modelo = (
    '\n'
    '      case {0}:\n'
    '        matrix_dgemm_2_{0}(n, _C, _A, _B);\n'
    '        ret = true;\n'
    '        break;\n'
    '\n'
    '      '
  )
  return ''.join(modelo.format(algoritmo) for algoritmo in range(total))


def gera_matrix_which_dgemm_2():
  """Return the C source of the `matrix_which_dgemm_2` dispatcher.

  The generated function switches on `algorithm`, using the case labels
  produced by gera_cases(); its `default` branch sets `ret = false`, and the
  function returns `ret`.
  """
  casos = gera_cases()
  return f'''
bool matrix_which_dgemm_2(int algorithm, int n, double *restrict _C, double *restrict _A, double *restrict _B)
{{
  bool ret;
  switch (algorithm)
  {{
  
  {casos}

  default:
    ret = false;
  }}
  return ret;
}}
'''

# Preview the generated dispatcher source as the cell output; nothing is
# written to disk by this call.
gera_matrix_which_dgemm_2()

'\nbool matrix_which_dgemm_2(int algorithm, int n, double *restrict _C, double *restrict _A, double *restrict _B)\n{\n  bool ret;\n  switch (algorithm)\n  {\n  \n  \n      case 0:\n        matrix_dgemm_2_0(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n      case 1:\n        matrix_dgemm_2_1(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n      case 2:\n        matrix_dgemm_2_2(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n      case 3:\n        matrix_dgemm_2_3(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n      case 4:\n        matrix_dgemm_2_4(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n      case 5:\n        matrix_dgemm_2_5(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n      case 6:\n        matrix_dgemm_2_6(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n      case 7:\n        matrix_dgemm_2_7(n, _C, _A, _B);\n        ret = true;\n        break;\n\n      \n  

In [59]:
def generate_bench():
  """Write 'experiment.sh', a bash script that benchmarks every algorithm.

  For each algorithm number k (0 .. total - 1) the script echoes a progress
  line "k/total" and appends "k,<output of ./main-experiment>" to
  experimento.csv, running the binary with --matrix-size 2048.

  The script is created in the current working directory and overwritten if
  it already exists. Returns None.
  """
  # Defined locally, like in gera_codigo and gera_cases, so this function
  # does not depend on a global that is only assigned in a later cell.
  block_sizes = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
  total = len(permutacoes) * len(block_sizes)

  linhas = ['#!/bin/bash\n\n']
  for algoritmo in range(total):
    linhas.append(
      f'\n      echo "{algoritmo}/{total}"\n'
      f'      echo "{algoritmo},$(./main-experiment --matrix-size 2048 --algorithm {algoritmo})" >> experimento.csv \n'
    )

  # The `with` block closes the file; no explicit close() is needed.
  with open('experiment.sh', 'w', encoding="utf-8") as file:
    file.write(''.join(linhas))

# Regenerate experiment.sh in the current working directory.
generate_bench()

In [112]:
import optuna
import tqdm as notebook_tqdm
import subprocess

block_sizes = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]


def run(permutacao, block_size):
  """Run ./main-experiment for one (ordering, block size) pair.

  Args:
    permutacao: an element of `permutacoes`.
    block_size: an element of `block_sizes`.

  Returns:
    The float printed by the benchmark on stdout (presumably the elapsed
    time in seconds - confirm against main-experiment), or 100.0 when the
    run exceeds the 15 second timeout.

  Raises:
    ValueError: if either argument is not in its list, or if the benchmark
      output cannot be parsed as a float.
  """
  # Same numbering as gera_codigo: block sizes in the outer loop,
  # permutations in the inner loop. len(permutacoes) replaces the former
  # hard-coded 720 so the index stays right if the list ever changes.
  algoritmo = str(block_sizes.index(block_size) * len(permutacoes) + permutacoes.index(permutacao))
  timeout_s = 15  # in seconds
  cmd = ['./main-experiment', '--matrix-size', '2048', '--algorithm', algoritmo]
  try:
    result = subprocess.run(cmd, shell=False, capture_output=True, timeout=timeout_s, universal_newlines=True)
  except subprocess.TimeoutExpired:
    # Penalty value reported for configurations that are too slow.
    return float(100)

  try:
    return float(result.stdout)
  except ValueError as erro:
    # Keep the exception type, but say which run failed and what it printed.
    raise ValueError(
      f'main-experiment --algorithm {algoritmo} did not print a number '
      f'(exit code {result.returncode}, stdout={result.stdout!r}, stderr={result.stderr!r})'
    ) from erro
  

def objective(trial):
  """Optuna objective: benchmark the configuration suggested for `trial`.

  Asks the trial for a categorical "sequence" (chosen from `permutacoes`)
  and then a categorical "block_size" (chosen from `block_sizes`), and
  returns whatever run() returns for that pair.
  """
  return run(
    trial.suggest_categorical("sequence", permutacoes),
    trial.suggest_categorical("block_size", block_sizes),
  )

def tenta():
  """Create or resume the 'miniEP6' Optuna study and run 1000 trials.

  The study is stored in the SQLite file db.sqlite3 and is reused when it
  already exists (load_if_exists=True). No direction is passed to
  create_study, so Optuna's default applies. After optimizing with
  `objective`, the best value and its parameters are printed.
  """
  configuracao = {
    "storage": "sqlite:///db.sqlite3",
    "study_name": "miniEP6",
    "load_if_exists": True,
  }
  estudo = optuna.create_study(**configuracao)
  estudo.optimize(objective, n_trials=1000)

  melhor_valor = estudo.best_value
  melhores_parametros = estudo.best_params
  print(f"Best value: {melhor_valor} (params: {melhores_parametros})")




# Start (or resume) the search. Each trial runs ./main-experiment with a
# 15 second timeout, so the 1000 trials can take a long time.
tenta()


[32m[I 2023-05-07 14:03:40,285][0m Using an existing study with name 'miniEP6' instead of creating a new one.[0m
[32m[I 2023-05-07 14:03:55,578][0m Trial 422 finished with value: 14.585804 and parameters: {'sequence': 'DCFAEB', 'block_size': 512}. Best is trial 374 with value: 6.799146.[0m
[32m[I 2023-05-07 14:04:10,845][0m Trial 423 finished with value: 100.0 and parameters: {'sequence': 'BECFDA', 'block_size': 8}. Best is trial 374 with value: 6.799146.[0m
[32m[I 2023-05-07 14:04:26,113][0m Trial 424 finished with value: 100.0 and parameters: {'sequence': 'AEDBFC', 'block_size': 256}. Best is trial 374 with value: 6.799146.[0m
[32m[I 2023-05-07 14:04:41,484][0m Trial 425 finished with value: 100.0 and parameters: {'sequence': 'ADFEBC', 'block_size': 256}. Best is trial 374 with value: 6.799146.[0m
[32m[I 2023-05-07 14:04:56,842][0m Trial 426 finished with value: 100.0 and parameters: {'sequence': 'AFECDB', 'block_size': 512}. Best is trial 374 with value: 6.799146.[

KeyboardInterrupt: 

In [7]:
import optuna
import tqdm as notebook_tqdm
import subprocess

block_sizes = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]


# NOTE(review): this duplicates the `run` defined in the Optuna cell; the
# later definition shadows the earlier one. Keep a single copy if possible.
def run(permutacao, block_size):
  """Run ./main-experiment for one (ordering, block size) pair.

  Args:
    permutacao: an element of `permutacoes`.
    block_size: an element of `block_sizes`.

  Returns:
    The float printed by the benchmark on stdout (presumably the elapsed
    time in seconds - confirm against main-experiment), or 100.0 when the
    run exceeds the 15 second timeout.

  Raises:
    ValueError: if either argument is not in its list, or if the benchmark
      output cannot be parsed as a float.
  """
  # Same numbering as gera_codigo: block sizes in the outer loop,
  # permutations in the inner loop. len(permutacoes) replaces the former
  # hard-coded 720 so the index stays right if the list ever changes.
  algoritmo = str(block_sizes.index(block_size) * len(permutacoes) + permutacoes.index(permutacao))
  timeout_s = 15  # in seconds
  cmd = ['./main-experiment', '--matrix-size', '2048', '--algorithm', algoritmo]
  try:
    result = subprocess.run(cmd, shell=False, capture_output=True, timeout=timeout_s, universal_newlines=True)
  except subprocess.TimeoutExpired:
    # Penalty value reported for configurations that are too slow.
    return float(100)

  try:
    return float(result.stdout)
  except ValueError as erro:
    # Keep the exception type, but say which run failed and what it printed.
    raise ValueError(
      f'main-experiment --algorithm {algoritmo} did not print a number '
      f'(exit code {result.returncode}, stdout={result.stdout!r}, stderr={result.stderr!r})'
    ) from erro
  
  
# Spot-check a single configuration: ordering 'ACDBFE' with block size 512.
run('ACDBFE', 512)

0.736256

In [2]:
block_sizes = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]

# Print, for every block size, the algorithm number that pairs it with the
# ordering 'ACDBEF' (same numbering that run() uses).
for posicao, b in enumerate(block_sizes):
  print(str(posicao * 720 + permutacoes.index('ACDBEF')))


30
750
1470
2190
2910
3630
4350
5070
5790
6510
