# Importazione dei risultati del benchmark

In [67]:
import os
import pandas as pd

# Folder (relative to the notebook) that holds the benchmark CSV files.
results_dir = os.path.join('..', 'results')

# Keep only the directory entries named like "benchmark*.csv".
benchmark_csvs = [
    entry
    for entry in os.listdir(results_dir)
    if entry.startswith("benchmark") and entry.endswith(".csv")
]

# Stop right away when there is nothing to analyse.
if len(benchmark_csvs) == 0:
    raise FileNotFoundError("Nessun file CSV che inizia con 'benchmark' trovato in ../results")

# Pick the lexicographically greatest file name; this is assumed to be the
# most recent run (names appear to embed a sortable timestamp).
latest_csv = sorted(benchmark_csvs)[-1]
csv_path = os.path.join(results_dir, latest_csv)
print("CSV importato da:", csv_path)

# The benchmark CSV uses ';' as its field separator.
df = pd.read_csv(csv_path, sep=';')

# Preview the first rows as the cell output.
df.head()

CSV importato da: ../results/benchmark_results_20250320_135321.csv


Unnamed: 0,implementation,input_string,batch_size,regex_pattern,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,match_success,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
0,Triton,101,1,(0|1)*1,1,3.601551,3.600359,0.5,10000,0.0,2,True,1.0,2,1,0
1,Triton,101,1,(0|1)*1,1,3.601551,3.600359,0.5,10000,0.0,2,True,1.0,2,1,0
2,Triton,100,1,(0|1)*0,1,1.055479,1.055002,0.5,10000,0.0,2,True,1.0,2,1,0
3,Triton,100,1,(0|1)*0,1,1.055479,1.055002,0.5,10000,0.0,2,True,1.0,2,1,0
4,Triton,1011,1,(0|1)*11,1,1.05381,1.05381,0.5,10000,0.0,2,True,1.0,2,1,0


In [68]:
# Display the (rows, columns) size of the loaded benchmark table.
df.shape

(174, 16)

# analisi

In [69]:
# Partition the benchmark rows by backend so CUDA and Triton can be
# inspected independently.
implementation = df['implementation']
cuda_df = df.loc[implementation.eq('CUDA')]
triton_df = df.loc[implementation.eq('Triton')]

In [70]:
# Preview the first CUDA rows to sanity-check the split.
cuda_df.head()

Unnamed: 0,implementation,input_string,batch_size,regex_pattern,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,match_success,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
116,CUDA,101,1,(0|1)*1,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
117,CUDA,100,1,(0|1)*0,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
118,CUDA,1011,1,(0|1)*11,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
119,CUDA,1100,1,(0|1)*00,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
120,CUDA,101,1,0101,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0


In [71]:
# Summary statistics for the numeric columns of the CUDA rows.
# NOTE(review): in the recorded output every CUDA timing is exactly 0.01 ms
# (std ~1e-18) and memory/transfer columns are all 0 — this looks like a
# placeholder or clamped value rather than a real measurement; confirm.
cuda_df.describe()


Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0
mean,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
std,0.0,0.0,1.7498740000000002e-18,1.7498740000000002e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
25%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
50%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
75%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
max,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0


In [72]:
# Summary statistics for the numeric columns of the Triton rows.
# NOTE(review): the recorded output shows 116 Triton rows against 58 CUDA
# rows, and the first Triton rows shown by df.head() are pairwise identical —
# the Triton results look duplicated; confirm before comparing the backends.
triton_df.describe()

Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,1.0,1.0,1.099541,1.099299,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
std,0.0,0.0,0.332939,0.332808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,1.040936,1.040936,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
25%,1.0,1.0,1.053333,1.053095,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
50%,1.0,1.0,1.053572,1.053333,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
75%,1.0,1.0,1.054049,1.054049,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
max,1.0,1.0,3.601551,3.600359,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0


# controllo che i test siano tutti presenti

In [73]:
def _parse_test_cases(lines):
    """Parse INI-like test-case lines into a list of dicts.

    A ``[name]`` line opens a new test case whose ``'name'`` is the text
    between the brackets. Each following ``key = value`` line (split on the
    first ``=``, both sides stripped) is stored on the current test case as a
    string. Blank lines and lines starting with ``#`` are skipped, and
    ``key = value`` lines that appear before the first header are ignored.

    The ``expected`` value is converted to ``True``/``False`` when it spells
    "true"/"false" (case-insensitive); any other text is kept as a string.

    Args:
        lines: iterable of raw text lines.

    Returns:
        A list with one dict per test case, in file order.
    """
    bool_literals = {'true': True, 'false': False}
    cases = []
    current = None
    for raw_line in lines:
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        if line.startswith('[') and line.endswith(']'):
            if current is not None:
                cases.append(current)
            # Remove exactly one bracket per side; str.strip('[]') would also
            # eat brackets that belong to the test name itself.
            current = {'name': line[1:-1]}
        elif '=' in line and current is not None:
            key, _, value = line.partition('=')
            key = key.strip()
            value = value.strip()
            if key == 'expected':
                value = bool_literals.get(value.lower(), value)
            current[key] = value

    if current is not None:
        cases.append(current)
    return cases


# Read the test-case file; the encoding is explicit so the result does not
# depend on the platform's default locale.
with open('../tests/cases/test_cases.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()

# One record per test case (presumably with name, regex, input and expected
# keys — this depends on the file's contents).
data = _parse_test_cases(lines)

df_tests = pd.DataFrame(data)
print("Ci sono", len(df_tests), "test")

Ci sono 58 test


In [74]:
# Each test case is expected to be a distinct (regex, input) pair:
# de-duplicate on those two columns and require that no row disappears.
df_tests_unique = df_tests.loc[:, ['regex', 'input']].drop_duplicates()
assert len(df_tests) == len(df_tests_unique), (
    "Il numero di coppie uniche non corrisponde a quello in df_tests"
)
print(
    "Il numero di coppie uniche corrisponde a quello in df_tests, ovvero:",
    len(df_tests_unique),
)

Il numero di coppie uniche corrisponde a quello in df_tests, ovvero: 58


In [75]:
# Cross-check df_tests against the project's own parser
# (../tests/cases/parser.py) and report every test case the parser did not
# return.
import sys
import os

# Make ../tests/cases importable so parser.py can be loaded as a module.
# NOTE(review): `parser` was also the name of a standard-library module that
# was removed in Python 3.10; on an older interpreter this import may resolve
# to that module instead of the project file — confirm the kernel version.
cases_path = os.path.abspath(os.path.join('..', 'tests', 'cases'))
if cases_path not in sys.path:
    sys.path.insert(0, cases_path)

from parser import parse_test_file
test_list = parse_test_file('../tests/cases/test_cases.txt')
# Assumes the parser returns records exposing 'regex' and 'input' fields —
# TODO confirm against parser.py.
df_parsed_tests = pd.DataFrame(test_list)

# Compare the two sources on the (regex, input) pair.
key_columns = ['regex', 'input']
expected_pairs = df_tests.set_index(key_columns).index
parsed_pairs = df_parsed_tests.set_index(key_columns).index

# Rows of df_tests whose pair is absent from the parser output. Only this
# direction is checked: extra tests produced by the parser are not reported.
missing_tests = df_tests[~expected_pairs.isin(parsed_pairs)]
if not missing_tests.empty:
    # Report the actual counts; the number of missing pairs is not, in
    # general, the difference between the two table sizes.
    print(
        "Test mancanti: " + str(len(missing_tests))
        + " su " + str(len(df_tests))
        + " (test restituiti dal parser: " + str(len(df_parsed_tests)) + ")"
    )
    print(missing_tests)
else:
    print("Tutti i test sono stati trovati e parsati correttamente.")

Tutti i test sono stati trovati e parsati correttamente.


In [80]:
# Summary statistics for the CUDA rows.
# NOTE(review): this repeats the earlier cuda_df.describe() cell and was
# executed out of order (its execution count is higher than the next cell's);
# consider removing it.
cuda_df.describe()

Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0
mean,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
std,0.0,0.0,1.7498740000000002e-18,1.7498740000000002e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
25%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
50%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
75%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
max,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0


In [81]:
# Summary statistics for the Triton rows.
# NOTE(review): this repeats the earlier triton_df.describe() cell and was
# executed out of order (its execution count is higher than the next cell's);
# consider removing it.
triton_df.describe()

Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,1.0,1.0,1.099541,1.099299,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
std,0.0,0.0,0.332939,0.332808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,1.040936,1.040936,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
25%,1.0,1.0,1.053333,1.053095,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
50%,1.0,1.0,1.053572,1.053333,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
75%,1.0,1.0,1.054049,1.054049,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
max,1.0,1.0,3.601551,3.600359,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0


In [76]:
# Count the distinct (input_string, regex_pattern) pairs separately for each
# backend, so the totals can be compared with the number of test cases.
pair_columns = ['input_string', 'regex_pattern']
cuda_tests_unique = cuda_df.loc[:, pair_columns].drop_duplicates()
triton_tests_unique = triton_df.loc[:, pair_columns].drop_duplicates()

print("CUDA test unici:", len(cuda_tests_unique))
print("Triton test unici:", len(triton_tests_unique))

CUDA test unici: 58
Triton test unici: 58
