In [3]:
import os
import pandas as pd

# Folder that holds the benchmark CSV files
results_dir = os.path.join('..', 'results')

# Keep only the entries named "benchmark*.csv"
benchmark_csvs = [
    name
    for name in os.listdir(results_dir)
    if name.startswith("benchmark") and name.endswith(".csv")
]

if len(benchmark_csvs) == 0:
    raise FileNotFoundError("Nessun file CSV che inizia con 'benchmark' trovato in ../results")

# Take the lexicographically greatest file name (assumed to be the most recent run)
latest_csv = sorted(benchmark_csvs)[-1]
csv_path = os.path.join(results_dir, latest_csv)

print("CSV importato da:", csv_path)

# Load the semicolon-separated CSV into a pandas DataFrame
df = pd.read_csv(csv_path, delimiter=';')

# Preview the first rows
df.head()

CSV importato da: ../results/benchmark_results_20250320_105510.csv


Unnamed: 0,implementation,input_string,batch_size,regex_pattern,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,match_success,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
0,Triton,101,1,(0|1)*1,1,1.057625,1.05691,0.5,10000,0.0,2,True,1.0,2,1,0
1,Triton,100,1,(0|1)*0,1,1.052141,1.051664,0.5,10000,0.0,2,True,1.0,2,1,0
2,Triton,1011,1,(0|1)*11,1,1.049757,1.049757,0.5,10000,0.0,2,True,1.0,2,1,0
3,Triton,1100,1,(0|1)*00,1,1.048565,1.048326,0.5,10000,0.0,2,True,1.0,2,1,0
4,Triton,101,1,0101,1,1.048088,1.04785,0.5,10000,0.0,2,True,1.0,2,1,0


In [4]:
# Size of the loaded benchmark table as (rows, columns)
df.shape

(112, 16)

In [5]:
# Split the benchmark rows into one frame per value of the 'implementation' column
backend = df['implementation']
cuda_df = df.loc[backend == 'CUDA']
triton_df = df.loc[backend == 'Triton']

In [6]:
# Preview the first rows of the CUDA subset
cuda_df.head()

Unnamed: 0,implementation,input_string,batch_size,regex_pattern,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,match_success,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
57,CUDA,101,1,(0|1)*1,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
58,CUDA,100,1,(0|1)*0,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
59,CUDA,1011,1,(0|1)*11,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
60,CUDA,1100,1,(0|1)*00,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
61,CUDA,101,1,0101,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0


In [7]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns of the CUDA runs
cuda_df.describe()


Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,55.0,55.0,55.0,55.0,55.0,55.0,55.0,55.0,55.0,55.0,55.0,55.0
mean,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
25%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
50%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
75%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
max,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0


In [8]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns of the Triton runs
triton_df.describe()

Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,57.0,57.0,57.0,57.0,57.0,57.0,57.0,57.0,57.0,57.0,57.0,57.0
mean,1.0,1.0,1.045921,1.045721,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
std,0.0,0.0,0.005049,0.004983,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,1.034498,1.034498,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
25%,1.0,1.0,1.041889,1.041889,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
50%,1.0,1.0,1.044989,1.044989,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
75%,1.0,1.0,1.049757,1.049757,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
max,1.0,1.0,1.05834,1.058102,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0


In [None]:
# Read the test-case file and build a DataFrame from it
# (the file shown here yields the columns: name, regex, input, expected).
def parse_test_cases(path, encoding='utf-8'):
    """Parse an INI-like test-case file into a list of dicts.

    The file is a sequence of ``[name]`` section headers, each followed by
    ``key = value`` lines. Blank lines and lines starting with ``#`` are
    skipped. ``key = value`` lines that appear before the first section
    header are ignored.

    Args:
        path: Location of the test-case file.
        encoding: Text encoding used to decode the file. Set explicitly so
            the result does not depend on the platform's locale default.

    Returns:
        One dict per section, in file order. Each dict holds ``'name'`` (the
        header text without the enclosing brackets) plus one entry per key.
        Values are whitespace-stripped strings, except ``expected``, which is
        converted to ``True``/``False`` when it spells "true"/"false"
        (case-insensitive) and is left as a string otherwise.

    Raises:
        OSError: If the file cannot be opened.
    """
    bool_literals = {'true': True, 'false': False}
    cases = []
    current = None

    with open(path, 'r', encoding=encoding) as handle:
        # Stream the file line by line instead of loading it all with readlines()
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            if line.startswith('[') and line.endswith(']'):
                # A new section begins: flush the previous one first
                if current is not None:
                    cases.append(current)
                # Drop exactly the enclosing bracket pair; str.strip('[]') would
                # also eat bracket characters that belong to the name itself.
                current = {'name': line[1:-1].strip()}
            elif '=' in line and current is not None:
                # Split on the first '=' only, so values may contain '='
                key, value = (part.strip() for part in line.split('=', 1))
                if key == 'expected':
                    value = bool_literals.get(value.lower(), value)
                current[key] = value

    # Flush the last section, which has no following header to trigger it
    if current is not None:
        cases.append(current)
    return cases


data = parse_test_cases('../tests/cases/test_cases.txt')

# Build the DataFrame: one row per test case, one column per key found in the file
df_tests = pd.DataFrame(data)
df_tests

Unnamed: 0,name,regex,input,expected
0,Basic_EndsWith1,(0|1)*1,0101,True
1,Basic_EndsWith0,(0|1)*0,0100,True
2,Basic_EndsWithTwo1s,(0|1)*11,01011,True
3,Basic_EndsWithTwo0s,(0|1)*00,01100,True
4,Literal_ExactMatch,0101,0101,True
5,Literal_NoMatch,0101,0100,False
6,Literal_PartialMatch,0101,01010,False
7,Star_EmptyString,(0|1)*,,True
8,Star_RepeatedBits,1*0*,11110000,True
9,Star_MixedOrder,1*0*,11001100,False
