# importare

In [92]:
# Directory containing the CSV files
import os
import pandas as pd
import sys
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="pandas")


results_dir = os.path.join('..', 'results')

# Elenco dei file per CUDA e per Triton
cuda_csvs = [f for f in os.listdir(results_dir) if f.startswith("cuda_benchmark") and f.endswith(".csv")]
triton_csvs = [f for f in os.listdir(results_dir) if f.startswith("triton_benchmark") and f.endswith(".csv")]

if not cuda_csvs:
    raise FileNotFoundError("Nessun file CSV che inizia con 'cuda_benchmark' trovato in ../results")
if not triton_csvs:
    raise FileNotFoundError("Nessun file CSV che inizia con 'triton_benchmark' trovato in ../results")

# Seleziona il file più recente per ciascuno (in base all'ordinamento lessicografico)
latest_cuda = max(cuda_csvs)
latest_triton = max(triton_csvs)

cuda_csv_path = os.path.join(results_dir, latest_cuda)
triton_csv_path = os.path.join(results_dir, latest_triton)

print("CSV CUDA importato da:", cuda_csv_path)
print("CSV Triton importato da:", triton_csv_path)

# Importa i CSV in DataFrame
cuda_df = pd.read_csv(cuda_csv_path, delimiter=';')
triton_df = pd.read_csv(triton_csv_path, delimiter=';')

CSV CUDA importato da: ../results/cuda_benchmark_20250320_143543.csv
CSV Triton importato da: ../results/triton_benchmark_20250320_143545.csv


In [93]:
print("Primi record CUDA:")
cuda_df.head()

Primi record CUDA:


Unnamed: 0,implementation,input_string,batch_size,regex_pattern,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,match_success,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
0,CUDA,101,1,(0|1)*1,1,0.28965,0.28965,0,0,0,3,True,0,2,1,0
1,CUDA,100,1,(0|1)*0,1,0.199528,0.199528,0,0,0,3,True,0,2,1,0
2,CUDA,1011,1,(0|1)*11,1,0.204786,0.204786,0,0,0,4,True,0,2,1,0
3,CUDA,1100,1,(0|1)*00,1,0.194363,0.194363,0,0,0,4,True,0,2,1,0
4,CUDA,101,1,0101,1,0.137575,0.137575,0,0,0,6,True,0,2,1,0


In [94]:

print("Primi record Triton:")
triton_df.head()

Primi record Triton:


Unnamed: 0,implementation,input_string,batch_size,regex_pattern,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,match_success,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
0,Triton,101,1,(0|1)*1,1,13.906717,0.07534,0.064611,1536,0.0,3,True,0,2,1,0
1,Triton,100,1,(0|1)*0,1,14.178276,0.121593,0.06485,1536,0.0,3,True,0,2,1,0
2,Triton,1011,1,(0|1)*11,1,13.723373,0.090599,0.065088,1536,0.0,3,True,0,2,1,0
3,Triton,1100,1,(0|1)*00,1,13.950586,0.085592,0.088692,1536,0.0,3,True,0,2,1,0
4,Triton,101,1,0101,1,13.380051,0.124216,0.10252,1536,0.0,3,True,0,2,1,0


# analisi

In [95]:
# separa i dati in cuda e triton per implementation
cuda_df = df[df['implementation'] == 'CUDA']
triton_df = df[df['implementation'] == 'Triton']

In [96]:
cuda_df.head()

Unnamed: 0,implementation,input_string,batch_size,regex_pattern,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,match_success,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
116,CUDA,101,1,(0|1)*1,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
117,CUDA,100,1,(0|1)*0,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
118,CUDA,1011,1,(0|1)*11,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
119,CUDA,1100,1,(0|1)*00,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0
120,CUDA,101,1,0101,1,0.01,0.01,0.0,0,0.0,3,True,0.0,2,1,0


In [97]:
# describe
cuda_df.describe()


Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0,58.0
mean,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
std,0.0,0.0,1.7498740000000002e-18,1.7498740000000002e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
25%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
50%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
75%,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0
max,1.0,1.0,0.01,0.01,0.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0


In [98]:
triton_df.describe()

Unnamed: 0,batch_size,match_result,execution_time_ms,kernel_time_ms,mem_transfer_time_ms,memory_used_bytes,gpu_util_percent,num_states,compilation_time_ms,num_symbols,number_of_accepting_states,start_state
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,1.0,1.0,1.099541,1.099299,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
std,0.0,0.0,0.332939,0.332808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,1.040936,1.040936,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
25%,1.0,1.0,1.053333,1.053095,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
50%,1.0,1.0,1.053572,1.053333,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
75%,1.0,1.0,1.054049,1.054049,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0
max,1.0,1.0,3.601551,3.600359,0.5,10000.0,0.0,2.0,1.0,2.0,1.0,0.0


# controllo i test siano tutti

In [99]:
# Legge il file e costruisce un dataframe con le colonne: name, regex, input, expected
data = []
with open('../tests/cases/test_cases.txt', 'r') as f:
    lines = f.readlines()

current_test = None
for line in lines:
    line = line.strip()
    if not line or line.startswith('#'):
        continue

    if line.startswith('[') and line.endswith(']'):
        if current_test:
            data.append(current_test)
        # Crea un nuovo test e memorizza il nome rimuovendo le parentesi quadre
        current_test = {'name': line.strip('[]')}
    elif '=' in line and current_test is not None:
        key, value = line.split('=', 1)
        key = key.strip()
        value = value.strip()
        # Per la colonna expected, converte in bool se possibile
        if key == 'expected':
            if value.lower() == 'true':
                value = True
            elif value.lower() == 'false':
                value = False
        current_test[key] = value

if current_test:
    data.append(current_test)

# Crea il DataFrame con le colonne rilevanti
df_tests = pd.DataFrame(data)
df_tests.head()
print("Ci sono", len(df_tests), "test")

Ci sono 58 test


In [100]:
# prendi le coppie regex e inpu uniche da df e verificano che siano quante quelle in df_tests
df_tests_unique = df_tests[['regex', 'input']].drop_duplicates()
assert len(df_tests_unique) == len(df_tests), "Il numero di coppie uniche non corrisponde a quello in df_tests"
print("Il numero di coppie uniche corrisponde a quello in df_tests, ovvero:", len(df_tests_unique))

Il numero di coppie uniche corrisponde a quello in df_tests, ovvero: 58


In [101]:
# usa il parser in ../tests/cases/parser.py per generare i test e visualizza i risultati
import sys
import os
# Add the ../tests/cases folder to sys.path so we can import parser.py as a module
cases_path = os.path.abspath(os.path.join('..', 'tests', 'cases'))
if cases_path not in sys.path:
    sys.path.insert(0, cases_path)

from parser import parse_test_file
test_list = parse_test_file('../tests/cases/test_cases.txt')
df_parsed_tests = pd.DataFrame(test_list)

# trova i mancanti da df_tests
missing_tests = df_tests[~df_tests.set_index(['regex', 'input']).index.isin(df_parsed_tests.set_index(['regex', 'input']).index)]
if not missing_tests.empty:
    print("Test mancanti: " + str(len(df_tests)) + " - " + str(len(df_parsed_tests)) + " = " + str(len(missing_tests)))
    print(missing_tests)
else:
    print("Tutti i test sono stati trovati e parsati correttamente.")

Tutti i test sono stati trovati e parsati correttamente.


In [102]:
# conta i test unici di cuda e di triton singolarmente
cuda_tests_unique = cuda_df[['input_string', 'regex_pattern']].drop_duplicates()
triton_tests_unique = triton_df[['input_string', 'regex_pattern']].drop_duplicates()
print("CUDA test unici:", len(cuda_tests_unique))
print("Triton test unici:", len(triton_tests_unique))

CUDA test unici: 58
Triton test unici: 58
