### Обработка результатов из .csv файла

---

In [1]:
import ollama
import pandas as pd

from statistics import mean
from pathlib import Path

In [2]:
# Path to results directory
p_results = (Path('..')/"responses").resolve().absolute()
print(f"Results directory: {p_results}")

Results directory: /home/alex/Desktop/LLMLocalCompare/responses


In [3]:
# Set the .csv filename to process
csv_filename = "deepseek-r1:1.5b_t=0.0-1.0_almost_all.csv"

In [4]:
# Open .csv file as pandas dataframe
results_csv = pd.read_csv(p_results/csv_filename, sep=':', lineterminator='\n', encoding='cp1252')

results_csv_answer_N = results_csv["Ans #"]
results_csv_answer_correctness = results_csv["Ans correct"]
results_csv_temperature = results_csv["Temperature"]
results_csv_LLM_correctness = results_csv["LLM correct"]
results_csv_runtime = results_csv["Run time"]
# results_csv_response = results_csv["LLM response\r"] # For files generated on Windows
# results_csv_response = results_csv["LLM response"] # For files generated on Linux

# Set limited number of rows to display in a wide table
pd.set_option('display.max_rows', 10)
pd.set_option('display.width', 120)
pd.set_option('display.max_colwidth', 40)

print(results_csv)

      Ans #  Ans correct  Temperature  LLM correct  Run time                           LLM response\r
0         1         True          0.0         True  5.640768  <think>Okay, so I need to figure out...
1         1         True          0.0         True  1.905903  <think>Okay, so I need to figure out...
2         1         True          0.0         True  1.944042  <think>Okay, so I need to figure out...
3         1         True          0.0         True  1.936503  <think>Okay, so I need to figure out...
4         1         True          0.0         True  1.939966  <think>Okay, so I need to figure out...
...     ...          ...          ...          ...       ...                                      ...
8826     41        False          0.3        False  3.536721  <think>Okay, so I need to figure out...
8827     41        False          0.3         True  4.690751  <think>Okay, so I need to figure out...
8828     41        False          0.3        False  2.560262  <think>Okay, so I ne

In [5]:
# Get unique answers numbers and unique temperatures from the .csv file
unique_answers_N = set(results_csv_answer_N)
unique_temperatures = set(results_csv_temperature)

# Populate a table of Answer № -> [accuracies @ temperatures]
final_table = []
for answer_N in unique_answers_N:
    curr_row = []
    curr_row.append(answer_N)
    ids_match_answer_N = [id for id in range(len(results_csv_answer_N)) if results_csv_answer_N[id]==answer_N]
    for (i, temp) in enumerate(unique_temperatures):
        ids_match_temp = [id for id in ids_match_answer_N if results_csv_temperature[id]==temp]
        LLM_correctnesses = results_csv_LLM_correctness[ids_match_temp]
        LLM_correctness_mean = mean(LLM_correctnesses)*100 if len(LLM_correctnesses) != 0 else 0.0
        curr_row.append(LLM_correctness_mean)
    final_table.append(curr_row)

# Add a final row with overall accuracies @ temperatures
final_row = ["Overall",] + [0 for i in range(len(unique_temperatures))]
for (i, temp) in enumerate(unique_temperatures):
    curr_correctnesses = [final_table[j][i+1] for j in range(len(unique_answers_N))]
    overall_correctness = mean(curr_correctnesses)
    final_row[i+1] = overall_correctness
final_table.append(final_row)

# Set umlimited number of rows to display
pd.set_option('display.max_rows', None)

print(f"\nCorrectness table (answer number -> correctness with different temperatures)")
pd.DataFrame(final_table, columns=["Answer #",] + [f"Correctness (t={temp})" for temp in unique_temperatures])


Correctness table (answer number -> correctness with different temperatures)


Unnamed: 0,Answer #,Correctness (t=0.0),Correctness (t=0.4),Correctness (t=0.1),Correctness (t=0.3),Correctness (t=0.2),Correctness (t=0.5),Correctness (t=0.6),Correctness (t=0.7),Correctness (t=0.8),Correctness (t=0.9),Correctness (t=1.0)
0,1,100.0,95.0,85.0,85.0,100.0,90.0,90.0,70.0,95.0,90.0,89.473684
1,2,100.0,95.0,95.0,100.0,80.0,95.0,90.0,94.736842,65.0,80.0,85.0
2,3,100.0,95.0,100.0,100.0,95.0,100.0,100.0,90.0,85.0,85.0,85.0
3,4,100.0,75.0,80.0,80.0,75.0,70.0,75.0,70.0,70.0,65.0,70.0
4,5,95.0,70.0,73.684211,90.0,75.0,85.0,78.947368,95.0,80.0,89.473684,75.0
5,6,0.0,75.0,70.0,60.0,75.0,80.0,80.0,90.0,85.0,65.0,80.0
6,7,100.0,95.0,95.0,95.0,90.0,85.0,85.0,75.0,80.0,90.0,90.0
7,8,0.0,94.736842,80.0,80.0,100.0,95.0,95.0,85.0,55.0,88.888889,95.0
8,9,100.0,100.0,100.0,100.0,95.0,100.0,94.736842,85.0,95.0,95.0,95.0
9,10,100.0,80.0,95.0,75.0,100.0,70.0,75.0,75.0,60.0,89.473684,80.0


---