### Генерация ответов LLM, запись в .csv файл

---

In [1]:
import ollama
import pandas as pd

import time
from statistics import mean
from pathlib import Path

In [2]:
# Set the LLM model; choose one from ollama.com
# The Ollama model should be installed beforehand
LLM_model = "deepseek-r1:1.5b"

In [3]:
# Directory with the test question, the reference answer and the students' answers
# (located one level above the current working directory)
p_dataset = Path("..", "Test Data", "mike_and_classical_music").resolve().absolute()
print(f"Dataset directory: {p_dataset}")

# Directory that receives the generated LLM responses
p_results = Path("..", "responses").resolve().absolute()
print(f"Results directory: {p_results}")

Dataset directory: /home/alex/Desktop/LLMLocalCompare/Test Data/mike_and_classical_music
Results directory: /home/alex/Desktop/LLMLocalCompare/responses


In [4]:
def load_from_txt(path):
    """Read a text file and return its content as a single line.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the text file to read.

    Returns
    -------
    str
        The file content with every newline character removed; consecutive
        lines are joined without any separator.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale-specific default encoding of the machine running the notebook.
    with open(path, encoding="utf-8") as f:
        text = f.read()
    return text.replace("\n", "")

# Read the text fixtures of the test case from the dataset directory
test_question = load_from_txt(p_dataset.joinpath("test_question.txt"))
key_question = load_from_txt(p_dataset.joinpath("key_question.txt"))
correct_answer = load_from_txt(p_dataset.joinpath("correct_answer.txt"))

# Echo the question that is put to the LLM and the reference answer
print(f"Key question: {key_question}", f"Correct answer: {correct_answer}", sep="\n")

Key question: Does Mike like classical music because it helps him relax or calm down?
Correct answer: Mike likes listening to classical music because it is calming.


In [5]:
# The students' answers are stored as a colon-separated table
# with one record per line (columns: N, Correctness, Answer)
answers_data_csv = pd.read_csv(
    p_dataset.joinpath("answers_data.csv"),
    sep=":",
    lineterminator="\n",
)
print(answers_data_csv)

     N  Correctness                                             Answer
0    1         True  Mike likes listening to classical music, it is...
1    2         True  Mike likes listening to classical music, it is...
2    3         True                    Classical music calms him down.
3    4         True  When Mike listens to classical music, he feels...
4    5         True                 Mike is calmed by classical music.
..  ..          ...                                                ...
67  68        False  Mike likes the sounds of classical music, they...
68  69        False  Mike likes waking up to the sounds of classica...
69  70        False  Mike likes classical music because he composes...
70  71        False  Mike likes classical music because it is diffi...
71  72        False                             Mike likes its rhythm.

[72 rows x 3 columns]


In [6]:
# Split the table into one Series per column for direct lookup in the generation loop
answers_data_N, answers_data_correctness, answers_data_answer = (
    answers_data_csv[column] for column in ("N", "Correctness", "Answer")
)

In [7]:
# Create the results .csv file and write the header row for the data columns.
# The timestamp in the file name gives every run its own results file.
results_filename = f"v7_res_{time.time()}.csv"
# Make sure the results directory exists; open() would otherwise fail with
# FileNotFoundError on a fresh checkout.
p_results.mkdir(parents=True, exist_ok=True)
# Explicit UTF-8 so the file encoding does not depend on the machine's locale.
with open(p_results/results_filename, 'w', encoding="utf-8") as file:
    file.write("Ans #:Ans correct:Temperature:LLM correct:Run time:LLM response")

In [8]:
def remove_thinking(response):
    """Strip the leading <think>...</think> section from an LLM response.

    Everything up to and including the first "</think>" token is dropped,
    together with any whitespace that directly follows the token.

    Parameters
    ----------
    response : str
        Raw text returned by the model.

    Returns
    -------
    str
        The text after the first "</think>" token with leading whitespace
        removed, or `response` unchanged when the token is not present.
    """
    think_token = "</think>"
    _, found_token, after_thinking = response.partition(think_token)
    if not found_token:
        return response
    # Do not assume a fixed number of separator characters after the token:
    # skipping a hard-coded 2 characters cuts into the answer whenever the
    # model does not emit exactly two newlines there (e.g. "</think>yes").
    return after_thinking.lstrip()

In [9]:
def prompt_func2(key_question, student_answer):
    """Build the yes/no grading prompt for one student's answer.

    The prompt consists of the student's answer, an instruction to answer
    `key_question` based on that sentence, and a restriction to reply only
    with yes or no.

    Parameters
    ----------
    key_question : str
        The yes/no question the LLM has to answer.
    student_answer : str
        The student's sentence the LLM has to base its answer on.

    Returns
    -------
    str
        The assembled prompt.
    """
    prompt_parts = (
        student_answer,
        " Based on the previous sentence, answer the question: ",
        key_question,
        " Answer only yes or no.",
        " If the answer is incorrect, just answer \"no\".",
    )
    return "".join(prompt_parts)

# Preview the prompt generated for the first student's answer
first_prompt_preview = prompt_func2(key_question, answers_data_answer[0])
print(first_prompt_preview)

Mike likes listening to classical music, it is calming. Based on the previous sentence, answer the question: Does Mike like classical music because it helps him relax or calm down? Answer only yes or no. If the answer is incorrect, just answer "no".


In [None]:
# Test parameters
# answers_range = [0, 56]
# temperatures = [0.0, 0.1]
# epochs = 2

# Real parameters
answers_range = list(range(len(answers_data_N)))
temperatures = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
epochs = 20

# Nested loops to generate LLM responses, mark them as correct/incorrect and save to .csv file
# for every student's answer, for every temperature, for every epoch
for answer_N in answers_range:
    prompt = prompt_func2(key_question, answers_data_answer[answer_N])
    answer_data_number = answers_data_N[answer_N]
    answer_correctness = answers_data_correctness[answer_N]
    for (j, temp) in enumerate(temperatures):
        for epoch in range(epochs):
            print(f"Running answer #{answer_N}; Running temp #{j}; Running epoch #{epoch};      ", end="\r")
            # perf_counter() is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments during generation.
            time_start = time.perf_counter()
            gen = ollama.generate(model=LLM_model, prompt=prompt, options={"temperature":temp})
            time_end = time.perf_counter()
            run_time = time_end - time_start
            rs = gen.response
            response_without_thinking = remove_thinking(rs)
            # The LLM verdict is "yes" when the substring occurs anywhere in the
            # answer part; it is correct when it matches the labelled correctness.
            # NOTE(review): the substring test also matches words that merely
            # contain "yes" - confirm this is acceptable for the evaluation.
            LLM_correct = (("yes" in response_without_thinking.lower()) == answer_correctness)
            # Remove the column separator and newlines so the response stays in one CSV field
            rs_safe_for_csv = rs.replace(":","").replace("\n","")
            to_write = f"\n{answer_data_number}:{answer_correctness}:{temp}:{LLM_correct}:{run_time}:{rs_safe_for_csv}"
            # The file is reopened for every row so that finished rows are on disk
            # even if the run is interrupted. UTF-8 with errors="replace" lets
            # responses with unencodable characters be written instead of dropped.
            with open(p_results/results_filename, 'a', encoding="utf-8", errors="replace") as file:
                try:
                    file.write(to_write)
                except (OSError, UnicodeError) as write_error:
                    # Best effort: keep the run going, but report the lost row
                    # instead of swallowing the failure silently.
                    print(f"\nWarning: could not write row for answer #{answer_N}, temp #{j}, epoch #{epoch}: {write_error!r}")


Далее ответы LLM, сохраненные в .csv файле, обрабатываются в `final-csv_results_processing.ipynb`

---