In [2]:
import numpy as np
import ollama
import time

#### `Описание просчёта`
Класс <b>LLMmodel()</b> при инициализации принимает набор параметров <b>params={...}</b>. В нашем случае он содержит только температуру.
В начале определения класса LLMmodel() вручную задаётся два параметра:
1. Название модели: `deepseek_model = "deepseek-r1:7b"`;
2. Сколько раз рассчитать точность: `epochs = 10`.


При создании объекта класса LLMmodel() он сразу же будет считать точность на нашем датасете (пример с Майком и классической музыкой).
После этого в атрибутах: 
1. `Ac_list` - хранится массив точностей, рассчитанных для каждой эпохи (итерации);
2. `Ac` - хранится средняя точность за все эпохи.

In [None]:
class LLMmodel():
    # Задание модели для Ollama
    deepseek_model = "deepseek-r1:1.5b"
    epochs = 10  # Сколько раз просчитывать эксперимент

    # DeepSeek выводит свой "мыслительный процесс" между токенами <think>...</think>. Если он нам не нужен, то можем его показывать
    show_thinking = True
    show_prompt = True

    # В инициализации задаются все необходимые массивы с вопросами и ответами
    def __init__(self, parameters = {"temperature": 0.8}):
        self.test_question = "Why does Mike like listening to classical music?"

        # Правильный ответ
        self.correct_answer = "Mike likes listening to classical music because it is calming."

        # Ключевой вопрос, на который ответ только "да" или "нет"
        self.keyQuestion = "Does Mike like classical music because it helps him relax or calm down?"

        # Правильные ответы - ответ "да" на ключевой вопрос
        self.student_answers_correct = [
            "Mike likes listening to classical music, it is calming.",
            "Mike likes listening to classical music, it is relaxing.",
            "Classical music calms him down.",
            "When Mike listens to classical music, he feels calm.",
            "Mike is calmed by classical music.",
            "When classical music is playing, Mike feels calm.",
            "When listening to classical music, Mike can relax.",
            "Classical music helps him relax, calm down.",
            "Mike enjoys classical music because it helps him relax.",
            "Listening to classical music soothes Mike's mind.",
            "Mike finds peace in the melodies of classical music.",
            "Classical music brings a sense of calm to Mike's.",
            "Mike loves how classical music creates a tranquil atmosphere.",
            "Mike often listens to classical music to unwind.",
            "The soothing sounds of classical music are Mike's favorite.",
            "Mike feels at ease when he listens to classical music.",
            "Classical music is Mike's go-to for relaxation.",
            "Mike appreciates the calming effect of classical music.",
            "Mike turns to classical music to de-stress.",
            "The harmony of classical music comforts Mike.",
            "Classical music helps Mike feel more centered.",
            "Mike's mood improves when he listens to classical music.",
            "Mike cherishes the tranquility that classical music brings.",
            "Mike feels a sense of calm while listening to classical music.",
            "For Mike, classical music is the way to relax."
        ]

        # Неправильные ответы - ответ "нет" на ключевой вопрос
        self.student_answers_negative_incorrect = [
            "Mike does not enjoy classical music because it does not help him relax.",
            "Classical music does not have a calming effect on Mike.",
            "Mike finds classical music unappealing and not soothing.",
            "Listening to classical music does not bring Mike any peace.",
            "Mike does not feel relaxed when he hears classical music.",
            "Classical music is not Mike’s choice for relaxation.",
            "Mike does not find classical music comforting or calming.",
            "Classical music does not help Mike unwind or de-stress.",
            "Mike does not connect with the tranquility of classical music.",
            "Classical music does not create a relaxing atmosphere for Mike.",
            "Mike does not appreciate the calming qualities of classical music.",
            "Classical music does not make Mike feel at ease.",
            "Mike does not turn to classical music for relaxation.",
            "Classical music does not resonate with Mike’s idea of calm.",
            "Mike does not feel any sense of serenity from classical music.",
            "Classical music does not help Mike feel more centered or peaceful.",
            "Mike does not enjoy the soothing sounds of classical music.",
            "Classical music does not provide Mike with a sense of tranquility.",
            "Mike does not find classical music to be a source of relaxation.",
            "Classical music does not appeal to Mike as a way to unwind."
        ]

        # Неправильные ответы - не имеют связи с вопросом
        self.student_answers_incorrect = [
            "Mike likes listening to classical music because it inspires him.",
            "Mike enjoys classical music, though he can not quite explain why.",
            "Mike loves the melodies of classical music; they lift his spirits.",
            "Mike likes starting his day with classical music playing in the background.",
            "Mike appreciates classical music because he plays it on the piano.",
            "Mike is drawn to classical music because of its complexity.",
            "Mike enjoys the harmonies in classical music.",
            "Mike likes listening to classical music because it tells stories without words.",
            "Mike finds classical music fascinating, though he not sure what makes it special.",
            "Mike loves how classical music makes him feel nostalgic.",
            "Mike likes classical music because it helps him focus.",
            "Mike enjoys the elegance of classical music.",
            "Mike likes classical music because it reminds him of his childhood.",
            "Mike is captivated by the emotions in classical music.",
            "Mike likes classical music because it feels timeless.",
            "Mike enjoys the way classical music evolves throughout a piece.",
            "Mike likes classical music because it challenges him to listen carefully.",
            "Mike loves the grandeur of classical music.",
            "Mike likes classical music because it transports him to another world.",
            "Mike enjoys classical music because it feels like a puzzle to unravel.",
            "Mike likes listening to classical music because it is interesting.",
            "Mike likes listening to classical music, but it is not clear why.",
            "Mike likes the sounds of classical music, they cheer him up.",
            "Mike likes waking up to the sounds of classical music.",
            "Mike likes classical music because he composes it himself.",
            "Mike likes classical music because it is difficult to understand.",
            "Mike likes its rhythm."
        ]
        
        # Запись переданного параметра температуры
        self.temperature = parameters["temperature"]
        self.params = {"temperature": self.temperature}

        # Задание массива для сохранения времени обработки запроса
        self.PromtTimes = []

        # Дополнительные переменные для рассчёта точности
        self.nCorrect = 0
        self.nAll = len(self.student_answers_correct) + len(self.student_answers_incorrect) + len(self.student_answers_negative_incorrect)

        # Цикл просчёта и запись точности
        self.Ac_list = []
        for i in range(self.epochs):
            self.Ac_list.append(self.get_accurancy())
            self.nCorrect = 0

        self.Ac = np.mean(self.Ac_list)  # Средняя точность

        # Вывод результатов
        print("model: " + self.deepseek_model + "\t" + "temperature: " +  str(self.temperature) + "\t" + "Accuracy list: " + str(self.Ac_list) + "\n" + "Accuracy Mean: " + str(self.Ac) + 
              "\n" + "Promt time max: " + str(max(self.PromtTimes)) + " s. \n" + "Promt time min: " + str(min(self.PromtTimes)) + " s. \n" + "Promt time mean: " + str(np.mean(self.PromtTimes)) + " s. ")
        
        # Максимальное отклонение от среднего значения точности
        self.MaxDeviation = max(abs(max(self.Ac_list) - self.Ac), abs(min(self.Ac_list) - self.Ac))
        self.STDAccuracy = np.std(self.Ac_list)   # СКО

        print("Max Deviation: ", self.MaxDeviation)
        print("STD Accuracy: ", self.STDAccuracy)
        print("Min Accuracy: ", min(self.Ac_list))
        print("Max Accuracy: ", max(self.Ac_list))

    # Функция для создания запроса (промта) на основе алгоритма, описанного в статье
    def create_prompt(self, keyQ, studentA):
        return studentA + " Based on the previous sentence, answer the question: " + keyQ + " Answer only yes or no." + " If the answer is incorrect, just answer \"no\"."
    
    # Удаление части, где deepseek рассуждает
    def remove_thinking(self, response):
        think_token = r"</think>"
        response_without_thinking = response[response.find(think_token)+len(think_token)+2:]
        return response_without_thinking

    # Генерация ответа моделью / Замер времени обработки запроса (сколько времени занимает просчёт)
    def get_response_and_time(self, prompt):
        start_time = time.time()
        gen = ollama.generate(model=self.deepseek_model, prompt=prompt, options=self.params)
        end_time = time.time()
        rs = gen.response
        return (rs, round(end_time-start_time, 1))
    
    # Функция для просчёта точности
    def get_accurancy(self):
        self.benchmark_answers(answers=self.student_answers_correct, typeA = True)
        self.benchmark_answers(answers=self.student_answers_negative_incorrect, typeA = False)
        self.benchmark_answers(answers=self.student_answers_incorrect, typeA = False)
        return round(self.nCorrect / self.nAll, 4)

    # Основная функция, прогонка по всем ответам и сравнение с ответом модели
    def benchmark_answers(self, answers, typeA: bool):
        for (i,ans) in enumerate(answers):
            rs, curr_time = self.get_response_and_time(self.create_prompt(self.keyQuestion, ans))
            rs_w_thinking = self.remove_thinking(rs)
            rs_w_thinking = rs_w_thinking.lower()
            self.PromtTimes.append(curr_time)

            if "yes" in rs_w_thinking and typeA == True:
                self.nCorrect = self.nCorrect + 1
            if "no" in rs_w_thinking and typeA == False:
                self.nCorrect = self.nCorrect + 1

In [4]:
params = {"temperature": 0.0}
model = LLMmodel(params)

model: deepseek-r1:1.5b	temperature: 0.0	Accuracy list: [0.6389, 0.6389]
Accuracy Mean: 0.6389
Promt time max: 10.7 s. 
Promt time min: 2.1 s. 
Promt time mean: 4.492361111111111 s. 
Max Deviation:  0.0
STD Accuracy:  0.0
Min Accuracy:  0.6389
Max Accuracy:  0.6389
