In [78]:
import os
import re

import pandas as pd
from llama_cpp import Llama

In [79]:
# Location of the GGML model file passed to Llama(model_path=...).
# Set the LLAMA_MODEL_PATH environment variable to point at a different file;
# the fallback is the original hard-coded path into a local Hugging Face cache.
MODEL_PATH = os.environ.get(
    'LLAMA_MODEL_PATH',
    '/Users/vladco/.cache/huggingface/hub/models--TheBloke--Llama-2-13B-chat-GGML/snapshots/3140827b4dfcb6b562cd87ee3d7f07109b014dd0/llama-2-13b-chat.ggmlv3.q5_1.bin',
)

In [80]:
class LlamaPrompter:
    """Fill a prompt template and run the result through a llama.cpp model.

    Args:
        base_template: ``str.format``-style template; its placeholders are
            filled by :meth:`prepare_prompt`.
        model: Optional ready-to-use model object. It must be callable with
            the keyword arguments used in :meth:`prompt_model`. When omitted,
            a new ``Llama`` is constructed from ``MODEL_PATH``, so every
            prompter created without ``model`` loads its own copy of the
            weights. Pass an existing instance to share one model between
            several prompters.
    """

    def __init__(self, base_template, model=None):
        if model is None:
            # Only load the weights when the caller did not supply a model.
            model = Llama(
                model_path=MODEL_PATH,
                n_threads=10,
                n_batch=512,
                n_gpu_layers=32
            )
        self.model = model
        self.base_template = base_template

    def prepare_prompt(self, **kwargs):
        """Return ``base_template`` with its placeholders filled from ``kwargs``.

        Raises:
            KeyError: If the template names a placeholder missing from ``kwargs``.
        """
        return self.base_template.format(**kwargs)

    def prompt_model(self, prompt):
        """Run ``prompt`` through the model and return its raw response.

        The response is returned unmodified; callers in this file read the
        text from ``response['choices'][0]['text']``.
        """
        response = self.model(
            prompt=prompt,
            max_tokens=256,
            temperature=0.5,
            top_p=0.95,
            repeat_penalty=1.2,
            top_k=150,
            # The parsers in this file split the returned text on the
            # 'ASSISTANT:' marker from the prompt, so they rely on the prompt
            # being echoed back in front of the completion.
            echo=True)

        return response

In [81]:
class LlamaParser:
    """Thin adapter that applies a caller-supplied parsing callable to model output."""

    def __init__(self, parse_function):
        # Kept as-is; invoked once per parse_output() call.
        self.parse_function = parse_function

    def parse_output(self, model_output):
        """Return whatever ``parse_function`` produces for ``model_output``."""
        parse = self.parse_function
        parsed = parse(model_output)
        return parsed

In [82]:
class LlamaSession:
    """Question-generation and answer-grading session over one paragraph.

    ``start_session`` asks the model for question/answer pairs about the
    paragraph and stores them in ``questions`` / ``answers``.
    ``receive_answers`` then grades one user answer against one stored pair.

    Args:
        paragraph: Source text that the questions are generated from and that
            answers are graded against.
    """

    # Marker that ends both prompt templates. The model text handed to the
    # parsers is expected to contain the echoed prompt, so everything after
    # this marker is the model's own reply.
    ASSISTANT_MARKER = 'ASSISTANT:'

    def __init__(self, paragraph):
        self.base_template_1 = '''SYSTEM: You are a humble learning assistant trained to generate questions and provide answers based on the provided text. Be clear and concise.

USER: Generate three question and corresponding answers based on the following paragraph.
Output should be in format: 
Q: [your question]
A: [your answer to your question]
Context: {paragraph}

ASSISTANT:
'''
        self.base_template_2 = '''SYSTEM: You are a humble learning assistant trained to analyze learner answer on provided text and corresponding question. Be clear and concise.

USER: Based on the following paragraph, question, correct answer and user answer, tell to user if his answer was correct, and very briefly explain why.
Paragraph: {paragraph}
Question: {question}
Correct answer: {correct_answer}
User answer: {user_answer}

ASSISTANT:
'''
        # One prompter per template (question generation and grading).
        self.prompter = LlamaPrompter(self.base_template_1)
        self.prompter_grader = LlamaPrompter(self.base_template_2)
        self.parser_questions_answers = LlamaParser(self.parse_questions_answers)
        self.parser_grades = LlamaParser(self.parse_grades)
        # Filled in by start_session(); None means "not generated yet".
        self.questions = None
        self.answers = None
        self.paragraph = paragraph

    def start_session(self):
        """Generate question/answer pairs for the paragraph and store them."""
        prompt = self.prompter.prepare_prompt(paragraph=self.paragraph)
        raw_output = self.prompter.prompt_model(prompt)
        generated_text = raw_output['choices'][0]['text']
        self.questions, self.answers = self.parser_questions_answers.parse_output(generated_text)

    def receive_answers(self, user_answers, idx, verbose=True):
        """Grade one user answer against the stored question ``idx``.

        Args:
            user_answers: The user's answer text for question ``idx`` (one
                answer, despite the plural name).
            idx: Index into ``questions`` / ``answers``.
            verbose: When true (the default), print the grading prompt and
                the raw model text, as the original implementation did.

        Returns:
            The grader's reply with the echoed prompt removed.

        Raises:
            RuntimeError: If ``start_session`` has not populated the questions.
            IndexError: If ``idx`` is outside the generated questions.
        """
        if self.questions is None or self.answers is None:
            raise RuntimeError('start_session() must be called before receive_answers().')

        prompt = self.prompter_grader.prepare_prompt(
            paragraph=self.paragraph,
            question=self.questions[idx],
            correct_answer=self.answers[idx],
            user_answer=user_answers
        )
        if verbose:
            print('prompt:', prompt, '\n')
        raw_output = self.prompter_grader.prompt_model(prompt)
        generated_text = raw_output['choices'][0]['text']
        if verbose:
            print(generated_text)
        grading_result = self.parser_grades.parse_output(generated_text)

        return grading_result

    @staticmethod
    def parse_questions_answers(model_output):
        """Extract ``(questions, answers)`` from the model text.

        Only the text after the first ``ASSISTANT:`` marker is parsed, so the
        ``Q:`` / ``A:`` placeholders in the echoed prompt (and any such text
        inside the paragraph) are never mistaken for generated pairs. If the
        marker is absent, the whole text is parsed.

        Questions and answers are matched together as pairs, so the two
        returned lists always have the same length and index ``i`` in one
        corresponds to index ``i`` in the other.
        """
        _, marker, completion = model_output.partition(LlamaSession.ASSISTANT_MARKER)
        if not marker:
            # No echoed prompt to strip; treat the input as the completion.
            completion = model_output
        pairs = re.findall(r"Q: (.*?)\s+A: (.*?)(?=Q:|$)", completion, flags=re.DOTALL)
        questions = [question.strip() for question, _ in pairs]
        answers = [answer.strip() for _, answer in pairs]
        return questions, answers

    @staticmethod
    def parse_grades(model_output):
        """Return the grader's reply, stripped of surrounding whitespace.

        The reply is the text between the first and second ``ASSISTANT:``
        markers (or to the end if there is only one). If the marker is
        absent, the whole text is returned instead of raising ``IndexError``.
        """
        parts = model_output.split(LlamaSession.ASSISTANT_MARKER)
        reply = parts[1] if len(parts) > 1 else parts[0]
        return reply.strip()

    def get_results(self):
        """Return the stored ``(questions, answers)`` tuple (``None`` values before start_session)."""
        return self.questions, self.answers

In [83]:
# Load the cleaned dataset; the CSV's first column is used as the row index.
dataset = pd.read_csv('dataset_clean.csv', index_col=0)

In [84]:
# Show the paragraph used for the demo session. A single .loc[row, column]
# lookup replaces the chained dataset.loc[0]['context'] indexing.
dataset.loc[0, 'context']

'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.'

In [85]:
# Build the session for the first paragraph. Constructing it creates the
# prompters, which load the model (see the llama.cpp log output below).
# A single .loc[row, column] lookup replaces the chained indexing.
session = LlamaSession(dataset.loc[0, 'context'])

llama.cpp: loading model from /Users/vladco/.cache/huggingface/hub/models--TheBloke--Llama-2-13B-chat-GGML/snapshots/3140827b4dfcb6b562cd87ee3d7f07109b014dd0/llama-2-13b-chat.ggmlv3.q5_1.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 5.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 9 (mostly Q5_1)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_int

In [86]:
# Generate and parse the question/answer pairs for the session's paragraph
# (about 47 s total in the recorded run below).
session.start_session()


llama_print_timings:        load time = 15711.50 ms
llama_print_timings:      sample time =   121.09 ms /   188 runs   (    0.64 ms per token,  1552.55 tokens per second)
llama_print_timings: prompt eval time = 15711.35 ms /   264 tokens (   59.51 ms per token,    16.80 tokens per second)
llama_print_timings:        eval time = 31382.46 ms /   187 runs   (  167.82 ms per token,     5.96 tokens per second)
llama_print_timings:       total time = 47411.19 ms


In [87]:
# Display the generated (questions, answers) tuple via the session's accessor.
session.get_results()

(['What architectural style does the school have?',
  'What are the names of the statues located in front and next to the Main Building?',
  'What is special about the Grotto located behind the basilica?'],
 ['The school has a Catholic character with a gold dome on top of its Main Building featuring a golden statue of Virgin Mary.',
  'In front of the Main Building, there is a copper statue of Christ with arms upraised bearing the legend "Venite Ad Me Omnes". Next to the Main Building stands the Basilica of the Sacred Heart.',
  'The Grotto is a replica of the grotto at Lourdes, France where Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. It serves as a Marian place of prayer and reflection.'])

- ## Kod je takav da prima jedno pitanje, pa ga ocenjuje. Zbog toga bi bilo najbolje da napravis tako da odstampas prvo pitanje, cekas odgovor, posaljes grader-u taj odgovor, a da u medjuvremenu ti user-u das sledece pitanje da na njega odgovara, dok se ovo ocenjuje (treba mu tridesetak sekundi da oceni jedan odgovor).
- ## Najveci problem je generisanje pitanja i odgovora, posto to traje 45-55 sekundi. Tu prosto moramo cekati ili namestiti da neki majmun igra dok se ceka. Ono sto je dobro jeste da ce biti generisana sva tri para (pitanje, odgovor), pa ce gornji bullet biti moguc.

In [89]:
# Placeholder answers. In the real flow these should be collected from the user,
# one at a time rather than all three at once (see the Markdown cell above).
user_answers = ['nemam', 'blage', 'veze']
# Only the first answer is graded in this demo.
user_answer = user_answers[0]

In [90]:
# Grade the user's answer against generated question 0 (the same index as user_answers[0]).
# receive_answers also prints the grading prompt and the raw model text.
grading_result = session.receive_answers(user_answer, 0)

prompt: SYSTEM: You are a humble learning assistant trained to analyze learner answer on provided text and corresponding question. Be clear and concise.

USER: Based on the following paragraph, question, correct answer and user answer, tell to user if his answer was correct, and very briefly explain why.
Paragraph: Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone sta


llama_print_timings:        load time = 12120.19 ms
llama_print_timings:      sample time =    87.69 ms /   136 runs   (    0.64 ms per token,  1550.88 tokens per second)
llama_print_timings: prompt eval time = 12120.11 ms /   304 tokens (   39.87 ms per token,    25.08 tokens per second)
llama_print_timings:        eval time = 22449.40 ms /   135 runs   (  166.29 ms per token,     6.01 tokens per second)
llama_print_timings:       total time = 34796.82 ms


In [91]:
# Header line followed by the grader's reply on its own line.
print("Grading Results:", grading_result, sep="\n")

Grading Results:
Your answer was incorrect, but I understand that you might be new to this topic. Here's why the correct answer is "Catholic": The architectural style of the school is Catholic because it features a gold dome on top of its Main Building with a golden statue of Virgin Mary, and also has a Basilica of the Sacred Heart immediately next to it. Additionally, the Grotto behind the basilica is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. All these elements are characteristic of Catholic architecture and design.
