In [14]:
import importlib
import resource_utils
importlib.reload(resource_utils)
from resource_utils import get_total_vram, get_available_vram, get_gpu_processes_usage, kill_gpu_processes


print(f"PROCCESSES: ", get_gpu_processes_usage())
print("TOTAL VRAM: ", get_total_vram())
print("AVAILABLE VRAM: ", get_available_vram())

PROCCESSES:  []
TOTAL VRAM:  23.988
AVAILABLE VRAM:  23.61


In [10]:
kill_gpu_processes(get_gpu_processes_usage(), verbose=True)

: 

In [16]:
# Config, system prompts, query_list
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import numpy as np
import random
import cohere

with open('./data/questions.md', 'r') as file:
    query_list = file.read().splitlines()

CONFIG = {
    'csv_path': './data/text_chunks_with_embeddings.csv',
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'attn_implementation': 'sdpa',
    'model_id': 'meta-llama/Meta-Llama-3-8B-Instruct',
    'embedding_model_id': 'all-mpnet-base-v2',
}

SYS_PROMPT = {
    "education": """
    You are Study-Buddy. An educational chatbot that will aid students in their studies.
    You are given the extracted parts of curriculum specific documents and a question. Provide a conversational and educational answer with good and easily read formatting.
    Give yourself room to think by extracting relevant passages from the context before answering the query.
    Don't return the thinking, only return the answer.
    If you don't know the answer, just say "I do not know." Don't make up an answer.
    """,
    "relevance": """
    You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
    """,
    "socratic_sage": """
    You are an AI assistant capable of having in-depth Socratic style conversations on a wide range of topics. Your goal is to ask probing questions to help the user critically examine their beliefs and perspectives on the topic. Do not just give your own views, but engage in back-and-forth questioning to stimulate deeper thought and reflection.
    """
}


## Text and Embedding

In [17]:
def import_chunks_with_embeddings(csv_path: str):
    """
    Load text chunks and their stored embeddings from a CSV file.

    The first CSV column is used as the index. Each 'embedding' cell is
    expected to hold a bracketed, whitespace-separated list of numbers
    (e.g. "[0.1 0.2 0.3]") and is parsed into a NumPy array.

    Returns a list with one dict per row (column name -> value).
    """
    def _parse_embedding(raw: str):
        # Drop the enclosing brackets, then parse the whitespace-separated numbers.
        return np.fromstring(raw[1:-1], sep=' ')

    frame = pd.read_csv(csv_path, index_col=0)
    frame['embedding'] = frame['embedding'].apply(_parse_embedding)
    return frame.to_dict(orient='records')

def get_chunks_embeddings_as_tensor(chunks_with_embeddings: list[dict]):
    """
    Stack every chunk's 'embedding' array into a single float32 tensor.

    Row i of the returned tensor is the embedding of chunks_with_embeddings[i].
    """
    stacked = np.stack([record['embedding'] for record in chunks_with_embeddings], axis=0)
    return torch.tensor(stacked, dtype=torch.float32)

# Load chunks and embeddings
chunks_with_embeddings = import_chunks_with_embeddings(CONFIG['csv_path'])
embeddings = get_chunks_embeddings_as_tensor(chunks_with_embeddings).to(CONFIG['device'])
# chunks_with_embeddings

## Retrieval and Inference

- Using Llama 3: https://huggingface.co/blog/llama3


In [18]:
# LOAD MODELS

from dotenv import load_dotenv
import os
import cohere
import getpass

load_dotenv(override=True)

os.environ["COHERE_API_KEY"] = os.getenv("COHERE_API_KEY") or getpass.getpass("Enter your Cohere API key: ")

co = cohere.Client(os.environ["COHERE_API_KEY"])

embedding_model = SentenceTransformer(model_name_or_path=CONFIG['embedding_model_id'], device=CONFIG['device'])
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=CONFIG['model_id'])
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=CONFIG['model_id'], 
    torch_dtype=torch.float16, 
    #low_cpu_mem_usage=False, 
    attn_implementation=CONFIG['attn_implementation']
    ).to(CONFIG['device'])

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]



tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [19]:
from typing import List, Dict
def retrieve_relevant_resources(query: str,
                                embeddings: torch.tensor,
                                embedding_model: SentenceTransformer,
                                top_k: int=5):
    """
    Embeds a query with model and returns top k scores and indices from embeddings.

    Parameters:
    query (str): The question to search for.
    embeddings (torch.tensor): Candidate embeddings to score against the query.
    embedding_model (SentenceTransformer): Model whose `encode` embeds the query.
    top_k (int): Maximum number of results. It is capped at the number of
        candidates so that a small corpus does not make torch.topk raise.

    Returns:
    (scores, indices) as produced by torch.topk over the dot-product scores.
    """
    # Embed query
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    # Get dot product scores on embeddings
    dot_scores = util.dot_score(a=query_embedding, b=embeddings)[0]

    # torch.topk raises when k is larger than the number of scores, so cap it.
    k = min(top_k, dot_scores.shape[0])
    scores, indices = torch.topk(dot_scores, k=k)
    return scores, indices



def retrieve(query: str) -> List[Dict[str, str]]: 
    """Replacement for retrieve_relevant_resources (placeholder: not implemented yet)."""
    ...




def generate_model_response(prompt: str, tokenizer, model, terminators, device="cuda"):
    """
    Generate a chat completion and return only the newly generated text.

    Parameters:
    prompt: Passed unchanged to `tokenizer.apply_chat_template`. Despite the
        `str` annotation, this notebook passes a list of chat messages here.
    tokenizer: Tokenizer providing `apply_chat_template`, `decode` and `eos_token_id`.
    model: Model providing `generate`.
    terminators: Token id(s) passed as `eos_token_id` to stop generation.
    device: Device the input ids are moved to before generation.

    Returns:
    str: The decoded completion, without the prompt and without special tokens.
    """
    input_ids = tokenizer.apply_chat_template(
        prompt,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(device)

    outputs = model.generate(
        input_ids,
        max_new_tokens=1024,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )

    # outputs[0] holds prompt + completion; slicing from input_ids.shape[-1]
    # removes the prompt tokens and keeps only the generated ones.
    response = outputs[0][input_ids.shape[-1]:]

    # Skip special tokens so the terminator (e.g. <|eot_id|>) does not leak
    # into the text shown to the user.
    return tokenizer.decode(response, skip_special_tokens=True)


def get_user_prompt(query: str, retrieved_documents: list[dict]):
    """
    Build the user message: the query followed by one bullet per retrieved document.

    Only the 'text' field of each document is used.
    """
    header = f"Query: {query}\nContext:"
    bullets = [f"\n- {doc['text']}" for doc in retrieved_documents]
    return "".join([header, *bullets])

def format_prompt(formatted_prompt: str, sys_prompt: str):
    """
    Wrap a user prompt in a two-message chat: system message first, then user.

    `sys_prompt` is a key into SYS_PROMPT selecting the system message text;
    `formatted_prompt` becomes the content of the user message.
    """
    system_message = dict(role="system", content=SYS_PROMPT[sys_prompt])
    user_message = dict(role="user", content=formatted_prompt)
    return [system_message, user_message]



## Query

In [20]:
query = random.choice(query_list)
print(f"Query: {query}")

# similarity scores and indices on chunk embeddings
scores, indices = retrieve_relevant_resources(
    query=query, 
    embeddings=embeddings, 
    embedding_model=embedding_model)

user_prompt = get_user_prompt(query=query, retrieved_documents=[chunks_with_embeddings[i] for i in indices])

formatted_prompt = format_prompt(user_prompt, "education") 

response = generate_model_response(formatted_prompt, tokenizer, model, terminators)

print("----")
print(response)

Query: What are the differences between symmetric and asymmetric cryptosystems? When is each type more appropriate?
----
The main difference between symmetric and asymmetric cryptosystems lies in the way they handle encryption and decryption keys.

Symmetric cryptosystems use the same key for both encryption and decryption. This means that the sender and receiver must share the same key, and it must be kept secret to ensure the security of the system. Symmetric cryptosystems are often faster and more efficient than asymmetric cryptosystems, but they can be more vulnerable to attacks if the shared key is compromised.

Asymmetric cryptosystems, on the other hand, use a pair of keys: a public key for encryption and a private key for decryption. The public key can be shared openly, while the private key must be kept secret. This allows for secure communication between two parties without the need for a shared secret key. Asymmetric cryptosystems are often used for key exchange and digital 

TODO:
- Contextual Compression


## Test.

### Rerank

In [16]:
# grade_retrieval function
from helpers import print_top_results_and_scores, print_wrapped 

def grade_retreival(query: str, retrieved_document: str, verbose: bool = False, temperature: float = 0.6, top_p: float =0.9):
    """
    Asks the model to grade whether a retrieved document is relevant to the query.

    Parameters:
    query (str): The user question.
    retrieved_document (str): Text of the single document to grade.
    verbose (bool): If True, print the rendered prompt and the input id shape.
    temperature (float): Sampling temperature passed to `model.generate`.
    top_p (float): Nucleus sampling threshold passed to `model.generate`.

    Returns:
    The generated token ids only (prompt tokens removed). The caller is
    expected to decode them, e.g. tokenizer.decode(ids, skip_special_tokens=True).
    """
    user_prompt = f"Query: {query}\nRetrieved Document: {retrieved_document}"

    # Format prompt with system info and user query
    message = format_prompt(user_prompt, "relevance")

    # Render the chat template to text so it can be printed in verbose mode.
    prompt = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=True
    )

    # The rendered template already contains the special tokens (it starts
    # with <|begin_of_text|>), so the tokenizer must not add them a second
    # time; a duplicated BOS token changes what the model sees.
    input_ids = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False
    ).to(CONFIG['device'])["input_ids"]

    if verbose:
        print(prompt)
        print(input_ids.shape)

    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id
    )

    # Keep only the newly generated tokens; decoding is left to the caller.
    outputs = outputs[0][input_ids.shape[-1]:]
    return outputs



In [22]:
from helpers import print_wrapped
q = "Why is achieving full distribution transparency often impractical or even undesirable?"
scores, indices = retrieve_relevant_resources(
    query=q, 
    embeddings=embeddings, 
    embedding_model=embedding_model)
retrieved_documents = [chunks_with_embeddings[i] for i in indices]
for doc in retrieved_documents:
    print(doc["type"])
    print_wrapped(doc["text"])

P[48]
Although distribution transparency is generally considered preferable for any
distributed system, there are situations in which blindly attempting to hide all
distribution aspects from users is not a good idea. A simple example is
requesting your electronic newspaper to appear in your mailbox before 7 AM local
time, as usual, while you are currently at the other end of the world living in
a different time zone. Your morning paper will not be the morning paper you are
used to.
P[53]
There are other arguments against distribution transparency. Recognizing that
full distribution transparency is simply impossible, we should ask ourselves
whether it is even wise to pretend that we can achieve it. It may be much better
to make distribution explicit so that the user and application developer are
never tricked into believing that there is such a thing as transparency. The
result will be that users will much better understand the (sometimes unexpected)
behavior of a distributed system, an

In [55]:
grad = grade_retreival(q, retrieved_documents[0]["text"], True)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.<|eot_id|><|start_header_id|>user<|end_header_id|>

Query: Why is achieving full distribution transparency often impractical or even undesirable?
Retrieved Document: Although distribution transparency is generally considered preferable for any distributed system, there are situations in which blindly attempting to hide all distribution aspects from users is not a good idea. A simple example is requesting your electronic newspaper to appear in your mailbox before 7 AM local ti

## Vector Store

In [5]:
import importlib
import App.document_processing

importlib.reload(App.document_processing)
from App.document_processing import ingest_document

path = "./data/Distributed_Systems_4.pdf"
pods = ingest_document(path, 16, 632)

100%|██████████| 616/616 [00:17<00:00, 35.31it/s]


Removing potential headers


613it [00:09, 64.56it/s]


Removing potential footers


613it [00:06, 88.22it/s]


In [9]:
import pandas as pd
df = pd.DataFrame(pods)

df.to_csv('output.csv', index=False, escapechar='\\')
df.to_pickle('output.pkl')

In [2]:
# init db

import importlib
import App.knowledge_db as db
importlib.reload(db)
from App.knowledge_db import create_connection, create_tables, insert_data
def init_db():
    """
    Creates the knowledge database tables and inserts the seed courses and references.

    Opens './data/knowledge.db' through `create_connection`; if no connection
    is returned the function does nothing. The connection is always closed,
    even when table creation or insertion raises.
    """
    courses = [
        {
            "CourseName": "Programming of Distributed systems",
            "CourseCode": "dt136g",
            "Department": "Computer Science",
            "Term": "ht23",
            "Description":
                """
                The real world is distributed. That means that software often has to mirror this distributed-ness. In this course, we deal with the theory behind distributed software systems and connect it to practical application and programming of such systems. Important questions concern how an interaction protocol could look like or how different processes should be best coordinated and synchronized when dealing with shared memory structures. Questions about security and robustness play an important role in the course as well.
                """,
            "Keywords": "DT136G, pods, distributed systems",
        },
        {
            "CourseName": "Human-Computer Interaction",
            "CourseCode": "dt137g",
            "Department": "Computer Science",
            "Term": "ht23",
            "Description":
                """
                This course introduces foundations of multimodal interaction methods and technologies addressing basic principles in human-centred design, embodied interaction, and human perception. Special emphasis is devoted to:
                - Evidence-based empirical methods for the study of human behavior in naturalistic interaction settings, and
                - Application of multimodal human interaction principles in visual and visuo-auditory design (e.g., as relevant in media, interfaces, imagery, immersion)
                The course will introduce students to the landscape of multimodality and human interaction from cognitive, formal modelling, computational, design, and empirical perspectives. Practical work will involve learning to conduct systematic multimodal analysis of human factors as relevant to interaction design.
                """,
            "Keywords": "",
        }
    ]
    references = [
        {
            # The distributed-systems textbook belongs to the distributed-systems
            # course (dt136g), not to Human-Computer Interaction (dt137g).
            "CourseCode": "dt136g",
            "Title": "Distributed Systems 4 by Maarten Van Steen",
            "Type": "textbook",
            "StartPage": 16,
            "EndPage": 632,
            "Summary": "",
            "Keywords": "",
            # NOTE(review): absolute, machine-specific path; consider storing a
            # path relative to the project data directory instead.
            "Path": "/home/buddy/Study-Buddy/data/Distributed_Systems_4.pdf",
        },
    ]

    data = {
        "courses": courses,
        "references": references
    }

    db_file = './data/knowledge.db'

    conn = create_connection(db_file)
    if conn is None:
        return

    try:
        create_tables(conn)
        insert_data(conn, data)
    finally:
        # Release the connection even if creating tables or inserting fails.
        conn.close()

# init_db()

100%|██████████| 616/616 [00:17<00:00, 35.28it/s]


Removing potential headers


613it [00:09, 66.69it/s]


Removing potential footers


613it [00:07, 87.20it/s]


In [11]:
df.to_json('output.json', orient='records')

In [13]:
import json
with open('output.json', 'r') as f:
    data = json.load(f)
data

[{'OrderID': 2,
  'Chunk': 'The pace at which computer systems change was, is, and continues to be overwhelming. From 1945, when the modern computer era began, until about 1985, computers were large and expensive. Moreover, lacking a way to connect them, these computers operated independently of one another.',
  'Page': 18,
  'Chapter': ' INTRODUCTION',
  'ParentChapter': 'None',
  'Type': 'n/a',
  'Summary': '',
  'Keywords': ''},
 {'OrderID': 3,
  'Chunk': 'Starting in the mid-1980s, however, two advances in technology began to change that situation. The first was the development of powerful microproces- sors. Initially, these were 8-bit machines, but soon 16-, 32-, and 64-bit CPUs became common. With powerful multicore CPUs, we now are again facing the challenge of adapting and developing programs to exploit parallelism. In any case, the current generation of machines have the computing power of the mainframes deployed 30 or 40 years ago, but for 1/1000th of the price or less.',
  '

In [3]:
import cohere
import uuid
import hnswlib
from typing import List, Dict
from config import CONFIG
import pandas as pd
from App.knowledge_db import create_connection, create_tables
import json

chunked_data = [
    {
        "title": "Distributed System",
        "path": "./output.json"
    }
]

class Vectorstore:
    """
    In-memory vector store over pre-chunked documents.

    On construction it loads the chunk records listed in the module-level
    `chunked_data`, embeds them with the Cohere client `co` and builds an
    hnswlib index. `retrieve` performs dense retrieval followed by reranking.
    """

    def __init__(self, db_path: str):
        # Stored for later use; nothing in this class reads it yet.
        self.db_path = db_path
        self.chunked_data = chunked_data
        self.docs = []
        self.docs_embs = []
        self.retrieve_top_k = 10
        self.rerank_top_k = 3
        self.load_chunked()
        # The former self.init_data_dict() call was removed: that method only
        # exists as a commented-out sketch, so construction always failed.
        self.embed()
        self.index()

    def load_chunked(self) -> None:
        """
        Loads the chunked text from the sources.

        Every source file is a JSON list of chunk records; the records of all
        sources are collected into the flat list `self.docs`. Each record gets
        a 'title' taken from its source unless it already has one.
        """
        print("Loading documents...")
        for source in self.chunked_data:
            with open(source["path"], 'r') as f:
                records = json.load(f)
            for record in records:
                record.setdefault("title", source.get("title", ""))
            # extend, not append: appending would store the whole list as one doc.
            self.docs.extend(records)

    def embed(self) -> None:
        """
        Embeds the 'Chunk' text of every document using the Cohere API, in batches.
        """
        print("Embedding document chunks...")

        batch_size = 90
        self.docs_len = len(self.docs)
        for start in range(0, self.docs_len, batch_size):
            batch = self.docs[start : start + batch_size]
            texts = [item["Chunk"] for item in batch]
            docs_embs_batch = co.embed(
                texts=texts, model="embed-english-v3.0", input_type="search_document"
            ).embeddings
            self.docs_embs.extend(docs_embs_batch)

    def index(self) -> None:
        """
        Indexes the document embeddings for efficient retrieval.

        Item ids are positions in `self.docs_embs`, which line up with `self.docs`.
        """
        print("Indexing documents...")

        # dim must match the length of the vectors returned by co.embed.
        self.idx = hnswlib.Index(space="ip", dim=1024)
        self.idx.init_index(max_elements=len(self.docs_embs), ef_construction=512, M=64)
        if self.docs_embs:
            self.idx.add_items(self.docs_embs, list(range(len(self.docs_embs))))

        print(f"Indexing complete with {self.idx.get_current_count()} documents.")

    def retrieve(self, query: str) -> List[Dict[str, str]]:
        """
        Retrieves document chunks based on the given query.

        Parameters:
        query (str): The query to retrieve document chunks for.

        Returns:
        List[Dict[str, str]]: A list of dictionaries representing the retrieved document chunks, with 'title', 'text', and 'url' keys.
        'text' is the chunk text; 'url' is an empty string when the chunk record has none.
        """
        indexed_count = self.idx.get_current_count()
        if indexed_count == 0:
            return []

        # Dense retrieval
        query_emb = co.embed(
            texts=[query], model="embed-english-v3.0", input_type="search_query"
        ).embeddings

        # Asking for more neighbours than indexed items makes knn_query fail.
        k = min(self.retrieve_top_k, indexed_count)
        doc_ids = self.idx.knn_query(query_emb, k=k)[0][0]

        # Reranking
        rank_fields = ["title", "text"] # We'll use the title and text fields for reranking

        # Chunk records store their text under 'Chunk'; expose it under the
        # field names the reranker is told to use.
        docs_to_rerank = [
            {"title": self.docs[doc_id].get("title", ""), "text": self.docs[doc_id]["Chunk"]}
            for doc_id in doc_ids
        ]

        rerank_results = co.rerank(
            query=query,
            documents=docs_to_rerank,
            top_n=min(self.rerank_top_k, len(docs_to_rerank)),
            model="rerank-english-v3.0",
            rank_fields=rank_fields
        )

        # Each result's index is a position in docs_to_rerank; map it back to
        # the store-wide document id.
        doc_ids_reranked = [doc_ids[result.index] for result in rerank_results.results]

        docs_retrieved = []
        for doc_id in doc_ids_reranked:
            doc = self.docs[doc_id]
            docs_retrieved.append(
                {
                    "title": doc.get("title", ""),
                    "text": doc["Chunk"],
                    "url": doc.get("url", ""),
                }
            )

        return docs_retrieved
    
    

            
def import_chunks_with_embeddings(csv_path: str):
    """
    Load text chunks and their stored embeddings from a CSV file.

    The first CSV column is used as the index. Each 'embedding' cell is
    expected to hold a bracketed, whitespace-separated list of numbers
    (e.g. "[0.1 0.2 0.3]") and is parsed into a NumPy array.

    Returns a list with one dict per row (column name -> value).
    """
    def _parse_embedding(raw: str):
        # Drop the enclosing brackets, then parse the whitespace-separated numbers.
        return np.fromstring(raw[1:-1], sep=' ')

    frame = pd.read_csv(csv_path, index_col=0)
    frame['embedding'] = frame['embedding'].apply(_parse_embedding)
    return frame.to_dict(orient='records')

def get_chunks_embeddings_as_tensor(chunks_with_embeddings: list[dict]):
    """
    Stack every chunk's 'embedding' array into a single float32 tensor.

    Row i of the returned tensor is the embedding of chunks_with_embeddings[i].
    """
    stacked = np.stack([record['embedding'] for record in chunks_with_embeddings], axis=0)
    return torch.tensor(stacked, dtype=torch.float32)

# Load chunks and embeddings
# chunks_with_embeddings = import_chunks_with_embeddings(CONFIG['csv_path'])
# embeddings = get_chunks_embeddings_as_tensor(chunks_with_embeddings).to(CONFIG['device'])
# chunks_with_embeddings

#### Cohere


In [19]:
def retrieve(query: str, embeddings: torch.tensor, embedding_model: SentenceTransformer, top_k: int=5):
    """
    Retrieves the best-matching chunk scores and indices for the given query.

    NOTE(review): a second `retrieve` is defined right after this one in the
    same cell and replaces it, so this version is currently unreachable.

    Parameters:
    query (str): The question to search for.
    embeddings (torch.tensor): Candidate embeddings to score against the query.
    embedding_model (SentenceTransformer): Model whose `encode` embeds the query.
    top_k (int): Maximum number of results, capped at the number of candidates.

    Returns:
    (scores, indices) as produced by torch.topk over the dot-product scores.
    """
    # Embed query
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    # Get dot product scores on embeddings
    dot_scores = util.dot_score(a=query_embedding, b=embeddings)[0]

    # torch.topk raises when k is larger than the number of scores, so cap it.
    k = min(top_k, dot_scores.shape[0])
    scores, indices = torch.topk(dot_scores, k=k)
    return scores, indices

def retrieve(query: str, tok_k: int=10) -> List[Dict[str, str]]: 
    """
    Replacement for retrieve_relevant_resources.

    Embeds the query with the notebook-level `embedding_model`, scores it
    against the notebook-level `embeddings`, and returns the best-matching
    chunks from `chunks_with_embeddings`.

    Parameters:
    query (str): The question to search for.
    tok_k (int): Maximum number of chunks to return (the parameter name is kept
        as originally spelled so the signature does not change). Capped at the
        number of available chunks.

    Returns:
    List[Dict[str, str]]: One dict per retrieved chunk with a 'text' key,
    best match first.
    """
    query_embedding = embedding_model.encode(query, convert_to_tensor=True)

    dot_scores = util.dot_score(a=query_embedding, b=embeddings)[0]

    # torch.topk raises when k is larger than the number of scores, so cap it.
    k = min(tok_k, dot_scores.shape[0])
    doc_ids = torch.topk(dot_scores, k=k)[1]

    return [{"text": chunks_with_embeddings[doc_id]["text"]} for doc_id in doc_ids.tolist()]

In [18]:
scores, indices = retrieve_relevant_resources(query=q, embeddings=embeddings, embedding_model=embedding_model, top_k=10)
docs = [chunks_with_embeddings[i]["text"] for i in indices]
docs = {i: chunks_with_embeddings[i]["text"] for i in indices}
docs = {i: chunk["text"] for i, chunk in enumerate(chunks_with_embeddings)}
docs = {chunks_with_embeddings[index]["text"]: i for i, index in enumerate(indices)}
docs.keys()
#print("\n------\n".join(docs.keys()))

dict_keys(['Although distribution transparency is generally considered preferable for any distributed system, there are situations in which blindly attempting to hide all distribution aspects from users is not a good idea. A simple example is requesting your electronic newspaper to appear in your mailbox before 7 AM local time, as usual, while you are currently at the other end of the world living in a different time zone. Your morning paper will not be the morning paper you are used to. ', 'There are other arguments against distribution transparency. Recognizing that full distribution transparency is simply impossible, we should ask ourselves whether it is even wise to pretend that we can achieve it. It may be much better to make distribution explicit so that the user and application developer are never tricked into believing that there is such a thing as transparency. The result will be that users will much better understand the (sometimes unexpected) behavior of a distributed system

In [127]:
docs_to_rerank = [chunks_with_embeddings[i]["text"] for i in indices]
rerank_docs = co.rerank(query=q, documents=list(docs.keys()), top_n = 10)

In [128]:
rerank_docs



In [88]:
scores, indices = retrieve_relevant_resources(query=q, embeddings=embeddings, embedding_model=embedding_model, top_k=10)
retrieved_documents = [chunks_with_embeddings[i] for i in indices]
print(q)
for doc in retrieved_documents:
    print(doc["type"])
    print_wrapped(doc["text"])

Why is achieving full distribution transparency often impractical or even undesirable?
P[48]
Although distribution transparency is generally considered preferable for any
distributed system, there are situations in which blindly attempting to hide all
distribution aspects from users is not a good idea. A simple example is
requesting your electronic newspaper to appear in your mailbox before 7 AM local
time, as usual, while you are currently at the other end of the world living in
a different time zone. Your morning paper will not be the morning paper you are
used to.
P[53]
There are other arguments against distribution transparency. Recognizing that
full distribution transparency is simply impossible, we should ask ourselves
whether it is even wise to pretend that we can achieve it. It may be much better
to make distribution explicit so that the user and application developer are
never tricked into believing that there is such a thing as transparency. The
result will be that users will

In [None]:
# Rerank the chunks retrieved in the previous cell against the same query.
# The call was previously left without arguments, which raises when the cell runs.
rerank_docs = co.rerank(
    query=q,
    documents=[doc["text"] for doc in retrieved_documents],
    top_n=3,
    model="rerank-english-v3.0",
)

#### Tests

In [78]:
from helpers import print_wrapped
print(f"Q: {q}\n--")
print_wrapped(retrieved_documents[0]['text'])
output_text = tokenizer.decode(grad, skip_special_tokens=True)
print(output_text)

Q: Why is achieving full distribution transparency often impractical or even undesirable?
--
Although distribution transparency is generally considered preferable for any
distributed system, there are situations in which blindly attempting to hide all
distribution aspects from users is not a good idea. A simple example is
requesting your electronic newspaper to appear in your mailbox before 7 AM local
time, as usual, while you are currently at the other end of the world living in
a different time zone. Your morning paper will not be the morning paper you are
used to.
{"score": "no"}


In [79]:
for i, document in enumerate(retrieved_documents):
    grad = grade_retreival(q, document['text'])
    output_text = tokenizer.decode(grad, skip_special_tokens=True)
    print(f"assessment {i+1}: {output_text}")


assessment 1: {"score": "no"}
assessment 2: {"score": "yes"}
assessment 3: {"score": "yes"}
assessment 4: {"score": "no"}
assessment 5: {"score": "yes"}


In [80]:
def assess_documents(temperature = 0.6, top_p = 0.9):
    """
    Grade every document in the notebook-level `retrieved_documents` against the
    notebook-level query `q` and print one decoded assessment per document.

    `temperature` and `top_p` are forwarded to `grade_retreival`.
    """
    for position, document in enumerate(retrieved_documents, start=1):
        graded_ids = grade_retreival(q, document['text'], False, temperature, top_p)
        decoded = tokenizer.decode(graded_ids, skip_special_tokens=True)
        print(f"assessment {position}: {decoded}")

In [81]:
assess_documents(temperature=0.6, top_p=0.9)

assessment 1: {"score": "no"}
assessment 2: {"score": "yes"}
assessment 3: {"score": "yes"}
assessment 4: {"score": "no"}
assessment 5: {"score": "yes"}


In [82]:
assess_documents(temperature=0.1, top_p=0.9)

assessment 1: {"score": "no"}
assessment 2: {"score": "yes"}
assessment 3: {"score": "yes"}
assessment 4: {"score": "no"}
assessment 5: {"score": "yes"}


In [83]:
for i, text in enumerate(retrieved_documents):
    print(f"<start_assessment_{i+1}>")
    print(f"QUERY:\n{q}")
    print("RETRIEVED TEXT:")
    print_wrapped(text['text'])
    grad = grade_retreival(q, text['text'])
    output_text = tokenizer.decode(grad, skip_special_tokens=True)
    print(f"RELEVANCE:\n{output_text}")
    print(f"<end_assessment_{i+1}>\n")

    
#grad = grade_retreival(q, chunks_with_embeddings[0]['text'])

#output_text = tokenizer.decode(grad, skip_special_tokens=True)
#print(output_text)


<start_assessment_1>
QUERY:
Why is achieving full distribution transparency often impractical or even undesirable?
RETRIEVED TEXT:
Although distribution transparency is generally considered preferable for any
distributed system, there are situations in which blindly attempting to hide all
distribution aspects from users is not a good idea. A simple example is
requesting your electronic newspaper to appear in your mailbox before 7 AM local
time, as usual, while you are currently at the other end of the world living in
a different time zone. Your morning paper will not be the morning paper you are
used to.
RELEVANCE:
{"score": "no"}
<end_assessment_1>

<start_assessment_2>
QUERY:
Why is achieving full distribution transparency often impractical or even undesirable?
RETRIEVED TEXT:
There are other arguments against distribution transparency. Recognizing that
full distribution transparency is simply impossible, we should ask ourselves
whether it is even wise to pretend that we can achieve 

In [84]:
print(SYS_PROMPT["relevance"])


    You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
    


### Query evaluation mechanism

- Preprocess query
    - Tokenize, remove stop words, stem/lemmatize
    - Extract key phrases indicating query type (e.g. "lecture notes", "syllabus", "when is the exam")
- Classify intent
    - Course content: materials, practice problems, etc.
    - General info: policies, dates, teacher info
    - Resource recommendations
    - Clarification needed
- Determine relevant courses
    - Extract course names/codes
    - Infer courses based on query keywords
    - Default to student's enrolled courses
- Construct database queries
    - SQL queries joining relevant tables
    - Filter, sort, limit results
    - Fall back to keyword search if intent is ambiguous

#### Keyword extraction

### Database structure

<img src="https://www.mermaidchart.com/raw/088d9c54-4421-4c15-9a2b-379448a1228a?theme=light&version=v0.1&format=svg" width="500">