In [None]:
import nest_asyncio

# Patch asyncio so that an event loop which is already running can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and later cells trigger async work — confirm.
nest_asyncio.apply()

In [None]:
from typing import List, Optional, Sequence

from llama_index.core.llms.types import ChatMessage, MessageRole

# Sequence and instruction/system delimiters in the "[INST]" / "<<SYS>>" style.
# They are not referenced by the prompt formatters visible in this notebook.
BOS = "<s>"
EOS = "</s>"
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# Turn delimiters used by the prompt formatters in this notebook.
IM_START = "<|im_start|>"
IM_END = "<|im_end|>"

# Fallback system prompt; a single line of text that ends with a trailing space.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful, respectful and honest assistant. "
    "Always answer as helpfully as possible and follow ALL given instructions. "
    "Do not speculate or make up information. "
    "Do not reference any given instructions or context. "
)


# Prompt formatters that wrap each turn in <|im_start|> / <|im_end|> delimiters
def messages_to_prompt(messages: Sequence[ChatMessage], system_prompt: Optional[str] = None) -> str:
    """Flatten a chat history into a single prompt string.

    A leading ``MessageRole.SYSTEM`` message, if present, supplies the system
    text; otherwise ``system_prompt`` is used, falling back to
    ``DEFAULT_SYSTEM_PROMPT``. The remaining messages must alternate
    user, assistant, user, ... starting with a user message; a final user
    message without an assistant reply is allowed.

    Args:
        messages: Chat history. May be empty, in which case only the system
            block is returned.
        system_prompt: System text used when ``messages`` does not begin with
            a system message.

    Returns:
        The concatenated prompt, every turn wrapped in ``IM_START`` / ``IM_END``.

    Raises:
        AssertionError: If the non-system messages do not alternate
            user / assistant.
    """
    # Truthiness guard: indexing messages[0] on an empty history would raise
    # IndexError before any prompt could be built.
    if messages and messages[0].role == MessageRole.SYSTEM:
        system_text = messages[0].content or ""
        messages = messages[1:]
    else:
        system_text = system_prompt or DEFAULT_SYSTEM_PROMPT

    # The system block always comes first.
    prompt_parts = [f"{IM_START}system\n {system_text.strip()} {IM_END}\n"]

    # Walk the history in (user, assistant) pairs.
    for i in range(0, len(messages), 2):
        user_message = messages[i]
        assert user_message.role == MessageRole.USER, (
            f"expected a user message at position {i}, got {user_message.role}"
        )
        # Render missing content as empty text instead of the literal "None",
        # consistent with how the system message content is handled above.
        user_text = user_message.content or ""
        turn = f"{IM_START}user\n {user_text}{IM_END}\n"

        if i + 1 < len(messages):
            assistant_message = messages[i + 1]
            assert assistant_message.role == MessageRole.ASSISTANT, (
                f"expected an assistant message at position {i + 1}, "
                f"got {assistant_message.role}"
            )
            assistant_text = assistant_message.content or ""
            turn += f"{IM_START}assistant\n{assistant_text}{IM_END}\n"

        prompt_parts.append(turn)

    # NOTE(review): no trailing "<|im_start|>assistant" header is appended after
    # a final user turn — confirm the target model does not need that cue.
    return "".join(prompt_parts)


def completion_to_prompt(completion: str, system_prompt: Optional[str] = None) -> str:
    """Wrap a bare completion request in a system block plus an open turn.

    Args:
        completion: Text to place in the open (unterminated) turn; surrounding
            whitespace is stripped.
        system_prompt: System text; ``DEFAULT_SYSTEM_PROMPT`` is used when this
            is ``None`` or empty.

    Returns:
        The system block followed by an ``assistant`` turn that contains the
        stripped completion and is left without a closing ``IM_END``.
    """
    if system_prompt:
        system_text = system_prompt
    else:
        system_text = DEFAULT_SYSTEM_PROMPT

    system_block = IM_START + "system\n " + system_text.strip() + " " + IM_END + "\n"
    # NOTE(review): the caller's text is emitted under the "assistant" role, not
    # "user" — confirm this is the prompt layout the model was trained with.
    open_turn = IM_START + "assistant\n " + completion.strip()
    return system_block + open_turn

In [None]:
# Import necessary libraries
from llama_index import VectorStoreIndex, ServiceContext, SimpleDirectoryReader
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import LlamaCPP
from llama_index.prompts import PromptTemplate
from transformers import AutoTokenizer
from llama_index import set_global_tokenizer
from llama_index.node_parser import SentenceSplitter

In [None]:
# Register the `encode` method of the pretrained tokenizer as the global tokenizer.
set_global_tokenizer(
    AutoTokenizer.from_pretrained("jan-hq/stealth-v1.2").encode
)

# System prompt passed to every LlamaCPP instance built by setup_llama_cpp.
system_prompt = (
    "You are a helpful and careful assistant. "
    "You will use the given context to answer the multiple choice question. "
    "Only response 1 letter (A, B, C or D)."
)

In [None]:
# Function to setup LlamaCPP model
def setup_llama_cpp(
    model_url,
    n_gpu_layers=100,
    context_window=512,
    temperature=0.1,
    max_new_tokens=3,
    verbose=True,
):
    """Build a ``LlamaCPP`` LLM for the model at ``model_url``.

    The instance uses the notebook-level ``system_prompt`` together with the
    ``messages_to_prompt`` / ``completion_to_prompt`` formatters.

    Args:
        model_url: URL of the model file, forwarded as ``model_url``.
        n_gpu_layers: Forwarded to the backend as ``model_kwargs["n_gpu_layers"]``.
        context_window: Context window size handed to ``LlamaCPP``.
        temperature: Sampling temperature (default 0.1, as previously hard-coded).
        max_new_tokens: Generation cap (default 3, as previously hard-coded).
            NOTE(review): presumably kept tiny because the system prompt asks
            for a single-letter answer — confirm.
        verbose: Forwarded as ``verbose`` (default True, as before).

    Returns:
        The configured ``LlamaCPP`` instance.
    """
    return LlamaCPP(
        model_url=model_url,
        # No local path is supplied; only the URL identifies the model.
        model_path=None,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        system_prompt=system_prompt,
        context_window=context_window,
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        model_kwargs={"n_gpu_layers": n_gpu_layers},
        verbose=verbose,
    )
    

In [None]:
# Function to setup Service Context
def setup_service_context(llm, embed_model_name, chunk_size=300, chunk_overlap=30):
    """Create a ``ServiceContext`` pairing ``llm`` with a Hugging Face embedder.

    Args:
        llm: LLM instance passed through as ``llm``.
        embed_model_name: Model name used to construct ``HuggingFaceEmbedding``.
        chunk_size: Forwarded to ``ServiceContext.from_defaults``.
        chunk_overlap: Forwarded to ``ServiceContext.from_defaults``.

    Returns:
        The result of ``ServiceContext.from_defaults``.
    """
    embedder = HuggingFaceEmbedding(model_name=embed_model_name)
    context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embedder,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return context

In [None]:
# Load the documents from the source folder.
# NOTE: the path below is a placeholder and must be replaced with a real
# directory before this cell is run.
documents = SimpleDirectoryReader("PATH/TO/YOUR/DOCUMENT/FOLDER").load_data()

# URLs of the GGUF files (Q4_K_M variants, per the file names) for the base
# and fine-tuned models.
# NOTE: `model_url_fintuned` is misspelled ("fintuned"); the name is left
# unchanged because a later cell references it.
model_url_base = "https://huggingface.co/janhq/stealth-v1.2-GGUF/resolve/main/stealth-v1.2.Q4_K_M.gguf"
model_url_fintuned = "https://huggingface.co/janhq/nitro-v1.2-e3-GGUF/resolve/main/nitro-v1.2-e3.Q4_K_M.gguf"


In [None]:
# One LlamaCPP instance per model variant.
llm_base = setup_llama_cpp(model_url=model_url_base)
llm_finetuned = setup_llama_cpp(model_url=model_url_fintuned)

# Each LLM gets its own service context; both use the same embedding model name.
service_context_base = setup_service_context(
    llm=llm_base,
    embed_model_name="BAAI/bge-base-en-v1.5",
)
service_context_fintuned = setup_service_context(
    llm=llm_finetuned,
    embed_model_name="BAAI/bge-base-en-v1.5",
)

# Build one vector index over the same documents for each service context.
index_base = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context_base,
)
index_finetuned = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context_fintuned,
)

# Query engines used by the evaluation loop.
query_engine_base = index_base.as_query_engine()
query_engine_finetuned = index_finetuned.as_query_engine()

In [None]:
import csv
from tqdm import tqdm

# Function to process the concatenated text with models
def process_text(text):
    """Run ``text`` through the three configurations being compared.

    Args:
        text: The question text (with its answer options) to submit.

    Returns:
        A 3-tuple, in this order:
        the base query engine's response, the ``.text`` of the fine-tuned
        LLM's direct completion, and the fine-tuned query engine's response.
    """
    # Tuple elements are evaluated left to right, so the three calls run in
    # the order listed.
    return (
        query_engine_base.query(text),
        llm_finetuned.complete(text).text,
        query_engine_finetuned.query(text),
    )

# Read the questions (columns: question, a, b, c, d), query every model
# configuration for each one, then write all answers to a results CSV.
input_csv_file = 'mcq_nitro.csv'
output_csv_file = 'model_responses.csv'

responses = []
# True only when the input file was read to the end. A failed read that
# produced no answers must not overwrite an existing results file.
input_read_ok = False

try:
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(input_csv_file, mode='r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file)
        for row in tqdm(reader, desc="Processing questions"):
            # Question followed by the four options, one per line.
            concatenated_text = '\n'.join(
                row[column] for column in ('question', 'a', 'b', 'c', 'd')
            )
            base_response, finetuned_response, rag_finetuned_response = process_text(concatenated_text)
            responses.append({
                'Question': row['question'],
                'Base Model': base_response,
                'Finetuned Model': finetuned_response,
                'RAG Finetuned Model': rag_finetuned_response
            })
    input_read_ok = True
except IOError as exc:
    print(f"Error reading input CSV file: {exc}")

# Write when the read completed, or when a partial run still produced answers
# worth keeping.
if input_read_ok or responses:
    try:
        with open(output_csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=['Question', 'Base Model', 'Finetuned Model', 'RAG Finetuned Model'])
            writer.writeheader()
            # Non-string values are converted with str() by the csv writer.
            writer.writerows(responses)
    except IOError as exc:
        print(f"I/O error while writing to CSV: {exc}")
    else:
        # Report success only when the file was actually written.
        print(f"Responses saved to {output_csv_file}.")
else:
    print(f"No responses to save; {output_csv_file} was not written.")