<a href="https://colab.research.google.com/github/philosophy-question-answerer/model-tests-automated/blob/main/model_tests_automated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pip Installs

In [None]:
! pip install cohere
! pip install ctransformers
! pip install huggingface_hub
! pip install langchain
! pip install pinecone-client
! pip install sentence-transformers
! pip install transformers

# Imports

In [None]:
import os
import textwrap
import time
import uuid

import torch.cuda
from tqdm.auto import tqdm

from ctransformers import AutoModelForCausalLM
import cohere

import pinecone
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone

from google.colab import userdata, drive

# Set Up Google Drive

In [None]:
# Mount Google Drive, then make sure the folder that receives the result files exists.
drive.mount('/content/drive')
# Pure-Python equivalent of `mkdir -p`; unlike the shell call it raises if creation fails.
os.makedirs('/content/drive/My Drive/Model Tests Results', exist_ok=True)

# Environment Variables

In [None]:
# Secrets are fetched through google.colab's `userdata` instead of being hard-coded.
PINECONE_API_KEY, PINECONE_ENVIRONMENT, COHERE_API_KEY = [
    userdata.get(secret_name)
    for secret_name in ('PINECONE_API_KEY', 'PINECONE_ENVIRONMENT', 'COHERE_API_KEY')
]

# Name of the Pinecone index that each test run deletes and recreates.
PINECONE_INDEX_NAME = 'test-aristotle'

# Embedding size used when creating the index (sentence-transformers/all-MiniLM-L6-v2).
VECTOR_DIMENSION = 384
# Questions put to the model for every chunking / reranking configuration.
TEST_QUESTIONS = [
    'What determines the meaning of a word?',
    'When can something be classified as a game?',  # fixed duplicated "can"
    'What is the concept of family resemblance?',
    'Is the existence of a private language possible?',
    'What does following a rule entail?',
    'Can the concept of sameness be used to teach a rule?',
    'What is the role of language-games?',
    'How is the idea that mental processes form the basis of our understanding of language critiqued?',
    'Do private mental objects exist?',
    'What is the relationship between forms of life and language?',
    'How is it explained that the meaning of a word is its use in language?',
    'How do philosophical problems arise from misunderstandings of language?',
    'What is problematic about the Augustinian view on meaning?',
    'What is said about the misguided nature of philosophical questions?',
    'What is the purpose of the analogy with the toolbox?',
    "In what ways does the notion of 'language-games' challenge traditional epistemology?",
    'What implications does the critique of private language have for theories of consciousness?',
    'How is the concept of pain used to argue against the possibility of a private language?',
    "In what ways does the examination of pain's sameness challenge the understanding of subjective experiences?",
    'What are the limitations of language?',
    'What is the relationship between forms of life and understanding philosophy?',
]

# Loading the Dataset

In [None]:
dataset_path = './dataset'

# Only the .txt files found directly inside the dataset folder are loaded.
files = os.listdir(dataset_path)
txt_paths = [os.path.join(dataset_path, name) for name in files if name.endswith('.txt')]

# One entry per file: whatever TextLoader.load() returns for that file.
documents = [TextLoader(path, encoding='UTF-8').load() for path in txt_paths]

print(f'Loaded {len(documents)} documents.')

# Utility Functions

## Create Chunks

In [None]:
def create_chunks(documents, chunk_size=750, chunk_overlap=50):
    """Split every loaded document into chunks and return them as one flat list.

    Args:
        documents: Iterable whose items are each handed to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.

    Returns:
        A single list with the chunks of all documents, in input order.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", ".", " ", ""],
    )

    # Flatten the per-document chunk lists into one list.
    return [piece for loaded in documents for piece in splitter.split_documents(loaded)]

## Load Model Locally

In [None]:
# Registry of locally runnable models. Each entry holds the values that
# load_model_locally() forwards to AutoModelForCausalLM.from_pretrained().
models_dict = {
    'Mistral': {
        'pretrained_model_name_or_path': 'TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
        'model_file': 'mistral-7b-instruct-v0.1.Q4_K_M.gguf',
        'model_type': 'mistral',
    },
}

def load_model_locally(model_choice):
    """Load a model through ctransformers from one ``models_dict`` entry.

    Args:
        model_choice: Mapping with the keys ``'pretrained_model_name_or_path'``,
            ``'model_file'`` and ``'model_type'``.

    Returns:
        Whatever ``AutoModelForCausalLM.from_pretrained`` returns.

    Raises:
        KeyError: If one of the three keys is missing from ``model_choice``.
    """
    repo_or_path = model_choice['pretrained_model_name_or_path']
    weights_file = model_choice['model_file']
    architecture = model_choice['model_type']

    # gpu_layers is fixed at 0 here (presumably CPU-only inference; confirm against ctransformers docs).
    return AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=repo_or_path,
        model_file=weights_file,
        model_type=architecture,
        gpu_layers=0,
    )

## Query Cohere Model

In [None]:
# Shared Cohere client; also used by the reranking step.
co = cohere.Client(COHERE_API_KEY)

def query_cohere(prompt):
    """Send ``prompt`` to Cohere's chat endpoint and return the reply text.

    Args:
        prompt: Message passed as ``message`` to ``co.chat``.

    Returns:
        The ``text`` attribute of the chat response.
    """
    chat_kwargs = {'message': prompt, 'model': "command", 'temperature': 0.9}
    return co.chat(**chat_kwargs).text

## Save Results to .txt File

In [None]:
def save_results_to_txt(result_string, model_name, chunk_size, chunk_overlap, is_reranking,
                        results_dir='/content/drive/My Drive/Model Tests Results'):
    """Write the results of one test configuration to a text file.

    The file is named ``{model_name}_{chunk_size}_{chunk_overlap}_{is_reranking}.txt``
    and is overwritten if it already exists.

    Args:
        result_string: Text written after the configuration header.
        model_name: Model label, used in the file name and the header.
        chunk_size: Chunk size of the run, used in the file name and the header.
        chunk_overlap: Chunk overlap of the run, used in the file name and the header.
        is_reranking: Whether reranking was enabled, used in the file name and the header.
        results_dir: Directory that receives the file. Defaults to the Drive
            folder used by this notebook; it is created if it does not exist.
    """
    # Do not rely on the mkdir cell having been run.
    os.makedirs(results_dir, exist_ok=True)

    output_file_path = os.path.join(
        results_dir, f'{model_name}_{chunk_size}_{chunk_overlap}_{is_reranking}.txt'
    )

    header = (
        f'\nModel = {model_name}\n'
        f'Chunk Size = {chunk_size}\n'
        f'Chunk Overlap = {chunk_overlap}\n'
        f'Reranking on = {is_reranking}\n\n'
    )

    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(header)
        file.write(f'{result_string}\n')

## Format Question-Context-Answer (QCA) String

In [None]:
def format_qca_string(new_question, new_context, new_answer, inference_time):
    """Render one question / context / answer / timing record as a text block.

    Args:
        new_question: Question text placed under the QUESTION heading.
        new_context: Context text placed under the CONTEXT heading.
        new_answer: Answer text placed under the ANSWER heading.
        inference_time: Value placed under the INFERENCE TIME heading.

    Returns:
        The formatted block, ending with a row of ``#`` characters as separator.
    """
    # The literal's own indentation and line breaks are part of the output.
    qca_block = f'''
        QUESTION:\n
        {new_question}\n\n

        CONTEXT:\n
        {new_context}\n\n

        ANSWER:\n
        {new_answer}\n\n

        INFERENCE TIME:\n
        {inference_time}\n\n

        ####################### \n\n
    '''
    return qca_block

# Creating Vector Database

In [None]:
# Pinecone client used by the index creation and query functions below.
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

## Load the Embedding Model

In [None]:
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# `import torch.cuda` binds the name `torch` (not `cuda`), so the CUDA helpers
# have to be reached through `torch.cuda`. Falls back to CPU without a GPU.
device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'

# Embedding model shared by index population and query embedding.
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': 10}
)

## Create and Populate the Database

In [None]:
def create_and_populate_db(chunks, vector_dimension):
    """Recreate the Pinecone index and upsert one embedding per chunk.

    Any existing index named ``PINECONE_INDEX_NAME`` is deleted first, so each
    call starts from an empty index.

    Args:
        chunks: Sequence of chunk objects; each must expose ``page_content``
            and ``metadata['source']``.
        vector_dimension: Dimension of the index to create; it must match the
            size of the vectors produced by ``embed_model``.
    """
    spec = pinecone.PodSpec(environment=PINECONE_ENVIRONMENT)

    # Drop the index left over from a previous configuration, if there is one.
    if any(index.name == PINECONE_INDEX_NAME for index in pc.list_indexes()):
        pc.delete_index(PINECONE_INDEX_NAME)

    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=vector_dimension,
        metric='cosine',
        spec=spec
    )

    # Poll until Pinecone reports the new index as ready.
    while not pc.describe_index(PINECONE_INDEX_NAME).status['ready']:
        time.sleep(1)

    index = pc.Index(PINECONE_INDEX_NAME)

    batch_size = 10

    for start in tqdm(range(0, len(chunks), batch_size)):
        # Slicing past the end is safe, so the last batch may simply be shorter.
        batch = chunks[start:start + batch_size]
        texts = [chunk.page_content for chunk in batch]
        # One fresh id per chunk: a repeated id would make the vectors of a
        # batch overwrite each other on upsert.
        ids = [str(uuid.uuid4()) for _ in batch]
        embeds = embed_model.embed_documents(texts)
        metadata = [
            {
                'text': chunk.page_content,
                'source': chunk.metadata['source']
            } for chunk in batch
        ]
        index.upsert(vectors=list(zip(ids, embeds, metadata)))

## Querying the Database (Semantic Search)

In [None]:
def semantic_search(question, num_matched_excerpts):
    """Return the stored excerpts closest to ``question`` in the Pinecone index.

    Args:
        question: Query text; it is embedded with ``embed_model``.
        num_matched_excerpts: Passed as ``top_k`` to the index query.

    Returns:
        A list of dicts with the keys ``'source'`` and ``'text'``, taken from
        the metadata of each match, in the order the query returned them.
    """
    index = pc.Index(PINECONE_INDEX_NAME)
    query_vector = embed_model.embed_query(question)
    response = index.query(
        vector=query_vector,
        top_k=num_matched_excerpts,
        include_metadata=True,
    )

    hits = []
    for match in response['matches']:
        meta = match['metadata']
        hits.append({'source': meta['source'], 'text': meta['text']})
    return hits

# Reranking (Cohere API)

In [None]:
def semantic_search_reranking(question, num_matched_excerpts, num_reranked_excerpts):
    """Run a semantic search, then keep the excerpts Cohere's reranker ranks highest.

    Args:
        question: Query text used for both the vector search and the reranking.
        num_matched_excerpts: Number of candidates fetched by ``semantic_search``.
        num_reranked_excerpts: Passed as ``top_n`` to ``co.rerank``.

    Returns:
        A list of dicts with the keys ``'text'`` and ``'source'``, in the order
        of the rerank results. Empty if the search returned no candidates.
    """
    docs = semantic_search(question=question, num_matched_excerpts=num_matched_excerpts)
    if not docs:
        # Nothing to rerank; skip the API call.
        return []

    reranked = co.rerank(query=question,
                         documents=[doc['text'] for doc in docs],
                         top_n=num_reranked_excerpts,
                         model='rerank-english-v2.0')

    # Map each result back through its position in the submitted list. This
    # keeps the right source even when two candidates share the same text.
    return [
        {
            'text': docs[result.index]['text'],
            'source': docs[result.index]['source']
        } for result in reranked.results
    ]

# Retrieval Augmented Generation (RAG)

## Create the Question-Context Prompt

In [None]:
def extract_context(question, num_matched_excerpts, is_reranking, num_reranked_excerpts):
    """Build the context string for a question from retrieved excerpts.

    Args:
        question: Query text.
        num_matched_excerpts: Number of excerpts fetched by the vector search.
        is_reranking: If true, candidates are reranked and only the top
            ``num_reranked_excerpts`` are used; otherwise all matched excerpts
            are used.
        num_reranked_excerpts: Number of excerpts kept after reranking; not
            used when ``is_reranking`` is false.

    Returns:
        A string containing, for each excerpt, its text and its source (each
        wrapped to 150 columns) followed by a blank line. Empty if nothing was
        retrieved.
    """
    if is_reranking:
        docs = semantic_search_reranking(question=question,
                                         num_matched_excerpts=num_matched_excerpts,
                                         num_reranked_excerpts=num_reranked_excerpts)
    else:
        docs = semantic_search(question=question, num_matched_excerpts=num_matched_excerpts)

    # textwrap comes from the notebook's import cell.
    return ''.join(
        textwrap.fill(doc['text'], 150) + '\n' + textwrap.fill(doc['source'], 150) + '\n\n'
        for doc in docs
    )

In [None]:
def create_prompt(question, context):
    """Combine a question and its retrieved context into the model prompt.

    Args:
        question: Text inserted after ``QUESTION:``.
        context: Text inserted after ``CONTEXT:``.

    Returns:
        The prompt string, ending with an open ``ANSWER:`` section.
    """
    # The literal's own indentation and line breaks are part of the prompt.
    return f'''
  Answer the following QUESTION with the given CONTEXT. \n\n
  QUESTION: {question} \n
  CONTEXT: \n {context} \n
  ANSWER:
  '''

## Run Tests - Vary Model, Vector DB Parameters, and Reranking On/Off

In [None]:
# Every (chunk_size, chunk_overlap) pair the test run iterates over, in run order.
chunk_parameter_combinations = [
    {'chunk_size': size, 'chunk_overlap': overlap}
    for size, overlap in (
        (256, 25),
        (256, 50),
        (512, 50),
        (512, 100),
        (1024, 100),
        (1024, 150),
        (1500, 100),
        (1500, 300),
        (2048, 200),
        (2048, 350),
    )
]

In [None]:
# Runs every question against every chunking configuration, with reranking on
# and off, and writes one result file per (configuration, reranking) pair.
mistral = load_model_locally(models_dict['Mistral'])

# Retrieval sizes used for every query.
NUM_MATCHED_EXCERPTS = 25
NUM_RERANKED_EXCERPTS = 3
# NOTE(review): with reranking off, all NUM_MATCHED_EXCERPTS excerpts are put
# into the prompt, versus NUM_RERANKED_EXCERPTS with reranking on. Confirm the
# asymmetry is intended.

for combination in tqdm(chunk_parameter_combinations):

    chunk_size = combination['chunk_size']
    chunk_overlap = combination['chunk_overlap']

    # The index is rebuilt once per chunking configuration and shared by both
    # reranking settings.
    chunks = create_chunks(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    create_and_populate_db(chunks=chunks, vector_dimension=VECTOR_DIMENSION)

    for is_reranking in tqdm([True, False]):

        combination_i_output = ''

        print(f'Processing combination: Chunk Size: {chunk_size}, Chunk Overlap: {chunk_overlap}, Reranking: {is_reranking}')

        for question_index, question in enumerate(tqdm(TEST_QUESTIONS)):

            print(f'Querying question {question_index}')

            context = extract_context(question=question,
                                      num_matched_excerpts=NUM_MATCHED_EXCERPTS,
                                      is_reranking=is_reranking,
                                      num_reranked_excerpts=NUM_RERANKED_EXCERPTS)

            prompt = create_prompt(question=question, context=context)

            # Only the generation call is timed; retrieval is excluded.
            start_time = time.time()

            # To test the hosted model instead, use: answer = query_cohere(prompt)
            answer = mistral(prompt)

            end_time = time.time()
            inference_time = end_time - start_time

            combination_i_output += format_qca_string(new_question=question,
                                                      new_context=context,
                                                      new_answer=answer,
                                                      inference_time=inference_time)

        save_results_to_txt(result_string=combination_i_output,
                            model_name='Mistral',
                            chunk_size=chunk_size,
                            chunk_overlap=chunk_overlap,
                            is_reranking=is_reranking)