<a href="https://colab.research.google.com/github/philosophy-question-answerer/model-tests-automated/blob/main/model_tests_automated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Package Installs

In [None]:
! pip install cohere
! pip install ctransformers
! pip install faiss-cpu
! pip install huggingface_hub
! pip install langchain
! pip install sentence-transformers
! pip install transformers
! pip install wandb

# Set Up Weights & Biases

In [None]:
import wandb

# Start a Weights & Biases run with default settings (no project/run name is passed here).
wandb.init()

# Imports


In [None]:
import os
import time
import uuid
import textwrap

from torch import cuda
from tqdm.auto import tqdm

from ctransformers import AutoModelForCausalLM
import cohere

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

from google.colab import userdata, drive

# Set Up Google Drive

In [None]:
# Mount Google Drive at /content/drive; the dataset, FAISS index, and results
# paths used later in this notebook all live under '/content/drive/My Drive'.
drive.mount('/content/drive')
# Optional one-off: create the results folder on Drive if it does not exist yet.
# ! mkdir -p '/content/drive/My Drive/Model Tests Results'

# Environment & Global Variables

## API Keys & Third Party Services

In [None]:
# Read the Cohere API key through google.colab's userdata.get instead of hard-coding it;
# it is later passed to cohere.Client.
COHERE_API_KEY = userdata.get('COHERE_API_KEY')

## Chunking Parameter Combinations

In [None]:
# Every (chunk_size, chunk_overlap) pair to evaluate, expanded into the
# {'chunk_size': ..., 'chunk_overlap': ...} dicts the rest of the notebook reads.
chunk_parameter_combinations = [
    {'chunk_size': size, 'chunk_overlap': overlap}
    for size, overlap in (
        (512, 50),
        (512, 100),
        (1024, 100),
        (1024, 150),
        (1500, 100),
        (1500, 300),
        (2048, 200),
        (2048, 350),
    )
]

## Models To Be Tested

In [None]:
# Models under test, keyed by the label used in result file names.
# Each entry holds the keyword values load_model_locally forwards to
# AutoModelForCausalLM.from_pretrained.
# NOTE(review): confirm that the installed ctransformers accepts every
# 'model_type' value listed here (in particular 'solar').
models_dict = {
    'Mistral_7B_Instruct_v0p1': dict(
        model_path_or_repo_id='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
        model_file='mistral-7b-instruct-v0.1.Q5_K_M.gguf',
        model_type='mistral',
    ),
    'Mistral_7B_Instruct_v0p2': dict(
        model_path_or_repo_id='TheBloke/Mistral-7B-Instruct-v0.2-GGUF',
        model_file='mistral-7b-instruct-v0.2.Q5_K_M.gguf',
        model_type='mistral',
    ),
    'Llama_2_7b_Chat': dict(
        model_path_or_repo_id='TheBloke/Llama-2-7b-Chat-GGUF',
        model_file='llama-2-7b-chat.Q5_K_M.gguf',
        model_type='llama',
    ),
    'Llama_2_13B_chat': dict(
        model_path_or_repo_id='TheBloke/Llama-2-13B-chat-GGUF',
        model_file='llama-2-13b-chat.Q5_K_M.gguf',
        model_type='llama',
    ),
    'SOLAR_10p7B_Instruct_v1p0': dict(
        model_path_or_repo_id='TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF',
        model_file='solar-10.7b-instruct-v1.0.Q5_K_M.gguf',
        model_type='solar',
    ),
    'orca_mini_v3_7B': dict(
        model_path_or_repo_id='TheBloke/orca_mini_v3_7B-GGUF',
        model_file='orca_mini_v3_7b.Q5_K_M.gguf',
        model_type='llama',
    ),
}

## Test Philosophy Questions — Ludwig Wittgenstein's *Philosophical Investigations*

In [None]:
# Questions put to every model. Each string is sent verbatim as the QUESTION
# part of the prompt and is also used as the semantic-search / rerank query,
# so wording errors here directly affect retrieval and answers.
TEST_QUESTIONS = [
    'What determines the meaning of a word?',
    # Fixed a duplicated word ("When can something can be ...").
    'When can something be classified as a game?',
    'What is the concept of family resemblance?',
    'Is the existence of a private language possible?',
    'What does following a rule entail?',
    'Can the concept of sameness be used to teach a rule?',
    'What is the role of language-games?',
    'How is the idea that mental processes form the basis of our understanding of language critiqued?',
    'Do private mental objects exist?',
    'What is the relationship between forms of life and language?',
    'How is it explained that the meaning of a word is its use in language?',
    'How do philosophical problems arise from misunderstandings of language?',
    'What is problematic about the Augustinian view on meaning?',
    'What is said about the misguided nature of philosophical questions?',
    'What is the purpose of the analogy with the toolbox?',
    "In what ways does the notion of 'language-games' challenge traditional epistemology?",
    'What implications does the critique of private language have for theories of consciousness?',
    'How is the concept of pain used to argue against the possibility of a private language?',
    "In what ways does the examination of pain's sameness challenge the understanding of subjective experiences?",
    'What are the limitations of language?',
    'What is the relationship between forms of life and understanding philosophy?'
]
print(len(TEST_QUESTIONS))

# Loading the Dataset



In [None]:
def load_dataset(dataset_path='/content/drive/My Drive/Dataset'):
    """Load every '.txt' file in a directory with LangChain's TextLoader.

    Args:
        dataset_path: Directory to scan (not recursive). Defaults to the
            dataset folder on the mounted Google Drive.

    Returns:
        A list with one entry per '.txt' file; each entry is whatever
        ``TextLoader.load()`` returned for that file (create_chunks passes
        each entry to ``split_documents``).

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist (from os.listdir).
    """
    documents = []

    # os.listdir returns names in arbitrary order; sort so that the document
    # order is reproducible between runs.
    for file_name in sorted(os.listdir(dataset_path)):
        if not file_name.endswith('.txt'):
            continue

        file_path = os.path.join(dataset_path, file_name)
        loader = TextLoader(file_path, encoding='UTF-8')
        documents.append(loader.load())

    print(f'Loaded {len(documents)} documents.')

    return documents

# Utility Functions

## Create Chunks

In [None]:
def create_chunks(documents, chunk_size=750, chunk_overlap=50):
    """Split loaded documents into chunks with RecursiveCharacterTextSplitter.

    Args:
        documents: Iterable whose items are each passed to
            ``split_documents`` (load_dataset produces one item per file).
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        One flat list holding the chunks of all documents, in input order.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=['\n\n', '\n', '.', ' ', ''],
    )

    # Flatten the per-document chunk lists into a single list.
    all_chunks = [
        chunk
        for document in documents
        for chunk in splitter.split_documents(document)
    ]

    print(f'Chunks successfully created. Number of chunks = {len(all_chunks)}')

    return all_chunks

In [None]:
# Quick check: load the dataset and chunk it with create_chunks' default parameters.
docs = load_dataset()
chunks = create_chunks(docs)

In [None]:
# Exploratory check: add placeholder 'author' and 'title' entries to the first
# chunk's metadata and print the resulting metadata.
c = chunks[0].metadata
c.update({'author': 'xyz', 'title': 'xyz'})
print(c)

## Load Model Locally

In [None]:
def load_model_locally(model_choice, gpu_layers=64, context_length=6200):
    """Load one model with ctransformers' AutoModelForCausalLM.

    Args:
        model_choice: Dict with the keys 'model_path_or_repo_id',
            'model_file' and 'model_type' (one value of ``models_dict``).
        gpu_layers: Forwarded to ``from_pretrained`` as ``gpu_layers``.
            The default keeps the previously hard-coded value.
        context_length: Forwarded to ``from_pretrained`` as
            ``context_length``. The default keeps the previously
            hard-coded value.

    Returns:
        The object returned by ``AutoModelForCausalLM.from_pretrained``.

    Raises:
        KeyError: If ``model_choice`` lacks one of the required keys.
    """
    return AutoModelForCausalLM.from_pretrained(
        model_path_or_repo_id=model_choice['model_path_or_repo_id'],
        model_file=model_choice['model_file'],
        model_type=model_choice['model_type'],
        gpu_layers=gpu_layers,
        context_length=context_length,
    )

## Query Cohere Model

In [None]:
# Shared Cohere client; also used by the reranking step further down.
co = cohere.Client(COHERE_API_KEY)


def query_cohere(prompt):
    """Send ``prompt`` to Cohere's chat endpoint and return the reply text.

    Uses the 'command' model with temperature 0.9.
    """
    return co.chat(message=prompt, model='command', temperature=0.9).text


## Save Results to .txt File

In [None]:
results_dir = '/content/drive/My Drive/Model Tests Results'

def save_results_to_txt(result_string, model_name, chunk_size, chunk_overlap, is_reranking):
    """Append one result entry to the results file for a test configuration.

    The file is
    ``{results_dir}/{model_name}_{chunk_size}_{chunk_overlap}_{is_reranking}.txt``
    (the same naming scheme add_headings uses). It is opened in append mode,
    so repeated calls for the same configuration accumulate entries.

    Args:
        result_string: Text to append; a newline is added after it.
        model_name: Model label used in the file name.
        chunk_size: Chunk size used in the file name.
        chunk_overlap: Chunk overlap used in the file name.
        is_reranking: Reranking flag used in the file name.
    """
    # Create the results folder on first use; without it open(..., 'a') raises
    # FileNotFoundError when the folder has not been created manually.
    os.makedirs(results_dir, exist_ok=True)

    output_file_path = f'{results_dir}/{model_name}_{chunk_size}_{chunk_overlap}_{is_reranking}.txt'

    with open(output_file_path, 'a', encoding='utf-8') as file:
        file.write(f'{result_string}\n')

def add_headings():
    """Prepend a configuration header to every results file.

    Visits one file per (model, chunk combination, reranking flag), named
    exactly as in save_results_to_txt, and rewrites it with a header giving
    the model, chunk size, chunk overlap and reranking flag, followed by the
    original content.

    The function is safe to re-run:
      * a file that does not exist (e.g. after an interrupted test run) is
        reported and skipped instead of raising FileNotFoundError midway;
      * a file that already starts with its header is left untouched, so a
        second call does not duplicate headers.
    """
    for model in tqdm(models_dict, desc='Models'):
        for combination in tqdm(chunk_parameter_combinations, desc='Chunking Combinations'):

            chunk_size = combination['chunk_size']
            chunk_overlap = combination['chunk_overlap']

            for is_reranking in tqdm([True, False], desc='Reranking'):
                output_file_path = f'{results_dir}/{model}_{chunk_size}_{chunk_overlap}_{is_reranking}.txt'

                if not os.path.isfile(output_file_path):
                    print(f'Skipping missing results file: {output_file_path}')
                    continue

                heading = (
                    f'\nModel = {model}\n'
                    f'Chunk Size = {chunk_size}\n'
                    f'Chunk Overlap = {chunk_overlap}\n'
                    f'Reranking on = {is_reranking}\n\n'
                )

                with open(output_file_path, 'r', encoding='utf-8') as file:
                    original_content = file.read()

                # Already processed by an earlier call: keep the file as is.
                if original_content.startswith(heading):
                    continue

                with open(output_file_path, 'w', encoding='utf-8') as file:
                    file.write(heading)
                    file.write(original_content)

    print('Successfully added all headings.')


## Format Question-Context-Answer (QCA) String

In [None]:
def format_qca_string(new_question, new_context, new_answer, inference_time):
    """Render one question/context/answer record as a text block.

    The block starts with a '#' separator line and contains QUESTION,
    CONTEXT, ANSWER and INFERENCE_TIME sections separated by blank lines.
    ``inference_time`` is shown in seconds and in minutes (seconds / 60).
    """
    # Four newlines separate consecutive sections.
    section_gap = '\n' * 4
    sections = (
        '\n#######################',
        f'QUESTION:\n\n{new_question}',
        f'CONTEXT:\n\n{new_context}',
        f'ANSWER:\n\n{new_answer}',
        f'INFERENCE_TIME:\n\n{inference_time} seconds = {inference_time/60} minutes\n\n\n    ',
    )
    return section_gap.join(sections)

# Creating Vector Database

## Load the Embedding Model

In [None]:
embedding_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Use the current CUDA device when one is available, otherwise the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'
print(device)

# Embedding model used to build and query the FAISS indices; the same device
# is passed for model loading and for encoding.
embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=10),
)

## Create, Populate, Save, and Load FAISS Indices

In [None]:
# Folder on the mounted Drive under which every FAISS index is saved and loaded.
FAISS_INDICES_PATH = '/content/drive/My Drive/FAISS Indices'


def make_faiss_index_name(chunk_size, chunk_overlap):
    """Return the index name for a chunking configuration.

    Example: (512, 50) -> 'ChunkSize_512_ChunkOverlap_50'.
    """
    return 'ChunkSize_{}_ChunkOverlap_{}'.format(chunk_size, chunk_overlap)

def create_populate_and_save_faiss_index(index_name, chunks, embedding_model):
    """Build a FAISS index from ``chunks`` and save it under FAISS_INDICES_PATH.

    Args:
        index_name: Sub-folder name for the saved index.
        chunks: Documents passed to ``FAISS.from_documents``.
        embedding_model: Embedding model passed to ``FAISS.from_documents``.
    """
    target_folder = f'{FAISS_INDICES_PATH}/{index_name}'
    FAISS.from_documents(chunks, embedding_model).save_local(target_folder)

def load_faiss_index(index_name, embedding_model):
    """Load a previously saved FAISS index from FAISS_INDICES_PATH.

    Args:
        index_name: Sub-folder name the index was saved under.
        embedding_model: Embedding model passed to ``FAISS.load_local``.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    import inspect

    index_path = f'{FAISS_INDICES_PATH}/{index_name}'

    # SECURITY: loading a saved LangChain FAISS index unpickles data from disk.
    # Newer langchain_community releases therefore refuse to load unless
    # allow_dangerous_deserialization=True is passed. That is acceptable here
    # only on the assumption that the indices were written by this notebook's
    # own create_populate_and_save_faiss_index; do not point this at index
    # folders from an untrusted source.
    # The flag is passed only when the installed version declares it, so
    # older releases without the parameter keep working.
    load_kwargs = {}
    if 'allow_dangerous_deserialization' in inspect.signature(FAISS.load_local).parameters:
        load_kwargs['allow_dangerous_deserialization'] = True

    return FAISS.load_local(index_path, embedding_model, **load_kwargs)

## Querying the Database (Semantic Search)

In [None]:
def semantic_search(faiss_index, question, num_matched_excerpts):
    """Return the excerpts most similar to ``question``.

    Args:
        faiss_index: Object exposing ``similarity_search(query=..., k=...)``.
        question: Query text.
        num_matched_excerpts: Number of results requested (passed as ``k``).

    Returns:
        A list of ``{'text': ..., 'source': ...}`` dicts, one per result, in
        the order returned by the index. 'text' is the result's
        ``page_content`` and 'source' is ``metadata['source']``.
    """
    matches = faiss_index.similarity_search(query=question, k=num_matched_excerpts)

    excerpts = []
    for match in matches:
        excerpts.append({
            'text': match.page_content,
            'source': match.metadata['source'],
        })

    return excerpts

# Reranking (Cohere API)


In [None]:
def semantic_search_reranking(faiss_index, question, num_matched_excerpts, num_reranked_excerpts):
    """Retrieve excerpts with semantic_search, then rerank them with Cohere.

    Args:
        faiss_index: Index passed through to semantic_search.
        question: Query used for both the search and the rerank call.
        num_matched_excerpts: Number of candidates fetched from the index.
        num_reranked_excerpts: Number of excerpts kept after reranking
            (passed to Cohere as ``top_n``).

    Returns:
        A list of ``{'text': ..., 'source': ...}`` dicts in reranked order.
        Empty when the search returned no candidates.
    """
    docs = semantic_search(faiss_index=faiss_index,
                           question=question,
                           num_matched_excerpts=num_matched_excerpts)

    # Nothing to rerank; avoid calling the API with an empty document list.
    if not docs:
        return []

    response = co.rerank(query=question,
                         documents=[doc['text'] for doc in docs],
                         top_n=num_reranked_excerpts,
                         model='rerank-english-v2.0')

    # Use the response's `results` list when it is present and otherwise fall
    # back to iterating the response object itself.
    # NOTE(review): written against the Cohere rerank API, where each result
    # carries `index`, the position of the document in the submitted list —
    # confirm against the installed SDK version.
    results = getattr(response, 'results', response)

    # Map back by position rather than by text: this does not depend on the
    # API echoing the document text, and two candidates with identical text
    # keep their own sources.
    return [dict(docs[result.index]) for result in results]

# Retrieval Augmented Generation (RAG)

## Create the Question-Context Prompt

In [None]:
def extract_context(faiss_index, question, num_matched_excerpts, is_reranking, num_reranked_excerpts):
    """Build the CONTEXT text for a question.

    With reranking, ``num_matched_excerpts`` candidates are retrieved and
    reranked down to ``num_reranked_excerpts``. Without reranking,
    ``num_reranked_excerpts`` excerpts are retrieved directly, so both modes
    request the same number of excerpts for the context.

    Returns:
        A string in which each excerpt contributes its text and its source,
        each wrapped at 150 characters, followed by a blank line.
    """
    if not is_reranking:
        docs = semantic_search(faiss_index=faiss_index,
                               question=question,
                               num_matched_excerpts=num_reranked_excerpts)
    else:
        docs = semantic_search_reranking(faiss_index=faiss_index,
                                         question=question,
                                         num_matched_excerpts=num_matched_excerpts,
                                         num_reranked_excerpts=num_reranked_excerpts)

    return ''.join(
        textwrap.fill(doc['text'], 150) + '\n' + textwrap.fill(doc['source'], 150) + '\n\n'
        for doc in docs
    )

In [None]:
def create_prompt(question, context):
    """Assemble the prompt sent to each model.

    The prompt consists of an instruction line, the QUESTION, the CONTEXT
    and a trailing 'ANSWER:' cue for the model to continue from.
    """
    instruction = '\n  Answer the following QUESTION with the given CONTEXT. \n\n\n'
    question_part = f'  QUESTION: {question} \n\n'
    context_part = f'  CONTEXT: \n {context} \n\n'
    answer_cue = '  ANSWER:\n  '

    return instruction + question_part + context_part + answer_cue

# FAISS Index Creation for Every Chunk Parameter Combination

In [None]:
def create_faiss_indices(documents, embedding_model):
    """Build and save one FAISS index per entry of chunk_parameter_combinations.

    For each combination the documents are re-chunked, embedded with
    ``embedding_model``, and saved under the name produced by
    make_faiss_index_name. The time taken per index is printed in minutes.
    """
    for params in tqdm(chunk_parameter_combinations, desc='Chunk Parameter Combinations'):
        size, overlap = params['chunk_size'], params['chunk_overlap']

        print(f'\nProcessing combination: Chunk Size = {size}, Chunk Overlap = {overlap}')

        chunks = create_chunks(documents, chunk_size=size, chunk_overlap=overlap)
        index_name = make_faiss_index_name(size, overlap)

        print(f'Creating {index_name} index.')

        started_at = time.time()
        create_populate_and_save_faiss_index(index_name=index_name,
                                             chunks=chunks,
                                             embedding_model=embedding_model)
        # Elapsed wall-clock time, converted from seconds to minutes.
        index_creation_time = (time.time() - started_at)/60

        print(f'{index_name} index was successfully created in {index_creation_time} minutes.')

In [None]:
# One-time setup: uncomment the two lines below to build and save a FAISS index
# for every entry in chunk_parameter_combinations. The test loop later in this
# notebook only loads saved indices from disk (via load_faiss_index).
# documents = load_dataset()
# create_faiss_indices(documents, embedding_model)

# Model Tests


## Load All Models



In [None]:
# Load every model listed in models_dict up front, keyed by its label.
# NOTE(review): all models are held at the same time — confirm that they fit
# in the available GPU/CPU memory.
loaded_models = {
    model_name: load_model_locally(model_config)
    for model_name, model_config in models_dict.items()
}

## Run Tests

In [None]:
# Evaluation sweep: for every chunking combination and reranking setting, build
# the context for each test question once, then ask every loaded model the same
# prompt and append the question/context/answer record to that model's results file.

# Retrieval sizes: candidates fetched from the index before reranking, and
# excerpts finally placed in the prompt context.
NUM_MATCHED_EXCERPTS = 25
NUM_RERANKED_EXCERPTS = 3

for combination in tqdm(chunk_parameter_combinations, desc='Chunking Combinations'):

    chunk_size = combination['chunk_size']
    chunk_overlap = combination['chunk_overlap']

    index_name = make_faiss_index_name(chunk_size, chunk_overlap)
    index = load_faiss_index(index_name, embedding_model=embedding_model)

    print(f'Loaded {index_name} index.')

    for is_reranking in tqdm([True, False], desc='Reranking'):

        print(f'Processing combination: Chunk Size = {chunk_size}, Chunk Overlap = {chunk_overlap}, Reranking = {is_reranking}')

        # enumerate gives the 1-based question number directly; looking it up
        # with TEST_QUESTIONS.index() is a linear scan per question and would
        # report the wrong number if a question ever appeared twice.
        for question_number, question in enumerate(tqdm(TEST_QUESTIONS, desc='Test Questions'), start=1):

            print(f'Querying question {question_number}')

            # Context and prompt are built once per question and shared by all models.
            context = extract_context(faiss_index=index,
                                      question=question,
                                      num_matched_excerpts=NUM_MATCHED_EXCERPTS,
                                      is_reranking=is_reranking,
                                      num_reranked_excerpts=NUM_RERANKED_EXCERPTS)

            prompt = create_prompt(question=question, context=context)

            for model, llm in tqdm(loaded_models.items(), desc='Models'):

                print(f'Querying model {model}')

                # Time only the model call itself.
                start_time = time.time()
                answer = llm(prompt)
                inference_time = time.time() - start_time

                print(f'Question {question_number} was answered in {inference_time} seconds = {inference_time/60} minutes.')

                result_string = format_qca_string(new_question=question,
                                                  new_context=context,
                                                  new_answer=answer,
                                                  inference_time=inference_time)

                save_results_to_txt(result_string=result_string,
                                    model_name=model,
                                    chunk_size=chunk_size,
                                    chunk_overlap=chunk_overlap,
                                    is_reranking=is_reranking)

In [None]:
# Prepend the model / chunking / reranking header to each results file
# written by the test loop.
add_headings()