In [None]:
!pip install -q -U torch datasets transformers langchain playwright html2text sentence_transformers faiss-cpu

In [None]:
# !playwright install
# !playwright install-deps

In [None]:
!pip install -q accelerate peft bitsandbytes trl
!pip install bitsandbytes

### What include this notebook
- Load a Mistral 7B model with quantization config.
- Compare base model answers vs a simple RAG version.
- As documents  for RAG use the dataset created (text from Web and Pdfs) using public data extracted from https://www.thoughtworks.com, check the other notebook to be see the scrapper. https://drive.google.com/file/d/1GzSC4F0uGoHGcRHl9FEdvA3WLA7NIjvp/view?usp=drive_link


In [1]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

In [8]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.2'

tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          trust_remote_code=True,
                                          device_map='auto',
                                          padding_side="left",
                                          add_eos_token=True)
tokenizer.pad_token = tokenizer.eos_token
#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    #torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
total_params = model.num_parameters()
print(f"num params:", total_params)
trainable_params = model.num_parameters(only_trainable=True)
print(f"num trainable params:", trainable_params)
print(f"PCT trainable", (trainable_params/total_params) * 100)

num params: 7241732096
num trainable params: 262410240
PCT trainable 3.6235839233122604


In [None]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for name, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
            print(name, "trainable")
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

In [13]:
# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

In [14]:
total_params = model.num_parameters()
print(f"num params:", total_params)
trainable_params = model.num_parameters(only_trainable=True)
print(f"num trainable params:", trainable_params)
print(f"PCT trainable", (trainable_params/total_params) * 100)

num params: 7241732096
num trainable params: 0
PCT trainable 0.0


In [21]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    #temperature=0.2,
    device_map='auto',
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=800,
    batch_size=16 # This does not work
)

In [22]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline )

In [78]:
prompt_template = """
### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
{context}

### QUESTION:
{question} [/INST]
 """


In [79]:
# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,

)

# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

#### Call to the base Mistral model without context

In [42]:
resp = llm_chain.invoke({"context": "", "question": "What is the buisness model of Thoughtworks?"})
print(resp["text"].split("[/INST]")[1].strip())

Thoughtworks is a global technology consulting firm that follows a unique business model called "agile software development" or "agile consulting." They provide services primarily in software development, digital platform engineering, and strategic consulting. Their clients range from startups to large enterprises across various industries.

Thoughtworks' revenue comes mainly from project-based engagements where they collaborate with their clients to design, build, and operate software solutions using agile methodologies. They do not sell any proprietary software or hardware but instead focus on delivering customized solutions tailored to each client's needs.

References:
1. About Us - Thoughtworks: https://www.thoughtworks.com/about
2. Services - Thoughtworks: https://www.thoughtworks.com/services
3. Business Model - Thoughtworks (not explicitly mentioned but can be inferred from their mission statement and services offered)


In [32]:
import pandas as pd
df = pd.read_parquet("./thoughtworks_cleaned_dataset.parquet")

In [58]:
def templated_text(row):
  """
  Prepare templated text to be embedded using FAISS and HF model
  """
  template = f"""
  Source: {row["url"]}
  Title: {row["title"]}
  {'Document language: ' + row["lang"] if "lang" in row else ""}
  {row["text"]}
  """
  return template
df["final_text"] = df.apply(templated_text, axis=1)

In [59]:
for i in df.sample(2)["final_text"].values:
  print(i)


  Source: https://www.thoughtworks.com/en-in/about-us/partnerships/cloud/microsoft
  Title: Partnerships: Microsoft
  Document language: en
  Microsoft and Thoughtworks are partnering to help our clients to leverage cloud along every step of their transformation journey.

Benefiting from the strong platform, tools and resources provided by Microsoft, alongside our global delivery expertise, we help our clients to strengthen their core technology foundation, build seamless data-driven customer experiences and unlock new revenue steam opportunities for their evolving business.
  

  Source: https://www.thoughtworks.com/en-ca/insights/topic/security
  Title: Security
  Document language: en
  Security is so much more than just table stakes for today’s digital business: it goes to the heart of trust in the relationship you build with your customers.

High profile breaches and increased public awareness of security and privacy issues have resulted in a loss of trust. We need to rebuild. At

In [65]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

# Chunk text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)

docs = text_splitter.create_documents(texts=df["final_text"].values.tolist())
chunked_documents = text_splitter.split_documents(docs)

# Load chunked documents into the FAISS index
import sys
import os
emb_model=HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
db = FAISS.from_documents(
    chunked_documents,
    emb_model)

retriever = db.as_retriever()

In [67]:
db.save_local("./dbthoughtworks_faiss.db")

### Call the model with the in-memory FAISS retriever

In [44]:
quest = "What is your recommendation platform to create machine learning model projects?"
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(quest)
print(result['text'])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide references links if available. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
[Document(page_content='Title: Guide to evaluating MLOps\n  Source: https://www.thoughtworks.com/what-we-do/data-and-ai/cd4ml/guide-to-evaluating-mlops-platforms\n  Document language: \n  Find the right platform to accelerate your AI journey.\n\nThere’s a plethora of tools and platforms to help organizations get machine learning models into production. However, the amount of options can be overwhelming and navigating the trade-offs is difficult. Should we buy or build a platform? When buying, which choices should we consider? What should be the key selection criteria? Just understanding which software to evaluate can be confusing.'), Document(page_content='According to a VentureBeat report from 2019, 87% of data science projects never make it into production.

In [45]:
questions = []
for row in df["questions"].values:
  questions.extend(row)
questions = list(set(questions))
len(questions)

In [None]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

results2 = []
index = 0
for q in questions:
  print(index)
  res = rag_chain.invoke(q)
  results2.append((res["question"], res["context"], res["text"].split("[/INST]")[1]))
  index += 1

In [100]:
import pickle


with open('results2.pickle', 'wb') as handle:
    pickle.dump(results2, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [102]:
# Print some questions extracted with regex explicitly from text
for result in results2[:10]:
  print("Question: ", result[0])
  print("Context: ", result[1])
  print("Answer: ", result[2])
  print("_____________________________________________")

Question:  At what scale does it pay off to fine-tune a model with your organization’s code?
Context:  [Document(page_content='specific use cases, e.g. by combining a reusable architecture and tech stack definition with user stories to generate task plans or test code, similar to what my colleague Xu Hao is describing here. Prompt composition applications like this are most commonly used with OpenAI’s models today, as they are most easily available and relatively powerful. Experiments are moving more and more towards open source models and the big hyperscalers hosted models though, as people are looking for more control over their data. As a next step forward, beyond advanced prompt composition, people are putting lots of hopes for future improvements into the model component. Do larger models, or smaller but more specifically trained models work better for coding assistance? Will models with larger context windows enable us to feed them with more code to reason about the quality and a

In [68]:
### Calling the Chain with The retrievers

quest = "What Thoughworks thinks Terragrunt is valuable for?"
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(quest)

In [69]:
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide references links if available. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
[Document(page_content="Source: https://www.thoughtworks.com/en-th/clients/terrascope\n  Title: Terrascope\n  Document language: en\n  Climate change has made mitigating carbon emissions increasingly critical for individuals, governments and businesses globally.\n\nBusiness operations and their supply chains account for the majority of emissions. Despite this, almost 85% of companies are unable to comprehensively measure their indirect emissions, especially those originating from sources they don't control or own (known in the industry as ‘Scope 3’ emissions). This makes accurately reporting and mitigating their carbon footprints near impossible.\n\nBacked by Olam Ventures, Terrascope is a Singapore-based climate-tech venture founded in 2021 to empower compan

In [80]:
quest = "What is the recommended cloud platform to use?"
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(quest)
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
[Document(page_content="And the end state is also dependent on the specific context that you have and the architectural needs that you'll have for those systems at the end of it.\n\nNow, Alexandre, you did mention different vendors and different features and also aggressive commercial strategies from them. We've been talking in Thoughtworks about clouds' stickiness for awhile. So it's lock in to a specific cloud vendor. How do you see that? Are there tools that help us avoid that? Or to what extent do you really need to embrace whatever the cloud provider offers you?\n\nMm-hmm (affirmative). Yeah. That's a good point. Nowadays, we have a very varying level of infrastructure available in the cloud. Well, I think it's common sense from the main ve

In [74]:
### Native Model version
quest = "What is the recommended cloud platform to use?"


result = llm_chain.invoke(dict(question=quest, context=''))
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide references links if available. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:


### QUESTION:
What is the recommended cloud platform to use? [/INST]
  Based on publicly available information from Thoughtworks, they do not explicitly recommend a specific cloud platform over others. Thoughtworks is known for its technology-agnostic approach and provides services across various cloud platforms including Amazon Web Services (AWS), Microsoft Azure, Google Cloud Platform (GCP), and IBM Cloud, among others. They help clients choose the right cloud platform based on their unique business needs and requirements. For more details, you can refer to their Cloud Services page at https://www.thoughtworks.com/services/cloud or contact them directly for personalized advice.


In [84]:
### Native Model version
quest = "Recommend some graphic tool to explore code?"


result = llm_chain.invoke(dict(question=quest, context=''))
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:


### QUESTION:
Recommend some graphic tool to explore code? [/INST]
  At Thoughtworks, we use various tools for exploring and visualizing code, but none of them are exclusively graphic or dedicated to this purpose. Here are a few popular tools among our developers that can be used for exploring code with some level of graphical representation:

1. **GitHub**: GitHub provides several ways to visualize code through its web interface. You can view files side-by-side using file comparisons, examine commit history through graphs, and even create pull requests to propose changes. (Source: https://github.com/)

2. **Visual Studio Code**: Visual Studio Code offers built-in features like the integrated terminal, debugger, and explorer windows that can h

In [83]:
quest = "Recommend some graphic tool to explore code?"
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(quest)
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
[Document(page_content="So, a bit like Korny, I also settled on saying, let's use some basic building blocks if you will. And we assembled them with different toolings and the idea really was that you try to find something that can... If you talk about static analysis of code, I think we will probably go beyond that later in the conversation. But especially when you look at static analysis, to have one class of tools that provides analysis of the code base and outputs of textural format. And that is something you don't rewrite all the time. You have another class of tools that take some textual format and generates graphics and diagrams from it. And that again is something that is reusable.\n\nAnd that's not only for code analysis. You can use t

In [87]:
quest = """
What is the recommend approach to integrate LLMs ?:
- Use private LLMs like Chatgpt, Llama
- Use finetuned open-source models
- Use Retrieval-Augmented Generation (RAG) applications
"""
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(quest)
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
[Document(page_content="Neal: I think there's an interesting split between the general use or the very general purpose and very surprisingly useful, but also hyper-specific context and training an LLM around a very specific context, either because language is hyper-precise like in a legal or medical realm or in gaming or something like that where you want a narrow way to look at that. That's one interesting aspect. The other interesting aspect is, as technologists, we love to peel back abstraction layers and see how things work. That's, you get into LLMs and then start looking at the individual pieces and how they fit together, which is always fascinating.\n\nRebecca: One of the interesting use cases that I've heard about, and we actually have, 

In [88]:
### Native Model version
quest = """
What is the recommend approach to integrate LLMs ?:
- Use private LLMs like Chatgpt, Llama
- Use finetuned open-source models
- Use Retrieval-Augmented Generation (RAG) applications
"""

result = llm_chain.invoke(dict(question=quest, context=''))
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:


### QUESTION:

What is the recommend approach to integrate LLMs ?:
- Use private LLMs like Chatgpt, Llama
- Use finetuned open-source models
- Use Retrieval-Augmented Generation (RAG) applications
 [/INST]
  Based on my knowledge from publicly available information about Thoughtworks and their approach to Large Language Models (LLMs), I would suggest using a combination of finetuned open-source models and Retrieval-Augmented Generation (RAG) applications for integration.

Thoughtworks has been actively exploring the use of large language models in various projects and solutions. They have published several articles and blog posts discussing their experiences with fine-tuning open-source models such as Hugging Face Transformers and T5. These mo

In [89]:
quest = """
What about use Retrieval-Augmented Generation (RAG) applications, is that something recommended?, mention some tools for it.
"""
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(quest)
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
[Document(page_content='Source: https://www.thoughtworks.com/de-de/radar/techniques/retrieval-augmented-generation-rag\n  Title: Retrieval-augmented generation (RAG) | Technology Radar\n  Document language: \n  Retrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of responses generated by a large language model (LLM). We’ve successfully used it in several projects, including the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy documents — in formats like HTML and PDF — are stored in databases that supports a vector data type or efficient document search, such as pgvector, Qdrant or Elasticsearch Relevance Engine. For a given prompt, the database is queried to retri

In [90]:
### Native Model version
quest = """
What about use Retrieval-Augmented Generation (RAG) applications, is that something recommended?, mention some tools for it.
"""

result = llm_chain.invoke(dict(question=quest, context=''))
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:


### QUESTION:

What about use Retrieval-Augmented Generation (RAG) applications, is that something recommended?, mention some tools for it.
 [/INST]
  According to Thoughtworks' technology radar reports, Retrieval-Augmented Generation (RAG) applications have been identified as a "radar" technology since Q3 2021. RAG applications combine retrieval systems and generation models to provide more accurate and contextually relevant responses than traditional retrieval systems alone. This approach can be particularly useful in areas like customer service, content generation, and information retrieval.

Some popular tools for building RAG applications include:

1. DALL-E 2: A text-to-image model from OpenAI that can generate images based on descriptio

In [91]:
### Native Model version
quest = """
What are the tools to implement Retrieval-Augmented Generation (RAG) applications?.
"""

result = llm_chain.invoke(dict(question=quest, context=''))
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:


### QUESTION:

What are the tools to implement Retrieval-Augmented Generation (RAG) applications?.
 [/INST]
  Based on my knowledge from publicly available information about Thoughtworks and their work in the field of Retrieval-Augmented Generation (RAG), I cannot find a definitive list of specific tools that Thoughtworks recommends for implementing RAG applications. However, they have published several articles and blog posts discussing the concept and implementation of RAG.

One such article titled "Retrieval-augmented generation: Combining human creativity with AI" published on Thoughtworks' Insights blog discusses the importance of RAG and how it can be implemented using various techniques and technologies like deep learning models, retrie

In [92]:
quest = """
What are the tools to implement Retrieval-Augmented Generation (RAG) applications?.
"""
rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke(quest)
print(result["text"])


### [INST] Instruction: Answer the question based on your knowledge in public documents of Thoughtworks company. Provide source links if there is any in the context. If there is no clear answer just write 'NOT FOUND'.

Here is context to help:
[Document(page_content='Source: https://www.thoughtworks.com/radar/techniques/retrieval-augmented-generation-rag\n  Title: Retrieval-augmented generation (RAG) | Technology Radar\n  Document language: \n  We feel strongly that the industry should be adopting these items. We use them when appropriate on our projects.\n\nRetrieval-augmented generation (RAG) is the preferred pattern for our teams to improve the quality of responses generated by a large language model (LLM). We’ve successfully used it in several projects, including the popular Jugalbandi AI Platform. With RAG, information about relevant and trustworthy documents — in formats like HTML and PDF — are stored in databases that supports a vector data type or efficient document search, su