### Necessary imports

In [1]:
!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.2/670.2 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m101.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.6/803.6 kB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.4/37.4 MB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m71.

### Dependencies

In [2]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.schema import format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.runnables import RunnableParallel
from langchain.prompts.prompt import PromptTemplate
from operator import itemgetter
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.memory import ConversationBufferMemory

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.cache import RedisSemanticCache
from langchain.cache import CassandraSemanticCache

### Load quantized Llama 2 7B

In [95]:
model_name='NousResearch/Llama-2-7b-chat-hf'

In [96]:
# Set up semantic cache with Redis
# semantic_cache = RedisSemanticCache(redis_url="redis://localhost:6379", embedding=HuggingFaceEmbeddings(model_name=model_name))


In [97]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



### Count number of trainable parameters

In [98]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3500412928
percentage of trainable model parameters: 7.50%


### Build Mistral text generation pipeline

In [99]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.00,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

In [100]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Load and chunk documents. Load chunked documents into FAISS index

In [101]:
!playwright install
!playwright install-deps

Installing dependencies...
0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:6 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
fonts-freefont-ttf is already the newest version (20120503-10build1).
fonts-liberati

In [145]:

import nest_asyncio
nest_asyncio.apply()

# Articles to index
# articles = ["https://www.fantasypros.com/2023/11/rival-fantasy-nfl-week-10/",
#             "https://www.fantasypros.com/2023/11/5-stats-to-know-before-setting-your-fantasy-lineup-week-10/",
#             "https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/",
#             "https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/",
#             "https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/"]


articles = ["https://stackoverflow.com/questions/74893430/scrapping-a-forum-page-to-get-all-responses-and-user-info",
            "https://stackoverflow.com/questions/66071647/python-scrape-forum-for-title-for-each-post",
            "https://stackoverflow.com/questions/48499652/unable-to-collect-titles-from-a-webpage-in-the-right-way?rq=3",
            "https://stackoverflow.com/questions/72850215/scrape-the-title-from-ecommerce-site-using-selenium-python?rq=3",
            "https://stackoverflow.com/questions/48661203/find-page-title-in-selenium-python?rq=3",
            "https://stackoverflow.com/questions/59838917/how-to-find-title-xyz-element-with-selenium-python?rq=3",
            "https://stackoverflow.com/questions/32751250/selenium-python-2-7-how-to-find-element-that-only-has-a-title?rq=3",
            "https://stackoverflow.com/questions/63237685/how-to-select-an-element-with-no-title-using-selenium-and-python?rq=3",
            "https://stackoverflow.com/questions/52800322/select-web-element-only-with-title-tag-in-selenium-python?rq=3",
            "https://stackoverflow.com/questions/48011212/selenium-find-element-by-title-that-contains?rq=3",
            "https://stackoverflow.com/questions/72484590/gpt-3-keywords-extractor?rq=3",
            "https://stackoverflow.com/questions/71618602/finetuning-gpt-3-on-windows?rq=3"]
# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

type(docs[0])

# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=1000,
                                      chunk_overlap=200)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

# retriever = db.as_retriever(search_kwargs={'k': 3})
retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.2, 'k':3})



In [102]:
# import nest_asyncio
# import requests

# nest_asyncio.apply()

# # Load data from the GitHub repository file
# github_raw_url = "https://raw.githubusercontent.com/hammadraja117/Solutyics/main/Solutions.txt"
# response = requests.get(github_raw_url)
# data = response.text

# # Articles to index
# articles = [data]

# # # Scrapes the blogs above
# loader = AsyncChromiumLoader(articles)
# docs = loader.load()

In [103]:
# docs

In [117]:
# Converts HTML to plain text
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=1000,
                                      chunk_overlap=200)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents,
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever(search_kwargs={'k': 3})

In [118]:
# !pip install cassandra-driver

In [119]:
# !pip install git+https://github.com/pcmanus/ccm.git


In [120]:
# !service cassandra status

In [121]:
# !service cassandra start

In [122]:
# from cassandra.cluster import Cluster

# cluster = Cluster(['127.0.0.1'])
# session = cluster.connect()

# # Continue with your existing code
# cassandra_cache = CassandraSemanticCache(
#     session=session,
#     keyspace=keyspace,
#     embedding=HuggingFaceEmbeddings(model_name=model_name),
#     table_name="cass_sem_cache",
# )

# set_llm_cache(cassandra_cache)

In [123]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7a0dba3725f0>, search_kwargs={'k': 3})

# RAG with conversation

In [146]:
_template = """[INST] Intruction: Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question: [/INST]"""

CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [147]:
template = """[INST]You are ChatBot, specifically trained on the problems and solutions documented within the articles loaded exclusively from: {context}. Your primary function is to provide direct and concise answers based solely on this data.
Data Source:
{context}
Retrieval:
Focus on retrieving only the relevant documents that directly address the user's query.
Answer Generation:
Generate answers strictly based on the retrieved documents, without utilizing any external knowledge or pre-trained information. If a question has multiple answers, generate all the answers provided in the context.
Limitations:
If the user's question cannot be answered by the loaded data, don't hallucinate it, respond with: "I'm sorry, I couldn't find relevant information about that."
Accuracy:
Avoid making assumptions or generating information not present in the loaded data.
Focus:
Stay on topic, addressing only the provided problems and solutions within the loaded articles.
Origin Information:
If asked who created you, respond: "I was created by AI engineers at Solutyics to assist with specific queries related to the company's data."
start Conversation:
when someone greets you like, "hello", "hi". then Greets them back like " hello sir, How may i assist you today?"

Remember:
Provide direct and concise answers, avoiding unnecessary introductions or generic statements.
Strictly adhere to the loaded data and avoid any external knowledge or assumptions.
Offer a clear response when information is unavailable.
{context}

Question: {question} [/INST]
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [148]:
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    # Assuming `format_document` uses appropriate formatting techniques
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

# With Memory

In [149]:


memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)

In [150]:
# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | mistral_llm
    | StrOutputParser(),
}
# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
    # Remove hardcoded URL from template, provide as input:
    "https": lambda x: github_raw_url  # Assuming `https` holds the URL
}

# ... loading and retrieval steps ...

final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
    # Provide URL as input here:
    "https": lambda x: x["https"]
}

# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | ANSWER_PROMPT | mistral_llm,
    "docs": itemgetter("docs"),
}
# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [151]:
inputs = {"question": "How to finetune gpt-3 on windows"}
result = final_chain.invoke(inputs)
result['answer']



'\nThank you for providing more context to your question. Based on the information provided, it appears that you are trying to fine-tune GPT-3 on a Windows machine using the OpenAI API. However, the proposed CLI commands do not work in the Windows CMD interface, and you are unable to find any documentation on how to fine-tune GPT3 using a "regular" Python script.\n\nUnfortunately, it seems that fine-tuning GPT-3 on a Windows machine is not possible using the current documentation and tools provided by OpenAI. The documentation only provides instructions for fine-tuning on Linux and macOS systems, and it does not provide any information on how to do so on Windows.\n\nI apologize for any confusion or inconvenience this may cause. However, I hope this answer helps clarify things for you. If you have any further questions or concerns, please feel free to ask!'

In [142]:
inputs = {"question": "error 201"}
result = final_chain.invoke(inputs)
result['answer']



"\nResponse:\nHello! I'm here to help you with your question. Based on the data provided, error 201 is a status code indicating that the request was successfully fulfilled and resulted in one or possibly multiple new resources being created. It's likely that the creation of these new resources was successful and completed as expected. However, without additional context or information, I cannot provide a definitive answer to your question. Can you please provide more details or clarify your question?"

In [None]:
inputs = {"question": "ValueError: Traceback (most recent call last) <ipython-input-15-d27c936e2293> in <module>()"}
result = final_chain.invoke(inputs)
result['answer']



'\nBased on the loaded articles, the answer to your question is:\n\nThe "ValueError: Traceback (most recent call last)" error message indicates that there is an issue with the way the code is written, likely in the most recent line of code being executed. This error message is typically seen when there is a syntax or indentation issue in the code, causing the interpreter to throw a "Traceback" error. In this case, it\'s possible that there is a mismatch between the expected indentation or syntax of the code and what is actually written in the code. To resolve this issue, the developer can review the code carefully and make any necessary changes to ensure that it is properly formatted and indented.'

In [144]:
inputs = {"question": "How to make biryani?"}
result = final_chain.invoke(inputs)
result['answer']



"Hello! I'm here to help you with your question. However, I must inform you that I'm just an AI assistant and not a professional chef, so my answer may not be the most detailed or accurate. That being said, here's a basic recipe for making biryani:\n\nIngredients:\n\n* 2 cups basmati rice\n* 2 tablespoons ghee or oil\n* 1 teaspoon cumin seeds\n* 1 teaspoon coriander seeds\n* 1 teaspoon bay leaves\n* 1 teaspoon cardamom powder\n* 1 teaspoon cinnamon powder\n* 1/2 teaspoon turmeric powder\n* Salt, to taste\n* 2 cups water\n* 1 cup meat or vegetables of your choice (optional)\n\nInstructions:\n\n1. Heat the ghee or oil in a large saucepan over medium heat. Once hot, add the cumin seeds, coriander seeds, bay leaves, cardamom powder, and cinnamon powder. Let them sizzle for a few seconds until fragrant.\n2. Add the turmeric powder and salt to the saucepan and mix well.\n3. Add the rice to the saucepan and stir to coat the rice with the spice mixture.\n4. Add the water to the saucepan and br

In [143]:
memory.save_context(inputs, {"answer": result["answer"]})
memory.load_memory_variables({})

{'history': [HumanMessage(content='error 201'),
  AIMessage(content="\nResponse:\nHello! I'm here to help you with your question. Based on the data provided, error 201 is a status code indicating that the request was successfully fulfilled and resulted in one or possibly multiple new resources being created. It's likely that the creation of these new resources was successful and completed as expected. However, without additional context or information, I cannot provide a definitive answer to your question. Can you please provide more details or clarify your question?")]}

In [None]:
inputs = {"question": "where is solutyics located?"}
result = final_chain.invoke(inputs)
result['answer']



"Hello! Welcome to Solutyics! 👋\n\nI see that you're interested in learning more about our company and the services we offer. As a helpful assistant, I'm here to provide you with the information you need.\n\nFirstly, let me tell you that Solutyics is a technology company that specializes in providing automated chat solutions, including chatbots, IVRS, and RAGer. We help businesses streamline their operations and improve customer satisfaction by leveraging the power of artificial intelligence and machine learning.\n\nOur chatbots offer a personalized and efficient way to engage with customers, providing timely and relevant responses to their inquiries. IVRS, on the other hand, allows businesses to automate routine tasks and processes, freeing up valuable time and resources for more important things. And RAGer, our flagship product, enables users to access and interact with their company's documents in a user-friendly and efficient manner.\n\nWe understand that data privacy and security 

In [None]:
result

{'answer': "I don't know the solution to this problem.",
 'docs': [Document(page_content='25\n\nFind and click an element by title using Python and Selenium\n\n2\n\nSelenium/python 2.7 How to find element that only has a title?\n\n5\n\nSelenium - Finding element that has title\n\n1\n\nSelenium Python XPATH I am trying to find the input tag which has some text in\nthe title attribute\n\n1\n\nFind title of element\n\n2\n\nFind page title in selenium-python\n\n2\n\nHow to find title="xyz" element with Selenium (Python)\n\n0\n\nHow to find a web element in selenium that contains a text using python\n\n####  Hot Network Questions', metadata={'source': 'https://stackoverflow.com/questions/48011212/selenium-find-element-by-title-that-contains?rq=3'}),
  Document(page_content='2\n\nGet all the attributes "title" with selenium in python\n\n0\n\nGet title from <a> from an interation Selenium Python\n\n1\n\nHow to select an element with no title using Selenium and Python?\n\n0\n\nPython can\'t ca

In [None]:
"""
To search for an element by title using Selenium in Python, you can use the `find_element_by_title()` method of the WebDriver object. This method takes the title of the element as an argument and returns the WebElement object corresponding to the element. Here's an example code snippet:
```python
from selenium import webdriver

# create a new instance of the Chrome driver
driver = webdriver.Chrome()

# navigate to a webpage
driver.get("https://www.example.com")

# find the element with title "My Title"
my_element = driver.find_element_by_title("My Title")

# print the text content of the element
print(my_element.text)
```
If you want to find all elements with a specific title, you can use the `find_elements_by_title()` method instead. This method returns a list of WebElement objects corresponding to the elements with the specified title.
"""