<a href="https://colab.research.google.com/github/Wahiba275/chatbot_using_langchain_hagging_face_transformers/blob/main/ChatbotV3_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Pin the releases this notebook was originally run with: the code below relies on
# `openai.Completion` (removed in openai>=1.0) and on the legacy `langchain.*` import paths.
# The PyPI package `Chroma` is dropped: the vector store used here is provided by `chromadb`.
! pip install openai==0.28.1 langchain==0.0.325 chromadb transformers tiktoken

Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.0.325-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Chroma
  Downloading Chroma-0.2.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting chromadb
  Downloading chromadb-0.4.15-py3-none-any.whl (479 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m479.8/479.8 kB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m99.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.5.1-cp310-cp310-manylinux_

In [None]:
import getpass
from transformers import pipeline
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import re
import openai


In [None]:
import logging
# Raise the "transformers" logger threshold so only ERROR-and-above records are emitted.
logging.getLogger("transformers").setLevel(logging.ERROR)


In [None]:
class ChromaKnowledgeStore:
    """Chroma vector store built from a single text document.

    The document is loaded, split into chunks of up to 1000 characters with
    no overlap, embedded with OpenAI embeddings and indexed in Chroma.

    Attributes:
        embedder: The ``OpenAIEmbeddings`` client used both for indexing and
            for embedding queries.
        db: The Chroma vector store holding the document chunks.
    """

    # Splits on whitespace that follows '.' or '?', except after patterns that
    # look like abbreviations (e.g. "e.g." or "Mr."). Compiled once per class.
    _SENTENCE_BOUNDARY = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s')

    def __init__(self, document_path, openai_api_key):
        """Load, split, embed and index the document at ``document_path``.

        Args:
            document_path: Path of the text file to index.
            openai_api_key: API key handed to ``OpenAIEmbeddings``.
        """
        raw_documents = TextLoader(document_path).load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        documents = text_splitter.split_documents(raw_documents)
        # One embeddings client is shared by the index and by query embedding,
        # instead of constructing two identical clients.
        self.embedder = OpenAIEmbeddings(openai_api_key=openai_api_key)
        self.db = Chroma.from_documents(documents, self.embedder)

    def get_similar_content(self, query):
        """Return the text of the chunk most similar to ``query``.

        Args:
            query: Free-text user question.

        Returns:
            The ``page_content`` of the best-matching chunk, or ``None`` when
            the search returns no documents.
        """
        embedding_vector = self.embedder.embed_query(query)
        # Only the top hit is used, so request a single neighbour.
        docs = self.db.similarity_search_by_vector(embedding_vector, k=1)
        return docs[0].page_content if docs else None

    def extract_relevant_sentences(self, text, query):
        """Return the sentences of ``text`` that contain ``query``.

        Matching is a case-insensitive substring test of the whole query
        (with surrounding whitespace removed) against each sentence.

        Args:
            text: Text to split into sentences.
            query: Phrase to look for.

        Returns:
            The matching sentences joined by single spaces; an empty string
            when nothing matches or when ``query`` is blank.
        """
        needle = query.strip().lower()
        if not needle:
            # An empty needle is a substring of every sentence; treat a blank
            # query as "no match" rather than echoing the whole text.
            return ''
        sentences = self._SENTENCE_BOUNDARY.split(text)
        return ' '.join(
            sentence for sentence in sentences if needle in sentence.lower()
        )

# Lazily-built zero-shot classifier, shared by every is_relevant_query call so
# the model is loaded once per process instead of once per query.
_relevance_classifier = None


def _get_relevance_classifier():
    """Return the shared zero-shot classification pipeline, creating it on first use."""
    global _relevance_classifier
    if _relevance_classifier is None:
        _relevance_classifier = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
            revision="c626438",
            silence=True,
            use_auth_token=False,
        )
    return _relevance_classifier


def is_relevant_query(query, threshold=0.75):
    """Decide whether ``query`` is about risk assessment.

    The query is scored by a zero-shot classifier against the labels
    "risk assessment" and "other".

    Args:
        query: User text to classify.
        threshold: Score the first returned label must exceed for the query
            to count as relevant. Defaults to 0.75.

    Returns:
        True when the first returned label is "risk assessment" and its score
        is greater than ``threshold``; False otherwise, including for blank
        input.
    """
    if not query or not query.strip():
        # Nothing to classify; skip loading and running the model.
        return False

    result = _get_relevance_classifier()(
        query,
        candidate_labels=["risk assessment", "other"],
        hypothesis_template="This text is about {}."
    )
    # Index 0 is taken as the top-ranked label, following the Transformers
    # zero-shot pipeline's documented output ordering.
    return result['labels'][0] == 'risk assessment' and result['scores'][0] > threshold


In [None]:
def _complete_with_openai(prompt):
    """Return the stripped text of an OpenAI completion for ``prompt``."""
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=150,
        temperature=0
    )
    return response['choices'][0]['text'].strip()


def main():
    """Run the interactive risk-assessment chatbot loop.

    Prompts for an OpenAI API key, indexes ``ml_and_ai_overview.txt`` in a
    ``ChromaKnowledgeStore``, then reads user input until ``quit`` is typed.
    Queries judged relevant are answered from the knowledge store when it
    yields matching sentences, and by an OpenAI completion otherwise.
    Queries judged irrelevant get a fixed refusal message.
    """
    openai.api_key = getpass.getpass('OpenAI API Key:')
    kvs = ChromaKnowledgeStore('ml_and_ai_overview.txt', openai_api_key=openai.api_key)

    print("Welcome to the Chatbot! Type 'quit' to exit.")
    while True:
        # Strip so that inputs such as "quit " still exit.
        user_input = input("You: ").strip()
        if not user_input:
            # Ignore blank lines instead of sending them to the classifier.
            continue
        if user_input.lower() == "quit":
            print("Goodbye!")
            break

        if not is_relevant_query(user_input):
            print("Sorry! I can only answer questions related to risk assessment.")
            continue

        response_from_kvs = kvs.get_similar_content(user_input)
        relevant_content = (
            kvs.extract_relevant_sentences(response_from_kvs, user_input)
            if response_from_kvs
            else ""
        )
        if relevant_content:
            print(f"Chatbot (from KVS): {relevant_content.strip()}")
        else:
            # No usable answer in the knowledge store: fall back to the LLM.
            print(f"Chatbot: {_complete_with_openai(user_input)}")

In [None]:
# Start the chat loop only when this module is executed as the main program.
if __name__ == "__main__":
    main()


OpenAI API Key:··········
Welcome to the Chatbot! Type 'quit' to exit.
You: what is risk assessment
Chatbot: Risk assessment is the process of identifying, analyzing, and evaluating potential risks to an organization, its employees, customers, or the general public. It involves assessing the likelihood of a risk occurring and the potential impact it could have. Risk assessments are used to inform decisions about how to manage risks and to help organizations prepare for and respond to potential risks.
You: what is machine learning ?
Sorry! I can only answer questions related to risk assessment.
You: Quelles sont les étapes clés du processus d'évaluation des risques ?
Chatbot: 1. Identification des risques : identifier les risques potentiels qui pourraient affecter l'organisation et ses activités.

2. Analyse des risques : évaluer la probabilité et l'impact des risques identifiés.

3. Évaluation des risques : déterminer le niveau de risque et décider si des mesures supplémentaires sont n