In [109]:
!pip install -Uqqq pip
!pip install -qqq transformers
!pip install -qqq langchain
!pip install -qqq torch
!pip install -qqq pypdf
!pip install -qqq openai
!pip install -qqq tiktoken
!pip install faiss-gpu
!pip install dotenv

[0mCollecting dotenv
  Using cached dotenv-0.0.5.tar.gz (2.4 kB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.


In [110]:
import os
from getpass import getpass

# Do not store a key in the notebook. Reuse a key that is already exported in
# the environment; otherwise ask for one without echoing it into the output.
# (Previously the variable was overwritten with an empty string.)
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [111]:
# Attach Google Drive to the Colab runtime at the mount point below.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [112]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import create_qa_with_sources_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# import dotenv

# dotenv.load_dotenv()

In [114]:

# Step 1 - Build the raw knowledge base.
# The manually collected sources are read into a single list of documents:
# a PDF article through PyPDFLoader and a CSV file through CSVLoader.
pdf_file_path = "/content/saasacademy.com-ARR Vs MRR - Choosing the right metric for your SaaS business.pdf"
pdf_loader = PyPDFLoader(pdf_file_path)
pdf_docs = pdf_loader.load()

tmp_file_path = "/content/Indian_startup_funding_2023.csv"
csv_loader = CSVLoader(
    file_path=tmp_file_path,
    encoding="utf-8",
    csv_args={"delimiter": ","},
)
csv_docs = csv_loader.load()

# PDF documents first, then the CSV documents.
batch_docs = [*pdf_docs, *csv_docs]

In [115]:
# Step 2: Split the loaded documents into chunks.
# RecursiveCharacterTextSplitter breaks the text into smaller pieces so the
# later embedding and retrieval steps work on compact segments. `chunk_size`
# and `chunk_overlap` control the granularity; lengths are measured with `len`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=20, length_function=len
)

final_documents = text_splitter.split_documents(batch_docs)

# Report a count and preview the first chunks only; echoing the whole list
# floods the cell output and bloats the saved notebook.
print(f"Split {len(batch_docs)} documents into {len(final_documents)} chunks")
final_documents[:2]

[Document(page_content='1 / 6ARR Vs. MRR - Choosing the right metric for your SaaS\nbusiness\nsaasacademy .com /blog/arr-vs-mrr-how-to-calculate\nMRR (Monthly Recurring Revenue) is possibly the most important metric for a SaaS\ncompany . ARR (Annual Recurring Revenue or sometimes used to refer to Annualized Run\nRate) is a related metric but with important dif ferences. Let’ s dive deeper into MRR and ARR\nand consider which one of them to use and when.\nWhat is the difference between ARR and MRR?', metadata={'source': '/content/saasacademy.com-ARR Vs MRR - Choosing the right metric for your SaaS business.pdf', 'page': 0}),
 Document(page_content='While ARR and MRR are similar , the dif ference between the two lies in the finer points:\n ARR provides an overall view of your business, while MRR takes a more in-depth look\nARR assesses the success of your company in the long term, while MRR gives you\ninsights into your short-term operational ef ficiency\nARR is more suitable when subscr

In [118]:
# Step 3: Embed the chunks and index them.
# OpenAIEmbeddings transforms the text of each chunk into a numerical
# representation that captures semantic relationships. FAISS.from_documents
# then builds a vector store over those embeddings so that similarity
# searches are efficient.
embeddings_model = OpenAIEmbeddings()
db = FAISS.from_documents(
    final_documents,
    embeddings_model,
)
db

<langchain_community.vectorstores.faiss.FAISS at 0x7ed4f4ae0be0>

In [119]:
# Step 4: Document retrieval.
# The FAISS store is exposed as a retriever: given a user query it identifies
# the stored documents whose embeddings are most similar to the query.
retriever = db.as_retriever()

# Step 5: Conversational question answering.
# ConversationalRetrievalChain integrates the ChatOpenAI model (llm_src) with
# the retriever. `return_source_documents=True` makes each response carry the
# retrieved documents next to the answer, so the origin of an answer can be shown.
llm_src = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k")
retrieval_qa = ConversationalRetrievalChain.from_llm(
    llm_src,
    retriever,
    return_source_documents=True,
)

In [121]:
# Single-turn query (empty chat history). The report shows the echoed question,
# the answer, and the source of the first retrieved document.
query = {
    "question": "How much funding did Freshwork raise?",
    "chat_history": [],
}
output = retrieval_qa(query)

print(f"Question: {output['question']}")
print(f"Answer: {output['answer']}")
print(f"Source: {output['source_documents'][0].metadata['source']}")

Question: How much funding did Freshwork raise?
Answer: Freshworks raised a funding amount of $150,000,000.
Source: /content/Indian_startup_funding_2023.csv


In [122]:
# Single-turn query (empty chat history). The report shows the echoed question,
# the answer, and the source of the first retrieved document.
query = {
    "question": "Importance of MRR for your SaaS business?",
    "chat_history": [],
}
output = retrieval_qa(query)

print(f"Question: {output['question']}")
print(f"Answer: {output['answer']}")
print(f"Source: {output['source_documents'][0].metadata['source']}")

Question: Importance of MRR for your SaaS business?
Answer: MRR is important for SaaS businesses because it provides monthly trends, helps with financial planning and forecasting, assesses growth and momentum, and allows for a focus on high-value clients. It helps companies understand their performance, make informed decisions, and track their revenue on a recurring basis.
Source: /content/saasacademy.com-ARR Vs MRR - Choosing the right metric for your SaaS business.pdf


In [None]:
!pip install gradio

In [124]:
import gradio as gr

In [125]:
# Persona name -> instruction that is prepended to the user's question.
agents = {
    "Engineer": "Explain it in a very technical language",
    "Mathematician": "Explain with a lot of numbers",
    "Teacher": "Explain like i am a 10 year old",
    "Investor": "Explain like an investor",
    "Startup founder": "Explain with a real life use case"
}


def Question(question, agent):
    """Answer `question` through the retrieval chain, phrased for `agent`.

    Args:
        question: The user's question text.
        agent: A key of `agents` selecting the persona instruction. ``None``
            (nothing selected) or an unknown name sends the question without
            a persona instruction instead of raising ``KeyError``.

    Returns:
        The ``answer`` entry of the chain's response.
    """
    instruction = agents.get(agent)
    # Known personas keep the "<instruction>,<question>" prompt layout.
    prompt = f"{instruction},{question}" if instruction else question
    output = retrieval_qa({
        "question": prompt,
        "chat_history": [],  # each call is an independent, single-turn query
    })
    return output["answer"]

In [126]:
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
from typing import Iterable

class VibrantCustomTheme(Base):
    """Gradio theme: purple/orange hues, Poppins text and a radial-gradient page background."""

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.purple,
        secondary_hue: colors.Color | str = colors.orange,
        neutral_hue: colors.Color | str = colors.gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Poppins"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Inconsolata"),
            "ui-monospace",
            "monospace",
        ),
    ):
        """Set up the base theme, then apply the custom style overrides.

        Every keyword argument is forwarded unchanged to ``Base.__init__``.
        """
        base_options = dict(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        super().__init__(**base_options)

        # Style variables overridden on top of the base theme.
        style_overrides = {
            "body_background_fill": "radial-gradient(circle, #ffb8b8, #ffe5b7)",
            "button_primary_background_fill": "#8A4D76",
            "button_primary_background_fill_hover": "#AB83A1",
            "button_primary_text_color": "black",
            "slider_color": "#6A0572",
            "slider_color_dark": "#AB83A1",
            "block_title_text_weight": "bold",
            "block_title_text_color": "black",
            "block_border_width": "2px",
            "block_shadow": "0px 0px 10px 0px rgba(0,0,0,0.1)",
            "button_shadow": "0px 0px 10px 0px rgba(0,0,0,0.1)",
        }
        super().set(**style_overrides)


In [127]:
description = "Welcome to the Chatbot Interface! Ask any questions or start a conversation."
custom_theme = VibrantCustomTheme()
# Take the choices from the persona table so the dropdown and `agents` cannot drift apart.
dropdown_options = list(agents)

# Create the Gradio interface
chat_bot = gr.Interface(
    fn=Question,
    inputs=[
        "text",
        # Preselect the first persona so the form starts with a valid choice.
        gr.Dropdown(dropdown_options, value=dropdown_options[0]),
    ],
    outputs="text",
    title="VCExplorerBot",
    description=description,  # previously defined but never passed to the interface
    theme=custom_theme,
)

# Launch the interface
chat_bot.launch(inline=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://a438514d7b6602bfc9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


