# Document Search Pipeline

Ask any question about a document and get an answer based on context retrieved from that document.

In [2]:
! pip install vectorshift --upgrade




[notice] A new release of pip is available: 23.3.2 -> 24.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## Pipeline Overview

This pipeline takes two inputs: a document and a question. The LLM answers the question based on context retrieved from the document.
![alt text](images/document_search/1-overview.png "pipeline overview")


In [6]:
import vectorshift as vs
from vectorshift.node import InputNode, URLLoaderNode, TextNode, SemanticSearchNode, OpenAILLMNode, OutputNode, ChatMemoryNode
from vectorshift.pipeline import Pipeline
from vectorshift.knowledge_base import *

In [4]:
import os

# Prefer an environment variable so a real key never has to be saved inside
# the notebook. The original placeholder is kept as the fallback, so pasting
# a key in place of "YOUR_API_KEY" still works as before.
vs_api_key = os.environ.get("VECTORSHIFT_API_KEY", "YOUR_API_KEY")

# Set the key on the SDK module; `vs_api_key` is also reused by later cells.
vs.api_key = vs_api_key

## Input Nodes
The input is separated into two nodes: a document input and a question input. A file loader is already included in the file input node, so you don't need to define one yourself.
![alt text](images/document_search/2-inputs.png)

In [31]:
# File-typed input node that carries the document to search.
document_input = InputNode(
    input_type="file",
    name="document_input",
)

# Text-typed input node that carries the user's question.
questions_input = InputNode(
    input_type="text",
    name="question",
)

In [32]:
# Semantic search over the document, queried with the question text.
# At most 4 documents are requested per query.
search_node = SemanticSearchNode(
    query_input=[questions_input.output()],
    documents_input=[document_input.output()],
    max_docs_per_query=4,
)

In [20]:
# System prompt for the LLM, wrapped in a TextNode so it can be wired into the pipeline.
system_text = "You are a helpful assistant that answers User Question based on Context"
system_text_node = TextNode(
    text=system_text,
)

In [33]:
# LLM node: the system prompt comes from the text node, the prompt is the
# user's question, and the semantic search output is passed in as "context".
llm = OpenAILLMNode(
    model="gpt-3.5-turbo",
    text_inputs={"context": search_node.output()},
    prompt_input=questions_input.output(),
    system_input=system_text_node.output(),
)

In [34]:
# Text output node fed by the LLM's output.
output_node = OutputNode(
    input=llm.output(),
    output_type="text",
    name="output",
)

## Deploy Pipeline

In [35]:
# All nodes that make up the pipeline, listed in data-flow order.
# Only node objects belong here: the raw `system_text` string is not a node
# (it is already wrapped by `system_text_node`), so it is not listed.
document_search_pipeline_nodes = [
    document_input,
    questions_input,
    search_node,
    system_text_node,
    llm,
    output_node,
]

In [None]:
# Assemble the pipeline object from the node list defined earlier in the notebook.
document_search_pipeline = Pipeline(
    nodes=document_search_pipeline_nodes,
    name="Document Searchss",
    description="Ask your document questions and get answers",
)

In [None]:
# The package is imported as `vs` in this notebook; the bare name `vectorshift`
# is never bound, so the deploy module has to be reached through the alias.
config = vs.deploy.Config(
    api_key=vs_api_key,
)

# Save the pipeline built in this notebook (`document_search_pipeline`).
# The previous name, `csv_search_pipeline`, is not defined anywhere here.
config.save_new_pipeline(document_search_pipeline)

## Run The Pipeline

In [None]:
# Fetch the pipeline by the same name it was created with in this notebook.
pipeline = Pipeline.fetch(pipeline_name='Document Searchss')

# Input keys must match the InputNode names defined in this notebook:
# "document_input" (file) and "question" (text).
# NOTE(review): the file path is carried over from the original example —
# confirm how file inputs should be supplied to run() for your deployment.
response = pipeline.run(
    inputs={
        "document_input": "/files/cv.pdf",
        "question": "What is this document about?",
    },
    api_key=vs_api_key,
)

print(response)