# Document Search Pipeline

Ask anything about a document and get an answer based on context retrieved from that document.

In [13]:
! pip install vectorshift --upgrade



## Pipeline Overview

This pipeline takes two inputs: a document and a question. The LLM answers the question based on context retrieved from the document.
![alt text](images/document_search/1-overview.png "pipeline overview")


In [14]:
import os

import vectorshift as vs
from vectorshift.node import InputNode, URLLoaderNode, TextNode, SemanticSearchNode, OpenAILLMNode, OutputNode, ChatMemoryNode
from vectorshift.pipeline import Pipeline
from vectorshift.knowledge_base import *

In [15]:
# Read the API key from the VECTORSHIFT_API_KEY environment variable so the
# real secret never has to be saved in the notebook. If the variable is unset
# or empty, fall back to the placeholder, which must then be replaced by hand.
vs_api_key = os.environ.get("VECTORSHIFT_API_KEY") or "YOUR_API_KEY"

## Input Nodes
The input is separated into two nodes: a document input and a question input. File loading is handled by the file input node itself (`process_files=True`), so you don't need to define a separate file loader node.
![alt text](images/document_search/2-inputs.png)

In [16]:
# Text input that carries the user's question.
questions_input = InputNode(
    name="question",
    input_type="text",
)

# File input that carries the document; process_files=True is set on the node
# so no separate loader node is defined in this notebook.
document_input = InputNode(
    name="document_input",
    input_type="file",
    process_files=True,
)

In [17]:
# Semantic search over the document using the question as the query,
# limited to at most four documents per query.
search_node = SemanticSearchNode(
    query_input=[questions_input.output()],
    documents_input=[document_input.output()],
    max_docs_per_query=4,
)

In [18]:
# System message for the LLM, wrapped in a TextNode so it can be wired
# into the pipeline like any other node output.
system_text = "You are a helpful assistant that answers User Question based on Context"
system_text_node = TextNode(text=system_text)

In [19]:
# LLM node that answers the question. The prompt template contains the
# placeholders {{User_Question}} and {{Context}}; the keys of `text_inputs`
# match those placeholder names.
llm = OpenAILLMNode(
    model="gpt-3.5-turbo",
    system_input=system_text_node.output(),
    prompt_input=(
        'User Question\n{{User_Question}}\n\n\n'
        'Context\n{{Context}}'
    ),
    # NOTE(review): 4000 may leave little room for the prompt if this limit
    # counts against a ~4k-token context window; confirm how the node uses it.
    max_tokens=4000,
    text_inputs={
        'User_Question': questions_input.output(),
        'Context': search_node.output(),
    },
)

In [20]:
# Text output named "output" that is fed by the LLM node's output.
output_node = OutputNode(
    name="output",
    output_type="text",
    input=llm.output(),
)

## Deploy Pipeline

In [21]:
# Every node that belongs to the pipeline, collected in one list.
document_search_pipeline_nodes = [
    document_input,
    questions_input,
    search_node,
    llm,
    system_text_node,
    output_node,
]

In [22]:
# Assemble the nodes into a named pipeline; the same name is used later
# in this notebook to fetch the saved pipeline.
document_search_pipeline = Pipeline(
    nodes=document_search_pipeline_nodes,
    name="Document Search with Vectorshift",
    description="Ask your document questions and get answers",
)

In [23]:
# Build a deployment config from the API key. The package is imported under
# the alias `vs`, so the bare name `vectorshift` is undefined on a fresh
# kernel and would raise NameError here.
config = vs.deploy.Config(
    api_key=vs_api_key,
)

# Save the pipeline as a new pipeline; the call's return value is displayed
# as the cell output.
config.save_new_pipeline(document_search_pipeline)

Successfully saved pipeline with ID 668eb54ea9e3a72447b7d36e.


{'pipeline': {'name': 'Document Search with Vectorshift',
  'description': 'Ask your document questions and get answers',
  'nodes': [{'id': 'customInput-1',
    'type': 'customInput',
    'data': {'id': 'customInput-1',
     'nodeType': 'customInput',
     'category': 'input',
     'task_name': 'input',
     'inputName': 'document_input',
     'inputType': 'File',
     'processFiles': True},
    'position': {'x': 0, 'y': -400},
    'positionAbsolute': {'x': 0, 'y': -400},
    'selected': False,
    'dragging': False},
   {'id': 'customInput-2',
    'type': 'customInput',
    'data': {'id': 'customInput-2',
     'nodeType': 'customInput',
     'category': 'input',
     'task_name': 'input',
     'inputName': 'question',
     'inputType': 'Text'},
    'position': {'x': 0, 'y': 0},
    'positionAbsolute': {'x': 0, 'y': 0},
    'selected': False,
    'dragging': False},
   {'id': 'vectorQuery-1',
    'type': 'vectorQuery',
    'data': {'id': 'vectorQuery-1',
     'nodeType': 'vectorQuery'

## Run The Pipeline

In [24]:
# Fetch the saved pipeline by the name it was created with.
pipeline = Pipeline.fetch(
    pipeline_name='Document Search with Vectorshift',
    api_key=vs_api_key,
)

# The keys match the `name` values given to the two InputNodes.
# NOTE(review): the recorded output of this cell is an empty dict; confirm
# that "email.csv" is the form the file input expects.
run_inputs = {
    "question": "What is Ekki's last name?",
    "document_input": "email.csv",
}
response = pipeline.run(inputs=run_inputs, api_key=vs_api_key)

print(response)

{}
