In [None]:
!pip install -U git+https://github.com/hrthejas/llmtest.git

In [None]:
!pip install auto-gptq

In [None]:
from transformers import AutoTokenizer, pipeline, logging
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
import argparse

# Checkpoint to load. This is a Hugging Face Hub repo id; to load from disk
# instead, set it to the local download directory, e.g.
# "/path/to/models/TheBloke_WizardCoder-15B-1.0-GPTQ".
model_name_or_path = "TheBloke/WizardCoder-15B-1.0-GPTQ"

# Forwarded unchanged to from_quantized() below.
use_triton = False

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
)

# Load the pre-quantized weights (safetensors files) onto the first CUDA device.
# No explicit quantize config object is supplied by this notebook.
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    use_safetensors=True,
    device="cuda:0",
    use_triton=use_triton,
    quantize_config=None,
)

In [16]:
# Only let CRITICAL messages through the transformers logger.
logging.set_verbosity(logging.CRITICAL)

# Text-generation pipeline over the quantized model and its tokenizer;
# generation is capped at 512 new tokens per call.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
)


In [13]:
# Few-shot prompt for the question-answering chain.
#
# The template has exactly two substitution fields, {context} and {question}.
# Every other brace is doubled so that format-style rendering emits it
# literally: "{{" renders as "{", and "{{{{input_0}}}}" renders as the
# "{{input_0}}" placeholder the model is expected to echo back.
#
# The model is told to answer with JSON only, so each example response below is
# kept valid JSON after rendering (commas between members, balanced braces,
# and backslash-escaped quotes inside JSON string values — note the doubled
# backslashes, needed because this is not a raw Python string).
DEFAULT_PROMPT_WITH_CONTEXT_API_WITHOUT_AUTHENTICATION = """

Use the below context and embeddings to answer the user questions

CONTEXT: 
{context}
=========

You are a REST API assistant working at Infoworks, but you are also an expert programmer.
You are to complete the user request by composing a series of commands.
Use the minimum number of commands required.

The commands you have available are:
| Command | Arguments | Description | Output Format |
| --- | --- | --- | --- |
| message | message | Send the user a message | null |
| input | question | Ask the user for an input | null |
| execute | APIRequest | execute an Infoworks v3 REST API request | null |
Example 1:
User Request: create a teradata source with source name \"Teradata_sales\"
Response:
[
  {{
    "command": "input",
    "arguments" : "Enter the source name"
  }},
  {{
    "command": "input",
    "arguments" : "Enter the source type"
  }},
  {{
    "command": "input",
    "arguments" : "Enter the source sub type"
  }},
  {{
    "command": "input",
    "arguments" : "Enter the data lake path"
  }},
  {{
    "command": "input",
    "arguments" : "Enter the environment id"
  }},
  {{
    "command": "input",
    "arguments" : "Enter the storage id"
  }},
  {{
    "command": "input",
    "arguments" : "Enter the data lake schema"
  }},
  {{
    "command": "input",
    "arguments" : "Enter the is_oem_connector"
  }},
  {{
    "command": "execute",
    "arguments": {{
      "type": "POST",
      "url": "http://10.37.0.7:3001/v3/sources",
      "headers": {{
        "Content-Type": "application/json",
        "Authorization": "Bearer {{refresh_token}}"
      }},
      "body": {{
        "name": "{{{{input_0}}}}",
        "environment_id": "{{{{input_4}}}}",
        "storage_id": "{{{{input_5}}}}",
        "data_lake_schema": "{{{{input_6}}}}",
        "data_lake_path": "{{{{input_3}}}}",
        "type": "{{{{input_1}}}}",
        "sub_type": "{{{{input_2}}}}",
        "is_oem_connector": "{{{{input_7}}}}"
      }}
    }}
  }}
]
Example 2:
User Request: List all sources
Response:
[
  {{
    "command": "execute",
    "arguments": {{
      "type": "GET",
      "url": "http://10.37.0.7:3001/v3/sources",
      "headers": {{
        "Content-Type": "application/json",
        "Authorization": "Bearer {{refresh_token}}"
      }},
      "body": ""
    }}
  }}
]
Example 3:
User Request: List all teradata sources
Response:
[
  {{
    "command": "execute",
    "arguments": {{
      "type": "GET",
      "url": "http://10.37.0.7:3001/v3/sources",
      "headers": {{
        "Content-Type": "application/json",
        "Authorization": "Bearer {{refresh_token}}"
      }},
      "body": ""
    }}
  }}
]
Example 4:
User Request: List all snowflake environments
Response:
[
  {{
    "command": "execute",
    "arguments": {{
      "type": "GET",
      "url": "http://10.37.0.7:3001/v3/admin/environment",
      "headers": {{
        "Content-Type": "application/json",
        "Authorization": "Bearer {{refresh_token}}"
      }},
      "body": {{
        "filter": "{{\\"$or\\":[{{\\"data_warehouse_type\\":{{\\"$in\\":[\\"snowflake\\"]}}}}]}}"
      }}
    }}
  }}
]

IMPORTANT - Output the commands in JSON as an abstract syntax tree. Do not respond with any text that isn't part of a command. Do not write prose, even if instructed. Do not explain yourself.
You are an expert at generating commands and You can only generate commands.
IMPORTANT - Do not assume any values. If you are not sure about any value get the input from user.
Infoworks instance ip is 10.37.0.7 and port 3001, 
IMPORTANT - Use access token or refresh token to authenticate every execute command and user already has it so dont ask for that input


QUESTION: {question} 

"""

In [17]:
from langchain import (
    HuggingFacePipeline
)

# Expose the transformers text-generation pipeline through LangChain's LLM wrapper.
llm = HuggingFacePipeline(
    pipeline=pipe,
)

In [7]:
from langchain.llms.utils import enforce_stop_tokens

from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [18]:
# Bind the few-shot template to its two placeholders ({context}, {question}).
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=DEFAULT_PROMPT_WITH_CONTEXT_API_WITHOUT_AUTHENTICATION,
)

# Question-answering chain of type "stuff" driven by the local LLM and the prompt above.
chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
    prompt=PROMPT,
)

In [None]:
from langchain.embeddings import (
    HuggingFaceEmbeddings,
    HuggingFaceInstructEmbeddings,
    OpenAIEmbeddings
)
# Embedding model handed to the FAISS loader later in the notebook.
# NOTE(review): presumably this must be the same model the saved index was built with — confirm.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
)

In [28]:
from langchain.vectorstores import (
    Chroma,
    FAISS,
    ElasticVectorSearch
)

# Open the saved FAISS index named "api_index_json" from the notebook volume.
# NOTE(review): LangChain's FAISS.load_local reads a pickled docstore next to the
# index file — only point this at index folders from a trusted source.
db = FAISS.load_local(
    folder_path="/notebooks/indexes/hf/faiss/api_index_json/",
    index_name="api_index_json",
    embeddings=embeddings,
)

In [31]:
from pprint import pprint
query = "json tree with commands as shown in prompt examples to submit job to ingest a table using infoworks v3 api endpoints."

# Retrieve the indexed documents most similar to the query and list their metadata.
search_results = db.similarity_search(query)
print("Results from db are: ")
for doc in search_results:
    print(doc.metadata)

# Feed the retrieved documents plus the question through the QA chain and
# show only the generated text.
result = chain(
    {
        "input_documents": search_results,
        "question": query,
    }
)
print(result["output_text"])

Results from db are: 
{'source': '/notebooks/split-json/common.json', 'seq_num': 1}
{'source': '/notebooks/split-json/tables.json', 'seq_num': 1}
{'source': '/notebooks/split-json/source.json', 'seq_num': 1}
{'source': '/notebooks/split-json/admin.json', 'seq_num': 1}





{
  "commands": [
    {
      "command": "input",
      "arguments": "Enter the source name"
    },
    {
      "command": "input",
      "arguments": "Enter the source type"
    },
    {
      "command": "input",
      "arguments": "Enter the source sub type"
    },
    {
      "command": "input",
      "arguments": "Enter the data lake path"
    },
    {
      "command": "input",
      "arguments": "Enter the environment id"
    },
    {
      "command": "input",
      "arguments": "Enter the storage id"
    },
    {
      "command": "input",
      "arguments": "Enter the data lake schema"
    },
    {
      "command": "input",
      "arguments": "Enter the is_oem_connector"
    },
    {
      "command": "execute",
      "arguments": {
        "type": "POST",
        "url": "http://10.37.0.7:3001/v3/sources",
        "headers": {
          "Content-Type": "application/json",
          "Authorization": "Bearer {refresh_token}"
        },
        "body": {
          "name": "{{input_0