In [1]:
import os
import pysnc
import getpass

# Create the directory if it does not exist
os.makedirs('data/sys_script_include', exist_ok=True)

client = pysnc.ServiceNowClient('dev168935', ('admin', getpass.getpass('Dev Instance Password:')))
gr = client.GlideRecord('sys_script_include')
gr.add_query('sys_package', '16ce0f75e8e1211076da10591ad28708')
gr.query()
for i, r in enumerate(gr):
    with open(f'data/sys_script_include/{r.name}.js', 'w') as f:
        # If r.script is None, replace it with an empty string
        name = r.script.get_value() if r.script is not None else ''
        # Write the script to the file
        f.write(name)
    print(f"Finished writing file for record {i}: {r.name}.js")
# end

Finished writing file for record 0: Optional.js
Finished writing file for record 1: GQ.js
Finished writing file for record 2: Schema.js
Finished writing file for record 3: GlideToJavaScriptMapper.js
Finished writing file for record 4: GlideQuery.js
Finished writing file for record 5: GlideQueryActions.js
Finished writing file for record 6: Stream.js
Finished writing file for record 7: GlideQueryParser.js
Finished writing file for record 8: GlideQueryEvaluator.js
Finished writing file for record 9: NiceError.js


In [2]:
import os
import getpass

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
activeloop_token = getpass.getpass("Activeloop Token:")
os.environ["ACTIVELOOP_TOKEN"] = activeloop_token
# end

In [3]:
embeddings = OpenAIEmbeddings(disallowed_special=())
# end

In [4]:
import os
from langchain.document_loaders import TextLoader

root_dir = 'data/sys_script_include'
docs = []

# Go through each folder
for dirpath, dirnames, filenames in os.walk(root_dir):
    
    # Go through each file
    for file in filenames:
        try: 
            # Load up the file as a doc and split
            loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')
            docs.extend(loader.load_and_split())
        except Exception as e: 
            pass
# end

In [5]:
# Confirm files were chunked

print (f"You have {len(docs)} documents\n")
print ("------ Start Document -----")
print (docs[0].page_content[:300])
# end

You have 35 documents

------ Start Document -----
/**
 * GQ is a utility class intended for use with GlideQuery
 * @namespace
 */
function GQ() {}

/**
 * Returns the values of an Object
 * @param {Object} obj Object
 * @returns {Array}
 */
GQ.values = function values(obj) {
	var values = [];
	for (var i in obj) {
		values.push(obj[i]);
	}
	return 


In [6]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(docs)
# end

Created a chunk of size 1619, which is longer than the specified 1000
Created a chunk of size 1243, which is longer than the specified 1000
Created a chunk of size 1408, which is longer than the specified 1000
Created a chunk of size 1402, which is longer than the specified 1000
Created a chunk of size 1275, which is longer than the specified 1000
Created a chunk of size 1318, which is longer than the specified 1000
Created a chunk of size 1056, which is longer than the specified 1000
Created a chunk of size 1293, which is longer than the specified 1000
Created a chunk of size 1394, which is longer than the specified 1000
Created a chunk of size 1295, which is longer than the specified 1000
Created a chunk of size 1761, which is longer than the specified 1000


In [9]:
# This will make an API call to Open AI
from langchain.vectorstores import DeepLake

username = "jessems"  # replace with your username from app.activeloop.ai
db = DeepLake(
    dataset_path=f"hub://{username}/glidequery",
    embedding_function=embeddings,
)
db.add_documents(texts)
# end

Deep Lake Dataset in hub://jessems/glidequery already exists, loading from the storage


|

Dataset(path='hub://jessems/glidequery', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
 embedding  embedding  (276, 1536)  float32   None   
    id        text      (276, 1)      str     None   
 metadata     json      (276, 1)      str     None   
   text       text      (276, 1)      str     None   


 

['aa54a7c6-223a-11ee-b18c-0e26fb762f4c',
 'aa54a906-223a-11ee-b18c-0e26fb762f4c',
 'aa54a974-223a-11ee-b18c-0e26fb762f4c',
 'aa54a9c4-223a-11ee-b18c-0e26fb762f4c',
 'aa54aa0a-223a-11ee-b18c-0e26fb762f4c',
 'aa54aa5a-223a-11ee-b18c-0e26fb762f4c',
 'aa54aaa0-223a-11ee-b18c-0e26fb762f4c',
 'aa54ab4a-223a-11ee-b18c-0e26fb762f4c',
 'aa54aba4-223a-11ee-b18c-0e26fb762f4c',
 'aa54abea-223a-11ee-b18c-0e26fb762f4c',
 'aa54ac30-223a-11ee-b18c-0e26fb762f4c',
 'aa54ac76-223a-11ee-b18c-0e26fb762f4c',
 'aa54acbc-223a-11ee-b18c-0e26fb762f4c',
 'aa54acf8-223a-11ee-b18c-0e26fb762f4c',
 'aa54ad3e-223a-11ee-b18c-0e26fb762f4c',
 'aa54ad7a-223a-11ee-b18c-0e26fb762f4c',
 'aa54adc0-223a-11ee-b18c-0e26fb762f4c',
 'aa54adfc-223a-11ee-b18c-0e26fb762f4c',
 'aa54ae42-223a-11ee-b18c-0e26fb762f4c',
 'aa54ae92-223a-11ee-b18c-0e26fb762f4c',
 'aa54aece-223a-11ee-b18c-0e26fb762f4c',
 'aa54af28-223a-11ee-b18c-0e26fb762f4c',
 'aa54af78-223a-11ee-b18c-0e26fb762f4c',
 'aa54afb4-223a-11ee-b18c-0e26fb762f4c',
 'aa54b00e-223a-

In [10]:
db = DeepLake(
    dataset_path="hub://jessems/glidequery",
    read_only=True,
    embedding_function=embeddings,
)
# end

Deep Lake Dataset in hub://jessems/glidequery already exists, loading from the storage


In [11]:
retriever = db.as_retriever()
retriever.search_kwargs["distance_metric"] = "cos"
retriever.search_kwargs["fetch_k"] = 100
retriever.search_kwargs["maximal_marginal_relevance"] = True
retriever.search_kwargs["k"] = 10
# end

In [38]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

general_system_template = r""" 
Your are a professional ServiceNow dveloper. Give a detailed answers aimed at other programmers. Start your explanations off in simple terms. Respond with markdown. Include code snippets if appropriate. If you don't know the answer, simply say you don't know.
 ----
{context}
----
"""
general_user_template = "Question:```{question}```"
messages = [
            SystemMessagePromptTemplate.from_template(general_system_template),
            HumanMessagePromptTemplate.from_template(general_user_template)
]
qa_prompt = ChatPromptTemplate.from_messages( messages )

model = ChatOpenAI(model_name="gpt-4")
qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever, combine_docs_chain_kwargs={"prompt": qa_prompt})
# end

In [39]:
chat_history = []

def ask(question, chat_history):
    result = qa({"question": question, "chat_history": chat_history})
    chat_history.append((question, result["answer"]))
    with open('answers.md', 'a') as f:
        f.write(f"**Question**: {question} \n\n")
        f.write(f"**Answer**: {result['answer']} \n\n")
# end

In [40]:
ask("How does GlideQuery work?", chat_history)

In [41]:
ask("How is GlideQuery implemented that it allows for you to chain methods on top of each other?", chat_history)

In [43]:
ask("What about the toArray() method, I don't understand why we need that. What does it do?", chat_history)

In [45]:
ask("How can I use regex with GlideQuery?", chat_history)