In [4]:
!pip install faiss-cpu==1.7.4

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting faiss-cpu==1.7.4
  Using cached faiss_cpu-1.7.4-cp310-cp310-macosx_11_0_arm64.whl.metadata (1.3 kB)
Using cached faiss_cpu-1.7.4-cp310-cp310-macosx_11_0_arm64.whl (2.7 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4


In [1]:
from typing import List, Dict
from langchain.schema.vectorstore import VectorStore
from langchain.docstore.document import Document
import pinecone
import logging
from text_embeddings import TextEmbeddings
from langchain.vectorstores import Pinecone
from langchain.vectorstores import FAISS
from adapter import ArxivStoreAdapter
from langchain.schema.document import Document


class VoltScriptStore:
    def __init__(self, docs: List[Document]=None, db_dir: FAISS=None):
        logging.info("initialising vector database...")
        embed_model = TextEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
        self.local_dir = db_dir
        if not docs and self.local_dir:
            self.vectorstore = FAISS.load_local(self.local_dir, embed_model)
        elif docs and not db_dir:
            self.vectorstore = FAISS.from_documents(docs, embed_model)
        else:
            raise Exception("No Documents and local FAISS file")

    def insert_docs(self, texts: List[Dict]):
        docs = []
        for i in range(0, len(texts)):
            doc = Document(page_content=texts[i]['text'], metadata= {"filename": texts[i].get("filename", "/")})
            docs.append(doc)
        res = self.vectorstore.add_documents(docs)
        if self.local_dir:
            self.vectorstore.save_local(self.local_dir)
        return res

  from tqdm.autonotebook import tqdm


In [2]:
import document_loader as docloader
import os


LOTUSCRIPT_DATA_DIR = os.path.join("data")
VOLTSCRIPT_DATA = os.path.join("data", "voltscript.csv")

db = VoltScriptStore(docs=docloader.column_in_csv_to_list_of_docs(file=VOLTSCRIPT_DATA, col="code", metadata_cols=['filename']))

In [3]:
db.vectorstore.add_documents(docloader.crawled_csv_to_list_of_docs(dir=LOTUSCRIPT_DATA_DIR))

['feeca100-7d2d-45c6-af6e-aee765abc653',
 '6508b92e-ab0f-42dc-baed-342939bb995a',
 '2782621c-3a0b-41b8-8b9a-2bc051d5d9f3',
 '27e57f33-4b75-42d1-ad6c-691be58d8bfd',
 '6302de3a-d4fe-4ee1-8990-c1f871082600',
 '444dea95-0798-41e0-a360-8ba8f3a13ea8',
 '30aa7be9-89cb-4008-a924-ac0a3153e53f',
 '0c2cd145-70d7-49e7-b0f3-3242c434d542',
 '6ebcd290-c630-489a-b9b1-aafae9d94bbf',
 '63c3b93f-a992-4af0-8ff8-030649b0d4b4',
 'a32f2f1f-7478-4c5d-b29e-3a0d4902bb83',
 '52bbca00-0f56-486d-95f0-df9f2fe300b3',
 'bcaebf75-5c5f-4872-abca-1c07fa6f394b',
 '0ec88446-b59d-4f62-8d47-04d511f53dec',
 'b788fddc-ebf1-475e-94d0-9fa8586f3d30',
 '125496cb-3bd8-48a6-bdbf-904853b4e9ff',
 'f6ae7c03-77df-4063-9fa8-2926b8dd93bf',
 '395de707-9ebe-478c-94ce-35c3aa075055',
 '79bb6d7b-6004-48ee-b489-0797d6686b90',
 '82bf6aa3-df06-4987-a10f-7e037030f0e4',
 'ee2b8ef5-6f0a-4aa1-a115-1f4dcecfd941',
 '4467c9dd-5c24-4f2c-836e-3bb2089ad0b9',
 '599fc5e7-e3aa-4b09-aa62-27615ac9996b',
 '7c483240-48d7-40a0-8455-79f5e936acf4',
 '1e0b16f4-27f8-

In [4]:
# db.vectorstore.search("lotuscript", search_type="similarity")

In [5]:
# Init your retriever.
retriever = db.vectorstore.as_retriever(
    search_type="similarity",  # Also test "similarity", "mmr"
    search_kwargs={"k": 3},)

In [6]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from llm import Codey

# LLM
code_llm = Codey()

# RAG template
prompt_RAG = """
    You are a proficient voltscript developer. Respond with the syntactically correct code for to the question below. Make sure you follow these rules:
    1. Use context to understand the APIs and how to use it & apply.
    2. Do not add license information to the output code.
    3. Do not include colab code in the output.
    4. Ensure all the requirements in the question are met.

    Question:
    {question}

    Context:
    {context}

    Helpful Response :
    """

prompt_RAG_tempate = PromptTemplate(
    template=prompt_RAG, input_variables=["context", "question"]
)

qa_chain = RetrievalQA.from_llm(
    llm=code_llm, prompt=prompt_RAG_tempate, retriever=retriever, return_source_documents=True
)

In [9]:
user_question = """
translate this lotusscript code to voltscript:
Sub Click(Source As Button)
	Dim answers As Variant
	Dim doc As NotesUIDocument
	Dim questionElements(1 To 5) As String
	questionElements(1) = "question_1"
	questionElements(2) = "question_2"
	questionElements(3) = "question_3"
	questionElements(4) = "question_4"
	questionElements(5) = "question_5"
	Forall element In questionElements
		Set selectedRadio = doc.GetObject(element)
		If selectedRadio.IsChecked Then
			Call answers.AppendToTextList(element, selectedRadio.Value) ' Associate question with answer
		Else
           	' Handle cases where no radio button is selected (optional)
			Print "No answer selected for question: " + element
		End If
	End Forall
	Set resultInput = doc.GetObject("result")
	resultInput.Value = answers.Text ' Set the result field's value
End Sub
"""

results = qa_chain({"query": user_question})
print(results["result"])
print([results['source_documents']])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 ```voltscript
Sub Click(Source As Button)
    Dim answers As Variant
    Dim doc As NotesUIDocument
    Dim questionElements(1 To 5) As String
    questionElements(1) = 'question_1'
    questionElements(2) = 'question_2'
    questionElements(3) = 'question_3'
    questionElements(4) = 'question_4'
    questionElements(5) = 'question_5'
    Forall element In questionElements
        Set selectedRadio = doc.GetObject(element)
        If selectedRadio.IsChecked Then
            Call answers.AppendToTextList(element, selectedRadio.Value) ' Associate question with answer
        Else
           ' Handle cases where no radio button is selected (optional)
            Print 'No answer selected for question: ' + element
        End If
    End Forall
    Set resultInput = doc.GetObject('result')
    resultInput.Value = answers.Text ' Set the result field's value
End Sub
```
[[Document(page_content='\nLesson 1: Printing the title of a database\n\n\nLesson 1 creates a script that displays the tit

In [29]:
user_question = "generate code for initializing server database named 'testDB' in voltscript"

results = qa_chain({"query": user_question})
print(results["result"])
print([results['source_documents']])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 ```voltscript
Option Declare
Option Public

Use '../libs/functions.vss'

Sub Initialize
	Dim server As CouchServer
	Dim db As CouchDatabase
    Dim loginResult as Boolean

	loginResult = loginCouchDbServer(server, USERNAME, PASSWORD, SERVER_URL, '')
	If Not loginResult Then Return

    Print 'Login server: ' & loginResult
    Print 'CouchServer.CouchVersion: ' & server.CouchVersion
	Print 'CouchServer UUID: ' & server.getUUID()

	Call getCouchDbFeatures(server)
	Call getCouchDbDatabaseNames(server)
	Set db = getOrCreateCouchDbDatabase(server, 'testDB')

	Call PrintCouchDbDocumentIDs(db)

End Sub
```
[[Document(page_content='Option Declare\nOption Public\n\nUse "../libs/functions.vss"\n\nSub Initialize\n\tDim server As CouchServer\n\tDim db As CouchDatabase\n    Dim loginResult as Boolean\n\n\tloginResult = loginCouchDbServer(server, USERNAME, PASSWORD, SERVER_URL, "")\n\tIf Not loginResult Then Return\n\n    Print "Login server: " & loginResult\n    Print "CouchServer.CouchVersion: " 

In [10]:
user_question = """translate this python code to voltscript: 
my_array = ["a", "b", "c"]
i = 0
while i < len(my_array):
    print(my_array[i])
    i+=1
"""

results = qa_chain({"query": user_question})
print(results["result"])
print([results['source_documents']])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 ```voltscript
Dim my_array(0 To 2) As Variant
my_array(0) = "a"
my_array(1) = "b"
my_array(2) = "c"
Dim i As Integer
i = 0
While i < UBound(my_array)
    Print my_array(i)
    i++
Wend
```
[[Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "a"\nmy_array(1) = "b"\nmy_array(2) = "c"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend', metadata={'filename': 'loop.vss'}), Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "a"\nmy_array(1) = "b"\nmy_array(2) = "c"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend'), Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "first record"\nmy_array(1) = "second record"\nmy_array(2) = "third record"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend', metadata={'filename': 'loop.vss'})]]


In [10]:
user_question = """translate this python code to voltscript: 
my_array = ['hi micheal', 'hi jim', 'hi pam', 'hi dwight']
i = 0
while i < len(my_array):
    print(my_array[i])
    i+=1
"""

results = qa_chain({"query": user_question})
print(results["result"])
print([results['source_documents']])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 ```voltscript
Dim my_array(0 To 3) As Variant
my_array(0) = "hi micheal"
my_array(1) = "hi jim"
my_array(2) = "hi pam"
my_array(3) = "hi dwight"
Dim i As Integer
i = 0
While i < UBound(my_array)
    Print my_array(i)
    i = i + 1
Wend
```
[[Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "a"\nmy_array(1) = "b"\nmy_array(2) = "c"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend', metadata={'filename': 'loop.vss'}), Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "a"\nmy_array(1) = "b"\nmy_array(2) = "c"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend'), Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "first record"\nmy_array(1) = "second record"\nmy_array(2) = "third record"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend', metadata={'filename': 'loop.vss'})]]


In [11]:
user_question = "generate a while loop in voltscript"

results = qa_chain({"query": user_question})
print(results["result"])
print([results['source_documents']])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 ```voltscript
While True
    Print "Hello, world!"
Wend
```
[[Document(page_content='https://help.hcltechsw.com/docs/voltscript/early-access/howto/language/loops.html'), Document(page_content='loop.vss'), Document(page_content='%REM\n\tCopyright 2023 HCL America, Inc.\n\tLicensed under the Apache License, Version 2.0 (the "License");\n\tyou may not use this file except in compliance with the License.\n\tYou may obtain a copy of the License at\n\n\thttp://www.apache.org/licenses/LICENSE-2.0\n\n\tUnless required by applicable law or agreed to in writing,\n\tsoftware distributed under the License is distributed on an "AS IS" BASIS,\n\tWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n\tSee the License for the specific language governing permissions and limitations under the License\n%END REM\n\nOption Declare\nOption Public\n\nUse "../libs/functions"\nUse "../libs/VoltScriptTesting"\n\nConst goodJson = |{"firstName":"John","lastName":"Doe","age":42,"children":["Ja

In [12]:
user_question = """annotate comments in this voltscript code: 
Dim my_array(0 To 3) As Variant
my_array(0) = "hi micheal"
my_array(1) = "hi jim"
my_array(2) = "hi pam"
my_array(3) = "hi dwight"
Dim i As Integer
i = 0
While i < UBound(my_array)
    Print my_array(i)
    i++
Wend
"""

results = qa_chain({"query": user_question})
print(results["result"])
print([results['source_documents']])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 ```voltscript
'Declare an array of variants with 4 elements
Dim my_array(0 To 3) As Variant

'Assign values to the array elements
my_array(0) = 'hi micheal'
my_array(1) = 'hi jim'
my_array(2) = 'hi pam'
my_array(3) = 'hi dwight'

'Declare an integer variable i and initialize it to 0
Dim i As Integer
i = 0

'Use a while loop to iterate through the array and print each element
While i < UBound(my_array)
    Print my_array(i)
    i++
Wend
```
[[Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "a"\nmy_array(1) = "b"\nmy_array(2) = "c"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend', metadata={'filename': 'loop.vss'}), Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "a"\nmy_array(1) = "b"\nmy_array(2) = "c"\nDim i as Integer\ni = 0\nWhile i < UBound(my_array)\n    Print my_array(i)\n    i++\nWend'), Document(page_content='Dim my_array(0 To 3) As Variant\nmy_array(0) = "first record"\nmy_array(1) = "secon

In [13]:
user_question = """translate this lotusscript code to voltscript: 
Sub Initialize
     Dim session As New NotesSession
     Dim db As NotesDatabase
     Dim dc As NotesDocumentCollection
     Dim doc As NotesDocument
     Set db = session.CurrentDatabase
     Set dc = db.AllDocuments
     Set doc = dc.GetFirstDocument
     While Not(doc Is Nothing)
          category = doc.Category
          totalSales = doc.TotalSales
          Select Case totalSales(0)
          Case Is >= 200000 : category(0) = "Above Quota"
          Case Is >= 100000 : category(0) = "OK"
          Case Else : category(0) = "Below Quota"
          End Select
          doc.Category = category
          Call doc.Save(True, False)
          Set doc = dc.GetNextDocument(doc)
     Wend
End Sub
"""

results = qa_chain({"query": user_question})
print(results["result"])
print([results['source_documents']])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


 ```voltscript
Sub Initialize
    Dim session As New NotesSession
    Dim db As NotesDatabase
    Dim dc As NotesDocumentCollection
    Dim doc As NotesDocument
    Set db = session.CurrentDatabase
    Set dc = db.AllDocuments
    Set doc = dc.GetFirstDocument
    While Not(doc Is Nothing)
        category = doc.Category
        totalSales = doc.TotalSales
        Select Case totalSales(0)
        Case Is >= 200000 : category(0) = 'Above Quota'
        Case Is >= 100000 : category(0) = 'OK'
        Case Else : category(0) = 'Below Quota'
        End Select
        doc.Category = category
        Call doc.Save(True, False)
        Set doc = dc.GetNextDocument(doc)
    Wend
End Sub
```
[[Document(page_content='\nLesson 2: Counting the documents in a view category\n\n\nThis is the second of three lessons designed to introduce\nyou to using the LotusScript® language\nin Domino®. You should already\nhave completed Lesson 1.\nLesson 2 helps you create a script that counts the number of docum

In [8]:
from typing import Optional

from fastapi import FastAPI
from pathlib import Path
import os, sys
import uvicorn
import json
import nest_asyncio

nest_asyncio.apply()
app = FastAPI()

@app.post("/domino/code_interpretor/")
async def create_item(data: dict):
    print(data)
    data = data['data']
    response  = qa_chain({"query": data})
    return json.dumps({"data":response})



if __name__ == "__main__":
    PORT = "8081"
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('PORT', PORT)))

INFO:     Started server process [96250]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8081 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [96250]


In [26]:
!pip install fastapi uvicorn

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting uvicorn
  Downloading uvicorn-0.27.1-py3-none-any.whl.metadata (6.3 kB)
Collecting h11>=0.8 (from uvicorn)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading uvicorn-0.27.1-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m176.0 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m433.0 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: h11, uvicorn
Successfully installed h11-0.14.0 uvicorn-0.27.1


In [10]:
print(" ```voltscript\nDim num As Integer\nnum = CInt(InputBox(\"Enter a number: \"))\nIf (num Mod 2) = 0 Then\n    Print \"0 is Even\"\nElse\n    Print \"0 is Odd\"\nEnd If\n```")

 ```voltscript
Dim num As Integer
num = CInt(InputBox("Enter a number: "))
If (num Mod 2) = 0 Then
    Print "0 is Even"
Else
    Print "0 is Odd"
End If
```


In [11]:
print(" ```voltscript\n' Check if anInt% is greater than myLong&\nIf anInt% > myLong& Then\n    ' Print a message indicating that anInt% is greater than myLong&\n    Print 'anInt% is greater than myLong&.'\nEnd If\n```")

 ```voltscript
' Check if anInt% is greater than myLong&
If anInt% > myLong& Then
    ' Print a message indicating that anInt% is greater than myLong&
    Print 'anInt% is greater than myLong&.'
End If
```


In [12]:
print(" ```voltscript\nSub loopArrayExitThe(passedArr as Variant)\n    Dim i as Integer\n    Dim the as Integer\n    Do Until i > UBound(passedArr)\n        If (LCase(passedArr(i)) = 'the') Then\n            If the++ > 0 Then Exit Do\n        End If\n        Print passedArr(i++)\n    Loop\nEnd Sub\n\nSub main()\n    Dim arr as Variant\n    arr = {'hi micheal', 'hi jim', 'hi pam', 'hi dwight'}\n    loopArrayExitThe(arr)\nEnd Sub\n```")

 ```voltscript
Sub loopArrayExitThe(passedArr as Variant)
    Dim i as Integer
    Dim the as Integer
    Do Until i > UBound(passedArr)
        If (LCase(passedArr(i)) = 'the') Then
            If the++ > 0 Then Exit Do
        End If
        Print passedArr(i++)
    Loop
End Sub

Sub main()
    Dim arr as Variant
    arr = {'hi micheal', 'hi jim', 'hi pam', 'hi dwight'}
    loopArrayExitThe(arr)
End Sub
```
