Skip to content

Commit

Permalink
Merge pull request #109 from arc53/main
Browse files Browse the repository at this point in the history
updates
  • Loading branch information
dartpain committed Feb 20, 2023
2 parents d4ede13 + 3c7ac76 commit 962be4d
Show file tree
Hide file tree
Showing 46 changed files with 887 additions and 6,075 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,7 @@ Copy .env_sample and create .env with your openai api token

## [How to use any other documentation](https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation)

## [How to host it locally (so all data will stay on-premises)](https://github.com/arc53/DocsGPT/wiki/How-to-use-different-LLM's#hosting-everything-locally)

Built with [🦜️🔗 LangChain](https://github.com/hwchase17/langchain)

139 changes: 94 additions & 45 deletions application/app.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,88 @@
import os
import pickle
import json

import dotenv
import datetime
import requests
from flask import Flask, request, render_template
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
import faiss
from langchain import OpenAI, VectorDBQA
from langchain import FAISS
from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
import requests

# os.environ["LANGCHAIN_HANDLER"] = "langchain"

if os.getenv("LLM_NAME") is not None:
llm_choice = os.getenv("LLM_NAME")
else:
llm_choice = "openai"

if os.getenv("EMBEDDINGS_NAME") is not None:
embeddings_choice = os.getenv("EMBEDDINGS_NAME")
else:
embeddings_choice = "openai_text-embedding-ada-002"



if llm_choice == "manifest":
from manifest import Manifest
from langchain.llms.manifest import ManifestWrapper

manifest = Manifest(
client_name="huggingface",
client_connection="http://127.0.0.1:5000"
)

# Redirect PosixPath to WindowsPath on Windows
import platform

if platform.system() == "Windows":
import pathlib

temp = pathlib.PosixPath
pathlib.PosixPath = pathlib.WindowsPath

# loading the .env file
dotenv.load_dotenv()


with open("combine_prompt.txt", "r") as f:
template = f.read()

# check if OPENAI_API_KEY is set
if os.getenv("OPENAI_API_KEY") is not None:
api_key_set = True
with open("combine_prompt_hist.txt", "r") as f:
template_hist = f.read()

if os.getenv("API_KEY") is not None:
api_key_set = True
else:
api_key_set = False


if os.getenv("EMBEDDINGS_KEY") is not None:
embeddings_key_set = True
else:
embeddings_key_set = False

app = Flask(__name__)


@app.route("/")
def home():
return render_template("index.html", api_key_set=api_key_set)
return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
embeddings_choice=embeddings_choice)


@app.route("/api/answer", methods=["POST"])
def api_answer():
data = request.get_json()
question = data["question"]
history = data["history"]
if not api_key_set:
api_key = data["api_key"]
else:
api_key = os.getenv("OPENAI_API_KEY")
api_key = os.getenv("API_KEY")
if not embeddings_key_set:
embeddings_key = data["embeddings_key"]
else:
embeddings_key = os.getenv("EMBEDDINGS_KEY")


# check if the vectorstore is set
if "active_docs" in data:
Expand All @@ -59,25 +93,37 @@ def api_answer():
vectorstore = ""

# loading the index and the store and the prompt template
index = faiss.read_index(f"{vectorstore}docs.index")
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
if embeddings_choice == "openai_text-embedding-ada-002":
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
elif embeddings_choice == "huggingface_hkunlp/instructor-large":
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
elif embeddings_choice == "cohere_medium":
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))

with open(f"{vectorstore}faiss_store.pkl", "rb") as f:
store = pickle.load(f)

store.index = index
# create a prompt template
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
# create a chain with the prompt template and the store

#chain = VectorDBQA.from_llm(llm=OpenAI(openai_api_key=api_key, temperature=0), vectorstore=store, combine_prompt=c_prompt)
# chain = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key=api_key, temperature=0), chain_type='map_reduce',
# vectorstore=store)

qa_chain = load_qa_chain(OpenAI(openai_api_key=api_key, temperature=0), chain_type="map_reduce",
if history:
history = json.loads(history)
template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1])
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2")
else:
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2")

if llm_choice == "openai":
llm = OpenAI(openai_api_key=api_key, temperature=0)
elif llm_choice == "manifest":
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
elif llm_choice == "huggingface":
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
elif llm_choice == "cohere":
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)

qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
combine_prompt=c_prompt)
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=store)


chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)

# fetch the answer
result = chain({"query": question})
Expand All @@ -94,30 +140,33 @@ def api_answer():
# }
return result


@app.route("/api/docs_check", methods=["POST"])
def check_docs():
# check if docs exist in a vectorstore folder
data = request.get_json()
vectorstore = "vectors/" + data["docs"]
base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'
#
if os.path.exists(vectorstore):
if os.path.exists(vectorstore) or data["docs"] == "default":
return {"status": 'exists'}
else:
r = requests.get(base_path + vectorstore + "docs.index")
# save to vectors directory
# check if the directory exists
if not os.path.exists(vectorstore):
os.makedirs(vectorstore)

with open(vectorstore + "docs.index", "wb") as f:
f.write(r.content)
# download the store
r = requests.get(base_path + vectorstore + "faiss_store.pkl")
with open(vectorstore + "faiss_store.pkl", "wb") as f:
f.write(r.content)

return {"status": 'loaded'}
r = requests.get(base_path + vectorstore + "index.faiss")

if r.status_code != 200:
return {"status": 'null'}
else:
if not os.path.exists(vectorstore):
os.makedirs(vectorstore)
with open(vectorstore + "index.faiss", "wb") as f:
f.write(r.content)

# download the store
r = requests.get(base_path + vectorstore + "index.pkl")
with open(vectorstore + "index.pkl", "wb") as f:
f.write(r.content)

return {"status": 'loaded'}


# handling CORS
@app.after_request
Expand Down
4 changes: 2 additions & 2 deletions application/combine_prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ Source: 0-pl
FINAL ANSWER: You can't eat vegetables using pandas. You can only eat them using your mouth.
SOURCES:

QUESTION: {question}
QUESTION: {{ question }}
=========
{summaries}
{{ summaries }}
=========
FINAL ANSWER:
27 changes: 27 additions & 0 deletions application/combine_prompt_hist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
You are a DocsGPT bot assistant by Arc53 that provides help with programming libraries. You give thorough answers with code examples.
Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
ALWAYS return a "SOURCES" part in your answer. You can also remember things from previous questions and use them in your answer.

QUESTION: How to merge tables in pandas?
=========
Content: pandas provides various facilities for easily combining together Series or DataFrame with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations.
Source: 28-pl
Content: pandas provides a single function, merge(), as the entry point for all standard database join operations between DataFrame or named Series objects: \n\npandas.merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)
Source: 30-pl
=========
FINAL ANSWER: To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is: \n\npd.merge(left, right, on, how) \n\nwhere left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform. \n\nFor example, to merge the two tables df1 and df2 on the column 'id', you can use: \n\npd.merge(df1, df2, on='id', how='inner')
SOURCES: 28-pl 30-pl

QUESTION: {{ historyquestion }}
=========
CONTENT:
SOURCE:
=========
FINAL ANSWER: {{ historyanswer }}
SOURCES:

QUESTION: {{ question }}
=========
{{ summaries }}
=========
FINAL ANSWER:
Binary file removed application/docs.index
Binary file not shown.
Binary file removed application/faiss_store.pkl
Binary file not shown.
Binary file added application/index.faiss
Binary file not shown.
Binary file added application/index.pkl
Binary file not shown.
3 changes: 2 additions & 1 deletion application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pytz==2022.7.1
PyYAML==6.0
regex==2022.10.31
requests==2.28.2
retry==0.9.2
six==1.16.0
snowballstemmer==2.2.0
Sphinx==6.1.3
Expand All @@ -64,6 +65,6 @@ typer==0.7.0
typing-inspect==0.8.0
typing_extensions==4.4.0
urllib3==1.26.14
Werkzeug==2.2.2
Werkzeug==2.2.3
XlsxWriter==3.0.8
yarl==1.8.2
5 changes: 5 additions & 0 deletions application/static/src/chat.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ if (el) {

body: JSON.stringify({question: message,
api_key: localStorage.getItem('apiKey'),
embeddings_key: localStorage.getItem('apiKey'),
history: localStorage.getItem('chatHistory'),
active_docs: localStorage.getItem('activeDocs')}),
})
.then(response => response.json())
Expand All @@ -38,9 +40,12 @@ if (el) {
chatWindow.scrollTop = chatWindow.scrollHeight;
document.getElementById("button-submit").innerHTML = 'Send';
document.getElementById("button-submit").disabled = false;
let chatHistory = [message, data.answer];
localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
})
.catch((error) => {
console.error('Error:', error);
console.log(error);
document.getElementById("button-submit").innerHTML = 'Send';
document.getElementById("button-submit").disabled = false;
});
Expand Down
12 changes: 8 additions & 4 deletions application/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,17 @@ <h2>Before you can start using DocsGPT we need you to provide an API key for llm
var option = document.createElement("option");
if (docsIndex[key].name == docsIndex[key].language) {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/";
select.add(option);
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
}
}
else {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/";
select.add(option);
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
}
}
}

Expand Down
14 changes: 7 additions & 7 deletions frontend/.eslintrc.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ module.exports = {
plugins: ['react'],
rules: {
'react/react-in-jsx-scope': 'off',
'prettier/prettier': [
'error',
{
endOfLine: 'auto',
},
],
},
settings: {
'import/parsers': {
Expand All @@ -34,10 +40,4 @@ module.exports = {
},
},
},
'prettier/prettier': [
'error',
{
endOfLine: 'auto',
},
],
}
};
Loading

0 comments on commit 962be4d

Please sign in to comment.