In [1]:
import requests
from bs4 import BeautifulSoup
from pprint import pprint

In [1]:
from src.data_processor import parse_html, process_text
from data.source import URL
import re


chunks = []
for url in URL:
    text = parse_html(url)
    text = re.sub("\{[^)]*\}", "", text)
    chunks.append(text)


https://www.va.gov/disability/after-you-file-claim/
https://www.va.gov/change-address/
https://www.va.gov/change-direct-deposit/
https://www.va.gov/claim-or-appeal-status/
https://www.va.gov/decision-reviews/
https://www.va.gov/disability/eligibility/hazardous-materials-exposure/
https://www.va.gov/disability/eligibility/illnesses-within-one-year-of-discharge/
https://www.va.gov/disability/eligibility/ptsd/
https://www.va.gov/disability/eligibility/special-claims/
https://www.va.gov/disability/how-to-file-claim/additional-forms/
TABLE!!
https://www.va.gov/disability/how-to-file-claim/evidence-needed/fully-developed-claims/
https://www.va.gov/disability/upload-supporting-evidence/
https://www.va.gov/va-payment-history/
https://www.va.gov/disability/view-disability-rating/
https://www.va.gov/disability/how-to-file-claim/additional-forms/
TABLE!!
https://www.va.gov/disability/dependency-indemnity-compensation/
https://www.va.gov/disability/how-to-file-claim/when-to-file/
https://www.va.go

In [82]:
url = 'https://www.va.gov/disability/file-disability-claim-form-21-526ez/introduction'
data = requests.get(url)

my_data = []

html = BeautifulSoup(data.text, 'html.parser')
articles = html.select('p')


In [2]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings

In [3]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                   model_kwargs={'device': 'cpu'})

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain.text_splitter import MarkdownHeaderTextSplitter

headers_to_split_on = [
    ("<topic>", "topic"), ("<descriptor>", "context")
]
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
md_header_splits = markdown_splitter.split_text("\n\n".join(chunks))

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=100
)
docs = text_splitter.split_documents(md_header_splits)

In [6]:
vectorstore = FAISS.from_documents(docs, embeddings)
# vectorstore.save_local('vectorstore/db_faiss')

In [50]:
results = vectorstore.similarity_search("Do I need to turn in any additional forms?", k=5)

for res in results:
    print(res.page_content)

<content> Do I need to turn in any additional forms? In some cases, you may need to turn in one or more additional forms to support your disability claim. If you’re: You’ll need to turn in: If you’re: A National Guard and Reserve member You’ll need to turn in: All your service treatment and personnel records from your unit(s) If you’re: Claiming Individual Unemployability You’ll need to turn in: Both of these: A Veteran’s Application for Increased Compensation Based on Unemployability (VA Form
<content> Do I need to turn in any additional forms? In some cases, you may need to turn in one or more additional forms to support your disability claim. If you’re: You’ll need to turn in: If you’re: A National Guard and Reserve member You’ll need to turn in: All your service treatment and personnel records from your unit(s) If you’re: Claiming Individual Unemployability You’ll need to turn in: Both of these: A Veteran’s Application for Increased Compensation Based on Unemployability (VA Form
<c

In [7]:
# notes:
# page title: html.title.text
# topic content: "va-introtext"
# tags for questions: "vads-u-display--flex"
#          answers to the question: "processed-content"
#          additional questions: "va-nav-linkslist-heading" and "va-nav-linkslist-list"

In [7]:
from langchain.llms import CTransformers

# Local CTransformers wrapper for Llama-2-7B-Chat
llm = CTransformers(model='model/llama-2-7b-chat.ggmlv3.q4_0.bin',  # Location of downloaded GGML model
                    model_type='llama',  # Model type Llama
                    config={'max_new_tokens': 256,
                            'temperature': 0.1})


In [8]:
from langchain.chains import RetrievalQA, StuffDocumentsChain, LLMChain, ConversationalRetrievalChain
from langchain import PromptTemplate


prompt = """
You are a helpful and knowledgeable agent familiar with US veteran affairs (VA).
You will be asked questions about VA insurance policies.
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Make sure to use complete sentence to answer the question.
Helpful answer:
"""

def set_qa_prompt(prompt):
    prompt = PromptTemplate(template=prompt,
                            input_variables=['context', 'question'])
    return prompt


qa_prompt = set_qa_prompt(prompt)

dbqa = RetrievalQA.from_chain_type(llm=llm,
                                   chain_type='stuff',
  	                               retriever=vectorstore.as_retriever(search_kwargs={'k':3}),
                                   return_source_documents=True,
                                   chain_type_kwargs={'prompt': qa_prompt})


document_prompt = PromptTemplate(
    input_variables=["page_content"],
    template="{page_content}"
)
document_variable_name = "title"
# The prompt here should take as an input variable the
# `document_variable_name`
prompt_ = PromptTemplate.from_template(
    "Summarize this content: {title}"
)
llm_chain = LLMChain(llm=llm, prompt=prompt_)
combine_docs_chain = StuffDocumentsChain(
                     llm_chain=llm_chain,
                     document_prompt=document_prompt,
                     document_variable_name=document_variable_name
                     )


template = (
    "Combine the chat history and follow up question into "
    "a standalone question. Chat History: {chat_history}"
    "Follow up question: {question}"
    "Make sure to use complete sentence to answer the question."
)

prompt_combine = PromptTemplate.from_template(template)
question_generator_chain = LLMChain(llm=llm, prompt=prompt_combine)
chain = ConversationalRetrievalChain(
        combine_docs_chain=combine_docs_chain,
        return_source_documents=True,
        retriever=vectorstore.as_retriever(search_kwargs={'k':3}),
        question_generator=question_generator_chain,
        )


        
        # qa_prompt=SUPPORT_PROMPT,
        

In [51]:
import json
chat_history = []
def generate_response(support_qa, prompt):
    response = support_qa({"question": prompt, "chat_history": chat_history})
    chat_history.append((prompt, response["answer"]))
    print( json.dumps(chat_history))
    return response['answer']

In [43]:
ans = chain({'question': "what additional information should I provide?", 'chat_history': []})

{'question': 'what additional information should I provide?',
 'chat_history': [],
 'answer': ' a condition that’s related to an original service-connected condition. You can submit evidence of: A current physical or mental disability from a medical professional or layperson (someone who’s not a trained professional), and An event, injury, or disease that happened during your active-duty service, and A link between your current disability and the original service-connected condition. You can ask for supporting statements from people who know about, or who you’ve talked to about, your claimed medical condition and how and when it occurred. You can also submit evidence of any secondary conditions that have developed as a result of the original service-connected condition. Secondary service-connected claim—a claim for a condition that’s related to an original service-connected condition. You can submit evidence of: A current physical or mental disability from ail mental disability from a 

In [53]:
generate_response(chain, "what else should I know?")

[["what information should I provide?", " a condition that\u2019s related to an original service-connected condition. You can submit evidence of: A current physical or mental disability from a medical professional or layperson (someone who\u2019s not a trained professional), and An event, injury, or disease that happened during your active-duty service, and A link between your current disability and the original service-connected condition. Usually, we need medical records or medical opinions from health care providers to support this link.</content>\n\n"], ["what else should I know?", ". If you experienced one of these events while serving in the military, we may consider it a service-connected condition.\n\n<title> VA Decision Reviews And Appeals | Veterans Affairs</title> "]]


'. If you experienced one of these events while serving in the military, we may consider it a service-connected condition.\n\n<title> VA Decision Reviews And Appeals | Veterans Affairs</title> '

In [10]:
dbqa({'query': "what is the review process?"})

{'query': 'what is the review process?',
 'result': 'A Higher-Level Review typically takes an average of 125 days (4 to 5 months) to complete, with the goal of resolving any identified errors in the decision-making process. If you request an informal conference as part of your Higher-Level Review, it may take longer to complete. Alternatively, you can submit a written statement with your application to identify errors and help us make a decision faster.',
 'source_documents': [Document(page_content='information you want to talk about with the reviewer ready. Prepare to explain any errors in your case.', metadata={'topic': 'How do I ask for an informal conference?'}),
  Document(page_content='You can also request a Higher-Level Review by filling\xa0out a Decision Review Request: Higher-Level Review (VA Form 20-0996). Get VA Form 20-0996 to download Learn more about how to request a Higher-Level Review Note: You can’t submit any evidence. You and/or your representative can speak with the

In [9]:
dbqa({'query': "how do I file additional information?"})

{'query': 'how do I file additional information?',
 'result': 'To file additional information with your VA claim, you can use the Decision Review Request: Supplemental Claim (VA Form 20-0995). This form allows you to add new and relevant evidence that supports your case or identifies evidence for review by a reviewer. You can download the form from the VA website or pick one up at any VA regional office. Once you have completed the form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, you can, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, you can, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form, form',
 'source_documents': [Document(page_content='<content