In [1]:
from langchain import PromptTemplate, LLMChain
# pip install gpt4all
from langchain.llms import GPT4All
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

import pandas as pt

In [29]:
PATH = 'C:/Users/constantinechung/AppData/Local/nomic.ai/GPT4All/'
# Model = 'ggml-gpt4all-j-v1.3-groovy.bin'
Model = 'ggml-stable-vicuna-13B.q4_2.bin'
# Model = 'ggml-vicuna-13b-1.1-q4_2.bin'
# Model = 'GPT4All-13B-snoozy.ggmlv3.q4_0.bin'
# Model = 'nous-hermes-13b.ggmlv3.q4_0.bin'

model_path = PATH + Model
# model_path

In [30]:
llm = GPT4All(model=model_path, temp=0, verbose=False)

Found model file at  C:/Users/constantinechung/AppData/Local/nomic.ai/GPT4All/ggml-stable-vicuna-13B.q4_2.bin


In [32]:
template = """Question: {question}

Answer: give out the answer step by step in a consice point form."""

prompt = PromptTemplate(template=template, input_variables=["question"])

In [33]:
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [34]:
question = "could you tell me how to make beef wellington?"

In [None]:
llm_chain.run(question)

In [36]:
prompt = """
Please provide a summary of the following text

TEXT:
Philosophy (from Greek: φιλοσοφία, philosophia, 'love of wisdom') \
is the systematized study of general and fundamental questions, \
such as those about existence, reason, knowledge, values, mind, and language. \
Some sources claim the term was coined by Pythagoras (c. 570 – c. 495 BCE), \
although this theory is disputed by some. Philosophical methods include questioning, \
critical discussion, rational argument, and systematic presentation.
"""

In [39]:
num_tokens = llm.get_num_tokens(prompt)
print (f"Our prompt has {num_tokens} tokens")

Our prompt has 121 tokens


In [43]:
output = llm(prompt)
# print(output)

Philosophy can also refer to a particular approach or field of study in which ideas are examined through the process described above. The term "philosopher" (in Greek: φιλόσοφος, philosophos) refers both to one who studies philosophy and seeks wisdom as well as someone with an encompassing intellectual curiosity about existence.[1]
Philosophy is a systematic study of fundamental questions that explore the nature of reality, knowledge, values, mind, language, reason, and human experience. It involves critical thinking, rational argumentation, questioning, and presentations based on these methods to address complex issues in different fields or approaches within philosophy itself. The term "philosopher" refers both to someone who seeks wisdom through the study of philosophy as well as an intellectual curiosity about existence.
Philosophy can also refer to a particular approach or field of study in which ideas are examined through the process described above. The term "philosopher" (in Gr

In [47]:
# loader = OnlinePDFLoader('https://arxiv.org/pdf/1912.12180.pdf')
loader = PyPDFLoader('C:/Users/constantinechung/Downloads/ProofsArgsAndZK.pdf')

In [48]:
data = loader.load()

In [56]:
print (f'You have {len(data)} document(s) in your data')
print (f'There are {len(data[0].page_content)} characters in your document')

You have 329 document(s) in your data
There are 375 characters in your document


In [66]:
text = ""

for page in data:
    text += page.page_content
    
text = text.replace('\t', ' ')

In [67]:
num_tokens = llm.get_num_tokens(text)

print (f"This book has {num_tokens} tokens in it")

Token indices sequence length is longer than the specified maximum sequence length for this model (318079 > 1024). Running this sequence through the model will result in indexing errors


This book has 318079 tokens in it


In [68]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
texts = text_splitter.split_documents(text)

AttributeError: 'str' object has no attribute 'page_content'

In [80]:
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", "\t"], chunk_size=500, chunk_overlap=50)
docs = text_splitter.create_documents([text])

In [81]:
print (f'Now you have {len(docs)} documents')

Now you have 2481 documents


In [82]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [83]:
docsearch = Chroma.from_texts([t.page_content for t in docs], embeddings)

In [84]:
query = "can you explain zero knowledge proof to me"
docs = docsearch.similarity_search(query, k=10)

In [85]:
print(len(docs))

docs[3].page_content

10


'101The interested reader is directed to [Gol07, Section 4.7] for a detailed discussion of how to formalize knowledge-soundness.\n102The reader may initially suspect that any proof of knowledge cannot be zero-knowledge: if it is possible to “pull a witness\nwout of the prover’s head”, doesn’t this mean that the proof system reveals the witness to the verifier, grossly violating zero-'

In [86]:
from langchain.chains.question_answering import load_qa_chain

In [87]:
chain = load_qa_chain(llm, chain_type="stuff")

In [88]:
chain.run(input_documents=docs, question=query)

 Sure! A zero-knowledge proof is a mathematical proof that allows one party (the prover) to demonstrate to another party (the verifier) that a given statement is true, without revealing any information beyond the validity of the statement itself. This means that the verifier should learn nothing from the prover other than the validity of the statement being proven. Zero-knowledge proofs are used in cryptography to provide security and privacy, and have applications in areas such as authentication, digital signatures, and anonymous cryptocurrencies.



' Sure! A zero-knowledge proof is a mathematical proof that allows one party (the prover) to demonstrate to another party (the verifier) that a given statement is true, without revealing any information beyond the validity of the statement itself. This means that the verifier should learn nothing from the prover other than the validity of the statement being proven. Zero-knowledge proofs are used in cryptography to provide security and privacy, and have applications in areas such as authentication, digital signatures, and anonymous cryptocurrencies.\n'

In [44]:
from langchain.chains import RetrievalQA

In [45]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

In [None]:
result = qa.run(query)

In [None]:
import re

re.split('\n|\n\n', result)

In [89]:
from langchain.chains.summarize import load_summarize_chain

summary_chain = load_summarize_chain(llm=llm, chain_type='map_reduce',
                                     verbose=False # Set verbose=True if you want to see the prompts being used
                                    )

In [90]:
output = summary_chain.run(docs)




Write a concise summary of the following:


"that the proof reveals nothing beyond its own validity. This is exactly what the zero-knowledge property
guarantees.
A particular goal of this survey is to describe a variety of approaches to constructing so-called zero-
knowledge Succinct Non-interactive Arguments of Knowledge, or zk-SNARKs for short. “Succinct” means
that the proofs are short. “Non-interactive” means that the proof is static, consisting of a single message"













Write a concise summary of the following:


"10.5 Ligero and Brakedown Polynomial Commitments . . . . . . . . . . . . . . . . . . . . . . . . 162
10.6 Unifying IPs, MIPs, and IOPs via Polynomial IOPs . . . . . . . . . . . . . . . . . . . . . . . 167
11 Zero-Knowledge Proofs and Arguments 170
11.1 What is Zero-Knowledge? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 170
11.2 The Limits of Statistical Zero Knowledge Proofs . . . . . . . . . . . . . . . . . . . . . . . 174"









In [19]:
map_prompt = """
Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:
"""
map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"])

In [20]:
combine_prompt = """
Write a concise summary of the following text delimited by triple backquotes.
Return your response in bullet points which covers the key points of the text.
```{text}```
BULLET POINT SUMMARY:
"""
combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text"])

In [21]:
summary_chain = load_summarize_chain(llm=llm,
                                     chain_type='stuff',
                                     map_prompt=map_prompt_template,
                                     combine_prompt=combine_prompt_template,
                                     # verbose=True
                                    )

In [None]:
output = summary_chain.run(docs)