In [1]:
!pip install -qqq accelerate transformers sentence_transformers huggingface_hub langchain==0.0.166 pygpt4all==1.1.0 chromadb==0.3.22 llama-cpp-python==0.1.48 urllib3==1.26.6 pdfminer.six==20221105 > /dev/null

In [2]:
!wget https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin

--2023-05-15 11:30:21--  https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin
Resolving gpt4all.io (gpt4all.io)... 104.26.1.159, 104.26.0.159, 172.67.71.169, ...
Connecting to gpt4all.io (gpt4all.io)|104.26.1.159|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3785248281 (3.5G)
Saving to: ‘ggml-gpt4all-j-v1.3-groovy.bin.1’


2023-05-15 11:31:39 (46.2 MB/s) - ‘ggml-gpt4all-j-v1.3-groovy.bin.1’ saved [3785248281/3785248281]



In [1]:
from langchain.chains import RetrievalQA
from langchain.embeddings import LlamaCppEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.llms import GPT4All, LlamaCpp
import os
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings

In [2]:
 callbacks = [StreamingStdOutCallbackHandler()]

In [3]:
 llm = GPT4All(model="/content/ggml-gpt4all-j-v1.3-groovy.bin", 
               n_ctx=1000, 
               backend='gptj', 
               callbacks=callbacks, 
               verbose=False)

In [None]:
llm("Who landed on the moon on July 21 1969")

?
The United States of America.

'?\nThe United States of America.'

In [11]:
from langchain import PromptTemplate,  LLMChain

template = """Use text below and answer the question: {question}
{text}
Answer:"""
prompt = PromptTemplate(template=template, 
                        input_variables=["question","text"])

In [12]:
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [10]:
question = "What is electroencephalography?"
text = """Electroencephalography (EEG) is a method to record an electrogram of the spontaneous electrical activity of the brain. The biosignals detected by EEG have been shown to represent the postsynaptic potentials of pyramidal neurons in the neocortex and allocortex.[1] It is typically non-invasive, with the EEG electrodes placed along the scalp (commonly called "scalp EEG") using the International 10-20 system, or variations of it. Electrocorticography, involving surgical placement of 
electrodes, is sometimes called "intracranial EEG". Clinical interpretation of
 EEG recordings is most often performed by visual inspection of the
  tracing or quantitative EEG analysis."""

In [None]:
print(llm_chain.run(question=question,text=text))

In [6]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [7]:
from langchain.schema import Document

docs = [
    Document(page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose", metadata={"year": 1993, "rating": 7.7, "genre": "science fiction","source":"local"}),
    Document(page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2,"source":"local"}),
    Document(page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea", metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6,"source":"local"}),
    Document(page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them", metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3,"source":"local"}),
    Document(page_content="Toys come alive and have a blast doing so", metadata={"year": 1995, "genre": "animated","source":"local"}),
    Document(page_content="Three men walk into the Zone, three men walk out of the Zone", metadata={"year": 1979, "rating": 9.9, "director": "Andrei Tarkovsky", "genre": "science fiction", "rating": 9.9,"source":"local"})
]

In [8]:
vectorstore = Chroma.from_documents(
    docs, embeddings
)

In [12]:
vectorstore.similarity_search("Movie on dinosaur")

[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction', 'source': 'local'}),
 Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated', 'source': 'local'}),
 Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6, 'source': 'local'}),
 Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2, 'source': 'local'})]

In [13]:
#from langchain.chains import RetrievalQAWithSourcesChain
#chain = RetrievalQAWithSourcesChain.from_chain_type(llm, 
 #                                                   chain_type="stuff", 
  #                                                  retriever=vectorstore.as_retriever())
#chain({"question": "What movie did dinosaur escape"}, 
 #     return_only_outputs=True)

In [13]:
def get_text(question):
  similar_data = vectorstore.similarity_search(question)
  text = ''
  for elem in similar_data:
    text = text + elem.page_content
  print(text)
  return text

In [14]:
def retrieve_answer(question):
  support_text = get_text(question)
  answer = llm_chain.run(question=question, 
                         text= support_text)
  return answer

In [None]:
retrieve_answer("Who brought the dinosaurs back")

A bunch of scientists bring back dinosaurs and mayhem breaks looseToys come alive and have a blast doing soThree men walk into the Zone, three men walk out of the ZoneA bunch of normal-sized women are supremely wholesome and some men pine after them
 Who brought

In [None]:
retrieve_answer("Dream within Dream")

In [None]:
from langchain.agents import load_huggingface_tool

tool = load_huggingface_tool("lysandre/hf-model-downloads")

print(f"{tool.name}: {tool.description}")

In [None]:
tool.run("text-generation")

'gpt2'

In [None]:
tool_tg = load_huggingface_tool("huggingface-tools/text-download")

In [None]:
tool_tg("https://python.langchain.com/en/latest/modules/models/llms/integrations/huggingface_pipelines.html")

In [None]:
tool_classify = load_huggingface_tool("Sj8287/Sentiment_Classification")