In [4]:
!pip install weaviate-client sentence-transformers langchain huggingface_hub>=0.14.1 git+https://github.com/huggingface/transformers@v4.29.0 -q diffusers accelerate datasets torch soundfile sentencepiece opencv-python openai > /dev/null
     

In [None]:
from huggingface_hub import notebook_login
notebook_login()

In [6]:
from langchain.embeddings import (
    HuggingFaceEmbeddings, 
    SentenceTransformerEmbeddings
)

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.vectorstores import Weaviate

In [7]:
def get_text_splits(text_file):
  """Function takes in the text data and returns the  
  splits so for further processing can be done."""
  with open(text_file,'r') as txt:
    data = txt.read()

  textSplit = RecursiveCharacterTextSplitter(chunk_size=150,
                                             chunk_overlap=15,
                                             length_function=len)
  doc_list = textSplit.split_text(data)
  return doc_list

In [None]:
#testing out the above function with the open source 
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [10]:
import weaviate
from weaviate.embedded import EmbeddedOptions

client = weaviate.Client(
  embedded_options=EmbeddedOptions(),
  additional_headers={
        "X-HuggingFace-Api-Key": ""
    }
)

embedded weaviate is already listening on port 6666


In [11]:
client.schema.delete_all()

client.schema.create_class(
    {
        "class": "Sphere",
        "description" : "Sphere vectorizer pipeline",
        "moduleConfig": {
        "text2vec-huggingface": {
          "model": "sentence-transformers/all-MiniLM-L6-v2",
          "options": {
            "waitForModel": True,
            "useGPU": False,
            "useCache": True
            }
          }
        },
        "properties": [
            {
                "name": "raw",
                "dataType": ["text"]
            }
        ],
     "vectorizer":"text2vec-huggingface"
    }
)

Embedded weaviate wasn't listening on port 6666, so starting embedded weaviate again
Started /root/.cache/weaviate-embedded: process ID 5361


In [14]:
mail_docs = get_text_splits("/content/mail_collector.txt")

In [None]:
mail_docs[0]

'Space via IFTTT <action@ifttt.com>\nAstronomy Picture of the Day:'

In [None]:
import time
# Configure a batch process
with client.batch as batch:
    batch.batch_size=5
    batch = 0
    for iterator in mail_docs:
        print(f"processing batch {batch}")
        properties = {
            "raw": iterator,
        }
        time.sleep(8)
        client.batch.add_data_object(properties,"Sphere")

In [73]:
def get_text_docs(question:str):
  nearText = {"concepts": [question]}
  result = (
      client.query
      .get("Sphere", ["raw"])
      .with_near_text(nearText)
      .with_limit(5)
      .do()
  )
  temp_result = result['data']['Get']['Sphere']
  print(temp_result)
  text = ''
  for txt in temp_result:
    text = text + txt['raw']
  return text

In [18]:
update_result = get_text_docs("photographs taken")

In [56]:
new_template = """I will ask you to perform a task, your job is to come up with a series of simple commands in Python that will perform the task.
To help you, I will give you access to a set of tools that you can use. Each tool is a Python function and has a description explaining the task it performs, 
the inputs it expects and the outputs it returns.
You should first explain which tool you will use to perform the task and for what reason, then write the code in Python.
Each instruction in Python should be a simple assignment. You can print intermediate results if it makes sense to do so.

Tools:
- text_qa: This is a tool that answers a question from a given `text`. 
It takes an input named `text` which should be the text containing the information, 
as well as a `question` that is the question about the text. 
It returns a text that contains the answer to the question.
- document_qa: This is a tool that answers a question about a document (pdf). It takes an input named `document` which should be the document containing the information, as well as a `question` that is the question about the document. It returns a text that contains the answer to the question.
- image_captioner: This is a tool that generates a description of an image. It takes an input named `image` which should be the image to the caption and returns a text that contains the description in English.


Task: "Answer the question in the variable `question` about the text in the variable `text`. Use the answer to generate an image."

I will use the following tools: `text_qa` to create the answer, then `image_generator` to generate an image according to the answer.

Answer:
```py
answer = text_qa(text=text, question=question)
print(f"The answer is {answer}.")```

Task: "Identify the oldest person in the `document` and create an image showcasing the result as a banner."

I will use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.

Answer:
```py
answer = document_qa(document, question="What is the oldest person?")
print(f"The answer is {answer}.")
image = image_generator("A banner showing " + answer)
```
"""

In [63]:
from transformers.tools import HfAgent
plain_agent = HfAgent("https://api-inference.huggingface.co/models/bigcode/starcoder")

In [64]:
plain_agent.run(task="""Task is to answer following question.""", 
                    question= """How many photographs are taken?""",
                    text="""2023-04-26 10:44:41+05:30Was this a lucky shot? Although many amazing photographs are taken by someone who just happenedÂ to be in the right place atthat it took many hours of exposure with a telescope in Seven Persons, Alberta , Canada to create the featured image.April 24, 2023via NASAright place at the right time, this image took skill and careful planning. First was the angularÂ scale: if you shoot too close to the famous Arc deplanning was successful, bringing two of humanity's most famous icons photographically together for all to enjoy.April 26, 2023via NASAof time 
                    -- from this distance less 
                    than a minute. Other planned features include lighting, relative brightness, 
                    height, capturing a good foreground""",
                   return_code=True)

==Explanation from the agent==
I will use the following  tools: `text_qa` to create the answer.


==Code generated by the agent==
answer = text_qa(text=text, question=question)
print(f"The answer is {answer}.")


'from transformers import load_tool\n\ntext_qa = load_tool("text-question-answering")\n\nanswer = text_qa(text=text, question=question)\nprint(f"The answer is {answer}.")'

In [None]:
plain_agent.run(task="""Task is to answer following question.""", 
                    question= """How many photographs are taken?""",
                    text="""2023-04-26 10:44:41+05:30Was this a lucky shot? Although many amazing photographs are taken by someone who just happenedÂ to be in the right place atthat it took many hours of exposure with a telescope in Seven Persons, Alberta , Canada to create the featured image.April 24, 2023via NASAright place at the right time, this image took skill and careful planning. First was the angularÂ scale: if you shoot too close to the famous Arc deplanning was successful, bringing two of humanity's most famous icons photographically together for all to enjoy.April 26, 2023via NASAof time 
                    -- from this distance less 
                    than a minute. Other planned features include lighting, relative brightness, 
                    height, capturing a good foreground""")

In [69]:
def agent_qa_fun(user_question):
  text_docs = get_text_docs(user_question)
  #print(text_docs)
  answer = plain_agent.run(task="Task is to answer following question", 
                question=user_question,
                  text=text_docs)
  print(answer)
  return answer

In [70]:
agent_qa_fun("How many pictures are taken?")

==Explanation from the agent==
I will use the following  tools: `text_qa` to create the answer.


==Code generated by the agent==
answer = text_qa(text=text, question=question)
print(f"The answer is {answer}.")


==Result==




The answer is many amazing photographs are taken by someone who just happened to be in the right place atof.
many amazing photographs are taken by someone who just happened to be in the right place atof


'many amazing photographs are taken by someone who just happened to be in the right place atof'

In [None]:
agent_qa_fun("How the sky must be?")