In [170]:
# Import os to set API key
import os
# Import OpenAI as main LLM service
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
# Bring in streamlit for UI/app interface (note: no streamlit import actually follows in this cell)
# Import the PDF document loader; other document loaders are available as well
from langchain.document_loaders import PyPDFLoader
# Import chroma as the vector store 
from langchain.vectorstores import Chroma
from dotenv import dotenv_values
# Read the key/value pairs from the local 'env.env' file; the OpenAI key is
# expected under the 'Open_API' entry (a missing entry raises KeyError below).
env_variables = dotenv_values('env.env')
# Export the key as OPENAI_API_KEY in this process's environment.
# NOTE(review): presumably this is what lets OpenAIEmbeddings() / OpenAI() be
# constructed later without an explicit key argument - confirm.
os.environ['OPENAI_API_KEY'] = env_variables['Open_API']
from langchain.document_loaders import YoutubeLoader
from langchain.chat_models import ChatOpenAI

# Chat model (gpt-3.5-turbo, temperature 0) handed to RetrievalQA.from_chain_type
# further down. The name `llm` is rebound to OpenAI(temperature=0) in the
# agent-initialisation cell, so re-running cells out of order changes which
# model the QA chain is built with.
llm = ChatOpenAI(
    openai_api_key=env_variables['Open_API'],
    temperature=0,
    model_name='gpt-3.5-turbo'
)

In [171]:
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# Hugging Face checkpoint used for image captioning.
hf_model = "Salesforce/blip-image-captioning-large"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# The processor and the captioning model are both loaded from the same checkpoint;
# the model is then moved onto the selected device.
processor = BlipProcessor.from_pretrained(hf_model)
model = BlipForConditionalGeneration.from_pretrained(hf_model)
model = model.to(device)

In [172]:
from langchain.tools import BaseTool
from math import pi
from typing import Union


class CircumferenceTool(BaseTool):
    """LangChain tool that converts a circle's radius into its circumference."""

    name = "Circumference calculator"
    description = "use this tool when you need to calculate a circumference using the radius of a circle"

    def _run(self, radius: Union[int, float]):
        """Return the circumference for ``radius`` as a float.

        ``radius`` is converted with ``float()`` before use, so any value that
        ``float()`` accepts is handled.
        """
        diameter = float(radius) * 2.0
        return diameter * pi

    def _arun(self, radius: Union[int, float]):
        """Asynchronous execution is not implemented for this tool."""
        raise NotImplementedError("This tool does not support async")

In [173]:

import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# NOTE(review): this cell repeats the BLIP setup from the earlier cell (same
# checkpoint name, same device selection, same processor/model loading).
# Running it rebinds `processor` and `model` to freshly loaded objects, so the
# checkpoint is loaded a second time; one of the two cells can likely be removed.
hf_model = "Salesforce/blip-image-captioning-large"
# 'cuda' when a CUDA device is available, otherwise 'cpu'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Processor and captioning model come from the same checkpoint; the model is
# moved onto `device`.
processor = BlipProcessor.from_pretrained(hf_model)
model = BlipForConditionalGeneration.from_pretrained(hf_model).to(device)

In [174]:

import requests
from PIL import Image



In [175]:
from langchain.tools import BaseTool

desc = (
    "use this tool when given the URL of an image that you'd like to be "
    "described. It will return a simple caption describing the image."
)

class ImageCaptionTool(BaseTool):
    """LangChain tool that downloads an image from a URL and captions it.

    Uses the module-level ``processor``, ``model`` and ``device`` objects
    created in the BLIP setup cell.
    """
    name = "Image captioner"
    description = desc

    def _run(self, url: str):
        """Download the image at ``url`` and return a caption string.

        Args:
            url: HTTP(S) address of the image. Surrounding whitespace is
                stripped because the agent may pass the URL with a trailing
                newline or space.

        Returns:
            The generated caption, prefixed with
            " I know the answer, inside the image ".

        Raises:
            requests.RequestException: if the download times out, fails, or
                the server answers with a 4xx/5xx status.
        """
        # Local import keeps this block self-contained within the notebook cell.
        from io import BytesIO

        # A timeout prevents the agent from hanging forever on an unresponsive host.
        response = requests.get(url.strip(), timeout=30)
        # Fail with a clear HTTP error instead of feeding an error page to PIL.
        response.raise_for_status()
        # `response.content` is fully read and content-decoded, unlike `.raw`,
        # and reading it releases the connection back to the pool.
        image = Image.open(BytesIO(response.content)).convert('RGB')
        # preprocess the image and move the tensors to the model's device
        inputs = processor(image, return_tensors="pt").to(device)
        # generate the caption token ids (at most 20 new tokens)
        out = model.generate(**inputs, max_new_tokens=20)
        # decode the first generated sequence into text
        caption = processor.decode(out[0], skip_special_tokens=True)
        # The prefix nudges the agent to treat the observation as the answer.
        caption = " I know the answer, inside the image " + caption
        return caption

    def _arun(self, query: str):
        """Asynchronous execution is not implemented for this tool."""
        raise NotImplementedError("This tool does not support async")
from langchain.agents import load_tools


In [176]:

# Embedding model used to vectorise the transcript chunks.
embeddings = OpenAIEmbeddings()

# Example video; the question-answering cells further down use the same URL.
vid_url = 'https://www.youtube.com/watch?v=i3OYlaoj-BM'

# Chroma collection "YouTube_store", kept on disk under ./YouTube_db.
vectorstore = Chroma(
    collection_name="YouTube_store",
    embedding_function=embeddings,
    persist_directory="./YouTube_db",
)

from langchain.text_splitter import RecursiveCharacterTextSplitter
def add_video_to_DB(url):
    """Load a YouTube video via ``YoutubeLoader``, chunk it and index every chunk.

    The loaded documents are split into pieces of at most 400 characters
    (20 characters of overlap), all pieces are added to the module-level
    ``vectorstore``, and the store is persisted.

    Args:
        url: URL of the YouTube video to ingest.

    Returns:
        None.

    Raises:
        ValueError: if loading and splitting the video produced no chunks.
    """
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=20)
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # Previously this surfaced as a bare IndexError on `texts[0]`.
        raise ValueError(f"No transcript text could be loaded for video: {url}")
    # Index ALL chunks. The earlier version stored only the first chunk, so
    # questions about anything past the opening ~400 characters had no context.
    vectorstore.add_documents(chunks)
    vectorstore.persist()


In [177]:
from langchain.tools import BaseTool
from langchain.chains import RetrievalQA

desc = (
    "use this tool when given the URL of a youtube video and that you need you need to watch video to answer questions about video"
)
# URLs ingested during this kernel session.
# NOTE(review): this list lives only in memory while the vector store is given
# a persist_directory, so after a kernel restart the same video would be
# indexed again - confirm whether that is acceptable.
url_list = []
class YouTubeRetrival(BaseTool):
    """LangChain tool that ingests a YouTube video into the vector store once.

    The returned message tells the agent to follow up with the
    "Retrieval Question Answering tool".
    """
    name = "watch video"
    description = desc

    def _run(self, url: str):
        """Index the video at ``url`` unless it was already indexed this session.

        Args:
            url: URL of the YouTube video. Surrounding whitespace is stripped
                so the same video is not indexed twice because of formatting.

        Returns:
            An instruction string for the agent, saying whether the video was
            newly saved or already present.

        Raises:
            Whatever ``add_video_to_DB`` raises. In that case the URL is NOT
            recorded, so a later call retries the ingestion.
        """
        url = url.strip()
        if url in url_list:
            return "video already existed in the database. Now use Retrieval Question Answering tool to answer question. Input to Retrieval Question Answering tool is the question you received about the video "
        # Ingest first and record afterwards: recording before a failed
        # ingestion would make later calls wrongly report the video as stored.
        add_video_to_DB(url)
        url_list.append(url)
        return "video saved into vectort database. Now use Retrieval Question Answering tool to answer question. Input to Retrieval Question Answering tool is the question you received about the video "

    def _arun(self, query: str):
        """Asynchronous execution is not implemented for this tool."""
        raise NotImplementedError("This tool does not support async")



In [178]:
from langchain.tools import BaseTool, StructuredTool, Tool, tool

# Instantiate the custom tools and the retrieval QA chain the agent will call.
image_cap = ImageCaptionTool()
calcu = CircumferenceTool()
QA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)
watch_video = YouTubeRetrival()

# One (callable, tool name, description shown to the LLM) triple per tool.
_tool_specs = [
    (
        image_cap.run,
        "caption",
        "use this tool only when given the URL of an image that you'd like to be described. It will return a simple string describing the image. once you recived the string don't use any tool and use it as final answer",
    ),
    (
        calcu.run,
        "calculator",
        "use this tool only when you need to calculate a circumference using the radius of a circle. it receives a floating point number ",
    ),
    (
        watch_video.run,
        "Watch Youtube video tool",
        "use this tool Only when given the URL of a youtube video and that you need you need to watch video to answer questions about video",
    ),
    (
        QA.run,
        "Retrieval Question Answering tool",
        "Use this tool only after Watch Youtube video tool. It is designed to answer questions about the video using the URL provided.",
    ),
]

# Wrap each callable as a Tool, preserving the order above.
tools = [
    Tool.from_function(func=tool_func, name=tool_name, description=tool_description)
    for tool_func, tool_name, tool_description in _tool_specs
]

In [179]:
from langchain.agents import AgentOutputParser
from langchain.agents.conversational_chat.prompt import FORMAT_INSTRUCTIONS
from langchain.output_parsers.json import parse_json_markdown
from langchain.schema import AgentAction, AgentFinish

class OutputParser(AgentOutputParser):
    """Turn the LLM's JSON-in-markdown reply into an agent action or a final answer."""

    def get_format_instructions(self) -> str:
        """Return the imported ``FORMAT_INSTRUCTIONS`` prompt text."""
        return FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> AgentAction | AgentFinish:
        """Parse one LLM reply.

        Args:
            text: Raw reply from the LLM.

        Returns:
            ``AgentFinish`` when the parsed ``action`` is "Final Answer", or
            when the reply cannot be parsed (the raw text then becomes the
            output); otherwise an ``AgentAction`` for the requested tool.
        """
        try:
            # Works only if the text is valid JSON carrying "action" and
            # "action_input". A "Thought" key is no longer required: it was
            # read but never used, and its absence forced the fallback path.
            response = parse_json_markdown(text)
            action, action_input = response["action"], response["action_input"]
        except Exception:
            # The model sometimes answers in plain text instead of JSON, often
            # when it is finished. Treat the text as the final answer rather
            # than returning a bare string, which is neither an AgentAction
            # nor an AgentFinish.
            return AgentFinish({"output": text}, text)

        if action == "Final Answer":
            # The agent is done; hand back the answer.
            return AgentFinish({"output": action_input}, text)
        # Otherwise the agent wants to call a tool.
        return AgentAction(action, action_input, text)

    @property
    def _type(self) -> str:
        """Identifier string for this parser type."""
        return "conversational_chat"
parser = OutputParser()

In [180]:
from langchain.agents import initialize_agent
from langchain.chains.conversation.memory import ConversationBufferMemory

from langchain.agents import AgentType

# Conversation history is kept under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history")

# NOTE: this rebinds the module-level name `llm`, which was previously the
# ChatOpenAI instance used to build the retrieval QA chain.
llm = OpenAI(temperature=0)

# Conversational ReAct agent wired to the tools and the memory above.
agent_chain = initialize_agent(
    tools,
    llm,
    agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=True,
    memory=memory,
)


In [181]:
agent_chain("hi, i am bob")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? No
AI: Hi Bob, nice to meet you! How can I help you?[0m

[1m> Finished chain.[0m


{'input': 'hi, i am bob',
 'chat_history': '',
 'output': 'Hi Bob, nice to meet you! How can I help you?'}

In [163]:
agent_chain("what was my name?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? No
AI: Your name is Bob. Is there anything else I can help you with?[0m

[1m> Finished chain.[0m


{'input': 'what was my name?',
 'chat_history': 'Human: hi, i am bob\nAI: Hi Bob, nice to meet you! How can I help you?',
 'output': 'Your name is Bob. Is there anything else I can help you with?'}

In [164]:
img_url = "https://images.unsplash.com/photo-1568605117036-5fe5e7bab0b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1740&q=80"
agent_chain(f"What does the image in the following link show?\n{img_url}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? Yes
Action: caption
Action Input: https://images.unsplash.com/photo-1568605117036-5fe5e7bab0b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1740&q=80[0m
Observation: [36;1m[1;3m I know the answer, inside the image arafed white sports car driving down a winding road at sunset[0m
Thought:[32;1m[1;3m Do I need to use a tool? No
AI: The image shows a white sports car driving down a winding road at sunset.[0m

[1m> Finished chain.[0m


{'input': 'What does the image in the following link show?\nhttps://images.unsplash.com/photo-1568605117036-5fe5e7bab0b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1740&q=80',
 'chat_history': 'Human: hi, i am bob\nAI: Hi Bob, nice to meet you! How can I help you?\nHuman: what was my name?\nAI: Your name is Bob. Is there anything else I can help you with?',
 'output': 'The image shows a white sports car driving down a winding road at sunset.'}

#### Calculate the circumference of a circle

In [165]:
question = "can you calculate the circumference of a circle that has a radius of 7.81"
agent_chain(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? Yes
Action: calculator
Action Input: 7.81[0m
Observation: [33;1m[1;3m49.071677249072565[0m
Thought:[32;1m[1;3m Do I need to use a tool? No
AI: The circumference of a circle with a radius of 7.81 is 49.071677249072565.[0m

[1m> Finished chain.[0m


{'input': 'can you calculate the circumference of a circle that has a radius of 7.81',
 'chat_history': 'Human: hi, i am bob\nAI: Hi Bob, nice to meet you! How can I help you?\nHuman: what was my name?\nAI: Your name is Bob. Is there anything else I can help you with?\nHuman: What does the image in the following link show?\nhttps://images.unsplash.com/photo-1568605117036-5fe5e7bab0b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1740&q=80\nAI: The image shows a white sports car driving down a winding road at sunset.',
 'output': 'The circumference of a circle with a radius of 7.81 is 49.071677249072565.'}

In [30]:
from math import pi
(7.81 * 2) * pi

49.071677249072565

#### Describe an image

In [166]:
img_url = "https://images.unsplash.com/photo-1502680390469-be75c86b636f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=2370&q=80"
agent_chain(img_url)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? Yes
Action: caption
Action Input: https://images.unsplash.com/photo-1502680390469-be75c86b636f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=2370&q=80[0m
Observation: [36;1m[1;3m I know the answer, inside the image surfer riding a wave in the ocean on a clear day[0m
Thought:[32;1m[1;3m Do I need to use a tool? No
AI: The image shows a surfer riding a wave in the ocean on a clear day.[0m

[1m> Finished chain.[0m


{'input': 'https://images.unsplash.com/photo-1502680390469-be75c86b636f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=2370&q=80',
 'chat_history': 'Human: hi, i am bob\nAI: Hi Bob, nice to meet you! How can I help you?\nHuman: what was my name?\nAI: Your name is Bob. Is there anything else I can help you with?\nHuman: What does the image in the following link show?\nhttps://images.unsplash.com/photo-1568605117036-5fe5e7bab0b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1740&q=80\nAI: The image shows a white sports car driving down a winding road at sunset.\nHuman: can you calculate the circumference of a circle that has a radius of 7.81\nAI: The circumference of a circle with a radius of 7.81 is 49.071677249072565.',
 'output': 'The image shows a surfer riding a wave in the ocean on a clear day.'}

#### Question Answering about a YouTube video

In [167]:
vid_url = 'https://www.youtube.com/watch?v=i3OYlaoj-BM'

In [168]:
agent_chain(f" According to following YouTube video, When did the Imaginative Laws Scale Visual Recognition Challenge take place??\n{vid_url}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? Yes
Action: Watch Youtube video tool
Action Input: https://www.youtube.com/watch?v=i3OYlaoj-BM[0m
Observation: [38;5;200m[1;3mvideo saved into vectort database. Now use Retrieval Question Answering tool to answer question. Input to Retrieval Question Answering tool is the question you received about the video [0m
Thought:[32;1m[1;3m Do I need to use a tool? Yes
Action: Retrieval Question Answering tool
Action Input: When did the Imaginative Laws Scale Visual Recognition Challenge take place?[0m
Observation: [36;1m[1;3mThe Imaginative Laws Scale Visual Recognition Challenge took place from around 2010 to 2017.[0m
Thought:[32;1m[1;3m Do I need to use a tool? No
AI: The Imaginative Laws Scale Visual Recognition Challenge took place from around 2010 to 2017.[0m

[1m> Finished chain.[0m


{'input': ' According to following YouTube video, When did the Imaginative Laws Scale Visual Recognition Challenge take place??\nhttps://www.youtube.com/watch?v=i3OYlaoj-BM',
 'chat_history': 'Human: hi, i am bob\nAI: Hi Bob, nice to meet you! How can I help you?\nHuman: what was my name?\nAI: Your name is Bob. Is there anything else I can help you with?\nHuman: What does the image in the following link show?\nhttps://images.unsplash.com/photo-1568605117036-5fe5e7bab0b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1740&q=80\nAI: The image shows a white sports car driving down a winding road at sunset.\nHuman: can you calculate the circumference of a circle that has a radius of 7.81\nAI: The circumference of a circle with a radius of 7.81 is 49.071677249072565.\nHuman: https://images.unsplash.com/photo-1502680390469-be75c86b636f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=2370&q=80\nAI: The im

In [169]:

agent_chain(f" In the following video, What was the purpose of the competition that acted as the go-to place for finding the current state of the art in image classification, object localization, and object detection?\n{vid_url}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? Yes
Action: Watch Youtube video tool
Action Input: https://www.youtube.com/watch?v=i3OYlaoj-BM[0m
Observation: [38;5;200m[1;3mvideo already existed in the database. Now use Retrieval Question Answering tool to answer question. Input to Retrieval Question Answering tool is the question you received about the video [0m
Thought:[32;1m[1;3m Do I need to use a tool? Yes
Action: Retrieval Question Answering tool
Action Input: In the following video, What was the purpose of the competition that acted as the go-to place for finding the current state of the art in image classification, object localization, and object detection?[0m
Observation: [36;1m[1;3mThe purpose of the competition was to serve as a platform for finding the current state of the art in image classification, object localization, and object detection. It acted as a go-to place for researchers and practitioners to showcase th

{'input': ' In the following video, What was the purpose of the competition that acted as the go-to place for finding the current state of the art in image classification, object localization, and object detection?\nhttps://www.youtube.com/watch?v=i3OYlaoj-BM',
 'chat_history': 'Human: hi, i am bob\nAI: Hi Bob, nice to meet you! How can I help you?\nHuman: what was my name?\nAI: Your name is Bob. Is there anything else I can help you with?\nHuman: What does the image in the following link show?\nhttps://images.unsplash.com/photo-1568605117036-5fe5e7bab0b7?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1740&q=80\nAI: The image shows a white sports car driving down a winding road at sunset.\nHuman: can you calculate the circumference of a circle that has a radius of 7.81\nAI: The circumference of a circle with a radius of 7.81 is 49.071677249072565.\nHuman: https://images.unsplash.com/photo-1502680390469-be75c86b636f?ixlib=rb-4.0.3&ixid=Mn