# AI 图像识别

利用 LangChain 框架实现基于 OpenAI 的图像识别应用

## 1. 必备包安装

In [None]:
!pip install langchain transformers python-dotenv
!pip install langchain-openai 

## 2. 引入函数

In [None]:
from langchain.agents import initialize_agent
from langchain_openai import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
import os

In [None]:
from dotenv import find_dotenv, load_dotenv
# Locate a .env file with find_dotenv() and load its entries into the process
# environment. NOTE(review): presumably this is where OPENAI_API_KEY comes
# from for ChatOpenAI -- confirm against the project's .env file.
load_dotenv(find_dotenv())

In [None]:
# Chat model handed to the agent further down; configured with temperature 0
# and the 'gpt-3.5-turbo' model.
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0,
)


## 3. 引入模型

这里使用的是BLIP的多模态模型

In [None]:
#!pip install torch
!pip install pillow

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
# Checkpoint identifier for the BLIP captioning model. It is defined once and
# passed to both loaders so the processor and the model always come from the
# same checkpoint.
image_to_text_model = "Salesforce/blip-image-captioning-large"

# Pre-/post-processing object and the captioning model, both used by
# describeImage.
processor = BlipProcessor.from_pretrained(image_to_text_model)
model = BlipForConditionalGeneration.from_pretrained(image_to_text_model)


## 4. 分析模型

In [None]:
import requests
from PIL import Image

def describeImage(image_url, timeout=30):
    """Download the image at ``image_url`` and return a generated caption.

    Args:
        image_url: HTTP(S) URL of the image to describe.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The caption decoded from the model output, with special tokens
        stripped.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    from io import BytesIO

    # Use the response as a context manager so the connection is always
    # released, and fail early on HTTP errors instead of handing an error
    # page to PIL.
    with requests.get(image_url, timeout=timeout) as response:
        response.raise_for_status()
        # response.content is the fully downloaded, content-decoded body;
        # reading response.raw directly would skip Content-Encoding decoding.
        image = Image.open(BytesIO(response.content)).convert('RGB')

    inputs = processor(image, return_tensors='pt')
    outputs = model.generate(**inputs)

    return processor.decode(outputs[0], skip_special_tokens=True)

In [None]:
description = describeImage("https://images.unsplash.com/photo-1673207520321-c27d09eb0955?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=1035&q=80")

In [None]:
description

## 5. 定义Tool

In [None]:
from langchain.tools import BaseTool

class DescribeImageTool(BaseTool):
    """Tool wrapper that lets an agent caption an image given its URL.

    Synchronous calls are delegated to ``describeImage``; asynchronous calls
    are not supported.
    """

    # BaseTool is a pydantic model. The fields are annotated explicitly
    # because pydantic v2 refuses to override an inherited field with a
    # non-annotated class attribute.
    name: str = "Describe Image Tool"
    description: str = "use this tool to describe an image"

    def _run(
        self,
        url: str
    ) -> str:
        """Return the caption produced by ``describeImage`` for ``url``."""
        return describeImage(url)

    def _arun(
        self,
        query: str):
        """Asynchronous entry point; not supported.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Async operation not supported yet")
    

tools = [DescribeImageTool()]
    

## 6. 创建LangChain Agent

In [None]:
# Build the conversational agent from the tool list and the chat model.
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    # Cap the number of reasoning/tool steps per request.
    max_iterations=3,
    # The keyword must be spelled `early_stopping_method` to take effect.
    # NOTE(review): 'generate' is meant to make the LLM produce a final
    # answer once the iteration cap is hit -- confirm the installed LangChain
    # version supports it for this agent type.
    early_stopping_method='generate',
    # Conversation memory exposed under 'chat_history', windowed with k=5 and
    # returned as message objects.
    memory=ConversationBufferWindowMemory(
        memory_key='chat_history',
        k=5,
        return_messages=True,
    ),
)

## 7. 使用Agent

In [ ]:
image_url = 'https://images.unsplash.com/photo-1705171600800-e12460074415?q=80&w=4151&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D'
agent(f"Describe the following image:\n{image_url}")

In [ ]:
image_url = 'https://images.unsplash.com/photo-1705310683793-69a58304e5ce?q=80&w=3968&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D'
agent(f"Describe the following image:\n{image_url}")

In [ ]:
agent.memory.buffer