In [1]:
# %pip install langchain langchain-google-genai "langchain[docarray]" faiss-cpu pypdf

# %pip install langchain-community

In [2]:
import os
import requests
from PIL import Image

In [3]:

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import display, Markdown

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.document import Document
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS



In [5]:
from dotenv import load_dotenv
load_dotenv()

True

In [6]:
def load_model(model_name):
  if model_name=="gemini-pro":
    llm = ChatGoogleGenerativeAI(model="gemini-pro")
  else:
    llm=ChatGoogleGenerativeAI(model="gemini-1.5-flash")

  return llm


In [7]:
model_text=load_model("gemini-1.5-flash")


In [8]:
model_text.invoke("please come up with contentthe best funny line.").content

"I've got a million of 'em... but I'm too lazy to count that high."

In [9]:
import base64


def get_image(url,filename,extension):
  content = requests.get(url).content
  encoded = base64.b64encode(content).decode("utf-8")

  with open(f'../Multimodeel_RAG/content/{filename}.{extension}', 'wb') as f:
    f.write(content)
  image = Image.open(f"../Multimodeel_RAG/content/{filename}.{extension}")
  image.show()
  return f"data:image/jpeg;base64,{encoded}"

In [10]:

image = get_image("https://images.pexels.com/photos/292999/pexels-photo-292999.jpeg?cs=srgb&dl=pexels-goumbik-292999.jpg&fm=jpg",
                  "nike-shoes",
                  "png")


In [12]:

# plt.imshow(image)
# plt.show()

In [13]:
vision_model=load_model("gemini-1.5-flash")


In [14]:
prompt="give me summary of this image in 5 words"


In [15]:

message= HumanMessage(
    content=[
         {
            "type": "text",
            "text": prompt,
        },
        {

            "type": "image_url", "image_url": image
        }
    ]
)

In [17]:
print(vision_model.invoke([message]).content)


Black leather dress shoes, formal.


In [18]:
loader = TextLoader("../Multimodeel_RAG/content/nike_shoes.txt")
print(loader.load()[0].page_content)

Nike Air Max Plus sneakers. They feature a brown upper with a black Nike Swoosh logo on the side and a visible Air Max unit in the heel. The sole is white.
Here are some more details about the Nike Air Max Plus:
Style: TN
Release date: January 1, 2017
Style code: 852630-300
Original retail price: $150 USD
The Air Max Plus, also known as the TN, is a popular Nike running shoe that was first released in 1998. It is known for its unique design, which includes a gradient upper, visible Air Max units, and a wavy outsole. The TN has been a popular shoe among sneakerheads and casual wearers alike for over two decades.
It features a brown upper with a black Swoosh logo and a white sole. The shoe is currently available for resale on the StockX marketplace for an average price of around $150 USD.
Nike Air Max Plus Store Location: "Kings Way, Kathmandu, Nepal".
\n\n\n



Nike Dunk Low Retro sneakers.
Here are some more details about the Nike Dunk Low Retro:
Style: Low-top
Release date: October 31

In [30]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough


In [31]:
text=loader.load()[0].page_content


In [32]:

def get_text_chunks_langchain(text):
  text_splitter = CharacterTextSplitter(chunk_size=20, chunk_overlap=10)
  docs = [Document(page_content=x) for x in text_splitter.split_text(text)]
  return docs
     

In [33]:
docs = get_text_chunks_langchain(text)

Created a chunk of size 869, which is longer than the specified 20
Created a chunk of size 588, which is longer than the specified 20


In [34]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [35]:
vectorstore = FAISS.from_documents(docs,embedding=embeddings)


In [36]:
retriever=vectorstore.as_retriever()


In [37]:
retriever.invoke("Nike slide/sandal.")

[Document(id='504b5c9f-5dfc-40e4-ad11-36351723c8cc', metadata={}, page_content='Nike slide/sandal.\nThey appear to be the Nike Slide/Sandal, which is a unisex slide/sandal.\nHere are some of the features:\nSoft, one-piece upper: The upper is made of a soft, synthetic material that provides comfort and support.\nPhylon midsole: The midsole is made of Phylon, which provides cushioning and support.\nRubber outsole: The outsole is made of rubber for traction and durability.\nSwoosh logo: The Nike Swoosh logo is on the strap of the sandal.\nAvailable in a variety of colors: The Nike Benassi Solarsoft Sandal is available in a variety of colors, including black, white, and beige.\nNike off courte slides store location: "Bhaktapur, Nepal".'),
 Document(id='8671f9b4-2a4f-4b68-b51c-2f68bbcc5e6b', metadata={}, page_content='Nike Air Max Plus sneakers. They feature a brown upper with a black Nike Swoosh logo on the side and a visible Air Max unit in the heel. The sole is white.\nHere are some more

In [38]:
llm_vision = load_model("gemini-1.5-flash")


In [39]:

template = """
```
{context}
```

{query}


Provide brief information and store location.
"""

In [41]:
prompt = ChatPromptTemplate.from_template(template)


In [43]:
rag_chain = (
    {"context": retriever, "query": RunnablePassthrough()}
    | prompt
    | llm_vision
    | StrOutputParser()
)

In [44]:
result = rag_chain.invoke("can you give me a detail of nike sandal?")


In [45]:
display(Markdown(result))


Nike Slide/Sandal: A unisex slide/sandal with a soft, one-piece synthetic upper, Phylon midsole, rubber outsole, and Nike Swoosh logo.  Available in various colors.

Store Location: Bhaktapur, Nepal

In [46]:
rag_chain


{
  context: VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002411B94CE90>, search_kwargs={}),
  query: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'query'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'query'], input_types={}, partial_variables={}, template='\n```\n{context}\n```\n\n{query}\n\n\nProvide brief information and store location.\n'), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000002411B94F890>, default_metadata=(), model_kwargs={})
| StrOutputParser()

In [47]:
full_chain = (
    RunnablePassthrough() | llm_vision | StrOutputParser() | rag_chain
)

In [48]:
url_1 = "https://static.nike.com/a/images/t_PDP_1728_v1/f_auto,q_auto:eco/252f2db6-d426-4931-80a0-8b7f8f875536/calm-slides-K7mr3W.png"


In [49]:
image = get_image(url_1, "nike3", "png")

In [50]:

message = HumanMessage(
    content=[
        {
            "type": "text",
            "text": "Provide information on given sandle image Brand and model.",
        },  # You can optionally provide text parts
        {"type": "image_url", "image_url": image},
    ]
)

In [51]:
result = full_chain.invoke([message])


In [52]:
display(Markdown(result))


The provided text describes Nike Benassi Solarsoft Slides (it mentions "Nike Slide/Sandal" and "Nike Benassi Solarsoft Sandal" which are likely the same product),  a unisex slide with a soft upper, Phylon midsole, and rubber outsole.  They are available in various colors.  A store location in Bhaktapur, Nepal is mentioned.