In [11]:
import os
from dotenv import load_dotenv
import requests
import base64
from google.cloud import storage
from google.cloud import aiplatform
from google.protobuf import struct_pb2
import sys
import time
import typing
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from pyspark.sql.functions import pandas_udf, PandasUDFType
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from vertexai.preview.language_models import TextEmbeddingModel
from vertexai.preview.generative_models import GenerativeModel, Part
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType
from pyspark.sql.types import ArrayType, FloatType

load_dotenv()

#from IPython.display import Image, display
url = "https://www.googleapis.com/customsearch/v1"
total_images = 20 # Total number of images to download
images_per_request = 5  # Maximum number of images per request
params = {
    "cx": os.getenv("Google_CSE_ID"),
    "key": os.getenv("Google_API_Key"),
    "searchType": "image",
    "num": images_per_request,
    "fileType": "BMP, GIF, JPEG, PNG"
}

In [12]:
def image_search(query):
    all_images = []  # List to hold all the images

    for start_index in range(1, total_images + 1, images_per_request):
        response = requests.get(url, params=params)
        data = response.json()
        params["q"] = query
        params["start"] = start_index
        if 'items' in data:
            all_images.extend(item['link'] for item in data['items'])
        else:
            print('an error occured while searching')
    return all_images

imgs = image_search('Image of the Eiffel Tower of Paris')
imgs

an error occured while searching


['https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/640px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg',
 'https://cdn.britannica.com/54/75854-050-E27E66C0/Eiffel-Tower-Paris.jpg',
 'https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/1200px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg',
 'https://cdn.pariscityvision.com/library/image/5144.jpg',
 'https://www.travelandleisure.com/thmb/SPUPzO88ZXq6P4Sm4mC5Xuinoik=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/eiffel-tower-paris-france-EIFFEL0217-6ccc3553e98946f18c893018d5b42bde.jpg',
 'https://i.natgeofe.com/k/c41b4f59-181c-4747-ad20-ef69987c8d59/eiffel-tower-night_3x4.jpg',
 'https://cdn.thecollector.com/wp-content/uploads/2023/01/how-tall-is-the-eiffel-tower-facts.jpg?width=1400&quality=70',
 'https://media.cntraveler.com/photos/58de89946c3567139f9b6cca/16:9/w_2560%2Cc_limit/GettyImages-468366251.jpg',
 'http

In [13]:
def generate(image_link):
    """
    Description of the images 

    Args:
        image_link (str): link of the image

    Returns:
        str: A complete description of the images link
    """
    model = GenerativeModel("gemini-pro-vision")
    responses = model.generate_content(
        [image_link, """Give me a very brief description of this image"""],
        generation_config={
            "max_output_tokens": 1024,
            "temperature": 0.4,
            "top_p": 1,
            "top_k": 32
        },
    stream=True,
    )
    return " ".join([response.candidates[0].content.parts[0].text for response in responses])
print(generate('https://cdn.pariscityvision.com/library/image/5144.jpg'))

The image shows the Eiffel Tower in Paris, France, illuminated at night.


In [14]:
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

# Function to apply captioning to a list of image URLs using multiple threads
def image_captioning_parallel(items_links):
    captions = {}
    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_url = {executor.submit(generate, url): url for url in items_links}
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                caption = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (url, exc))
            else:
                captions[url] = caption
    return captions
captions = image_captioning_parallel(imgs)

In [15]:
captions

{'https://cdn.britannica.com/54/75854-050-E27E66C0/Eiffel-Tower-Paris.jpg': 'The Eiffel Tower, a wrought iron lattice tower on the Champ de Mars in Paris , France.',
 'https://cdn.pariscityvision.com/library/image/5144.jpg': 'The image shows the Eiffel Tower in Paris, France, illuminated at night.',
 'https://www.travelandleisure.com/thmb/SPUPzO88ZXq6P4Sm4mC5Xuinoik=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/eiffel-tower-paris-france-EIFFEL0217-6ccc3553e98946f18c893018d5b42bde.jpg': 'The Eiffel Tower in Paris, France, at night.',
 'https://i.natgeofe.com/k/c41b4f59-181c-4747-ad20-ef69987c8d59/eiffel-tower-night_3x4.jpg': 'The Eiffel Tower illuminated at night, with the city lights of Paris in the background.',
 'https://media.cntraveler.com/photos/58de89946c3567139f9b6cca/16:9/w_2560%2Cc_limit/GettyImages-468366251.jpg': 'A woman in a white dress is sitting on a bench in front of a body of  water, surrounded by trees.',
 'https://cdn.thecollector.com/wp-content/uploads/

In [16]:
PROJECT_ID = 'ping38' # @param {type: "string"}

# Inspired from https://stackoverflow.com/questions/34269772/type-hints-in-namedtuple.
class EmbeddingResponse(typing.NamedTuple):
  text_embedding: typing.Sequence[float]
  image_embedding: typing.Sequence[float]

class EmbeddingPredictionClient:
  """Wrapper around Prediction Service Client."""
  def __init__(self, project : str,
    location : str = "us-central1",
    api_regional_endpoint: str = "us-central1-aiplatform.googleapis.com"):
    client_options = {"api_endpoint": api_regional_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    self.client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    self.location = location
    self.project = project

  def get_embedding(self, text : str = None, image_bytes : bytes = None):
    if not text and not image_bytes:
      raise ValueError('At least one of text or image_bytes must be specified.')

    instance = struct_pb2.Struct()
    if text:
      instance.fields['text'].string_value = text

    if image_bytes:
      encoded_content = base64.b64encode(image_bytes).decode("utf-8")
      image_struct = instance.fields['image'].struct_value
      image_struct.fields['bytesBase64Encoded'].string_value = encoded_content

    instances = [instance]
    endpoint = (f"projects/{self.project}/locations/{self.location}"
      "/publishers/google/models/multimodalembedding@001")
    response = self.client.predict(endpoint=endpoint, instances=instances)

    text_embedding = None
    if text:
      text_emb_value = response.predictions[0]['textEmbedding']
      text_embedding = [v for v in text_emb_value]

    image_embedding = None
    if image_bytes:
      image_emb_value = response.predictions[0]['imageEmbedding']
      image_embedding = [v for v in image_emb_value]

    return EmbeddingResponse(
      text_embedding=text_embedding,
      image_embedding=image_embedding)

client = EmbeddingPredictionClient(project=PROJECT_ID)

In [17]:
# Extract image embedding
def getImageEmbeddingFromImageContent(content):
  response = client.get_embedding(text=None, image_bytes=content)
  return response.image_embedding

# Extract text embedding
def getTextEmbedding(text):
  response = client.get_embedding(text=text, image_bytes=None)
  return response.text_embedding

def getImageEmbeddingFromUrl(url):
    try:
      response = requests.get(url)
    except:
      pass
    return getImageEmbeddingFromImageContent(response.content)

getImageEmbeddingFromUrl('https://learn.microsoft.com/en-us/power-bi/connect-data/media/desktop-connect-to-data/get-data-from-web.png')

[-0.00621091574,
 0.0146486359,
 0.00745765399,
 -0.00406848686,
 0.0134876752,
 -0.0129733179,
 0.00425028661,
 -0.000804784533,
 -0.052062016,
 -0.0511286594,
 -0.0559581742,
 -0.0180540048,
 -0.00114576251,
 0.0850081071,
 0.00780107873,
 -0.0124911526,
 0.04939989,
 -0.0154251819,
 0.0284642596,
 -0.0143254511,
 0.00641967403,
 -0.00949991,
 0.0152427284,
 0.0112587046,
 0.0142073715,
 -0.0112485467,
 -0.00173915189,
 -0.0174035858,
 0.00919316523,
 0.00993117783,
 -0.000368564681,
 0.00881721359,
 0.0016585436,
 -0.0145162018,
 0.00722565548,
 -0.0204348,
 0.00136110792,
 -0.0108108,
 0.00714082876,
 0.0158091709,
 -0.0163562857,
 -0.036199227,
 -0.0287119728,
 -0.0106638,
 0.00673536956,
 -0.0158925522,
 0.00438093487,
 -0.00859847106,
 0.0177234896,
 -0.0289033037,
 0.0135135055,
 -0.0192392245,
 0.00970596727,
 0.0286750905,
 -0.0324636698,
 0.0391437225,
 -0.0137267392,
 -0.0190137383,
 -0.0233200919,
 0.000764889119,
 -0.00390813639,
 -0.00814874936,
 -0.00553518767,
 0.00596

In [18]:
df_items = pd.DataFrame(list(captions.items()), columns=['ImageURL', 'Caption'])
df_items

Unnamed: 0,ImageURL,Caption
0,https://cdn.britannica.com/54/75854-050-E27E66...,"The Eiffel Tower, a wrought iron lattice tower..."
1,https://cdn.pariscityvision.com/library/image/...,"The image shows the Eiffel Tower in Paris, Fra..."
2,https://www.travelandleisure.com/thmb/SPUPzO88...,"The Eiffel Tower in Paris, France, at night."
3,https://i.natgeofe.com/k/c41b4f59-181c-4747-ad...,"The Eiffel Tower illuminated at night, with th..."
4,https://media.cntraveler.com/photos/58de89946c...,A woman in a white dress is sitting on a bench...
5,https://cdn.thecollector.com/wp-content/upload...,"The Eiffel Tower in Paris, France, stands tall..."
6,https://upload.wikimedia.org/wikipedia/commons...,"This is a picture of the Eiffel Tower, a wroug..."
7,https://upload.wikimedia.org/wikipedia/commons...,"The image shows the Eiffel Tower in Paris, Fra..."
8,https://i.natgeofe.com/k/6d4021bf-832e-49f6-b8...,This image shows the Eiffel Tower taken from t...
9,https://cdn.thecollector.com/wp-content/upload...,"The image shows the Eiffel Tower in Paris, Fra..."


In [19]:

def image_selection(query, data):
    """
    Research of the relevant images

    Args:
        query (str): The initial query we are looking for an image
        data (pd.DataFrame): DataFrame which contains description of each image

    Returns:
        str: A link of the relevant image
    """
    # Get the embedding for the query text
    query_vect = getTextEmbedding(query)
    query_vect = np.array(query_vect).reshape(1, -1)  # Reshape to 2D array

    # Get embeddings for each caption and compute cosine similarity
    data['embedding'] = data['Caption'].apply(lambda x: getTextEmbedding(x))
    data['cos_sim'] = data['embedding'].apply(lambda x: cosine_similarity(query_vect, np.array(x).reshape(1, -1))[0][0])


    # Sort by cosine similarity in descending order and get the most relevant item
    most_relevant_item = data.sort_values(by='cos_sim', ascending=False).iloc[0]

    return most_relevant_item['ImageURL']

# Convert dictionary to DataFrame
df_items = pd.DataFrame(list(captions.items()), columns=['ImageURL', 'Caption'])

# Call the function with a query and the DataFrame
print(image_selection("Image of the Eiffel Tower of Paris", df_items))

https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/1200px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg


In [20]:
def image_selection(query, data):
    """
    Research of the relevant images

    Args:
        query (str): The initial query we are looking for an image
        data (pd.DataFrame): DataFrame which contains description of each image

    Returns:
        str: A link of the relevant image
    """
    # Get the embedding for the query text
    query_vect = getTextEmbedding(query)

    # Get embeddings for all captions at once
    embeddings = getTextEmbedding(data['Caption'].tolist())

    # Compute cosine similarities for all embeddings at once
    cosine_similarities = cosine_similarity(query_vect, embeddings)[0]

    # Add cosine similarities to the dataframe
    data['cos_sim'] = cosine_similarities

    # Sort by cosine similarity in descending order
    sorted_data = data.sort_values(by='cos_sim', ascending=False)

    # Get the most relevant item (skipping the first which will be the query itself)
    most_relevant_item = sorted_data.iloc[1]

    return most_relevant_item['ImageURL']

# Example usage
# data is a DataFrame with a 'Caption' column containing text descriptions of images
print(image_selection("Image of the Eiffel Tower of Paris", df_items))

TypeError: bad argument type for built-in operation