In [9]:
!pip install openai pillow requests sentence-transformers scikit-learn



In [10]:
import os
import re
from io import BytesIO

import numpy as np
import openai
import requests
import urllib3
from PIL import Image
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Silence urllib3's InsecureRequestWarning for the whole kernel session.
# NOTE(review): none of the cells visible here issue an unverified HTTPS
# request themselves — confirm this suppression is still needed.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [11]:
# Read the API key from the environment so the secret never ends up in the
# notebook source, its saved outputs, or version control.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )

In [12]:
class CaptioningAgent:
    """Caption a hotel-room image through the OpenAI chat-completions API.

    Args:
        model: Name of the chat model to call. Defaults to "gpt-4o".
        max_tokens: Cap on the number of tokens in the generated caption.
            Defaults to 100.
    """

    # Instruction sent with every image; it restricts the caption to capacity,
    # view, air conditioner and desk.
    PROMPT = (
        "Describe the hotel room in terms of followings: Room capacity in terms of person, "
        "its view (sea, city or no view), if it includes air conditioner or desk. "
        "Do not give further information."
    )

    def __init__(self, model="gpt-4o", max_tokens=100):
        self.model = model
        self.max_tokens = max_tokens

    def generate_caption(self, url):
        """Return the model's description of the image at ``url``.

        Args:
            url: Image URL passed to the API as an ``image_url`` content part.

        Returns:
            The first choice's message text with surrounding whitespace removed.

        Raises:
            ValueError: If the first choice carries no text content.
        """
        resp = openai.chat.completions.create(
            model=self.model,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": self.PROMPT},
                    {"type": "image_url", "image_url": {"url": url}},
                ],
            }],
            max_tokens=self.max_tokens,
        )
        content = resp.choices[0].message.content
        # content can be None; fail with a readable message instead of an
        # AttributeError on None.strip().
        if content is None:
            raise ValueError(f"Model returned no caption text for {url}")
        return content.strip()

class IndexingAgent:
    """In-memory store that pairs each image URL with its caption."""

    def __init__(self):
        # Entries are appended in the order they are added.
        self.index = list()

    def add(self, url, caption):
        """Append one ``{"url": ..., "caption": ...}`` entry to the index."""
        entry = dict(url=url, caption=caption)
        self.index.append(entry)

    def get_index(self):
        """Return the underlying list of entries (the same object, not a copy)."""
        return self.index


class QueryMatchingEngine:
    """Match text queries to captioned images with a hybrid semantic/keyword score.

    Args:
        index: Iterable of dicts carrying "url" and "caption" keys.
        embedding_model: Object whose ``encode(list_of_str)`` returns one
            vector per input string.
    """

    def __init__(self, index, embedding_model):
        self.index = index
        self.embedding_model = embedding_model

    def extract_number(self, text):
        """Extract the first integer found in the text, or None if there is none."""
        match = re.search(r'\b(\d+)\b', text)
        return int(match.group(1)) if match else None

    def keyword_match(self, query, caption):
        """Return 1.0 on a substring or first-number match, otherwise 0.0.

        Both arguments are compared case-insensitively. A blank query never
        counts as a substring hit, because an empty string is contained in
        every caption.
        """
        query_lower = query.lower()
        caption_lower = caption.lower()

        # Substring keyword match (skipped for empty / whitespace-only queries)
        if query_lower.strip() and query_lower in caption_lower:
            return 1.0

        # Capacity number match (e.g., "4 people" vs "capacity: 4 people").
        # Only the first integer on each side is compared.
        query_number = self.extract_number(query_lower)
        caption_number = self.extract_number(caption_lower)
        if query_number is not None and caption_number == query_number:
            return 1.0

        return 0.0

    def match_queries(self, queries, threshold=0.6):
        """Return, for every query, the URLs whose caption scores at or above ``threshold``.

        The score for a (query, caption) pair is the larger of the cosine
        similarity of their embeddings and ``keyword_match``.

        Args:
            queries: Iterable of query strings. Duplicates are collapsed so
                each URL is reported at most once per distinct query.
            threshold: Minimum combined score for a match. Defaults to 0.6.

        Returns:
            Dict mapping each distinct query (original casing) to a list of
            matching URLs in index order.
        """
        # De-duplicate while keeping first-seen order; also makes a one-shot
        # iterable safe to traverse more than once.
        unique_queries = list(dict.fromkeys(queries))
        results = {query: [] for query in unique_queries}
        if not unique_queries:
            return results

        # Encode every query once up front instead of once per indexed image.
        prepared = []
        for query in unique_queries:
            query_lower = query.lower()
            query_embedding = self.embedding_model.encode([query_lower])[0]
            prepared.append((query, query_lower, query_embedding))

        for item in self.index:
            caption = item["caption"].lower()
            url = item["url"]
            caption_embedding = self.embedding_model.encode([caption])[0]

            for query, query_lower, query_embedding in prepared:
                semantic_score = cosine_similarity([query_embedding], [caption_embedding])[0][0]
                keyword_score = self.keyword_match(query_lower, caption)

                # Hybrid scoring: take the higher of the two
                if max(semantic_score, keyword_score) >= threshold:
                    results[query].append(url)

        return results



In [20]:
# Free-text searches evaluated against the generated captions.
queries = [
    "double room with sea view",
    "room with a balcony and air conditioning, with a city view",
    "triple room with a desk",
    "room with a capacity of 4 people",
]

# Images 1-15 hosted in the GitHub repository. Images 16-25 from
# https://static.obilet.com.s3.eu-central-1.amazonaws.com/CaseStudy/HotelImages/
# are currently disabled.
image_urls = [
    f"https://github.com/ilhansertelli/case/blob/main/Assets/{image_number}.jpg?raw=true"
    for image_number in range(1, 16)
]


In [21]:
# Create the captioner, the caption store and the sentence-embedding model.
captioning_agent = CaptioningAgent()
indexing_agent = IndexingAgent()
embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Caption every image; a failure on one URL is reported and the loop moves on,
# so that URL simply never enters the index.
for url in image_urls:
    try:
        print(f"Processing: {url}")
        caption = captioning_agent.generate_caption(url)
        print(f"Generated Caption: {caption}")
        indexing_agent.add(url, caption)
    except Exception as err:
        print(f"Failed to process {url}: {err}")

# Score every query against every indexed caption.
engine = QueryMatchingEngine(
    indexing_agent.get_index(),
    embedding_model,
)
matches = engine.match_queries(queries, threshold=0.6)

Processing: https://github.com/ilhansertelli/case/blob/main/Assets/1.jpg?raw=true
Generated Caption: - Room capacity: 2 persons
- View: Sea view
- Air conditioner: Yes
- Desk: Yes
Processing: https://github.com/ilhansertelli/case/blob/main/Assets/2.jpg?raw=true
Generated Caption: - Room capacity: 2 persons
- View: No view
- Air conditioner: Yes
- Desk: Yes
Processing: https://github.com/ilhansertelli/case/blob/main/Assets/3.jpg?raw=true
Generated Caption: - Room capacity: 2 persons
- View: City view
- Air conditioner: Yes
- Desk: Yes
Processing: https://github.com/ilhansertelli/case/blob/main/Assets/4.jpg?raw=true
Generated Caption: - Room capacity: 2 persons
- View: No view
- Includes air conditioner: Yes
- Includes desk: Yes
Processing: https://github.com/ilhansertelli/case/blob/main/Assets/5.jpg?raw=true
Generated Caption: - Room capacity: 1-2 persons
- View: No view
- Includes air conditioner: Yes
- Includes desk: Yes
Processing: https://github.com/ilhansertelli/case/blob/main/Asse

In [26]:
# Print each query followed by its matching image URLs, one per line.
for query, urls in matches.items():
    print(f"\nQuery: '{query}' (threshold ≥ 0.6)")
    if not urls:
        print("(no images matched)")
        continue
    print(*urls, sep="\n")


Query: 'double room with sea view' (threshold ≥ 0.6)
https://github.com/ilhansertelli/case/blob/main/Assets/1.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/13.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/14.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/15.jpg?raw=true

Query: 'room with a balcony and air conditioning, with a city view' (threshold ≥ 0.6)
https://github.com/ilhansertelli/case/blob/main/Assets/1.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/3.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/4.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/5.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/9.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/11.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/12.jpg?raw=true
https://github.com/ilhansertelli/case/blob/main/Assets/15.jpg?raw=true

Query: 'trip