In [None]:
import google.generativeai as genai
import base64
from PIL import Image
from io import BytesIO
import requests
import os
import json
import re
import pandas as pd
from vertexai.preview.generative_models import Part
                

In [None]:

# Read the API key from the environment instead of pasting it into the
# notebook, so the secret never ends up in version control or shared copies.
# Falls back to the previous empty-string value when the variable is unset.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))


In [None]:
# Identifier of the Gemini model that answers every floor-plan question below.
MODEL_NAME = "gemini-2.0-flash-lite"
model = genai.GenerativeModel(MODEL_NAME)


In [None]:
def encode_image(path):
    """Return the contents of the file at ``path`` as a base64 text string.

    The file is read in binary mode, base64-encoded, and the resulting
    bytes are decoded to ``str`` using UTF-8.
    """
    with open(path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def escape_inches(st):
    """Prefix every double quote that directly follows a digit with a backslash.

    Double quotes that are not preceded by a digit are left untouched.
    """
    # The lookbehind keeps the digit itself out of the match, so only the
    # quote character is replaced.
    inch_mark = re.compile(r'(?<=\d)"')
    return inch_mark.sub('\\"', st)


In [None]:
# Directory holding the input floor-plan images; the main loop appends each
# key of the dataset JSON to this prefix to build the image file path.
img_path = '../Tell2Design Data/images/centroid_adjacent_removal_neighbor/'
# Directory prefix under which one JSON prediction file per image is written.
save_path = '../Tell2Design Data/common_neighbor_generated/'
# Dataset JSON; its keys are iterated as image file names and its values
# supply the 'desc', 'num_rooms' and 'rooms' fields stored with each prediction.
json_path = '../Tell2Design Data/t2d.json'

In [None]:
# Load the dataset metadata. The encoding is given explicitly because the
# default used by open() depends on the platform locale, while JSON files are
# UTF-8; this keeps the load reproducible across machines.
with open(json_path, encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Shared preamble placed at the start of every prompt: the persona, a legend
# mapping each room type to a 3-component colour tuple (presumably RGB -
# TODO confirm against the image renderer), and assumptions about the image.
BASE_INSTRUCTIONS = """
You are a top-level architect with years of experience reading detailed architectural site plans. Give chain of thoughts for this floor plan.

<color-scheme>
- balcony:      (107, 142, 35)
- common room:  (255, 215, 0)
- bathroom:     (173, 216, 230)
- kitchen:      (240, 128, 128)
- master room:  (255, 165, 0)
- living room:  (238, 232, 170)
- entrance:     (255, 0, 0)
- dining room:  (218, 112, 214)
- storage:      (221, 160, 221)
</color-scheme>

<assumptions>
- The top of the image is North.
- Image size is 256×256 pixels.
- If multiple rooms share a type, label them type_1, type_2, … (e.g., common_1, common_2).
</assumptions>
"""

# Question template; the {room1} and {room2} placeholders are filled with
# str.format from one entry of PAIRS for each image.
TASK_TMPL = """
<task>
1. Provide a step-by-step reasoning process to answer: Name a room that is adjacent to both the {room1} and {room2}?
If any of the rooms are absent, return "absent".
</task>
"""

# Output-format instruction. save_pred splits the model reply on the literal
# markers "CoT:" and "Answer:" requested here, so keep the two in sync.
FORMAT = """
Strictly follow this format:
<output format>
"CoT:" ...
"Answer:" ...
</output format>
"""

In [None]:
def _nonblank_lines(text):
    """Split ``text`` on newlines and return the stripped, non-empty lines."""
    return [line.strip() for line in text.strip().split("\n") if line.strip()]


def save_pred(output, path, dp, pair):
    """Parse a model reply into reasoning/answer lines and save it as JSON.

    Args:
        output: Raw text returned by the model. Asterisks (markdown emphasis)
            are removed before parsing.
        path: Destination file for the JSON record; overwritten if it exists.
        dp: Dataset entry providing the 'desc', 'num_rooms' and 'rooms' keys
            copied into the record.
        pair: Two-item sequence with the room names used in the question.

    The reply is split on the first "CoT:" and the first "Answer:" marker.
    A reply that ignores the requested format no longer raises: when "CoT:"
    is missing the reasoning starts at the beginning of the text, and when
    "Answer:" is missing the remaining text is stored as reasoning and the
    answer list is left empty, so such records are easy to spot afterwards.
    """
    output = output.replace('*', "")

    # Search each marker once; a missing marker yields None instead of the
    # AttributeError the chained .span() call used to raise.
    cot_match = re.search(r'CoT:', output)
    answer_match = re.search(r'Answer:', output)

    reason_from = cot_match.end() if cot_match else 0
    if answer_match:
        reason_text = output[reason_from:answer_match.start()]
        answer_text = output[answer_match.end():]
    else:
        reason_text = output[reason_from:]
        answer_text = ""

    result = {
        "Reason": _nonblank_lines(reason_text),
        "Answer": _nonblank_lines(answer_text),
        "gt_desc": dp['desc'],
        "num_rooms": dp['num_rooms'],
        "rooms": dp['rooms'],
        "pair0": pair[0],
        "pair1": pair[1],
    }
    with open(path, "w") as f:
        json.dump(result, f, indent=2)
     

In [None]:
# Room-type pairs substituted into TASK_TMPL. The main loop picks one per
# image in round-robin order using the image index modulo len(PAIRS).
PAIRS = [
    ("kitchen", "bathroom"),
    ("balcony", "living room"),
    ("common room", "master room"),
    ("dining room", "bathroom"),
]


In [None]:
# Index of the first floor plan to process. Leave at 0 for a full run; raise
# it to resume an interrupted run without re-querying earlier images.
START_INDEX = 0

# The prediction files are written under save_path, so make sure it exists
# before the first write instead of failing after the first API call.
os.makedirs(save_path, exist_ok=True)

for i, each in enumerate(data.keys()):
    print(i)
    if i < START_INDEX:
        continue

    # Assign the question pairs round-robin over the images.
    pair = PAIRS[i % len(PAIRS)]

    task = TASK_TMPL.format(room1=pair[0], room2=pair[1])
    text = BASE_INSTRUCTIONS + "\n" + task + "\n" + FORMAT
    text = '"""' + text + '"""'
    system_prompt = {"role": "user", "parts": [{"text": text}]}

    messages = [system_prompt]

    # Downscale and encode the image entirely in memory. This avoids writing
    # (and possibly leaking, on error) a temporary file in the working
    # directory, and closes the source image as soon as it has been resized.
    # Image.LANCZOS is the same filter formerly exposed as Image.ANTIALIAS,
    # which was removed in Pillow 10.
    # NOTE(review): the prompt tells the model the image is 256x256 while the
    # image sent is 128x128 - confirm this mismatch is intended.
    with Image.open(os.path.join(img_path, each)) as img:
        img_resized = img.resize((128, 128), Image.LANCZOS)
    buffer = BytesIO()
    img_resized.save(buffer, format="PNG")
    test_img = base64.b64encode(buffer.getvalue()).decode("utf-8")

    messages.append({
        "role": "user",
        "parts": [
            # The payload is PNG data, so declare it as such (it was
            # previously labelled image/jpeg).
            {"mime_type": "image/png", "data": test_img},
            {"text": "Give chain of thoughts and answer the question for this floor plan."}
        ]
    })

    response = model.generate_content(messages)
    output = response.text
    path = save_path + each.split('.png')[0] + '.json'
    save_pred(output, path, data[each], pair)
   

In [None]:
# Display the last response object returned by model.generate_content in the
# loop above (debugging aid; requires that the loop ran at least once).
response