In [None]:
# search parameters
import os  # imported here because this cell runs before the notebook's import cell

num_iters = 12
num_variations = 10
num_randomized = 3
model_iters = 5
# When False, load_model_from_vec only considers already-downloaded models for depth > 0.
download_depth_models = True
num_images_per_variation = 32
num_image_proc_threads = 4
do_vision_test_mode = False
do_load_tabby_model = True
quadro_delay = 1.0
delay_3090 = 0.5
batchsize_3090 = 25

# Derived sizes, printed at the end of this cell.
total_num_variations_per_steps = num_variations * (num_randomized + 1) * model_iters
total_images_per_step = total_num_variations_per_steps * num_images_per_variation

model_iter = 4

dpo_lora_id = "llama8B_vision_2_temp_DPO/checkpoint-500"
use_dpo_lora = False
model_id = "./llama8B_vision_4"
do_clear_screenshot_dir = True
do_open_best_images = False

use_multi_gpu_llm = False
use_multi_gpu_vision = False

# Face-count bounds sent with the Sketchfab search query.
maxModelFaces = 5 * 1000
minModelFaces = 1 * 1000
# Thumbnails narrower or shorter than this are not added to the searchable database.
min_image_size = 350
# When True, downloadModel never issues new Sketchfab searches.
freeze_model_database_models = False

model_download_path = "./node_webgl_render_agentic/public/downloaded_models"

# Credentials are read from the environment when available so real keys never
# have to be stored in the notebook. The "----" placeholder stays as the
# fallback, so behaviour is unchanged when the variables are not set.
sketchfab_api_token = os.environ.get("SKETCHFAB_API_TOKEN", "----")
openai_api_token = os.environ.get("OPENAI_API_KEY", "----")
huggingface_key = os.environ.get("HF_TOKEN", "----")

small_ai_model = "gpt-4o-mini"
normal_ai_model = "gpt-4o"

# Number of consecutive idle polls after which the image-scoring loop gives up.
max_sleeps = 400

print("total_num_variations_per_steps", total_num_variations_per_steps)
print("total_images_per_step", total_images_per_step)

In [None]:
import gc
import json
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from io import BytesIO

import numpy as np
import requests
import torch
from PIL import Image

import time
import open_clip
from openai import OpenAI
import zipfile
from huggingface_hub import login

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import StoppingCriteria, StoppingCriteriaList
from peft import PeftModel
import re

In [None]:
# Enable TF32 matmuls and cuDNN benchmark mode.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True

# --------------------------------------------------------------------------
# Configuration
# --------------------------------------------------------------------------
TABBY_BASE_URLS = [ "http://localhost:5000", "http://localhost:5001" ]
# Must be an admin-level token to load/unload models. Read from the environment
# when set so the key does not have to live in the notebook; the "----"
# placeholder is kept as the fallback.
TABBY_ADMIN_KEY = os.environ.get("TABBY_ADMIN_KEY", "----")
MODEL_NAME = "llama8B_Vision_4"    # The exact folder name in your TabbyAPI `models/` directory

def is_model_loaded(url, model_name):
    """
    Ask a TabbyAPI server whether `model_name` is the model it currently reports.

    Sends an authenticated GET to `{url}/v1/model` and compares the
    `model_name` field of the JSON answer with the requested name.

    Returns True only for a 200 answer with a matching name; every other
    status code is treated as "not loaded".
    """
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TABBY_ADMIN_KEY}"
    }
    reply = requests.get(f"{url}/v1/model", headers=auth_headers)

    # Depending on how the API signals "no model loaded", this may need adjusting.
    if reply.status_code != 200:
        return False

    return reply.json().get("model_name") == model_name

# --------------------------------------------------------------------------
# 1) Load Model
# --------------------------------------------------------------------------
def load_model(url, model_name, max_seq_len=4096 * 2):
    """
    Calls the /v1/model/load endpoint to load a model into TabbyAPI.

    Args:
        url: Base URL of the TabbyAPI server (without the `/v1/...` path).
        model_name: Name of the model to load.
        max_seq_len: Value sent as `max_seq_len` in the load request.

    Returns None. Does nothing when the server already reports `model_name`
    as loaded.

    Raises:
        RuntimeError: if the load request does not answer with status 200.
    """
    # Probe the server and model that were actually requested, not the
    # module-level defaults, so the function works for any server in
    # TABBY_BASE_URLS.
    if is_model_loaded(url, model_name):
        return

    load_url = f"{url}/v1/model/load"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TABBY_ADMIN_KEY}"
    }
    payload = {
        "model_name": model_name,
        "max_seq_len": max_seq_len
        # You can pass other parameters like "gpu_split" if desired
    }

    print(f"Attempting to load model: {model_name}")
    response = requests.post(load_url, headers=headers, data=json.dumps(payload))

    if response.status_code == 200:
        print(f"Successfully loaded model: {model_name}")
    else:
        print(f"Failed to load model. Response: {response.status_code} {response.text}")
        raise RuntimeError("Model load request failed.")

def unload_model(url):
    """
    Calls the /v1/model/unload endpoint to unload the current TabbyAPI model.

    Posts an empty JSON object with the admin token to `{url}/v1/model/unload`.

    Raises:
        RuntimeError: if the server does not answer with status 200.
    """
    endpoint = f"{url}/v1/model/unload"
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TABBY_ADMIN_KEY}"
    }
    # The request carries no parameters; an empty JSON object is sent.
    body = json.dumps({})

    print("Unloading TabbyAPI model")
    reply = requests.post(endpoint, headers=auth_headers, data=body)

    if reply.status_code != 200:
        print(f"Failed to unload model. Response: {reply.status_code} {reply.text}")
        raise RuntimeError("Model unload request failed.")

    print("Successfully unloaded model")


# --------------------------------------------------------------------------
# 2) Generate Text (OpenAI-Compatible Endpoint)
# --------------------------------------------------------------------------
def generate_text(url, prompt, model_name, max_tokens=100):
    """
    Calls the /v1/completions endpoint to generate text from the loaded model.

    Args:
        url: Base URL of the TabbyAPI server.
        prompt: Prompt text sent as-is.
        model_name: Accepted for interface compatibility; it is not sent, so
            the server answers with whichever model is currently loaded.
        max_tokens: Value sent as `max_tokens`.

    Returns:
        The `text` field of the first entry in the response's `choices` list.

    Raises:
        RuntimeError: if the server does not answer with status 200.
    """
    # The admin token is used here as well (the same key as for load/unload).
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TABBY_ADMIN_KEY}"
    }
    request_body = json.dumps({
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": 0.9
    })

    reply = requests.post(f"{url}/v1/completions", headers=auth_headers, data=request_body)
    if reply.status_code == 200:
        # The response is read like an OpenAI completion: {"choices": [{"text": "..."}], ...}
        return reply.json()["choices"][0]["text"]

    raise RuntimeError(
        f"Generation request failed ({reply.status_code}): {reply.text}"
    )

# Per-server "energy" budget that gen_vr_content spends to decide which of the
# two TabbyAPI servers (index 0 or 1) receives the next request.
gpu_energy_store = [1, 1]
# The second server is only considered when multi-GPU LLM mode is enabled.
use_second_gpu = use_multi_gpu_llm

def gen_vr_content(in_javascript, base_prompt, extra_string):
    """
    Build the VR-world prompt, send it to a TabbyAPI server, and return the result.

    When `use_second_gpu` is set, the server is chosen through the
    `gpu_energy_store` budget: a request to server 1 costs 60, a request to
    server 0 costs 30, and both budgets are topped up by 100 as soon as
    either drops below zero. Otherwise server 0 is always used.

    Returns:
        (generation_output, formatted_prompt)
    """
    chosen = 0
    if use_second_gpu:
        # The server with more remaining budget wins; ties go to server 0.
        chosen = 1 if gpu_energy_store[1] > gpu_energy_store[0] else 0
        gpu_energy_store[chosen] -= 60 if chosen == 1 else 30

        if gpu_energy_store[0] < 0 or gpu_energy_store[1] < 0:
            for slot in (0, 1):
                gpu_energy_store[slot] += 100

    full_prompt = f"""You are a helpful assistant designing a VR world titled "{base_prompt}" {extra_string}
[EXISTING_JAVASCRIPT]
{in_javascript}
[/EXISTING_JAVASCRIPT]
"""
    completion = generate_text(TABBY_BASE_URLS[chosen], full_prompt, MODEL_NAME, max_tokens=2048)

    # Debug output: routing decision, then the prompt, then the completion.
    print("gpu_index", chosen, gpu_energy_store)
    print(">>>>>>>>>>>>>>>>>>>>", full_prompt)
    print("<<<<<<<<<<<<<<<<<<<<", completion)

    return completion, full_prompt

    # 
    # print(gen_vr_content("""room("Cozy bedroom").topLeft(0, 0).bottomRight(15, 15);""", "Cozy Bedroom"))

# Load the LLM on the first TabbyAPI server unless loading is disabled or the
# notebook runs in vision test mode.
if do_load_tabby_model and not do_vision_test_mode:
    load_model(TABBY_BASE_URLS[0], MODEL_NAME)

In [None]:
# load the clip model
# Default device for tensors: "cuda:1" when CUDA is available, otherwise the CPU.
device = "cuda:1" if torch.cuda.is_available() else "cpu"
# Devices used by the vision code: both GPUs in multi-GPU vision mode, only
# "cuda:1" otherwise, and the CPU when CUDA is unavailable.
devices = ["cpu"]
if torch.cuda.is_available():
    if use_multi_gpu_vision:
        devices = ['cuda:0', 'cuda:1']
    else:
        devices = ['cuda:1']
# Per-device busy flag (True = free), filled in after the models are loaded.
device_free = { }

# Registries keyed by device string, filled by load_clip_model.
clip_model = {}
clip_preprocess = {}

def load_clip_model(
    d,
    model_name: str = "ViT-H/14-378",
    pretrained: str = "dfn5b",
    compile_model: bool = True
):
    """
    Create an open_clip model and register it together with its preprocess transform.

    Args:
        d: Device the model is moved to; also the key used in `clip_model`
            and `clip_preprocess`.
        model_name: Architecture name passed to open_clip.
        pretrained: Pretrained tag passed to open_clip.
        compile_model: When True the model is wrapped with `torch.compile`
            before being moved to the device.
    """
    net, _unused_transform, transform = open_clip.create_model_and_transforms(
        model_name, pretrained=pretrained
    )

    # Optional PyTorch 2.x compile step
    if compile_model:
        net = torch.compile(net)

    # Place the model on the target device and switch it to eval mode.
    net = net.to(d)
    net.eval()

    # Publish the model and its preprocess transform under the device key.
    clip_model[d] = net
    clip_preprocess[d] = transform


def unload_clip_model(d):
    """
    Drop the CLIP model and preprocess transform registered for device `d`.

    The registry entries are set to None (the keys stay), then garbage
    collection runs and the CUDA cache is emptied.
    """
    for registry in (clip_model, clip_preprocess):
        registry[d] = None
    gc.collect()
    torch.cuda.empty_cache()

# Load one CLIP model per GPU.
# NOTE(review): both devices are hard-coded and loaded regardless of `devices`
# and `use_multi_gpu_vision`; this fails on machines without two CUDA devices
# — confirm this is intended.
load_clip_model("cuda:0")
load_clip_model("cuda:1")

# Mark every possible "cuda:<i>" device as free. No "cpu" entry is created.
for i in range(100):
    device_free[f"cuda:{i}"] = True

In [None]:
# Show which devices were selected for the vision code.
print(devices)

In [None]:
# Attempt to load the database
num_model_loads = 0
existing_image_features = None
existing_text_features = None
list_of_all_models = { }
existing_searches_done = []
index_to_model_uid = []
num_models_downloaded = 0
stack_existing_image_features = True
image_ratings_data = []

saved_model_data_filename = 'saved_model_data.json'
saved_model_data_filename_backup = 'saved_model_data_backup.json'
existing_image_features_filename = 'existing_image_features_big.pt'
existing_text_features_filename = "existing_text_features_big.pt"
existing_image_features_filename_backup = 'existing_image_features_backup.pt'
existing_text_features_filename_backup = "existing_text_features_backup.pt"
# True only when the primary files (not the backups) were loaded successfully.
loaded_good = False

try:
    # Primary copy: JSON metadata plus the two feature tensors.
    with open(saved_model_data_filename, 'r') as json_file:
        saved_model_data = json.load(json_file)
        existing_searches_done = saved_model_data["existing_searches_done"]
        list_of_all_models = saved_model_data["list_of_all_models"]
        index_to_model_uid = saved_model_data["index_to_model_uid"]

        existing_image_features = torch.load(existing_image_features_filename)
        print("existing_image_features shape:", existing_image_features.shape)

        existing_text_features = torch.load(existing_text_features_filename)
        print("existing_text_features shape:", existing_text_features.shape)

        loaded_good = True

        print("index_to_model_uid length:", len(index_to_model_uid))
except Exception as e:
    # Any failure with the primary copy falls back to the backup files.
    # A failure here is not caught and stops the cell.
    print("Exception", e)
    with open(saved_model_data_filename_backup, 'r') as json_file:
        saved_model_data = json.load(json_file)
        existing_searches_done = saved_model_data["existing_searches_done"]
        list_of_all_models = saved_model_data["list_of_all_models"]
        index_to_model_uid = saved_model_data["index_to_model_uid"]

    existing_image_features = torch.load(existing_image_features_filename_backup)
    print("existing_image_features shape:", existing_image_features.shape)

    existing_text_features = torch.load(existing_text_features_filename_backup)
    print("existing_text_features shape:", existing_text_features.shape)


print("list_of_all_models", len(list_of_all_models))

print(len(index_to_model_uid))

# Get the lengths of the Python lists
len_models = len(index_to_model_uid)

# Get the size along the first dimension (0th axis) of the PyTorch tensors
len_image = existing_image_features.shape[0]
len_text = existing_text_features.shape[0]

# Find the minimum length
shortest_len = min(len_models, len_image, len_text)

print("shortest_len", shortest_len)

# If any of them doesn't match, truncate them all to the shortest length
if not (len_models == len_image == len_text):
    print(f"Mismatch found. Truncating all arrays/tensors to shortest length = {shortest_len}.")

    # Only the uid index and the two feature tensors are row-aligned (row i of
    # each tensor belongs to index_to_model_uid[i]). `existing_searches_done`
    # is a list of past search strings, not one entry per model, so it is
    # deliberately left untouched: truncating it would only throw away search
    # history.
    index_to_model_uid = index_to_model_uid[:shortest_len]

    # Truncate the PyTorch tensors
    existing_image_features = existing_image_features[:shortest_len]
    existing_text_features = existing_text_features[:shortest_len]

# Now they should all match
assert len(index_to_model_uid) == existing_image_features.shape[0] == existing_text_features.shape[0]

# Refresh the backup only from a good primary load, so a broken primary copy
# can never overwrite the last known-good backup.
if loaded_good:
    with open(saved_model_data_filename_backup, "w") as f:
        saved_model_data = { "existing_searches_done": existing_searches_done, "list_of_all_models": list_of_all_models, "index_to_model_uid": index_to_model_uid }
        json.dump(saved_model_data, f)
    torch.save(existing_image_features.cpu().detach(), existing_image_features_filename_backup)
    torch.save(existing_text_features.cpu().detach(), existing_text_features_filename_backup)


print("All arrays/tensors have been truncated (if needed) and are now consistent.")

In [None]:
def addSearchableModel(uid, image_url, name):
    """
    Add one model to the searchable CLIP database and persist the database.

    Downloads the thumbnail at `image_url`, encodes `name` (text) and the
    thumbnail (image) with the CLIP model registered for "cuda:1", appends
    both normalised feature rows to the global feature tensors, records the
    model in `list_of_all_models` / `index_to_model_uid`, and rewrites the
    JSON file and both feature files.

    Nothing happens when `uid` is already known, when the thumbnail cannot be
    fetched (a message is printed), or when the thumbnail is smaller than
    `min_image_size` in either dimension.

    Returns None.
    """
    global existing_image_features
    global existing_text_features
    global num_model_loads

    d_zero = "cuda:1"

    if uid in list_of_all_models:
        return

    response = requests.get(image_url)
    if response.status_code != 200:
        print(f"Failed to fetch {image_url}. Status code: {response.status_code}")
        return

    image = Image.open(BytesIO(response.content))
    if image.width < min_image_size or image.height < min_image_size:
        return

    # Encode BOTH modalities before touching any shared state. If either
    # encoding raises, the text tensor, the image tensor and the uid index
    # remain the same length, so their rows stay aligned.
    with torch.no_grad():
        tokenized_text = open_clip.tokenize(name).to(d_zero)
        new_text_features = clip_model[d_zero].encode_text(tokenized_text)
        # Normalize the features (optional but recommended)
        new_text_features = new_text_features / new_text_features.norm(dim=-1, keepdim=True)

        image_tensor = clip_preprocess[d_zero](image).unsqueeze(0).to(d_zero)
        new_image_features = clip_model[d_zero].encode_image(image_tensor)
        new_image_features = new_image_features / new_image_features.norm(dim=-1, keepdim=True)

    new_text_features = new_text_features.to(device).detach()
    new_image_features = new_image_features.to(device).detach()

    # Commit: append one row to each tensor and one entry to the index.
    if existing_text_features is not None:
        existing_text_features = torch.cat([existing_text_features.to(device), new_text_features], dim=0)
    else:
        existing_text_features = new_text_features

    if existing_image_features is not None:
        existing_image_features = torch.cat([existing_image_features.to(device), new_image_features], dim=0)
    else:
        existing_image_features = new_image_features

    list_of_all_models[uid] = { "name": name, "image": image_url, "uid": uid }
    index_to_model_uid.append(uid)
    num_model_loads += 1

    # Drop the temporaries before collecting so their memory can be released.
    del tokenized_text, new_text_features, new_image_features, image, image_tensor
    gc.collect()
    torch.cuda.empty_cache()  # Clear GPU memory
    print("addSearchableModel", name, uid, image_url)

    # Persist the whole database after every addition.
    with open(saved_model_data_filename, "w") as f:
        saved_model_data = { "existing_searches_done": existing_searches_done, "list_of_all_models": list_of_all_models, "index_to_model_uid": index_to_model_uid }
        json.dump(saved_model_data, f)
    torch.save(existing_image_features.cpu().detach(), existing_image_features_filename)
    torch.save(existing_text_features.cpu().detach(), existing_text_features_filename)

In [None]:
def extract_js_blocks(text, doJoin = True):
    """
    Extract the contents of fenced code blocks from `text`.

    Blocks fenced with a `js` / `javascript` tag are preferred; if there are
    none, every fenced block is used instead. Each block has all `;`
    characters removed and surrounding whitespace stripped, and duplicate
    blocks are dropped.

    Args:
        text: Text that may contain triple-backtick fenced code blocks.
        doJoin: When True (default) return one string with the blocks joined
            by newlines, in the order they first appear in `text`. When False
            return the unique blocks as a set.

    Returns:
        str or set, depending on `doJoin`.
    """
    def _unique_blocks(pattern):
        # dict.fromkeys de-duplicates while keeping first-occurrence order;
        # a plain set would scramble the order of the generated code.
        found = re.findall(pattern, text, re.DOTALL)
        return list(dict.fromkeys(match.replace(";", "").strip() for match in found))

    blocks = _unique_blocks(r"```(?:js|javascript)(.*?)```")
    if not blocks:
        blocks = _unique_blocks(r"```(.*?)```")

    if doJoin:
        return "\n".join(blocks)
    return set(blocks)

def remove_js_comments(text):
    """
    Strip `//` line comments from JavaScript source, line by line.

    A `//` directly preceded by a quote, a colon or another slash is left
    alone (the lookbehind in the pattern). Trailing whitespace is removed
    from every line.
    """
    comment_re = re.compile(r'(?<!["\':/])//.*$')
    stripped = []
    for source_line in text.split('\n'):
        stripped.append(comment_re.sub('', source_line).rstrip())
    return '\n'.join(stripped)

In [None]:
# Load previously saved vision-model outputs, if the file exists.
# On any failure the error is printed and `vision_data` stays an empty list.
vision_data = []
try:
    with open('./model_outputs_llama8b_vision.json', 'r') as file:
        vision_data = json.load(file)
        # The loaded object is indexed by 'data' here, so it is expected to be a dict.
        print(len(vision_data['data']))
except Exception as e:
    print(e)

In [None]:
import gc

# Maps a name/depth key to the uid of the model that was downloaded for it.
modelsDownloaded = { }
# Cache used by downloadModel: "<objectName>_<depth>" -> model uid.
existing_models_searched = { }

# Respect users who tagged their models "noai": such models are never used.
def hasNoAITag(model):
    """
    Return True when any tag name of `model` contains "noai" (case-insensitive).

    `model["tags"]` is read directly; a missing or empty tag list yields False.
    """
    tags = model["tags"]
    if not tags:
        return False
    return any("noai" in tag["name"].lower() for tag in tags)

def getModelList(objectName):
    """
    Search Sketchfab for downloadable models matching `objectName`.

    Every result that does not carry a "noai" tag is returned and, when it
    has a thumbnail, registered in the searchable database through
    `addSearchableModel`.

    Returns:
        A list of result dicts (possibly empty). On HTTP 429 (rate limited)
        an empty string is returned instead, as before.
    """
    from urllib.parse import urlencode

    # Build the query with urlencode so spaces and reserved characters in
    # `objectName` cannot corrupt the query string.
    query = urlencode({
        "type": "models",
        "q": objectName,
        "downloadable": "true",
        "min_face_count": minModelFaces,
        "max_face_count": maxModelFaces,
        "archives_flavours": "false",
    })
    url = "https://api.sketchfab.com/v3/search?" + query
    print("url", url)

    # A single request: issuing the same search twice only doubles the load
    # against the API rate limit.
    response = requests.get(url)
    if response.status_code == 429:
        # NOTE(review): the message mentions sleeping, but no sleep happens here.
        print("response.status_code == 429 sleeping for a minute")
        return ""

    if response.status_code != 200:
        print(f"Failed to fetch {url}. Status code: {response.status_code}")
        return []

    data = json.loads(response.text)
    results = data.get("results") if data else None
    if not results or not results[0]:
        return []

    return_data = []
    for model in results:
        if hasNoAITag(model):
            continue
        return_data.append(model)

        # Results without a thumbnail cannot be embedded; they are still
        # returned but not added to the searchable database.
        thumbnail_images = (model.get("thumbnails") or {}).get("images") or []
        if thumbnail_images:
            addSearchableModel(model["uid"], thumbnail_images[0]["url"], model["name"])

    return return_data

def download_one_model(model, depth):
    """
    Make sure the Sketchfab model `model` is unpacked under `model_download_path`.

    Resolution order:
      1. the unpacked directory already exists -> nothing to do;
      2. a cached zip exists in `zipped_models/` -> extract it;
      3. otherwise request a download link from the Sketchfab API, download
         the glTF archive, store it in `zipped_models/` and extract it.

    On success `modelsDownloaded["<name>_<depth>"]` is set to the model uid.

    Args:
        model: Dict with at least the keys "name" and "uid".
        depth: Depth value that becomes part of the `modelsDownloaded` key.

    Returns:
        The model uid on success, None when the API offers no glTF download
        or the archive download fails.
    """
    global num_model_loads

    objectName = model["name"]
    modelUID = model["uid"]
    baseDir = model_download_path
    print("os.mkdir(baseDir): " + baseDir)

    # makedirs also creates missing parent directories of the nested download path.
    os.makedirs("zipped_models", exist_ok=True)
    os.makedirs(baseDir, exist_ok=True)

    object_name_plus_depth = objectName + "_" + str(depth)

    unzipped_file_path = baseDir + os.sep + modelUID

    if os.path.exists(unzipped_file_path):
        print("os.path.exists(unzipped_file_path): " + unzipped_file_path)
        modelsDownloaded[object_name_plus_depth] = modelUID
        return modelUID

    zipped_file_path = "zipped_models" + os.sep + modelUID + ".zip"

    if os.path.exists(zipped_file_path):
        with zipfile.ZipFile(zipped_file_path, 'r') as zip_ref:
            zip_ref.extractall(unzipped_file_path)
            print("os.path.exists(file_path): " + zipped_file_path)
        modelsDownloaded[object_name_plus_depth] = modelUID

        return modelUID

    # Ask the API for a download link.
    url = "https://api.sketchfab.com/v3/models/" + model["uid"] + "/download"
    headers = {
        'Authorization': f"Token {sketchfab_api_token}"
    }
    params = {
        'mode': 'cors'
    }

    response = requests.get(url, headers=headers, params=params)
    modelData = json.loads(response.text)

    print("modelData", modelData, "response", response)

    if not "gltf" in modelData:
        return None

    file_url = modelData["gltf"]['url']
    print(file_url)

    # Download the archive itself.
    response = requests.get(file_url)
    if response.status_code != 200:
        print(f"Failed to download the file. Status code: {response.status_code}")
        return None

    # Write and CLOSE the archive before reading it back: opening the zip
    # while the write handle is still open can read unflushed, truncated data.
    with open(zipped_file_path, "wb") as file:
        file.write(response.content)

    with zipfile.ZipFile(zipped_file_path, 'r') as zip_ref:
        zip_ref.extractall(unzipped_file_path)
        print("Extracted", unzipped_file_path)

    # Same "<name>_<depth>" key as the two cached branches above.
    modelsDownloaded[object_name_plus_depth] = modelUID
    num_model_loads += 1
    return modelUID
        
def zscore(similarity):
    """Standardise `similarity`: subtract its mean and divide by its standard deviation."""
    centre = similarity.mean()
    spread = similarity.std()
    return (similarity - centre) / spread

# Blocklist, one entry per line of the text file; load_model_from_vec skips
# every model whose uid appears in it.
with open('generate_from_llama8_bad_models.txt', 'r') as file:
    baaaaad_models = file.read().split('\n')
    print("baaaaad_models", len(baaaaad_models))

# Cache of ranked candidate lists, keyed by the searched object name.
stored_model_vec_searches = { }
def load_model_from_vec(objectName, model_depth):
    """
    Pick a model for `objectName` by CLIP similarity and make sure it is downloaded.

    All models in the database are ranked by the similarity between their
    thumbnail features and a weighted text query (positive prompts minus
    negative prompts). The ranking is cached per `objectName` in
    `stored_model_vec_searches`.

    From that ranking, blocklisted models are skipped and the
    (`model_depth` + 1)-th remaining candidate is chosen (or the last
    remaining one if there are fewer). When `download_depth_models` is False
    and `model_depth` > 0, only models that are already unpacked on disk
    count as candidates.

    Args:
        objectName: Text description of the wanted object.
        model_depth: Zero-based rank of the candidate to pick.

    Returns:
        The uid of the chosen model. If it could not be unpacked, the uid of
        the best-ranked model that is already on disk is returned instead
        when there is one. None when no candidate exists at all.
    """
    d_zero = devices[0]

    if objectName in stored_model_vec_searches:
        top_X_models = stored_model_vec_searches[objectName]
    else:
        pos_prompts = [objectName, "cohesive attractive design", 'high poly high quality', 'attractive and pretty colors', 'cohesive single object', 'object with a solid base']
        pos_weights=[1.1, 0.2, 0.2, 0.2, 0.4, 0.2]

        neg_prompts=[f"object in a room", "a complete room", 'ugly or low quality', 'low poly', 'abstract and disconnected', 'black and white', 'greyscale', 'indoor room', 'small room within a room', 'room within a room', "inverted normals", 'distorded, floating, or confused object']
        neg_weights=[0.3, 0.3, 0.3, 0.3, 0.2, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3]

        print("load_model_from_vec - Positive Prompts:", pos_prompts)
        print("load_model_from_vec - Negative Prompts:", neg_prompts)

        # autocast takes a bare device *type* ("cuda"/"cpu"), not "cuda:0".
        autocast_type = torch.device(d_zero).type

        def _weighted_text_embedding(prompts, weights):
            # Encode the prompts on the model's own device, normalise each
            # embedding, then return the weighted sum as a single vector.
            tokens = open_clip.tokenize(prompts).to(d_zero)
            with torch.no_grad(), torch.amp.autocast(device_type=autocast_type, dtype=torch.float16):
                embeddings = clip_model[d_zero].encode_text(tokens)
                embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
            weighted = torch.stack([w * emb for w, emb in zip(weights, embeddings)], dim=0)
            return weighted.sum(dim=0)

        # ----- Combine positive and negative embeddings -----
        combined_pos = _weighted_text_embedding(pos_prompts, pos_weights)
        if neg_prompts:
            combined_neg = _weighted_text_embedding(neg_prompts, neg_weights)
        else:
            # If no negative prompts are provided, use a zero tensor of the same shape.
            combined_neg = torch.zeros_like(combined_pos)

        combined_text = combined_pos - combined_neg
        combined_text = combined_text / combined_text.norm()

        # ----- Compute similarity with existing image features -----
        # existing_image_features is built by concatenating one feature row
        # per model, so the product has one score per model.
        image_features = existing_image_features.to(d_zero)
        combined_text = combined_text.to(device=d_zero, dtype=image_features.dtype)
        # reshape(-1) keeps a 1-D result even when the database has one row.
        combined_similarity = (image_features @ combined_text).reshape(-1)
        combined_similarity = zscore(combined_similarity.detach().cpu().numpy())

        # Rank all models, best first, keeping at most 10000 candidates.
        top_X_indices = np.argsort(-combined_similarity)[:10000]
        top_X_models = [list_of_all_models[index_to_model_uid[idx]] for idx in top_X_indices]
        stored_model_vec_searches[objectName] = top_X_models

    gc.collect()  # Run garbage collection
    torch.cuda.empty_cache()  # Clear GPU memory

    base_depth = model_depth
    bad_models = set(baaaaad_models)  # set membership instead of scanning the list per candidate

    best_model = None
    modelUID = None
    for one_model in top_X_models:
        if one_model['uid'] in bad_models:
            continue

        if base_depth > 0 and not download_depth_models:
            if not os.path.exists(model_download_path + os.sep + one_model["uid"]):
                continue

        modelUID = one_model["uid"]
        best_model = one_model
        model_depth -= 1
        if model_depth < 0:
            break

    if best_model is None:
        # Empty database, or every candidate was filtered out.
        print("load_model_from_vec - no usable model found for", objectName)
        return None

    if not "loaded" in best_model:
        download_one_model(best_model, base_depth)

    # If the chosen model is still not on disk (e.g. the download failed),
    # fall back to the best-ranked model that is.
    if not os.path.exists(model_download_path + os.sep + modelUID):
        for one_model in top_X_models:
            if one_model["uid"] in bad_models:
                continue
            if os.path.exists(model_download_path + os.sep + one_model["uid"]):
                modelUID = one_model["uid"]
                break

    # Record the uid that is actually returned, under "<objectName>_<depth>".
    modelsDownloaded[f"{objectName}_{base_depth}"] = modelUID

    return modelUID


def downloadModel(objectName, model_depth):
    """
    Resolve `objectName` to a model uid, searching Sketchfab first if needed.

    Results are cached per "<objectName>_<model_depth>" in
    `existing_models_searched`. Unless the database is frozen
    (`freeze_model_database_models`) or `objectName` was searched before,
    Sketchfab is queried for the full name and for each of its
    whitespace-separated words that has not been searched yet, which adds
    the results to the database. The model is then picked with
    `load_model_from_vec`.

    Args:
        objectName: Text description of the wanted object.
        model_depth: Zero-based rank of the candidate to pick.

    Returns:
        Whatever `load_model_from_vec` returns for this name and depth.
    """
    depth_plus_obj = f"{objectName}_{model_depth}"

    if depth_plus_obj in existing_models_searched:
        return existing_models_searched[depth_plus_obj]

    if not (objectName in existing_searches_done or freeze_model_database_models):
        existing_searches_done.append(objectName)
        getModelList(objectName)

        # split() rather than split(' '): repeated or leading/trailing spaces
        # would otherwise yield empty tokens and trigger empty-query searches.
        for subDesc in objectName.split():
            if subDesc not in existing_searches_done:
                existing_searches_done.append(subDesc)
                getModelList(subDesc)

    the_result = load_model_from_vec(objectName, model_depth)
    existing_models_searched[depth_plus_obj] = the_result
    return the_result

In [None]:
import os
import torch
from PIL import Image
import open_clip
from torch.cuda.amp import autocast
import torch.backends.cudnn as cudnn
# cuDNN settings: benchmark mode on, deterministic mode off.
cudnn.benchmark = True
torch.backends.cudnn.deterministic = False 

import os
import torch
from PIL import Image
import open_clip

In [None]:
import os
import time
import open_clip
import win32api, win32process, win32con
import threading

# Guards the shared `images_already_seen` set in the threaded image scorer.
threading_lock = threading.Lock()

def find_similar_images_with_negatives_threaded(
    pos_prompts, 
    neg_prompts, 
    image_folder,   
    model,     
    preprocess, 
    pos_weights, 
    neg_weights, 
    device,
    batch_size,
    group_similarities,
    images_already_seen,
    timeout_after_batch
):
    """
    Poll `image_folder` and score every new image against a weighted text query.

    The query vector is the weighted sum of the normalised positive prompt
    embeddings minus the weighted sum of the negative ones, normalised again.
    Image files named "<group>_<rest>.<ext>" are claimed through
    `images_already_seen` (under `threading_lock`), preprocessed, batched and
    encoded; each similarity is appended to `group_similarities[<group>]`.

    The loop ends when `group_similarities` holds at least
    `total_num_variations_per_steps` groups, or after `max_sleeps`
    consecutive idle polls during which the folder's file count did not
    change.

    Args:
        pos_prompts / neg_prompts: Lists of prompt strings (either may be empty).
        image_folder: Directory that is polled for images.
        model: CLIP model providing `encode_text` / `encode_image`.
        preprocess: Callable turning a PIL image into a tensor.
        pos_weights / neg_weights: One weight per prompt.
        device: Device string, also used as the key into `device_free`.
        batch_size: Maximum number of images encoded at once.
        group_similarities: Dict updated in place, group id -> list of floats.
        images_already_seen: Set of claimed filenames, updated in place.
            NOTE(review): presumably shared between several worker threads — confirm.
        timeout_after_batch: Seconds to sleep after each processed batch.

    Returns None; results are delivered through `group_similarities`.

    Raises:
        ValueError: if both prompt lists are empty.
    """
    if not pos_prompts and not neg_prompts:
        # Without any prompt there is no query vector to normalise.
        raise ValueError("At least one positive or negative prompt is required.")

    # autocast takes a bare device *type* ("cuda"/"cpu"), not "cuda:1".
    autocast_type = torch.device(device).type

    # A file that fails to load (for example because it is still being
    # written) is retried on later polls, at most this many times.
    max_load_retries = 5
    load_failures = {}

    def encode_weighted_prompts(prompts, weights):
        # Encode the prompts, normalise each embedding, return the weighted sum.
        tokens = open_clip.tokenize(prompts).to(device)
        with torch.no_grad(), torch.amp.autocast(device_type=autocast_type, dtype=torch.float16):
            embeddings = model.encode_text(tokens)
            embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
        weighted = torch.stack([w * emb for w, emb in zip(weights, embeddings)], dim=0)
        return weighted.sum(dim=0)

    combined_pos = encode_weighted_prompts(pos_prompts, pos_weights) if pos_prompts else 0
    print("pos_prompts:", pos_prompts)

    combined_neg = encode_weighted_prompts(neg_prompts, neg_weights) if neg_prompts else 0
    print("neg_prompts:", neg_prompts)

    # --- Combine positive and negative embeddings ---
    combined_text = combined_pos - combined_neg
    combined_text = combined_text / combined_text.norm()

    num_sleeps = 0
    prev_num_files = 0

    def process_batch(image_batch, group_ids_batch):
        # Encode one batch and append each similarity to its group's list.
        if not image_batch:
            return

        device_free[device] = False
        try:
            print("process_batch", len(image_batch), device)

            # Stack all images in the batch into one tensor
            image_input = torch.stack(image_batch, dim=0).to(device)

            # Compute image features for the entire batch
            with torch.no_grad(), torch.amp.autocast(device_type=autocast_type, dtype=torch.float16):
                image_features = model.encode_image(image_input)
                image_features = image_features / image_features.norm(dim=-1, keepdim=True)

            # One similarity per image in the batch.
            similarities = (combined_text.unsqueeze(0) @ image_features.T).squeeze(0)

            for gid, sim in zip(group_ids_batch, similarities.tolist()):
                group_similarities.setdefault(gid, []).append(sim)

            print("process_batch_finish", len(image_batch), device)
            time.sleep(timeout_after_batch)
        finally:
            # Always release the device. If this flag stayed False after an
            # error, no batch would ever be processed on this device again
            # and the polling loop could not terminate.
            device_free[device] = True

    prev_num_sims = 0

    num_images = 0
    image_batch = []
    group_ids_batch = []

    while True:
        try:
            if len(image_batch) == 0:
                if len(group_similarities) >= total_num_variations_per_steps:
                    break
                elif prev_num_sims == len(group_similarities):
                    # No progress since the last poll: count idle polls while
                    # the folder's file count stays unchanged.
                    num_files = len(os.listdir(image_folder))
                    if prev_num_files > 0:
                        print("num_sleeps", num_sleeps, "prev_num_files", prev_num_files, "num_files", num_files)
                    if num_files == prev_num_files and prev_num_files > 0:
                        num_sleeps += 1
                        if num_sleeps >= max_sleeps:
                            break
                    else:
                        num_sleeps = 0
                    prev_num_files = num_files

            # Top the batch up with images nobody has claimed yet.
            if len(image_batch) < batch_size:
                for filename in os.listdir(image_folder):

                    filepath = os.path.join(image_folder, filename)
                    # Skip non-image files.
                    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
                        continue

                    # Extract group ID from filename: e.g., "xxxx_123.png" -> group_id = "xxxx"
                    base_name, _ = os.path.splitext(filename)
                    if "_" not in base_name:
                        # Files without an underscore carry no group id and are skipped.
                        continue

                    with threading_lock:
                        if filename in images_already_seen:
                            continue

                        images_already_seen.add(filename)

                    group_id = base_name.split("_")[0]

                    # Load and preprocess the image; the context manager
                    # closes the file handle right after decoding.
                    try:
                        with Image.open(filepath) as raw_image:
                            image_tensor = preprocess(raw_image.convert("RGB"))
                    except Exception as load_error:
                        load_failures[filename] = load_failures.get(filename, 0) + 1
                        print("Exception", load_error)
                        if load_failures[filename] < max_load_retries:
                            # Give the claim back so a later poll can retry.
                            with threading_lock:
                                images_already_seen.discard(filename)
                        continue

                    # Add to our batch
                    image_batch.append(image_tensor)
                    group_ids_batch.append(group_id)
                    print("num_images", num_images, "images_already_seen", len(images_already_seen), "group_similarities", len(group_similarities))
                    num_images += 1

                    # Stop collecting once the batch is full.
                    if len(image_batch) >= batch_size:
                        break

            # Devices without an entry in device_free (e.g. "cpu") count as free.
            if len(image_batch) > 0 and device_free.get(device, True):
                process_batch(image_batch, group_ids_batch)
                num_images = 0
                image_batch = []
                group_ids_batch = []

            prev_num_sims = len(group_similarities)
        except Exception as e:
            # Best effort: report the error and keep polling.
            print("Exception", e)
        time.sleep(0.05)


In [None]:

def find_similar_images_with_negatives(
    pos_prompts,
    neg_prompts,
    image_folder,
    model,
    preprocess,
    pos_weights=None,
    neg_weights=None,
    device="gpu",
    top_k=None,
    batch_size=250,
):
    """Rank image groups in a folder by similarity to a weighted mix of text prompts.

    A single query vector is built as (weighted sum of positive prompt
    embeddings) - (weighted sum of negative prompt embeddings), normalised to
    unit length. Every image in ``image_folder`` whose file name looks like
    ``<group>_<anything>.<image extension>`` is embedded, scored against that
    vector, and the scores are averaged per ``<group>``.

    Args:
        pos_prompts: List of prompts the images should match (may be empty).
        neg_prompts: List of prompts the images should not match (may be empty).
        image_folder: Directory that is listed once for image files.
        model: Object providing ``encode_text`` and ``encode_image``.
        preprocess: Callable turning a PIL image into a tensor; the results are
            combined with ``torch.stack``.
        pos_weights: One weight per positive prompt; defaults to 1.0 each.
        neg_weights: One weight per negative prompt; defaults to 1.0 each.
        device: Torch device string the tensors are moved to, e.g. "cuda:0".
            NOTE(review): the default "gpu" does not look like a device string
            torch accepts - pass the device explicitly.
        top_k: If given, only the ``top_k`` best groups are returned.
        batch_size: Number of images encoded per forward pass.

    Returns:
        List of ``(group_id, average_similarity)`` tuples, best first.

    Raises:
        ValueError: if both prompt lists are empty (there is nothing to score
            against; previously this surfaced as an AttributeError on ``0``).
    """
    if not pos_prompts and not neg_prompts:
        raise ValueError("At least one positive or negative prompt is required.")

    # Default weights: 1.0 for all prompts if not provided.
    if pos_weights is None:
        pos_weights = [1.0] * len(pos_prompts)
    if neg_weights is None:
        neg_weights = [1.0] * len(neg_prompts)

    # autocast is selected by device *type* ("cuda", "cpu", ...), so strip any
    # device index such as the ":1" in "cuda:1" before handing it over.
    autocast_device_type = torch.device(device).type

    def encode_weighted_prompts(prompts, weights):
        """Return the weighted sum of the unit-length text embeddings of ``prompts``."""
        tokens = open_clip.tokenize(prompts).to(device)
        with torch.no_grad(), torch.amp.autocast(device_type=autocast_device_type, dtype=torch.float16):
            embeddings = model.encode_text(tokens)
            embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
        # zip() pairs each embedding with its weight (extra items on either side are ignored).
        weighted = torch.stack([w * emb for w, emb in zip(weights, embeddings)], dim=0)
        return weighted.sum(dim=0)

    # --- Positive and negative text embeddings (0 when a list is empty) ---
    combined_pos = encode_weighted_prompts(pos_prompts, pos_weights) if pos_prompts else 0
    print("pos_prompts:", pos_prompts)

    combined_neg = encode_weighted_prompts(neg_prompts, neg_weights) if neg_prompts else 0
    print("neg_prompts:", neg_prompts)

    # --- Combine positive and negative embeddings into one unit query vector ---
    combined_text = combined_pos - combined_neg
    combined_text = combined_text / combined_text.norm()

    # Dictionary to store {group_id: [list_of_similarities]}
    group_similarities = {}

    def process_batch(image_batch, group_ids_batch):
        """Encode one batch of image tensors and record each similarity under its group id."""
        print("process_batch")
        if not image_batch:
            return

        # Stack all images in the batch into one tensor
        image_input = torch.stack(image_batch, dim=0).to(device)

        # Compute unit-length image features for the entire batch
        with torch.no_grad(), torch.amp.autocast(device_type=autocast_device_type, dtype=torch.float16):
            image_features = model.encode_image(image_input)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)

        # One similarity per image in the batch
        similarities = (combined_text.unsqueeze(0) @ image_features.T).squeeze(0)

        for gid, sim in zip(group_ids_batch, similarities):
            group_similarities.setdefault(gid, []).append(sim.item())

    # --- Iterate over all images in the specified folder ---
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_batch = []
    group_ids_batch = []
    num_images = 0
    for filename in os.listdir(image_folder):
        # Skip non-image files.
        if not filename.lower().endswith(image_extensions):
            continue

        # Extract group ID from filename: e.g., "xxxx_123.png" -> group_id = "xxxx"
        base_name, _ = os.path.splitext(filename)
        if "_" not in base_name:
            continue
        group_id = base_name.split("_")[0]

        # Load and preprocess the image; the context manager releases the file
        # handle even for formats PIL keeps open after loading.
        filepath = os.path.join(image_folder, filename)
        with Image.open(filepath) as raw_image:
            image_tensor = preprocess(raw_image.convert("RGB"))

        image_batch.append(image_tensor)
        group_ids_batch.append(group_id)
        print("num_images", num_images)
        num_images += 1

        # If we reached the batch size, process and reset
        if len(image_batch) == batch_size:
            process_batch(image_batch, group_ids_batch)
            image_batch = []
            group_ids_batch = []

    # Process any leftover images in the last batch
    if len(image_batch) > 0:
        process_batch(image_batch, group_ids_batch)

    # --- Average similarity per group, best first ---
    group_avg_similarities = [
        (gid, sum(sims) / len(sims)) for gid, sims in group_similarities.items()
    ]
    group_avg_similarities.sort(key=lambda x: x[1], reverse=True)

    # --- If top_k is specified, truncate to top_k groups ---
    if top_k is not None:
        group_avg_similarities = group_avg_similarities[:top_k]

    print(f"Processed {num_images} images in total.")
    return group_avg_similarities


In [None]:
import re
import random

def modify_function_calls(input_string, random_iter):
    """Set the rotation of every ``.rotate(...)`` call to ``random_iter * 90``.

    Numeric arguments of each ``.rotate(...)`` call are replaced by
    ``random_iter * 90`` (written as an integer); non-numeric arguments are
    kept as they are. If the text contains no ``.rotate`` at all, a
    ``.rotate(<random_iter * 90>)`` call is appended after every ``)`` that is
    directly followed by a newline.

    Args:
        input_string: Text containing the generated function calls.
        random_iter: Multiplier for the 90-degree step.

    Returns:
        The modified text.
    """
    # Function names whose arguments are rewritten.
    # Add more function names here if needed (e.g. '.clone', '.size').
    keywords = ['.rotate']

    # The keywords are literal text: escape them so the leading '.' does not
    # act as a regex wildcard (otherwise e.g. "autorotate(" is matched too).
    alternation = "|".join(re.escape(keyword) for keyword in keywords)
    pattern = rf'({alternation})\(\s*([^)]*)\)'

    def randomize_numbers(match):
        """Rebuild one matched call, e.g. ``.rotate(45)``, with its numbers replaced."""
        func_name = match.group(1)   # e.g. '.rotate'
        args_str = match.group(2)    # e.g. '45'

        new_args = []
        for arg in (piece.strip() for piece in args_str.split(',')):
            try:
                val = float(arg)
                if func_name == '.rotate':
                    val = random_iter * 90
                new_args.append(str(int(val)))
            except ValueError:
                # Not a number (variable name, expression, empty): keep it as-is
                new_args.append(arg)

        return f"{func_name}({', '.join(new_args)})"

    # Perform the substitution on the entire string
    modified_string = re.sub(pattern, randomize_numbers, input_string)
    if ".rotate" not in modified_string:
        # No rotation present anywhere: add one to each call that ends a line.
        modified_string = modified_string.replace(")\n", f').rotate({str(int(random_iter) * 90)})\n')

    return modified_string

In [None]:

# Directory whose files clear_screenshot_dir() deletes.
directory_path = "./node_webgl_render_agentic/screenshots"

def clear_screenshot_dir():
    """Delete every regular file in ``directory_path``.

    Does nothing when ``do_clear_screenshot_dir`` is off or when
    ``do_vision_test_mode`` is on. A failed pass (for example a file that
    vanished or is locked while being removed) is reported and the whole pass
    is retried after a short pause until one pass completes cleanly. If the
    directory itself does not exist there is nothing to clear, so the function
    returns instead of retrying forever.
    """
    if not do_clear_screenshot_dir:
        return

    if do_vision_test_mode:
        return

    while True:
        try:
            for file_name in os.listdir(directory_path):
                file_path = os.path.join(directory_path, file_name)
                if os.path.isfile(file_path):  # Ensure it's a file
                    os.remove(file_path)
            return
        except Exception as e:
            print("Exception", e)
            if not os.path.isdir(directory_path):
                # Retrying cannot succeed while the directory is missing.
                return
            # Back off briefly so a persistent error does not busy-spin the CPU.
            time.sleep(0.05)


# Reset the renderer input file to a single empty option, then drop any
# screenshots left over from a previous run.
the_output_json = { 'options': [""], 'prompt': "none" }
with open('./node_webgl_render_agentic/world_text.json', 'w') as file:
    json.dump(the_output_json, file)
clear_screenshot_dir()

In [None]:
def gen_baseline(device, image_folder, model, preprocess, base_javascript = """room("Baseline").topLeft(0, 0).bottomRight(15, 15);"""):
    """Compute the average image embedding of the renders of a baseline room.

    Clears the screenshot directory, writes ``base_javascript`` as the only
    option of ``world_text.json`` (prompt "baseline"), then polls
    ``image_folder`` until it holds at least ``num_images_per_variation``
    entries and at least one usable image. All image files whose base name
    contains an underscore are encoded in one batch.

    Args:
        device: Torch device string the image batch is moved to, e.g. "cuda:1".
        image_folder: Directory that is polled for rendered images.
        model: Object providing ``encode_image``.
        preprocess: Callable turning a PIL image into a tensor.
        base_javascript: Script written as the single render option.

    Returns:
        The unit-length mean of the unit-length image features.
    """
    clear_screenshot_dir()
    with open('./node_webgl_render_agentic/world_text.json', 'w') as file:
        the_output_json = { 'options': [base_javascript], 'prompt': f"baseline" }
        json.dump(the_output_json, file)

    # autocast is selected by device *type*; drop any index such as ":1".
    autocast_device_type = torch.device(device).type
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    while True:
        image_files = os.listdir(image_folder)
        if len(image_files) < num_images_per_variation:
            time.sleep(0.1)
            continue

        image_batch = []
        for filename in image_files:
            if not filename.lower().endswith(image_extensions):
                continue

            # Only files named like "xxxx_123.png" are used.
            base_name, _ = os.path.splitext(filename)
            if "_" not in base_name:
                continue

            filepath = os.path.join(image_folder, filename)
            with Image.open(filepath) as raw_image:
                image_batch.append(preprocess(raw_image.convert("RGB")))

        if not image_batch:
            # Enough directory entries, but none is a usable image yet:
            # keep waiting instead of failing in torch.stack([]).
            time.sleep(0.1)
            continue

        # Stack all images in the batch into one tensor
        image_input = torch.stack(image_batch, dim=0).to(device)

        # Compute unit-length image features for the entire batch
        with torch.no_grad(), torch.amp.autocast(device_type=autocast_device_type, dtype=torch.float16):
            image_features = model.encode_image(image_input)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)

        avg_image_features = image_features.mean(dim=0)
        avg_image_features = avg_image_features / avg_image_features.norm(dim=-1, keepdim=True)

        return avg_image_features

# Device string used by the (currently disabled) baseline computation below.
d = "cuda:1"
# Disabled on purpose: change to True to render the baseline room and compute
# its average image embedding with the CLIP model/preprocess stored under `d`.
if False:
    baseline_image_features = gen_baseline(d, "./node_webgl_render_agentic/screenshots", clip_model[d], clip_preprocess[d])
    print("baseline_image_features", len(baseline_image_features))

In [None]:

data_file_name = "./model_outputs_llama8b_vision.json"
data_file_name_backup = "./model_outputs_llama8b_vision_backup.json"

# Accumulated results: 'data' holds one entry per generated candidate,
# 'sims' one entry per search iteration. Start empty, then try to resume.
the_data = { 'data': [], 'sims': [] }
try:
    with open(data_file_name, 'r') as file:
        the_data = json.load(file)
    print("the_data", len(the_data['data']))
except Exception as e:
    print(e)

    # Primary file missing or unreadable: fall back to the last backup.
    try:
        with open(data_file_name_backup, 'r') as file:
            the_data = json.load(file)
        print("the_data", len(the_data['data']))
    except Exception as backup_error:
        # Neither file is usable (e.g. first run): begin with an empty dataset
        # instead of aborting the notebook.
        print(backup_error)
        the_data = { 'data': [], 'sims': [] }
else:
    # The primary file parsed cleanly, so refresh the backup from it. A failed
    # backup write is reported but must not discard the data just loaded.
    try:
        with open(data_file_name_backup, 'w') as file:
            json.dump(the_data, file)
    except Exception as e:
        print(e)


In [None]:
import json
import math
import uuid
from IPython.display import clear_output
import threading

# Several download_in_background threads share the same `data_set_iters` list,
# `the_output_json` dict and world_text.json file; this lock makes each
# "record candidate + rewrite file" step atomic.
_world_text_lock = threading.Lock()

def download_in_background(
    base_prompt,
    the_javascript,
    golden_model_uids,
    data_set_iters,
    the_output_json,
    extra_string,
    world_uuid
):
    """Generate one model response and publish its candidate worlds for rendering.

    One response is requested via ``gen_vr_content``. For every
    ``model_depth`` in ``range(model_iters)`` and every ``random_iter`` in
    ``range(num_randomized + 1)`` a candidate is built from the first
    JavaScript line of that response (rotations rewritten when
    ``random_iter > 0``), the 3D models it places are resolved with
    ``downloadModel``, and the candidate is appended to the shared outputs.

    Args:
        base_prompt: World title, stored with each candidate.
        the_javascript: Script accumulated so far; the new line is appended to it.
        golden_model_uids: Mapping model name -> uid already chosen; copied, not mutated.
        data_set_iters: Shared list receiving one record per candidate.
        the_output_json: Shared dict whose 'options' list receives the candidate
            script; it is rewritten to world_text.json after every candidate.
        extra_string: Extra instruction passed to ``gen_vr_content``.
        world_uuid: Identifier stored with each candidate.
    """
    the_output, formatted_prompt = gen_vr_content(the_javascript, base_prompt, extra_string)

    for model_depth in range(model_iters):
        for random_iter in range(num_randomized + 1):
            this_output_iter = the_output
            if random_iter > 0:
                this_output_iter = modify_function_calls(this_output_iter, random_iter)

            the_data_set_iter = {"world_title": base_prompt, "javascript": the_javascript, "prompt": formatted_prompt, "response": this_output_iter, 'rank': -1, "model_depth": model_depth, 'world_uid': world_uuid}

            # Strip comments, squeeze blank lines, re-attach chained ".method()"
            # lines to the call they belong to, then keep only the first line.
            next_prompt = remove_js_comments(extract_js_blocks(this_output_iter)).replace('\n\n','\n').replace('\n\n','\n').replace('\n\n','\n').replace('\n\n','\n')
            next_prompt = '\n'.join([line.strip() for line in next_prompt.split('\n')]).replace('\n.', '.')
            next_prompt = next_prompt.split('\n')[0].strip()

            world_text = the_javascript + "\n" + next_prompt

            list_of_models = re.findall(r'place3dmodel\("(.*?)"', world_text)

            # Start from the already chosen models and resolve any new names.
            model_UIDS = golden_model_uids.copy()
            for modelToDownload in list_of_models:
                if modelToDownload not in model_UIDS:
                    try:
                        full_model_title = f"{modelToDownload}"
                        print("modelToDownload", full_model_title, model_depth)
                        model_UIDS[modelToDownload] = downloadModel(full_model_title, model_depth)
                    except Exception as e:
                        # Best effort: the candidate is still published without this model.
                        print("Exception", e)

            for key in model_UIDS:
                world_text = f"""load3dModel("{key}", "{model_UIDS[key]}")\n""" + world_text

            the_data_set_iter['model_UIDS'] = model_UIDS

            # Append the record and its option together and rewrite the file
            # while holding the lock: this keeps both lists in the same order
            # (presumably what the later `data_set_iters[int(group_id) - 1]`
            # lookup relies on - verify against the renderer) and prevents two
            # threads from writing world_text.json at the same time.
            with _world_text_lock:
                data_set_iters.append(the_data_set_iter)
                the_output_json['options'].append(world_text)
                with open('./node_webgl_render_agentic/world_text.json', 'w') as file:
                    json.dump(the_output_json, file)
            print(world_text)
        time.sleep(0.01)
    time.sleep(0.05)

# NOTE(review): not read anywhere in the visible part of this notebook - confirm it is still needed.
use_two_pass = False

def generate_vr_world(base_prompt):
    """Grow a room script for ``base_prompt`` one line per round, keeping the best-scoring candidate.

    Each of the ``num_iters`` rounds:
      1. starts scoring threads (``find_similar_images_with_negatives_threaded``)
         for "cuda:1" and "cuda:0" that share one ``group_similarities`` dict,
      2. starts ``num_variations`` generator threads (``download_in_background``),
      3. waits for all of them, averages the similarities per image group and
         sorts the groups best first,
      4. copies the best group's colour screenshots to
         ``./vr_screenshots/<base_prompt>_<random>/<round>/``,
      5. appends the first JavaScript line of the winning candidate to the
         running script, stores the rank of every scored candidate, and writes
         the global ``the_data`` to ``data_file_name``.

    Args:
        base_prompt: World title; used in the room() call, the scoring prompts
            and the output folder name.

    Returns:
        The accumulated JavaScript, prefixed with one ``load3dModel`` line per
        model of the last winning candidate.

    Raises:
        IndexError: if a round ends without any scored image group, or a group
            id has no matching candidate record.
    """
    import shutil

    global the_data

    world_uuid = str(uuid.uuid4())

    random_number = random.randint(0, 10000)
    image_folder_name = f"./vr_screenshots/{base_prompt}_{random_number}"
    # makedirs also creates ./vr_screenshots when it does not exist yet.
    os.makedirs(image_folder_name, exist_ok=True)

    golden_model_uids = { }
    the_javascript = f'room("{base_prompt}").topLeft(0, 0).bottomRight(15, 15);'

    # Scoring prompts. The strings are model inputs and are kept verbatim.
    pos_prompts = [f"A gray room with correctly sized objects based on \"{base_prompt}\"",
                   "a large gray room with well organized and correctly sized furnature",
                   f"rich detailed 3D environment with well aligned & organized objects and large scale architecture features titled {base_prompt} (with no overlapping, disconnected, or hovering objects and no doors, windows, or pictures hoving in the middle)  Objects aren't overcrowded.", 'attractive & well organized room with doors, pictures, and windows flush (attached to) with the walls',
                   'symmetrical room design with appealing lines and geometry (well organized and thoughtful)',
                   'cohesive furnature layout (master interior design)',
                   'gorgeous room design (as if done by a master artist)',
                   'aesthetically pleasing architecture (as if designed by a master architecht)',
                   'attractive room design (with wonderful colors, amazing design choices, and cool objects)']
    neg_prompts = ["ugly room design with clashing colors and poor lighting and overcrowded objects", "cluttered room design with intersecting, hovering, disconnected, or overlapping objects", "out of proportion objects (objects with bizarre sizes)", 'windows, doors, or pictures floating in the center of the room', 'objects too large or too crowded (objects crowding eachother)', 'bad design with poor object placements, sizes, and choices with crowded objects', 'awkward and overcrowded room design', 'overcrowded and unattractive room design', 'ugly object placement in a room', 'a room design that makes no sense']
    pos_weights=[0.2, 0.2, 4.5, 0.2, 0.2, 0.2, 0.2, 0.2, 0.5]
    neg_weights=[0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1, 0.5, 0.1]

    directory_path = "./node_webgl_render_agentic/screenshots"

    def start_scoring_threads(device_name, thread_count, batch_size, delay,
                              group_similarities, images_already_seen, started_threads):
        """Start ``thread_count`` daemon scoring threads for one device and record them.

        ``batch_size`` and ``delay`` are forwarded positionally to the scoring
        function (presumably its batch size and per-device delay - its
        signature is defined elsewhere in the notebook).
        """
        for _ in range(thread_count):
            scoring_thread = threading.Thread(
                target=find_similar_images_with_negatives_threaded,
                args=(
                    pos_prompts,
                    neg_prompts,
                    directory_path,
                    clip_model[device_name],
                    clip_preprocess[device_name],
                    pos_weights,
                    neg_weights,
                    device_name,
                    batch_size,
                    group_similarities,
                    images_already_seen,
                    delay
                ),
                daemon=True  # do not keep the process alive for a stuck scorer
            )
            scoring_thread.start()
            started_threads.append(scoring_thread)

    for i in range(num_iters):
        image_folder_name_withiter = f"{image_folder_name}/{i}"
        os.makedirs(image_folder_name_withiter, exist_ok=True)

        clear_output(wait=True)
        data_set_iters = []
        the_output_json = { 'options': [], 'prompt': f"{base_prompt}_{i}" }

        group_similarities = {}
        images_already_seen = set()

        image_proc_threads = []
        image_sims = []
        start_scoring_threads("cuda:1", num_image_proc_threads, 250, quadro_delay,
                              group_similarities, images_already_seen, image_proc_threads)
        start_scoring_threads("cuda:0", num_image_proc_threads * 5, batchsize_3090, delay_3090,
                              group_similarities, images_already_seen, image_proc_threads)

        variation_threads = []
        for v in range(num_variations):
            # The first two variations get a specific focus hint.
            extra_string = ""
            if v == 0:
                extra_string = "\nfocus on placing terrain features: windows, doors, ceiling beams, pools, gardens, balconies, stairs, & stairsteps etc..."
            elif v == 1:
                # This branch used to test `v == 0` again and could never run.
                extra_string = "\nfocus objects on top of other objects.  Books on shelves.  Plates on tables.  Papers on chairs etc..."

            # Late rounds override the hint for every variation.
            if i > 7:
                if i % 4 > 0:
                    extra_string = "\nfocus on placing new objects on the ceiling (focus on light objects and large ceiling features like beams or skylights)."
                else:
                    extra_string = "\nfocus on placing new objects on the walls (like lights, windows, doors, or archways)."

            background_thread = threading.Thread(
                target=download_in_background,
                args=(
                    base_prompt,
                    the_javascript,
                    golden_model_uids,
                    data_set_iters,
                    the_output_json,
                    extra_string,
                    world_uuid
                ),
                daemon=True  # do not keep the process alive for a stuck generator
            )

            background_thread.start()
            variation_threads.append(background_thread)
            # Stagger the generator starts: 0s, 2s, 4s, ... capped at 12s.
            time.sleep(min(12, v * 2))

        print("image_proc_thread.join()", len(image_sims))

        for b in variation_threads:
            b.join()

        # unload_model(TABBY_BASE_URLS[0])

        for image_proc_thread in image_proc_threads:
            image_proc_thread.join()

        # --- Average similarity per group, best first ---
        group_avg_similarities = [
            (gid, sum(sims) / len(sims)) for gid, sims in group_similarities.items()
        ]

        # unload_clip_model("cuda:0")

        group_avg_similarities.sort(key=lambda x: x[1], reverse=True)
        best_image_cat = group_avg_similarities[0][0]
        best_image_sim = group_avg_similarities[0][1]
        the_data['sims'].append({ 'best_image_sim': best_image_sim, 'iteration': i, 'uid': world_uuid, 'base_prompt': base_prompt })
        print("best_image_sim", best_image_sim)

        # The screenshot loops use their own index so the round counter `i`
        # is never overwritten.
        if do_open_best_images:
            try:
                for shot_index in range(4):
                    image = Image.open(f"{directory_path}/{best_image_cat}_color_{shot_index}.png")
                    image.show()
            except Exception as e:
                print("Exception", e)
        print("That's all folks!", len(group_avg_similarities), "best:", best_image_cat)

        # Keep the winning screenshots. NOTE(review): the copies get a .jpg
        # suffix although the bytes are the original PNG - kept as-is.
        for shot_index in range(32):
            try:
                shutil.copy(f"{directory_path}/{best_image_cat}_color_{shot_index}.png", image_folder_name_withiter + f"/{best_image_cat}_color_{shot_index}.jpg")
            except Exception as e:
                print("Exception", e)

        clear_screenshot_dir()
        image_sims = group_avg_similarities
        print("image_sims", image_sims)
        for the_rank, item in enumerate(image_sims):
            # Group ids are 1-based positions into data_set_iters.
            name = int(item[0])
            print("name", name)
            ranked_candidate = data_set_iters[name - 1]

            if the_rank == 0:
                print("name", name)
                # Same clean-up as in download_in_background: drop comments,
                # squeeze blank lines, re-attach chained ".method()" lines.
                next_prompt = remove_js_comments(extract_js_blocks(ranked_candidate['response'])).replace('\n\n','\n').replace('\n\n','\n').replace('\n\n','\n').replace('\n\n','\n')
                next_prompt = '\n'.join([line.strip() for line in next_prompt.split('\n')]).replace('\n.', '.')

                # The winner's models and first line carry over to the next round.
                golden_model_uids = ranked_candidate['model_UIDS']
                next_prompt = next_prompt.split('\n')[0]
                print(next_prompt)
                the_javascript += "\n" + next_prompt.strip()

            ranked_candidate['rank'] = the_rank
            print(ranked_candidate, the_rank)

        the_data['data'].extend(data_set_iters)
        print("len(the_data)", len(the_data['data']))

        with open(data_file_name, 'w') as file:
            json.dump(obj=the_data,fp=file)

    for key in golden_model_uids:
        the_javascript = f"""load3dModel("{key}", "{golden_model_uids[key]}")\n""" + the_javascript

    return the_javascript


In [None]:
def read_vr_prompts(file_path = "./indoor_vr_prompts.txt"):
    """Read one world prompt per line from ``file_path``.

    Surrounding whitespace is stripped from every line. Lines that are empty
    after stripping are skipped so a blank line in the file does not become an
    empty prompt.

    Args:
        file_path: Text file with one prompt per line.

    Returns:
        List of non-empty prompt strings in file order.
    """
    with open(file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [prompt for prompt in stripped_lines if prompt]

# Load the world prompts from the default prompt file and report how many there are.
vr_prompts = read_vr_prompts()
print("vr_prompts", len(vr_prompts))

# Visit the prompts in a random order (no seed is set, so the order differs per run).
import random
random.shuffle(vr_prompts)

# Start from an empty screenshot directory.
clear_screenshot_dir()

# Number of worlds generated per prompt.
num_world_tries = 1
# World generation is skipped entirely when the notebook runs in vision test mode.
do_it = not do_vision_test_mode
if do_it:
    for prompt in vr_prompts:
        for _ in range(num_world_tries):
            the_final_world = generate_vr_world(prompt)
            print("Final World JS", the_final_world)

In [None]:
if do_vision_test_mode:
    # Score a fixed folder of test screenshots instead of freshly rendered ones.
    directory_path = "./node_webgl_render_agentic/test_screenshots"

    total_num_variations_per_steps = len(os.listdir(directory_path))
    num_image_proc_threads = 3
    image_proc_threads = []
    base_prompt = "Lighthouse Keep's study"

    pos_prompts = [
        f"A gray room with correctly sized objects based on \"{base_prompt}\"",
        "a large gray room with well organized and correctly sized furnature",
        base_prompt,
        'attractive & well organized room',
        'symmetrical room design',
        'cohesive furnature layout',
        'good room design',
        'aesthetically pleasing architecture',
        'attractive room design',
    ]
    neg_prompts = [
        "ugly room design",
        "cluttered room design",
        "out of proportion objects",
        'small room within a room',
        'objects too large',
        'bad design',
        'awkward room design',
        'unattractive room design',
        'ugly object placement in a room',
        'glitched, inverted, fragmented, compressed, or floating structures',
        'Lighting Inconsistencies',
    ]
    pos_weights = [0.4, 0.4, 0.8, 0.2, 0.2, 0.2, 0.2, 0.2, 0.5]
    neg_weights = [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.1, 0.1, 0.5, 0.4, 0.4]

    images_already_seen = set()
    group_similarities = {}

    # One row per device: (device, number of threads, and the two trailing
    # positional arguments handed to the scoring function).
    for d, worker_count, worker_batch_arg, worker_delay_arg in (
        ("cuda:1", num_image_proc_threads, 250, 8.0),
        ("cuda:0", num_image_proc_threads * 5, 50, 0),
    ):
        for _ in range(worker_count):
            image_proc_thread = threading.Thread(
                target=find_similar_images_with_negatives_threaded,
                args=(
                    pos_prompts,
                    neg_prompts,
                    directory_path,
                    clip_model[d],
                    clip_preprocess[d],
                    pos_weights,
                    neg_weights,
                    d,
                    worker_batch_arg,
                    group_similarities,
                    images_already_seen,
                    worker_delay_arg,
                ),
                daemon=True,  # the threads are not joined; they end with the process
            )
            image_proc_thread.start()
            image_proc_threads.append(image_proc_thread)

    # Fixed wait for the scoring threads instead of joining them.
    time.sleep(60)

    # Average similarity per group, best first.
    group_avg_similarities = sorted(
        ((gid, sum(sims) / len(sims)) for gid, sims in group_similarities.items()),
        key=lambda x: x[1],
        reverse=True,
    )

    print(group_avg_similarities)
    

In [None]:
with open('./model_outputs_llama8b_vision.json', 'r') as file:
    loaded_outputs = json.load(file)

# generate_vr_world saves this file as {'data': [...], 'sims': [...]}, so the
# candidate records live under 'data'. Iterating the dict itself would only
# yield its keys. A plain list is still accepted as-is.
if isinstance(loaded_outputs, dict):
    input_output_pairs = loaded_outputs['data']
else:
    input_output_pairs = loaded_outputs
print(len(input_output_pairs))

In [None]:
# Keep the first occurrence of every distinct (prompt, response) combination.
seen = set()
unique_pairs = []

for pair in input_output_pairs:
    try:
        # A JSON string of the two fields is hashable and can act as the set key.
        rep = json.dumps((pair['prompt'], pair['response']), sort_keys=True)
    except Exception as e:
        # Entries for which no key can be built are reported and left out.
        print(e)
        continue

    if rep in seen:
        continue
    seen.add(rep)
    unique_pairs.append(pair)

print(len(input_output_pairs), "items before dedupe")
print(len(unique_pairs), "items after dedupe")

In [None]:

def format_response(in_response):
    """Cut a response down to its first ``place3dmodel`` statement and close the code fence.

    Lines are copied until either a second line mentioning ``place3dmodel`` or,
    once the first such line has been seen, a line containing a code fence is
    reached. The result always ends with a closing fence.
    """
    kept_lines = []
    model_seen = False
    for line in in_response.split('\n'):
        mentions_model = "place3dmodel" in line
        if mentions_model and model_seen:
            # A second model statement starts here: stop before it.
            break
        if mentions_model:
            model_seen = True
        if model_seen and "```" in line:
            # The fence after the first model statement ends the block.
            break
        kept_lines.append(line + "\n")

    return "".join(kept_lines) + "```"


# When True, the pair and top-slice files are written at the end of this cell.
actually_save = True
import json
import itertools

# Work on the de-duplicated records from here on.
input_output_pairs = unique_pairs

# Statistics and output filled in by generate_ab_pairs().
longest_prompt = longest_ans = ""
avg_prompt = avg_ans = num_counts = 0
top_slice_data = []

def generate_ab_pairs(items):
    """Collect the best-ranked fifth of ``items`` and update the length statistics.

    ``items`` are candidate records sharing the same starting JavaScript. They
    are sorted by ascending ``rank`` (0 is the best similarity) and the first
    ``max(1, len(items) / 5)`` are selected. Each selected record has its
    ``response`` rewritten in place by ``format_response`` and is appended to
    the global ``top_slice_data``; the globals ``longest_prompt``,
    ``longest_ans``, ``avg_prompt``, ``avg_ans`` and ``num_counts`` are updated.

    NOTE(review): records that were never ranked keep ``rank == -1`` and
    therefore sort ahead of rank 0 - confirm whether they should be excluded.

    Args:
        items: List of candidate dicts with 'rank', 'prompt' and 'response'.

    Returns:
        List of chosen/rejected pair dicts. Pair generation is currently
        switched off, so the list is always empty.
    """
    global longest_prompt
    global longest_ans
    global avg_ans
    global avg_prompt
    global num_counts

    k = int(max(1, len(items) / 5))
    items_sorted = sorted(items, key=lambda x: x['rank'])  # Best (lowest) rank first
    top_ranked_items = items_sorted[:k]
    for item in top_ranked_items:
        item['response'] = format_response(item['response'])
        top_slice_data.append(item)

        if len(item['prompt']) > len(longest_prompt):
            longest_prompt = item['prompt']
        # Compare against the longest *answer* so far (this used to compare
        # against the longest prompt).
        if len(item['response']) > len(longest_ans):
            longest_ans = item['response']
        avg_prompt += len(item['prompt'])
        avg_ans += len(item['response'])
        num_counts += 1

    pairs = []
    # Disabled: build a chosen/rejected pair for every two records with the
    # same prompt, different responses and different ranks.
    if False:
        for item1, item2 in itertools.combinations(items, 2):

            if item1['rank'] != item2['rank'] and item1['prompt'] == item2['prompt'] and item1['response'] != item2['response']:
                if item1['rank'] < item2['rank']:
                    pair = {'chosen': item1['response'], 'rejected': item2['response'], 'prompt': item1['prompt']}
                else:
                    pair = {'chosen': item2['response'], 'rejected': item1['response'], 'prompt': item1['prompt']}
                pairs.append(pair)

    return pairs

# Group the records by the JavaScript they started from; records in one group
# were ranked against each other.
by_js = { }
for item in input_output_pairs:
    by_js.setdefault(item['javascript'], []).append(item)


the_pairs = []

for key in by_js:
    try:
        the_pairs += generate_ab_pairs(by_js[key])
    except Exception as e:
        # A malformed group is reported and skipped; the others are still processed.
        print(e)

print("len(the_pairs)", len(the_pairs))

import json

if actually_save:
    with open('llama8b_vision_ab_pairs.json', 'w') as file:
        json.dump(the_pairs, file)
    with open('llama8b_vision_top_slice.json', 'w') as file:
        json.dump(top_slice_data, file)
print("actually_save", actually_save)

print("top_slice_data", len(top_slice_data))
print("len(longest_prompt)", len(longest_prompt))
print("len(longest_ans)", len(longest_ans))
# With no selected records there is nothing to average; report 0.0 instead of
# failing with ZeroDivisionError.
print("avg_prompt", avg_prompt / num_counts if num_counts else 0.0)
print("avg_ans", avg_ans / num_counts if num_counts else 0.0)

In [None]:
# Print the response stored in `longest_ans` by generate_ab_pairs for manual inspection.
print(longest_ans)

In [None]:
# Persist the model-search state. The payload is built and serialised *before*
# the file is opened for writing, so a failure (undefined name, value that is
# not JSON-serialisable) cannot truncate the previously saved file.
saved_model_data = { "existing_searches_done": existing_searches_done, "list_of_all_models": list_of_all_models, "index_to_model_uid": index_to_model_uid }
serialized_model_data = json.dumps(saved_model_data)
with open("saved_model_data.json", "w") as f:
    f.write(serialized_model_data)

# Feature tensors are moved to the CPU and detached before being saved.
torch.save(existing_image_features.cpu().detach(), existing_image_features_filename)
torch.save(existing_text_features.cpu().detach(), existing_text_features_filename)
print("list_of_all_models", len(list_of_all_models))
print("the_data", len(the_data['sims']))