In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
from multObjGenContext import *
from multObjGenFunctions import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face Hub id of the checkpoint used by every assistant in this notebook.
model_name_or_path = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"

# Load the "main" revision onto the CUDA device; code shipped inside the model
# repository is not executed (trust_remote_code=False).
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="cuda",
    trust_remote_code=False,
    revision="main",
)

# Tokenizer for the same checkpoint, requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
)

CUDA extension not installed.
CUDA extension not installed.
The cos_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class
The sin_cached attribute will be removed in 4.39. Bear in mind that its contents changed in v4.38. Use the forward method of RoPE from now on instead. It is not used in the `LlamaAttention` class


In [3]:
# Shared text-generation pipeline handed to all three assistants below.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Hard cap on generated tokens per request. NOTE(review): several recorded
    # outputs in the test cells end mid-word, which is consistent with this cap
    # being reached - confirm before relying on complete answers.
    max_new_tokens=50,
    # Sampling is enabled, so results differ from run to run.
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
)

In [4]:
# One assistant per pipeline stage; all share `pipe` and differ only in the
# context object they are constructed with.
object_list_assistant = ObjectListAssistant(
    pipe,
    objlist_context,
)
relational_mapping_assistant = RelationalMappingAssistant(
    pipe,
    relational_context,
)
grid_placement_assistant = GridPlacementAssistant(
    pipe,
    grid_context,
)

In [5]:
def multObjGen(prompt, grid_size=20):
    """Run the full prompt -> object-coordinates pipeline and draw the result.

    Every intermediate result is printed so a failing stage can be spotted:

    1. ``object_list_assistant`` turns ``prompt`` into a raw object-list reply.
    2. ``parse_complete_prompt`` splits that reply into a set name and objects.
    3. ``relational_mapping_assistant`` produces relations for the objects.
    4. ``grid_placement_assistant`` turns the relations into raw coordinates.
    5. ``parse_coordinates`` parses the raw coordinates.
    6. The parsed coordinates are placed on a grid, which is then printed.

    Args:
        prompt: Request forwarded to the object-list assistant.
        grid_size: Forwarded to ``place_objects_on_grid`` (default 20, the
            value that was previously hard-coded).

    Returns:
        The value returned by ``parse_coordinates`` for the generated
        coordinates.
    """
    # 1 - prompt -> objectlist (str)
    object_list_raw = object_list_assistant.process_request(prompt)
    print(object_list_raw)

    # 2 - objectlist (str) -> setName (str), objectList (list)
    # The set name is not needed by the later stages.
    _set_name, object_list = parse_complete_prompt(object_list_raw)

    # 3 - objectlist (list) -> relationsList (str)
    relations = relational_mapping_assistant.process_request(object_list)
    print(f"\n{relations}")

    # 4 - relationsList (str) -> objectCoordinateList (str)
    coordinates_raw = grid_placement_assistant.process_request(relations)
    print(f"\n{coordinates_raw}")

    # 5 objectCoordinateList (str) -> coordinateDict (dict)
    coordinates = parse_coordinates(coordinates_raw)
    # f-string formatting works for non-str values too; "\n" + <dict> raises
    # TypeError.
    print(f"\n{coordinates}")

    # Visual
    grid = place_objects_on_grid(coordinates, grid_size=grid_size)
    # A bare `print_grid` expression does nothing; the function must be called.
    print_grid(grid)

    return coordinates

-----------------
TESTING


Full Pipeline

Often crashes at #3

In [6]:
# Prompts for the end-to-end pipeline test.
# Every entry needs its own comma: Python silently concatenates adjacent string
# literals, which would merge two prompts into one.
test_cases_MOG = [
    "Complete a pc setup",
    "Complete a kitchen setup",
    "Complete batman's utility belt",
    "Complete american car set",
    "Complete a medieval weapons set",
    "Complete a bedroom setup",
    "Complete a fruit bowl",
    "Complete a pirate ship deck",
    "Complete a modern living room setup",
    "Complete a medieval marketplace",
    "Complete a science lab workspace",
]

# Push every prompt through the complete pipeline and print what it returns.
for full_pipeline_prompt in test_cases_MOG:
    print(multObjGen(full_pipeline_prompt))

Assistant: PC setup, Objects: Desktop, Monitor, Mouse, Keyboard, Office Chair
Error parsing


AttributeError: 'NoneType' object has no attribute 'group'

In [7]:
# Stage 1 in isolation: prompt -> raw object-list reply.
test_cases_object_list = [
    "Complete a kitchen setup",
    "Complete batman's utility belt",
    "Complete american car set",
    "Complete a medieval weapons set",
    "Complete a bedroom setup",
    "Complete a fruit bowl",
    "Complete a pirate ship deck",
    "Complete a modern living room setup",
    "Complete a medieval marketplace",
    "Complete a science lab workspace",
]

# Print each reply followed by a blank line and a 20-dash separator.
for object_list_prompt in test_cases_object_list:
    raw_reply = object_list_assistant.process_request(object_list_prompt)
    print(raw_reply)
    print("\n" + "-" * 20)

Assistant: Kitchen setup, Objects: sink, disposal unit, oven, microwave, refrigerator, countertop, stools, bar stool, dishwasher, garbage disposal, blender

--------------------
Response: batman's utility belt, Objects: belt buckle, zipper, whistle, shoehorn, hairbrush, brass knuckles, flashlight, carabiner, towel

--------------------
Complete: american car set, Objects: Ford F-150 pickup truck, GMC Sierra 1500 pickup truck, Dodge Charger muscle car, Chevrolet Corvette cou

--------------------
Complete: medieval weapons set, Objects: sword, shield, mace, helmet, chainmail, greaves, spear, dagger

--------------------
Complete: bedroom setup, Objects: bed, nightstand, lamp, dresser, wardrobe, chair

--------------------
Complete: fruit bowl, Objects: banana, apple, orange, grapefruit, peach, plum, pear, kiwi

--------------------
Assistant: Pirate ship deck, Objects: mast, rigging, sail, anchor, pennant, ropes, lantern, torch, coins (coin purse)

--------------------
Complete: modern 

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Complete: medieval marketplace, Objects: leather goods, silverware, pottery, linen, cloth, candlestick, basket, bowl, platter, coins, jewelry, armor, shield,

--------------------
Assistant: Science lab workspace, Objects: microscope, lab coat, testing equipment, chemical reagent, lab notebook, microscopic slides, temperature control unit, calibrated balance

--------------------


In [8]:
# Stage 3 inputs: each case is a list of object names to relate to one another.
test_cases_spatial = [
    # "monitor" is listed twice on purpose here - NOTE(review): presumably a
    # dual-monitor setup; confirm, otherwise drop the duplicate.
    ["keyboard", "monitor", "mouse", "pc", "speakers", "gaming chair", "rgb lights", "desk", "monitor"],
    ["guitar", "amplifier", "microphone", "music stand"],
    ["pan", "stove", "spatula", "cutting board"],
    ["helmet", "armor", "sword", "shield"],
    ["camera", "tripod", "lens", "flash"],
    ["tree", "bench", "fountain", "lamp post"],
    ["basketball", "hoop", "scoreboard", "bleachers"],
    ["train", "track", "station", "ticket booth"],
    ["painting", "easel", "paintbrush", "palette"],
    ["fish tank", "filter", "air pump", "plants"],
]

# Ask the relational assistant for relations per object list; print each reply
# followed by a blank line and a 20-dash separator.
for object_names in test_cases_spatial:
    relations_reply = relational_mapping_assistant.process_request(object_names)
    print(relations_reply)
    print("\n" + "-" * 20)

keyboard infrontof monitor, mouse totherightof keyboard, pc totherightof keyboard, speakers totherightof monitor, gamning chair totherightof monitor, rgb lights totherightof monitor, desk

--------------------
guitar infrontof amplifier, amplifier totherightof guitar, microphone totherightof amplifier
amplifier infrontof music stand, music stand totherightof amplifier
music stand infr

--------------------
pan infrontof stove, spatula totherightof pan, cutting board totherightof pan

Note: This algorithm is only a suggestion. The exact rules for what constitutes a valid relation can vary depending on your

--------------------
Helmet: infrontof sword, armor totherightof helmet, shield totherightof sword
Armor: totherightof sword, helmet totherightof armor, sword totherightof shield

--------------------
camera infrontof tripod, tripod totherightof camera, lens totherightof flash

This code should handle all possible combinations and relationships between objects. If you have any questi

In [9]:
# Stage 4 in isolation: each case is a list of relation strings.
# NOTE(review): multObjGen passes the relational assistant's reply to this
# stage unchanged, whereas these cases pass lists - confirm that
# process_request accepts both forms.
test_cases_coordinates = [
    ["keyboard infrontof monitor", "mouse totherightof keyboard", "pc totheleftof monitor"],
    ["guitar infrontof amplifier", "microphone totherightof guitar", "music stand totheleftof guitar"],
    ["pan infrontof stove", "spatula totherightof stove", "cutting board totheleftof stove"],
    ["helmet infrontof armor", "sword totherightof helmet", "shield totheleftof armor"],
    ["camera infrontof tripod", "lens totherightof camera", "flash totheleftof camera"],
    ["tree infrontof bench", "bench totherightof tree", "fountain totheleftof bench"],
    ["basketball infrontof hoop", "hoop totherightof scoreboard", "scoreboard totheleftof bleachers"],
    ["train infrontof station", "track totherightof train", "ticket booth totheleftof station"],
    ["painting infrontof easel", "paintbrush totherightof painting", "palette totheleftof easel"],
    ["fish tank infrontof filter", "filter totherightof fish tank", "air pump totheleftof filter"],
]

# Print each placement reply followed by a blank line and a 20-dash separator.
for relation_group in test_cases_coordinates:
    placement_reply = grid_placement_assistant.process_request(relation_group)
    print(placement_reply)
    print("\n" + "-" * 20)

keyboard (10, 10), monitor (10, 12), mouse (12, 10), pc (6, 12)

--------------------
guitar (10, 10), microphone (10, 12), music stand (12, 10), amplifier (7, 12)

--------------------
pan (10, 10), spatula (10, 10), cutting board (6, 12)

--------------------
helmet (10, 10), sword (10, 4), shield (10, 8), armor (7, 10)

--------------------
camera (10, 10), lens (10, 12), flash (12, 10)

--------------------
tree (10, 40), bench (30, 20), fountain (70, 60)

--------------------
scoreboard (10, 10), basketball (10, 4), hoop (10, 8), scoreboard (8, 10)

--------------------
train (4, 3), track (4, 7), ticket booth (1, 8)

--------------------
painting (3, 8), brush (4, 7), palette (6, 9)

--------------------
fish tank (10, 10), filter (10, 12), air pump (12, 10)

--------------------


In [10]:
# Stage 5 + rendering in isolation: raw coordinate strings -> parsed -> grid.
# Note: this rebinds `test_cases_coordinates`, which an earlier cell uses for
# lists of relation strings.
test_cases_coordinates = [
    "basketball (15, 10), hoop (10, 11), scoreboard (12, 8), bleachers (13, 10)",
    "train (15, 15), station (15, 10), track (10, 15), ticket booth (10, 10)",
    "painting (10, 5), easel (5, 10), paintbrush (11, 5), pallet (10, 5)",
    "fish tank (10, 10), filter (10, 11), air pump (9, 10)",
]

for idx, coord_output in enumerate(test_cases_coordinates):
    print(f"--- Test {idx+1} ---")

    # Parse the string, lay the objects out on a 20-cell grid, then draw it.
    print_grid(
        place_objects_on_grid(parse_coordinates(coord_output), grid_size=20)
    )


--- Test 1 ---
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . S . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . B . B . . . . 
. . . . . . . . . . H . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
--- Test 2 ---
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . .