In [None]:
!pip install transformers -qqq
!pip install sentencepiece -qqq
!pip install bitsandbytes -qqq
!pip install accelerate -qqq

In [None]:
import torch
import json
import csv
from dataclasses import dataclass, field
from torch.utils.data import Dataset
from typing import List

import requests
from transformers import BlipProcessor, BlipForConditionalGeneration

import torch
from torchvision.models.detection import faster_rcnn
from torchvision.transforms import functional as F
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from sklearn.cluster import KMeans
import webcolors
from torchvision import transforms
@dataclass
class Step:
    """A single plan step: an action name, its arguments and its textual form."""

    # Name of the action, e.g. the part before the parentheses in the step text.
    action: str = field(default="")
    # Textual representation of the whole step.
    text: str = field(default="")
    # Argument strings of the action; every instance gets its own list.
    arguments: List[str] = field(default_factory=list)

@dataclass
class SorterTask():
    """A sorting task: the goal, its context and the plan that solves it.

    The field order is part of the interface (positional construction), so it
    is kept exactly as before; only the duplicated ``text`` declaration was
    removed, which does not change the resulting dataclass fields.
    """

    action: str = ""
    # Textual form of the plan (set when the task is rendered or parsed).
    text: str = ""
    # Goal of the task in natural language.
    goal: str = ""
    # Extra description that is inserted into the prompt after the goal.
    description: str = ""
    task_type: int = -1
    plan_id: int = -1
    # Name of the image file that belongs to the task.
    image: str = ""
    # Ordered steps of the plan.
    steps: List[Step] = field(default_factory=list)
    arguments: List[str] = field(default_factory=list)

    def to_list(self):
        """Return the plan as ``[[action, [argument, ...]], ...]``.

        The argument lists are copies, so modifying the result does not
        affect the steps stored in the task.
        """
        return [[step.action, list(step.arguments)] for step in self.steps]

class SorterDataset(Dataset):
    """Dataset of sorter tasks loaded from a JSON file.

    Every entry of the file is expected to provide the keys ``plan``,
    ``goal_eng``, ``task_type``, ``plan_id`` and ``image``.
    """

    def __init__(self, path_to_csv: str = ""):
        """Load all entries into memory.

        Args:
            path_to_csv: Path to the dataset file. Despite the parameter name
                the file is parsed as JSON.
        """
        with open(path_to_csv, 'r', encoding='utf-8') as f:
            self._data = json.load(f)
        self._size = len(self._data)

    def __len__(self):
        """Return the number of entries in the dataset."""
        return self._size

    def __getitem__(self, idx) -> SorterTask:
        """Build the task stored at position ``idx``.

        The ``plan`` value is a Python-literal string of
        ``[action, [argument, ...]]`` pairs. It is parsed with
        ``ast.literal_eval`` so that file content can never execute code;
        a plan that is already a list is used as is.
        """
        import ast  # local import: keeps the block independent of the file header

        entry = self._data[idx]
        raw_plan = entry['plan']
        plan = ast.literal_eval(raw_plan) if isinstance(raw_plan, str) else raw_plan
        steps = [Step(action=action, arguments=arguments)
                 for action, arguments in plan]
        # NOTE(review): ``description`` is filled with the plan id, which looks
        # unintended; the correct source key is not visible here, so the
        # original behaviour is kept - confirm against the dataset schema.
        return SorterTask(goal=entry['goal_eng'],
                          steps=steps,
                          task_type=entry['task_type'],
                          plan_id=entry["plan_id"],
                          description=entry["plan_id"],
                          image=entry["image"])


import torch
import torch.nn.functional as F

from tqdm import tqdm
from transformers import AutoModelForCausalLM, LlamaTokenizer
from transformers import pipeline
from typing import Any, List, Optional



@dataclass
class BaseInput:
    """Input of a language model call: the prompt text."""

    # Prompt text; ``None`` when no text has been provided.
    text: Optional[str] = field(default=None)

@dataclass
class BaseOutput:
    """Output of a language model call: the generated text."""

    # Generated text; ``None`` when nothing has been produced.
    text: Optional[str] = field(default=None)

class LLAMA7B:
    """Text generation wrapper around the checkpoint named by ``MODEL_NAME``.

    The model is loaded in 8-bit mode and exposed through a ``transformers``
    text-generation pipeline.
    """

    MODEL_NAME = "decapoda-research/llama-7b-hf"

    def __init__(self, device: int = 0, max_new_tokens: int = 100) -> None:
        """Load the model and the tokenizer.

        Args:
            device: Device the whole model is placed on (used as the single
                target of ``device_map``).
            max_new_tokens: Upper bound on the number of tokens generated per
                call of :meth:`generate`.
        """
        self.max_new_tokens = max_new_tokens
        self.device = device
        self._load()

    def _load(self) -> None:
        """Load model and tokenizer from ``MODEL_NAME`` and build the pipeline."""
        # Model and tokenizer use the same constant so that they cannot get
        # out of sync when the checkpoint name is changed.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.MODEL_NAME,
            torch_dtype=torch.float16,
            load_in_8bit=True,
            device_map={"": self.device},
        )
        self.model.eval()

        self.tokenizer = LlamaTokenizer.from_pretrained(self.MODEL_NAME)
        self._prepare_for_generation()

    def _prepare_for_generation(self) -> None:
        """Create the text-generation pipeline from the loaded model and tokenizer."""
        self.generation_pipeline = pipeline(
            "text-generation", model=self.model, tokenizer=self.tokenizer
        )

    def generate(self, inputs: BaseInput, **kwargs) -> BaseOutput:
        """Generate a continuation of ``inputs.text``.

        Sampling is disabled and only the newly generated text is requested
        from the pipeline (``return_full_text=False``).

        Args:
            inputs: Prompt to continue.
            **kwargs: Accepted for interface compatibility; currently ignored.

        Returns:
            The generated text wrapped into a ``BaseOutput``.
        """
        output = self.generation_pipeline(
            inputs.text,
            do_sample=False,
            return_full_text=False,
            max_new_tokens=self.max_new_tokens,
        )
        return BaseOutput(output[0]["generated_text"])

import re
from typing import List, Optional, Union

class PromptProcessor():
    """Converts tasks into prompts and generated text back into plan steps.

    A plan is rendered as ``1. action("arg", ...), 2. ..., N. done().`` and
    the same format is parsed from the model output.
    """

    # Splits a step text into the action name and its arguments.
    _STEP_DECOMPOSITION_PATTERN = re.compile(r'\s*([A-Za-z_][A-Za-z_\s]+)')

    def __init__(self, **kwargs) -> None:
        self.TERMINATING_STRING = 'done()'
        self._system_prompt = ""
        # Compiled here (not only when a prompt is set) so that parsing never
        # runs against an uncompiled placeholder.
        self._stop_step_pattern = re.compile(
            r'(\s*\d+\.\s*)(\w+\(("[\w ]+"(,\s)?)*\))*')
        # The terminating string is escaped: its parentheses must be matched
        # literally, not treated as an (empty) regex group.
        self._stop_pattern = re.compile(
            rf'\d+\. {re.escape(self.TERMINATING_STRING)}\.?')

    @property
    def system_prompt_is_set(self) -> bool:
        """Whether a non-empty system prompt has been built or loaded."""
        return len(self._system_prompt) > 0

    def is_terminating(self, step: Step) -> bool:
        """Return True when ``step`` is the terminating step of a plan."""
        return step.text == self.TERMINATING_STRING

    def build_system_prompt(self, example_tasks: List[SorterTask]) -> str:
        """Build the few-shot system prompt from ``example_tasks`` and store it.

        Returns:
            The prompt that was stored.
        """
        prompt = "Robot: Hi there, I’m a robot operating in a house.\n"
        prompt += "Robot: You can ask me to do various tasks and "
        prompt += "I’ll tell you the sequence of actions I would do to accomplish your task.\n"

        for task in example_tasks:
            prompt += self._task_to_prompt(task) + '\n'

        self._system_prompt = prompt
        return prompt

    def load_prompt_from_file(self, filepath: str) -> None:
        """Use the content of ``filepath`` as the system prompt."""
        with open(filepath, 'r') as file:
            self._system_prompt = file.read()

    def _goal_to_query(self, goal: str) -> str:
        """Render the human question for ``goal`` followed by the robot cue."""
        query = f"Human: How would you {goal.lower()}?\n"
        query += f'Robot: '
        return query

    def _step_to_text(self, step: Step) -> str:
        """Render one step as ``action("arg1", "arg2")``."""
        arguments = [f'"{argument}"' for argument in step.arguments]
        text = f'{step.action}({", ".join(arguments)})'
        return text

    def _steps_to_text(self,
                       steps: List[Step],
                       add_terminating_string: bool = True) -> str:
        """Render ``steps`` as a numbered, comma separated list.

        When ``add_terminating_string`` is set, the numbered terminating step
        followed by a period is appended.
        """
        text = ", ".join([f'{step_idx}. {self._step_to_text(step)}'
                          for step_idx, step in enumerate(steps, start=1)])
        if add_terminating_string:
            text += f", {len(steps) + 1}. {self.TERMINATING_STRING}."
        return text

    def _task_to_prompt(self, task: SorterTask) -> str:
        """Render an example task (query, description, plan) for the prompt.

        Side effect: the rendered plan is stored in ``task.text``.
        """
        prompt = self._goal_to_query(task.goal)
        prompt += f"Description: {task.description}\n"
        text = self._steps_to_text(task.steps)
        task.text = text
        prompt += text
        return prompt

    def to_inputs(self,
                  task: SorterTask,
                  steps: Optional[List[Step]] = None,
                  options: Optional[List[Step]] = None) -> BaseInput:
        """Build the model input for ``task``.

        Args:
            task: Task whose goal and description are put into the query.
            steps: Steps that are already part of the plan; rendered after
                the description without the terminating step.
            options: Candidate next steps; when given, a scoring input with
                one numbered option per candidate is returned.

        Raises:
            ValueError: If no system prompt has been set.
        """
        if not self.system_prompt_is_set:
            raise ValueError(
                "System prompt is not set. You need to set the system prompt.")

        text = self._system_prompt + self._goal_to_query(task.goal)
        # The description is used as an additional part of the prompt.
        text += f"Description: {task.description}\n"
        if steps is not None:
            text += self._steps_to_text(steps, add_terminating_string=False)
        if options is not None:
            # Options continue the numbering of the given steps; without
            # steps they are numbered as the first step.
            next_idx = len(steps or []) + 1
            # NOTE(review): ScoringInput is not defined in the visible part of
            # the file - confirm it is available before using ``options``.
            return ScoringInput(text=text,
                                options=[f'{next_idx}. {option.text}'
                                         for option in options])
        return BaseInput(text=text)

    def _text_to_steps(self, task_text: str, cut_one_step: bool = False) -> Union[List[Step], Step, None]:
        """Parse plan text into steps.

        Args:
            task_text: Text in the numbered plan format.
            cut_one_step: When True only the step at the beginning of the text
                is parsed and returned (``None`` if there is none); otherwise
                a list of steps is returned.
        """
        if cut_one_step:
            first_match = self._stop_step_pattern.match(task_text)
            # group(2) is None when only the step number matched.
            if first_match is None or first_match.group(2) is None:
                return None
            return self._parse_action(first_match.group(2))

        matches = self._stop_step_pattern.findall(task_text)
        steps = []
        # The last match is skipped: for a complete plan it is the terminating
        # step. NOTE(review): for text without a terminating step this drops
        # the last regular step - confirm this is intended.
        for match in matches[:-1]:
            step = self._parse_action(match[1])
            if step is not None:
                steps.append(step)
        return steps

    def _parse_action(self, step_text: str) -> Optional[Step]:
        """ Parse action with arguments to step.
        text: put_on("pepper", "white box")
        action: put_on
        arguments: ['pepper', 'white box']

        A text with a single token (e.g. the terminating step) yields a step
        that only carries ``text``. ``None`` is returned when nothing can be
        parsed from the text.
        """
        arguments = self._STEP_DECOMPOSITION_PATTERN.findall(step_text)

        if not arguments:
            return None
        if len(arguments) == 1:
            return Step(text=step_text)
        return Step(action=arguments[0],
                    arguments=arguments[1:],
                    text=step_text)

    def to_task(self, task: BaseOutput) -> SorterTask:
        """Convert generated text into a task (full plan generation mode).

        The text is cut right after the first terminating step (including its
        trailing period) and parsed into steps. Side effect: ``task.text`` is
        replaced by the cut and stripped text.
        """
        stop_match = self._stop_pattern.search(task.text)

        if stop_match is not None:
            task.text = task.text[:stop_match.end()].strip(' \n\t')
        else:
            task.text = task.text.strip(' \n\t')

        steps = self._text_to_steps(task_text=task.text)

        return SorterTask(text=task.text, steps=steps)

class FullPlanGeneration():
    """Plan generation method that produces the whole plan with one model call."""

    def __init__(self,
                 model,
                 processor,
                 **kwargs):
        """Store the model and the prompt processor; extra kwargs are ignored."""
        self._model = model
        self._processor = processor

    def predict(self, gt_task: SorterTask) -> SorterTask:
        """Generate a plan for ``gt_task``.

        The task is turned into model inputs by the processor, passed to the
        model, and the model output is converted back into a task.
        """
        prompt_inputs = self._processor.to_inputs(gt_task)
        generated = self._model.generate(prompt_inputs)
        return self._processor.to_task(generated)


# Category names indexed by the label ids returned by the detector.
_COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]


def _get_detector():
    """Return the shared Faster R-CNN detector, loading it on first use only."""
    detector = getattr(_get_detector, "_cached", None)
    if detector is None:
        # NOTE(review): newer torchvision releases prefer ``weights=`` over
        # ``pretrained=``; kept as is because the installed version is unknown.
        detector = faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True)
        detector.eval()
        _get_detector._cached = detector
    return detector


def _mean_rgb(image, xmin, ymin, xmax, ymax):
    """Return the mean RGB colour (three ints) of a box of an RGB image."""
    pixels = np.asarray(image.crop((xmin, ymin, xmax, ymax))).reshape(-1, 3)
    # The centre of a single k-means cluster is the mean of the pixels, so the
    # mean is computed directly.
    return pixels.mean(axis=0).astype(int)


def description_image(image_path,
                      base_dir="/content/drive/MyDrive/mfti",
                      score_threshold=0.0):
    """Describe the objects detected in an image as ``label: color.`` fragments.

    Args:
        image_path: Path of the image relative to ``base_dir``.
        base_dir: Directory the images are stored in.
        score_threshold: Detections with a lower score are skipped. The
            default keeps every detection returned by the detector.

    Returns:
        The concatenated fragments, one ``"<label>: <color name>."`` per
        detection, in detector order; an empty string without detections.
    """
    full_path = f"{base_dir}/{image_path}"
    with Image.open(full_path) as raw_image:
        # Force three channels: the detector input and the colour computation
        # both assume RGB data.
        image = raw_image.convert("RGB")

    # Turn the image into a batch with a single element.
    batch_tensor = torch.unsqueeze(transforms.ToTensor()(image), 0)

    # Run the detector without tracking gradients.
    with torch.no_grad():
        predictions = _get_detector()(batch_tensor)

    boxes = predictions[0]['boxes'].tolist()
    labels = predictions[0]['labels'].tolist()
    scores = predictions[0]['scores'].tolist()

    object_descriptions = []
    for box, label, score in zip(boxes, labels, scores):
        if score < score_threshold:
            continue
        xmin, ymin, xmax, ymax = map(int, box)
        # Boxes that collapse to zero area after truncation contain no pixels.
        if xmax <= xmin or ymax <= ymin:
            continue

        # Map the mean colour of the box to the nearest colour name.
        color_name = closest_color(_mean_rgb(image, xmin, ymin, xmax, ymax))
        label_name = _COCO_INSTANCE_CATEGORY_NAMES[label]
        object_descriptions.append(f'{label_name}: {color_name}.')

    return "".join(object_descriptions)

def closest_color(requested_color, palette=None):
    """Return the name of the palette colour nearest to ``requested_color``.

    The distance is the squared Euclidean distance in RGB space. The search
    has no distance cut-off, so a non-empty palette always yields a name.

    Args:
        requested_color: Sequence with the red, green and blue components.
        palette: Optional mapping from colour name to an ``(r, g, b)`` triple.
            By default the CSS3 colours of ``webcolors`` are used.

    Returns:
        The name of the nearest colour (the first one on ties), or an empty
        string when the palette is empty.
    """
    if palette is None:
        candidates = [(name, tuple(webcolors.hex_to_rgb(hex_code)))
                      for hex_code, name in webcolors.CSS3_HEX_TO_NAMES.items()]
    else:
        candidates = list(palette.items())

    # Plain ints avoid wrap-around when the components are fixed-width
    # (e.g. numpy) integers.
    red, green, blue = (int(component) for component in requested_color[:3])

    def squared_distance(candidate):
        r_c, g_c, b_c = candidate[1]
        return (r_c - red) ** 2 + (g_c - green) ** 2 + (b_c - blue) ** 2

    best = min(candidates, key=squared_distance, default=None)
    return best[0] if best is not None else ""


# Path to the dataset file (a JSON file, despite the "csv" in the variable name)
path_to_csv='/content/modifiedtrain.json'
dataset = SorterDataset(path_to_csv=path_to_csv)

# Create the PromptProcessor and build its system prompt from the first 10 dataset entries
processor = PromptProcessor()
processor.build_system_prompt([dataset[i] for i in range(10)])

# Create the LLAMA7B model and run one short generation (its result is discarded)
model = LLAMA7B(device=0, max_new_tokens=150)
model.generate(BaseInput('Hello'))

# Create the plan generation method
gen_method = FullPlanGeneration(model, processor)

results = []


# Example usage in a loop


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Текст задачи: 1. move_to("unspecified", "orange kitten"), 2. pick_up("unspecified", "orange kitten"), 3. move_to("green box", "orange kitten"), 4. put("green box", "orange kitten"), 5. done().teddy bear: dimgray.teddy bear: darkolivegreen.fire hydrant: dimgray.teddy bear: dimgray.clock: gray.teddy bear: dimgray.toilet: darkslategray.teddy bear: dimgray.mouse: dimgray.cup: dimgray.toilet: dimgray.refrigerator: darkgray.refrigerator: gray.bowl: dimgray.bottle: dimgray.book: dimgray.
{'plan_id': 0}




Текст задачи: 1. move_to("floor", "toy cube"), 2. pick_up("floor", "toy cube"), 3. move_to("table", "toy cube"), 4. put("table", "toy cube"), 5. done().teddy bear: dimgray.teddy bear: darkolivegreen.fire hydrant: dimgray.teddy bear: dimgray.clock: gray.teddy bear: dimgray.toilet: darkslategray.teddy bear: dimgray.mouse: dimgray.cup: dimgray.toilet: dimgray.refrigerator: darkgray.refrigerator: gray.bowl: dimgray.bottle: dimgray.book: dimgray.
{'plan_id': 1}




Текст задачи: 1. move_to("table", "toy cat"), 2. pick_up("table", "toy cat"), 3. move_to("nightstand", "toy cat"), 4. put("nightstand", "toy cat"), 5. done().clock: darkslategray.refrigerator: dimgray.clock: dimgray.fire hydrant: dimgray.bowl: dimgray.teddy bear: gray.dining table: dimgray.fire hydrant: dimgray.oven: dimgray.clock: dimgray.refrigerator: gray.refrigerator: gray.oven: dimgray.refrigerator: dimgray.dining table: dimgray.oven: gray.sink: dimgray.sports ball: darkgray.toilet: dimgray.
{'plan_id': 2}




Текст задачи: 1. move_to("unspecified", "cucumber"), 2. pick_up("unspecified", "cucumber"), 3. move_to("drawer", "cucumber"), 4. put("drawer", "cucumber"), 5. done().clock: darkslategray.refrigerator: dimgray.clock: dimgray.fire hydrant: dimgray.bowl: dimgray.teddy bear: gray.dining table: dimgray.fire hydrant: dimgray.oven: dimgray.clock: dimgray.refrigerator: gray.refrigerator: gray.oven: dimgray.refrigerator: dimgray.dining table: dimgray.oven: gray.sink: dimgray.sports ball: darkgray.toilet: dimgray.
{'plan_id': 3}




Текст задачи: 1. move_to("table", "cucumber"), 2. pick_up("table", "cucumber"), 3. move_to("orange box", "cucumber"), 4. put("orange box", "cucumber"), 5. done().clock: dimgray.clock: gray.apple: sienna.refrigerator: gray.dining table: dimgray.apple: darkkhaki.fire hydrant: dimgray.sports ball: yellowgreen.bowl: darkolivegreen.clock: dimgray.apple: indianred.oven: dimgray.toilet: gray.oven: gray.refrigerator: darkgray.orange: sienna.chair: dimgray.orange: sienna.apple: sienna.motorcycle: dimgray.refrigerator: dimgray.dining table: gray.refrigerator: darkgray.donut: indianred.apple: sienna.cake: indianred.banana: darkslategray.refrigerator: gray.apple: darkolivegreen.traffic light: dimgray.
{'plan_id': 4}




Текст задачи: 1. move_to("unspecified", "cucumber"), 2. pick_up("unspecified", "cucumber"), 3. move_to("box", "cucumber"), 4. put("box", "cucumber"), 5. done().clock: dimgray.clock: gray.apple: sienna.refrigerator: gray.dining table: dimgray.apple: darkkhaki.fire hydrant: dimgray.sports ball: yellowgreen.bowl: darkolivegreen.clock: dimgray.apple: indianred.oven: dimgray.toilet: gray.oven: gray.refrigerator: darkgray.orange: sienna.chair: dimgray.orange: sienna.apple: sienna.motorcycle: dimgray.refrigerator: dimgray.dining table: gray.refrigerator: darkgray.donut: indianred.apple: sienna.cake: indianred.banana: darkslategray.refrigerator: gray.apple: darkolivegreen.traffic light: dimgray.
{'plan_id': 5}




Текст задачи: 1. move_to("orange container", "cube"), 2. put("orange container", "cube"), 3. done().tv: lightslategray.refrigerator: gray.
{'plan_id': 6}




Текст задачи: 1. move_to("table", "cube"), 2. put("table", "cube"), 3. done().tv: lightslategray.refrigerator: gray.
{'plan_id': 7}




Текст задачи: 1. move_to("green container", "toy cat"), 2. put("green container", "toy cat"), 3. done().refrigerator: gray.fire hydrant: dimgray.traffic light: dimgray.fire hydrant: darkslategray.traffic light: darkslategray.refrigerator: gray.teddy bear: dimgray.refrigerator: gray.refrigerator: gray.chair: dimgray.bench: dimgray.
{'plan_id': 8}




Текст задачи: 1. move_to("orange box", "gray cat"), 2. put("orange box", "gray cat"), 3. done().refrigerator: gray.fire hydrant: dimgray.traffic light: dimgray.fire hydrant: darkslategray.traffic light: darkslategray.refrigerator: gray.teddy bear: dimgray.refrigerator: gray.refrigerator: gray.chair: dimgray.bench: dimgray.
{'plan_id': 9}




Текст задачи: 1. move_to("chair", "toy"), 2. put("chair", "toy"), 3. done().clock: darkslategray.chair: darkslategray.sports ball: saddlebrown.orange: saddlebrown.chair: darkslategray.vase: maroon.bottle: dimgray.traffic light: dimgray.refrigerator: dimgray.chair: darkslategray.suitcase: darkslategray.refrigerator: gray.fire hydrant: dimgray.dining table: darkslategray.refrigerator: gray.chair: dimgray.bottle: dimgray.toilet: dimgray.apple: saddlebrown.clock: dimgray.couch: darkslategray.
{'plan_id': 10}




Текст задачи: 1. move_to("box", "cat"), 2. put("box", "cat"), 3. done().clock: darkslategray.chair: darkslategray.sports ball: saddlebrown.orange: saddlebrown.chair: darkslategray.vase: maroon.bottle: dimgray.traffic light: dimgray.refrigerator: dimgray.chair: darkslategray.suitcase: darkslategray.refrigerator: gray.fire hydrant: dimgray.dining table: darkslategray.refrigerator: gray.chair: dimgray.bottle: dimgray.toilet: dimgray.apple: saddlebrown.clock: dimgray.couch: darkslategray.
{'plan_id': 11}


In [None]:
for i, ground_true_plan in enumerate(dataset):
    answer = {'plan_id': ground_true_plan.plan_id}

    # Render the ground-truth steps as plan text (including the terminating step)
    ground_true_plan.text = processor._steps_to_text(ground_true_plan.steps)

    # Take the image name from the dataset entry
    image_name = ground_true_plan.image

    # Call description_image to describe the objects detected in the image
    object_descriptions = description_image(image_name)

    # description_image already returns one string, so this join leaves it unchanged;
    # the description is then appended to the task text
    object_descriptions_str = "".join(object_descriptions)
    ground_true_plan.text += object_descriptions_str

    # Store the resulting text in the answer
    answer['plan'] = ground_true_plan.text  # plan text followed by the object descriptions

    # Print the results
    print(f"Текст задачи: {ground_true_plan.text}")
    print(answer)
    results.append(answer)

    # Stop after 12 entries (i runs from 0 to 11)
    if i > 10:
        break



Текст задачи: 1. move_to("unspecified", "orange kitten"), 2. pick_up("unspecified", "orange kitten"), 3. move_to("green box", "orange kitten"), 4. put("green box", "orange kitten"), 5. done().teddy bear: dimgray.teddy bear: darkolivegreen.fire hydrant: dimgray.teddy bear: dimgray.clock: gray.teddy bear: dimgray.toilet: darkslategray.teddy bear: dimgray.mouse: dimgray.cup: dimgray.toilet: dimgray.refrigerator: darkgray.refrigerator: gray.bowl: dimgray.bottle: dimgray.book: dimgray.
{'plan_id': 0, 'plan': '1. move_to("unspecified", "orange kitten"), 2. pick_up("unspecified", "orange kitten"), 3. move_to("green box", "orange kitten"), 4. put("green box", "orange kitten"), 5. done().teddy bear: dimgray.teddy bear: darkolivegreen.fire hydrant: dimgray.teddy bear: dimgray.clock: gray.teddy bear: dimgray.toilet: darkslategray.teddy bear: dimgray.mouse: dimgray.cup: dimgray.toilet: dimgray.refrigerator: darkgray.refrigerator: gray.bowl: dimgray.bottle: dimgray.book: dimgray.'}




Текст задачи: 1. move_to("floor", "toy cube"), 2. pick_up("floor", "toy cube"), 3. move_to("table", "toy cube"), 4. put("table", "toy cube"), 5. done().teddy bear: dimgray.teddy bear: darkolivegreen.fire hydrant: dimgray.teddy bear: dimgray.clock: gray.teddy bear: dimgray.toilet: darkslategray.teddy bear: dimgray.mouse: dimgray.cup: dimgray.toilet: dimgray.refrigerator: darkgray.refrigerator: gray.bowl: dimgray.bottle: dimgray.book: dimgray.
{'plan_id': 1, 'plan': '1. move_to("floor", "toy cube"), 2. pick_up("floor", "toy cube"), 3. move_to("table", "toy cube"), 4. put("table", "toy cube"), 5. done().teddy bear: dimgray.teddy bear: darkolivegreen.fire hydrant: dimgray.teddy bear: dimgray.clock: gray.teddy bear: dimgray.toilet: darkslategray.teddy bear: dimgray.mouse: dimgray.cup: dimgray.toilet: dimgray.refrigerator: darkgray.refrigerator: gray.bowl: dimgray.bottle: dimgray.book: dimgray.'}




Текст задачи: 1. move_to("table", "toy cat"), 2. pick_up("table", "toy cat"), 3. move_to("nightstand", "toy cat"), 4. put("nightstand", "toy cat"), 5. done().clock: darkslategray.refrigerator: dimgray.clock: dimgray.fire hydrant: dimgray.bowl: dimgray.teddy bear: gray.dining table: dimgray.fire hydrant: dimgray.oven: dimgray.clock: dimgray.refrigerator: gray.refrigerator: gray.oven: dimgray.refrigerator: dimgray.dining table: dimgray.oven: gray.sink: dimgray.sports ball: darkgray.toilet: dimgray.
{'plan_id': 2, 'plan': '1. move_to("table", "toy cat"), 2. pick_up("table", "toy cat"), 3. move_to("nightstand", "toy cat"), 4. put("nightstand", "toy cat"), 5. done().clock: darkslategray.refrigerator: dimgray.clock: dimgray.fire hydrant: dimgray.bowl: dimgray.teddy bear: gray.dining table: dimgray.fire hydrant: dimgray.oven: dimgray.clock: dimgray.refrigerator: gray.refrigerator: gray.oven: dimgray.refrigerator: dimgray.dining table: dimgray.oven: gray.sink: dimgray.sports ball: darkgray.toi



Текст задачи: 1. move_to("unspecified", "cucumber"), 2. pick_up("unspecified", "cucumber"), 3. move_to("drawer", "cucumber"), 4. put("drawer", "cucumber"), 5. done().clock: darkslategray.refrigerator: dimgray.clock: dimgray.fire hydrant: dimgray.bowl: dimgray.teddy bear: gray.dining table: dimgray.fire hydrant: dimgray.oven: dimgray.clock: dimgray.refrigerator: gray.refrigerator: gray.oven: dimgray.refrigerator: dimgray.dining table: dimgray.oven: gray.sink: dimgray.sports ball: darkgray.toilet: dimgray.
{'plan_id': 3, 'plan': '1. move_to("unspecified", "cucumber"), 2. pick_up("unspecified", "cucumber"), 3. move_to("drawer", "cucumber"), 4. put("drawer", "cucumber"), 5. done().clock: darkslategray.refrigerator: dimgray.clock: dimgray.fire hydrant: dimgray.bowl: dimgray.teddy bear: gray.dining table: dimgray.fire hydrant: dimgray.oven: dimgray.clock: dimgray.refrigerator: gray.refrigerator: gray.oven: dimgray.refrigerator: dimgray.dining table: dimgray.oven: gray.sink: dimgray.sports ba



Текст задачи: 1. move_to("table", "cucumber"), 2. pick_up("table", "cucumber"), 3. move_to("orange box", "cucumber"), 4. put("orange box", "cucumber"), 5. done().clock: dimgray.clock: gray.apple: sienna.refrigerator: gray.dining table: dimgray.apple: darkkhaki.fire hydrant: dimgray.sports ball: yellowgreen.bowl: darkolivegreen.clock: dimgray.apple: indianred.oven: dimgray.toilet: gray.oven: gray.refrigerator: darkgray.orange: sienna.chair: dimgray.orange: sienna.apple: sienna.motorcycle: dimgray.refrigerator: dimgray.dining table: gray.refrigerator: darkgray.donut: indianred.apple: sienna.cake: indianred.banana: darkslategray.refrigerator: gray.apple: darkolivegreen.traffic light: dimgray.
{'plan_id': 4, 'plan': '1. move_to("table", "cucumber"), 2. pick_up("table", "cucumber"), 3. move_to("orange box", "cucumber"), 4. put("orange box", "cucumber"), 5. done().clock: dimgray.clock: gray.apple: sienna.refrigerator: gray.dining table: dimgray.apple: darkkhaki.fire hydrant: dimgray.sports b



Текст задачи: 1. move_to("unspecified", "cucumber"), 2. pick_up("unspecified", "cucumber"), 3. move_to("box", "cucumber"), 4. put("box", "cucumber"), 5. done().clock: dimgray.clock: gray.apple: sienna.refrigerator: gray.dining table: dimgray.apple: darkkhaki.fire hydrant: dimgray.sports ball: yellowgreen.bowl: darkolivegreen.clock: dimgray.apple: indianred.oven: dimgray.toilet: gray.oven: gray.refrigerator: darkgray.orange: sienna.chair: dimgray.orange: sienna.apple: sienna.motorcycle: dimgray.refrigerator: dimgray.dining table: gray.refrigerator: darkgray.donut: indianred.apple: sienna.cake: indianred.banana: darkslategray.refrigerator: gray.apple: darkolivegreen.traffic light: dimgray.
{'plan_id': 5, 'plan': '1. move_to("unspecified", "cucumber"), 2. pick_up("unspecified", "cucumber"), 3. move_to("box", "cucumber"), 4. put("box", "cucumber"), 5. done().clock: dimgray.clock: gray.apple: sienna.refrigerator: gray.dining table: dimgray.apple: darkkhaki.fire hydrant: dimgray.sports ball:



Текст задачи: 1. move_to("orange container", "cube"), 2. put("orange container", "cube"), 3. done().tv: lightslategray.refrigerator: gray.
{'plan_id': 6, 'plan': '1. move_to("orange container", "cube"), 2. put("orange container", "cube"), 3. done().tv: lightslategray.refrigerator: gray.'}




Текст задачи: 1. move_to("table", "cube"), 2. put("table", "cube"), 3. done().tv: lightslategray.refrigerator: gray.
{'plan_id': 7, 'plan': '1. move_to("table", "cube"), 2. put("table", "cube"), 3. done().tv: lightslategray.refrigerator: gray.'}




Текст задачи: 1. move_to("green container", "toy cat"), 2. put("green container", "toy cat"), 3. done().refrigerator: gray.fire hydrant: dimgray.traffic light: dimgray.fire hydrant: darkslategray.traffic light: darkslategray.refrigerator: gray.teddy bear: dimgray.refrigerator: gray.refrigerator: gray.chair: dimgray.bench: dimgray.
{'plan_id': 8, 'plan': '1. move_to("green container", "toy cat"), 2. put("green container", "toy cat"), 3. done().refrigerator: gray.fire hydrant: dimgray.traffic light: dimgray.fire hydrant: darkslategray.traffic light: darkslategray.refrigerator: gray.teddy bear: dimgray.refrigerator: gray.refrigerator: gray.chair: dimgray.bench: dimgray.'}




Текст задачи: 1. move_to("orange box", "gray cat"), 2. put("orange box", "gray cat"), 3. done().refrigerator: gray.fire hydrant: dimgray.traffic light: dimgray.fire hydrant: darkslategray.traffic light: darkslategray.refrigerator: gray.teddy bear: dimgray.refrigerator: gray.refrigerator: gray.chair: dimgray.bench: dimgray.
{'plan_id': 9, 'plan': '1. move_to("orange box", "gray cat"), 2. put("orange box", "gray cat"), 3. done().refrigerator: gray.fire hydrant: dimgray.traffic light: dimgray.fire hydrant: darkslategray.traffic light: darkslategray.refrigerator: gray.teddy bear: dimgray.refrigerator: gray.refrigerator: gray.chair: dimgray.bench: dimgray.'}




Текст задачи: 1. move_to("chair", "toy"), 2. put("chair", "toy"), 3. done().clock: darkslategray.chair: darkslategray.sports ball: saddlebrown.orange: saddlebrown.chair: darkslategray.vase: maroon.bottle: dimgray.traffic light: dimgray.refrigerator: dimgray.chair: darkslategray.suitcase: darkslategray.refrigerator: gray.fire hydrant: dimgray.dining table: darkslategray.refrigerator: gray.chair: dimgray.bottle: dimgray.toilet: dimgray.apple: saddlebrown.clock: dimgray.couch: darkslategray.
{'plan_id': 10, 'plan': '1. move_to("chair", "toy"), 2. put("chair", "toy"), 3. done().clock: darkslategray.chair: darkslategray.sports ball: saddlebrown.orange: saddlebrown.chair: darkslategray.vase: maroon.bottle: dimgray.traffic light: dimgray.refrigerator: dimgray.chair: darkslategray.suitcase: darkslategray.refrigerator: gray.fire hydrant: dimgray.dining table: darkslategray.refrigerator: gray.chair: dimgray.bottle: dimgray.toilet: dimgray.apple: saddlebrown.clock: dimgray.couch: darkslategray.'}



Текст задачи: 1. move_to("box", "cat"), 2. put("box", "cat"), 3. done().clock: darkslategray.chair: darkslategray.sports ball: saddlebrown.orange: saddlebrown.chair: darkslategray.vase: maroon.bottle: dimgray.traffic light: dimgray.refrigerator: dimgray.chair: darkslategray.suitcase: darkslategray.refrigerator: gray.fire hydrant: dimgray.dining table: darkslategray.refrigerator: gray.chair: dimgray.bottle: dimgray.toilet: dimgray.apple: saddlebrown.clock: dimgray.couch: darkslategray.
{'plan_id': 11, 'plan': '1. move_to("box", "cat"), 2. put("box", "cat"), 3. done().clock: darkslategray.chair: darkslategray.sports ball: saddlebrown.orange: saddlebrown.chair: darkslategray.vase: maroon.bottle: dimgray.traffic light: dimgray.refrigerator: dimgray.chair: darkslategray.suitcase: darkslategray.refrigerator: gray.fire hydrant: dimgray.dining table: darkslategray.refrigerator: gray.chair: dimgray.bottle: dimgray.toilet: dimgray.apple: saddlebrown.clock: dimgray.couch: darkslategray.'}


In [None]:
# Mount Google Drive under /content/drive (description_image reads its images from there)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

from pprint import pprint

# Save the collected results to results.json. Previously the file was only
# read, so a stale file from an earlier run was evaluated.
with open('./results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

# Read the file back and print it to check what was written.
with open('./results.json', 'r', encoding='utf-8') as f:
    results_file = json.load(f)
pprint(results_file)

def calculate_metrics(path_to_test: str,
                      path_to_results: str) -> float:
    """Return the share of test plans that the results reproduce exactly.

    Both files contain a JSON list of objects. Test entries need the keys
    ``plan_id`` and ``plan``. A result entry counts as a match when its
    ``plan`` equals the test plan with the same ``plan_id``; entries without
    a ``plan`` or with an unknown ``plan_id`` count as mismatches.

    Args:
        path_to_test: Path to the file with the reference plans.
        path_to_results: Path to the file with the predicted plans.

    Returns:
        Number of matching result entries divided by the number of test
        plans; ``0.0`` when the test file contains no plans.
    """
    with open(path_to_test, 'r', encoding='utf-8') as f:
        test_records = {element['plan_id']: element['plan']
                        for element in json.load(f)}

    if not test_records:
        return 0.0

    with open(path_to_results, 'r', encoding='utf-8') as f:
        predictions = json.load(f)

    # Sentinel that can never be equal to a real plan.
    missing = object()
    matches = 0
    for element in predictions:
        expected = test_records.get(element.get('plan_id'), missing)
        if expected is not missing and 'plan' in element and expected == element['plan']:
            matches += 1

    return matches / len(test_records)

# Compute the metric for results.json against the dataset file
calculate_metrics(path_to_test='/content/modifiedtrain.json',
                  path_to_results='./results.json')

[{'plan_id': 0},
 {'plan_id': 1},
 {'plan_id': 2},
 {'plan_id': 3},
 {'plan_id': 4},
 {'plan_id': 5},
 {'plan_id': 6},
 {'plan_id': 7},
 {'plan_id': 8},
 {'plan_id': 9},
 {'plan_id': 10},
 {'plan_id': 11}]


KeyError: ignored

In [None]:
import torch
from torchvision.models.detection import faster_rcnn
from torchvision.transforms import functional as F
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from sklearn.cluster import KMeans
import webcolors

# Load the Faster R-CNN model with pretrained weights
model = faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True)

# Switch the model to evaluation mode (not training)
model.eval()

# COCO class names, indexed by the label ids predicted by the model
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

# Function that computes the average colour of an image region
def get_average_color(image, xmin, ymin, xmax, ymax):
    """Return the mean RGB colour of a rectangular region of ``image``.

    Args:
        image: PIL image to read the region from.
        xmin, ymin, xmax, ymax: Corners of the region, as passed to
            ``image.crop``.

    Returns:
        Array with the three integer colour components (red, green, blue).

    Raises:
        ValueError: If the region contains no pixels.
    """
    cropped_img = image.crop((xmin, ymin, xmax, ymax))
    width, height = cropped_img.size
    if width == 0 or height == 0:
        raise ValueError("Cannot compute the average color of an empty region")

    # Force three channels so that the reshape below is valid for greyscale
    # and RGBA images as well.
    pixels = np.asarray(cropped_img.convert("RGB")).reshape(-1, 3)

    # The centre of a single k-means cluster is the mean of the pixels, so the
    # mean is computed directly.
    return pixels.mean(axis=0).astype(int)

# Read the image path from standard input and load the image
url = input()
image = Image.open(url)
image_tensor = F.to_tensor(image)

# Turn the image tensor into a batch with a single element
batch_tensor = torch.unsqueeze(image_tensor, 0)

# Get the object predictions
with torch.no_grad():
    predictions = model(batch_tensor)

# Extract the predicted boxes and labels as plain lists
boxes = predictions[0]['boxes'].tolist()
labels = predictions[0]['labels'].tolist()

# Show the image; the axes are kept in `ax` (no boxes are drawn in the visible code)
plt.figure(figsize=(10, 5))
plt.imshow(image)
ax = plt.gca()


def closest_color(requested_color, palette=None):
    """Return the name of the palette colour nearest to ``requested_color``.

    The distance is the squared Euclidean distance in RGB space. The search
    has no distance cut-off, so a non-empty palette always yields a name.

    Args:
        requested_color: Sequence with the red, green and blue components.
        palette: Optional mapping from colour name to an ``(r, g, b)`` triple.
            By default the CSS3 colours of ``webcolors`` are used.

    Returns:
        The name of the nearest colour (the first one on ties), or an empty
        string when the palette is empty.
    """
    if palette is None:
        candidates = [(name, tuple(webcolors.hex_to_rgb(hex_code)))
                      for hex_code, name in webcolors.CSS3_HEX_TO_NAMES.items()]
    else:
        candidates = list(palette.items())

    # Plain ints avoid wrap-around when the components are fixed-width
    # (e.g. numpy) integers.
    red, green, blue = (int(component) for component in requested_color[:3])

    def squared_distance(candidate):
        r_c, g_c, b_c = candidate[1]
        return (r_c - red) ** 2 + (g_c - green) ** 2 + (b_c - blue) ** 2

    best = min(candidates, key=squared_distance, default=None)
    return best[0] if best is not None else ""

# The Faster R-CNN model code and the other functions stay unchanged...
# List that collects the object descriptions
object_descriptions = []

for box, label in zip(boxes, labels):
    xmin, ymin, xmax, ymax = box
    avg_color = get_average_color(image, xmin, ymin, xmax, ymax)

    # Convert the average colour to the nearest colour name
    color_name = closest_color(avg_color)

    label_name = COCO_INSTANCE_CATEGORY_NAMES[label]

    # Add the "label: color." description of the object to the list
    object_descriptions.append(f'{label_name}: {color_name}.')

# Print the object descriptions
print("".join(object_descriptions))

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 171MB/s]


KeyboardInterrupt: ignored