In [13]:
import json
import random

In [14]:
# Template catalogue for the synthetic dataset, keyed by shape name.
#
# Each template pairs a `description` format string with a `tags` list that
# holds exactly one tag per whitespace-separated token of the rendered
# description (punctuation stays attached to its token, e.g. "width,").
# The tag prefixes (B-/I-/L-/U-/O) look like a BILOU-style scheme -- confirm
# against the consumer of dataset.json before changing them.
#
# Placeholders: every template may use {value} and {unit}; GEAR templates
# additionally use {teeth}. Each placeholder must render to a single token so
# the tag alignment is preserved.
SHAPES = {
    "CUBE": {
        "templates": [
            {
                "description": "A cube with {value} {unit} sides.",
                "tags": ["O", "U-SHAPE", "O", "B-VALUE", "I-UNIT", "L-DIMENSION"]
            },
            {
                # The same {value}/{unit} pair is repeated for all three dimensions.
                "description": "Create a cube with {value} {unit} width, {value} {unit} height, {value} {unit} depth.",
                "tags": ["O", "O", "U-SHAPE", "O", "B-VALUE", "I-UNIT", "L-DIMENSION", "B-VALUE", "I-UNIT", "L-DIMENSION", "B-VALUE", "I-UNIT", "L-DIMENSION"]
            }
        ]
    },
    "GEAR": {
        "templates": [
            {
                # Uses {unit} rather than a literal "cm": the generator draws the
                # value from a unit-specific range, so a hard-coded unit would
                # mislabel values that were sized for inches or feet.
                "description": "A helical gear with {teeth} teeth with {value} {unit} radius",
                "tags": ["O", "B-SHAPE_TYPE", "L-SHAPE", "O", "B-TEETH-COUNT", "L-TEETH", "O", "B-VALUE", "I-UNIT", "L-DIMENSION"]
            },
        ]
    }
}

In [15]:
# (accepted unit names, lower bound, upper bound, candidate decimal places)
_DIMENSION_RULES = (
    (("mm", "cm"), 0.5, 1000, (0, 1, 2)),
    (("m",), 0.01, 10, (2, 3)),
    (("in",), 0.1, 48, (1, 2, 3)),
    (("ft",), 0.1, 10, (1, 2)),
)


def generate_dimensions(unit):
    """Draw a random dimension value sized for the given unit.

    The value is sampled uniformly from a unit-specific range and rounded to
    a randomly chosen number of decimal places, also unit-specific.

    Args:
        unit: One of "mm", "cm", "m", "in" or "ft".

    Returns:
        The rounded value as a float.

    Raises:
        ValueError: If ``unit`` is not one of the supported units.
    """
    for names, lower, upper, precisions in _DIMENSION_RULES:
        if unit in names:
            # Sample the magnitude first, then the precision, so the sequence
            # of draws from the random generator stays fixed.
            magnitude = random.uniform(lower, upper)
            decimals = random.choice(precisions)
            return round(magnitude, decimals)

    raise ValueError("Invalid unit")

In [16]:
def generate_description(shape_name):
    """Render one random labelled description for ``shape_name``.

    A unit is picked from mm/cm/in/ft, a value is drawn for that unit with
    ``generate_dimensions``, and a random template registered under
    ``SHAPES[shape_name]`` is filled in with both.

    Args:
        shape_name: Key into ``SHAPES``; its templates may only use the
            ``{value}`` and ``{unit}`` placeholders.

    Returns:
        A dict with the rendered sentence under "text" and the template's
        tag list under "labels".
    """
    chosen_unit = random.choice(["mm", "cm", "in", "ft"])
    magnitude = generate_dimensions(chosen_unit)

    picked = random.choice(SHAPES[shape_name]["templates"])
    sentence = picked["description"].format(value=magnitude, unit=chosen_unit)

    # "labels" is the template's own list object, not a copy.
    return {"text": sentence, "labels": picked["tags"]}

def generate_gear_description(shape_name):
    """Render one random labelled gear description for ``shape_name``.

    Works like ``generate_description`` but additionally supplies a random
    tooth count (an integer from 6 to 12 inclusive) to the template as the
    ``teeth`` field.

    Args:
        shape_name: Key into ``SHAPES`` whose templates are gear templates.

    Returns:
        A dict with the rendered sentence under "text" and the template's
        tag list under "labels".
    """
    chosen_unit = random.choice(["mm", "cm", "in", "ft"])
    magnitude = generate_dimensions(chosen_unit)

    picked = random.choice(SHAPES[shape_name]["templates"])
    # Drawn after the template so the order of random draws is stable.
    tooth_total = random.randint(6, 12)

    sentence = picked["description"].format(
        value=magnitude, unit=chosen_unit, teeth=tooth_total
    )
    # "labels" is the template's own list object, not a copy.
    return {"text": sentence, "labels": picked["tags"]}

In [17]:
def generate_dataset(n=1000):
    """Build a list of ``n`` labelled samples, cubes first and gears after.

    Positions below ``n / 2`` hold cube samples and the remaining positions
    hold gear samples, so for an odd ``n`` the cube part gets the extra item.
    The result is not shuffled.

    Args:
        n: Total number of samples to generate.

    Returns:
        A list of dicts as produced by ``generate_description`` and
        ``generate_gear_description``.
    """
    return [
        generate_description("CUBE")
        if position < n / 2
        else generate_gear_description("GEAR")
        for position in range(n)
    ]

In [18]:
# Generate the default-sized dataset and save it as 4-space-indented JSON
# in the current working directory.
data = generate_dataset()
serialized = json.dumps(data, indent=4)
with open("dataset.json", "w") as f:
    f.write(serialized)