In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, "/project/remote/golmi-dynamatt")
sys.path

['/project/remote/golmi-dynamatt',
 '/project/remote/golmi-dynamatt/app/compreg',
 '/usr/lib/python38.zip',
 '/usr/lib/python3.8',
 '/usr/lib/python3.8/lib-dynload',
 '',
 '/usr/local/lib/python3.8/dist-packages',
 '/usr/lib/python3/dist-packages']

In [11]:
import json
import os

import matplotlib.pyplot as plt

from model.pentomino import PieceConfig, RelPositions, Shapes, Colors, Board

def draw_piece_image(color, shape):
    target = PieceConfig(color, shape, RelPositions.CENTER)
    board = Board(15, 15)  # we need at least size of 3 * 5
    board.add_piece_from_config(target)
    arr = board.to_rgb_array()
    center = arr[5:10, 5:10]
    return center

def to_piece_image_name(color, shape):
    return f"{shape.value_name}_{color.value_name}.png"

def save_piece_image(color, shape, target_dir):
    filename = to_piece_image_name(color, shape)
    plt.axis("off")
    plt.imshow(draw_piece_image(color, shape))
    plt.savefig(target_dir + f"/{filename}", bbox_inches='tight')
    return filename

In [3]:
shapes = list(Shapes)
colors = list(Colors)
num_colors = len(colors)
num_shapes = len(shapes)
num_target_looks = num_shapes * num_colors
print(f"Shapes: {num_shapes} Colors: {num_colors} Combinations: {num_target_looks}")

Shapes: 12 Colors: 8 Combinations: 96


In [4]:
# NOTE: Actually more sensible to store all possible combinations of pieces only once!
# And then load them dynamically
num_distractors = 2
num_pieces = num_distractors + 1
print(f"Possible sets: {(num_target_looks)**(num_pieces)}")

Possible sets: 884736


In [5]:
# Compositional set on "Colors"
# All other distractors do NOT share the color, but at least one distractor shares the shape
num_other_looks = num_shapes*(num_colors-1) # unshare the color, but allow all shapes
num_share_looks = num_colors-1              # unshare the color, but shares the same shape as the target piece
print(f"Other looks: {num_other_looks}")
print(f"Share looks: {num_share_looks}")

print(f"Possible comp. sets: {num_target_looks * num_share_looks * num_other_looks**(num_distractors-1)}")

# Val/Test set on "Colors" (spare 1 combination of shape + color)
print(f"Possible val/test comp. sets: {12 * num_share_looks * num_other_looks**(num_distractors-1)}")

# Single piece set on "Colors" (1 combination of shape + color)
print(f"Possible single comp. sets with {num_distractors} distractors: {1 * num_share_looks * num_other_looks**(num_distractors-1)}")

Other looks: 84
Share looks: 7
Possible comp. sets: 56448
Possible val/test comp. sets: 7056
Possible single comp. sets with 2 distractors: 588


In [10]:
print(f"All possible sets per uniq prop. with {num_distractors} distractors: {96 * num_share_looks * num_other_looks**(num_distractors-1)}")

All possible sets per uniq prop. with 2 distractors: 56448


In [12]:
# We create all piece images first
# There are only 96 possible combinations (for now)
save_all_pieces = False
data_images_dir = "/data/compreg/sets/pieces"
if save_all_pieces:  
    for shape_idx, shape in enumerate(shapes):
        for color_idx, color in enumerate(colors):
            save_piece_image(color, shape, data_images_dir)
print("Pieces:", len(os.listdir(data_images_dir)))
os.listdir(data_images_dir)[:5]

Pieces: 96


['F_purple.png', 'T_yellow.png', 'V_purple.png', 'I_blue.png', 'T_grey.png']

# Create meta.json

We map label, colors, shapes etc. to integers

In [25]:
data_top_dir = "/data/compreg/sets"
save_meta = False
labels = ["uniq_color", "uniq_shape"]
if save_meta:
    metadata = {
        "shape_to_idx": dict(),
        "idx_to_shape": dict(),
        "color_to_idx": dict(),
        "idx_to_color": dict(),
        "label_to_idx": dict(),
        "idx_to_label": dict()
    }
    for prop_idx, prop in enumerate(shapes):
        metadata["shape_to_idx"][prop.name] = prop_idx
        metadata["idx_to_shape"][prop_idx] = prop.name
        
    for prop_idx, prop in enumerate(colors):
        metadata["color_to_idx"][prop.name] = prop_idx
        metadata["idx_to_color"][prop_idx] = prop.name
        
    for lbl_idx, label in enumerate(labels):
        metadata["label_to_idx"][label] = lbl_idx
        metadata["idx_to_label"][lbl_idx] = label
        
    with open(data_top_dir + "/meta.json", "w") as f:
        json.dump(metadata, f, indent=4, sort_keys=True)
else:
    with open(data_top_dir + "/meta.json") as f:
        metadata = json.load(f)
print(metadata)

{'color_to_idx': {'BLUE': 4, 'BROWN': 6, 'GREEN': 3, 'GREY': 7, 'ORANGE': 1, 'PURPLE': 5, 'RED': 0, 'YELLOW': 2}, 'idx_to_color': {'0': 'RED', '1': 'ORANGE', '2': 'YELLOW', '3': 'GREEN', '4': 'BLUE', '5': 'PURPLE', '6': 'BROWN', '7': 'GREY'}, 'idx_to_label': {'0': 'uniq_color', '1': 'uniq_shape'}, 'idx_to_shape': {'0': 'F', '1': 'I', '2': 'L', '3': 'N', '4': 'P', '5': 'T', '6': 'U', '7': 'V', '8': 'W', '9': 'X', '10': 'Y', '11': 'Z'}, 'label_to_idx': {'uniq_color': 0, 'uniq_shape': 1}, 'shape_to_idx': {'F': 0, 'I': 1, 'L': 2, 'N': 3, 'P': 4, 'T': 5, 'U': 6, 'V': 7, 'W': 8, 'X': 9, 'Y': 10, 'Z': 11}}


# The annotation structure

```
[
  {
    "id": 0,
    "label": 0, # 0: use target color, 1: use target shape to discriminate
    "target": {
        "color": 2,
        "shape": 5,
        "image": "F_blue.png"
    },
    "distractors": [{
        "color": 1,
        "shape": 5,
        "image": "F_red.png"
    },{
        "color": 2,
        "shape": 9,
        "image": "V_blue.png"
    }]
  }
]
```

Note: We produce one sample for each order (although the model should get invariant towards this anyway),
      but it seems easier to generate the dataset in this way (using for-loops) (for now)

In [7]:
from model.pentomino import create_all_distractor_configs, PropertyNames

In [8]:
# Test first on "single piece" combs. (should be 588 ones)
all_configs = create_all_distractor_configs(PieceConfig(Colors.BLUE, Shapes.T, RelPositions.CENTER), 
                                           unique_props={PropertyNames.COLOR},
                                           num_distractors=2,
                                           prop_values={
                                               PropertyNames.COLOR: list(Colors),
                                               PropertyNames.SHAPE: list(Shapes)
                                           })
print("Scenes: ", len(all_configs))

Scenes:  588


In [None]:
"""
Create for each piece combinations of color and shape, but leave out a combination for val/test. 
The hold-out combinations are never seen during training. Still, every color and shape is seen.

We produce the similiar amount of "mention the color" and "mention the shape" tasks for a combination. 
The meta-task is to find the unique property and extract that value from the target piece.
"""
counter = 0
samples = {
    "train": [],
    "val": [],
    "test": []
}

selected_shapes = [Shapes.T]
selected_colors = [Colors.BLUE]
for shape_idx, shape in enumerate(selected_shapes):
    for color_idx, color in enumerate(selected_colors):
        filename = to_image_name(color, shape)
        samples[split].append({
            "id": counter,
            "color": color.value,
            "shape": shape.value,
            "image": filename
        })
        counter += 1

In [None]:
for split in samples:
    with open(f"/data/compreg/sets/{num_distractors}dits/{split}.json", "w") as f:
        json.dump(samples[split], f)