In [1]:
import json
import numpy as np
from typing import Dict, List, Union
import logging
import argparse
import copy

In [2]:
logger = logging.getLogger(__name__)

def data_loader(file_path: str) -> Dict[str, Union[List[str], np.ndarray]]:
    """
    Loads grounded SCAN dataset from text file and ..
    :param file_path: Full path to file containing dataset (dataset.txt)
    :returns: dict with as keys all splits and values list of example dicts with input, target and situation.
    """
    with open(file_path, 'r') as infile:
        all_data = json.load(infile)
        grid_size = int(all_data["grid_size"])
        splits = list(all_data["examples"].keys())
        print("Found data splits: {}".format(splits))
        '''loaded_data = {}
        for split in splits:
            loaded_data[split] = []
            logger.info("Now loading data for split: {}".format(split))
            for data_example in all_data["examples"][split]:
                input_command = data_example["command"].split(',')
                target_command = data_example["target_commands"].split(',')
                situation = parse_sparse_situation(situation_representation=data_example["situation"],
                                                   grid_size=grid_size)
                loaded_data[split].append({"input": input_command,
                                           "target": target_command,
                                           "situation": situation.tolist()})  # .tolist() necessary to be serializable
            logger.info("Loaded {} examples in split {}.\n".format(len(loaded_data[split]), split))'''
    return all_data

In [3]:
dataset = data_loader('./data/compositional_splits/dataset.txt')

Found data splits: ['train', 'dev', 'test', 'visual', 'situational_1', 'situational_2', 'contextual', 'adverb_1', 'adverb_2', 'visual_easier']


In [4]:
splits = list(dataset["examples"].keys())

In [5]:
print(dataset.keys())

dict_keys(['grid_size', 'type_grammar', 'grammar', 'min_object_size', 'max_object_size', 'max_recursion', 'percentage_train', 'examples', 'intransitive_verbs', 'transitive_verbs', 'nouns', 'adverbs', 'color_adjectives', 'size_adjectives'])


In [6]:
print(dataset["examples"].keys())

dict_keys(['train', 'dev', 'test', 'visual', 'situational_1', 'situational_2', 'contextual', 'adverb_1', 'adverb_2', 'visual_easier'])


# Test dataset (same as paper)

In [7]:
test = dataset["examples"]["situational_2"]

#Info 
big = 0
small = 0
for example in test:
    if example["situation"]["target_object"]['object']['shape'] == 'circle':
        if example["situation"]["target_object"]['object']['size'] == '2':
            if example["referred_target"][0]=='b':
                big += 1
            elif example["referred_target"][0]=='s':
                small += 1

print('Examples: {}'.format(len(test)))
print('Size2 Reffered as "big": {}'.format(big))
print('Size2 Reffered as "small": {}'.format(small))


Examples: 16808
Size2 Reffered as "big": 0
Size2 Reffered as "small": 16808


In [8]:
testA = dataset["examples"]["test"]

#Info 
big = 0
small = 0
for example in testA:
    if example["situation"]["target_object"]['object']['shape'] == 'circle':
        if example["situation"]["target_object"]['object']['size'] == '2':
            if example["referred_target"][0]=='b':
                big += 1
            elif example["referred_target"][0]=='s':
                small += 1

print('Examples: {}'.format(len(test)))
print('Size2 Reffered as "big": {}'.format(big))
print('Size2 Reffered as "small": {}'.format(small))

Examples: 16808
Size2 Reffered as "big": 825
Size2 Reffered as "small": 0


# Original Training (same as paper)

In [9]:
original_train = dataset["examples"]["train"]

#Info 
big = 0
small = 0
for example in original_train:
    if example["situation"]["target_object"]['object']['shape'] == 'circle':
        if example["situation"]["target_object"]['object']['size'] == '2':
            if example["referred_target"][0]=='b':
                big += 1
            elif example["referred_target"][0]=='s':
                small += 1

print('Examples: {}'.format(len(test)))
print('Size2 Reffered as "big": {}'.format(big))
print('Size2 Reffered as "small": {}'.format(small))

Examples: 16808
Size2 Reffered as "big": 14979
Size2 Reffered as "small": 0


# Level 1 subdatasets not corrected

## Only Red

### Extract only red, and correct instruction

In [10]:
level1_color_r = [] 
color = 'red'

for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["color"] == color:
        control = 1
        for key, placed_object in example["situation"]["placed_objects"].items():
            if placed_object["object"]["color"] == color and key != '0':
                control = 0
    if control == 1:
        level1_color_r.append(copy.deepcopy(example))

for example in level1_color_r:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'cylinder' or i == 'square' or i == 'circle':
            pass
        elif i == 'green' or i == 'blue' or i == 'yellow':
            pass
        elif i == 'big' or i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == color:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,color)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command

'''for example in level1_color_r:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_color_r:
    for key, value in example['situation']['placed_objects'].items():
        if value['object']['color'] == color and key != '0':
            print('ERROR')
        
print('\n\nSize level1_color_r:  {}'.format(len(level1_color_r)))



Size level1_color_r:  15385


## Only Green

### Extract only green, and correct instruction

In [11]:
level1_color_g = []
color = 'green'

for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["color"] == color:
        control = 1
        for key, placed_object in example["situation"]["placed_objects"].items():
            if placed_object["object"]["color"] == color and key != '0':
                control = 0
    if control == 1:
        level1_color_g.append(copy.deepcopy(example))

for example in level1_color_g:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'cylinder' or i == 'square' or i == 'circle':
            pass
        elif i == 'red' or i == 'blue' or i == 'yellow':
            pass
        elif i == 'big' or i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == color:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,color)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])

'''for example in level1_color_g:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_color_g:
    for key, value in example['situation']['placed_objects'].items():
        if value['object']['color'] == color and key != '0':
            print('ERROR')
        
print('\n\nSize level1_color_g:  {}'.format(len(level1_color_g)))



Size level1_color_g:  22225


## Only Blue

### Extract only blue, and correct instruction

In [12]:
level1_color_b = []
color = 'blue'

for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["color"] == color:
        control = 1
        for key, placed_object in example["situation"]["placed_objects"].items():
            if placed_object["object"]["color"] == color and key != '0':
                control = 0
    if control == 1:
        level1_color_b.append(copy.deepcopy(example))

for example in level1_color_b:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'cylinder' or i == 'square' or i == 'circle':
            pass
        elif i == 'red' or i == 'green' or i == 'yellow':
            pass
        elif i == 'big' or i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == color:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,color)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])

'''for example in level1_color_b:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_color_b:
    for key, value in example['situation']['placed_objects'].items():
        if value['object']['color'] == color and key != '0':
            print('ERROR')
        
print('\n\nSize level1_color_b:  {}'.format(len(level1_color_b)))



Size level1_color_b:  22471


## Only Yellow

### Extract only yellow, and correct instruction

In [13]:
level1_color_y = []
color = 'yellow'

for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["color"] == color:
        control = 1
        for key, placed_object in example["situation"]["placed_objects"].items():
            if placed_object["object"]["color"] == color and key != '0':
                control = 0
    if control == 1:
        level1_color_y.append(copy.deepcopy(example))

for example in level1_color_y:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'cylinder' or i == 'square' or i == 'circle':
            pass
        elif i == 'red' or i == 'green' or i == 'blue':
            pass
        elif i == 'big' or i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == color:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,color)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])

'''for example in level1_color_y:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_color_y:
    for key, value in example['situation']['placed_objects'].items():
        if value['object']['color'] == color and key != '0':
            print('ERROR')
        
print('\n\nSize level1_color_y:  {}'.format(len(level1_color_y)))



Size level1_color_y:  20429


## Only Square

### Extract only square, not corrected

In [14]:
level1_shape_square = []
shape = 'square'

for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["shape"] == shape:
        control = 1
        for key, placed_object in example["situation"]["placed_objects"].items():
            if placed_object["object"]["shape"] == shape and key != '0':
                control = 0
    if control == 1:
        level1_shape_square.append(copy.deepcopy(example))

for example in level1_shape_square:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'cylinder' or i == 'circle':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'big' or i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == shape:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,shape)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_shape_square:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_shape_square:
    for key, value in example['situation']['placed_objects'].items():
        if value['object']['shape'] == shape and key != '0':
            print('ERROR')
        
print('\n\nSize level1_shape_square:  {}'.format(len(level1_shape_square)))



Size level1_shape_square:  22109


## Only Circle

### Extract only circle, and correct instruction

In [15]:
level1_shape_circle = []
shape = 'circle'

for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["shape"] == shape:
        control = 1
        for key, placed_object in example["situation"]["placed_objects"].items():
            if placed_object["object"]["shape"] == shape and key != '0':
                control = 0
    if control == 1:
        level1_shape_circle.append(copy.deepcopy(example))

for example in level1_shape_circle:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'cylinder' or i == 'square':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'big' or i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == shape:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,shape)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_shape_circle:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_shape_circle:
    for key, value in example['situation']['placed_objects'].items():
        if value['object']['shape'] == shape and key != '0':
            print('ERROR')
        
print('\n\nSize level1_shape_circle:  {}'.format(len(level1_shape_circle)))



Size level1_shape_circle:  33703


## Only Cylinder

### Extract only cylinder, and correct instruction

In [16]:
level1_shape_cylinder = []
shape = 'cylinder'

for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["shape"] == shape:
        control = 1
        for key, placed_object in example["situation"]["placed_objects"].items():
            if placed_object["object"]["shape"] == shape and key != '0':
                control = 0
    if control == 1:
        level1_shape_cylinder.append(copy.deepcopy(example))

for example in level1_shape_cylinder:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'big' or i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == shape:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,shape)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_shape_cylinder:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_shape_cylinder:
    for key, value in example['situation']['placed_objects'].items():
        if value['object']['shape'] == shape and key != '0':
            print('ERROR')
        
print('\n\nSize level1_shape_cylinder:  {}'.format(len(level1_shape_cylinder)))



Size level1_shape_cylinder:  33528


## Only Size 1 as small

### Extract only size 1 as small, and correct instruction

In [17]:
level1_size_1_small = []
size = '1'
reffered_size = 's'
sizeW = 'small'

n = 0
for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["size"] == size:
        if example["referred_target"][0]==reffered_size:
            control = 1
            count = 0
            for key, placed_object in example["situation"]["placed_objects"].items():
                if key != '0' and ( placed_object['object']['size'] == '1'):
                    count += 1
            if count < 3: 
                control = 1
                if count == 0:
                    n = n+1
            else: control = 0
        if control == 1:
            level1_size_1_small.append(copy.deepcopy(example))        


for example in level1_size_1_small:
    keep_key = ['0']
    for i in range(1,len(example['situation']['placed_objects'])):
        if example['situation']['placed_objects'][str(i)]['object']['size']== '1':
            example['situation']['placed_objects'].pop(str(i))
        else:
            keep_key.append(str(i))
    j = 0
    for i in keep_key:
        example['situation']['placed_objects'][str(j)] = example['situation']['placed_objects'].pop(i) 
        j += 1
            
            
for example in level1_size_1_small:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square' or i == 'cylinder':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'big':
            pass
        else:
            new_command_list.append(i)
        if i == sizeW:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,sizeW)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_size_2_small:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_size_1_small:
    for key, value in example['situation']['placed_objects'].items():
        if key != '0' and (value['object']['size'] == '1'):
            print('ERROR')
            
print('\n\nSize level1_size_1_small:  {}'.format(len(level1_size_1_small)))



Size level1_size_1_small:  24902


## Only Size 2 as small

### Extract only size 2 as small,and correct instruction

In [18]:
level1_size_2_small = []
size = '2'
reffered_size = 's'
sizeW = 'small'

n = 0
for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["size"] == size:
        if example["referred_target"][0]==reffered_size:
            control = 1
            count = 0
            for key, placed_object in example["situation"]["placed_objects"].items():
                if key != '0' and ( placed_object['object']['size'] == '2' or placed_object['object']['size'] == '1'):
                    count += 1
            if count < 3: 
                control = 1
                if count == 0:
                    n = n+1
            else: control = 0
        if control == 1:
            level1_size_2_small.append(copy.deepcopy(example))        


for example in level1_size_2_small:
    keep_key = ['0']
    for i in range(1,len(example['situation']['placed_objects'])):
        if example['situation']['placed_objects'][str(i)]['object']['size']== '2' or example['situation']['placed_objects'][str(i)]['object']['size']== '1':
            example['situation']['placed_objects'].pop(str(i))
        else:
            keep_key.append(str(i))
    j = 0
    for i in keep_key:
        example['situation']['placed_objects'][str(j)] = example['situation']['placed_objects'].pop(i) 
        j += 1
            
            
for example in level1_size_2_small:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square' or i == 'cylinder':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'big':
            pass
        else:
            new_command_list.append(i)
        if i == sizeW:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,sizeW)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_size_2_small:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_size_2_small:
    for key, value in example['situation']['placed_objects'].items():
        if key != '0' and (value['object']['size'] == '2'  or value['object']['size'] == '1'):
            print('ERROR')
            
print('\n\nSize level1_size_2_small:  {}'.format(len(level1_size_2_small)))



Size level1_size_2_small:  3842


## Only Size 3 as small

### Extract only size 3 as small, and correct instruction

In [19]:
level1_size_3_small = []
size = '3'
reffered_size = 's'
sizeW = 'small'

n = 0
for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["size"] == size:
        if example["referred_target"][0]==reffered_size:
            control = 1
            count = 0
            for key, placed_object in example["situation"]["placed_objects"].items():
                if key != '0' and ( placed_object['object']['size'] == '3' or placed_object['object']['size'] == '2' or placed_object['object']['size'] == '1'):
                    count += 1
            if count < 3: 
                control = 1
                if count == 0:
                    n = n+1
            else: control = 0
        if control == 1:
            level1_size_3_small.append(copy.deepcopy(example))        


for example in level1_size_3_small:
    keep_key = ['0']
    for i in range(1,len(example['situation']['placed_objects'])):
        if example['situation']['placed_objects'][str(i)]['object']['size']== '3' or example['situation']['placed_objects'][str(i)]['object']['size']== '2' or example['situation']['placed_objects'][str(i)]['object']['size']== '1':
            example['situation']['placed_objects'].pop(str(i))
        else:
            keep_key.append(str(i))
    j = 0
    for i in keep_key:
        example['situation']['placed_objects'][str(j)] = example['situation']['placed_objects'].pop(i) 
        j += 1
            
            
for example in level1_size_3_small:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square' or i == 'cylinder':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'big':
            pass
        else:
            new_command_list.append(i)
        if i == sizeW:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,sizeW)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_size_3_small:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_size_3_small:
    for key, value in example['situation']['placed_objects'].items():
        if key != '0' and (value['object']['size'] == '3' or value['object']['size'] == '2'  or value['object']['size'] == '1'):
            print('ERROR')
            
print('\n\nSize level1_size_3_small:  {}'.format(len(level1_size_3_small)))



Size level1_size_3_small:  554


## Only Size 2 as big

### Extract only size 2 as big, and correct instruction

In [20]:
level1_size_2_big = []
size = '2'
reffered_size = 'b'
sizeW = 'big'

n = 0
for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["size"] == size:
        if example["referred_target"][0]==reffered_size:
            control = 1
            count = 0
            for key, placed_object in example["situation"]["placed_objects"].items():
                if key != '0' and ( placed_object['object']['size'] == '4' or placed_object['object']['size'] == '3' or placed_object['object']['size'] == '2'):
                    count += 1
            if count < 3: 
                control = 1
                if count == 0:
                    n = n+1
            else: control = 0
        if control == 1:
            level1_size_2_big.append(copy.deepcopy(example))        


for example in level1_size_2_big:
    keep_key = ['0']
    for i in range(1,len(example['situation']['placed_objects'])):
        if example['situation']['placed_objects'][str(i)]['object']['size']== '4' or example['situation']['placed_objects'][str(i)]['object']['size']== '3' or example['situation']['placed_objects'][str(i)]['object']['size']== '2':
            example['situation']['placed_objects'].pop(str(i))
        else:
            keep_key.append(str(i))
    j = 0
    for i in keep_key:
        example['situation']['placed_objects'][str(j)] = example['situation']['placed_objects'].pop(i) 
        j += 1
            
            
for example in level1_size_2_big:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square' or i == 'cylinder':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == sizeW:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,sizeW)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_size_2_big:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_size_2_big:
    for key, value in example['situation']['placed_objects'].items():
        if key != '0' and (value['object']['size'] == '4' or value['object']['size'] == '3'  or value['object']['size'] == '2'):
            print('ERROR')
            
print('\n\nSize level1_size_2_big:  {}'.format(len(level1_size_2_big)))



Size level1_size_2_big:  621


## Only Size 3 as big

### Extract only size 3 as big, and correct instruction

In [21]:
level1_size_3_big = []
size = '3'
reffered_size = 'b'
sizeW = 'big'

n = 0
for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["size"] == size:
        if example["referred_target"][0]==reffered_size:
            control = 1
            count = 0
            for key, placed_object in example["situation"]["placed_objects"].items():
                if key != '0' and ( placed_object['object']['size'] == '4' or placed_object['object']['size'] == '3'):
                    count += 1
            if count < 3: 
                control = 1
                if count == 0:
                    n = n+1
            else: control = 0
        if control == 1:
            level1_size_3_big.append(copy.deepcopy(example))        


for example in level1_size_3_big:
    keep_key = ['0']
    for i in range(1,len(example['situation']['placed_objects'])):
        if example['situation']['placed_objects'][str(i)]['object']['size']== '4' or example['situation']['placed_objects'][str(i)]['object']['size']== '3':
            example['situation']['placed_objects'].pop(str(i))
        else:
            keep_key.append(str(i))
    j = 0
    for i in keep_key:
        example['situation']['placed_objects'][str(j)] = example['situation']['placed_objects'].pop(i) 
        j += 1
            
            
for example in level1_size_3_big:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square' or i == 'cylinder':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == sizeW:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,sizeW)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_size_3_big:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_size_3_big:
    for key, value in example['situation']['placed_objects'].items():
        if key != '0' and (value['object']['size'] == '4' or value['object']['size'] == '3'):
            print('ERROR')
            
print('\n\nSize level1_size_3_big:  {}'.format(len(level1_size_3_big)))



Size level1_size_3_big:  5442


## Only Size 4 as big

### Extract only size 4as big, and correct instruction

In [22]:
level1_size_4_big = []
size = '4'
reffered_size = 'b'
sizeW = 'big'

n = 0
for example in original_train:
    control = 0
    if example["situation"]["target_object"]["object"]["size"] == size:
        if example["referred_target"][0]==reffered_size:
            control = 1
            count = 0
            for key, placed_object in example["situation"]["placed_objects"].items():
                if key != '0' and ( placed_object['object']['size'] == '4'):
                    count += 1
            if count < 3: 
                control = 1
                if count == 0:
                    n = n+1
            else: control = 0
        if control == 1:
            level1_size_4_big.append(copy.deepcopy(example))        


for example in level1_size_4_big:
    keep_key = ['0']
    for i in range(1,len(example['situation']['placed_objects'])):
        if example['situation']['placed_objects'][str(i)]['object']['size']== '4':
            example['situation']['placed_objects'].pop(str(i))
        else:
            keep_key.append(str(i))
    j = 0
    for i in keep_key:
        example['situation']['placed_objects'][str(j)] = example['situation']['placed_objects'].pop(i) 
        j += 1
            
            
for example in level1_size_4_big:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square' or i == 'cylinder':
            pass
        elif i == 'red' or i == 'green' or i == 'blue' or i =='yellow':
            pass
        elif i == 'small':
            pass
        else:
            new_command_list.append(i)
        if i == sizeW:
            control = 1
    if control == 0:
        index_a = new_command_list.index('a')
        new_command_list.insert(index_a+1,sizeW)

            
            
            
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command
    #print(example['command'])
    
'''for example in level1_size_3_big:
    for key, value in example.items():
        print(key,':   ', value,'\n')
    break'''

for example in level1_size_4_big:
    for key, value in example['situation']['placed_objects'].items():
        if key != '0' and value['object']['size'] == '4':
            print('ERROR')
            
print('\n\nSize level1_size_4_big:  {}'.format(len(level1_size_4_big)))



Size level1_size_4_big:  24834


### LEVEL 1 Results

In [23]:
print('Original Train:   {}'.format(len(original_train)))

colorL = len(level1_color_r)+len(level1_color_g)+len(level1_color_b)+len(level1_color_y)
shapeL = len(level1_shape_square)+len(level1_shape_circle)+len(level1_shape_cylinder)
sizeL = len(level1_size_1_small)+len(level1_size_2_small)+len(level1_size_3_small)+len(level1_size_2_big)+len(level1_size_3_big)+len(level1_size_4_big)
totalL = colorL + shapeL + sizeL
print('\nLevel 1 total:   {}'.format(totalL))

print('\tLevel 1 color:   {}'.format(colorL))
print('\t\tLevel 1 color red:   {}'.format(len(level1_color_r)))
print('\t\tLevel 1 color green:   {}'.format(len(level1_color_g)))
print('\t\tLevel 1 color blue:   {}'.format(len(level1_color_b)))
print('\t\tLevel 1 color yellow:   {}'.format(len(level1_color_y)))

print('\tLevel 1 shape:   {}'.format(shapeL))
print('\t\tLevel 1 shape sqare:   {}'.format(len(level1_shape_square)))
print('\t\tLevel 1 shape circle:   {}'.format(len(level1_shape_circle)))
print('\t\tLevel 1 shape cylinder:   {}'.format(len(level1_shape_cylinder)))

print('\tLevel 1 size:   {}'.format(sizeL))
print('\t\tLevel 1 size 1 small:   {}'.format(len(level1_size_1_small)))
print('\t\tLevel 1 size 2 small:   {}'.format(len(level1_size_2_small)))
print('\t\tLevel 1 size 3 small:   {}'.format(len(level1_size_3_small)))
print('\t\tLevel 1 size 2 big:   {}'.format(len(level1_size_2_big)))
print('\t\tLevel 1 size 3 big:   {}'.format(len(level1_size_3_big)))
print('\t\tLevel 1 size 4 big:   {}'.format(len(level1_size_4_big)))

Original Train:   367933

Level 1 total:   230045
	Level 1 color:   80510
		Level 1 color red:   15385
		Level 1 color green:   22225
		Level 1 color blue:   22471
		Level 1 color yellow:   20429
	Level 1 shape:   89340
		Level 1 shape sqare:   22109
		Level 1 shape circle:   33703
		Level 1 shape cylinder:   33528
	Level 1 size:   60195
		Level 1 size 1 small:   24902
		Level 1 size 2 small:   3842
		Level 1 size 3 small:   554
		Level 1 size 2 big:   621
		Level 1 size 3 big:   5442
		Level 1 size 4 big:   24834


# Level 2

## shape + color

### Extract only shape  and color

In [24]:
level2_shape_color = []

n = 0   
for example in original_train:
    command_list = example['command'].split(',')
    control = 0
    for i in command_list:
        if i == 'big' or i == 'small':
            control = 1
    if control == 0:
        n = n+1
        level2_shape_color.append(copy.deepcopy(example))

print(n)    

153825


## shape + size

### Extract only shape and size

In [25]:
level2_shape_size = []

n = 0   
for example in original_train:
    command_list = example['command'].split(',')
    control = 0
    for i in command_list:
        if i == 'red' or i == 'green' or i == 'blue' or i == 'yellow':
            control = 1
    if control == 0:
        n = n+1
        level2_shape_size.append(copy.deepcopy(example))

print(n)    

192393


## color + size big

### Extract only color and size big 4

In [26]:
level2_color_size = []

n = 0   
for example in original_train:
    command_list = example['command'].split(',')
    control_color = 0
    control_size = 0
    control = 0
    r_size = None
    for i in command_list:
        if (i == 'red' or i == 'green' or i == 'blue' or i == 'yellow'):
            control_color = 1
        elif (i == 'big' or i == 'small'):
            control_size = 1
            r_size = i
    if control_size and control_color:
        control = 1
        
        t_size = example["situation"]["target_object"]["object"]["size"]
        t_color = example["situation"]["target_object"]["object"]["color"]
        
        if r_size == 'big':
            for i in range(int(t_size),5):
                for key, placed_object in example["situation"]["placed_objects"].items():
                    if key != '0' and ( placed_object['object']['size'] == str(i) and placed_object['object']['color']== t_color):
                        control = 0
        elif r_size == 'small':
            for i in range(1,int(t_size)+1):
                for key, placed_object in example["situation"]["placed_objects"].items():
                    if key != '0' and ( placed_object['object']['size'] == str(i) and placed_object['object']['color']== t_color):
                        control = 0
        else: print('ERROR')
            
    if control == 1:
        n = n+1
        level2_color_size.append(copy.deepcopy(example))
        
for example in level2_color_size:
    command_list = example['command'].split(',')
    new_command_list = []
    control = 0
    for i in command_list:
        if i == 'circle' or i == 'square' or i == 'cylinder':
            pass
        else:
            new_command_list.append(i)
        
    new_command = ','.join(new_command_list)
    example['command'] = new_command
    example['meaning'] = new_command

#for example in level2_color_size:
#    print(example['command'])

print(len(level2_color_size))    

47014


### LEVEL 2 Results

In [27]:
print('Original Train:   {}'.format(len(original_train)))

totalL = len(level2_color_size) + len(level2_shape_size) + len(level2_shape_color)
print('\nLevel 2 total:   {}'.format(totalL))

print('\tLevel 2 shape color:   {}'.format(len(level2_shape_color)))
print('\tLevel 2 shape size:   {}'.format(len(level2_shape_size)))
print('\tLevel 2 color size:   {}'.format(len(level2_color_size)))

Original Train:   367933

Level 2 total:   393232
	Level 2 shape color:   153825
	Level 2 shape size:   192393
	Level 2 color size:   47014


# Level 3

# Extract only shape, color and size

In [28]:
level3 = []
   
for example in original_train:
    command_list = example['command'].split(',')
    control_shape = 0
    control_size  = 0
    control_color = 0
    for i in command_list:
        if (i == 'big' or i == 'small'):
            control_size = 1
        elif (i == 'red' or i == 'green' or i == 'blue' or i == 'yellow'):
            control_color = 1
        elif (i == 'square' or i == 'circle' or i == 'cylinder'):
            control_shape = 1
    
    if control_shape and control_size and control_color:
        level3.append(copy.deepcopy(example))

print(len(level3)) 

102114


# Total Results

In [29]:


colorL = len(level1_color_r)+len(level1_color_g)+len(level1_color_b)+len(level1_color_y)
shapeL = len(level1_shape_square)+len(level1_shape_circle)+len(level1_shape_cylinder)
sizeL = len(level1_size_1_small)+len(level1_size_2_small)+len(level1_size_3_small)+len(level1_size_2_big)+len(level1_size_3_big)+len(level1_size_4_big)
totalL1 = colorL + shapeL + sizeL
totalL2 = len(level2_color_size) + len(level2_shape_size) + len(level2_shape_color)
totalL3 = len(level3)
big_total = totalL1 + totalL2 + totalL3 

print('Original Train:   {}'.format(len(original_train)))



print('\nNew Train:   {}'.format(big_total))

print('\tLevel 1 total:   {}'.format(totalL1))

print('\t\tLevel 1 color:   {}'.format(colorL))
print('\t\t\tLevel 1 color red:   {}'.format(len(level1_color_r)))
print('\t\t\tLevel 1 color green:   {}'.format(len(level1_color_g)))
print('\t\t\tLevel 1 color blue:   {}'.format(len(level1_color_b)))
print('\t\t\tLevel 1 color yellow:   {}'.format(len(level1_color_y)))

print('\t\tLevel 1 shape:   {}'.format(shapeL))
print('\t\t\tLevel 1 shape sqare:   {}'.format(len(level1_shape_square)))
print('\t\t\tLevel 1 shape circle:   {}'.format(len(level1_shape_circle)))
print('\t\t\tLevel 1 shape cylinder:   {}'.format(len(level1_shape_cylinder)))

print('\t\tLevel 1 size:   {}'.format(sizeL))
print('\t\t\tLevel 1 size 1 small:   {}'.format(len(level1_size_1_small)))
print('\t\t\tLevel 1 size 2 small:   {}'.format(len(level1_size_2_small)))
print('\t\t\tLevel 1 size 3 small:   {}'.format(len(level1_size_3_small)))
print('\t\t\tLevel 1 size 2 big:   {}'.format(len(level1_size_2_big)))
print('\t\t\tLevel 1 size 3 big:   {}'.format(len(level1_size_3_big)))
print('\t\t\tLevel 1 size 4 big:   {}'.format(len(level1_size_4_big)))


print('\tLevel 2 total:   {}'.format(totalL2))

print('\t\tLevel 2 shape color:   {}'.format(len(level2_shape_color)))
print('\t\tLevel 2 shape size:   {}'.format(len(level2_shape_size)))
print('\t\tLevel 2 color size:   {}'.format(len(level2_color_size)))

print('\tLevel 3 total:   {}'.format(totalL3))

Original Train:   367933

New Train:   725391
	Level 1 total:   230045
		Level 1 color:   80510
			Level 1 color red:   15385
			Level 1 color green:   22225
			Level 1 color blue:   22471
			Level 1 color yellow:   20429
		Level 1 shape:   89340
			Level 1 shape sqare:   22109
			Level 1 shape circle:   33703
			Level 1 shape cylinder:   33528
		Level 1 size:   60195
			Level 1 size 1 small:   24902
			Level 1 size 2 small:   3842
			Level 1 size 3 small:   554
			Level 1 size 2 big:   621
			Level 1 size 3 big:   5442
			Level 1 size 4 big:   24834
	Level 2 total:   393232
		Level 2 shape color:   153825
		Level 2 shape size:   192393
		Level 2 color size:   47014
	Level 3 total:   102114


# Final Check

In [30]:
all_datasets = [level1_color_r,level1_color_g,level1_color_b,level1_color_y,level1_shape_square,level1_shape_circle,level1_shape_cylinder,level1_size_1_small,level1_size_2_small,level1_size_3_small,level1_size_2_big,level1_size_3_big,level1_size_4_big,level2_color_size,level2_shape_size,level2_shape_color,level3]

for test in all_datasets:
    #Info 
    big = 0
    small = 0
    for example in test:
        if example["situation"]["target_object"]['object']['shape'] == 'circle':
            if example["situation"]["target_object"]['object']['size'] == '2':
                if example["referred_target"][0]=='b':
                    big += 1
                elif example["referred_target"][0]=='s':
                    small += 1

    print('Examples: {}'.format(len(test)))
    print('Size2 Reffered as "big": {}'.format(big))
    print('Size2 Reffered as "small": {}'.format(small))

Examples: 15385
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 22225
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 22471
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 20429
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 22109
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 33703
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 33528
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 24902
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 3842
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 554
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 621
Size2 Reffered as "big": 234
Size2 Reffered as "small": 0
Examples: 5442
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 24834
Size2 Reffered as "big": 0
Size2 Reffered as "small": 0
Examples: 47014
Size2 Reffered as "big": 2265
Size2 Reffered as "sma

# Saving The dataset

In [31]:
import json
import random

In [32]:
all_datasets = 0
Level_1 = level1_color_r 
print(len(Level_1))
Level_1.extend(level1_color_g)
print(len(Level_1))
Level_1.extend(level1_color_b)
print(len(Level_1))
Level_1.extend(level1_color_y)
print(len(Level_1))
Level_1.extend(level1_shape_square)
print(len(Level_1))
Level_1.extend(level1_shape_circle)
print(len(Level_1))
Level_1.extend(level1_shape_cylinder)
print(len(Level_1))
Level_1.extend(level1_size_1_small)
print(len(Level_1))
Level_1.extend(level1_size_2_small)
print(len(Level_1))
Level_1.extend(level1_size_3_small)
print(len(Level_1))
Level_1.extend(level1_size_2_big)
print(len(Level_1))
Level_1.extend(level1_size_3_big)
print(len(Level_1))
Level_1.extend(level1_size_4_big)
print(len(Level_1))
random.shuffle(Level_1)
print(len(Level_1))
dataset['examples']['train'] = Level_1
print(len(dataset['examples']['train']))
with open('new_dataset/Level1.txt', 'w') as file:
     file.write(json.dumps(dataset))

15385
37610
60081
80510
102619
136322
169850
194752
198594
199148
199769
205211
230045
230045
230045


In [33]:
Level_1_short = random.choices(Level_1, k=115022)
print(len(Level_1_short))
dataset['examples']['train'] = Level_1_short
print(len(dataset['examples']['train']))
with open('new_dataset/Level_1_short.txt', 'w') as file:
     file.write(json.dumps(dataset))

115022
115022


In [34]:
Level_2 = level2_color_size 
print(len(Level_2))
Level_2.extend(level2_shape_size)
print(len(Level_2))
Level_2.extend(level2_shape_color)
print(len(Level_2))
random.shuffle(Level_2)
print(len(Level_2))
dataset['examples']['train'] = Level_2
print(len(dataset['examples']['train']))
with open('new_dataset/Level2.txt', 'w') as file:
     file.write(json.dumps(dataset))

47014
239407
393232
393232
393232


In [35]:
Level_2_short = random.choices(Level_2, k=196616)
print(len(Level_2_short))
dataset['examples']['train'] = Level_2_short
print(len(dataset['examples']['train']))
with open('new_dataset/Level_2_short.txt', 'w') as file:
     file.write(json.dumps(dataset))

196616
196616


In [36]:
print(len(level3))
random.shuffle(level3)
print(len(level3))
dataset['examples']['train'] = level3
print(len(dataset['examples']['train']))
with open('new_dataset/Level3.txt', 'w') as file:
     file.write(json.dumps(dataset))

102114
102114
102114


In [37]:
Level_3_short = random.choices(level3, k=51057)
print(len(Level_3_short))
dataset['examples']['train'] = Level_3_short
print(len(dataset['examples']['train']))
with open('new_dataset/Level_3_short.txt', 'w') as file:
     file.write(json.dumps(dataset))

51057
51057


In [38]:
Level_1_2 = Level_1
print(len(Level_1_2))
Level_1_2.extend(Level_2)
print(len(Level_1_2))
random.shuffle(Level_1_2)
print(len(Level_1_2))
dataset['examples']['train'] = Level_1_2
print(len(dataset['examples']['train']))
with open('new_dataset/Level_1_2.txt', 'w') as file:
     file.write(json.dumps(dataset))

230045
623277
623277
623277


In [39]:
Level_1_2_short = Level_1_short
print(len(Level_1_2_short))
Level_1_2_short.extend(Level_2_short)
print(len(Level_1_2_short))
random.shuffle(Level_1_2_short)
print(len(Level_1_2_short))
dataset['examples']['train'] = Level_1_2_short
print(len(dataset['examples']['train']))
with open('new_dataset/Level_1_2_short.txt', 'w') as file:
     file.write(json.dumps(dataset))

115022
311638
311638
311638


In [40]:
new_training_set = Level_1_2
print(len(new_training_set))
new_training_set.extend(level3)
print(len(new_training_set))
random.shuffle(new_training_set)
print(len(new_training_set))
dataset['examples']['train'] = new_training_set
print(len(dataset['examples']['train']))
with open('new_dataset/new_training_set.txt', 'w') as file:
     file.write(json.dumps(dataset))

623277
725391
725391
725391


In [41]:
short_new_training_set = Level_1_2_short
print(len(short_new_training_set))
short_new_training_set.extend(Level_3_short)
print(len(short_new_training_set))
random.shuffle(short_new_training_set)
print(len(short_new_training_set))
dataset['examples']['train'] = short_new_training_set
print(len(dataset['examples']['train']))
with open('new_dataset/short_new_training_set.txt', 'w') as file:
     file.write(json.dumps(dataset))

311638
362695
362695
362695
