This notebook illustrates how we:
1. Count the relations per mental model (MM)
2. Enrich the relations via constraints
3. Count the relations per MM after enriching (full-ET-dataset)


In [1]:
import csv
from utils import *

In [2]:
# Prepare the "enriched_mms" output folder (helper comes from the `utils` star import;
# presumably it creates the directory when it is missing -- TODO confirm).
# NOTE(review): this runs before the imports in the next cell; those modules may open
# their log/dataset files inside this folder at import time -- confirm before reordering.
make_sure_dir_exists("enriched_mms")

In [3]:
from check_constraints import * 
from enrich_via_constraints import *
from relations import *

In [4]:
# Seed parts for the 100 everyday things: gather every listed part and the
# smallest / largest number of parts given for a single thing.
with open("../annotation-files/100everydaythings-Selected100_2022Oct31.tsv", "r") as tab1_info:
    # Row 0 is skipped (presumably the column header); column 1 holds a
    # comma-separated list of part names.
    seed_rows = list(csv.reader(tab1_info, delimiter="\t"))[1:]

parts_per_thing = [[name.strip() for name in row[1].split(",")] for row in seed_rows]
total_parts = [name for names in parts_per_thing for name in names]
part_counts = [len(names) for names in parts_per_thing]

# 10000 and 0 are the starting bounds; they are reported unchanged when the
# sheet has no data rows.
min_parts_len = min([10000] + part_counts)
max_parts_len = max([0] + part_counts)

print("# parts given as seed (100 everyday things):", len(total_parts))
print("Min. # parts given as seed (100 everyday things):", min_parts_len)
print("Max. # parts given as seed (100 everyday things):", max_parts_len)


# parts given as seed (100 everyday things): 716
Min. # parts given as seed (100 everyday things): 3
Max. # parts given as seed (100 everyday things): 14


## 1. Get transcribed MMs

In [5]:
# Load the transcribed annotations of all five transcribers into
# et2triplets_ann: (everyday thing, turker) -> list of [part, relation, part].
et2triplets_ann = {}
ann_cnt = 0
for transcriber_idx in range(5):
    tsv_path = "../annotation-files/transcribed-turk-ann/Diagram-transcription - id{}_tab2.tsv".format(transcriber_idx)
    with open(tsv_path) as infile:
        for line_idx, line in enumerate(csv.reader(infile, delimiter="\t")):

            # A non-empty first column starts a new MM; the rows that follow
            # (with an empty first column) belong to the same MM.
            if line[0] != "":
                et, turker = line[1].lower(), line[0]
                et2triplets_ann[(et, turker)] = []

            # Columns 3-5 hold the relation triplet, normalised to lower case.
            triplet = [field.lower().strip() for field in line[3:6]]
            assert triplet[1] in all_relations_lst

            # Report rows where any element of the triplet is empty.
            if not triplet[0] or not triplet[1] or not triplet[2]:
                print(line_idx, line[3:6])

            recorded = et2triplets_ann[(et, turker)]
            if triplet in recorded:
                # Keep only the first occurrence of a triplet within one MM.
                print(triplet, "exists for", (et, turker), "[repeated entry]")
            else:
                recorded.append(triplet)
                ann_cnt += 1

print(ann_cnt, "annotated relations")

['foot rests', 'inside', 'deck'] exists for ('kayak', 'worker_100') [repeated entry]
['glass shell', 'above', 'metal base'] exists for ('lightbulb', 'worker_107') [repeated entry]
['top', 'above', 'carry handles'] exists for ('bird cage', 'worker_107') [repeated entry]
['top', 'above', 'rods'] exists for ('bird cage', 'worker_107') [repeated entry]
['control knob', 'part of', 'control panel'] exists for ('electric stove', 'worker_112') [repeated entry]
['leaves', 'part of', 'crown'] exists for ('tree', 'worker_107') [repeated entry]
2752 annotated relations


In [6]:
# Statistics over the raw (not yet enriched) annotations: number of MMs,
# distinct parts per MM, and relation counts split by relation type.
print("*" * 10, "Annotated MMs", "*" * 10)
parts = 0
total_relations = 0
relation_type_cnt = {"spatial": 0, "conectivity": 0, "functional": 0}
# Relations not listed here are counted as spatial.
non_spatial_bucket = {"connects": "conectivity", "requires": "functional"}

for (et, turker), relations in et2triplets_ann.items():
    parts_seen = set()
    for p1, rln, p2 in relations:
        parts_seen.update((p1, p2))
        relation_type_cnt[non_spatial_bucket.get(rln, "spatial")] += 1

    # every annotated MM is expected to contain at least two relations
    assert len(relations) > 1
    parts += len(parts_seen)
    total_relations += len(relations)

mm_count = len(et2triplets_ann)
print("Total number of MMs annotated:", mm_count)
print("Total # parts annotated:", parts)
print("Total number of relations annotated:", total_relations)
for rt, cnt in relation_type_cnt.items():
    print(rt, "annotated: ", cnt)

print("Avg # parts annotated per MM:", round(parts / mm_count, 2))
print("Avg. number of relations per MM:", round(total_relations / mm_count, 2))
for rt, cnt in relation_type_cnt.items():
    print(rt, "annotated (avg): ", round(cnt / mm_count, 2))

********** Annotated MMs **********
Total number of MMs annotated: 300
Total # parts annotated: 2191
Total number of relations annotated: 2752
spatial annotated:  1858
conectivity annotated:  818
functional annotated:  76
Avg # parts annotated per MM: 7.3
Avg. number of relations per MM: 9.17
spatial annotated (avg):  6.19
conectivity annotated (avg):  2.73
functional annotated (avg):  0.25


In [7]:
# Check the raw annotations of every MM with get_asymmetric_constraint_violations
# and accumulate the violation / applicable-constraint counts over all MMs.
v = 0
t = 0
for (et, turker), relations in et2triplets_ann.items():
    # the checker is handed the triplets as tuples rather than lists
    relations_tuples = [tuple(triplet) for triplet in relations]
    print(et, turker, len(relations))
    violations, total = get_asymmetric_constraint_violations(relations_tuples, verbose=False)
    v += violations
    t += total

# Guard the ratio: when no MM has an applicable constraint the denominator is 0
# and the plain division would raise ZeroDivisionError.
percentage = round(v / t, 2) if t else "n/a"
print("Overall consistency violation: {numerator}/{denominator} ({percentage})".format(
    numerator=v, denominator=t, percentage=percentage))

air conditioner worker_100 4
[CONSTRAINT TYPE2]
Consistency violation: 0/0
Not applicable for given mental model!
air conditioner worker_101 6
[CONSTRAINT TYPE2]
Consistency violation: 0/2 (0.0))
air conditioner worker_102 8
[CONSTRAINT TYPE2]
Consistency violation: 0/4 (0.0))
bicycle worker_101 17
[CONSTRAINT TYPE2]
Consistency violation: 0/5 (0.0))
bicycle worker_102 11
[CONSTRAINT TYPE2]
Consistency violation: 0/4 (0.0))
bicycle worker_103 8
[CONSTRAINT TYPE2]
Consistency violation: 0/5 (0.0))
binoculars worker_101 8
[CONSTRAINT TYPE2]
Consistency violation: 0/0
Not applicable for given mental model!
binoculars worker_102 7
[CONSTRAINT TYPE2]
Consistency violation: 0/4 (0.0))
binoculars worker_100 6
[CONSTRAINT TYPE2]
Consistency violation: 0/1 (0.0))
blender worker_101 11
[CONSTRAINT TYPE2]
Consistency violation: 0/5 (0.0))
blender worker_100 7
[CONSTRAINT TYPE2]
Consistency violation: 0/4 (0.0))
blender worker_104 8
[CONSTRAINT TYPE2]
Consistency violation: 0/6 (0.0))
boat worker_

## 2. Enrich via constraints

In [8]:
# Write the column header of the dataset file; assigning to `_` keeps the
# return value of write() out of the cell output.
header_fields = ["everyday-thing", "turker", "triplet", "label"]
_ = mini_ET_dataset.write("\t".join(header_fields) + "\n")

In [9]:
# Enrich every annotated MM via the constraints, write each resulting
# (triplet, label) pair to the dataset file, and gather the statistics that
# the following cell reports.
v = 0
t = 0
print("Enriching with 4 types of constraints ...")
print("*" * 10, "Annotated + Enriched MMs", "*" * 10)
parts = []
total_in_data = 0
ans_true = 0
ans_false = 0
relation_type_cnt = {"spatial": 0, "conectivity": 0, "functional": 0}

# NOTE(review): these handles are not opened in this notebook (presumably they come
# from one of the star imports). They are closed below, so this cell cannot be
# re-run without restarting the kernel -- confirm that this is intended.
output_handles = (enrich_logfile, enrich_cnts_logfile, added_relations_logfile, mini_ET_dataset)

try:
    for et, turker in et2triplets_ann:
        enrich_logfile.write(et + " " + turker + "\n")
        enrich_cnts_logfile.write(et + " " + turker + "\n")
        added_relations_logfile.write(et + " " + turker + "...")

        # enrich (on a duplicate-free copy of the annotated relations)
        ann_relations = et2triplets_ann[(et, turker)]
        relations = []  # no duplicates
        for rln in ann_relations:
            if rln not in relations:
                relations.append(rln)
        all_relations = get_all_relations_with_labels(relations, verbose = False)
        print(et, turker, "Before:", len(relations), "After:",  len(all_relations))

        # only the triplets with a truthy label are passed to the constraint check
        relations_tuples_check = [triplet for triplet, label in all_relations if label]
        violations, total = get_all_constraint_violations(relations_tuples_check, verbose=False, max_sat_applied=False)
        v += violations
        t += total

        parts_local = []
        # store relations
        for triplet, label in all_relations:
            mini_ET_dataset.write("\t".join([et, turker, str(triplet), str(label)]) + "\n")
            total_in_data += 1
            if label:
                ans_true += 1
            else:
                ans_false += 1

            # get some stats
            p1, rln, p2 = triplet
            for p in (p1, p2):
                if p not in parts_local:
                    parts_local.append(p)

            if rln == "connects":
                relation_type_cnt["conectivity"] += 1
            elif rln == "requires":
                relation_type_cnt["functional"] += 1
            else:
                relation_type_cnt["spatial"] += 1
        parts += parts_local
finally:
    # Close (and thereby flush) every output file even if enrichment fails
    # part-way through, so no handle is leaked and partial logs reach disk.
    for handle in output_handles:
        handle.close()


Enriching with 4 types of constraints ...
********** Annotated + Enriched MMs **********
air conditioner worker_100 Before: 4 After: 8
[CONSTRAINT TYPE1]
Consistency violation: 0/4 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/0
Not applicable for given mental model!
[CONSTRAINT TYPE3]
Consistency violation: 0/0
Not applicable for given mental model!
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/4 (0.0))
air conditioner worker_101 Before: 6 After: 16
[CONSTRAINT TYPE1]
Consistency violation: 0/4 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/4 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/2 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/10 (0.0))
air conditioner worker_102 Before: 8 After: 24
[CONSTRAINT TYPE1]
Consistency violation: 0/4 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/8 (0.0))
[CONSTRAINT TY

[CONSTRAINT TYPE3]
Consistency violation: 0/8 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/26 (0.0))
house worker_100 Before: 10 After: 34
[CONSTRAINT TYPE1]
Consistency violation: 0/3 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/14 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/7 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/24 (0.0))
house worker_101 Before: 19 After: 56
[CONSTRAINT TYPE1]
Consistency violation: 0/12 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/16 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/8 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/2 (0.0))
[===OVERALL==]
Consistency violation: 0/38 (0.0))
house worker_102 Before: 9 After: 24
[CONSTRAINT TYPE1]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE3]
Consistency vio

fish worker_100 Before: 14 After: 180
[CONSTRAINT TYPE1]
Consistency violation: 0/0
Not applicable for given mental model!
[CONSTRAINT TYPE2]
Consistency violation: 0/90 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/45 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/192 (0.0))
[===OVERALL==]
Consistency violation: 0/327 (0.0))
fish worker_102 Before: 10 After: 52
[CONSTRAINT TYPE1]
Consistency violation: 0/2 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/24 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/12 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/10 (0.0))
[===OVERALL==]
Consistency violation: 0/48 (0.0))
flower worker_107 Before: 5 After: 16
[CONSTRAINT TYPE1]
Consistency violation: 0/2 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/3 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/11 (0.0))
flower worker_101 Before: 9 A

telephone worker_102 Before: 6 After: 14
[CONSTRAINT TYPE1]
Consistency violation: 0/5 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/2 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/1 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/8 (0.0))
telephone worker_107 Before: 14 After: 40
[CONSTRAINT TYPE1]
Consistency violation: 0/8 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/12 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/26 (0.0))
telescope worker_102 Before: 8 After: 48
[CONSTRAINT TYPE1]
Consistency violation: 0/0
Not applicable for given mental model!
[CONSTRAINT TYPE2]
Consistency violation: 0/24 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/12 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/8 (0.0))
[===OVERALL==]
Consistency violation: 0/44

[CONSTRAINT TYPE4]
Consistency violation: 0/2 (0.0))
[===OVERALL==]
Consistency violation: 0/28 (0.0))
bus worker_102 Before: 9 After: 38
[CONSTRAINT TYPE1]
Consistency violation: 0/3 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/16 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/8 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/4 (0.0))
[===OVERALL==]
Consistency violation: 0/31 (0.0))
butterfly worker_110 Before: 9 After: 36
[CONSTRAINT TYPE1]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/12 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/8 (0.0))
[===OVERALL==]
Consistency violation: 0/32 (0.0))
butterfly worker_101 Before: 9 After: 26
[CONSTRAINT TYPE1]
Consistency violation: 0/5 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/8 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/4 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/2 (0.0))
[===OVERALL==]
Consistency violation: 0/19

Consistency violation: 0/4 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/2 (0.0))
[===OVERALL==]
Consistency violation: 0/14 (0.0))
photo frame worker_101 Before: 5 After: 20
[CONSTRAINT TYPE1]
Consistency violation: 0/0
Not applicable for given mental model!
[CONSTRAINT TYPE2]
Consistency violation: 0/10 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/5 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/15 (0.0))
photo frame worker_107 Before: 3 After: 24
[CONSTRAINT TYPE1]
Consistency violation: 0/0
Not applicable for given mental model!
[CONSTRAINT TYPE2]
Consistency violation: 0/12 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/8 (0.0))
[===OVERALL==]
Consistency violation: 0/26 (0.0))
rice cooker worker_100 Before: 12 After: 54
[CONSTRAINT TYPE1]
Consistency violation: 0/3 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/24 (0.0))
[CON

table lamp worker_100 Before: 13 After: 32
[CONSTRAINT TYPE1]
Consistency violation: 0/10 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/3 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/19 (0.0))
tent worker_101 Before: 7 After: 34
[CONSTRAINT TYPE1]
Consistency violation: 0/1 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/16 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/8 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/4 (0.0))
[===OVERALL==]
Consistency violation: 0/29 (0.0))
tent worker_102 Before: 7 After: 20
[CONSTRAINT TYPE1]
Consistency violation: 0/4 (0.0))
[CONSTRAINT TYPE2]
Consistency violation: 0/6 (0.0))
[CONSTRAINT TYPE3]
Consistency violation: 0/3 (0.0))
[CONSTRAINT TYPE4]
Consistency violation: 0/0
Not applicable for given mental model!
[===OVERALL==]
Consistency violation: 0/13 (0.0))
tent worker_103 Before: 5 After: 

In [10]:
# Report the statistics gathered during enrichment (annotated + enriched MMs).
mm_count = len(et2triplets_ann)
print("Total number of MMs annotated:", mm_count)
print("Total # parts annotated:", len(parts))
print("Total number of relations with TRUE label:", ans_true)
print("Total number of relations with FALSE label:", ans_false)
print("Total number of relations in our data:", total_in_data)
for rt in relation_type_cnt:
    print(rt, "annotated: ", relation_type_cnt[rt])

print("Avg # parts annotated per MM:", round(len(parts) / mm_count, 2))
print("Avg. number of relations per MM:", round(total_in_data / mm_count, 2))
for rt in relation_type_cnt:
    print(rt, "annotated (avg): ", round(relation_type_cnt[rt] / mm_count, 2))

# Guard the ratio: with no applicable constraint at all the denominator is 0
# and the plain division would raise ZeroDivisionError.
percentage = round(v / t, 2) if t else "n/a"
print("Overall consistency violation: {numerator}/{denominator} ({percentage})".format(
    numerator=v, denominator=t, percentage=percentage))

Total number of MMs annotated: 300
Total # parts annotated: 2191
Total number of relations with TRUE label: 6894
Total number of relations with FALSE label: 4826
Total number of relations in our data: 11720
spatial annotated:  9956
conectivity annotated:  1612
functional annotated:  152
Avg # parts annotated per MM: 7.3
Avg. number of relations per MM: 39.07
spatial annotated (avg):  33.19
conectivity annotated (avg):  5.37
functional annotated (avg):  0.51
Overall consistency violation: 0/10615 (0.0)


After running this notebook, you should see an additional folder named "enriched_mms", which contains the logs for the enrichment done above, as well as full-ET-dataset.tsv, which is our full dataset.