In [1]:
import pandas as pd
from PIL import Image
import IPython.display as display
import nltk
import torch
import numpy as np
import matplotlib.pyplot as plt
# Device identifier string. Nothing in the cells shown here reads it;
# presumably it is handed to torch further down — TODO confirm.
device = 'cpu'

In [68]:
def get_nouns(phrase):
    """Extract the entity names mentioned in a caption.

    Steps:
      1. Tokenize ``phrase`` and keep the tokens whose in-context POS tag
         starts with ``NN``.
      2. Merge adjacent kept tokens that form one of the known compound
         names (e.g. ``dining table``) into a single space-separated token.
      3. Re-tag every resulting token on its own and keep it when that tag
         is ``NN``, ``NNS`` or ``JJ`` and the token is not a positional word
         (``front``, ``side``, ...). Tokens in the force-include list are
         always kept.

    Parameters
    ----------
    phrase : str
        The caption text.

    Returns
    -------
    tuple of str
        The retained tokens, in the order they appear in ``phrase``.
    """
    compound_names = [('dining', 'table'), ('cell', 'phone'), ('wine', 'glass'), ('parking', 'meter'),
                      ('hair', 'drier'), ('fire', 'hydrant'), ('traffic', 'light'),
                      ('baseball', 'glove'), ('sports', 'ball'), ('stop', 'sign'),
                      ('hot', 'dog')]
    positional_words = ['front', 'side', 'middle', 'top', 'part', 'edge']
    always_keep = ['oven']
    accepted_solo_tags = ('NN', 'NNS', 'JJ')

    # Step 1: nouns according to the tagger run on the whole sentence.
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(phrase))
    noun_tokens = [token for token, tag in tagged_tokens if tag[:2] == 'NN']

    # Step 2: glue compound names back together.
    # NOTE(review): merging runs on the noun-only list, so a compound whose
    # first word is not tagged NN* in context (possibly 'hot' in 'hot dog')
    # would never be merged — confirm this is intended.
    compound_merger = nltk.tokenize.MWETokenizer(compound_names, separator=' ')
    candidates = compound_merger.tokenize(noun_tokens)

    # Step 3: second filter, tagging each candidate in isolation.
    kept = []
    for candidate in candidates:
        solo_tag = nltk.pos_tag([candidate])[0][1]
        if candidate in always_keep:
            kept.append(candidate)
        elif solo_tag in accepted_solo_tags and candidate not in positional_words:
            kept.append(candidate)
    return tuple(kept)

def read_vsr_dataset(dataset_name, dataset_path='../visual-spatial-reasoning/', splits_path='splits/',
                     image_path='images/', sort=False, encode_labels=False):
    """Load one split of the VSR dataset from a JSON-lines file.

    The columns ``caption``, ``image`` and ``label`` are renamed to
    ``hypothesis``, ``Flickr30kID`` and ``gold_label``. ``Flickr30kID`` then
    holds the path of the image file (image directory joined with the file
    name), not a bare id.

    Parameters
    ----------
    dataset_name : str
        File name of the split inside the splits directory, e.g. ``'train.json'``.
    dataset_path : str
        Root directory of the dataset. A trailing slash is optional.
    splits_path : str
        Directory of the split files, relative to ``dataset_path``.
    image_path : str
        Directory of the images, relative to ``dataset_path``.
    sort : bool
        When True, rows are ordered by caption length (shortest first).
    encode_labels : bool
        When True, labels are remapped 0 -> 0 and 1 -> 2 (2 meaning entailment).
        Any other label value raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        The split with renamed columns. The index is renumbered after the
        hard-coded rows are dropped and *before* sorting, so with
        ``sort=True`` the index holds the pre-sort positions.

    Raises
    ------
    KeyError
        If one of the hard-coded row labels for ``train.json`` / ``test.json`` /
        ``dev.json`` is absent from the file that was read.
    """
    import os

    # os.path.join gives the same paths as plain concatenation when the
    # directories end with a slash, and still works when they do not.
    split_file = os.path.join(dataset_path, splits_path, dataset_name)
    image_dir = os.path.join(dataset_path, image_path)

    dataset = pd.read_json(split_file, lines=True)
    dataset = dataset.rename(columns={'caption': 'hypothesis', 'image': 'Flickr30kID', 'label': 'gold_label'})
    dataset['Flickr30kID'] = dataset['Flickr30kID'].apply(lambda img_name: os.path.join(image_dir, img_name))

    if encode_labels:
        # leave the label 0 the same and convert 1 to 2 to mean entailment
        labels_encoding = {0: 0, 1: 2}
        dataset['gold_label'] = dataset['gold_label'].apply(lambda label: labels_encoding[label])

    # Row labels removed from each official split.
    # NOTE(review): the reason is not recorded here; presumably these samples
    # are unusable (e.g. missing images) — confirm.
    rows_to_drop = {
        'train.json': [1786, 3569, 4553, 4912],
        'test.json': [135, 614, 1071, 1621, 1850],
        'dev.json': [807],
    }
    if dataset_name in rows_to_drop:
        dataset = dataset.drop(labels=rows_to_drop[dataset_name], axis=0)
    dataset = dataset.reset_index(drop=True)

    if sort:
        dataset = dataset.sort_values(by="hypothesis", key=lambda x: x.str.len())
    return dataset
    
# Load the three splits; only the training split gets the extracted entities column.
train = read_vsr_dataset('train.json')
train['entities'] = train['hypothesis'].apply(get_nouns)
test = read_vsr_dataset('test.json')
dev = read_vsr_dataset('dev.json')

# Row counts of train / test / dev, one per line.
print(len(train.index), len(test.index), len(dev.index), sep='\n')

7079
2019
1011


# Rule formation

In [117]:
# Unique captions of the training split (the rule1 cell iterates over this set).
captions_set = set(train['hypothesis'])

# same_cap: caption -> training rows with that caption and gold_label == 1,
# kept only when there are more than two such rows.
# A single groupby pass replaces one full-frame filter per caption; the rows
# in each group keep their order from `train`, and the dict is filled in order
# of first appearance, so it is the same on every run.
rows_labelled_one = train[train.gold_label == 1]
same_cap = {}
for caption, caption_rows in rows_labelled_one.groupby('hypothesis', sort=False):
    if len(caption_rows.index) > 2:
        same_cap[caption] = caption_rows.reset_index(drop=True)
print(len(same_cap))

197


In [75]:
# entities_set: entity tuple -> number of training rows with exactly that tuple.
# (Despite the name this is a dict of counts, not a set.)
entities_set = dict()
for row_entities in train['entities']:
    entity_key = tuple(row_entities)
    entities_set[entity_key] = entities_set.get(entity_key, 0) + 1

# same_ent: entity tuple -> the training rows sharing it, kept only when at
# least two rows match. reset_index() without drop keeps the original row
# label of `train` in an 'index' column.
same_ent = {}
for ent in entities_set:
    rows_for_ent = train[train.entities == ent].reset_index()
    if len(rows_for_ent.index) < 2:
        continue
    same_ent[ent] = rows_for_ent
print(len(same_ent))

873


In [51]:
def _relation_group(positive, negative):
    """Build one relation group: key 1 -> [positive relation], key 0 -> [negative relation]."""
    return {1: [positive], 0: [negative]}


# Each group pairs a relation (key 1) with the relation treated as its opposite (key 0).
group1 = _relation_group('at the right side of', 'at the left side of')
group2 = _relation_group('at the edge of', 'at the back of')
group3 = _relation_group('facing', 'facing away from')
group4 = _relation_group('parallel to', 'perpendicular to')
group5 = _relation_group('connected to', 'detached from')
group6 = _relation_group('has as a part', 'part of')
group7 = _relation_group('inside', 'outside')
group8 = _relation_group('inside', 'out of')
group9 = _relation_group('in', 'out of')
group10 = _relation_group('in', 'outside')
group11 = _relation_group('within', 'outside')
group12 = _relation_group('within', 'out of')
group13 = _relation_group('close to', 'far from')
group14 = _relation_group('close to', 'far away from')
group15 = _relation_group('near', 'far from')
group16 = _relation_group('near', 'far away from')
group17 = _relation_group('on top of', 'beneath')
group18 = _relation_group('left of', 'right of')
group19 = _relation_group('on top of', 'under')
group20 = _relation_group('on top of', 'below')
group21 = _relation_group('above', 'below')
group22 = _relation_group('over', 'under')
group23 = _relation_group('in the middle of', 'at the edge of')
group24 = _relation_group('away from', 'close to')
group25 = _relation_group('away from', 'toward')
group26 = _relation_group('next to', 'far from')
group27 = _relation_group('next to', 'far away from')
group28 = _relation_group('facing', 'opposite to')

# Every group, in numbering order.
groups = [
    group1, group2, group3, group4, group5, group6, group7,
    group8, group9, group10, group11, group12, group13, group14,
    group15, group16, group17, group18, group19, group20, group21,
    group22, group23, group24, group25, group26, group27, group28,
]

# Groups whose relations depend on the order in which the entities appear in
# the phrase (swapping the two entities changes the spatial relation).
asymetric_groups = [
    group1, group2, group6, group7, group8, group9, group10, group11,
    group12, group17, group18, group19, group20, group21, group22, group23,
]

In [116]:
# rule1: (caption, label) -> training rows with exactly that caption and
# gold label, kept only when at least two rows share the pair.
# Labels are visited in the order 0, then 1, for every caption.
rule1 = {}
for caption in list(captions_set):
    has_caption = train.hypothesis == caption
    for label in (0, 1):
        matching_rows = train[has_caption & (train.gold_label == label)].reset_index(drop=True)
        if len(matching_rows.index) < 2:
            continue
        rule1[(caption, label)] = matching_rows
rule1_data = list(rule1.values())
print(len(rule1))
#show_dataset(rule1_data[-3])

998


In [113]:
# Number of entity groups from `same_ent` that are processed.
# NOTE(review): the original loop broke out once its counter reached 10, i.e.
# after 9 entity groups, which looks like a leftover debugging cap. The cap is
# kept so the printed count stays the same; set it to None to process all.
RULE2_MAX_ENTITY_GROUPS = 9


def _rows_with(ent_df, relation, label):
    """Return the rows of ent_df whose relation and gold_label equal the given values."""
    return ent_df[(ent_df.relation == relation) & (ent_df.gold_label == label)]


def _relation_pair_rows(ent_df, group, positive_label):
    """Collect rows of one relation group that carry complementary labels.

    Takes the rows using the group's positive relation (key 1) with
    ``positive_label`` followed by the rows using its negative relation
    (key 0) with the other label, and renumbers the result from 0.
    """
    positive_relation = group[1][0]
    negative_relation = group[0][0]
    positives_df = _rows_with(ent_df, positive_relation, positive_label)
    negatives_df = _rows_with(ent_df, negative_relation, 1 - positive_label)
    # drop=True in both cases: previously the first case used reset_index()
    # without drop, which added a stray 'level_0' column to those frames only.
    return pd.concat([positives_df, negatives_df]).reset_index(drop=True)


# rule2: for each entity tuple and each relation group, the frames that mix
# positive-relation rows and negative-relation rows with opposite labels,
# kept only when they contain more than one row.
rule2 = []
for count, ent_df in enumerate(same_ent.values()):
    if RULE2_MAX_ENTITY_GROUPS is not None and count >= RULE2_MAX_ENTITY_GROUPS:
        break
    for group in groups:
        # Same order as before: positive relation labelled 0 first, then labelled 1.
        for positive_label in (0, 1):
            pair_df = _relation_pair_rows(ent_df, group, positive_label)
            if len(pair_df.index) > 1:
                rule2.append(pair_df)
print(len(rule2))
#show_dataset(rule2[-2])

25
