In [1]:
import pandas as pd
import numpy as np
import os
import sys
from pprint import pprint
import configparser
from collections import Counter
from sklearn.metrics import classification_report, accuracy_score, f1_score, average_precision_score
import matplotlib.pyplot as plt
from IPython.display import display, SVG

sys.path.append(os.path.abspath(os.path.join(os.pardir, 'utils')))
import preprocessing
import visualize

# Project paths are read from the shared config.ini one directory above the notebook.
config = configparser.ConfigParser()
config.read(os.path.join(os.pardir, 'config.ini'))
path_settings = config['PATHS']

vg_json = path_settings['vg-json']
data_dir = path_settings['data']
image_dir = path_settings['vg-images']
# data_dir is concatenated as-is, so the configured value must end with a path separator.
input_dir = '{}prediction_arrays/'.format(data_dir)

# Colour labels supplied by the project's preprocessing helper
colors = preprocessing.basic_colors()
classifier_list = ['bottomup', 'earlyfusion']

# Name fragment used to pick the '<set_type>_y' / 'predict_<set_type>_y' archive entries
set_type = 'add'


def greyscale(x):
    """Return ``plt.cm.gray`` evaluated at ``len(x)`` evenly spaced points between 0.2 and 0.8."""
    return plt.cm.gray(np.linspace(0.2, 0.8, len(x)))

def print_samples(df, prediction=False):
    """Plot every row of ``df`` and print its index, optionally with predicted colours.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows are handed one by one to ``visualize.plot_vg_image`` together
        with the module-level ``image_dir``.
    prediction : iterable of str or False, optional
        Classifier names; for each name ``p`` the value of the row's
        ``'<p>_color'`` column is printed. Nothing extra is printed when falsy.
    """
    for idx, sample in df.iterrows():
        visualize.plot_vg_image(sample, image_dir)
        print(idx)
        if not prediction:
            continue
        for name in prediction:
            print('{p}: {color}'.format(p=name, color=sample[name + '_color']))

# Prepare Data

In [2]:
# Load the zipped NumPy archives with the results of both models
bottomup_arrays = np.load(input_dir + 'results_bottomup_bgr.npz')
earlyfusion_arrays = np.load(input_dir + 'results_earlyfusion.npz')

# Archive entry names for the selected set
prediction_key = 'predict_' + set_type + '_y'
target_key = set_type + '_y'

# Prepend the ID column (first column of the target array) to each prediction array
ids = bottomup_arrays[target_key][:, :1]
bottomup_predict = np.concatenate((ids, bottomup_arrays[prediction_key]), axis=1)
ids = earlyfusion_arrays[target_key][:, :1]
earlyfusion_predict = np.concatenate((ids, earlyfusion_arrays[prediction_key]), axis=1)

In [3]:
ids = earlyfusion_predict[:,0].reshape(-1,1)

# Check that both prediction arrays have the same shape
print('Shapes identical:',
      earlyfusion_predict.shape == bottomup_predict.shape,
      earlyfusion_predict.shape)

# Check that both arrays list the same IDs in the same order.
# np.array_equal returns a plain bool and simply yields False for mismatched
# shapes, unlike the former `not False in (a == b)` membership test.
print('IDs identical:',
      np.array_equal(earlyfusion_predict[:, 0:1], bottomup_predict[:, 0:1]))

Shapes identical: True (21889, 12)
IDs identical: True


In [4]:
# Object table, indexed by the first CSV column
all_obj = pd.read_csv(data_dir + "extracted_data/all_objects.csv", index_col=0)
# IDs stored under the 'ids' entry of the additional-set archive
additional_ids = np.load(data_dir + 'feature_arrays/' + 'additional_ids.npz')['ids']

# DataFrame with the objects referenced by the prediction arrays
predicted_ids = earlyfusion_predict[:, 0]
pred_obj = all_obj.loc[predicted_ids]

  mask |= (ar1 == a)


In [5]:
# Some objects in the add set are also present in the train set (under another name)
# -> sort them out.

# Object ids that occur in both the train set and the add set
train_obj = pd.read_csv(data_dir+"extracted_data/train_df.csv", index_col=0)
train_ids = set(train_obj.object_id.values)
# .loc no longer accepts a set as indexer (deprecated in pandas 1.5, an error
# since 2.0), so the de-duplicated ids are passed as an array instead.
add_obj_ids = set(all_obj.loc[np.unique(additional_ids)].object_id.values)
intersection_ids = train_ids & add_obj_ids

# Drop every predicted object whose object_id also occurs in the train set
pred_obj = pred_obj.loc[~pred_obj.object_id.isin(intersection_ids)]
print('entries in pred_obj after cleaning:', len(pred_obj))

entries in pred_obj after cleaning: 6514


In [6]:
# Build DataFrames from the prediction arrays:
# first column -> index, remaining columns -> one score column per colour
bottomup_predict_df = (
    pd.DataFrame(data=bottomup_predict[:, 1:], index=bottomup_predict[:, 0], columns=colors)
    .sort_index()
    .astype('float')
)
earlyfusion_predict_df = (
    pd.DataFrame(data=earlyfusion_predict[:, 1:], index=earlyfusion_predict[:, 0], columns=colors)
    .sort_index()
    .astype('float')
)

# Colour word with the highest score per row, i.e. the predicted colour word (a pd.Series)
pred_label_bottomup = bottomup_predict_df.idxmax(axis=1)
pred_label_earlyfusion = earlyfusion_predict_df.idxmax(axis=1)

# Combine the predicted labels with pred_obj through index-on-index merges
bottomup_labels = pred_label_bottomup.rename('bottomup_color')
earlyfusion_labels = pred_label_earlyfusion.rename('earlyfusion_color')
predict_df = (
    pred_obj.copy()
    .sort_index()
    .merge(bottomup_labels, left_index=True, right_index=True)
    .merge(earlyfusion_labels, left_index=True, right_index=True)
)

predict_df.head()

Unnamed: 0,bb_h,bb_w,bb_x,bb_y,color,image_id,object_id,object_name,bottomup_color,earlyfusion_color
55.0,20,23,227,272,red,4,5138,cloths,purple,pink
58.0,282,149,362,0,blue,4,1060335,drape,blue,blue
127.0,35,22,520,444,black,8,1060462,stapler,green,black
132.0,142,91,459,315,white,8,1060481,folder,brown,brown
146.0,225,97,168,138,brown,9,1060517,bookshelf,brown,white


# Results

In [7]:
def get_score(classifier,df):
    """Return the accuracy of one classifier on the objects in ``df``.

    Parameters
    ----------
    classifier : str
        Prefix of a module-level ``<classifier>_predict_df`` score frame
        (one row per object, one column per colour label).
    df : pandas.DataFrame
        Objects to evaluate. Needs a ``color`` column with the true labels
        and an index whose values occur in the score frame's index.

    Returns
    -------
    float
        Fraction of rows whose highest-scoring colour equals ``df.color``.

    Raises
    ------
    NameError
        If no ``<classifier>_predict_df`` global exists.
    ValueError
        If the score lookup yields a different number of rows than ``df``.
    """
    frame_name = classifier + '_predict_df'
    # Plain namespace lookup instead of eval(): same frame for a valid name,
    # but an arbitrary string can no longer be executed as code.
    if frame_name not in globals():
        raise NameError("name '{}' is not defined".format(frame_name))
    scores = globals()[frame_name].loc[df.index]

    # Map each arg-max column position back to its colour label and compare
    # labels directly. Comparing positions against get_dummies(df.color) is only
    # correct when the score columns are sorted alphabetically AND every colour
    # occurs in df.color; otherwise the two encodings disagree.
    labels = np.asarray(scores.columns)
    predicted = labels[np.argmax(np.asarray(scores), axis=1)]
    truth = np.asarray(df.color)
    if len(predicted) != len(truth):
        raise ValueError('found {} predictions for {} objects'.format(len(predicted), len(truth)))
    return float(np.mean(predicted == truth))

def print_zero_shot_examples(row):
    """Print one comparison row as a LaTeX table line.

    ``row`` must expose the attributes ``object``, ``top_color`` and
    ``ratio_top_color`` plus ``<model>_acc``, ``<model>_top_ratio`` and
    ``<model>_top_pred`` for the models ``bottomup`` and ``earlyfusion``.
    Accuracies are printed as percentages rounded to one decimal; all other
    values are printed unchanged. Returns ``None``.
    """
    template = '{obj} & {ratio_top_color} ({top_color}) & {bottomup_acc} & {bottomup_top_ratio} ({bottomup_top_pred}) & {earlyfusion_acc} & {earlyfusion_top_ratio} ({earlyfusion_top_pred}) \\\\'

    fields = {
        'obj': row.object,
        'ratio_top_color': row.ratio_top_color,
        'top_color': row.top_color,
    }
    for model in ('bottomup', 'earlyfusion'):
        # accuracy is stored as a fraction; the table shows percent
        fields[model + '_acc'] = round(getattr(row, model + '_acc') * 100, 1)
        fields[model + '_top_ratio'] = getattr(row, model + '_top_ratio')
        fields[model + '_top_pred'] = getattr(row, model + '_top_pred')

    print(template.format(**fields))

## ACC

In [8]:
# One row per classifier; the 'all_obj' column holds its accuracy over all of predict_df
acc_df = pd.DataFrame(columns=classifier_list).T
overall_accuracy = [get_score(name, predict_df) for name in classifier_list]
acc_df['all_obj'] = overall_accuracy

In [9]:
# Show the per-classifier accuracy rounded to three decimals.
acc_df.round(3)
# Note: not directly comparable to the results for the test set, since the colour distribution is not uniform in the add set

Unnamed: 0,all_obj
bottomup,0.378
earlyfusion,0.319


## Model Comparison

In [10]:
def _top_label_share(labels):
    """Return the most frequent value of ``labels`` and its share of all rows in percent (2 decimals)."""
    counts = labels.value_counts()
    return counts.index[0], round(counts.iloc[0] / len(labels) * 100, 2)


objects = np.unique(predict_df.object_name.values)
compare_columns = ['object','top_color','ratio_top_color','bottomup_acc','bottomup_top_pred','bottomup_top_ratio','earlyfusion_acc','earlyfusion_top_pred', 'earlyfusion_top_ratio']

# Collect one record per object name and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
by_object = predict_df.groupby('object_name')
records = []
for obj in objects:
    df = by_object.get_group(obj)
    top_color, ratio_top_color = _top_label_share(df.color)
    bottomup_top_pred, bottomup_top_ratio = _top_label_share(df.bottomup_color)
    earlyfusion_top_pred, earlyfusion_top_ratio = _top_label_share(df.earlyfusion_color)
    records.append({
        'object': obj,
        'top_color': top_color,
        'ratio_top_color': ratio_top_color,
        'bottomup_acc': round(accuracy_score(df.color, df.bottomup_color), 3),
        'bottomup_top_pred': bottomup_top_pred,
        'bottomup_top_ratio': bottomup_top_ratio,
        'earlyfusion_acc': round(accuracy_score(df.color, df.earlyfusion_color), 3),
        'earlyfusion_top_pred': earlyfusion_top_pred,
        'earlyfusion_top_ratio': earlyfusion_top_ratio,
    })
compare_df = pd.DataFrame(records, columns=compare_columns)

# Positive values: early fusion is more accurate than bottom-up for that object
compare_df['acc_diff'] = compare_df['earlyfusion_acc'] - compare_df['bottomup_acc']
compare_df.sort_values(by='acc_diff', ascending=False)

# cf. Table 3 from ACL paper

Unnamed: 0,object,top_color,ratio_top_color,bottomup_acc,bottomup_top_pred,bottomup_top_ratio,earlyfusion_acc,earlyfusion_top_pred,earlyfusion_top_ratio,acc_diff
139,heater,white,94.12,0.000,brown,35.29,0.824,white,76.47,0.824
278,tablet,black,42.86,0.190,blue,42.86,0.619,black,57.14,0.429
52,burners,black,94.74,0.368,black,36.84,0.789,black,84.21,0.421
308,wipers,black,94.12,0.353,black,29.41,0.706,black,76.47,0.353
225,room,white,54.55,0.182,gray,31.82,0.500,white,36.36,0.318
...,...,...,...,...,...,...,...,...,...,...
215,plank,brown,57.14,0.667,brown,33.33,0.048,gray,90.48,-0.619
103,dumpster,green,37.93,0.724,blue,27.59,0.103,pink,75.86,-0.621
203,peas,green,100.00,0.630,green,62.96,0.000,brown,96.30,-0.630
163,lime,green,100.00,0.684,green,68.42,0.053,yellow,94.74,-0.631


In [11]:
print('earlyfusion > bottomup')
# heater and wipers are highly colour diagnostic objects -> transferred top-down information helpful
examples = ['heater', 'tablet', 'wipers', 'room', 'lip', 'ketchup']
is_example = compare_df.object.isin(examples)
# Keep the four listed objects with the largest accuracy gain of early fusion
examples_df = compare_df.loc[is_example].sort_values(by='acc_diff', ascending=False).head(4)
display(examples_df)
#print(examples_df.to_latex())

# Print one LaTeX table line per row; apply() also returns a Series of None as the cell result
examples_df.apply(print_zero_shot_examples, axis=1)

earlyfusion > bottomup


Unnamed: 0,object,top_color,ratio_top_color,bottomup_acc,bottomup_top_pred,bottomup_top_ratio,earlyfusion_acc,earlyfusion_top_pred,earlyfusion_top_ratio,acc_diff
139,heater,white,94.12,0.0,brown,35.29,0.824,white,76.47,0.824
278,tablet,black,42.86,0.19,blue,42.86,0.619,black,57.14,0.429
308,wipers,black,94.12,0.353,black,29.41,0.706,black,76.47,0.353
225,room,white,54.55,0.182,gray,31.82,0.5,white,36.36,0.318


heater & 94.12 (white) & 0.0 & 35.29 (brown) & 82.4 & 76.47 (white) \\
tablet & 42.86 (black) & 19.0 & 42.86 (blue) & 61.9 & 57.14 (black) \\
wipers & 94.12 (black) & 35.3 & 29.41 (black) & 70.6 & 76.47 (black) \\
room & 54.55 (white) & 18.2 & 31.82 (gray) & 50.0 & 36.36 (white) \\


139    None
278    None
308    None
225    None
dtype: object

In [12]:
print('bottomup > earlyfusion')
# cherry and lime are highly colour diagnostic objects -> transferred top-down information misleading
# 'lime' is particularly interesting: it seems fair to assume that the early fusion model picks 'yellow'
# in most cases because of the semantic similarity between limes and lemons
examples = ['cherry', 'lime', 'dumpster', 'plank', 'squash', 'veggie']
is_example = compare_df.object.isin(examples)
# Keep the four listed objects with the largest accuracy loss of early fusion
examples_df = compare_df.loc[is_example].sort_values(by='acc_diff', ascending=True).head(4)
display(examples_df)
#print(examples_df.to_latex())

# Print one LaTeX table line per row; apply() also returns a Series of None as the cell result
examples_df.apply(print_zero_shot_examples, axis=1)

bottomup > earlyfusion


Unnamed: 0,object,top_color,ratio_top_color,bottomup_acc,bottomup_top_pred,bottomup_top_ratio,earlyfusion_acc,earlyfusion_top_pred,earlyfusion_top_ratio,acc_diff
68,cherry,red,100.0,0.688,red,68.75,0.0,green,100.0,-0.688
163,lime,green,100.0,0.684,green,68.42,0.053,yellow,94.74,-0.631
103,dumpster,green,37.93,0.724,blue,27.59,0.103,pink,75.86,-0.621
215,plank,brown,57.14,0.667,brown,33.33,0.048,gray,90.48,-0.619


cherry & 100.0 (red) & 68.8 & 68.75 (red) & 0.0 & 100.0 (green) \\
lime & 100.0 (green) & 68.4 & 68.42 (green) & 5.3 & 94.74 (yellow) \\
dumpster & 37.93 (green) & 72.4 & 27.59 (blue) & 10.3 & 75.86 (pink) \\
plank & 57.14 (brown) & 66.7 & 33.33 (brown) & 4.8 & 90.48 (gray) \\


68     None
163    None
103    None
215    None
dtype: object