In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [44]:
import csv
import scipy
from scipy import sparse as sps
from scipy import io
import numpy as np
import pickle
import json

In [5]:
# Read the hierarchy edge list into two adjacency maps.
# Column 0 of each space-separated row is stored as the parent id and
# column 1 as the child id.
tree_p2c = {}  # parent id -> list of child ids, in file order
tree_c2p = {}  # child id  -> list of parent ids, in file order
with open("../data/imagenet_data/imagenet_tree.txt") as csv_file:
    for row in csv.reader(csv_file, delimiter=" ", quotechar='|'):
        parent, child = row[0], row[1]
        tree_c2p.setdefault(child, []).append(parent)
        tree_p2c.setdefault(parent, []).append(child)
    

In [6]:
# Read the tab-separated id/label table.
words = {}       # id (column 0) -> label (column 1)
words_r = {}     # label -> id; when a label repeats, the last id in the file wins
duplicates = []  # every label that had already been seen when it was read again
with open("../data/imagenet_data/imagenet_words.txt") as csv_file:
    for row in csv.reader(csv_file, delimiter="\t", quotechar='|'):
        wnid, label = row[0], row[1]
        words[wnid] = label
        if label in words_r:
            duplicates.append(label)
        words_r[label] = wnid

In [7]:
# Hand-picked label -> id assignments. They are applied after the label file
# has been read, so they replace whatever id the file gave these labels
# (or add the label if the file did not contain it verbatim).
words_r.update({
    'elephant': 'n02503517',
    'person': 'n00007846',
    'ball': 'n02778669',
    'bear': 'n02131653',
    'bed': 'n02818832',
    'bench': 'n02828884',
    'book': 'n02870526',
    'bowl': 'n02880940',
    'carrot': 'n07730207',
    'chair': 'n03001627',
    'couch': 'n04256520',
    'cow': 'n01887787',
    'cup': 'n03147509',
    'fork': 'n03383948',
    'spoon': 'n04284002',
    'knife': 'n04380346',
    'kite': 'n03621473',
    'mouse': 'n03793489',
    'orange': 'n07747607',
    'sheep': 'n02411705',
    'sign': 'n06793231',
    'sink': 'n03620052',
    'tie': 'n03815615',
    'dog': 'n02084071',
    'cat': 'n02121620',
    'tv': 'n06277280',
    'toilet': 'n04447028',
    'backpack': 'n02773037',
    'handbag': 'n02774152',
    'suitcase': 'n02774630',
})




In [8]:
# Read the per-id counts. Only rows whose first field contains "./n" are
# kept; the first two characters of that field are dropped to form the key.
raw_counts = {}
with open("../data/imagenet_data/imagenet_count.txt") as csv_file:
    for row in csv.reader(csv_file, delimiter=" ", quotechar='|'):
        entry = row[0]
        if "./n" not in entry:
            continue
        raw_counts[entry[2:]] = int(row[1])

In [41]:
# leaves: ids that appear as a child in the tree but never as a parent.
leaves=(set(tree_c2p.keys())-set(tree_p2c.keys()))
# nodes: every id that appears in the tree in either role.
# NOTE(review): the list order is whatever set iteration yields, so any index
# derived from it may differ between interpreter runs -- confirm that nothing
# persisted to disk depends on this order.
nodes=list(set(tree_c2p.keys())|set(tree_p2c.keys()))


In [11]:
# Two-way lookup between an id and its position in `nodes`.
idx_2_node = dict(enumerate(nodes))
node_2_idx = dict((name, position) for position, name in enumerate(nodes))

In [12]:
def ged_descendents(node,Descendents):
    """Fill ``Descendents[node]`` with the set of ids reachable from ``node``.

    The set always contains ``node`` itself plus, via the global ``tree_p2c``
    child lists, everything reachable through its children. ``Descendents``
    doubles as a memo: ids already present are left untouched, and any child
    that is still missing is resolved recursively and stored as well.

    Parameters
    ----------
    node : id to resolve.
    Descendents : dict mapping id -> set of ids; updated in place.

    Returns
    -------
    None
    """
    if node in Descendents:
        return
    reachable = {node}
    for child in tree_p2c.get(node, ()):
        # The call is a no-op when the child has already been resolved.
        ged_descendents(child, Descendents)
        reachable.update(Descendents[child])
    Descendents[node] = reachable

In [13]:
# Build the id -> descendant-set table for every id in `nodes`.
# The dict is shared across calls, so ids already filled in as a side effect
# of an earlier call are skipped by ged_descendents.
Descendents={}
for node in nodes:
    ged_descendents(node,Descendents)


In [14]:
# Invert the descendant table: Ancestors[x] is the set of ids whose
# descendant set contains x. Every descendant set includes its own id, so
# each id also ends up in its own ancestor set.
Ancestors = {}
for node in nodes:
    for member in Descendents[node]:
        Ancestors.setdefault(member, set()).add(node)


In [15]:
# counts[node]: total of raw_counts over the node's descendant set (which
# includes the node itself). Ids without a raw count contribute nothing.
counts = {}
for node in nodes:
    subtree_total = 0
    for member in Descendents[node]:
        subtree_total += raw_counts.get(member, 0)
    counts[node] = subtree_total

In [17]:
# IC[node] = log((count + pscnt) / (total_count + pscnt)), so values are <= 0.
# total_count is the largest aggregated count; pscnt is added to numerator and
# denominator (presumably a smoothing pseudo-count -- confirm).
# Any node whose count exceeds 500000 is treated as if it had the full
# total_count, which gives it an IC of exactly 0.
total_count = max(counts.values())
pscnt = 1
IC = {}
for node in nodes:
    cn = total_count if counts[node] > 500000 else counts[node]
    IC[node] = np.log((cn + pscnt) / float(total_count + pscnt))

In [18]:
def find_common_ancestor(node1,node2):
    """Return the shared ancestor of two ids that has the smallest count.

    Looks both ids up in the global ``Ancestors`` table, intersects the two
    ancestor sets and orders the shared ids by ``counts``.

    Returns
    -------
    tuple
        ``(id, counts[id], IC[id])`` for the shared ancestor with the lowest
        count.

    Raises
    ------
    KeyError
        If either id is missing from ``Ancestors``.
    IndexError
        If the two ids have no ancestor in common.
    """
    shared = set(Ancestors[node1]) & set(Ancestors[node2])
    candidates = [(k, counts[k], IC[k]) for k in shared]
    # Stable sort on the count column; the first entry is the minimum.
    candidates.sort(key=lambda entry: entry[1])
    return candidates[0]

In [39]:
def compute_sim(classes,class_map):
    """Compute the pairwise similarity matrix for a list of classes.

    Each class name is translated to a node id through
    ``words_r[class_map[name]]``. For two different classes with ids ``a`` and
    ``b`` and common ancestor ``c`` (as returned by ``find_common_ancestor``),
    the similarity is ``2 * IC[c] / (IC[a] + IC[b])``, or 0 when ``IC[c]`` is 0.
    The diagonal is 1.

    Parameters
    ----------
    classes : sequence of class names; fixes the row/column order.
    class_map : dict mapping each class name to a key of ``words_r``.

    Returns
    -------
    numpy.ndarray
        Symmetric float array of shape ``(len(classes), len(classes))``.

    Raises
    ------
    KeyError
        If a class is missing from ``class_map`` or its label from ``words_r``.
    """
    num_classes = len(classes)
    # Resolve every class to its node id once instead of once per pair.
    node_ids = [words_r[class_map[cls]] for cls in classes]
    # Start from the identity: the diagonal is 1 and pairs whose common
    # ancestor has IC == 0 simply keep their 0.
    Sim = np.eye(num_classes)
    for i in range(num_classes):
        ci = node_ids[i]
        # The matrix is symmetric, so each unordered pair is visited once and
        # mirrored. The later class is passed first so the outcome matches the
        # value a full N x N sweep would leave in place last.
        for j in range(i):
            cj = node_ids[j]
            cc = find_common_ancestor(ci, cj)
            if cc[2] != 0:
                value = (2 * cc[2]) / (IC[ci] + IC[cj])
                Sim[i, j] = value
                Sim[j, i] = value
    return Sim

    

In [49]:
# Load the dataset description and take the PASCAL category list.
# The slice skips the first entry of info['pascal_cats'] (presumably a
# background/placeholder label -- TODO confirm).
with open('../data/info.json', 'r') as info_file:
    info = json.load(info_file)
pascal_20_classes = info['pascal_cats'][1:]

In [20]:
# PASCAL class name -> label string used as a key into words_r.
# Every class starts out mapped to its own name; the entries below replace
# the ones that need a different label string.
pascal_class_map = dict((name, name) for name in pascal_20_classes)
pascal_class_map.update({
    "aeroplane": "airplane, aeroplane, plane",
    "bicycle": "bicycle, bike, wheel, cycle",
    "bus": "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle",
    "train": "train, railroad train",
    "car": "car, auto, automobile, machine, motorcar",
    "diningtable": "dining table, board",
    "horse": "horse, Equus caballus",
    "motorbike": "minibike, motorbike",
    "pottedplant": "pot plant",
    "sofa": "sofa, couch, lounge",
    "tvmonitor": "television monitor, tv monitor",
})


In [22]:
# Pairwise similarity matrix for the PASCAL classes, in pascal_20_classes order.
Sim_pascal = compute_sim(pascal_20_classes,pascal_class_map)
# Saving is disabled. NOTE(review): the line below opens the file in text
# mode ("w"); pickle needs a binary file object under Python 3, so use "wb"
# (ideally inside a `with` block) if this is re-enabled.
# pickle.dump(Sim_pascal,open("../data/semantic_similarity/pascal_voc_2007_semantics.pickle","w"))

In [50]:
# COCO category list; the slice skips the first entry of info['coco_cats']
# (presumably a background/placeholder label -- TODO confirm).
coco_80_classes = info['coco_cats'][1:]

In [24]:
# COCO class name -> label string used as a key into words_r.
# Every class starts out mapped to its own name; the entries below replace
# the ones that need a different label string.
coco_class_map = dict((name, name) for name in coco_80_classes)
coco_class_map.update({
    "bicycle": "bicycle, bike, wheel, cycle",
    "car": "car, auto, automobile, machine, motorcar",
    u'motorcycle': "motorcycle, bike",
    u'airplane': "airplane, aeroplane, plane",
    "bus": "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle",
    "train": "train, railroad train",
    'truck': "truck, motortruck",
    'traffic light': "traffic light, traffic signal, stoplight",
    'fire hydrant': "fireplug, fire hydrant, plug",
    'stop sign': "sign",
    "horse": "horse, Equus caballus",
    'giraffe': "giraffe, camelopard, Giraffa camelopardalis",
    'backpack': "backpack, back pack, knapsack, packsack, rucksack, haversack",
    'handbag': "bag, handbag, pocketbook, purse",
    'suitcase': "bag, traveling bag, travelling bag, grip, suitcase",
    'frisbee': "ultimate frisbee",
    'skis': "ski",
    'sports ball': "ball",
    'baseball bat': 'baseball bat, lumber',
    'baseball glove': 'baseball glove, glove, baseball mitt, mitt',
    'tennis racket': 'tennis racket, tennis racquet',
    'wine glass': 'wineglass',
    'hot dog': 'hotdog, hot dog',
    'pizza': 'pizza, pizza pie',
    'donut': 'doughnut, donut, sinker',
    "potted plant": "pot plant",
    'dining table': 'dining table, board',
    'toilet': 'toilet, toilette',
    'tv': 'television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box',
    'remote': 'remote control, remote',
    'laptop': 'laptop, laptop computer',
    'cell phone': 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
    'refrigerator': 'electric refrigerator, fridge',
    'teddy bear': 'teddy, teddy bear',
    'hair drier': 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
})
 




In [32]:
# Pairwise similarity matrix for the COCO classes, in coco_80_classes order.
Sim_coco = compute_sim(coco_80_classes,coco_class_map)
# Saving is disabled. NOTE(review): the line below opens the file in text
# mode ("w"); pickle needs a binary file object under Python 3, so use "wb"
# (ideally inside a `with` block) if this is re-enabled.
#pickle.dump(Sim_coco, open("../data/semantic_similarity/coco_semantics.pickle","w"))