In [1]:
import pandas as pd
import requests
import numpy as np
from PIL import Image
import ast
from ast import literal_eval

In [2]:
# Load the naming table; the first CSV column is used as the row index.
# extract_images() below reads the 'vg_image_id', 'responses_v2' and 'link_mn' columns.
data = pd.read_csv('my_data.csv', index_col=0)

In [3]:
def extract_images(name1, name2, source=None):
    """Collect the images whose first two responses are exactly ``name1`` and ``name2``.

    For every row of the table, the string in ``responses_v2`` is parsed into a
    dict (response name -> count). A row is kept when the first two keys of
    that dict are ``name1`` and ``name2``, in either order. (Presumably the
    dict is ordered by decreasing frequency, so these are the two most common
    names -- TODO confirm against the dataset.)

    Parameters
    ----------
    name1, name2 : str
        The two (distinct) names forming the pair.
    source : pandas.DataFrame, optional
        Table with columns ``vg_image_id``, ``responses_v2`` (dict literals as
        strings) and ``link_mn``. Defaults to the notebook-level ``data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``vg_image_id``, ``name1``, ``name2``, ``link_mn`` and
        ``"index <name1>-<name2>"`` (count of name1 minus count of name2),
        sorted by that index in descending order. A positive index means
        name1 was chosen more often, a negative one that name2 was, and 0
        that both were chosen equally often. If no image matches, an empty
        frame with the same columns is returned.

    Raises
    ------
    ValueError
        If ``name1 == name2`` (a pair needs two different names).
    """
    if source is None:
        source = data  # fall back to the table loaded at notebook level

    if name1 == name2:
        # Dict keys are unique, so no row can ever match; the result would also
        # need two identically named columns. Fail early with a clear message.
        raise ValueError("name1 and name2 must be two different names")

    index_col = "index " + name1 + "-" + name2
    wanted = {name1, name2}

    first_link = {}  # image ID -> link of the first row carrying that ID
    records = []

    rows = zip(
        source['vg_image_id'],
        source['responses_v2'].apply(literal_eval),
        source['link_mn'],
    )
    for image_id, responses, link in rows:
        # Remember only the first link seen per image ID, which is the link the
        # per-row table lookup used to return.
        first_link.setdefault(image_id, link)

        top_two = list(responses.keys())[:2]
        # Keep the image only if it has at least two response types and the
        # first two are our pair, in either order.
        if len(top_two) == 2 and set(top_two) == wanted:
            records.append(
                [image_id, responses[name1], responses[name2], first_link[image_id]]
            )

    # Passing the column names to the constructor keeps the frame well-formed
    # even when no image matched (assigning them afterwards fails on an empty
    # frame, which has zero columns).
    df = pd.DataFrame(records, columns=['vg_image_id', name1, name2, 'link_mn'])

    # Index = name1 count - name2 count; see the docstring for how to read it.
    df[index_col] = df[name1] - df[name2]

    return df.sort_values(by=index_col, ascending=False)

In [4]:
# Running this cell displays the dataframe for the pair ("man", "tennis player").
# The first rows, with the highest indices, are the cases where name1 > name2.
# The last rows, with the lowest indices, are the cases where name2 > name1.
extract_images("man", "tennis player")

Unnamed: 0,vg_image_id,man,tennis player,link_mn,index man-tennis player
33,2360111,23,2,http://object-naming-amore.upf.edu//2360111_78...,21
45,2368854,22,4,http://object-naming-amore.upf.edu//2368854_34...,18
47,2369848,23,6,http://object-naming-amore.upf.edu//2369848_60...,17
91,2417068,23,6,http://object-naming-amore.upf.edu//2417068_28...,17
38,2362218,21,4,http://object-naming-amore.upf.edu//2362218_37...,17
23,2346851,20,4,http://object-naming-amore.upf.edu//2346851_20...,16
83,2410627,19,4,http://object-naming-amore.upf.edu//2410627_21...,15
14,2332542,21,7,http://object-naming-amore.upf.edu//2332542_30...,14
50,2372201,21,7,http://object-naming-amore.upf.edu//2372201_26...,14
25,2349360,19,5,http://object-naming-amore.upf.edu//2349360_22...,14


In [11]:
# Same analysis for the pair ("surfer", "man"): rows are sorted by the "surfer" count minus the "man" count, descending.
extract_images("surfer", "man")

Unnamed: 0,vg_image_id,surfer,man,link_mn,index surfer-man
0,2316639,24,6,http://object-naming-amore.upf.edu//2316639_34...,18
51,2417219,22,7,http://object-naming-amore.upf.edu//2417219_30...,15
16,2348496,21,7,http://object-naming-amore.upf.edu//2348496_36...,14
4,2334533,18,5,http://object-naming-amore.upf.edu//2334533_29...,13
41,2400315,19,6,http://object-naming-amore.upf.edu//2400315_11...,13
9,2338643,22,9,http://object-naming-amore.upf.edu//2338643_95...,13
22,2356481,18,5,http://object-naming-amore.upf.edu//2356481_31...,13
29,2364184,20,8,http://object-naming-amore.upf.edu//2364184_24...,12
21,2353377,21,9,http://object-naming-amore.upf.edu//2353377_84...,12
12,2345446,23,11,http://object-naming-amore.upf.edu//2345446_26...,12
