In [1]:
import random, csv, time, os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import optimize
from __future__ import division

%matplotlib inline
# Videos result files are from http://sunai.uoc.edu/traits/layout2/results/

In [2]:
def append_pair(comp, name1, name2, name_to_num):
    if comp == '-1':
        return((name_to_num[name2], name_to_num[name1]))
    if comp == '1':
        return((name_to_num[name1], name_to_num[name2]))
    
def mle(w, pairs):    
    out = 1      
    for pair in pairs:
        if pair[0] == -1 or pair[1] == -1:
            continue
        out *= 1/(1+np.exp((-w[pair[0]] + w[pair[1]])/3))   
    return -np.log(out)

def gradient(w,pairs):
    grad = []
    for i in range(len(w)):
        gradient = 0

        for pair in pairs:
            if i == pair[0]:
                out = -1
            elif i == pair[1]:
                out = 1  
            else:
                continue
            gradient -= out / (1/(np.exp((w[pair[1]]-w[pair[0]])/15) +1 )) /15
#             gradient -= out / (1/((np.exp(w[pair[1]]-w[pair[0]]))) +1 )
# 
        grad.append(-gradient)
        
    return np.array(grad)

def accuracy(yhat,y):
    out = 0.0
    for i in range(1000):
        a, b = random.sample(range(len(y)),2)
        if (yhat[a]>yhat[b])==(y[a]>y[b]):
            out += 1
    resolution = 0.001
    return np.round((out/1000)/resolution)*resolution

def accuracy_pairs(test, w):
    tot = 0
    for pair in test:
        if w[pair[0]] >= w[pair[1]]:
            tot += 1
        
    return tot/len(test)

In [3]:
# Used to store video name and their corresponding index
name_to_num = dict()
# Here pairs will be a list of 5 lists correpesonding to each of the 5 traits
global pairs
pairs = [[],[],[],[],[]]

j = 0
path = './data'
files = [f for f in os.listdir(path) if f.endswith('.csv')]
for file in files:
    with open('./data/'+file, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        for row in reader:
            if not row[1] in name_to_num:
                name_to_num[row[1]] = j
                j += 1
            if not row[2] in name_to_num:
                name_to_num[row[2]] = j
                j += 1
            for i in range(3,8):
                pair = append_pair(row[i],row[1],row[2],name_to_num)
                if pair != None:
                    pairs[i-3].append(pair)
video_num = j

In [42]:
accuracy = [[],[],[],[],[]]

for iteration in range(20):
    for trait in range(len(pairs)):
        w = np.zeros(30)
        train_index = random.sample(range(len(pairs[trait])), 250)
        test_index=[]
        for i in range(len(pairs[trait])):
            if i not in train_index:
                test_index.append(i)
    #     test_index = [i in range(len(pairs[trait])) if i not in train_index]
        train_pairs = [pairs[trait][i] for i in train_index]
        test_pairs = [pairs[trait][i] for i in test_index]
        res = optimize.minimize(mle, w, 
                                method='Newton-CG',
                                jac=gradient,
                                args=(train_pairs,),
                                tol = 1,
                                options={'disp': False})
        accuracy[trait].append(accuracy_pairs(test_pairs, res.x))

In [43]:
for i in range(5):
    print np.mean(accuracy[i]), np.std(accuracy[i])

0.690046838407 0.0177472913999
0.602284263959 0.0202426085092
0.681944444444 0.0188207563695
0.651662707838 0.0240981451727
0.651713395639 0.0224990281436


# Naive Method

In [24]:
accuracy = [[],[],[],[],[]]

for iteration in range(10):
    for trait in range(5):

        train_index = random.sample(range(len(pairs[trait])), int(len(pairs[trait])*0.9))
        test_index=[]
        for i in range(len(pairs[trait])):
            if i not in train_index:
                test_index.append(i)
        train_pairs = [pairs[trait][i] for i in train_index]
        test_pairs = [pairs[trait][i] for i in test_index]

        scores = np.zeros(30)
        for pair in train_pairs:
            scores[pair[0]] += 1

        tot = 0
        right = 0
        for pair in test_pairs:
            tot += 1
            if scores[pair[0]] >= scores[pair[1]]:
                right += 1

        accuracy[trait].append(right/tot)

In [32]:
for i in range(5):
    print ' & %.3f & $\pm$ %.4f' %(np.mean(accuracy[i]), np.std(accuracy[i]))

 & 0.713 & $\pm$ 0.0535
 & 0.631 & $\pm$ 0.0612
 & 0.706 & $\pm$ 0.0552
 & 0.669 & $\pm$ 0.0642
 & 0.671 & $\pm$ 0.0508


In [40]:
accuracy = [[],[],[],[],[]]

for iteration in range(10):
    for trait in range(5):

        train_index = random.sample(range(len(pairs[trait])), 250)
        test_index=[]
        for i in range(len(pairs[trait])):
            if i not in train_index:
                test_index.append(i)
        train_pairs = [pairs[trait][i] for i in train_index]
        test_pairs = [pairs[trait][i] for i in test_index]

        scores = np.zeros(30)
        for pair in train_pairs:
            scores[pair[0]] += 1

        tot = 0
        right = 0
        for pair in test_pairs:
            tot += 1
            if scores[pair[0]] > scores[pair[1]]:
                right += 1

        accuracy[trait].append(right/tot)
        
for i in range(5):
    print ' & %.3f & $\pm$ %.4f' %(np.mean(accuracy[i]), np.std(accuracy[i]))

 & 0.626 & $\pm$ 0.0221
 & 0.545 & $\pm$ 0.0306
 & 0.588 & $\pm$ 0.0224
 & 0.602 & $\pm$ 0.0262
 & 0.562 & $\pm$ 0.0215
