In [1]:
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets
import pickle
from metric_learn import LSML_Supervised

from cvxopt import matrix
from cvxopt import solvers

In [2]:
vecFeatures = pd.read_csv("./vecFeatures.csv", header=None)
attFeatures = pd.read_csv("./attrFeatures.csv", header=None)
userChoices = pd.read_csv("./userChoices.csv", header=None)
compCount = pd.read_csv("./compsCount.csv", header=None)

compCount.columns = ['ref_font_id', 'font_a_id', 'font_b_id', 'votes_for_a', 'votes_for_b']
userChoices.columns = ['triplet_id', 'user_id', 'reference_font_id', 'font_a_id', 'font_b_id', 'user_choice']

In [3]:
font_names = open('./fontNames.txt', 'r')
fonts = [l.strip('\n') for l in font_names.readlines()]
font_names.close()

attr_names = open('./attrNames.txt', 'r')
attributes = [l.strip('\n') for l in attr_names.readlines()]
attr_names.close()

In [4]:
vecFeatures.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,71,72,73,74,75,76,77,78,79,80
0,2100.0,1840.0,4968.0,5150.0,0.96466,6478.0,7000.0,0.925429,843.0,9154.0,...,3.15389,0.733991,1.547676,0.238739,3.308243,3.141592,5020594.0,-1.570797,188114.294542,
1,2663.0,2196.0,4513.0,5169.0,0.87309,5731.0,7058.0,0.811986,1216.0,8895.0,...,3.503477,0.588926,1.278175,0.332667,3.245685,3.134065,5075789.0,-1.578324,365925.934035,
2,2176.0,2112.0,4243.0,4992.0,0.84996,5511.0,6976.0,0.789994,899.0,6949.0,...,4.010026,0.264251,1.235565,0.163968,3.968383,-3.091108,5352806.0,-1.520311,249319.311711,
3,2048.0,2112.0,5114.0,5184.0,0.986497,6265.0,6976.0,0.898079,960.0,9190.0,...,3.564048,0.514072,1.497844,0.534761,3.151837,0.0,5098695.0,1.570796,240019.85986,
4,2752.0,2432.0,5248.0,4864.0,1.078947,7168.0,6560.0,1.092683,2354.0,9408.0,...,2.754563,1.378808,1.114448,0.455327,1.419836,2.991947,5777467.0,-1.720442,753519.945744,


In [5]:
attFeatures.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36
0,13.99,16.92,34.0,56.37,17.38,62.4,86.27,0,57.9,11.08,...,15.04,9.06,65.4,17.9,74.87,52.08,30.4,52.1,55.08,53.16
1,26.48,32.19,53.55,62.69,13.32,41.52,75.72,0,64.08,26.8,...,42.21,9.43,59.17,20.8,64.95,64.78,34.69,34.05,76.23,45.18
2,32.4,26.56,47.66,46.91,37.05,60.32,82.09,0,63.36,22.45,...,32.11,9.73,67.14,27.88,78.04,53.38,28.78,51.55,55.61,40.3
3,51.45,47.92,49.0,55.46,45.09,57.87,71.5,100,54.2,39.68,...,42.03,9.89,76.56,31.8,58.85,57.54,51.82,42.38,60.46,53.2
4,79.5,70.87,70.27,100.0,20.39,11.17,71.96,0,100.0,27.3,...,63.84,85.6,79.38,59.15,72.29,63.21,61.87,35.48,87.06,41.08


In [6]:
userChoices.head()

Unnamed: 0,triplet_id,user_id,reference_font_id,font_a_id,font_b_id,user_choice
0,17,0,152,66,78,1
1,50,0,95,159,7,1
2,0,0,7,6,7,1
3,114,0,85,194,131,0
4,151,0,123,14,91,0


In [7]:
%time
number_of_samples = userChoices.shape[0]

def get_G_array():
    samples = np.zeros((len(attributes), number_of_samples))

    i = 0
    for _, row in userChoices.iterrows():
        font_ref_index = row['reference_font_id']
        font_a_index = row['font_a_id']
        font_b_index = row['font_b_id']

        diff1 = attFeatures.ix[font_ref_index] - attFeatures.ix[font_a_index]
        diff2 = attFeatures.ix[font_ref_index] - attFeatures.ix[font_b_index]
        
        diff1_2 = np.multiply(diff1, diff1)
        diff2_2 = np.multiply(diff2, diff2)
        
        if row['user_choice'] == 0:
            # Futher one on the left, closer one on the right
            samples[:,i] = diff2_2 - diff1_2
        else:
            samples[:,i] = diff1_2 - diff2_2
            
        i += 1

    return -samples.T

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 8.11 µs


In [8]:
%%time
G = get_G_array()

CPU times: user 57.6 s, sys: 364 ms, total: 58 s
Wall time: 58 s


In [9]:
G.shape

(35387, 37)

In [10]:
G

array([[ -4.53777600e+02,  -2.41346500e+02,  -1.15916040e+03, ...,
         -1.69051680e+03,  -8.33650400e+02,  -8.78625000e+01],
       [ -6.50297700e+02,  -3.51566450e+03,   1.14798370e+03, ...,
         -1.13963850e+03,   3.28285200e+02,  -1.08861610e+03],
       [ -1.44480400e+02,  -2.00222500e+02,  -9.62240400e+02, ...,
         -9.69699600e+02,  -7.57350400e+02,  -1.71610000e+00],
       ..., 
       [ -2.05630800e+02,   1.30845000e+01,   2.15461950e+03, ...,
          1.82445120e+03,   9.23064800e+02,   2.38096000e+02],
       [ -1.01878350e+03,   2.83298400e+02,   1.30906800e+02, ...,
          1.40842720e+03,   1.30358100e+02,  -2.56368900e+02],
       [ -9.83158000e+02,  -4.16705500e+02,   4.40772500e+02, ...,
          2.79764100e+02,   3.26895000e+01,   8.27919000e+01]])

In [11]:
userChoices.describe()

Unnamed: 0,triplet_id,user_id,reference_font_id,font_a_id,font_b_id,user_choice
count,35387.0,35387.0,35387.0,35387.0,35387.0,35387.0
mean,1055.435584,34.669737,99.462571,98.716958,101.045469,0.504083
std,731.772441,32.723133,56.968026,58.795933,58.22115,0.49999
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,390.0,10.0,52.0,47.0,50.0,0.0
50%,1045.0,22.0,98.0,97.0,102.0,1.0
75%,1695.0,53.0,149.0,150.0,152.0,1.0
max,2340.0,139.0,199.0,199.0,199.0,1.0


In [12]:
userChoices.shape[0]

35387

In [13]:
compCount.head()

Unnamed: 0,ref_font_id,font_a_id,font_b_id,votes_for_a,votes_for_b
0,105,29,66,10,4
1,131,91,66,4,8
2,97,123,66,1,14
3,6,86,66,0,12
4,171,67,66,7,7


In [14]:
fonts

['ARSMaquetteWebOne',
 'Acme-Regular',
 'AdventPro-SemiBold',
 'Aldrich',
 'Alegreya-BoldItalic',
 'AllertaStencil-Regular',
 'Amethysta-Regular',
 'Andada-Bold',
 'Andada-Italic',
 'AndadaSC-Bold',
 'AnonymousPro',
 'ArchivoNarrow-Regular',
 'ArialRoundedMTBold',
 'ArialUnicodeMS',
 'Arimo-Bold',
 'Arimo-BoldItalic',
 'Arizonia-Regular',
 'Arvo-Bold',
 'Arvo-BoldItalic',
 'Arvo-Italic',
 'Asap-Regular',
 'Asset',
 'Astloch-Bold',
 'AveriaLibre-LightItalic',
 'AveriaSansLibre-Italic',
 'AveriaSerifLibre-Italic',
 'BadScript-Regular',
 'Bello-Pro',
 'BenchNine-Regular',
 'Bentham-Regular',
 'Bevan',
 'BilboSwashCaps-Regular',
 'BlackOpsOne-Regular',
 'BowlbyOneSC-Regular',
 'Brevia-Regular',
 'BubblerOne',
 'Buenard-Bold',
 'Cabin-SemiBoldItalic',
 'CabinCondensed',
 'CabinCondensed-Medium',
 'Calluna-Regular',
 'Cantarell-BoldOblique',
 'CantoraOne-Regular',
 'Cardo-Italic',
 'CenturyGothic',
 'Chivo-Italic',
 'ClickerScript-Regular',
 'CrimsonText-BoldItalic',
 'CrimsonText-Semibold',

In [15]:
attributes

['angular',
 'artistic',
 'attention-grabbing',
 'attractive',
 'bad',
 'boring',
 'calm',
 'capitals',
 'charming',
 'clumsy',
 'complex',
 'cursive',
 'delicate',
 'disorderly',
 'display',
 'dramatic',
 'formal',
 'fresh',
 'friendly',
 'gentle',
 'graceful',
 'happy',
 'italic',
 'legible',
 'modern',
 'monospace',
 'playful',
 'pretentious',
 'serif',
 'sharp',
 'sloppy',
 'soft',
 'strong',
 'technical',
 'thin',
 'warm',
 'wide']

In [16]:
print("Total Font Count:", len(fonts))
print("Total Attribute Count:", len(attributes))

('Total Font Count:', 200)
('Total Attribute Count:', 37)


In [17]:
def get_unique_dict(unique_values):
    dict_id_to_name = dict(enumerate(unique_values))
    dict_name_to_id = dict((y,x) for x, y in dict_id_to_name.iteritems())
    return dict_id_to_name, dict_name_to_id

In [18]:
id_to_fontname_dict, fontname_to_id_dict  = get_unique_dict(fonts)
id_to_attribute_dict, attribute_to_id_dict = get_unique_dict(attributes)

In [19]:
id_to_fontname_dict

{0: 'ARSMaquetteWebOne',
 1: 'Acme-Regular',
 2: 'AdventPro-SemiBold',
 3: 'Aldrich',
 4: 'Alegreya-BoldItalic',
 5: 'AllertaStencil-Regular',
 6: 'Amethysta-Regular',
 7: 'Andada-Bold',
 8: 'Andada-Italic',
 9: 'AndadaSC-Bold',
 10: 'AnonymousPro',
 11: 'ArchivoNarrow-Regular',
 12: 'ArialRoundedMTBold',
 13: 'ArialUnicodeMS',
 14: 'Arimo-Bold',
 15: 'Arimo-BoldItalic',
 16: 'Arizonia-Regular',
 17: 'Arvo-Bold',
 18: 'Arvo-BoldItalic',
 19: 'Arvo-Italic',
 20: 'Asap-Regular',
 21: 'Asset',
 22: 'Astloch-Bold',
 23: 'AveriaLibre-LightItalic',
 24: 'AveriaSansLibre-Italic',
 25: 'AveriaSerifLibre-Italic',
 26: 'BadScript-Regular',
 27: 'Bello-Pro',
 28: 'BenchNine-Regular',
 29: 'Bentham-Regular',
 30: 'Bevan',
 31: 'BilboSwashCaps-Regular',
 32: 'BlackOpsOne-Regular',
 33: 'BowlbyOneSC-Regular',
 34: 'Brevia-Regular',
 35: 'BubblerOne',
 36: 'Buenard-Bold',
 37: 'Cabin-SemiBoldItalic',
 38: 'CabinCondensed',
 39: 'CabinCondensed-Medium',
 40: 'Calluna-Regular',
 41: 'Cantarell-BoldObli

In [20]:
fontname_to_id_dict

{'ARSMaquetteWebOne': 0,
 'Acme-Regular': 1,
 'AdventPro-SemiBold': 2,
 'Aldrich': 3,
 'Alegreya-BoldItalic': 4,
 'AllertaStencil-Regular': 5,
 'Amethysta-Regular': 6,
 'Andada-Bold': 7,
 'Andada-Italic': 8,
 'AndadaSC-Bold': 9,
 'AnonymousPro': 10,
 'ArchivoNarrow-Regular': 11,
 'ArialRoundedMTBold': 12,
 'ArialUnicodeMS': 13,
 'Arimo-Bold': 14,
 'Arimo-BoldItalic': 15,
 'Arizonia-Regular': 16,
 'Arvo-Bold': 17,
 'Arvo-BoldItalic': 18,
 'Arvo-Italic': 19,
 'Asap-Regular': 20,
 'Asset': 21,
 'Astloch-Bold': 22,
 'AveriaLibre-LightItalic': 23,
 'AveriaSansLibre-Italic': 24,
 'AveriaSerifLibre-Italic': 25,
 'BadScript-Regular': 26,
 'Bello-Pro': 27,
 'BenchNine-Regular': 28,
 'Bentham-Regular': 29,
 'Bevan': 30,
 'BilboSwashCaps-Regular': 31,
 'BlackOpsOne-Regular': 32,
 'BowlbyOneSC-Regular': 33,
 'Brevia-Regular': 34,
 'BubblerOne': 35,
 'Buenard-Bold': 36,
 'Cabin-SemiBoldItalic': 37,
 'CabinCondensed': 38,
 'CabinCondensed-Medium': 39,
 'Calluna-Regular': 40,
 'Cantarell-BoldOblique'

In [21]:
id_to_attribute_dict

{0: 'angular',
 1: 'artistic',
 2: 'attention-grabbing',
 3: 'attractive',
 4: 'bad',
 5: 'boring',
 6: 'calm',
 7: 'capitals',
 8: 'charming',
 9: 'clumsy',
 10: 'complex',
 11: 'cursive',
 12: 'delicate',
 13: 'disorderly',
 14: 'display',
 15: 'dramatic',
 16: 'formal',
 17: 'fresh',
 18: 'friendly',
 19: 'gentle',
 20: 'graceful',
 21: 'happy',
 22: 'italic',
 23: 'legible',
 24: 'modern',
 25: 'monospace',
 26: 'playful',
 27: 'pretentious',
 28: 'serif',
 29: 'sharp',
 30: 'sloppy',
 31: 'soft',
 32: 'strong',
 33: 'technical',
 34: 'thin',
 35: 'warm',
 36: 'wide'}

In [22]:
attribute_to_id_dict

{'angular': 0,
 'artistic': 1,
 'attention-grabbing': 2,
 'attractive': 3,
 'bad': 4,
 'boring': 5,
 'calm': 6,
 'capitals': 7,
 'charming': 8,
 'clumsy': 9,
 'complex': 10,
 'cursive': 11,
 'delicate': 12,
 'disorderly': 13,
 'display': 14,
 'dramatic': 15,
 'formal': 16,
 'fresh': 17,
 'friendly': 18,
 'gentle': 19,
 'graceful': 20,
 'happy': 21,
 'italic': 22,
 'legible': 23,
 'modern': 24,
 'monospace': 25,
 'playful': 26,
 'pretentious': 27,
 'serif': 28,
 'sharp': 29,
 'sloppy': 30,
 'soft': 31,
 'strong': 32,
 'technical': 33,
 'thin': 34,
 'warm': 35,
 'wide': 36}

In [23]:
feature_count = 37

P = matrix(np.identity(feature_count), tc='d')
q = matrix(np.zeros(feature_count), tc='d')
G_mat = matrix(G, tc='d')
h = matrix(np.ones(number_of_samples), tc='d')

In [24]:
%%time
sol = solvers.qp(P,q,G_mat,h)

     pcost       dcost       gap    pres   dres
 0:  3.4624e-08 -6.6070e+04  4e+05  3e+00  4e+08
 1:  5.6097e-08 -1.1730e+05  2e+05  1e+00  2e+08
 2:  6.4500e-08 -1.1892e+05  2e+05  1e+00  2e+08
 3:  7.3770e-08 -1.1934e+05  2e+05  1e+00  2e+08
 4:  1.0552e-07 -1.1636e+05  2e+05  1e+00  1e+08
 5:  1.0595e-07 -1.1007e+05  2e+05  8e-01  1e+08
 6:  8.5386e-08 -9.8884e+04  1e+05  5e-01  7e+07
 7:  4.2712e-08 -2.1258e+04  2e+04  6e-16  1e-05
 8:  4.2712e-08 -2.1258e+02  2e+02  4e-16  1e-07
 9:  4.2712e-08 -2.1258e+00  2e+00  5e-16  2e-09
10:  4.2712e-08 -2.1270e-02  2e-02  4e-16  2e-11
11:  4.2689e-08 -2.2406e-04  2e-04  4e-16  3e-13
12:  4.0684e-08 -1.2544e-05  1e-05  5e-16  2e-14
13:  1.9894e-08 -1.8833e-06  2e-06  5e-16  2e-16
14:  1.1155e-08 -1.0137e-06  1e-06  3e-16  7e-17
15:  7.1379e-09 -4.9291e-07  5e-07  3e-16  2e-17
16:  5.0012e-09 -2.3176e-07  2e-07  3e-16  7e-18
17:  3.4650e-09 -1.6795e-07  2e-07  2e-16  2e-18
18:  2.4638e-09 -8.4992e-08  9e-08  3e-16  1e-18
Optimal solution foun