In [1]:
# dataset: http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html
import numpy as np
import sklearn as sk
import scipy as sp
from numpy.random import randn
from numpy.linalg import norm, svd
from IMC import IMC
from dirtyIMC import dirtyIMC

In [2]:
%matplotlib inline
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_curve, auc

In [3]:
features_train = []
labels_train = []
with open('data/scene/scene_train') as f:
    for line in f:
        c = line.split()
        labe = np.array(c[0].split(',')).astype(np.int)
        feat = [x.split(':')[1] for x in c[1:]]
        features_train.append(feat)
        ll = np.zeros((6,))
        ll[labe] = 1
        labels_train.append(ll)
features_train = np.array(features_train).astype(np.float)
labels_train = np.array(labels_train) * 2 - 1

features_test = []
labels_test = []
with open('data/scene/scene_test') as f:
    for line in f:
        c = line.split()
        labe = np.array(c[0].split(',')).astype(np.int)
        feat = [x.split(':')[1] for x in c[1:]]
        features_test.append(feat)
        ll = np.zeros((6,))
        ll[labe] = 1
        labels_test.append(ll)
features_test = np.array(features_test).astype(np.float)
labels_test = np.array(labels_test) * 2 - 1

In [98]:
num_test = 1000
noise_level = 0

In [128]:
# seed = 1
# np.random.seed(seed)
perm = np.random.permutation(labels_train.shape[0] + labels_test.shape[0])
features = np.concatenate([features_test, features_train], axis = 0)[perm, :]
features = np.concatenate([features, np.ones((features.shape[0], 1))], axis=1)
labels = np.concatenate([labels_test, labels_train], axis = 0)[perm, :]
labels_gt = labels.copy()
labels[:num_test,:] = 0

In [129]:
lamb1 = 10
lamb2 = 5
maxiter = 100

n1 = labels.shape[0]
n2 = labels.shape[1]
d1 = features.shape[1]
d2 = labels.shape[1]
k1 = 20
k2 = 5

# print 'Generating random data...'
X = features
Y = np.eye(d2)
A = labels_gt
R = labels

t1 = int(round(noise_level * d1))
if t1 > 0:
    U, S, V = svd(X)
    N = U[:, d1:]
    I = np.random.choice(range(d1), t1, replace=False)
    X[:, I] = N[:, :t1]

W0 = randn(d1, k1)
H0 = randn(d2, k1)
U0 = randn(n1, k2)
V0 = randn(n2, k2)

# Run IMC
W, H, U, V, losses = dirtyIMC(R, X, Y, k1, k2, lamb1, lamb2, maxiter, W0, H0, U0, V0)
W_imc, H_imc, losses_imc = IMC(R, X, Y, k1, lamb1, maxiter, W0, H0)

Diff = X.dot(W.T).dot(H).dot(Y.T) + U.T.dot(V) - A;
# Diff[R==0] = 0

relerr = norm(Diff, 'fro')**2 / norm(A, 'fro')**2 * 100
print 'dirtyIMC RelErr = %g'%(relerr)

Diff = X.dot(W_imc.T).dot(H_imc).dot(Y.T) - A;
# Diff[R==0] = 0

relerr = norm(Diff, 'fro')**2 / norm(A, 'fro')**2 * 100
print 'IMC RelErr = %g'%(relerr)
# plt.plot(losses)
# plt.yscale('log')
# plt.show()

Iter 0. Updating W.  Iter 1. Updating W.  Iter 2. Updating W.  Iter 3. Updating W.  Iter 4. Updating W.  Iter 5. Updating W.  Iter 6. Updating W.  Iter 7. Updating W.  Iter 8. Updating W.  Iter 9. Updating W.  Iter 10. Updating W.  Iter 11. Updating W.  Iter 12. Updating W.  Iter 13. Updating W.  Iter 14. Updating W.  Iter 15. Updating W.  Iter 16. Updating W.  Iter 17. Updating W.  Iter 18. Updating W.  Iter 19. Updating W.  Iter 20. Updating W.  Iter 21. Updating W.  Iter 22. Updating W.  Iter 23. Updating W.  Iter 24. Updating W.  Iter 25. Updating W.  Iter 26. Updating W.  Iter 27. Updating W.  Iter 28. Updating W.  Iter 29. Updating W.  Iter 30. Updating W.  Iter 31. Updating W.  Iter 32. Updating W.  Iter 33. Updating W.  Iter 34. Updating W.  Iter 35. Updating W.  Iter 36. Updating W.  Iter 37. Updating W.  Iter 38. Updating W.  Iter 39. Updating W.  Iter 40. Updating W.  Iter 41. Updating W.  Iter 42. Updating W.  Iter 43. Updating W.  Iter 44. Updating W.  Iter 45. Updating W.

In [130]:
IMC_pred = (X.dot(W_imc.T).dot(H_imc).dot(Y.T))[:num_test,:]
DIMC_pred = (X.dot(W.T).dot(H).dot(Y.T) + U.T.dot(V))[:num_test,:]

In [131]:
test_gt = labels_gt[:num_test,:]

In [132]:
precision = dict()
recall = dict()
average_precision = dict()

fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(6):
    precision[i], recall[i], _ = precision_recall_curve(test_gt[:, i], IMC_pred[:, i])
    average_precision[i] = average_precision_score(test_gt[:, i], IMC_pred[:, i])
    fpr[i], tpr[i], _ = roc_curve(test_gt[:, i], IMC_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# print average_precision
roc_auc = np.array(roc_auc.values())
print roc_auc, np.mean(roc_auc[~np.isnan(roc_auc)])

[ 0.91171882  0.97960874  0.93514011  0.94865196  0.78272145  0.84477568] 0.900436126165


In [133]:
precision = dict()
recall = dict()
average_precision = dict()

fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(6):
    precision[i], recall[i], _ = precision_recall_curve(test_gt[:, i], DIMC_pred[:, i])
    average_precision[i] = average_precision_score(test_gt[:, i], DIMC_pred[:, i])
    fpr[i], tpr[i], _ = roc_curve(test_gt[:, i], DIMC_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# print average_precision
roc_auc = np.array(roc_auc.values())
print roc_auc, np.mean(roc_auc[~np.isnan(roc_auc)])

[ 0.92147728  0.98293643  0.93775006  0.95141104  0.81427739  0.87606564] 0.91398630654


In [66]:
# bias
[ 0.88561743  0.96580297  0.9220508   0.95168248  0.77645543  0.83664634] 0.889709243145
[ 0.68387599  0.82751553  0.86321151  0.89373357  0.69169784  0.81476965] 0.795800681359

# 1e-2 1e-1
[ 0.88249405  0.96648085  0.89345297  0.93446636  0.76356998  0.83781165] 0.8797126454
[ 0.88370376  0.9154724   0.89687427  0.93141642  0.74596584  0.8470664 ] 0.870083182356


SyntaxError: invalid syntax (<ipython-input-66-e5d25f412c15>, line 2)

In [161]:
thresh = -0.15
IMC_acc = np.zeros(IMC_pred.shape)
IMC_acc[IMC_pred >= thresh] = 1
IMC_acc[IMC_pred < thresh] = -1
IMC_acc = IMC_acc == test_gt
print 'IMC:',np.sum(IMC_acc) * 1.0 / (IMC_acc.shape[0] * IMC_acc.shape[1])

IMC: 0.883833333333


In [162]:
DIMC_acc = np.zeros(DIMC_pred.shape)
DIMC_acc[DIMC_pred >= thresh] = 1
DIMC_acc[DIMC_pred < thresh] = -1
DIMC_acc = DIMC_acc == test_gt
print 'DIMC:',np.sum(DIMC_acc) * 1.0 / (DIMC_acc.shape[0] * DIMC_acc.shape[1])

DIMC: 0.892


In [163]:
features.shape

(2407, 295)