In [1]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import RidgeCV, MultiTaskLassoCV
from sklearn.model_selection import KFold
from sklearn.metrics import pairwise_distances
import warnings
warnings.simplefilter('ignore')

In [10]:
# Locations of the saved training arrays (relative to the notebook directory).
text_train_path = '../data/text_train_tf.npy'
pics_train_path = '../data/pics_train_tf.npy'

# Text features are the regression inputs; picture features are the targets.
# Later cells index both arrays with the same row indices.
text_train = np.load(text_train_path)
pics_train = np.load(pics_train_path)

In [11]:
# Picture features used for the projection: every column except the last 512.
# NOTE(review): what the trailing 512 columns hold is not visible here - confirm.
pics_features = pics_train[:, :-512]

# Reduce the picture features to 100 principal components.
# random_state is fixed because svd_solver='auto' may choose a randomized SVD
# solver, in which case repeated fits give slightly different projections
# (and therefore slightly different downstream scores).
pca = PCA(n_components=100, random_state=0)
pca.fit(pics_features)
pics_train_100 = pca.transform(pics_features)

In [12]:
def get_prediction(vecs,pics):
    """Rank the rows of ``pics`` for every row of ``vecs`` by cosine distance.

    Returns an integer array with one row per row of ``vecs``; each row lists
    the row indices of ``pics`` ordered from smallest to largest cosine
    distance.
    """
    cosine_dists = pairwise_distances(vecs, pics, metric='cosine')
    return np.argsort(cosine_dists, axis=1)

def map_20(ranks, k=20):
    """Average linearly-discounted top-``k`` score of a collection of ranks.

    A rank ``r`` contributes ``(k - r) / k`` when ``r < k`` and ``0``
    otherwise, so rank 0 scores 1.0 and any rank at or beyond the cut-off
    scores nothing. The contributions are averaged over all ranks.

    Parameters
    ----------
    ranks : iterable of numbers
        Zero-based rank of the correct item for each query.
    k : int, default 20
        Cut-off. The default reproduces the original hard-coded behaviour.

    Returns
    -------
    numpy.float64
        Mean score; ``nan`` for an empty ``ranks`` (same as ``np.mean([])``).

    Raises
    ------
    ValueError
        If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be positive")
    # list() keeps support for arbitrary iterables (e.g. generators).
    rank_arr = np.asarray(list(ranks), dtype=float)
    scores = np.where(rank_arr < k, (k - rank_arr) / k, 0.0)
    return scores.mean()

def evaluate(vectors,label_vectors):
    """Score predicted vectors against their matching label vectors.

    Row ``i`` of ``vectors`` is treated as the prediction for row ``i`` of
    ``label_vectors``. For every prediction the label rows are ordered with
    ``get_prediction`` and the position of index ``i`` in that ordering is
    taken as the rank (0 means the matching row came first).

    Returns a tuple ``(mean rank, map_20 score)``.
    """
    orderings = get_prediction(vectors, label_vectors)
    ranks = []
    for row_idx, ordering in enumerate(orderings):
        # Position at which the matching label row appears in the ordering;
        # raises IndexError if row_idx does not occur in the ordering.
        ranks.append(np.flatnonzero(ordering == row_idx)[0])
    mean_rank = np.mean(ranks)
    return mean_rank, map_20(ranks)

In [13]:
# Ridge regression from text features to the PCA-reduced picture features.
# RidgeCV picks alpha from 30 evenly spaced values in [0.1, 8] using 5-fold CV.
ridge_alphas = np.linspace(0.1, 8, 30)
rcv = RidgeCV(alphas=ridge_alphas, cv=5)

# Outer K-fold split (KFold defaults) used to score the fitted model.
cv = KFold()
results = []
for train_index, test_index in cv.split(text_train, pics_train_100):
    # Fit on the training part of the fold and report the selected alpha.
    text_fit, pics_fit = text_train[train_index], pics_train_100[train_index]
    rcv.fit(text_fit, pics_fit)
    print(rcv.alpha_)

    # Predict picture vectors for the held-out texts and score them.
    pics_held_out = pics_train_100[test_index]
    vectors = rcv.predict(text_train[test_index])
    res = evaluate(vectors, pics_held_out)
    print(res)
    results.append(res)

1.4620689655172414
(49.628, 0.39304999999999995)
1.4620689655172414
(51.719, 0.395575)
1.4620689655172414
(50.151, 0.38994999999999996)
1.4620689655172414
(47.123, 0.38952499999999995)
1.4620689655172414
(50.963, 0.3875)


In [23]:
# Same Ridge cross-validation as the earlier Ridge cell, run again:
# text features -> PCA-reduced picture features, alpha chosen by RidgeCV.
rcv = RidgeCV(
    alphas=np.linspace(0.1, 8, 30),
    cv=5,
)
cv = KFold()
results = []
fold_splits = cv.split(text_train, pics_train_100)
for train_index, test_index in fold_splits:
    # Train on this fold's training rows; show the alpha RidgeCV selected.
    rcv.fit(
        text_train[train_index],
        pics_train_100[train_index],
    )
    print(rcv.alpha_)

    # Score predictions for the held-out rows: (mean rank, map_20).
    vectors = rcv.predict(text_train[test_index])
    res = evaluate(vectors, pics_train_100[test_index])
    print(res)
    results.append(res)

1.4620689655172414
(49.62, 0.393175)
1.4620689655172414
(51.6595, 0.39607500000000007)
1.4620689655172414
(50.154, 0.39002499999999996)
1.4620689655172414
(47.1025, 0.389925)
1.4620689655172414
(50.9125, 0.38717499999999994)


In [None]:
# Multi-task Lasso from text features to the PCA-reduced picture features.
# MultiTaskLassoCV searches 8 alphas with 5-fold CV using 3 parallel jobs.
lcv = MultiTaskLassoCV(n_alphas=8, cv=5, n_jobs=3)

# Outer K-fold split (KFold defaults) used to score the fitted model.
cv = KFold()
results = []
for train_index, test_index in cv.split(text_train, pics_train_100):
    # Fit on the training part of the fold and report the selected alpha.
    text_fit, pics_fit = text_train[train_index], pics_train_100[train_index]
    lcv.fit(text_fit, pics_fit)
    print(lcv.alpha_)

    # Predict picture vectors for the held-out texts and score them.
    pics_held_out = pics_train_100[test_index]
    vectors = lcv.predict(text_train[test_index])
    res = evaluate(vectors, pics_held_out)
    print(res)
    results.append(res)