In [1]:
import pandas as pd
import numpy as np
import importlib, os, math

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA

import warnings
# NOTE(review): this silences every warning category for the whole session, including
# scikit-learn's convergence and data-conversion warnings; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Lift pandas' display limits so that all columns and rows of a frame are rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# Import the project modules and immediately re-execute each one with importlib.reload,
# so that edits made to the package source after the kernel started are picked up
# without restarting the kernel.
# NOTE(review): the import/reload pairs are interleaved on purpose or by habit; keep the
# order when editing, since later modules may import the earlier ones.
from ir_crosslingual.embeddings import embeddings
importlib.reload(embeddings)

from ir_crosslingual.features import text_based
importlib.reload(text_based)

from ir_crosslingual.features import vector_based
importlib.reload(vector_based)

from ir_crosslingual.supervised_classification import sup_model
importlib.reload(sup_model)

from ir_crosslingual.utils import paths
importlib.reload(paths)

from ir_crosslingual.sentences import sentences
# Last expression of the cell: the reloaded module object is what gets displayed as output.
importlib.reload(sentences)

<module 'ir_crosslingual.sentences.sentences' from '/Users/jani/PycharmProjects/ir-crosslingual/ir_crosslingual/sentences/sentences.py'>

# Load data

## Word embeddings

In [3]:
# One WordEmbeddings object per language code ('de' for German, 'en' for English);
# load_embeddings() is called on each before it is used by the cells below.
german = embeddings.WordEmbeddings('de')
german.load_embeddings()

english = embeddings.WordEmbeddings('en')
english.load_embeddings()

In [4]:
# Learn the projection matrices between the English and German embedding spaces.
# Per the log output both directions are learned (en-de, then de-en) from an expert
# dictionary; W_ende / W_deen presumably hold the en->de and de->en matrices — TODO confirm.
# Neither variable is referenced again in the cells shown in this notebook.
W_ende, W_deen = embeddings.WordEmbeddings.learn_projection_matrix(src_lang='en', trg_lang='de')

Learn projection matrix for en-de
Found 13700 valid translation pairs in expert dictionary.
977 other pairs contained at least one unknown word (0 in source language, 977 in target language).
Resulting subspace dimension: (13700, 300)
Resulting subspace dimension: (13700, 300)
Learn projection matrix for de-en
Found 10604 valid translation pairs in expert dictionary.
262 other pairs contained at least one unknown word (0 in source language, 262 in target language).
Resulting subspace dimension: (10604, 300)
Resulting subspace dimension: (10604, 300)


## Sentence embeddings and feature engineering preparation

In [5]:
# Per-sentence features requested from load_data; in the loaded frame each one shows up
# once per side as a src_<name> and a trg_<name> column.
prepared_features = ['num_words', 'num_punctuation', 'occ_question_mark', 'occ_exclamation_mark']

In [6]:
# Sentences object with the English embeddings as source side and the German embeddings
# as target side; all later data/feature/evaluation calls go through this object.
sens = sentences.Sentences(src_words=english, trg_words=german)

In [7]:
# Load the sentence pairs, embed them and prepare the per-sentence features listed in
# prepared_features (see the progress log printed by this cell).
# dim_red=50 corresponds to the 50 src_embedding_<i> columns in the displayed frame —
# presumably a dimensionality reduction of the source embedding; TODO confirm.
# NOTE(review): n_max=500 does not match the 4996 rows that len(data) reports further
# down, so the saved outputs likely stem from a run with different arguments.
data = sens.load_data(single_source=False, n_max=500, features=prepared_features, dim_red=50, align=False)

Target sentences loaded
Source sentences loaded
Sentences preprocessed
Sentences embeddings extracted in en
Could not find a term of the sentence 'Sicherheitsberater für den Gefahrguttransport' in word embedding vocabulary and thus, could not calculate the respective embedding vector.
Could not find a term of the sentence 'Arbeitsplan' in word embedding vocabulary and thus, could not calculate the respective embedding vector.
Sentences embeddings extracted in de
Sentences transformed
Start preparation of feature num_words
Start preparation of feature num_punctuation
Start preparation of feature occ_question_mark
Start preparation of feature occ_exclamation_mark


In [11]:
data.head()

Unnamed: 0,src_embedding,trg_embedding,src_embedding_0,src_embedding_1,src_embedding_2,src_embedding_3,src_embedding_4,src_embedding_5,src_embedding_6,src_embedding_7,src_embedding_8,src_embedding_9,src_embedding_10,src_embedding_11,src_embedding_12,src_embedding_13,src_embedding_14,src_embedding_15,src_embedding_16,src_embedding_17,src_embedding_18,src_embedding_19,src_embedding_20,src_embedding_21,src_embedding_22,src_embedding_23,src_embedding_24,src_embedding_25,src_embedding_26,src_embedding_27,src_embedding_28,src_embedding_29,src_embedding_30,src_embedding_31,src_embedding_32,src_embedding_33,src_embedding_34,src_embedding_35,src_embedding_36,src_embedding_37,src_embedding_38,src_embedding_39,src_embedding_40,src_embedding_41,src_embedding_42,src_embedding_43,src_embedding_44,src_embedding_45,src_embedding_46,src_embedding_47,src_embedding_48,src_embedding_49,src_sentence,trg_sentence,src_preprocessed,trg_preprocessed,src_num_words,trg_num_words,src_num_punctuation,trg_num_punctuation,src_occ_question_mark,trg_occ_question_mark,src_occ_exclamation_mark,trg_occ_exclamation_mark
0,"[-0.07040999999999999, 0.094428, 0.1178755, -0...","[-0.22081, 0.49136, -0.09375700000000001, -0.6...",-6.813816000000001e-17,1.022941e-15,-9.500174e-16,-1.240892e-15,3.294819e-15,2.933314e-15,4.044607e-15,-1.099121e-14,0.756065,-0.221209,0.06576,-0.485744,0.527874,0.175845,-0.150931,0.218552,0.115515,-0.255623,0.369324,-0.16594,0.149499,0.499675,-0.11791,-0.02426,-0.338445,0.180633,-0.025906,-0.095632,-0.131813,-0.086932,-0.117843,0.206108,0.086321,-0.04038,-0.005119,0.038331,0.125831,-0.151752,0.231917,0.041745,0.048371,-0.011108,0.079806,-0.042099,0.079771,0.068157,-0.148824,-0.048697,0.084795,-0.051762,Resumption of the session,Wiederaufnahme der Sitzungsperiode,"[resumption, session]","[wiederaufnahme, sitzungsperiode]",2,2,0,0,False,False,False,False
1,"[-0.05200671428571431, 0.016875466666666665, -...","[-0.1368885, 0.29018044444444446, -0.152462499...",3.434671e-17,2.7635570000000003e-17,5.000543e-17,1.136636e-16,2.89382e-16,3.856392e-16,-1.531151e-16,-9.922618e-16,0.027637,-0.0905,-0.043361,-0.230276,-0.011518,-0.152904,-0.09975,0.274953,0.075749,-0.105287,-0.043292,-0.093664,-0.01576,0.0374,0.070964,0.154733,0.048981,-0.0174,0.114574,-0.139906,0.094416,0.018032,-0.059899,0.030677,-0.040633,-0.039876,0.009193,0.061246,0.038447,0.160714,0.00978,-0.102946,-0.060325,-0.05915,-0.076941,0.032945,0.019852,-0.031766,0.030744,0.022873,0.040077,-0.059257,I declare resumed the session of the European ...,"Ich erkläre die am Freitag, dem 17. Dezember u...","[declare, resumed, session, european, parliame...","[erkläre, freitag, ,, 17, ., dezember, unterbr...",21,16,2,5,False,False,False,False
2,"[-0.08509222727272729, -0.03279808636363638, -...","[-0.1190726470588235, 0.13479876470588237, -0....",-6.398263e-17,1.0983770000000001e-17,2.498758e-16,2.219906e-16,-2.003122e-16,-6.726332e-16,-1.226164e-15,1.665335e-16,-0.098161,-0.201713,-0.043703,0.019219,0.083174,0.02906,-0.106727,-0.095575,-0.109884,0.080379,-0.00099,0.095195,0.015152,-0.028045,-0.057315,0.274822,-0.077733,-0.045828,0.008512,0.049508,0.006612,-0.002893,-0.167989,-0.103978,0.051534,0.090515,-0.055573,-0.013326,-0.009915,-0.073128,0.052646,-0.105946,0.142619,0.146326,-0.070585,0.046388,-0.104762,-0.063911,0.146058,0.055143,-0.014528,-0.097608,"Although, as you will have seen, the dreaded '...","Wie Sie feststellen konnten, ist der gefürchte...","[although, ,, seen, ,, dreaded, ', millennium,...","[feststellen, konnten, ,, gefürchtete, "", mill...",17,13,6,6,False,False,False,False
3,"[-0.064901, -0.08438565454545456, -0.008655272...","[-0.12862244444444446, 0.33829177777777786, -0...",-2.6778380000000003e-17,2.265477e-16,-1.486344e-16,-1.152948e-16,4.621411e-16,-5.169262e-16,-2.510549e-16,-1.276756e-15,0.055067,-0.112013,-0.153533,-0.147879,0.017726,-0.207878,0.071429,0.240694,0.074224,0.101956,-0.032203,0.015778,-0.281565,0.031674,0.077897,0.078755,0.044893,0.027502,-0.085405,-0.085463,0.050354,0.045843,0.113155,0.049879,0.025113,0.160102,-0.065052,0.023541,0.077003,0.022075,-0.066768,-0.165715,0.148567,-0.038953,0.051826,0.024636,0.115914,-0.152551,0.036224,-0.060032,-0.040084,0.037022,You have requested a debate on this subject in...,Im Parlament besteht der Wunsch nach einer Aus...,"[requested, debate, subject, course, next, day...","[parlament, besteht, wunsch, aussprache, verla...",8,8,3,1,False,False,False,False
4,"[-0.07612752, -0.011232175999999993, -0.131726...","[-0.21589275000000002, 0.14882689999999996, -0...",-8.829222999999999e-19,-1.065464e-16,3.348054e-16,7.592240000000001e-17,2.661468e-16,-1.257577e-15,-1.33551e-15,-7.21645e-16,-0.058413,-0.304249,-0.093107,0.066716,-0.159424,-0.114272,0.075814,0.01878,-0.149323,0.060891,0.114424,-0.194163,0.071959,-0.109974,-0.019721,0.007437,-0.043336,-0.037701,-0.049601,-0.061106,0.047868,-0.021222,0.047966,-0.029327,0.008411,0.12006,0.00051,0.021471,0.033159,-0.017279,0.032576,-0.010282,0.031812,-0.069593,0.064648,0.037056,0.023304,0.067347,-0.115464,0.02713,0.05774,-0.054253,"In the meantime, I should like to observe a mi...",Heute möchte ich Sie bitten - das ist auch der...,"[meantime, ,, like, observe, minute, ', silenc...","[heute, möchte, bitten, -, wunsch, kolleginnen...",18,15,7,6,False,False,False,False


# Create training dataset and do feature engineering

In [135]:
# Features to engineer, grouped under the feature group they belong to.
features_dict = {
    # one 'diff_<name>' feature for every prepared per-sentence feature
    'text_based': list(map('diff_{}'.format, prepared_features)),
    'vector_based': ['cosine_similarity'],
}

In [136]:
print(len(data))

4996


In [137]:
# Build the labelled training set (it carries a `translation` label column, see head()).
# frac_pos=0.5 presumably requests half true-translation and half non-translation pairs
# among the n_train=4000 rows — TODO confirm against Sentences.create_train_set.
# NOTE(review): if the negative pairs are sampled randomly, fix a seed for reproducibility.
train_data = sens.create_train_set(n_train=4000, frac_pos=0.5)

In [138]:
train_data.head()

Unnamed: 0,src_sentence,trg_sentence,src_preprocessed,trg_preprocessed,src_embedding,trg_embedding,src_num_words,trg_num_words,src_num_punctuation,trg_num_punctuation,src_occ_question_mark,trg_occ_question_mark,src_occ_exclamation_mark,trg_occ_exclamation_mark,translation
0,Resumption of the session,Wiederaufnahme der Sitzungsperiode,"[resumption, session]","[wiederaufnahme, sitzungsperiode]","[-0.3088614732459478, 0.3472727241705731, -0.2...","[-0.22081, 0.49136, -0.09375700000000001, -0.6...",2,2,0,0,False,False,False,False,1
1,I declare resumed the session of the European ...,"Ich erkläre die am Freitag, dem 17. Dezember u...","[declare, resumed, session, european, parliame...","[erkläre, freitag, ,, 17, ., dezember, unterbr...","[-0.19811859957522993, 0.2886577921005622, -0....","[-0.1368885, 0.29018044444444446, -0.152462499...",21,16,2,5,False,False,False,False,1
2,"Although, as you will have seen, the dreaded '...","Wie Sie feststellen konnten, ist der gefürchte...","[although, ,, seen, ,, dreaded, ', millennium,...","[feststellen, konnten, ,, gefürchtete, "", mill...","[-0.19086391830712227, 0.07257094264933953, -0...","[-0.1190726470588235, 0.13479876470588237, -0....",17,13,6,6,False,False,False,False,1
3,You have requested a debate on this subject in...,Im Parlament besteht der Wunsch nach einer Aus...,"[requested, debate, subject, course, next, day...","[parlament, besteht, wunsch, aussprache, verla...","[-0.1613263801336713, 0.22534862126715746, -0....","[-0.12862244444444446, 0.33829177777777786, -0...",8,8,3,1,False,False,False,False,1
4,"In the meantime, I should like to observe a mi...",Heute möchte ich Sie bitten - das ist auch der...,"[meantime, ,, like, observe, minute, ', silenc...","[heute, möchte, bitten, -, wunsch, kolleginnen...","[-0.19760572236106183, 0.15322830212421143, -0...","[-0.21589275000000002, 0.14882689999999996, -0...",18,15,7,6,False,False,False,False,1


In [139]:
# Compute the features named in features_dict on the training set (data='train').
# In the returned frame the per-side src_*/trg_* count columns no longer appear; instead
# there is one diff_* column per prepared feature plus a cosine_similarity column
# (compare the train_data.head() outputs before and after this cell).
train_data = sens.extract_features(features_dict=features_dict, data='train')

Started diff_num_words
Started diff_num_punctuation
Started diff_occ_question_mark
Started diff_occ_exclamation_mark
Started cosine_similarity


In [140]:
train_data.head()

Unnamed: 0,src_sentence,trg_sentence,src_preprocessed,trg_preprocessed,src_embedding,trg_embedding,translation,diff_num_words,diff_num_punctuation,diff_occ_question_mark,diff_occ_exclamation_mark,cosine_similarity
0,Resumption of the session,Wiederaufnahme der Sitzungsperiode,"[resumption, session]","[wiederaufnahme, sitzungsperiode]","[-0.3088614732459478, 0.3472727241705731, -0.2...","[-0.22081, 0.49136, -0.09375700000000001, -0.6...",1,0,0,0,0,0.69953
1,I declare resumed the session of the European ...,"Ich erkläre die am Freitag, dem 17. Dezember u...","[declare, resumed, session, european, parliame...","[erkläre, freitag, ,, 17, ., dezember, unterbr...","[-0.19811859957522993, 0.2886577921005622, -0....","[-0.1368885, 0.29018044444444446, -0.152462499...",1,5,3,0,0,0.87535
2,"Although, as you will have seen, the dreaded '...","Wie Sie feststellen konnten, ist der gefürchte...","[although, ,, seen, ,, dreaded, ', millennium,...","[feststellen, konnten, ,, gefürchtete, "", mill...","[-0.19086391830712227, 0.07257094264933953, -0...","[-0.1190726470588235, 0.13479876470588237, -0....",1,4,0,0,0,0.842287
3,You have requested a debate on this subject in...,Im Parlament besteht der Wunsch nach einer Aus...,"[requested, debate, subject, course, next, day...","[parlament, besteht, wunsch, aussprache, verla...","[-0.1613263801336713, 0.22534862126715746, -0....","[-0.12862244444444446, 0.33829177777777786, -0...",1,0,2,0,0,0.749109
4,"In the meantime, I should like to observe a mi...",Heute möchte ich Sie bitten - das ist auch der...,"[meantime, ,, like, observe, minute, ', silenc...","[heute, möchte, bitten, -, wunsch, kolleginnen...","[-0.19760572236106183, 0.15322830212421143, -0...","[-0.21589275000000002, 0.14882689999999996, -0...",1,3,1,0,0,0.887995


# Create logistic regression model

### Fit a logistic regression model on training data

In [141]:
# Name of the target column holding the translation / no-translation label.
label = 'translation'
# Flat list of every feature column, in the order the groups appear in features_dict.
features = [name for group in features_dict.values() for name in group]

In [142]:
# Logistic regression with scikit-learn's default hyperparameters (the repr printed by
# the fit cell lists them: L2 penalty, C=1.0, lbfgs solver, max_iter=100).
logisticRegr = LogisticRegression()

In [143]:
# Feature matrix: one column per engineered feature listed in `features`.
X_train = train_data[features]
# Select the label as a 1-D Series (single brackets), consistent with how y_true is
# selected for evaluation. The former `train_data[[label]]` produced an (n_samples, 1)
# DataFrame, which scikit-learn has to flatten and reports with a DataConversionWarning
# (invisible here only because of the global warnings filter).
y_train = train_data[label]

In [144]:
logisticRegr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [145]:
logisticRegr.coef_

array([[-0.37206038, -0.34842457, -0.32800881,  0.21731491, 15.21589542]])

### Save model

In [146]:
# Persisting the fitted model is currently disabled. A later cell loads a model named
# 'logReg_v0.2', so uncomment this line (or make sure that model has been saved before)
# when running the notebook end to end.
#sup_model.SupModel.save_model(model=logisticRegr, name='logReg_v0.2', prepared_features=sens.prepared_features, features=sens.features_dict)


# Evaluate logistic regression model

### Create test collection, annotate translation label and do feature engineering

In [147]:
# Build the evaluation collection from n_queries=50 source sentences and n_docs=996
# target sentences; the resulting frame has 50 * 996 = 49800 rows (see len(test_data)),
# i.e. apparently every query is paired with every candidate document.
test_data = sens.create_test_collection(n_queries=50, n_docs=996)

In [148]:
len(test_data)

49800

In [149]:
# Compute the same features as for training, this time on the test collection
# (data='test'); the returned frame carries the diff_* and cosine_similarity columns
# together with the `translation` label.
test_data = sens.extract_features(features_dict=features_dict, data='test')

Started diff_num_words
Started diff_num_punctuation
Started diff_occ_question_mark
Started diff_occ_exclamation_mark
Started cosine_similarity


In [150]:
test_data.head()

Unnamed: 0,src_sentence,src_preprocessed,src_embedding,trg_sentence,trg_preprocessed,trg_embedding,translation,diff_num_words,diff_num_punctuation,diff_occ_question_mark,diff_occ_exclamation_mark,cosine_similarity
0,But we should not kid ourselves: a comprehensi...,"[kid, :, comprehensive, middle, east, peace, d...","[-0.1564942128401019, 0.029462311938183758, -0...",Wir sollten uns aber auch nichts vormachen: Ei...,"[sollten, vormachen, :, umfassende, friedenslö...","[-0.15268183333333332, -0.08944499999999998, -...",1,3,0,0,0,0.706877
1,But we should not kid ourselves: a comprehensi...,"[kid, :, comprehensive, middle, east, peace, d...","[-0.1564942128401019, 0.029462311938183758, -0...",Die Kommission befaßt sich bereits mit der Fra...,"[kommission, befaßt, bereits, frage, ,, jüngst...","[-0.19377049999999998, 0.014464100000000004, -...",0,0,0,0,0,0.617758
2,But we should not kid ourselves: a comprehensi...,"[kid, :, comprehensive, middle, east, peace, d...","[-0.1564942128401019, 0.029462311938183758, -0...",Wir werden dieses Thema mit dem Parlament und ...,"[thema, parlament, rat, schon, bald, ausführli...","[-0.1845195, 0.283377055, -0.134744125, -0.512...",0,3,1,0,0,0.637596
3,But we should not kid ourselves: a comprehensi...,"[kid, :, comprehensive, middle, east, peace, d...","[-0.1564942128401019, 0.029462311938183758, -0...",Ich möchte die Abgeordneten jedoch daran erinn...,"[möchte, abgeordneten, jedoch, daran, erinnern...","[-0.08900128571428571, 0.13880665714285714, -0...",0,3,0,0,0,0.683795
4,But we should not kid ourselves: a comprehensi...,"[kid, :, comprehensive, middle, east, peace, d...","[-0.1564942128401019, 0.029462311938183758, -0...",Wir sind die größten Geldgeber der Palästinenser.,"[größten, geldgeber, palästinenser, .]","[-0.372708, 0.10743775, -0.13228374999999998, ...",0,7,1,0,0,0.609836


### Evaluate boolean classifier

#### Computed directly with scikit-learn metrics

In [171]:
# Hard 0/1 predictions of the fitted model for every query/document pair in the test set.
y_pred = logisticRegr.predict(test_data[features])
# Gold labels. NOTE(review): the counts displayed below show 50 positives among 49800
# pairs, so accuracy is dominated by the negative class; precision/recall/F1 are the
# informative numbers here.
y_true = test_data[label]

In [172]:
# Class-balance sanity check: number of predicted positives, number of actual positives
# and total number of scored pairs. The vectorised .sum() replaces the builtin sum(),
# which walks the array element by element in Python; one display() call renders all
# three values in order.
display(y_pred.sum(), y_true.sum(), len(y_pred))

8637

50

49800

In [173]:
precision_score(y_true, y_pred)

0.00578904712284358

In [174]:
recall_score(y_true, y_pred)

1.0

In [175]:
accuracy_score(y_true, y_pred)

0.827570281124498

In [176]:
f1_score(y_true, y_pred)

0.011511453896627144

#### Using SupModel instance

In [242]:
# Same evaluation, this time through the project's SupModel wrapper.
sup = sup_model.SupModel()
# After this call the metrics are read back from `sup` (sup.accuracy, sup.precision,
# sup.recall, sup.f1 in the following cells) and match the scikit-learn values above.
# As the last expression its return value is displayed — a SupModel instance per the output.
sup.evaluate_boolean(logisticRegr, sens)

<ir_crosslingual.supervised_classification.sup_model.SupModel at 0x1a36021860>

In [243]:
sup.accuracy

0.827570281124498

In [244]:
sup.precision

0.00578904712284358

In [245]:
sup.recall

1.0

In [246]:
sup.f1

0.011511453896627144

### Evaluate ranking

In [247]:
# Ranking quality of the model on the test collection; `compute_map` presumably stands
# for mean average precision — TODO confirm against SupModel.compute_map.
sup.compute_map(logisticRegr, sens)

0.7239777797227225

### Load previously saved model and apply ranking

In [133]:
# Load the persisted model named 'logReg_v0.2' together with its feature metadata.
# NOTE(review): this rebinds `logisticRegr`, `prepared_features` and `features`, replacing
# the objects created earlier in the notebook. If the loaded `features` is a dict (the
# commented-out save call passes `sens.features_dict`), re-running cells that index with
# `test_data[features]` would behave differently — TODO confirm load_model's return types.
# NOTE(review): the save call in this notebook is commented out, so a fresh run depends
# on a model file that already exists on disk.
logisticRegr, prepared_features, features = sup_model.SupModel.load_model(name='logReg_v0.2')

### Rank target sentences in test dataset

In [173]:
# Rank the target sentences for the test data held by `sens`, using the given model.
# The sens.test_data frame displayed afterwards contains `predictions`,
# `predicted_sentences` and `predicted_probabilities` columns — presumably added by this
# call; TODO confirm. Progress is logged as "Done with index: <i>".
# NOTE(review): invoked on the class with the model passed in, whereas evaluate_boolean
# and compute_map above are invoked on a SupModel instance.
sup_model.SupModel.rank_trg_sentences(logisticRegr, sens)


Done with index: 0
Done with index: 100


In [126]:
sens.test_data.head()

Unnamed: 0,src_sentence,trg_sentence,src_preprocessed,trg_preprocessed,src_embedding,trg_embedding,src_num_words,trg_num_words,src_num_punctuation,trg_num_punctuation,src_occ_question_mark,trg_occ_question_mark,src_occ_exclamation_mark,trg_occ_exclamation_mark,src_num_noun,trg_num_noun,src_num_verb,trg_num_verb,src_num_adverb,trg_num_adverb,src_num_adjective,trg_num_adjective,src_num_wh,trg_num_wh,src_num_pronoun,trg_num_pronoun,translation,predictions,predicted_sentences,predicted_probabilities
0,"On the other hand, we can be fairly confident ...",Hingegen kann man der Zukunft einigermaßen zuv...,"[hand, ,, fairly, confident, future, know, eff...","[hingegen, zukunft, einigermaßen, zuversichtli...","[-0.03125365286703291, 0.05021663067572581, -0...","[-0.12881409285714285, -0.026504692857142853, ...",19,16,6,3,False,False,False,False,12,15,3,0,1,0,3,0,0,0,0,0,1,"[[Unterstreichen möchte ich auch, daß es notwe...","[Unterstreichen möchte ich auch, daß es notwen...","[0.9831631508796376, 0.9831631508796376, 0.983..."
1,I should also like to stress the need to maint...,"Unterstreichen möchte ich auch, daß es notwend...","[also, like, stress, need, maintain, flexible,...","[unterstreichen, möchte, ,, notwendig, ,, ents...","[-0.025572795222382205, -0.0009202446333488115...","[-0.03415418181818181, 0.03397977727272728, -0...",23,19,3,6,False,False,False,False,10,14,4,2,3,1,4,2,0,0,0,0,1,[[Doch diese notwendige realistische und pragm...,[Doch diese notwendige realistische und pragma...,"[0.9380140491536678, 0.9380140491536678, 0.912..."
2,This is a problem of compatibility between our...,Dies ist eine Frage der Vereinbarkeit unserer ...,"[problem, compatibility, directives, end, -, -...","[frage, vereinbarkeit, unserer, richtlinien, a...","[-0.13964157818371498, 0.03865048480077487, -0...","[-0.12898427777777777, -0.00664022222222222, -...",15,14,6,5,False,False,False,False,8,12,4,0,1,0,1,2,0,0,0,0,1,"[[Die vom Parlament vorgeschlagene Lösung, d. ...","[Die vom Parlament vorgeschlagene Lösung, d. h...","[0.9428284887565496, 0.9256622022827817, 0.923..."
3,We must therefore congratulate the Committee o...,Man kann sich daher nur über die Klugheit des ...,"[must, therefore, congratulate, committee, env...","[daher, klugheit, ausschusses, umweltfragen, ,...","[-0.10510580457469121, 0.08141651421038351, -0...","[-0.13002725, 0.12540335, -0.15950131875, -0.2...",25,16,5,5,False,False,False,False,13,9,4,1,1,1,6,2,0,0,0,0,1,[[Ich möchte nochmals kurz auf die wirtschaftl...,[Ich möchte nochmals kurz auf die wirtschaftli...,"[0.9774905186008319, 0.9733422658640649, 0.973..."
4,"In conclusion, let me say that this directive ...","Abschließend möchte ich feststellen, daß diese...","[conclusion, ,, let, say, directive, ambitious...","[abschließend, möchte, feststellen, ,, richtli...","[-0.18686343283558546, 0.1048829425718163, -0....","[-0.14413220833333332, -0.0016888750000000005,...",22,21,3,7,False,False,False,False,10,18,5,2,3,0,4,0,0,0,0,0,1,[[Aus meiner eigenen kommunalpolitischen Erfah...,[Aus meiner eigenen kommunalpolitischen Erfahr...,"[0.9214032349946576, 0.9179825901790721, 0.909..."


In [193]:
# Train: 4800, Test: 200 | Only true translations
# NOTE(review): recorded result of a run with the split named above. The cells earlier in
# this notebook use n_train=4000 and n_queries=50, and the execution counts are out of
# order, so a Restart & Run All will not reproduce this number as-is.
sup_model.SupModel.evaluate_at_k(sens, 1)

0.44

In [189]:
# Train: 4800, Test: 200 | Only true translations
sup_model.SupModel.evaluate_at_k(sens, 3)

0.62

In [190]:
# Train: 4800, Test: 200 | Only true translations
sup_model.SupModel.evaluate_at_k(sens, 5)

0.68

In [191]:
# Train: 4800, Test: 200 | Only true translations
sup_model.SupModel.evaluate_at_k(sens, 10)

0.78

In [None]:
# Train: 4800, Test: 200
# NOTE(review): this cell has no execution count and no recorded output, so the k=1
# value for this setup is missing from the saved notebook.
sup_model.SupModel.evaluate_at_k(sens, 1)

In [177]:
# Train: 4800, Test: 200
sup_model.SupModel.evaluate_at_k(sens, 3)

0.31

In [178]:
# Train: 4800, Test: 200
sup_model.SupModel.evaluate_at_k(sens, 5)

0.34

In [179]:
# Train: 4800, Test: 200
sup_model.SupModel.evaluate_at_k(sens, 10)

0.41

In [127]:
# Train: 7000, Test: 3000
# NOTE(review): recorded result of a different run — the data loaded in this notebook has
# 4996 rows (see len(data)), which cannot yield a 7000/3000 split.
sup_model.SupModel.evaluate_at_k(sens, 1)

0.176

In [128]:
# Train: 7000, Test: 3000
sup_model.SupModel.evaluate_at_k(sens, 3)

0.22166666666666668

In [129]:
# Train: 7000, Test: 3000
sup_model.SupModel.evaluate_at_k(sens, 5)

0.253

In [130]:
# Train: 7000, Test: 3000
sup_model.SupModel.evaluate_at_k(sens, 10)

0.2966666666666667

In [101]:
# Train: 4000, Test: 1000
# NOTE(review): the train size matches n_train=4000 used above, but the test collection
# above is built with n_queries=50 and this output carries an older execution count —
# confirm which run produced it before comparing numbers.
sup_model.SupModel.evaluate_at_k(sens, 1)

0.248

In [102]:
# Train: 4000, Test: 1000
sup_model.SupModel.evaluate_at_k(sens, 3)

0.292

In [103]:
# Train: 4000, Test: 1000
sup_model.SupModel.evaluate_at_k(sens, 5)

0.318

In [104]:
# Train: 4000, Test: 1000
sup_model.SupModel.evaluate_at_k(sens, 10)

0.365