In [1]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel (mode 2 = reload all modules before
# each cell execution).
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Root of the local archtorch checkout. The default is the original
# machine-specific location; set the ARCHTORCH_ROOT environment variable to
# point the notebook at a checkout somewhere else.
PROJECT_ROOT = os.environ.get(
    'ARCHTORCH_ROOT', '/Users/g.sarapulov/Projects/draft/archtorch'
)

# PYTHONPATH is only read at interpreter start-up, so this assignment affects
# child processes; the running kernel is handled through sys.path below.
os.environ['PYTHONPATH'] = PROJECT_ROOT

# Guarded so that re-running the cell does not pile up duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [94]:
from data.input import FileParser, DataSampler, SiameseData
from loss.margin import ContrastiveLoss
from network.siamese import Siamese, Encoder
from train.fit import NetworkTrainer

In [4]:
# Relative locations of the training and evaluation data.
train_path, test_path = 'train_db/', 'test_db/'

# NOTE(review): the two arguments are presumably the file extension to read
# and the field separator — confirm against data.input.FileParser.
parser = FileParser('.txt', ' ')

In [95]:
# Build one SiameseData per location (train first, then test), both sharing
# the same parser instance.
train_dataset, test_dataset = (
    SiameseData(parser, location) for location in (train_path, test_path)
)

In [96]:
# Siamese network wrapped around a freshly constructed Encoder.
model = Siamese(Encoder())

# Contrastive loss with a margin of 1.
loss = ContrastiveLoss(margin=1.0)

# Trainer bound to the train/test datasets.
# NOTE(review): log_interval is presumably counted in batches — confirm
# against train.fit.NetworkTrainer.
trainer = NetworkTrainer(
    train_dataset,
    test_dataset,
    log_interval=100,
)

In [97]:
# Train for five epochs; whatever fit_model returns replaces `model`, so
# re-running this cell continues from the already-fitted object.
model = trainer.fit_model(
    model,
    loss,
    lr=0.001,
    n_epochs=5,
)

Epoch: 1/5. Train set: Average loss: 0.0947
Epoch: 1/5. Validation set: Average loss: 0.0679
Epoch: 2/5. Train set: Average loss: 0.0638
Epoch: 2/5. Validation set: Average loss: 0.0631
Epoch: 3/5. Train set: Average loss: 0.0604
Epoch: 3/5. Validation set: Average loss: 0.0636
Epoch: 4/5. Train set: Average loss: 0.0596
Epoch: 4/5. Validation set: Average loss: 0.0667
Epoch: 5/5. Train set: Average loss: 0.0588
Epoch: 5/5. Validation set: Average loss: 0.0633


In [98]:
import numpy as np
import torch
from torch import exp


def calc_labels(data, model):
    """Score every pair in ``data`` with ``model`` and collect its labels.

    Each item ``data[i]`` must unpack as ``((x0, x1), t)``. The two inputs are
    passed through ``model(x0, x1)``, which must return two outputs; the score
    of the pair is ``exp(-d)`` where ``d`` is the 2-norm of the difference of
    those outputs, so identical outputs score 1.0 and the score decays towards
    0 as they move apart.

    Args:
        data: sized, integer-indexable collection of ``((x0, x1), t)`` items.
        model: callable taking ``(x0, x1)`` and returning a pair of tensors.

    Returns:
        ``(scores, labels)``: a 1-D ``numpy`` array of float scores and a list
        holding the ``t`` of each item, both in dataset order.
    """
    scores = []
    labels = []
    # Pure inference: skip autograd bookkeeping for every forward pass.
    # NOTE(review): the model is used in whatever train/eval mode the caller
    # left it in — confirm it is in eval mode if it has dropout/batch-norm.
    with torch.no_grad():
        for i in range(len(data)):
            (x0, x1), t = data[i]
            out0, out1 = model(x0, x1)
            score = exp(-(out0 - out1).norm(2))
            scores.append(float(score))
            labels.append(t)
    return np.array(scores), labels

In [99]:
# Pairwise scores and labels for the test dataset.
scores, labels = calc_labels(data=test_dataset, model=model)

In [100]:
from sklearn import metrics as m
def meter(probs, yval, thr):
    """Print binary-classification metrics for scores cut at ``thr``.

    ROC AUC is computed from the raw ``probs``; every other metric uses the
    hard predictions ``probs > thr``. The false-acceptance and false-rejection
    rates are read off the confusion matrix (rows are true classes, columns
    are predicted classes), which is printed last.

    Args:
        probs: scores supporting elementwise ``>`` against a scalar.
        yval: true binary labels, aligned with ``probs``.
        thr: decision threshold; a score strictly above it is predicted positive.
    """
    predicted = probs > thr

    print('roc', m.roc_auc_score(yval, probs))
    for title, metric in (
        ('f1', m.f1_score),
        ('accuracy', m.accuracy_score),
        ('precision', m.precision_score),
        ('recall', m.recall_score),
    ):
        print(title, metric(yval, predicted))

    cm = m.confusion_matrix(yval, predicted)
    true_neg_row, true_pos_row = cm[0], cm[1]
    # Share of true negatives that were accepted (predicted positive).
    print('false acceptance', true_neg_row[1] / true_neg_row.sum())
    # Share of true positives that were rejected (predicted negative).
    print('false rejection', true_pos_row[0] / true_pos_row.sum())
    print(cm)

In [101]:
# Report metrics for the current scores at a decision threshold of 0.6.
meter(probs=scores, yval=labels, thr=0.6)

roc 0.8823464622433694
f1 0.8126858275520318
accuracy 0.8016789087093389
precision 0.7578558225508318
recall 0.8760683760683761
false acceptance 0.27010309278350514
false rejection 0.12393162393162394
[[354 131]
 [ 58 410]]


In [102]:
# Score the training dataset; this rebinds `scores` and `labels`, replacing
# whatever those names held before.
scores, labels = calc_labels(data=train_dataset, model=model)

In [103]:
# Report metrics for the current scores at a decision threshold of 0.7.
meter(probs=scores, yval=labels, thr=0.7)

roc 0.9109688090157814
f1 0.7683315621679064
accuracy 0.7915232193405248
precision 0.8634691635217121
recall 0.6920779568863192
false acceptance 0.10922153857912638
false rejection 0.30792204311368077
[[23309  2858]
 [ 8042 18075]]


In [104]:
import numpy as np
import torch
from torch import exp


def calc_all_labels(data, model, n_indices=100):
    """Score every pair returned by ``data.get_all_pairs(i)`` for the first indices.

    For each ``i`` in ``range(n_indices)``, ``data.get_all_pairs(i)`` must
    unpack as ``((x0, x1), t)`` where ``x0``, ``x1`` and ``t`` are aligned,
    indexable sequences. Each pair ``(x0[j], x1[j])`` is passed through
    ``model`` and scored as ``exp(-d)``, ``d`` being the 2-norm of the
    difference of the two model outputs.

    Args:
        data: object exposing ``get_all_pairs(i)``.
            NOTE(review): ``i`` is presumably the index of a reference sample
            paired against all others — confirm against the dataset class.
        model: callable taking ``(x0, x1)`` and returning a pair of tensors.
        n_indices: how many leading indices to enumerate. Defaults to 100,
            the value that used to be hard-coded.

    Returns:
        ``(scores, labels)``: a 1-D ``numpy`` array of float scores and a list
        of the matching entries of ``t``, in enumeration order.
    """
    scores = []
    labels = []
    # Pure inference: skip autograd bookkeeping for every forward pass.
    with torch.no_grad():
        for i in range(n_indices):
            (x0, x1), t = data.get_all_pairs(i)
            for j, tt in enumerate(t):
                out0, out1 = model(x0[j], x1[j])
                score = exp(-(out0 - out1).norm(2))
                scores.append(float(score))
                labels.append(tt)
    return np.array(scores), labels

In [105]:
# Scores and labels for the pairs enumerated via get_all_pairs on the test dataset.
scores_all, labels_all = calc_all_labels(data=test_dataset, model=model)

In [106]:
# Report metrics for the enumerated pairs at a decision threshold of 0.6.
meter(probs=scores_all, yval=labels_all, thr=0.6)

roc 0.9022987644809679
f1 0.06487731279282141
accuracy 0.7528412966597056
precision 0.033628318584070796
recall 0.9169472502805837
false acceptance 0.24870762711864408
false rejection 0.08305274971941638
[[70922 23478]
 [   74   817]]
