In [1]:
import os.path as osp
import pandas as pd
import numpy as np
from snorkel import SnorkelSession
# Create the Snorkel session object. Later cells pass it to query candidates
# and to load the training marginals and the dev gold labels.
session = SnorkelSession()

In [2]:
from snorkel.models import candidate_subclass

# Candidate subclass with two fields: 'cytokine' and 'cell_type'.
InducingCytokine = candidate_subclass('InducingCytokine', ['cytokine', 'cell_type'])

# Fetch every candidate of each split in turn: split 0 is used for training,
# split 1 is the dev set.
train_cands, dev_cands = (
    session.query(InducingCytokine).filter(InducingCytokine.split == split_id).all()
    for split_id in (0, 1)
)

In [3]:
# Number of candidates in the train and dev splits, in that order.
tuple(map(len, (train_cands, dev_cands)))

(4334, 120)

In [4]:
from snorkel.annotations import load_marginals
# Load the marginals stored for split 0 (the same split id that train_cands is
# queried with); they are passed to lstm.train together with train_cands.
train_marginals = load_marginals(session, split=0)

In [9]:
from snorkel.annotations import load_gold_labels

# Raw gold labels for the dev split (split=1) from the 'gold' annotator, as an array.
L_gold_dev_raw = load_gold_labels(session, annotator_name='gold', split=1, load_as_array=True)

# Collapse to {-1, +1}: entries equal to 1 stay positive and EVERYTHING else becomes -1.
# NOTE(review): if the raw array can contain 0 for "no gold label", those rows are
# silently counted as negatives here -- confirm the raw encoding before trusting dev F1.
L_gold_dev = np.where(L_gold_dev_raw == 1, 1, -1)

# The labels are used as Y_dev next to X_dev=dev_cands, so they must line up one-to-one.
# (A value check on the np.where output would always pass, so it would verify nothing.)
assert L_gold_dev.shape == (len(dev_cands),), (
    'gold labels %r do not align with %d dev candidates' % (L_gold_dev.shape, len(dev_cands))
)
L_gold_dev.shape

(120,)

In [11]:
# Sanity-check the training marginals: shape plus smallest and largest value.
# In the recorded run they span only 0.5 to ~0.73.
train_marginals.shape, train_marginals.min(), train_marginals.max()

((4334,), 0.5, 0.7305957138491104)

In [12]:
from snorkel.learning.pytorch import LSTM

# Settings forwarded unchanged as keyword arguments to LSTM.train below.
train_kwargs = dict(
    lr=0.01,
    embedding_dim=100,
    hidden_dim=100,
    n_epochs=20,
    dropout=0.5,
    rebalance=0.25,
    print_freq=5,
    seed=1701,
)

lstm = LSTM(n_threads=None)

# Fit on the training candidates and their marginals; the dev candidates and
# gold labels are supplied so the training log can report a dev score.
lstm.train(
    train_cands,
    train_marginals,
    X_dev=dev_cands,
    Y_dev=L_gold_dev,
    **train_kwargs
)

[LSTM] Training model
[LSTM] n_train=3907  #epochs=20  batch size=64




[LSTM] Epoch 1 (18.98s)	Average loss=0.690050	Dev F1=49.32
[LSTM] Epoch 6 (114.65s)	Average loss=0.676241	Dev F1=61.02
[LSTM] Epoch 11 (204.03s)	Average loss=0.675003	Dev F1=51.61
[LSTM] Epoch 16 (291.51s)	Average loss=0.675041	Dev F1=50.75
[LSTM] Epoch 20 (362.83s)	Average loss=0.674918	Dev F1=52.31
[LSTM] Model saved as <LSTM>
[LSTM] Training done (362.96s)
[LSTM] Loaded model <LSTM>
