In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import os

# TO USE A DATABASE OTHER THAN SQLITE, USE THIS LINE
# Note that this is necessary for parallel execution amongst other things...
os.environ['SNORKELDB'] = "postgres://postgres:password123@localhost:5432/snorkel"

import numpy as np
from snorkel import SnorkelSession
session = SnorkelSession()

In [2]:
from snorkel.models import candidate_subclass

# Declare the `Mineral` candidate class together with its two argument fields.
candidate_fields = ['mineral_name', 'strat_name']
Mineral = candidate_subclass('Mineral', candidate_fields)

In [3]:
from snorkel.annotations import load_marginals

# Fetch the stored marginals for split 0, the same split that is queried into
# `train_cands` and later passed to training alongside these marginals.
train_marginals = load_marginals(
    session,
    split=0,
)

In [4]:
def candidates_in_split(split):
    """Return every Mineral candidate whose `split` equals `split`, ordered by id."""
    return (
        session.query(Mineral)
        .filter(Mineral.split == split)
        .order_by(Mineral.id)
        .all()
    )


# Split numbering used by this notebook: 0 = train, 1 = dev, 2 = test, 3 = final.
# The list is built eagerly so the four queries run in that order.
train_cands, dev_cands, test_cands, final_cands = [
    candidates_in_split(split) for split in (0, 1, 2, 3)
]

In [5]:
from snorkel.annotations import load_gold_labels

# Gold labels recorded under the 'gold' annotator for split 1 (the split behind
# `dev_cands`) and split 2 (the split behind `test_cands`).
gold_annotator = 'gold'
L_gold_dev = load_gold_labels(session, annotator_name=gold_annotator, split=1)
L_gold_test = load_gold_labels(session, annotator_name=gold_annotator, split=2)

In [6]:
from snorkel.learning.pytorch import LSTM

# Settings forwarded unchanged to LSTM.train() as keyword arguments.
train_kwargs = dict(
    lr=0.001,
    embedding_dim=200,
    hidden_dim=200,
    n_epochs=50,
    dropout=0.25,
    seed=1701,
)

# Fit on the training candidates and their marginals; the dev candidates and
# dev gold labels are supplied so a dev score is reported per epoch.
lstm = LSTM(n_threads=None)
lstm.train(
    train_cands,
    train_marginals,
    X_dev=dev_cands,
    Y_dev=L_gold_dev,
    **train_kwargs
)


[LSTM] Training model
[LSTM] n_train=3717  #epochs=50  batch size=64




[LSTM] Epoch 1 (41.82s)	Average loss=0.449000	Dev F1=16.15
[LSTM] Epoch 2 (82.24s)	Average loss=0.366804	Dev F1=50.76
[LSTM] Epoch 3 (122.29s)	Average loss=0.361307	Dev F1=62.87
[LSTM] Epoch 4 (162.81s)	Average loss=0.314455	Dev F1=64.54
[LSTM] Epoch 5 (203.84s)	Average loss=0.302623	Dev F1=58.08
[LSTM] Epoch 6 (249.88s)	Average loss=0.297812	Dev F1=62.08
[LSTM] Epoch 7 (295.99s)	Average loss=0.296380	Dev F1=64.44
[LSTM] Epoch 8 (340.45s)	Average loss=0.290911	Dev F1=63.68
[LSTM] Epoch 9 (384.73s)	Average loss=0.289008	Dev F1=66.88
[LSTM] Epoch 10 (428.36s)	Average loss=0.285912	Dev F1=58.20
[LSTM] Epoch 11 (473.08s)	Average loss=0.282898	Dev F1=62.79
[LSTM] Epoch 12 (517.66s)	Average loss=0.280201	Dev F1=62.58
[LSTM] Epoch 13 (561.71s)	Average loss=0.281688	Dev F1=59.23
[LSTM] Epoch 14 (605.81s)	Average loss=0.278078	Dev F1=59.93
[LSTM] Epoch 15 (651.19s)	Average loss=0.280147	Dev F1=65.14
[LSTM] Epoch 16 (695.36s)	Average loss=0.275862	Dev F1=62.79
[LSTM] Epoch 17 (740.71s)	Average l

In [7]:
# Restore the model stored under the name 'LSTM' into the existing `lstm` object.
# NOTE(review): only the model name is passed, so the storage location is the
# library default — confirm it matches where training wrote its checkpoint.
lstm.load(model_name='LSTM')

[LSTM] Loaded model <LSTM>


In [21]:
# Evaluate on the test split against its gold labels; the three results are
# unpacked as precision, recall and F1 and printed by the following cell.
# NOTE(review): `b` is presumably the decision threshold applied to the
# predicted marginals — confirm against the snorkel documentation.
p, r, f1 = lstm.score(
    test_cands,
    L_gold_test,
    b=0.7,
)

In [22]:
# Report the three test metrics, each rounded to three decimal places.
score_template = "Prec: {0:.3f}, Recall: {1:.3f}, F1 Score: {2:.3f}"
print(score_template.format(p, r, f1))

Prec: 0.702, Recall: 0.331, F1 Score: 0.450


In [23]:
# Error breakdown on the test split, using the same `b` value as the scoring
# cell. The call prints a score summary and its four results are unpacked as
# tp, fp, tn, fn.
tp, fp, tn, fn = lstm.error_analysis(
    session,
    test_cands,
    L_gold_test,
    b=0.7,
)

Scores (Un-adjusted)
Pos. class accuracy: 0.331
Neg. class accuracy: 0.949
Precision            0.702
Recall               0.331
F1                   0.45
----------------------------------------
TP: 87 | FP: 37 | TN: 689 | FN: 176



In [11]:
# Compute the model's marginals for the split-3 ("final") candidates.
# The result is only displayed as the cell output; it is not assigned to a name.
lstm.marginals(final_cands)

array([0.4785881 , 0.61027616, 0.47855568, ..., 0.47369137, 0.43396294,
       0.6857783 ], dtype=float32)

In [12]:
# Persist the model's marginals, first for the test split and then for the
# final split, through the Snorkel session.
# NOTE(review): verify whether save_marginals() replaces previously stored
# non-training marginals; if it does, the second call discards the rows written
# by the first and only the final-split marginals remain in the database.
for cands in (test_cands, final_cands):
    lstm.save_marginals(session, cands)

Saved 989 marginals
Saved 1111 marginals


In [28]:
import yaml
import psycopg2
from psycopg2.extensions import AsIs


def _read_yaml(path):
    """Parse the YAML file at `path` with the safe loader and return the result."""
    with open(path, 'r') as handle:
        return yaml.safe_load(handle)


# Connection settings and general configuration live in two local YAML files.
credentials = _read_yaml('./credentials')
config = _read_yaml('./config')

# Open a direct psycopg2 connection to the Snorkel database using the
# 'snorkel_postgres' section of the credentials file, plus a cursor for the
# SQL cells that follow.
pg = credentials['snorkel_postgres']
snorkel_connection = psycopg2.connect(
    dbname=pg['database'],
    user=pg['user'],
    password=pg['password'],
    host=pg['host'],
    port=pg['port'],
)
snorkel_cursor = snorkel_connection.cursor()

In [29]:
# Step 1: copy every non-training row of `marginal` into `final_results1`.
# The table is dropped first so the cell can be re-run: a bare CREATE TABLE
# fails with "relation already exists" on the second run and leaves the open
# transaction aborted, which makes every later statement fail as well.
# Nothing is persisted until the connection is committed.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results1;")
snorkel_cursor.execute(""" CREATE TABLE final_results1 AS
 SELECT * FROM marginal WHERE marginal.training = false; """)

In [30]:
# Step 2: attach each marginal to its `mineral` candidate row (joined on
# candidate_id = mineral.id), carrying over the ids of the candidate's
# mineral-name and strat-name arguments.
# The table is dropped first so the cell can be re-run: a bare CREATE TABLE
# fails with "relation already exists" on the second run and leaves the open
# transaction aborted.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results2;")
snorkel_cursor.execute("""CREATE TABLE final_results2 AS
SELECT final_results1.id, final_results1.candidate_id, final_results1.value, final_results1.probability, mineral.mineral_name_id,
mineral.strat_name_id
FROM mineral
INNER JOIN final_results1 ON final_results1.candidate_id = mineral.id;
 """)

In [31]:
# Step 3: resolve the mineral-name argument against `span` (joined on
# mineral_name_id = span.id) to obtain its sentence id and character offsets.
# The table is dropped first so the cell can be re-run: a bare CREATE TABLE
# fails with "relation already exists" on the second run and leaves the open
# transaction aborted.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results3;")
snorkel_cursor.execute("""CREATE TABLE final_results3 AS
SELECT final_results2.candidate_id, final_results2.probability, final_results2.mineral_name_id, final_results2.strat_name_id, span.id as mineral_span_id, span.sentence_id as mineral_sentence_id, span.char_start as min_char_start, span.char_end as min_char_end
FROM final_results2
INNER JOIN span ON final_results2.mineral_name_id = span.id; """)

In [32]:
# Step 4: resolve the strat-name argument against `span` (joined on
# strat_name_id = span.id) to obtain its sentence id and character offsets.
# The table is dropped first so the cell can be re-run: a bare CREATE TABLE
# fails with "relation already exists" on the second run and leaves the open
# transaction aborted.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results4;")
snorkel_cursor.execute("""CREATE TABLE final_results4 AS
SELECT final_results3.candidate_id, final_results3.probability, final_results3.mineral_name_id, final_results3.strat_name_id,final_results3.mineral_sentence_id, final_results3.min_char_start, final_results3.min_char_end,  span.id as strat_span_id, span.sentence_id as strat_sentence_id, span.char_start as strat_char_start, span.char_end as strat_char_end
FROM final_results3
INNER JOIN span ON final_results3.strat_name_id = span.id; """)

In [33]:
# Step 5: add the sentence-level columns (document id, text, words, offsets and
# the linguistic tag columns) by joining `sentence` on the mineral span's
# sentence id. Only the mineral span's sentence is joined here.
# The table is dropped first so the cell can be re-run: a bare CREATE TABLE
# fails with "relation already exists" on the second run and leaves the open
# transaction aborted.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results5;")
snorkel_cursor.execute("""CREATE TABLE final_results5 AS
SELECT final_results4.candidate_id, final_results4.probability, final_results4.mineral_name_id, final_results4.strat_name_id,
final_results4.mineral_sentence_id, final_results4.min_char_start, final_results4.min_char_end, final_results4.strat_span_id,
final_results4.strat_sentence_id, final_results4.strat_char_start, final_results4.strat_char_end, sentence.document_id,
sentence.text, sentence.words, sentence.char_offsets, sentence.lemmas, sentence.pos_tags, sentence.ner_tags, sentence.dep_parents,
sentence.dep_labels
FROM final_results4
INNER JOIN sentence ON final_results4.mineral_sentence_id = sentence.id;
 """)

In [34]:
# FINAL RESULTS POSTGRES TABLE
# Last step: join `final_results5` with `document` on document_id = document.id,
# keeping every column from both sides.
# The table is dropped first so the cell can be re-run: a bare CREATE TABLE
# fails with "relation already exists" on the second run and leaves the open
# transaction aborted.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results;")
snorkel_cursor.execute("""CREATE TABLE final_results AS
SELECT * FROM final_results5
INNER JOIN document ON final_results5.document_id = document.id;

 """)

In [35]:
# Persist the statements executed on this connection, then release the cursor
# and the connection. The cleanup sits in `finally` so both are closed even if
# commit() raises, instead of leaving the connection open in the kernel.
try:
    snorkel_connection.commit()
finally:
    snorkel_cursor.close()
    snorkel_connection.close()