In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import os

# TO USE A DATABASE OTHER THAN SQLITE, USE THIS LINE
# Note that this is necessary for parallel execution amongst other things...
os.environ['SNORKELDB'] = "postgres://evaporite:dummy@localhost:5432/evaporite_snorkel"

import numpy as np
from snorkel import SnorkelSession
session = SnorkelSession()

In [2]:
from snorkel.models import candidate_subclass

# Candidate class named 'Mineral' with two arguments: a mineral name and a
# stratigraphic name.
mineral_argument_names = ['mineral_name', 'strat_name']
Mineral = candidate_subclass('Mineral', mineral_argument_names)

In [3]:
from snorkel.annotations import load_marginals

# Load the marginals stored for split 0, the same split this notebook
# queries as the training candidates.
train_marginals = load_marginals(
    session,
    split=0
)

In [4]:
def _mineral_candidates(split_id):
    """Return all Mineral candidates in the given split, ordered by candidate id."""
    in_split = session.query(Mineral).filter(Mineral.split == split_id)
    return in_split.order_by(Mineral.id).all()


# Splits used in this notebook: 0 = train, 1 = dev, 2 = test, 3 = final.
train_cands, dev_cands, test_cands, final_cands = [
    _mineral_candidates(split_id) for split_id in (0, 1, 2, 3)
]

In [5]:
# Report the number of candidates in each split, one count per line
# (train, dev, test, final).
for split_cands in (train_cands, dev_cands, test_cands, final_cands):
    print(len(split_cands))

4366
1242
1118
1671


In [6]:
from snorkel.annotations import load_gold_labels

# Gold labels from the 'gold' annotator for the dev (1) and test (2) splits.
L_gold_dev, L_gold_test = [
    load_gold_labels(session, annotator_name='gold', split=gold_split)
    for gold_split in (1, 2)
]

In [None]:
from snorkel.learning.pytorch import LSTM

# Hyperparameters forwarded to `LSTM.train` as keyword arguments.
train_kwargs = dict(
    lr=0.001,
    embedding_dim=200,
    hidden_dim=200,
    n_epochs=50,
    dropout=0.25,
    seed=1701
)

lstm = LSTM(n_threads=None)

# Train on the training candidates and their marginals; the dev candidates and
# dev gold labels are passed so a dev score is reported during training.
# NOTE(review): the recorded run below logs Dev F1=0.00 for every one of the
# 13 logged epochs while the average loss keeps decreasing -- worth
# investigating (label balance, gold-label alignment) before trusting scores.
lstm.train(
    train_cands,
    train_marginals,
    X_dev=dev_cands,
    Y_dev=L_gold_dev,
    **train_kwargs
)


[LSTM] Training model
[LSTM] n_train=4341  #epochs=50  batch size=64




[LSTM] Epoch 1 (81.12s)	Average loss=0.374917	Dev F1=0.00
[LSTM] Epoch 2 (161.99s)	Average loss=0.323917	Dev F1=0.00
[LSTM] Epoch 3 (240.24s)	Average loss=0.298311	Dev F1=0.00
[LSTM] Epoch 4 (318.52s)	Average loss=0.283794	Dev F1=0.00
[LSTM] Epoch 5 (393.93s)	Average loss=0.277269	Dev F1=0.00
[LSTM] Epoch 6 (470.54s)	Average loss=0.273127	Dev F1=0.00
[LSTM] Epoch 7 (547.95s)	Average loss=0.270132	Dev F1=0.00
[LSTM] Epoch 8 (626.25s)	Average loss=0.268499	Dev F1=0.00
[LSTM] Epoch 9 (703.02s)	Average loss=0.267336	Dev F1=0.00
[LSTM] Epoch 10 (779.41s)	Average loss=0.265546	Dev F1=0.00
[LSTM] Epoch 11 (855.37s)	Average loss=0.265038	Dev F1=0.00
[LSTM] Epoch 12 (931.51s)	Average loss=0.264045	Dev F1=0.00
[LSTM] Epoch 13 (1009.29s)	Average loss=0.263472	Dev F1=0.00


In [None]:
# Reload model state saved under the name 'LSTM' into the `lstm` object.
# NOTE(review): presumably this restores a checkpoint written during training;
# the save location is not specified in this notebook -- confirm.
lstm.load(model_name='LSTM')

In [None]:
# Display the gold labels loaded for the test split (split 2) as a sanity check.
L_gold_test

In [None]:
# Score the model on the test candidates against the test gold labels.
# NOTE(review): `b` is presumably the decision threshold applied to the
# predicted marginals -- confirm against the snorkel API.
test_scores = lstm.score(test_cands, L_gold_test, b=0.7)
p, r, f1 = test_scores

In [None]:
# Print precision, recall and F1 from the scoring cell, three decimals each.
score_template = "Prec: {0:.3f}, Recall: {1:.3f}, F1 Score: {2:.3f}"
print(score_template.format(p, r, f1))

In [None]:
# Error analysis on the test split, using the same b=0.7 as the scoring cell.
# The four returned values are unpacked as tp, fp, tn, fn.
error_buckets = lstm.error_analysis(session, test_cands, L_gold_test, b=0.7)
tp, fp, tn, fn = error_buckets

In [None]:
# Display the model's predicted marginals for the final (split 3) candidates.
lstm.marginals(final_cands)

In [None]:
# Persist the model's marginals for the test candidates, then for the final
# candidates (order preserved).
# NOTE(review): snorkel's save_marginals may delete previously saved
# non-training marginals before inserting; if so, only the final_cands rows
# survive the second call -- confirm whether test predictions are expected in
# the exported results.
for cands_to_save in (test_cands, final_cands):
    lstm.save_marginals(session, cands_to_save)

In [None]:
import yaml, psycopg2
from psycopg2.extensions import AsIs


def _read_yaml(path):
    """Parse the YAML file at `path` with the safe loader and return the result."""
    with open(path, 'r') as yaml_file:
        return yaml.load(yaml_file, Loader=yaml.SafeLoader)


# Connection credentials and general configuration live in local YAML files.
credentials = _read_yaml('./credentials')
config = _read_yaml('./config')

# Open a direct psycopg2 connection to the Snorkel Postgres database using the
# 'snorkel_postgres' section of the credentials file.
snorkel_pg = credentials['snorkel_postgres']
snorkel_connection = psycopg2.connect(
    dbname=snorkel_pg['database'],
    user=snorkel_pg['user'],
    password=snorkel_pg['password'],
    host=snorkel_pg['host'],
    port=snorkel_pg['port']
)
snorkel_cursor = snorkel_connection.cursor()

In [None]:
# Stage 1: copy every non-training marginal row into final_results1.
# The table is dropped first so this cell can be re-run: without the drop a
# second run fails because the table already exists, and the failed statement
# leaves the open transaction aborted for every later cell.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results1;")
snorkel_cursor.execute("""
CREATE TABLE final_results1 AS
SELECT *
FROM marginal
WHERE marginal.training = false;
""")

In [None]:
# Stage 2: attach each marginal's candidate argument ids (mineral_name_id,
# strat_name_id) by joining final_results1 to the mineral table on candidate id.
# Dropped first so the cell is re-runnable; a failed CREATE would otherwise
# abort the open transaction for every later cell.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results2;")
snorkel_cursor.execute("""
CREATE TABLE final_results2 AS
SELECT
    final_results1.id,
    final_results1.candidate_id,
    final_results1.value,
    final_results1.probability,
    mineral.mineral_name_id,
    mineral.strat_name_id
FROM mineral
INNER JOIN final_results1 ON final_results1.candidate_id = mineral.id;
""")

In [None]:
# Stage 3: resolve the mineral-name span (sentence id and character offsets)
# by joining final_results2 to the span table on mineral_name_id.
# Dropped first so the cell is re-runnable; a failed CREATE would otherwise
# abort the open transaction for every later cell.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results3;")
snorkel_cursor.execute("""
CREATE TABLE final_results3 AS
SELECT
    final_results2.candidate_id,
    final_results2.probability,
    final_results2.mineral_name_id,
    final_results2.strat_name_id,
    span.id AS mineral_span_id,
    span.sentence_id AS mineral_sentence_id,
    span.char_start AS min_char_start,
    span.char_end AS min_char_end
FROM final_results2
INNER JOIN span ON final_results2.mineral_name_id = span.id;
""")

In [None]:
# Stage 4: resolve the stratigraphic-name span (sentence id and character
# offsets) by joining final_results3 to the span table on strat_name_id.
# Dropped first so the cell is re-runnable; a failed CREATE would otherwise
# abort the open transaction for every later cell.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results4;")
snorkel_cursor.execute("""
CREATE TABLE final_results4 AS
SELECT
    final_results3.candidate_id,
    final_results3.probability,
    final_results3.mineral_name_id,
    final_results3.strat_name_id,
    final_results3.mineral_sentence_id,
    final_results3.min_char_start,
    final_results3.min_char_end,
    span.id AS strat_span_id,
    span.sentence_id AS strat_sentence_id,
    span.char_start AS strat_char_start,
    span.char_end AS strat_char_end
FROM final_results3
INNER JOIN span ON final_results3.strat_name_id = span.id;
""")

In [None]:
# Stage 5: attach the sentence row (text and parse columns) for the sentence
# that contains the mineral span, joining on mineral_sentence_id.
# Dropped first so the cell is re-runnable; a failed CREATE would otherwise
# abort the open transaction for every later cell.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results5;")
snorkel_cursor.execute("""
CREATE TABLE final_results5 AS
SELECT
    final_results4.candidate_id,
    final_results4.probability,
    final_results4.mineral_name_id,
    final_results4.strat_name_id,
    final_results4.mineral_sentence_id,
    final_results4.min_char_start,
    final_results4.min_char_end,
    final_results4.strat_span_id,
    final_results4.strat_sentence_id,
    final_results4.strat_char_start,
    final_results4.strat_char_end,
    sentence.document_id,
    sentence.text,
    sentence.words,
    sentence.char_offsets,
    sentence.lemmas,
    sentence.pos_tags,
    sentence.ner_tags,
    sentence.dep_parents,
    sentence.dep_labels
FROM final_results4
INNER JOIN sentence ON final_results4.mineral_sentence_id = sentence.id;
""")

In [None]:
# FINAL RESULTS POSTGRES TABLE
# Final stage: join final_results5 to the document table on document_id and
# keep every column from both sides.
# Dropped first so the cell is re-runnable; a failed CREATE would otherwise
# abort the open transaction and the later commit would persist nothing.
snorkel_cursor.execute("DROP TABLE IF EXISTS final_results;")
snorkel_cursor.execute("""
CREATE TABLE final_results AS
SELECT *
FROM final_results5
INNER JOIN document ON final_results5.document_id = document.id;
""")

In [None]:
# Commit the statements executed on this connection, then release the cursor
# and the connection. The commit must come first: nothing executed through
# `snorkel_cursor` is committed before this point in the notebook.
snorkel_connection.commit()
snorkel_cursor.close()
snorkel_connection.close()