# Model Tester (v1.1)
Before running any code here, run the following commands in a new terminal within the Binder instance. This only needs to be done *once* per Binder instance:
- `pip install spacy`
- `python -m spacy download en_core_web_lg`
- `pip install wget`

After that, everything in here should work.

In [None]:
# Import libraries:
import os
import pickle
import spacy
from thinc.api import Config
from spacy import Language
from spacy.lang.en import English
import sys
import en_core_web_lg
import wget
# Build the model. The download only happens once per Binder instance:
# re-running this cell reuses the file that is already on disk.

# Let the process continue if more than one OpenMP runtime gets loaded.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
url = "https://mediacloud-ihop.s3.amazonaws.com/models/spacy_model.p"
model_path = "spacy_model.p"
if os.path.exists(model_path):
    # Already downloaded. Calling wget again would save a second copy under a
    # suffixed name (e.g. "spacy_model (1).p") and leave this file untouched.
    take1 = model_path
else:
    take1 = wget.download(url, out=model_path)
# NOTE(security): pickle.load can execute arbitrary code from the file it
# reads. Only run this against the trusted model URL above.
with open(take1, "rb") as h:
    take = pickle.load(h)
# "./config.cfg" is read from the working directory; it is NOT downloaded here.
config = Config().from_disk("./config.cfg")
lang_cls = spacy.util.get_lang_class(config["nlp"]["lang"])
# Create the pipeline from the config, then restore it from the unpickled bytes.
nlp = lang_cls.from_config(config)
nlp = nlp.from_bytes(take)
# The model should now be a file named "spacy_model.p" in the nav pane. This will NOT be in the Github.

In [None]:
# Personal sandbox: swap in any text below to try the loaded model on it.
doc = nlp("bacon egg and cheese sandwich")

# Every token with its part-of-speech tag.
for token in doc:
    print(f"{token.text} {token.pos_}")

# Every noun chunk with the text of its root token.
for chunk in doc.noun_chunks:
    print(f"{chunk.text} {chunk.root.text}")

# Every entity the model found, with its label.
for ents in doc.ents:
    print(f"{ents.text} {ents.label_}")

In [7]:
# Testing random sentences.
# Each sentence in `testsentences` is paired, by position, with the list of
# entity texts we expect the model to tag in `testanswers`.
testsentences = [
    '',
    'get your car insurance at 50% average rates today by calling 334-808-1992',
    'today i ate a bacon cheeseburger with lettuce, onion and tomato. the salsa added to the top was too runny, so i would add some lemon juice on top.',
    'red pepper flakes are a great seasoning to add to many dishes.',
    'just one egg is fine, but i think two eggs will help the millefeuille maintain structure',
]
testanswers = [
    [],
    [],
    ['bacon', 'cheeseburger', 'lettuce', 'onion', 'tomato', 'salsa', 'lemon juice'],
    ['red pepper flakes', 'seasoning'],
    ['egg', 'egg', 'millefeuille'],
]
# Fail loudly up front if the two lists drift out of step, rather than with
# an IndexError (or a silently skipped sentence) part-way through the run.
if len(testsentences) != len(testanswers):
    raise ValueError("testsentences and testanswers must have the same length")
for sentence, expected in zip(testsentences, testanswers):
    doc = nlp(sentence)
    # Collect the text of every entity the model tagged in this sentence.
    nlp_answers = [ent.text for ent in doc.ents]
    print("Computer tags:")
    print(nlp_answers)
    print("Correct answer:")
    print(expected)
    

Computer tags:
[]
Correct answer:
[]
Computer tags:
[]
Correct answer:
[]
Computer tags:
['bacon', 'cheeseburger', 'onion', 'tomato', 'salsa', 'lemon', 'juice']
Correct answer:
['bacon', 'cheeseburger', 'lettuce', 'onion', 'tomato', 'salsa', 'lemon juice']
Computer tags:
['pepper', 'seasoning']
Correct answer:
['red pepper flakes', 'seasoning']
Computer tags:
['egg', 'eggs']
Correct answer:
['egg', 'egg', 'millefeuille']


In [None]:
# TO ADD: storyjson files to test.
# The identifiers are stored as strings: written bare, they are not valid
# Python (e.g. 5316...f68f is an invalid numeric literal), so the cell could
# not run. Presumably each one names a storyjson file -- TODO confirm.
storyjsons = [
    "e443c33f56c338bc50653946ce88460b",
    "5316376137984291a684487b1ff7b68f",
    "fa262c03a3f536e5165fc0e9cd061b64",
    "c332838cb3ab544ce16aaaf0d30d1587",
    "a94ef4308e8cae75766fea73531918c1",
    "0cf67eb7f52cd235509b530bf7c3ea9b",
    "c3aeef89e389f90bb137f28204861b49",
    "7e3345fe9fad3a161ac8aa11fa214831",
    "ff401e9686de3bec35052aa33a8382c5",
    "0b12807f55062696086ad3831211c284",
    "613aa03ee246323cd6ffcc4ea03e68cc",
    "a5d7a468b900da50f787f11e10673224",
    "f3d1c80cb0263594aadf4903d4ff448b",
    "ca6f92e3a1f112f6714cfa69bbe90cd0",
    "0acff7d82116e5c0f1b3eb1b6b5bade8",
    "703a2a8217ed493e0ae29b81386aa9b4",
    "b0d7baca212c60b884de5ccdb5025b28",
    "c3f04ed913c48f33238d9764c3817cfb",
    "cc834cc2d8291bcc4811387dfc810c94",
    "a5b2c77925de0529df3fcfce06c75243",
    "f83238aabbb6f29fc5f33b48bbf3efde",
    "a85cbd99b2dba1cd1dbc31b46952ac76",
    "239d90cb32a74d64daf8952cafca791d",
    "3db89c985c314dbdc4bc6244e2bd4e01",
    "7a2421f687bf90732cf4ba727ab6b09b",
]
