From bb73005da1c1a3a3c41983e1353dba638407363d Mon Sep 17 00:00:00 2001 From: joriscram Date: Thu, 8 Mar 2018 15:19:46 +0100 Subject: [PATCH] Implement review comments; also update the README with instructions to install a spaCy NLP model, so the assets folder is no longer needed. --- README.md | 3 +++ bin/main.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 34b40e2..9b02443 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,9 @@ conda env create -f environment.yml # Activate Python virtual environment source activate resume +#Retrieve language model from spacy +python -m spacy download en + # Run code (with default configurations) cd bin/ python main.py diff --git a/bin/main.py b/bin/main.py index 82bd9ee..3216811 100644 --- a/bin/main.py +++ b/bin/main.py @@ -12,8 +12,8 @@ import lib import field_extraction +import spacy -import en_core_web_sm def main(): """ @@ -27,7 +27,7 @@ def main(): observations = extract() # Spacy: Spacy NLP - nlp = en_core_web_sm.load() + nlp = spacy.load('en') # Transform data to have appropriate fields observations, nlp = transform(observations, nlp) @@ -42,7 +42,7 @@ def text_extract_utf8(f): try: return unicode(textract.process(f), "utf-8") except UnicodeDecodeError, e: - return e + return '' def extract(): logging.info('Begin extract')