In [1]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl (12.8 MB)
     |████████████████████████████████| 12.8 MB 1.4 MB/s eta 0:00:01
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
import json
import spacy
from spacy.pipeline import EntityRuler

In [3]:
def load_data(file):
    """Parse a UTF-8 encoded JSON file and return the decoded object.

    Args:
        file: Path of the JSON file to read.

    Returns:
        Whatever Python object the JSON document decodes to.
    """
    with open(file, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

In [4]:
# Load the raw token patterns from the JSON file and print them for inspection.
commodities = load_data("commodities-patterns.json")
print(commodities)

[[{'LOWER': 'aluminium', 'POS': 'NOUN'}], [{'LOWER': 'cattle', 'POS': 'NOUN'}], [{'LOWER': 'cobalt', 'POS': 'NOUN'}], [{'LOWER': 'cocoa', 'POS': 'NOUN'}], [{'LOWER': 'coffee', 'POS': 'NOUN'}], [{'LOWER': 'copper', 'POS': 'NOUN'}], [{'LOWER': 'corn', 'POS': 'NOUN'}], [{'LOWER': 'cotton', 'POS': 'NOUN'}], [{'LOWER': 'crude', 'POS': 'ADJ'}, {'LOWER': 'oil', 'POS': 'NOUN'}], [{'LOWER': 'gold', 'POS': 'NOUN'}], [{'LOWER': 'iron', 'POS': 'NOUN'}, {'LOWER': 'ore', 'POS': 'NOUN'}], [{'LOWER': 'lithium', 'POS': 'NOUN'}], [{'LOWER': 'natural', 'POS': 'ADJ'}, {'LOWER': 'gas', 'POS': 'NOUN'}], [{'LOWER': 'palm', 'POS': 'NOUN'}, {'LOWER': 'oil', 'POS': 'NOUN'}], [{'LOWER': 'poultry', 'POS': 'NOUN'}], [{'LOWER': 'rice', 'POS': 'NOUN'}], [{'LOWER': 'silver', 'POS': 'NOUN'}], [{'LOWER': 'sugar', 'POS': 'NOUN'}], [{'LOWER': 'wheat', 'POS': 'NOUN'}], [{'LOWER': 'zinc', 'POS': 'NOUN'}]]


In [5]:
def create_patterns(file, type):
    """Attach an entity label to every pattern stored in a JSON file.

    Args:
        file: Path of the JSON file holding a list of patterns; it is read
            with ``load_data``.
        type: Label assigned to every pattern. The name shadows the builtin
            ``type`` inside this function; it is kept because it is part of
            the function's signature.

    Returns:
        A list with one ``{"label": ..., "pattern": ...}`` dict per pattern,
        in the same order as in the file.
    """
    return [
        {"label": type, "pattern": token_specs}
        for token_specs in load_data(file)
    ]

In [6]:
def create_rules(patterns, model="en_core_web_sm", output_dir="commodities_ner_rules"):
    """Build a rule-based entity pipeline from labelled patterns and save it.

    Loads a spaCy pipeline, removes its ``ner`` component when one is present,
    adds an ``entity_ruler`` holding ``patterns`` and writes the resulting
    pipeline to ``output_dir``.

    Args:
        patterns: List of ``{"label": ..., "pattern": ...}`` dicts, e.g. the
            output of ``create_patterns``.
        model: Name or path of the spaCy pipeline to load. Defaults to the
            pipeline this notebook downloads.
        output_dir: Directory the pipeline is serialized to.

    Returns:
        None. The pipeline is only written to disk.
    """
    nlp = spacy.load(model)

    # remove_pipe raises ValueError for a missing component, so only drop
    # "ner" when the loaded pipeline actually has one.
    if "ner" in nlp.pipe_names:
        nlp.remove_pipe("ner")

    # add_pipe appends at the end by default, so the ruler runs after the
    # components already in the loaded pipeline.
    ruler = nlp.add_pipe("entity_ruler")
    ruler.add_patterns(patterns)

    nlp.to_disk(output_dir)

In [7]:
# Wrap every pattern from the JSON file with the COMMODITY label and print the result.
patterns = create_patterns("commodities-patterns.json", "COMMODITY")
print(patterns)

[{'label': 'COMMODITY', 'pattern': [{'LOWER': 'aluminium', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'cattle', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'cobalt', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'cocoa', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'coffee', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'copper', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'corn', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'cotton', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'crude', 'POS': 'ADJ'}, {'LOWER': 'oil', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'gold', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'iron', 'POS': 'NOUN'}, {'LOWER': 'ore', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'lithium', 'POS': 'NOUN'}]}, {'label': 'COMMODITY', 'pattern': [{'LOWER': 'natural', 'POS':

In [8]:
# Build the entity-ruler pipeline from the labelled patterns and save it to disk.
create_rules(patterns)