In [31]:


import spacy
from spacy.tokens import Doc
from spacy.training import Example
from spacy.util import minibatch, compounding
from spacy.matcher import Matcher
import random
import json
import re

# Step 1: Load the original model
nlp = spacy.load("en_core_web_sm")
nlp.disable_pipes("ner")  # Disable the NER component of the original model

# Step 2: Load the training data from JSON
TRAINING_DATA = []
with open("project-7-at-2023-07-13-12-38-aa52cb06.json", "r") as f:
    json_data = json.load(f)

for data in json_data:
    text = data["text"]
    entities = data["label"]
    entity_tuples = []
    for entity in entities:
        start = entity["start"]
        end = entity["end"]
        label = str(entity["labels"][0])  # Convert label to string format
        entity_tuples.append((start, end, label))
    TRAINING_DATA.append((text, {"entities": entity_tuples}))    


# Step 3: Enable and train the NER component with rehearsal
nlp.enable_pipe("ner")


#Step 5: Training the model
n_iter = 30
for _ in range(n_iter):
    losses = {}
    random.shuffle(TRAINING_DATA)
    batches = minibatch(TRAINING_DATA, size=compounding(4.0, 32.0, 1.001))
    for batch in batches:
        texts, annotations = zip(*batch)
        examples = []
        for text, annotation in zip(texts, annotations):
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotation)
            examples.append(example)

        nlp.update(examples, losses=losses)

    # Step 6: Generate additional examples using original model predictions as annotations (rehearsal)
    rehearsal_examples = []
    for text, _ in TRAINING_DATA:
        doc = nlp(text)
        rehearsal_entities = [(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents]
        rehearsal_examples.append(Example.from_dict(doc, {"entities": rehearsal_entities}))

    # Step 7: Combine the original and rehearsal examples
    combined_examples = examples + rehearsal_examples
    nlp.update(combined_examples, losses=losses)
    print("Losses:", losses)

Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 69.52981865239956}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 40.487405186913634}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 36.14376766816615}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 31.472512658421003}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 52.69568395905905}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 58.71613860207239}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 78.64948080326072}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 54.36086083917658}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 58.347731899247115}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 79.52791448633815}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 62.33102255340523}
Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 53.245793605236514}
Losses: {'tok2vec': 0.0,

In [1]:
!pip install -U spacy
!python3 -m spacy download en_core_web_sm

Looking in indexes: https://pypi.apple.com/simple/, https://artifacts.apple.com/api/pypi/apple-pypi-integration-local/simple/
Collecting spacy
  Downloading https://pypi.apple.com/packages/packages/b2/6d/7abebaa1df4d895dc5fad6eb26ce2b7f1e401d709c17cc3b8bff62248884/spacy-3.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 3.3 MB/s eta 0:00:01
Collecting srsly<3.0.0,>=2.4.3
  Downloading https://pypi.apple.com/packages/packages/42/2d/a67f395915597a7e6cbda4b51d0cb1e0f750e531fba81ca3bdee0d048e89/srsly-2.4.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (491 kB)
[K     |████████████████████████████████| 491 kB 3.2 MB/s eta 0:00:01
Collecting pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4
  Downloading https://pypi.apple.com/packages/packages/6a/19/af6ac6f22f9a2a3866fc5a726dca0b7d524e1660821388dc99d56764e6df/pydantic-1.10.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[K     |██████████████████

In [34]:
output_dir = "Custom_Ner/"  # Replace with the desired output directory
nlp.to_disk(output_dir)

In [24]:
!pip install --upgrade pyOpenSSL

Looking in indexes: https://pypi.apple.com/simple/, https://artifacts.apple.com/api/pypi/apple-pypi-integration-local/simple/
Collecting pyOpenSSL
  Downloading https://pypi.apple.com/packages/packages/f0/e2/f8b4f1c67933a4907e52228241f4bd52169f3196b70af04403b29c63238a/pyOpenSSL-23.2.0-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 3.3 MB/s eta 0:00:011
Installing collected packages: pyOpenSSL
  Attempting uninstall: pyOpenSSL
    Found existing installation: pyOpenSSL 20.0.1
    Uninstalling pyOpenSSL-20.0.1:
      Successfully uninstalled pyOpenSSL-20.0.1
[31mERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

snowflake-connector-python 2.7.1 requires cryptography<4.0.0,>=3.1.0, but you'll have cryptography 41.0.2 whic

In [25]:
!pip install cryptography==3.4.8 pyOpenSSL==16.2.0

Looking in indexes: https://pypi.apple.com/simple/, https://artifacts.apple.com/api/pypi/apple-pypi-integration-local/simple/
Collecting cryptography==3.4.8
  Using cached https://pypi.apple.com/packages/packages/96/07/4d23f8e34e56d8eeb2c37cd5924928a01c3dd756a1d99e470181bc57551e/cryptography-3.4.8-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
Collecting pyOpenSSL==16.2.0
  Downloading https://pypi.apple.com/packages/packages/ac/93/b4cd538d31adacd07f83013860db6b88d78755af1f3fefe68ec22d397e7b/pyOpenSSL-16.2.0-py2.py3-none-any.whl (43 kB)
[K     |████████████████████████████████| 43 kB 6.3 MB/s  eta 0:00:01
Installing collected packages: cryptography, pyOpenSSL
  Attempting uninstall: cryptography
    Found existing installation: cryptography 41.0.2
    Uninstalling cryptography-41.0.2:
      Successfully uninstalled cryptography-41.0.2
  Attempting uninstall: pyOpenSSL
    Found existing installation: pyOpenSSL 23.2.0
    Uninstalling pyOpenSSL-23.2.0:
      Successf