In [1]:
! pip install -U spacy -q

In [2]:
# Print the installed spaCy version, its location, the platform and the
# pipelines available in this environment.
!python -m spacy info

[1m

spaCy version    3.8.7                         
Location         /usr/local/lib/python3.12/dist-packages/spacy
Platform         Linux-6.6.105+-x86_64-with-glibc2.35
Python version   3.12.12                       
Pipelines        en_core_web_sm (3.8.0)        



In [3]:
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm

# Blank English pipeline; used below only to tokenize raw text via make_doc.
nlp = spacy.blank("en")
# Container that collects the annotated Doc objects before they are written to disk.
db = DocBin()

In [4]:
import json

# Load the exported annotations. The context manager closes the file handle
# as soon as parsing is done, and the encoding is stated explicitly because the
# texts contain non-ASCII characters.
with open('/content/annotations.json', encoding='utf-8') as f:
    TRAIN_DATA = json.load(f)

In [5]:
# Display the loaded annotations: a dict with 'classes' (the label names) and
# 'annotations' (pairs of text and {'entities': [[start, end, label], ...]}).
TRAIN_DATA

{'classes': ['ORG',
  'DATE',
  'MONEY',
  'PERCENT',
  'PRODUCT',
  'SEGMENT',
  'LOCATION',
  'DEPARTMENT',
  'TECHNOLOGY'],
 'annotations': [['Apple Inc. reported strong financial performance for the fourth quarter of FY2025, with total revenue reaching $97.4 billion, marking a 6.8% year-over-year increase driven primarily by higher iPhone and Services sales. The company’s net income stood at $24.6 billion, supported by robust demand across North America and Asia-Pacific regions, despite ongoing global supply chain constraints.\r',
   {'entities': [[0, 10, 'ORG'],
     [57, 81, 'DATE'],
     [111, 124, 'MONEY'],
     [136, 140, 'PERCENT'],
     [192, 198, 'PRODUCT'],
     [203, 218, 'SEGMENT'],
     [253, 266, 'MONEY'],
     [302, 315, 'LOCATION'],
     [320, 332, 'LOCATION']]}],
  ['Operating expenses rose modestly by 3.2%, mainly due to increased R&D spending on artificial intelligence and wearable technologies. Apple also announced a $20 billion share repurchase program, reflecti

In [6]:
# Convert the annotated examples into spaCy's binary training format.
# A fresh DocBin is created here so that re-running this cell does not append
# the same documents a second time.
db = DocBin()
for text, annot in tqdm(TRAIN_DATA['annotations']):
    doc = nlp.make_doc(text)
    ents = []
    for start, end, label in annot["entities"]:
        # "contract" snaps character offsets inward to token boundaries;
        # char_span returns None when no whole token fits inside the range.
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            # Report which annotation was dropped so it can be fixed at the source.
            print(f"Skipping entity [{start}, {end}, {label!r}]: {text[start:end]!r}")
        else:
            ents.append(span)
    doc.ents = ents
    db.add(doc)

# Serialize once, after every document has been added, instead of rewriting
# the file on each loop iteration.
db.to_disk("./training_data.spacy")


100%|██████████| 3/3 [00:00<00:00, 215.29it/s]

Skipping entity
Skipping entity
Skipping entity





In [7]:
! python -m spacy init config config.cfg --lang en --pipeline ner --optimize efficiency

[38;5;3m⚠ To generate a more effective transformer-based config (GPU-only),
install the spacy-transformers package and re-run this command. The config
generated now does not use transformers.[0m
[38;5;4mℹ Generated config template specific for your use case[0m
- Language: en
- Pipeline: ner
- Optimize for: efficiency
- Hardware: CPU
- Transformer: None
[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [8]:
# Train the pipeline described by config.cfg; results are saved to the current
# directory (--output ./).
# NOTE(review): --paths.dev points at the same file as --paths.train, so the
# reported ENTS_F / ENTS_P / ENTS_R are measured on the training data itself
# and say nothing about generalization. Use a held-out dev file when available.
! python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./training_data.spacy

[38;5;4mℹ Saving to output directory: .[0m
[38;5;4mℹ Using CPU[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     40.08    0.00    0.00    0.00    0.00
100     200        755.60   2108.75  100.00  100.00  100.00    1.00
268     400          0.00      0.00  100.00  100.00  100.00    1.00
468     600          0.00      0.00  100.00  100.00  100.00    1.00
668     800          0.00      0.00  100.00  100.00  100.00    1.00
868    1000          0.00      0.00  100.00  100.00  100.00    1.00
1068    1200          0.00      0.00  100.00  100.00  100.00    1.00
1268    1400          0.00      0.00  100.00  100.00  100.00    1.00
1468    1600          0.00      0.00  100.00  100.00  100.00    1.00
1668    1800          0.00      0.00  100.00  10

In [9]:
# Load the best model saved by the training run. The path is relative so that
# it resolves against the same directory that `--output ./` wrote to, instead
# of assuming the working directory is /content.
nlp_ner = spacy.load("./model-best")

In [24]:
# Sample financial-report text to run through the trained NER pipeline.
sample_text = "AstraFin Technologies Pvt. Ltd. posted revenue of ₹98.4 crore, marking a 7.2% increase over ₹91.8 crore recorded in Q1 FY2024. The growth was fueled by higher adoption of its fintech SaaS platform and expanded partnerships with regional banks. Operating profit stood at ₹18.6 crore, while net profit rose 6.1% year-over-year to ₹12.4 crore. The company attributed the margin improvement to reduced marketing expenditure and automation-driven cost savings."
doc = nlp_ner(sample_text)

In [25]:
# Visualize the entities found in `doc` using displaCy's "ent" style;
# jupyter=True requests rendering inline in the notebook output.
spacy.displacy.render(doc, style="ent", jupyter=True)