In [1]:
from __future__ import unicode_literals, print_function
import plac
import random
from pathlib import Path
import spacy
from tqdm import tqdm
from spacy.training.example import Example

## Train Data

In [2]:
# Training data for the NER component.
# Each item is (text, {"entities": [(start, end, label), ...]}): start/end are character
# offsets into text (end exclusive) and label is one of "Title", "Author",
# "Published At" or "Date of Publishing".
# Every span must cover whole words exactly (no partial token, no surrounding whitespace);
# a span that cuts through a token cannot be aligned to the tokenization.
# Fix: the date span of "Elsevier Ltd. ( 27 October 2023 ) " was (17,32), i.e.
# "7 October 2023 "; it is now (16,31), i.e. "27 October 2023".
TRAIN_DATA = [
    ("IoT based smart water quality monitoring system Varsha Lakshmikantha, Anjitha Hiriyannagowda, Akshay Manjunath, Aruna Patted, Jagadeesh Basavaiah , AudreArlene Anthony",
     {"entities":[(0,47,"Title"),(48,68,"Author"),(70,92,"Author"),(94,110,"Author"),(112,124,"Author"),(126,145,"Author"),(148,167,"Author")]}),
    ("A Review on Internet of Things (IoT) M.U. Farooq Muhammad Waseem Sadia Mazhar Anjum Khairi Talha Kamal",
     {"entities":[(0,36,"Title"),(37,48,"Author"),(49,64,"Author"),(65,77,"Author"),(78,90,"Author"),(91,102,"Author")]}),
    ("Designing a New Generalized Battery Management System John Chatzakis, Kostas Kalaitzakis, Nicholas C. Voulgaris, and Stefanos N. Manias ",
     {"entities":[(0,53,"Title"),(54,68,"Author"),(70,88,"Author"),(90,111,"Author"),(117,135,"Author")]}),
    ("Secure Data Acquisition for Battery Management Systems Fikret Basic, Christian Seifert, Christian Steger Robert Kofler",
     {"entities":[(0,54,"Title"),(55,67,"Author"),(69,86,"Author"),(88,104,"Author"),(105,118,"Author")]}),
    ("Digital twin challenges in biodiversity modelling Athanasios Trantas , Ruduan Plug , Paolo Pileggi , Elena Lazovik",
     {"entities":[(0,49,"Title"),(50,68,"Author"),(71,82,"Author"),(85,98,"Author"),(101,114,"Author")]}),
    ("Detection of Domestic Waste Based on YOLO Yaohui Hou",
     {"entities":[(0,41,"Title"),(42,52,"Author")]}),
    ("Smart e-waste management system utilizing Internet of Things and Deep Learning approaches Daniel Voskergian , Isam Ishaq",
     {"entities":[(0,89,"Title"),(90,107,"Author"),(110,120,"Author")]}),
    ("Fault prediction model in wind turbines using deep learning structure with enhance optimisation algorithm Mahendra Bhatu Gawali, Swapnali Sunil Gawali & Megharani Patil",
     {"entities":[(0,105,"Title"),(106,127,"Author"),(129,150,"Author"),(153,168,"Author")]}),
    ("Object Detection for Construction Waste Based on an Improved YOLOv5 Model Qinghui Zhou , Haoshi Liu , Yuhang Qiu and Wuchao Zheng",
     {"entities":[(0,73,"Title"),(74,86,"Author"),(89,99,"Author"),(102,112,"Author"),(117,129,"Author")]}),
    ("Region-based Convolutional Networks for Accurate Object Detection and Segmentation Ross Girshick , Jeff Donahue , Trevor Darrell and Jitendra Malik",
     {"entities":[(0,82,"Title"),(83,96,"Author"),(99,111,"Author"),(114,128,"Author"),(133,147,"Author")]}),
    ("Design of Electric Vehicle Battery Management System Qian Liu , Gang Chen",
     {"entities":[(0,52,"Title"),(53,61,"Author"),(64,73,"Author")]}),
    ("Detection of Cardiovascular Diseases in ECG Images Using Machine Learning and Deep Learning Methods Mohammed B. Abubaker and Bilal Babayigit",
     {"entities":[(0,99,"Title"),(100,120,"Author"),(125,140,"Author")]}),
    ("Microsoft Power BI Amrapali Bansal, A. K. Upadhyay",
     {"entities":[(0,18,"Title"),(19,34,"Author"),(36,50,"Author")]}),
    ("Hybrid deep learning for detecting lung diseases from X-ray images Subrato Bharati , Prajoy Podder, M. Rubaiyat Hossain Mondal ",
     {"entities":[(0,66,"Title"),(67,82,"Author"),(85,98,"Author"),(100,126,"Author")]}),
    ("Detection of Tuberculosis based on Deep Learning based methods Murali Krishna Puttagunta and S. Ravi ",
     {"entities":[(0,62,"Title"),(63,88,"Author"),(93,100,"Author")]}),
    ("Journal of Physics: Conference Series ( 20 March 2020 )",
     {"entities":[(0,37,"Published At"),(40,53,"Date of Publishing")]}),
    ("Elsevier Ltd. ( 30 June 2020 ) ",
     {"entities":[(0,13,"Published At"),(16,28,"Date of Publishing")]}),
    ("International Journal of Soft Computing and Engineering ( 3 July 2017 ) ",
     {"entities":[(0,55,"Published At"),(58,69,"Date of Publishing")]}),
    ("IEEE TRANSACTIONS ON ARTIFICIAL INTELLIGENCE ( 2 APRIL 2023 ) ",
     {"entities":[(0,44,"Published At"),(47,59,"Date of Publishing")]}),
    ("International Conference on Power, Grid and Energy Storage (12 August 2023) ",
     {"entities":[(0,58,"Published At"),(60,74,"Date of Publishing")]}),
    ("IEEE (26 July 2015) ",
     {"entities":[(0,4,"Published At"),(6,18,"Date of Publishing")]}),
    ("MDPI ( 28 December 2022 ) ",
     {"entities":[(0,4,"Published At"),(7,23,"Date of Publishing")]}),
    ("Journal of Control and Decision ( 28 Sep 2023 )",
     {"entities":[(0,31,"Published At"),(34,45,"Date of Publishing")]}),
    ("Journal of Smart Cities and Society ( 13 August 2023 ) ",
     {"entities":[(0,35,"Published At"),(38,52,"Date of Publishing")]}),
    ("Frontiers in Computing and Intelligent Systems ( 2 May 2022 ) ",
     {"entities":[(0,46,"Published At"),(49,59,"Date of Publishing")]}),
    ("Elsevier Ltd. ( 27 October 2023 ) ",
     {"entities":[(0,13,"Published At"),(16,31,"Date of Publishing")]}),
    ("Euromicro Conference Series on Digital System Design ( 20 November 2023 ) ",
     {"entities":[(0,52,"Published At"),(55,71,"Date of Publishing")]}),
    ("IEEE TRANSACTIONS ON INDUSTRIAL ELECTRONICS ( 5 OCTOBER 2003 ) ",
     {"entities":[(0,43,"Published At"),(46,60,"Date of Publishing")]}),
    ("International Journal of Computer Applications ( 1 March 2015 ) ",
     {"entities":[(0,46,"Published At"),(49,61,"Date of Publishing")]}),
    ("Global Transitions Proceedings ( 2 July 2021 ) ",
     {"entities":[(0,30,"Published At"),(33,44,"Date of Publishing")]}),
]

## Define our variables

In [3]:
# Run configuration, read by the cells below.
n_iter = 100               # number of passes over TRAIN_DATA in the training loop
output_dir = Path("/ner")  # where the trained pipeline is written
# NOTE(review): "/ner" is an absolute path at the filesystem root - confirm this is intended.
model = None               # name/path handed to spacy.load(); None means start from a blank 'en' pipeline

## Load the model

In [4]:
# Build the pipeline object: an empty English pipeline when no model was configured,
# otherwise whatever spacy.load() returns for the configured name/path.
if model is None:
    nlp = spacy.blank('en')
    print("Created blank 'en' model")
else:
    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)

Created blank 'en' model


## Set up the pipeline

In [5]:
# Make sure the pipeline contains an NER component and keep a handle to it in `ner`.
# In spaCy v3, add_pipe() takes the factory name, appends the new component to the
# pipeline and returns that same instance. Calling create_pipe() first and add_pipe()
# afterwards builds two separate components, which would leave `ner` pointing at one
# that is not part of `nlp`.
if 'ner' in nlp.pipe_names:
    ner = nlp.get_pipe('ner')
else:
    ner = nlp.add_pipe('ner', last=True)

## Train the Recognizer

In [None]:
# Register every annotated label on the NER component that is actually in the pipeline.
# It is looked up by name here so the labels cannot end up on a detached component.
ner = nlp.get_pipe('ner')
for _, annotations in TRAIN_DATA:
    for ent in annotations.get('entities'):
        ner.add_label(ent[2])

other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
# select_pipes() is the spaCy v3 replacement for the deprecated disable_pipes().
with nlp.select_pipes(disable=other_pipes):  # only train NER
    # initialize() is the spaCy v3 replacement for the deprecated begin_training();
    # the callback lets the enabled components see real training examples.
    optimizer = nlp.initialize(
        lambda: [Example.from_dict(nlp.make_doc(text), annotations)
                 for text, annotations in TRAIN_DATA]
    )
    # Shuffle a copy so TRAIN_DATA keeps its original order for the cells that follow.
    train_items = list(TRAIN_DATA)
    for itn in range(n_iter):
        random.shuffle(train_items)
        losses = {}  # accumulated by nlp.update() over this pass
        for text, annotations in tqdm(train_items):
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotations)
            # One example per update step, with 50% dropout.
            nlp.update([example], drop=0.5, sgd=optimizer, losses=losses)
        print(losses)

## Test the trained model

In [None]:
# Run the trained pipeline over each training text and print the predicted entities,
# followed by the per-token entity type and IOB code.
for text, _ in TRAIN_DATA:
    doc = nlp(text)

    found_entities = []
    for ent in doc.ents:
        found_entities.append((ent.text, ent.label_))
    print('Entities', found_entities)

    token_rows = []
    for t in doc:
        token_rows.append((t.text, t.ent_type_, t.ent_iob))
    print('Tokens', token_rows)

## Save the model

In [8]:
# Write the trained pipeline to output_dir (skipped when no directory is configured).
if output_dir is not None:
    output_dir = Path(output_dir)
    # parents=True also creates missing intermediate directories, and exist_ok=True makes
    # the call a no-op when the directory is already there, so no separate exists() check
    # is needed.
    output_dir.mkdir(parents=True, exist_ok=True)
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)

Saved model to \ner


## Test the Trained Model on Full Citation Strings

In [9]:
# Run the in-memory pipeline on one citation string (title, authors, venue, date)
# and print each predicted entity as "label  -->>   text".
doc = nlp("IoT based smart water quality monitoring system Varsha Lakshmikantha, Anjitha Hiriyannagowda, Akshay Manjunath, Aruna Patted, Jagadeesh Basavaiah , AudreArlene Anthony Global Transitions Proceedings  ( 2 July 2021 )")
for ent in doc.ents:
    print(f"{ent.label_}  -->>   {ent.text}")


"""          Output
Title  -->>   IoT based smart water quality monitoring system
Author  -->>   Varsha Lakshmikantha
Author  -->>   Anjitha Hiriyannagowda
Author  -->>   Akshay Manjunath
Author  -->>   Aruna Patted
Author  -->>   Jagadeesh Basavaiah
Author  -->>   AudreArlene Anthony
Author  -->>   Global Transitions Proceedings
Date of Publishing  -->>   2 July 2021"""


Title  -->>   IoT based smart water quality monitoring system
Author  -->>   Varsha Lakshmikantha
Author  -->>   Anjitha Hiriyannagowda
Author  -->>   Akshay Manjunath
Author  -->>   Aruna Patted
Author  -->>   Jagadeesh Basavaiah
Author  -->>   AudreArlene Anthony
Author  -->>   Global Transitions Proceedings
Date of Publishing  -->>   2 July 2021


In [10]:
# Run the in-memory pipeline on a second citation string and print each predicted
# entity as "label  -->>   text".
doc = nlp("Smart e-waste management system utilizing Internet of Things and Deep Learning approaches Daniel Voskergian , Isam Ishaq Journal of Smart Cities and Society ( 13 August 2023 )")
for ent in doc.ents:
    print(f"{ent.label_}  -->>   {ent.text}")


"""               Output 
Title  -->>   Smart e-waste management system utilizing Internet of Things and Deep Learning approaches
Author  -->>   Daniel Voskergian
Author  -->>   Isam Ishaq
Published At  -->>   Journal of Smart Cities and Society
Date of Publishing  -->>   13 August 2023"""

Title  -->>   Smart e-waste management system utilizing Internet of Things and Deep Learning approaches
Author  -->>   Daniel Voskergian
Author  -->>   Isam Ishaq
Published At  -->>   Journal of Smart Cities and Society
Date of Publishing  -->>   13 August 2023
