<a href="https://colab.research.google.com/github/akash-kaul/Using-scispaCy-for-Named-Entity-Recognition/blob/master/scispaCyNER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Packages

In [0]:
!pip install -U spacy
!pip install scispacy
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_craft_md-0.2.4.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_jnlpba_md-0.2.4.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bc5cdr_md-0.2.4.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bionlp13cg_md-0.2.4.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_lg-0.2.4.tar.gz

# Import Packages

In [0]:
import scispacy
import spacy

#Core models
import en_core_sci_sm
import en_core_sci_lg

#NER specific models
import en_ner_craft_md
import en_ner_bc5cdr_md
import en_ner_jnlpba_md
import en_ner_bionlp13cg_md

#Tools for extracting & displaying data
from spacy import displacy
import pandas as pd




# Read in Single Text

In [0]:
# Load the JNLPBA NER pipeline; this step does not depend on the data.
nlp = en_ner_jnlpba_md.load()

# Read the metadata CSV and take the "abstract" value from the row labelled 0.
meta_df = pd.read_csv("/content/metadata.csv")
text = meta_df.loc[0, "abstract"]

# Run the abstract through the pipeline to obtain the annotated document.
doc = nlp(text)

# Render the recognised entities inline in the notebook output.
displacy_image = displacy.render(
    doc,
    style='ent',
    jupyter=True,
)


# Read in Entire File

In [0]:
#Read in file
meta_df = pd.read_csv("/content/sample.csv")

#Load the models
# All four NER pipelines are loaded up front so that any of the add_* calls in
# the loop below can be switched on without touching this section.
nlp_cr = en_ner_craft_md.load()
nlp_bc = en_ner_bc5cdr_md.load()
nlp_bi = en_ner_bionlp13cg_md.load()
nlp_jn = en_ner_jnlpba_md.load()

#Create table to store entities
# Three parallel lists: one entry per (doi, entity text, entity label) triple.
table = {"doi": [], "Entity": [], "Class": []}

#Loop over entire CSV file
# NOTE: add_cr / add_bc / add_bi / add_jn are defined in cells further down
# this notebook; those cells must be executed before this one.
for text, doi in zip(meta_df["abstract"], meta_df["doi"]):
    # A missing abstract is read as NaN (a float, not text). Only real strings
    # can be passed to the model, so skip everything else.
    if not isinstance(text, str):
        continue

    #Add all entity value pairs to table (run one at a time for best results)
    add_cr(nlp_cr, text, table, doi)

    # add_bc(nlp_bc, text, table, doi)

    # add_bi(nlp_bi, text, table, doi)

    # add_jn(nlp_jn, text, table, doi)

#Turn table into an exportable CSV file (returns normalized file of entity/value pairs)
trans_df = pd.DataFrame(table)
trans_df.to_csv("Entity_model.csv", index=False)

# Methods to add entity/value pairs to table

In [0]:
def add_cr(model, text, table, doi):
    """Append the entities that `model` finds in `text` to `table`.

    `createTable(model, text)` supplies a mapping of entity text to label.
    For every pair in that mapping one row is appended across the three
    parallel lists in `table`: `doi` under "doi", the entity text under
    "Entity" and its label under "Class". `table` is modified in place and
    nothing is returned.
    """
    for entity, label in createTable(model, text).items():
        table["doi"].append(doi)
        table["Entity"].append(entity)
        table["Class"].append(label)



In [0]:
def add_bc(model, text, table, doi):
    """Append the entities that `model` finds in `text` to `table`.

    `createTable(model, text)` supplies a mapping of entity text to label.
    For every pair in that mapping one row is appended across the three
    parallel lists in `table`: `doi` under "doi", the entity text under
    "Entity" and its label under "Class". `table` is modified in place and
    nothing is returned.
    """
    for entity, label in createTable(model, text).items():
        table["doi"].append(doi)
        table["Entity"].append(entity)
        table["Class"].append(label)


In [0]:
def add_bi(model, text, table, doi):
    """Append the entities that `model` finds in `text` to `table`.

    `createTable(model, text)` supplies a mapping of entity text to label.
    For every pair in that mapping one row is appended across the three
    parallel lists in `table`: `doi` under "doi", the entity text under
    "Entity" and its label under "Class". `table` is modified in place and
    nothing is returned.
    """
    for entity, label in createTable(model, text).items():
        table["doi"].append(doi)
        table["Entity"].append(entity)
        table["Class"].append(label)


In [0]:
def add_jn(model, text, table, doi):
    """Append the entities that `model` finds in `text` to `table`.

    `createTable(model, text)` supplies a mapping of entity text to label.
    For every pair in that mapping one row is appended across the three
    parallel lists in `table`: `doi` under "doi", the entity text under
    "Entity" and its label under "Class". `table` is modified in place and
    nothing is returned.
    """
    for entity, label in createTable(model, text).items():
        table["doi"].append(doi)
        table["Entity"].append(entity)
        table["Class"].append(label)


# Method to Extract Entity/Value pairs

In [0]:
def createTable(nlp,document):
    """Return a dict mapping each entity's text to its label.

    `document` is passed through the `nlp` callable and the `ents` of the
    result are walked in order. When the same entity text occurs more than
    once, the label of the last occurrence is the one kept.
    """
    return {entity.text: entity.label_ for entity in nlp(document).ents}