**Using spaCy NLP**

In [None]:
# installing spacy libraries
! pip install -U pip setuptools wheel
! pip install -U spacy
! python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m42.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Importing ICD9-430 Patients text file which we generated from Extract_notes.ipynb file
from google.colab import files
import pandas as pd
import spacy

# Name of the discharge-summary CSV expected in the working directory after
# the upload prompt below has completed.
NOTES_CSV = 'ICD9-430_Patients_DischargeSummary_Random.csv'
# Name of the spaCy pipeline package to load.
SPACY_MODEL = 'en_core_web_sm'

# Open the Colab upload widget; the return value is kept in `d1`.
d1 = files.upload()
# Read the uploaded CSV from disk into a DataFrame.
icd9_df = pd.read_csv(NOTES_CSV)

# Load the spaCy pipeline used by every later cell.
nlp = spacy.load(SPACY_MODEL)

Saving ICD9-430_Patients_DischargeSummary_Random.csv to ICD9-430_Patients_DischargeSummary_Random.csv


In [None]:
# Preview the loaded notes. A bare `icd9_df.head()` is only rendered when it is
# the last expression of a cell, so it is displayed explicitly here.
display(icd9_df.head())

# Run the spaCy pipeline over the TEXT column only and store the resulting
# Doc object for each note in a new "spacy_doc" column.
icd9_df["spacy_doc"] = icd9_df["TEXT"].apply(nlp)



In [None]:
# For the first two notes: print the (text, label) pair of every entity,
# followed by the text of each token on its own line.
for note_doc in icd9_df["spacy_doc"].head(2):
    entity_pairs = [(span.text, span.label_) for span in note_doc.ents]
    print("Entities in Text:", entity_pairs)
    for tok in note_doc:
        print(tok.text)

Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]
Family
meeting
held
today
with
pt


s
mother
&
father
,
2
sisters
and
pt

   
brother
in
law
.
 
Purpose
of
the
meeting
was
to
further
discuss
the
role

   
of
spokesperson
for
the
pt
,
guardianship
,
[
*
*
Name2
(
NI
)
*
*
]
health
and
social

   
security
.
 
Family
also
here
for
a
medical
update
from
the
neuro
-
surgery

   
team
.
 
Pt
remains
incubated
,
not
sedated
and
not
awake
.

   
Parents

In [None]:
# Lemmatization
# For the first two notes: print the entities, then for every token print the
# token itself, its lemma as an integer ID (`lemma`) and as a string (`lemma_`).
for note_doc in icd9_df["spacy_doc"].head(2):
    entity_pairs = [(span.text, span.label_) for span in note_doc.ents]
    print("Entities in Text:", entity_pairs)
    for tok in note_doc:
        print(tok, tok.lemma, tok.lemma_)

Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]
Family 18292453351080475948 family
meeting 14798207169164081740 meeting
held 3818475640248018718 hold
today 11042482332948150395 today
with 12510949447758279278 with
pt 7525952262478869079 pt

 962983613142996970 

s 16978132545290669629 s
mother 7963322251145911254 mother
& 15473034735919704609 &
father 17071697760115891398 father
, 2593208677638477497 ,
2 15180167692696242062 2
sisters 16030156721

In [None]:
# POS tagging
# For the first two notes: print the entities, then one line per token with
# its text, lemma, coarse and fine-grained POS tags, dependency label, shape,
# and the is_alpha / is_stop flags.
for note_doc in icd9_df["spacy_doc"].head(2):
    entity_pairs = [(span.text, span.label_) for span in note_doc.ents]
    print("Entities in Text:", entity_pairs)
    for tok in note_doc:
        token_fields = (
            tok.text, tok.lemma_, tok.pos_, tok.tag_,
            tok.dep_, tok.shape_, tok.is_alpha, tok.is_stop,
        )
        print(*token_fields)

Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]
Family family NOUN NN compound Xxxxx True False
meeting meeting NOUN NN ROOT xxxx True False
held hold VERB VBN acl xxxx True False
today today NOUN NN npadvmod xxxx True False
with with ADP IN prep xxxx True True
pt pt PROPN NNP poss xx True False

 
 SPACE _SP dep 
 False False
s s PROPN NNP case x True False
mother mother NOUN NN pobj xxxx True False
& & CCONJ CC cc & False False
father father PR

In [None]:
# Named entities
# For the first two notes: print the entity list, then each entity with its
# start/end character offsets in the note and its label.
for note_doc in icd9_df["spacy_doc"].head(2):
    found_entities = note_doc.ents
    print("***************************************************")
    print("Entities in Text:", [(span.text, span.label_) for span in found_entities])
    print("---------------------------------------------------")
    for span in found_entities:
        print(span.text, span.start_char, span.end_char, span.label_)


***************************************************
Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]
---------------------------------------------------
today 20 25 DATE
2 53 54 CARDINAL
Family 240 246 PRODUCT
First 433 438 ORDINAL
Explained 452 461 ORG
First 527 532 ORDINAL
MA Health 622 631 ORG
Medicare 633 641 ORG
Explained 697 706 PERSON
Family 881 887 PRODUCT
Explained 1092 1101 ORG
First 1117 1122 ORDINAL
MA Health 1235 1244 ORG
Family 1247 

In [None]:
# Entity visualizer
# For every note: print the entity list, then render the note inline with
# displaCy's entity style, followed by a separator line.
from spacy import displacy

for note_doc in icd9_df["spacy_doc"]:
    entity_pairs = [(span.text, span.label_) for span in note_doc.ents]
    print("Entities in Text:", entity_pairs)
    displacy.render(note_doc, style="ent", jupyter=True)
    print("***************************************************")


Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]


***************************************************
Entities in Text: [('67', 'CARDINAL'), ('Today', 'DATE'), ('neuro-', 'ORG'), ('Son', 'PERSON'), ('Son', 'PERSON'), ('Validated', 'ORG'), ('Family', 'PRODUCT')]


***************************************************
Entities in Text: [('two', 'CARDINAL'), ('First', 'ORDINAL'), ('33', 'CARDINAL'), ('First', 'ORDINAL'), ('1928', 'DATE'), ('Asian', 'NORP'), ('first', 'ORDINAL'), ('33', 'CARDINAL'), ('second', 'ORDINAL'), ('652', 'CARDINAL')]


***************************************************
Entities in Text: [('yesterday', 'DATE'), ('NP', 'ORG'), ('Neuro', 'PERSON'), ('MD', 'GPE'), ('3', 'CARDINAL'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('today', 'DATE'), ('Attending', 'ORG'), ('the next\n   days', 'DATE'), ('MD', 'GPE'), ('3', 'CARDINAL'), ('NI', 'ORG'), ('186', 'CARDINAL'), ('3', 'CARDINAL'), ('daily', 'DATE'), ('Will', 'PERSON')]


***************************************************
Entities in Text: [('NI', 'ORG'), ('1932', 'DATE'), ('Wednesday of this week', 'DATE'), ('First', 'ORDINAL'), ('English', 'LANGUAGE'), ('PVK', 'ORG'), ('MA Helath', 'ORG')]


***************************************************
Entities in Text: [('2128-12-14', 'DATE'), ('First', 'ORDINAL'), ('LICSW', 'ORG'), ('2128-12-14', 'DATE'), ('Affiliation', 'ORG'), ('1', 'CARDINAL'), ('SW', 'ORG'), ('her over past two weeks', 'DATE'), ('Today', 'DATE'), ('First', 'ORDINAL'), ('7', 'CARDINAL'), ('RN & pt', 'ORG'), ('SW', 'ORG'), ('First', 'ORDINAL'), ('487', 'CARDINAL'), ('379', 'CARDINAL'), ('LICSW', 'ORG')]


***************************************************
Entities in Text: [('yesterday', 'DATE'), ('today', 'DATE'), ('78', 'CARDINAL'), ('a few days', 'DATE'), ('CPR', 'ORG'), ('CCU', 'ORG'), ('a few days', 'DATE'), ('SW', 'ORG'), ('Husb', 'PERSON'), ('50 years', 'DATE'), ('Husb', 'PERSON'), ('Husb', 'PERSON'), ('5', 'CARDINAL'), ('Husb', 'PERSON'), ('the past 2 months', 'DATE'), ('Husb', 'PERSON'), ('SW', 'GPE'), ('SW', 'ORG')]


***************************************************
Entities in Text: [('2192-12-7', 'DATE'), ('RN', 'ORG'), ('Attending', 'ORG'), ('Farsi', 'ORG'), ('the next\n   week to ten days', 'DATE'), ('First', 'ORDINAL'), ('4922', 'DATE'), ('NI', 'ORG'), ('4922', 'DATE'), ('NI', 'ORG'), ('4922', 'DATE'), ('Will', 'PERSON')]


***************************************************
Entities in Text: [('today', 'DATE'), ('57', 'CARDINAL'), ('2', 'CARDINAL'), ('EMT', 'ORG'), ('Initial', 'ORG'), ('MD', 'GPE')]


***************************************************
Entities in Text: [('Family', 'PRODUCT'), ('Will', 'PERSON')]


***************************************************


In [None]:
# Sentence identifier
# For every note: print the entity list, then each detected sentence prefixed
# with its 1-based position, followed by a separator line.
for note_doc in icd9_df["spacy_doc"]:
    entity_pairs = [(span.text, span.label_) for span in note_doc.ents]
    print("Entities in Text:", entity_pairs)
    for sentence_no, sentence in enumerate(note_doc.sents, start=1):
        print("Sentence number {}:{}".format(sentence_no, sentence))
    print("***************************************************")

Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]
Sentence number 1:Family meeting held today with pt
s mother & father, 2 sisters and pt
   brother in law.  
Sentence number 2:Purpose of the meeting was to further discuss the role
   of spokesperson for the pt, guardianship, [**Name2 (NI) **] health and social
   security.  
Sentence number 3:Family also here for a medical update from the neuro-surgery
   team.  
Sentence number 4:Pt remains incub

In [None]:
# Dependency tree
# For every note: print the entity list and a separator, then render the
# dependency parse of each sentence inline. `displacy` is the module imported
# in the entity visualizer cell.
for note_doc in icd9_df["spacy_doc"]:
    entity_pairs = [(span.text, span.label_) for span in note_doc.ents]
    print("Entities in Text:", entity_pairs)
    print("***************************************************")
    displacy.render(list(note_doc.sents), style="dep", jupyter=True)

Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]
***************************************************


Entities in Text: [('67', 'CARDINAL'), ('Today', 'DATE'), ('neuro-', 'ORG'), ('Son', 'PERSON'), ('Son', 'PERSON'), ('Validated', 'ORG'), ('Family', 'PRODUCT')]
***************************************************


Entities in Text: [('two', 'CARDINAL'), ('First', 'ORDINAL'), ('33', 'CARDINAL'), ('First', 'ORDINAL'), ('1928', 'DATE'), ('Asian', 'NORP'), ('first', 'ORDINAL'), ('33', 'CARDINAL'), ('second', 'ORDINAL'), ('652', 'CARDINAL')]
***************************************************


Entities in Text: [('yesterday', 'DATE'), ('NP', 'ORG'), ('Neuro', 'PERSON'), ('MD', 'GPE'), ('3', 'CARDINAL'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('today', 'DATE'), ('Attending', 'ORG'), ('the next\n   days', 'DATE'), ('MD', 'GPE'), ('3', 'CARDINAL'), ('NI', 'ORG'), ('186', 'CARDINAL'), ('3', 'CARDINAL'), ('daily', 'DATE'), ('Will', 'PERSON')]
***************************************************


Entities in Text: [('NI', 'ORG'), ('1932', 'DATE'), ('Wednesday of this week', 'DATE'), ('First', 'ORDINAL'), ('English', 'LANGUAGE'), ('PVK', 'ORG'), ('MA Helath', 'ORG')]
***************************************************


Entities in Text: [('2128-12-14', 'DATE'), ('First', 'ORDINAL'), ('LICSW', 'ORG'), ('2128-12-14', 'DATE'), ('Affiliation', 'ORG'), ('1', 'CARDINAL'), ('SW', 'ORG'), ('her over past two weeks', 'DATE'), ('Today', 'DATE'), ('First', 'ORDINAL'), ('7', 'CARDINAL'), ('RN & pt', 'ORG'), ('SW', 'ORG'), ('First', 'ORDINAL'), ('487', 'CARDINAL'), ('379', 'CARDINAL'), ('LICSW', 'ORG')]
***************************************************


Entities in Text: [('yesterday', 'DATE'), ('today', 'DATE'), ('78', 'CARDINAL'), ('a few days', 'DATE'), ('CPR', 'ORG'), ('CCU', 'ORG'), ('a few days', 'DATE'), ('SW', 'ORG'), ('Husb', 'PERSON'), ('50 years', 'DATE'), ('Husb', 'PERSON'), ('Husb', 'PERSON'), ('5', 'CARDINAL'), ('Husb', 'PERSON'), ('the past 2 months', 'DATE'), ('Husb', 'PERSON'), ('SW', 'GPE'), ('SW', 'ORG')]
***************************************************


Entities in Text: [('2192-12-7', 'DATE'), ('RN', 'ORG'), ('Attending', 'ORG'), ('Farsi', 'ORG'), ('the next\n   week to ten days', 'DATE'), ('First', 'ORDINAL'), ('4922', 'DATE'), ('NI', 'ORG'), ('4922', 'DATE'), ('NI', 'ORG'), ('4922', 'DATE'), ('Will', 'PERSON')]
***************************************************


Entities in Text: [('today', 'DATE'), ('57', 'CARDINAL'), ('2', 'CARDINAL'), ('EMT', 'ORG'), ('Initial', 'ORG'), ('MD', 'GPE')]
***************************************************


Entities in Text: [('Family', 'PRODUCT'), ('Will', 'PERSON')]
***************************************************


In [None]:
# Vocab
# For every note: print the entity list, a separator, and the Vocab object
# attached to the note's Doc.
for note_doc in icd9_df["spacy_doc"]:
    entity_pairs = [(span.text, span.label_) for span in note_doc.ents]
    print("Entities in Text:", entity_pairs)
    print("***************************************************")
    print("Vocab:", note_doc.vocab)

Entities in Text: [('today', 'DATE'), ('2', 'CARDINAL'), ('Family', 'PRODUCT'), ('First', 'ORDINAL'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Medicare', 'ORG'), ('Explained', 'PERSON'), ('Family', 'PRODUCT'), ('Explained', 'ORG'), ('First', 'ORDINAL'), ('MA Health', 'ORG'), ('Family', 'PRODUCT'), ('Family', 'PRODUCT'), ('66', 'CARDINAL'), ('NP', 'ORG'), ('CT', 'ORG'), ('Family', 'PRODUCT'), ('this Thursday', 'DATE'), ('SICU Attending', 'ORG'), ('the day to day', 'DATE'), ('DNR', 'ORG'), ('Aspergers Syndrome', 'WORK_OF_ART'), ('ADL', 'ORG'), ('Family', 'PRODUCT')]
***************************************************
Vocab: <spacy.vocab.Vocab object at 0x794a606b2980>
Entities in Text: [('67', 'CARDINAL'), ('Today', 'DATE'), ('neuro-', 'ORG'), ('Son', 'PERSON'), ('Son', 'PERSON'), ('Validated', 'ORG'), ('Family', 'PRODUCT')]
***************************************************
Vocab: <spacy.vocab.Vocab object at 0x794a606b2980>
Entities in Text: [('two', 'CARDIN

# Tried to use scispaCy together with spaCy in this notebook but ran into difficulties (see the dependency errors in the cells below), so scispaCy is used in a different file. The code below is kept only to highlight the issue. For the evaluation of scispaCy, refer to the Scispacy_NLP file.

In [1]:
# Troubleshooting attempt: pin spaCy to 3.1.0 and install scispaCy (unpinned).
# The uninstall commands below are left disabled.
# !pip uninstall -y scispacy
# !pip uninstall -y spacy
# !pip uninstall -y spacy-nightly
# !pip uninstall -y spacy-legacy
# !pip uninstall -y spacy-loggers
# NOTE: in the recorded run pip reports a conflict here, because the already
# installed 0.4.0 scispaCy models require spacy<3.1.0,>=3.0.1.
!pip install spacy==3.1.0
!pip install scispacy


Collecting spacy==3.1.0
  Using cached spacy-3.1.0-cp311-cp311-linux_x86_64.whl
Collecting thinc<8.1.0,>=8.0.7 (from spacy==3.1.0)
  Using cached thinc-8.0.17-cp311-cp311-linux_x86_64.whl
Installing collected packages: thinc, spacy
  Attempting uninstall: thinc
    Found existing installation: thinc 8.2.5
    Uninstalling thinc-8.2.5:
      Successfully uninstalled thinc-8.2.5
  Attempting uninstall: spacy
    Found existing installation: spacy 3.7.5
    Uninstalling spacy-3.7.5:
      Successfully uninstalled spacy-3.7.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
en-ner-bc5cdr-md 0.4.0 requires spacy<3.1.0,>=3.0.1, but you have spacy 3.1.0 which is incompatible.
en-ner-craft-md 0.4.0 requires spacy<3.1.0,>=3.0.1, but you have spacy 3.1.0 which is incompatible.
en-ner-bionlp13cg-md 0.4.0 requires spacy<3.1.0,>=3.0.1, but you have spacy 3.1.0 which is 

In [2]:
# Installing libraries
# List the spaCy-related packages currently installed.
!pip list | grep spacy
# Install the scispaCy v0.4.0 model packages directly from their release
# archives. In the recorded run, en_core_sci_md 0.4.0 pulls in
# spacy<3.1.0,>=3.0.1, which replaces the installed spaCy 3.7.5.
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_craft_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_jnlpba_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bc5cdr_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bionlp13cg_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_lg-0.4.0.tar.gz

scispacy                           0.5.5
spacy                              3.7.5
spacy-legacy                       3.0.12
spacy-loggers                      1.0.5
Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_md-0.4.0.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_md-0.4.0.tar.gz (125.2 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting spacy<3.1.0,>=3.0.1 (from en_core_sci_md==0.4.0)
  Using cached spacy-3.0.9-cp311-cp311-linux_x86_64.whl
Collecting thinc<8.1.0,>=8.0.3 (from spacy<3.1.0,>=3.0.1->en_core_sci_md==0.4.0)
  Using cached thinc-8.0.17-cp311-cp311-linux_x86_64.whl
Installing collected packages: thinc, spacy
  Attempting uninstall: thinc
    Found existing installation: thinc 8.2.5
    Uninstalling thinc-8.2.5:
      Successfully uninstalled thinc-8.2.5
  Attempting uninstall: spacy
    Found existing installation: spacy 3.7.5
    Uninstalling spacy-3.7.5:
    

In [None]:
# Troubleshooting attempt: remove the existing spaCy/scispaCy packages and
# models, then reinstall a pinned spaCy 3.5.0 / scispaCy 0.5.1 combination.
# The imports and model load below are left disabled.
# import scispacy

# import en_core_sci_lg
# nlp = en_core_sci_lg.load()
!pip uninstall -y scispacy
!pip uninstall -y spacy
!pip uninstall -y en-core-web-sm
!pip uninstall -y en-core-sci-lg

# NOTE: in the recorded run, scispacy==0.5.1 requires spacy<3.5.0,>=3.4.0, so
# pip starts fetching spacy 3.4.4 instead of keeping the 3.5.0 pinned here;
# that install was then cancelled by the user.
!pip install spacy==3.5.0
!pip install scispacy==0.5.1
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_lg-0.5.1.tar.gz

Found existing installation: scispacy 0.5.5
Uninstalling scispacy-0.5.5:
  Successfully uninstalled scispacy-0.5.5
Found existing installation: spacy 3.0.9
Uninstalling spacy-3.0.9:
  Successfully uninstalled spacy-3.0.9
Found existing installation: en_core_web_sm 3.8.0
Uninstalling en_core_web_sm-3.8.0:
  Successfully uninstalled en_core_web_sm-3.8.0
Found existing installation: en_core_sci_lg 0.4.0
Uninstalling en_core_sci_lg-0.4.0:
  Successfully uninstalled en_core_sci_lg-0.4.0
Collecting spacy==3.5.0
  Downloading spacy-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting thinc<8.2.0,>=8.1.0 (from spacy==3.5.0)
  Downloading thinc-8.1.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)
Downloading spacy-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m70.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading thinc-

Collecting scispacy==0.5.1
  Downloading scispacy-0.5.1-py3-none-any.whl.metadata (15 kB)
Collecting spacy<3.5.0,>=3.4.0 (from scispacy==0.5.1)
  Downloading spacy-3.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (24 kB)
Collecting nmslib>=1.7.3.6 (from scispacy==0.5.1)
  Using cached nmslib-2.1.1.tar.gz (188 kB)
  Preparing metadata (setup.py) ... [?25l[?25hcanceled[31mERROR: Operation cancelled by user[0m[31m
[0m^C
^C


In [3]:
# Check which spaCy-related packages and versions are currently installed.
# The scispaCy install below is left disabled.
# !pip install scispacy==0.5.1
!pip list | grep spacy


scispacy                           0.5.5
spacy                              3.0.9
spacy-legacy                       3.0.12
spacy-loggers                      1.0.5


In [None]:
# Install the scispaCy v0.4.0 model packages again from their release archives.
# In the recorded run this replaces spaCy 3.5.0 with 3.0.9 (and thinc 8.1.12
# with 8.0.17), because en_core_sci_md 0.4.0 requires spacy<3.1.0,>=3.0.1.
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_craft_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_jnlpba_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bc5cdr_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bionlp13cg_md-0.4.0.tar.gz
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_lg-0.4.0.tar.gz


Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_md-0.4.0.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_md-0.4.0.tar.gz (125.2 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting spacy<3.1.0,>=3.0.1 (from en_core_sci_md==0.4.0)
  Using cached spacy-3.0.9-cp311-cp311-linux_x86_64.whl
Collecting thinc<8.1.0,>=8.0.3 (from spacy<3.1.0,>=3.0.1->en_core_sci_md==0.4.0)
  Using cached thinc-8.0.17-cp311-cp311-linux_x86_64.whl
Installing collected packages: thinc, spacy
  Attempting uninstall: thinc
    Found existing installation: thinc 8.1.12
    Uninstalling thinc-8.1.12:
      Successfully uninstalled thinc-8.1.12
  Attempting uninstall: spacy
    Found existing installation: spacy 3.5.0
    Uninstalling spacy-3.5.0:
      Successfully uninstalled spacy-3.5.0
Successfully installed spacy-3.0.9 thinc-8.0.17


Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_craft_md-0.4.0.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_craft_md-0.4.0.tar.gz (125.1 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_jnlpba_md-0.4.0.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_jnlpba_md-0.4.0.tar.gz (125.1 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bc5cdr_md-0.4.0.tar.gz
  Using cached https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bc5cdr_md-0.4.0.tar.gz (125.1 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_ner_bionlp13cg_md-0.4.0.tar.gz
  Using cached https://s3-us-west-2.am

  yield d


ImportError: cannot import name util

In [9]:
# Show the installed spaCy-related packages, then try to load the
# en_ner_bc5cdr_md scispaCy model as the `nlp` pipeline.
# NOTE(review): in the recorded run (scispacy 0.5.5 with spacy 3.0.9) this cell
# ends with "ValueError: 'in' is not a valid parameter name"; presumably a
# package-version incompatibility -- confirm before relying on this cell.
!pip list | grep spacy
import en_ner_bc5cdr_md
nlp = en_ner_bc5cdr_md.load()

scispacy                           0.5.5
spacy                              3.0.9
spacy-legacy                       3.0.12
spacy-loggers                      1.0.5


ValueError: 'in' is not a valid parameter name