In [4]:
import spacy
import json

In [3]:
nlp = spacy.load("en_core_web_lg")

In [38]:
with open("data.json", "r") as f: 
    data = json.load(f)

In [39]:
training_data = {'classes' : ['MEDICINE', "MEDICALCONDITION", "PATHOGEN"], 'annotations' : []}
for example in data['examples']:
  temp_dict = {}
  temp_dict['text'] = example['content']
  temp_dict['entities'] = []
  for annotation in example['annotations']:
    start = annotation['start']
    end = annotation['end']
    label = annotation['tag_name'].upper()
    temp_dict['entities'].append((start, end, label))
  training_data['annotations'].append(temp_dict)
  
print(training_data['annotations'][0])

{'text': "While bismuth compounds (Pepto-Bismol) decreased the number of bowel movements in those with travelers' diarrhea, they do not decrease the length of illness.[91] Anti-motility agents like loperamide are also effective at reducing the number of stools but not the duration of disease.[8] These agents should be used only if bloody diarrhea is not present.[92]\n\nDiosmectite, a natural aluminomagnesium silicate clay, is effective in alleviating symptoms of acute diarrhea in children,[93] and also has some effects in chronic functional diarrhea, radiation-induced diarrhea, and chemotherapy-induced diarrhea.[45] Another absorbent agent used for the treatment of mild diarrhea is kaopectate.\n\nRacecadotril an antisecretory medication may be used to treat diarrhea in children and adults.[86] It has better tolerability than loperamide, as it causes less constipation and flatulence.[94]", 'entities': [(360, 371, 'MEDICINE'), (383, 408, 'MEDICINE'), (104, 112, 'MEDICALCONDITION'), (679,

In [7]:
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm

In [40]:
nlp = spacy.blank("en") # load a new spacy model
doc_bin = DocBin() # create a DocBin object

In [41]:
from spacy.util import filter_spans

for training_example  in tqdm(training_data['annotations']): 
    text = training_example['text']
    labels = training_example['entities']
    doc = nlp.make_doc(text) 
    ents = []
    for start, end, label in labels:
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            print("Skipping entity")
        else:
            ents.append(span)
    filtered_ents = filter_spans(ents)
    doc.ents = filtered_ents 
    doc_bin.add(doc)

doc_bin.to_disk("training_data.spacy") # save the docbin object

100%|██████████| 31/31 [00:00<00:00, 534.49it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity





In [10]:
!python -m spacy init fill-config base_config.cfg config.cfg

✔ Auto-filled config with all values
✔ Saved config
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [42]:
!python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./training_data.spacy 


ℹ Saving to output directory: .
ℹ Using CPU
[1m
✔ Initialized pipeline
[1m
ℹ Pipeline: ['tok2vec', 'ner']
ℹ Initial learn rate: 0.01
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00    150.79    0.57    0.98    0.41    0.01
✔ Saved pipeline to output directory
model-last


[2022-11-11 14:32:41,259] [INFO] Set up nlp object from config
[2022-11-11 14:32:41,272] [INFO] Pipeline: ['tok2vec', 'ner']
[2022-11-11 14:32:41,275] [INFO] Created vocabulary
[2022-11-11 14:32:43,111] [INFO] Added vectors: en_core_web_lg
[2022-11-11 14:32:44,130] [INFO] Finished initializing nlp object
[2022-11-11 14:32:45,635] [INFO] Initialized pipeline components: ['tok2vec', 'ner']


In [43]:
nlp_ner = spacy.load("model-best")

In [46]:
doc = nlp_ner("Antiretroviral therapy (ART) is recommended for all HIV-infected\
individuals to reduce the risk of disease progression.\nART also is recommended \
for HIV-infected individuals for the prevention of transmission of HIV.\nPatients \
starting ART should be willing and able to commit to treatment and understand the\
benefits and risks of therapy and the importance of adherence. Patients may choose\
to postpone therapy, and providers, on a case-by-case basis, may elect to defer\
therapy on the basis of clinical and/or psychosocial factors.")

In [48]:
colors = {"PATHOGEN": "#F67DE3", "MEDICINE": "#7DF6D9", "MEDICALCONDITION":"#FFFFFF"}
options = {"colors": colors} 

spacy.displacy.render(doc, style="ent", options= options, jupyter=True)

In [47]:
for ent in doc.ents:
    print(ent.text, ent.label_)

postpone therapy, and providers, on a case-by-case basis, MEDICALCONDITION


In [44]:
pages_array = ['- 1 - SAMPLE (All names and details provided in this sample are fictitious. Some fields have been deliberately left blank.) MEDICAL REPORT MCR no. of doctor: 333333 Hospital / Clinic name and address: 1 Blackacre Hospital, Singapore 01010101 Doctor’s qualifications and experience in this area of work: [To set out details] SECTION 1: PATIENT’S PARTICULARS Full name of patient: Mr Tan Ah Kow NRIC/FIN/Passport no. of patient: S1111111X Age of patient: 55 years old SECTION 2: DOCTOR’S PARTICULARS Full name of doctor: Tan Ah Moi NRIC/FIN/Passport no. of doctor: S2222222Z',

               '- 2 - Doctor-patient relationship: Please state if you have been seeing the patient regularly over a period of time (if so, please state when you first started seeing the patient and how often you see the patient) or if you saw the patient specifically for this mental capacity assessment only. I have been the doctor in charge of Mr Tan Ah Kow since November 2010. I have seen Mr Tan regularly since then, on average once or twice a year. For purposes of this medical report, I re-examined Mr Tan on 20 June 2015. SECTION 3: PATIENT’S MEDICAL INFORMATION Patient’s clinical history: Please also state the source of the information (e.g. from medical records, from the patient, from the applicant etc.). Mr Tan Ah Kow was accompanied by his son, Mr Tan Ah Beng, for the examination. Mr Tan is a 55 year old man, who is divorced, and unemployed. Mr Tan is currently living with his son, Ah Beng, in Ah Beng’s flat. Mr Tan Ah Beng informed me that Mr Tan Ah Kow used to work as a cleaner. Mr Tan Ah Kow has a history of medical conditions. He has had hypertension and hyperlipidemia since 1990 and suffered several strokes in 2005. He subsequently developed heart problems (cardiomyopathy), cardiac failure and chronic renal disease and was treated in ABC Hospital. He was last admitted to the ABC Hospital on 1 April 2010 till 15 April 2010, during which he was diagnosed to have suffered from a stroke. This was confirmed by CT and MRI brain scans. Thereafter, he was transferred to XYZ Hospital for stroke rehabilitation on 15 April 2010. After that, Mr Tan was referred to Blackacre Hospital for follow-up treatment from in November 2010. The clinical impression was that he was manifesting behavioural and psychological symptoms secondary to Dementia.',

               '- 3 - The clinical impression was that he was manifesting behavioural and psychological symptoms secondary to Dementia. I was informed by Mr Tan Ah Beng that Mr Tan is at present incontinent, and is unable to bathe or use the toilet on his own. He is, however, able to feed himself. I have observed a gradual deterioration in his cognitive ability and physical state over the years. Findings from physical examination / mental state examination: Please state your observations of the patient (i.e. the patient’s demeanour, body language, tone of voice etc. relevant to your assessment), the interactions you had with the patient/the tests you administered/the questions you posed, and the date you examined the patient. Mr Tan was brought to the clinic in a wheel chair. His mood was euthymic (i.e. normal, non-depressed and reasonably positive mood) and he did not have any psychotic symptoms. Orientation to time, place and person With regard to his orientation to time, place and person, he was unable to tell that he was in a hospital clinic but identified his son and was able to tell his own name and the name of his son. After being told he was in a hospital clinic, he identified me as a doctor, when asked. However, he could not remember my name, although I have been treating him for the last 5 years. He was able to tell correctly how he arrived at the clinic. However, when asked some moments later where he was, he said he did not know. He said that it was 10 February (actual date 20 June) and it was a Wednesday (actual day, Monday). He appeared to be just guessing as to what date and day it was. After being told what the day and date were, he forgot a few moments later after being asked again. He could not answer when asked what year it was. When asked what time it was, he said that it was 5pm in the afternoon (actual time, 3pm). He was not able to tell the time from looking at a watch. Basic information He gave his age as 50 years old (actually 55 years) and could not answer when asked when his birthday was. He correctly said he lived in a flat with his son, but could not give the correct address, and also got the area wrong (he said the flat was in Bedok, when it was actually in Jurong.) He incorrectly stated that the Prime Minister was Lee Kuan Yew.',

               '- 4 - Simple arithmetic, financial issues Mr Tan was unable to perform simple arithmetic such as 4 plus 3, giving 8 as the answer, and could not subtract 7 from 10. He could not count backwards from 20 to 1. He was unable to recognise notes or coins (he identified a 50-cent coin as 20 cents and a 10-dollar note as 2 dollars). Personal welfare/property and affairs related questions I asked Mr Tan if he owned any property. Mr Tan said he did. I asked him if he was the sole owner. He said yes. (However, Mr Tan Ah Beng informed me that Mr Tan owned the flat together with his (Mr Tan’s) elderly mother, in joint names, and that the elderly mother was currently living in the flat by herself.) I asked Mr Tan what the address of the flat was. He said he could not remember. I asked how many rooms the flat had, and he said it was a 4-room flat. (However, Mr Tan Ah Beng said it was a 3- room flat.) I asked Mr Tan what he wanted to do with the flat, since he was not staying there. He said that he wanted to rent it out. I asked him whether he planned to rent out the whole flat. Mr Tan said yes (even though his elderly mother was still living there.) I asked him how much rent he planned to charge, Mr Tan said he did not know, and that he was not good at money matters. I asked him whether he knew what medical problems he had. Mr Tan just stared at me blankly and shook his head. I asked him whether he remembered being in hospital before. Mr Tan nodded. I asked him if he knew what he was in hospital for. Mr Tan shook his head. I asked him if he was taking any medicine at the moment. Mr Tan just looked at me blankly and did not reply. Date of physical examination/mental state examination: 20 June 2015 Relevant investigation results: [Results of CT brain, MRI brain scan and other investigations to be stated.]',

               '- 5 - Diagnosis: 1. Dementia 2. Stroke SECTION 4: OPINION ON PATIENT’S MENTAL CAPACITY OPINION ON PATIENT’S PERSONAL WELFARE MENTAL CAPACITY IN RELATION TO In your opinion, can the patient understand information relevant to a decision relating to his or her personal welfare? Yes No In your opinion, can the patient retain information long enough to make a decision relating to his or her personal welfare? Yes No In your opinion, can the patient weigh information as part of the process of',

               '- 6 - making a decision relating to his or her personal welfare? Yes No In your opinion, can the patient communicate his or her decision relating to his or her personal welfare? Yes No Taking into consideration the above, in your opinion, does the patient have mental capacity in respect of personal welfare? Yes No OPINION ON PATIENT’S MENTAL CAPACITY IN RELATION TO PROPERTY AND AFFAIRS In your opinion, can the patient understand information relevant to a decision relating to his or her property and affairs? Yes No In your opinion, can the patient retain information long enough to make a decision relating to his or her property and affairs? Yes 3 No In your opinion, can the patient weigh information as part of the process of making a decision relating to his or her property and affairs? Yes No In your opinion, can the patient communicate his or her decision relating to his or her property and affairs? Yes No Taking into consideration the above, in your opinion, does the patient have mental capacity in respect of property and affairs? Yes No',

               '- 7 - Please state the basis of your opinion above in respect of the patient’s mental capacity: Mr Tan’s dementia and stroke have impaired the functioning of his mind and brain. His failure to remember where he was (i.e. in the hospital) and the day and date, despite being told a short while ago, shows his inability to retain information. He was also not able to remember basic information such as his age, and the address where he lives. His failure to tell the time from a watch or to recognize notes and coins shows his inability to understand simple information. He could not do basic arithmetic, which shows that he is not able to weigh and use information. Since he is unable to understand, retain, use or weigh simple information, due to his memory deficits and cognitive failures, he will not be able to make decisions about his personal and financial affairs, which would require being able to process such information. This is also demonstrated by his inability to remember basic information on the property he owns with his elderly mother, and also his inability to make a realistic and concrete plan for what to do with the property. He was also not able to remember what medical problems he has, and not able to answer a question as to whether he is currently on medication. In my view, his cognitive functions are unlikely to improve and would most likely get worse over time, as there is no treatment which can reverse his dementia.',

               '- 8 - PROGNOSIS In your opinion, is the patient likely to regain mental capacity? Yes No Not Sure If “Yes” or “Not Sure”, please suggest when another assessment of the patient’s mental capacity should be carried out: In your opinion, would the patient understand if he/she were to be informed of this application? Yes No Are you aware of any other doctor who holds a different professional opinion regarding the patient’s mental capacity? If so, please provide details: No.',

               '- 9 - SECTION 5: DECLARATION I have read and understood the provisions in sections 3, 4 and 5 of the Mental Capacity Act. I believe in the correctness of the opinion set out herein. I understand that in giving this report my duty is to the Court and I confirm that I have complied with this duty. Signature: _[Doctor to sign]_________ Name: ______Dr Tan Ah Moi____ Date: ______20 July 2015________ Explanatory notes: 1. “Personal welfare” refers to matters such as deciding where to live and consenting to medical and dental treatment. 2. “Property and affairs”, as the name implies, refers to matters concerning the patient’s financial affairs and property. 3. When giving your opinion on the patient’s mental capacity, please note that where it is not patently obvious from the clinical history and examination that the patient has or lacks capacity, you will need to explain the basis for your opinion. Section 3 of the Mental Capacity Act (1) The following principles apply for the purposes of this Act. (2) A person must be assumed to have capacity unless it is established that he lacks capacity. (3) A person is not to be treated as unable to make a decision unless all practicable steps to help him to do so have been taken without success. (4) A person is not to be treated as unable to make a decision merely because he makes an unwise decision. (5) An act done, or a decision made, under this Act for or on behalf of a person who lacks capacity must be done, or made, in his best interests. (6) Before the act is done, or the decision is made, regard must be had to whether the purpose for which it is needed can be as effectively achieved in a way that is less restrictive of the person’s rights and freedom of action. Section 4 of the Mental Capacity Act (1) For the purposes of this Act, a person lacks capacity in relation to a matter if at the material time he is unable to make a decision for himself in relation to the matter because of an impairment of, or a',

               '- 10 - disturbance in the functioning of, the mind or brain. (2) It does not matter whether the impairment or disturbance is permanent or temporary. (3) A lack of capacity cannot be established merely by reference to — (a) a person’s age or appearance; or (b) a condition of his, or an aspect of his behaviour, which might lead others to make unjustified assumptions about his capacity. (4) In proceedings under this Act (other than proceedings for offences under this Act), any question whether a person lacks capacity within the meaning of this Act must be decided on the balance of probabilities. (5) Subject to section 21, no power which a person (“D”) may exercise under this Act — (a) in relation to a person who lacks capacity; or (b) where D reasonably thinks that a person lacks capacity, is exercisable in relation to a person below 21 years of age. Section 5 of the Mental Capacity Act (1) For the purposes of section 4, a person is unable to make a decision for himself if he is unable— (a) to understand the information relevant to the decision; (b) to retain that information; (c) to use or weigh that information as part of the process of making the decision; or (d) to communicate his decision (whether by talking, using sign language or any other means). (2) A person is not to be regarded as unable to understand the information relevant to a decision if he is able to understand an explanation of it given to him in a way that is appropriate to his circumstances (using simple language, visual aids or any other means). (3) The fact that a person is able to retain the information relevant to a decision for a short period only does not prevent him from being regarded as able to make the decision. (4) The information relevant to a decision includes information about the reasonably foreseeable consequences of — (a) deciding one way or another; or (b) failing to make the decision.']

In [45]:
for page_num, page in enumerate(pages_array):
    sents = page.split(".")
    for sent in sents:
        doc = nlp_ner(sent)
        for ent in doc.ents:
            print(f"Page: {page_num +1}, Match: {ent.text}, Entity: {ent.label_}")

Page: 1, Match: have been, Entity: MEDICALCONDITION
Page: 1, Match: 01010101 Doctor’s qualifications and experience in, Entity: MEDICALCONDITION
Page: 1, Match: years old SECTION 2, Entity: MEDICALCONDITION
Page: 2, Match: doctor in charge of, Entity: MEDICALCONDITION
Page: 2, Match: Tan on, Entity: MEDICALCONDITION
Page: 2, Match: June 2015, Entity: MEDICALCONDITION
Page: 2, Match: history: Please also state the source of the information (, Entity: MEDICALCONDITION
Page: 2, Match: Ah Beng, Entity: MEDICALCONDITION
Page: 2, Match: year old man,, Entity: MEDICALCONDITION
Page: 2, Match: Tan is, Entity: MEDICALCONDITION
Page: 2, Match: Ah Beng, Entity: MEDICALCONDITION
Page: 2, Match: developed heart, Entity: MEDICALCONDITION
Page: 2, Match: was treated in ABC Hospital, Entity: MEDICALCONDITION
Page: 2, Match: April 2010, during which, Entity: MEDICALCONDITION
Page: 2, Match: to XYZ Hospital, Entity: MEDICALCONDITION
Page: 2, Match: April 2010, Entity: MEDICALCONDITION
Page: 2, Match: to

In [16]:
page_2 = pages_array[1]

In [19]:
doc = nlp_ner(page_2)

In [21]:
doc.ents

(. I have been the doctor in charge of,
 Tan on,
 history: Please also state the source of the information (e.g. from medical records, from the patient,
 Ah Beng,
 year old,
 unemployed.,
 Ah Beng,
 was treated in ABC Hospital.,
 April 2010, during which,
 . This was confirmed by CT and MRI brain,
 to XYZ Hospital,
 to Blackacre Hospital for follow-up treatment from in November)