In [None]:
# Upgrade spaCy quietly, then print the installed version and environment details.
# NOTE(review): the install is unpinned (-U), so a later re-run may pull a
# different spaCy release than the one recorded in the output below.
!pip install -U spacy -q
!python -m spacy info

[1m

spaCy version    3.7.4                         
Location         /usr/local/lib/python3.10/dist-packages/spacy
Platform         Linux-6.1.58+-x86_64-with-glibc2.35
Python version   3.10.12                       
Pipelines        en_core_web_sm (3.7.1)        



In [None]:
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm

# Shared objects for the data-conversion step further down.
nlp = spacy.blank("en") # blank English pipeline; used below via nlp.make_doc to tokenize text
db = DocBin() # container that collects the annotated Docs before they are written to disk

In [None]:
import json

# Location of the annotated training examples (JSON).
filename = '/content/training_data_3490.json'

# Read the annotations. The encoding is stated explicitly so that decoding does
# not depend on the platform's default locale (the lab-report lines processed
# later in this notebook contain non-ASCII characters such as 'µ').
with open(filename, 'r', encoding='utf-8') as f:
    TRAIN_DATA = json.load(f)

# The loaded object is indexed by key: 'annotations' holds the examples that
# the conversion cell iterates over.
print(len(TRAIN_DATA['annotations']))

3490


In [None]:
# Convert the annotated examples into spaCy's binary training format.
#
# A fresh DocBin is created here so that re-running this cell does not append
# every document a second time to a container left over from an earlier run.
db = DocBin()
skipped = 0

for text, annot in tqdm(TRAIN_DATA['annotations']):
    doc = nlp.make_doc(text)
    ents = []
    for start, end, label in annot["entities"]:
        # "contract" shrinks offsets that fall inside a token to the tokens
        # lying completely within [start, end); char_span gives None when no
        # usable span remains.
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            skipped += 1
            # Report enough detail to locate and fix the annotation.
            print(f"Skipping entity {label!r} at [{start}:{end}] {text[start:end]!r} in {text!r}")
        else:
            ents.append(span)
    doc.ents = ents
    db.add(doc)

db.to_disk("./training_data.spacy") # save the docbin object
print(f"Saved {len(db)} docs; skipped {skipped} entities that did not align to tokens")

100%|██████████| 3490/3490 [00:00<00:00, 4173.67it/s]


In [None]:
# Generate a training config with a single NER component, optimized for
# efficiency, and write it to config.cfg in the current working directory.
!python -m spacy init config config.cfg --lang en --pipeline ner --optimize efficiency

[38;5;3m⚠ To generate a more effective transformer-based config (GPU-only),
install the spacy-transformers package and re-run this command. The config
generated now does not use transformers.[0m
[38;5;4mℹ Generated config template specific for your use case[0m
- Language: en
- Pipeline: ner
- Optimize for: efficiency
- Hardware: CPU
- Transformer: None
[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [None]:
# Train the pipeline described by config.cfg and write the results to the
# current directory.
# NOTE(review): --paths.dev points at the same file as --paths.train, so the
# ENTS_F / ENTS_P / ENTS_R scores printed during training are measured on the
# training examples themselves and do not indicate how well the model
# generalizes. Hold out a separate dev set before trusting these numbers.
!python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./training_data.spacy

[38;5;4mℹ Saving to output directory: .[0m
[38;5;4mℹ Using CPU[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     61.00    7.75    6.06   10.74    0.08
  0     200         71.17   2055.41   99.87   99.89   99.86    1.00
  0     400          5.63      5.06   99.97   99.97   99.97    1.00
  1     600          4.28      5.93   99.97   99.97   99.97    1.00
  2     800         20.49     14.12   99.97   99.97   99.97    1.00
  3    1000          0.50      2.00   99.97   99.97   99.97    1.00
  4    1200          4.04      4.58   99.97   99.97   99.97    1.00
  6    1400         26.82     22.08   99.96   99.99   99.93    1.00
  7    1600          7.73      7.06   99.97   99.99   99.96    1.00
 10    1800         89.48     34.57   99.97   99.97

In [None]:
# Load the trained pipeline for inference.
# NOTE(review): this path assumes the training command above ran with /content
# as the working directory — confirm. It also loads the "model-last"
# checkpoint; check whether "model-best" is the intended one.
model_path = "/content/model-last"
nlp_ner = spacy.load(model_path)

In [None]:
def check_ner(test_data):
    """Run the trained NER pipeline on one text and show what it found.

    The entities are rendered inline with displaCy, then a dict grouping the
    entity texts under 'TEST_NAME' and 'UNIT' is printed. Entities carrying
    any other label are left out of the dict. Nothing is returned.
    """
    doc = nlp_ner(test_data)
    spacy.displacy.render(doc, style="ent", jupyter=True)

    # Entity label as produced by the model -> key used in the printed summary.
    key_for_label = {'TEST NAME': 'TEST_NAME', 'UNIT': 'UNIT'}
    found = {'TEST_NAME': [], "UNIT": []}
    for ent in doc.ents:
        key = key_for_label.get(ent.label_)
        if key is not None:
            found[key].append(ent.text)
    print(found)

In [None]:
# Smoke test: one tab-separated line (name, value, unit, reference range).
test_data = "Triglycerides	92	mg/dL	<150"
check_ner(test_data)

{'TEST_NAME': ['Triglycerides'], 'UNIT': ['mg/dL']}


In [None]:
# NOTE(review): exact duplicate of the Triglycerides smoke test earlier in the
# notebook; remove it or replace the input with a different sample line.
test_data = "Triglycerides	92	mg/dL	<150"
check_ner(test_data)

In [None]:
# NOTE(review): exact duplicate of the Triglycerides smoke test earlier in the
# notebook; remove it or replace the input with a different sample line.
test_data = "Triglycerides	92	mg/dL	<150"
check_ner(test_data)

In [None]:
# NOTE(review): exact duplicate of the Triglycerides smoke test earlier in the
# notebook; remove it or replace the input with a different sample line.
test_data = "Triglycerides	92	mg/dL	<150"
check_ner(test_data)

In [None]:
# Raw lab-report lines to run the trained model on.
text_file_path = r'/content/All_data_1.txt'

# Decode explicitly as UTF-8 rather than the platform default: the lines
# contain non-ASCII characters (e.g. 'µ' in units, '–' in ranges).
# readlines() keeps each line's trailing newline.
with open(text_file_path, 'r', encoding='utf-8') as fp:
    data = fp.readlines()

print("Number of lines: ", len(data))

Number of lines:  3834


In [None]:
# First 100 lines of the loaded file, used as an inspection sample.
first_100 = data[:100]

In [None]:
# Show the model's predictions for every sampled line.
# The per-line work (run the pipeline, render with displaCy, print the
# TEST_NAME / UNIT summary) is exactly what check_ner does, so the helper is
# called instead of repeating its body here.
for idx_, text_line in enumerate(first_100):
    print(idx_, text_line)
    check_ner(text_line)
    print("______"*10)
    print()

0 RBC COUNT	5.12	Million/cu.mm	3.8-4.8	Electrical Impedence



{'TEST_NAME': ['RBC COUNT'], 'UNIT': ['Million/cu.mm']}
____________________________________________________________

1 HAEMOGLOBIN	13.9	g/dL	12-15	Spectrophotometer



{'TEST_NAME': ['HAEMOGLOBIN'], 'UNIT': ['g/dL']}
____________________________________________________________

2 TOTAL IRON BINDING CAPACITY ( TIBC)	325	µg/dL	261-462	TPTZ AND NITROSO-



{'TEST_NAME': ['TOTAL IRON BINDING CAPACITY ( TIBC)'], 'UNIT': ['µg/dL']}
____________________________________________________________

3 IRON	89.0	µg/dL	60-180	TPTZ



{'TEST_NAME': ['IRON'], 'UNIT': ['µg/dL']}
____________________________________________________________

4 GAMMA GLUTAMYL TRANSPEPTIDASE	26.00	U/L	<38	IFCC



{'TEST_NAME': ['GAMMA GLUTAMYL TRANSPEPTIDASE'], 'UNIT': ['U/L']}
____________________________________________________________

5 BILIRUBIN CONJUGATED (DIRECT)	0.13	mg/dL	<0.2	DPD



{'TEST_NAME': ['BILIRUBIN CONJUGATED (DIRECT)'], 'UNIT': ['mg/dL']}
____________________________________________________________

6 PCV	40.50	%	36-46	Electronic pulse &



{'TEST_NAME': ['PCV'], 'UNIT': ['%']}
____________________________________________________________

7 MCV	79.1	fL	83-101	Calculated



{'TEST_NAME': ['MCV'], 'UNIT': ['fL']}
____________________________________________________________

8 MCHC	34.4	g/dL	31.5-34.5	Calculated



{'TEST_NAME': ['MCHC'], 'UNIT': ['g/dL']}
____________________________________________________________

9 R.D.W	12.9	%	11.6-14	Calculated



{'TEST_NAME': ['R.D.W'], 'UNIT': ['%']}
____________________________________________________________

10 TOTAL LEUCOCYTE COUNT (TLC)	8,029	cells/cu.mm	4000-10000	Electrical Impedance



{'TEST_NAME': ['TOTAL LEUCOCYTE COUNT (TLC)'], 'UNIT': ['cells/cu.mm']}
____________________________________________________________

11 NEUTROPHILS	57.3	%	40-80	Electrical Impedance



{'TEST_NAME': ['NEUTROPHILS'], 'UNIT': ['%']}
____________________________________________________________

12 LYMPHOCYTES	33.1	%	20-40	Electrical Impedance



{'TEST_NAME': ['LYMPHOCYTES'], 'UNIT': ['%']}
____________________________________________________________

13 MONOCYTES	5.6	%	2-10	Electrical Impedance



{'TEST_NAME': ['MONOCYTES'], 'UNIT': ['%']}
____________________________________________________________

14 EOSINOPHILS	3.4	%	1-6	Electrical Impedance



{'TEST_NAME': ['EOSINOPHILS'], 'UNIT': ['%']}
____________________________________________________________

15 BASOPHILS	0.6	%	<1-2	Electrical Impedance



{'TEST_NAME': ['BASOPHILS'], 'UNIT': ['%']}
____________________________________________________________

16 BILIRUBIN (INDIRECT)	0.68	mg/dL	0.0-1.1	Dual Wavelength



{'TEST_NAME': ['BILIRUBIN (INDIRECT)'], 'UNIT': ['mg/dL']}
____________________________________________________________

17 ALBUMIN	4.05	g/dL	3.5-5.2	BROMO CRESOL



{'TEST_NAME': ['ALBUMIN'], 'UNIT': ['g/dL']}
____________________________________________________________

18 GLOBULIN	3.28	g/dL	2.0-3.5	Calculated



{'TEST_NAME': ['GLOBULIN'], 'UNIT': ['g/dL']}
____________________________________________________________

19 PROTEIN, TOTAL	7.33	g/dL	6.6-8.3	Biuret



{'TEST_NAME': ['PROTEIN, TOTAL'], 'UNIT': ['g/dL']}
____________________________________________________________

20 MCH	27.2	pg	27-32	Calculated



{'TEST_NAME': ['MCH'], 'UNIT': ['pg']}
____________________________________________________________

21 VLDL CHOLESTEROL	18.4	mg/dL	<30	Calculated



{'TEST_NAME': ['VLDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

22 PLATELET COUNT	270000	cells/cu.mm	150000-410000	Electrical impedence



{'TEST_NAME': ['PLATELET COUNT'], 'UNIT': ['cells/cu.mm']}
____________________________________________________________

23 ERYTHROCYTE SEDIMENTATION	34	mm at the end	0-20	Modified Westegren



{'TEST_NAME': ['ERYTHROCYTE SEDIMENTATION'], 'UNIT': ['mm at the end']}
____________________________________________________________

24 HBA1C, GLYCATED HEMOGLOBIN ,	6.3	%	HPLC



{'TEST_NAME': ['HBA1C, GLYCATED HEMOGLOBIN ,'], 'UNIT': ['%']}
____________________________________________________________

25 HDL CHOLESTEROL	48	mg/dL	40-60	Enzymatic



{'TEST_NAME': ['HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

26 TOTAL CHOLESTEROL	238	mg/dL	<200	CHO-POD



{'TEST_NAME': ['TOTAL CHOLESTEROL', '-POD'], 'UNIT': ['mg/dL']}
____________________________________________________________

27 LDL CHOLESTEROL	171.4	mg/dL	<100	Calculated



{'TEST_NAME': ['LDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

28 TRIGLYCERIDES	92	mg/dL	<150	GPO-POD



{'TEST_NAME': ['TRIGLYCERIDES'], 'UNIT': ['mg/dL']}
____________________________________________________________

29 NON-HDL CHOLESTEROL	190	mg/dL	<130	Calculated



{'TEST_NAME': ['NON-HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

30 TRI-IODOTHYRONINE (T3, TOTAL)	1.08	ng/mL	0.64-1.52	CMIA



{'TEST_NAME': ['TRI-IODOTHYRONINE (T3, TOTAL)'], 'UNIT': ['ng/mL']}
____________________________________________________________

31 THYROXINE (T4, TOTAL)	9.16	µg/dL	4.87-11.72	CMIA



{'TEST_NAME': ['THYROXINE (T4, TOTAL)'], 'UNIT': ['µg/dL']}
____________________________________________________________

32 THYROID STIMULATING HORMONE	1.710	µIU/mL	0.35-4.94	CMIA



{'TEST_NAME': ['THYROID STIMULATING HORMONE'], 'UNIT': ['µIU/mL']}
____________________________________________________________

33 BLOOD UREA NITROGEN	8.4	mg/dL	8.0 - 23.0	Calculated



{'TEST_NAME': ['BLOOD UREA NITROGEN'], 'UNIT': ['mg/dL']}
____________________________________________________________

34 CREATININE	0.64	mg/dL	0.72 – 1.18	JAFFE METHOD



{'TEST_NAME': ['CREATININE'], 'UNIT': ['mg/dL']}
____________________________________________________________

35 UREA	18.00	mg/dL	17-43	GLDH, Kinetic Assay



{'TEST_NAME': ['UREA', ', Kinetic Assay'], 'UNIT': ['mg/dL']}
____________________________________________________________

36 URIC ACID	4.38	mg/dL	2.6-6.0	Uricase PAP



{'TEST_NAME': ['URIC ACID'], 'UNIT': ['mg/dL']}
____________________________________________________________

37 CALCIUM	9.20	mg/dL	8.8-10.6	Arsenazo III



{'TEST_NAME': ['CALCIUM'], 'UNIT': ['mg/dL']}
____________________________________________________________

38 VITAMIN B12 , SERUM	147	pg/mL	120-914	CLIA



{'TEST_NAME': ['VITAMIN B12 , SERUM'], 'UNIT': ['pg/mL']}
____________________________________________________________

39 PHOSPHORUS, INORGANIC	3.23	mg/dL	2.5-4.5	Phosphomolybdate



{'TEST_NAME': ['PHOSPHORUS, INORGANIC'], 'UNIT': ['mg/dL']}
____________________________________________________________

40 VITAMIN D (25 - OH VITAMIN D) , SERUM	10.1	ng/mL	CMIA



{'TEST_NAME': ['VITAMIN D (25 - OH VITAMIN D) , SERUM'], 'UNIT': ['ng/mL']}
____________________________________________________________

41 SODIUM	137	mmol/L	136–146	ISE (Indirect)



{'TEST_NAME': ['SODIUM', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

42 ALANINE AMINOTRANSFERASE	26	U/L	<35	IFCC



{'TEST_NAME': ['ALANINE AMINOTRANSFERASE'], 'UNIT': ['U/L']}
____________________________________________________________

43 POTASSIUM	4.8	mmol/L	3.5–5.1	ISE (Indirect)



{'TEST_NAME': ['POTASSIUM', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

44 CHLORIDE	101	mmol/L	101–109	ISE (Indirect)



{'TEST_NAME': ['CHLORIDE', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

45 ASPARTATE AMINOTRANSFERASE	20.0	U/L	<35	IFCC



{'TEST_NAME': ['ASPARTATE AMINOTRANSFERASE'], 'UNIT': ['U/L']}
____________________________________________________________

46 BILIRUBIN, TOTAL	0.81	mg/dL	0.3–1.2	DPD



{'TEST_NAME': ['BILIRUBIN, TOTAL'], 'UNIT': ['mg/dL']}
____________________________________________________________

47 ALKALINE PHOSPHATASE	41.00	U/L	30-120	IFCC



{'TEST_NAME': ['ALKALINE PHOSPHATASE'], 'UNIT': ['U/L']}
____________________________________________________________

48 RBC COUNT	4.12	Million/cu.mm	3.8-4.8	Electrical Impedence



{'TEST_NAME': ['RBC COUNT'], 'UNIT': ['Million/cu.mm']}
____________________________________________________________

49 HAEMOGLOBIN	11.6	g/dL	12-15	Spectrophotometer



{'TEST_NAME': ['HAEMOGLOBIN'], 'UNIT': ['g/dL']}
____________________________________________________________

50 GAMMA GLUTAMYL TRANSPEPTIDASE	8.00	U/L	<38	IFCC



{'TEST_NAME': ['GAMMA GLUTAMYL TRANSPEPTIDASE'], 'UNIT': ['U/L']}
____________________________________________________________

51 BILIRUBIN CONJUGATED (DIRECT)	0.07	mg/dL	<0.2	DPD



{'TEST_NAME': ['BILIRUBIN CONJUGATED (DIRECT)'], 'UNIT': ['mg/dL']}
____________________________________________________________

52 PCV	35.60	%	36-46	Electronic pulse &



{'TEST_NAME': ['PCV'], 'UNIT': ['%']}
____________________________________________________________

53 MCV	86.4	fL	83-101	Calculated



{'TEST_NAME': ['MCV'], 'UNIT': ['fL']}
____________________________________________________________

54 MCHC	32.6	g/dL	31.5-34.5	Calculated



{'TEST_NAME': ['MCHC'], 'UNIT': ['g/dL']}
____________________________________________________________

55 R.D.W	14.3	%	11.6-14	Calculated



{'TEST_NAME': ['R.D.W'], 'UNIT': ['%']}
____________________________________________________________

56 TOTAL LEUCOCYTE COUNT (TLC)	9,880	cells/cu.mm	4000-10000	Electrical Impedance



{'TEST_NAME': ['TOTAL LEUCOCYTE COUNT (TLC)'], 'UNIT': ['cells/cu.mm']}
____________________________________________________________

57 NEUTROPHILS	79.2	%	40-80	Electrical Impedance



{'TEST_NAME': ['NEUTROPHILS'], 'UNIT': ['%']}
____________________________________________________________

58 LYMPHOCYTES	14.2	%	20-40	Electrical Impedance



{'TEST_NAME': ['LYMPHOCYTES'], 'UNIT': ['%']}
____________________________________________________________

59 MONOCYTES	4.2	%	2-10	Electrical Impedance



{'TEST_NAME': ['MONOCYTES'], 'UNIT': ['%']}
____________________________________________________________

60 EOSINOPHILS	2.3	%	1-6	Electrical Impedance



{'TEST_NAME': ['EOSINOPHILS'], 'UNIT': ['%']}
____________________________________________________________

61 BASOPHILS	0.1	%	<1-2	Electrical Impedance



{'TEST_NAME': ['BASOPHILS'], 'UNIT': ['%']}
____________________________________________________________

62 BILIRUBIN (INDIRECT)	0.30	mg/dL	0.0-1.1	Dual Wavelength



{'TEST_NAME': ['BILIRUBIN (INDIRECT)'], 'UNIT': ['mg/dL']}
____________________________________________________________

63 ALBUMIN	3.58	g/dL	3.5-5.2	BROMO CRESOL



{'TEST_NAME': ['ALBUMIN'], 'UNIT': ['g/dL']}
____________________________________________________________

64 GLOBULIN	2.76	g/dL	2.0-3.5	Calculated



{'TEST_NAME': ['GLOBULIN'], 'UNIT': ['g/dL']}
____________________________________________________________

65 PROTEIN, TOTAL	6.34	g/dL	6.6-8.3	Biuret



{'TEST_NAME': ['PROTEIN, TOTAL'], 'UNIT': ['g/dL']}
____________________________________________________________

66 MCH	28.2	pg	27-32	Calculated



{'TEST_NAME': ['MCH'], 'UNIT': ['pg']}
____________________________________________________________

67 VLDL CHOLESTEROL	33.4	mg/dL	<30	Calculated



{'TEST_NAME': ['VLDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

68 PLATELET COUNT	276000	cells/cu.mm	150000-410000	Electrical impedence



{'TEST_NAME': ['PLATELET COUNT'], 'UNIT': ['cells/cu.mm']}
____________________________________________________________

69 ERYTHROCYTE SEDIMENTATION	30	mm at the end	0-20	Modified Westegren



{'TEST_NAME': ['ERYTHROCYTE SEDIMENTATION'], 'UNIT': ['mm at the end']}
____________________________________________________________

70 HBA1C, GLYCATED HEMOGLOBIN ,	5	%	HPLC



{'TEST_NAME': ['HBA1C, GLYCATED HEMOGLOBIN ,'], 'UNIT': ['%']}
____________________________________________________________

71 HDL CHOLESTEROL	60	mg/dL	40-60	Enzymatic



{'TEST_NAME': ['HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

72 TOTAL CHOLESTEROL	154	mg/dL	<200	CHO-POD



{'TEST_NAME': ['TOTAL CHOLESTEROL', '-POD'], 'UNIT': ['mg/dL']}
____________________________________________________________

73 LDL CHOLESTEROL	60.5	mg/dL	<100	Calculated



{'TEST_NAME': ['LDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

74 TRIGLYCERIDES	167	mg/dL	<150	GPO-POD



{'TEST_NAME': ['TRIGLYCERIDES'], 'UNIT': ['mg/dL']}
____________________________________________________________

75 NON-HDL CHOLESTEROL	94	mg/dL	<130	Calculated



{'TEST_NAME': ['NON-HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

76 TRI-IODOTHYRONINE (T3, TOTAL)	1.37	ng/mL	0.64-1.52	CMIA



{'TEST_NAME': ['TRI-IODOTHYRONINE (T3, TOTAL)'], 'UNIT': ['ng/mL']}
____________________________________________________________

77 THYROXINE (T4, TOTAL)	15.46	µg/dL	4.87-11.72	CMIA



{'TEST_NAME': ['THYROXINE (T4, TOTAL)'], 'UNIT': ['µg/dL']}
____________________________________________________________

78 THYROID STIMULATING HORMONE	1.270	µIU/mL	0.35-4.94	CMIA



{'TEST_NAME': ['THYROID STIMULATING HORMONE'], 'UNIT': ['µIU/mL']}
____________________________________________________________

79 BLOOD UREA NITROGEN	4.2	mg/dL	8.0 - 23.0	Calculated



{'TEST_NAME': ['BLOOD UREA NITROGEN'], 'UNIT': ['mg/dL']}
____________________________________________________________

80 CREATININE	0.42	mg/dL	0.72 – 1.18	JAFFE METHOD



{'TEST_NAME': ['CREATININE'], 'UNIT': ['mg/dL']}
____________________________________________________________

81 UREA	9.00	mg/dL	17-43	GLDH, Kinetic Assay



{'TEST_NAME': ['UREA', ', Kinetic Assay'], 'UNIT': ['mg/dL']}
____________________________________________________________

82 URIC ACID	3.58	mg/dL	2.6-6.0	Uricase PAP



{'TEST_NAME': ['URIC ACID'], 'UNIT': ['mg/dL']}
____________________________________________________________

83 CALCIUM	7.90	mg/dL	8.8-10.6	Arsenazo III



{'TEST_NAME': ['CALCIUM'], 'UNIT': ['mg/dL']}
____________________________________________________________

84 PHOSPHORUS, INORGANIC	2.84	mg/dL	2.5-4.5	Phosphomolybdate



{'TEST_NAME': ['PHOSPHORUS, INORGANIC'], 'UNIT': ['mg/dL']}
____________________________________________________________

85 SODIUM	135	mmol/L	136–146	ISE (Indirect)



{'TEST_NAME': ['SODIUM', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

86 ALANINE AMINOTRANSFERASE	13	U/L	<35	IFCC



{'TEST_NAME': ['ALANINE AMINOTRANSFERASE'], 'UNIT': ['U/L']}
____________________________________________________________

87 POTASSIUM	4.1	mmol/L	3.5–5.1	ISE (Indirect)



{'TEST_NAME': ['POTASSIUM', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

88 CHLORIDE	106	mmol/L	101–109	ISE (Indirect)



{'TEST_NAME': ['CHLORIDE', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

89 ASPARTATE AMINOTRANSFERASE	18.0	U/L	<35	IFCC



{'TEST_NAME': ['ASPARTATE AMINOTRANSFERASE'], 'UNIT': ['U/L']}
____________________________________________________________

90 BILIRUBIN, TOTAL	0.37	mg/dL	0.3–1.2	DPD



{'TEST_NAME': ['BILIRUBIN, TOTAL'], 'UNIT': ['mg/dL']}
____________________________________________________________

91 ALKALINE PHOSPHATASE	88.00	U/L	30-120	IFCC



{'TEST_NAME': ['ALKALINE PHOSPHATASE'], 'UNIT': ['U/L']}
____________________________________________________________

92 RED BLOOD CELL (RBC) COUNT	4.74	4.5 - 5.5	mil/µL



{'TEST_NAME': ['RED BLOOD CELL (RBC) COUNT'], 'UNIT': ['mil/µL']}
____________________________________________________________

93 HEMOGLOBIN (HB)	14.9	13.0 - 17.0	g/dL



{'TEST_NAME': ['HEMOGLOBIN (HB)'], 'UNIT': ['g/dL']}
____________________________________________________________

94 GAMMA GLUTAMYL TRANSFERASE (GGT)	112 High	15 - 85	U/L



{'TEST_NAME': ['GAMMA GLUTAMYL TRANSFERASE (GGT)'], 'UNIT': ['U/L']}
____________________________________________________________

95 BILIRUBIN, DIRECT	0.2	0.0 - 0.2	mg/dL



{'TEST_NAME': ['BILIRUBIN, DIRECT'], 'UNIT': ['mg/dL']}
____________________________________________________________

96 HEMATOCRIT (PCV)	43.6	40 - 50	%



{'TEST_NAME': ['HEMATOCRIT (PCV)'], 'UNIT': ['%']}
____________________________________________________________

97 MEAN CORPUSCULAR VOLUME (MCV)	92.0	83 - 101	fL



{'TEST_NAME': ['MEAN CORPUSCULAR VOLUME (MCV)'], 'UNIT': ['fL']}
____________________________________________________________

98 MEAN CORPUSCULAR HEMOGLOBIN	34.2	31.5 - 34.5	g/dL



{'TEST_NAME': ['MEAN CORPUSCULAR HEMOGLOBIN'], 'UNIT': ['g/dL']}
____________________________________________________________

99 RED CELL DISTRIBUTION WIDTH (RDW)	12.4	11.6 - 14.0	%



{'TEST_NAME': ['RED CELL DISTRIBUTION WIDTH (RDW)'], 'UNIT': ['%']}
____________________________________________________________



In [None]:
# Last 100 lines of the loaded file, used as a second inspection sample.
last_100 = data[-100:]

# Show the model's predictions for every sampled line, numbered from 1.
# check_ner performs the per-line work (run the pipeline, render with displaCy,
# print the TEST_NAME / UNIT summary), so its body is not repeated here.
for idx_, text_line in enumerate(last_100, start=1):
    print(idx_, text_line)
    check_ner(text_line)
    print("______"*10)
    print()

1 T4	7.72	5.10 - 14.10	µg/dL



{'TEST_NAME': ['T4'], 'UNIT': ['µg/dL']}
____________________________________________________________

2 TSH 3RD GENERATION	2.540	0.270 - 4.200	µIU/mL



{'TEST_NAME': ['TSH 3RD GENERATION'], 'UNIT': ['µIU/mL']}
____________________________________________________________

3 BLOOD UREA NITROGEN	11	6 - 20	mg/dL



{'TEST_NAME': ['BLOOD UREA NITROGEN'], 'UNIT': ['mg/dL']}
____________________________________________________________

4 CREATININE	0.96	0.70 - 1.20	mg/dL



{'TEST_NAME': ['CREATININE'], 'UNIT': ['mg/dL']}
____________________________________________________________

5 URIC ACID	6.8	3.4 - 7.0	mg/dL



{'TEST_NAME': ['URIC ACID'], 'UNIT': ['mg/dL']}
____________________________________________________________

6 ALANINE AMINOTRANSFERASE (ALT/SGPT)	33	0 - 41	U/L



{'TEST_NAME': ['ALANINE AMINOTRANSFERASE (ALT/SGPT)'], 'UNIT': ['U/L']}
____________________________________________________________

7 ASPARTATE AMINOTRANSFERASE (AST/SGOT)	24	0 - 40	U/L



{'TEST_NAME': ['ASPARTATE AMINOTRANSFERASE (AST/SGOT)'], 'UNIT': ['U/L']}
____________________________________________________________

8 BILIRUBIN, TOTAL	0.29	UPTO 1.2	mg/dL



{'TEST_NAME': ['BILIRUBIN, TOTAL'], 'UNIT': ['mg/dL']}
____________________________________________________________

9 ALKALINE PHOSPHATASE	89	40 - 129	U/L



{'TEST_NAME': ['ALKALINE PHOSPHATASE'], 'UNIT': ['U/L']}
____________________________________________________________

10 PROSTATE SPECIFIC ANTIGEN	2.660	High	0.0 - 2.0	ng/mL



{'TEST_NAME': ['PROSTATE SPECIFIC ANTIGEN'], 'UNIT': ['High', 'ng/mL']}
____________________________________________________________

11 RBC COUNT	5.06	Million/cu.mm	4.5-5.5	Electrical Impedence



{'TEST_NAME': ['RBC COUNT'], 'UNIT': ['Million/cu.mm']}
____________________________________________________________

12 HAEMOGLOBIN	15.7	g/dL	13-17	Spectrophotometer



{'TEST_NAME': ['HAEMOGLOBIN'], 'UNIT': ['g/dL']}
____________________________________________________________

13 GAMMA GLUTAMYL TRANSPEPTIDASE	44.00	U/L	<55	IFCC



{'TEST_NAME': ['GAMMA GLUTAMYL TRANSPEPTIDASE'], 'UNIT': ['U/L']}
____________________________________________________________

14 BILIRUBIN CONJUGATED (DIRECT)	0.13	mg/dL	<0.2	DPD



{'TEST_NAME': ['BILIRUBIN CONJUGATED (DIRECT)'], 'UNIT': ['mg/dL']}
____________________________________________________________

15 PCV	47.20	%	40-50	Electronic pulse &



{'TEST_NAME': ['PCV'], 'UNIT': ['%']}
____________________________________________________________

16 MCV	93.3	fL	83-101	Calculated



{'TEST_NAME': ['MCV'], 'UNIT': ['fL']}
____________________________________________________________

17 MCHC	33.2	g/dL	31.5-34.5	Calculated



{'TEST_NAME': ['MCHC'], 'UNIT': ['g/dL']}
____________________________________________________________

18 R.D.W	12.8	%	11.6-14	Calculated



{'TEST_NAME': ['R.D.W'], 'UNIT': ['%']}
____________________________________________________________

19 TOTAL LEUCOCYTE COUNT (TLC)	4,890	cells/cu.mm	4000-10000	Electrical Impedance



{'TEST_NAME': ['TOTAL LEUCOCYTE COUNT (TLC)'], 'UNIT': ['cells/cu.mm']}
____________________________________________________________

20 NEUTROPHILS	55.8	%	40-80	Electrical Impedance



{'TEST_NAME': ['NEUTROPHILS'], 'UNIT': ['%']}
____________________________________________________________

21 LYMPHOCYTES	31.7	%	20-40	Electrical Impedance



{'TEST_NAME': ['LYMPHOCYTES'], 'UNIT': ['%']}
____________________________________________________________

22 MONOCYTES	9.2	%	2-10	Electrical Impedance



{'TEST_NAME': ['MONOCYTES'], 'UNIT': ['%']}
____________________________________________________________

23 EOSINOPHILS	2.8	%	1-6	Electrical Impedance



{'TEST_NAME': ['EOSINOPHILS'], 'UNIT': ['%']}
____________________________________________________________

24 BASOPHILS	0.5	%	<1-2	Electrical Impedance



{'TEST_NAME': ['BASOPHILS'], 'UNIT': ['%']}
____________________________________________________________

25 BILIRUBIN (INDIRECT)	0.72	mg/dL	0.0-1.1	Dual Wavelength



{'TEST_NAME': ['BILIRUBIN (INDIRECT)'], 'UNIT': ['mg/dL']}
____________________________________________________________

26 ALBUMIN	4.72	g/dL	3.5-5.2	BROMO CRESOL



{'TEST_NAME': ['ALBUMIN'], 'UNIT': ['g/dL']}
____________________________________________________________

27 GLOBULIN	2.77	g/dL	2.0-3.5	Calculated



{'TEST_NAME': ['GLOBULIN'], 'UNIT': ['g/dL']}
____________________________________________________________

28 PROTEIN, TOTAL	7.49	g/dL	6.6-8.3	Biuret



{'TEST_NAME': ['PROTEIN, TOTAL'], 'UNIT': ['g/dL']}
____________________________________________________________

29 MCH	31	pg	27-32	Calculated



{'TEST_NAME': ['MCH'], 'UNIT': ['pg']}
____________________________________________________________

30 VLDL CHOLESTEROL	23	mg/dL	<30	Calculated



{'TEST_NAME': ['VLDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

31 PLATELET COUNT	260000	cells/cu.mm	150000-410000	Electrical impedence



{'TEST_NAME': ['PLATELET COUNT'], 'UNIT': ['cells/cu.mm']}
____________________________________________________________

32 ERYTHROCYTE SEDIMENTATION	3	mm at the end	0-15	Modified Westergren



{'TEST_NAME': ['ERYTHROCYTE SEDIMENTATION'], 'UNIT': ['mm at the end']}
____________________________________________________________

33 GLUCOSE, FASTING , NAF PLASMA	114	mg/dL	70-100	HEXOKINASE



{'TEST_NAME': ['GLUCOSE, FASTING , NAF PLASMA'], 'UNIT': ['mg/dL']}
____________________________________________________________

34 GLUCOSE, POST PRANDIAL (PP), 2	93	mg/dL	70-140	HEXOKINASE



{'TEST_NAME': ['GLUCOSE, POST PRANDIAL (PP),'], 'UNIT': ['mg/dL']}
____________________________________________________________

35 HBA1C, GLYCATED HEMOGLOBIN ,	5.9	%	HPLC



{'TEST_NAME': ['HBA1C, GLYCATED HEMOGLOBIN ,'], 'UNIT': ['%']}
____________________________________________________________

36 HDL CHOLESTEROL	45	mg/dL	40-60	Enzymatic



{'TEST_NAME': ['HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

37 TOTAL CHOLESTEROL	161	mg/dL	<200	CHO-POD



{'TEST_NAME': ['TOTAL CHOLESTEROL', '-POD'], 'UNIT': ['mg/dL']}
____________________________________________________________

38 LDL CHOLESTEROL	93.3	mg/dL	<100	Calculated



{'TEST_NAME': ['LDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

39 TRIGLYCERIDES	115	mg/dL	<150	GPO-POD



{'TEST_NAME': ['TRIGLYCERIDES'], 'UNIT': ['mg/dL']}
____________________________________________________________

40 NON-HDL CHOLESTEROL	116	mg/dL	<130	Calculated



{'TEST_NAME': ['NON-HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

41 TRI-IODOTHYRONINE (T3, TOTAL)	1.19	ng/mL	0.64-1.52	CMIA



{'TEST_NAME': ['TRI-IODOTHYRONINE (T3, TOTAL)'], 'UNIT': ['ng/mL']}
____________________________________________________________

42 THYROXINE (T4, TOTAL)	7.80	µg/dL	4.87-11.72	CMIA



{'TEST_NAME': ['THYROXINE (T4, TOTAL)'], 'UNIT': ['µg/dL']}
____________________________________________________________

43 THYROID STIMULATING HORMONE	1.030	µIU/mL	0.35-4.94	CMIA



{'TEST_NAME': ['THYROID STIMULATING HORMONE'], 'UNIT': ['µIU/mL']}
____________________________________________________________

44 BLOOD UREA NITROGEN	8.3	mg/dL	8.0 - 23.0	Calculated



{'TEST_NAME': ['BLOOD UREA NITROGEN'], 'UNIT': ['mg/dL']}
____________________________________________________________

45 CREATININE	0.91	mg/dL	0.72 – 1.18	JAFFE METHOD



{'TEST_NAME': ['CREATININE'], 'UNIT': ['mg/dL']}
____________________________________________________________

46 UREA	17.80	mg/dL	17-43	GLDH, Kinetic Assay



{'TEST_NAME': ['UREA', ', Kinetic Assay'], 'UNIT': ['mg/dL']}
____________________________________________________________

47 URIC ACID	7.53	mg/dL	3.5–7.2	Uricase PAP



{'TEST_NAME': ['URIC ACID'], 'UNIT': ['mg/dL']}
____________________________________________________________

48 CALCIUM	10.10	mg/dL	8.8-10.6	Arsenazo III



{'TEST_NAME': ['CALCIUM'], 'UNIT': ['mg/dL']}
____________________________________________________________

49 PHOSPHORUS, INORGANIC	3.20	mg/dL	2.5-4.5	Phosphomolybdate



{'TEST_NAME': ['PHOSPHORUS, INORGANIC'], 'UNIT': ['mg/dL']}
____________________________________________________________

50 SODIUM	139	mmol/L	136–146	ISE (Indirect)



{'TEST_NAME': ['SODIUM', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

51 ALANINE AMINOTRANSFERASE	72	U/L	<50	IFCC



{'TEST_NAME': ['ALANINE AMINOTRANSFERASE'], 'UNIT': ['U/L']}
____________________________________________________________

52 POTASSIUM	4.3	mmol/L	3.5–5.1	ISE (Indirect)



{'TEST_NAME': ['POTASSIUM', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

53 CHLORIDE	104	mmol/L	101–109	ISE (Indirect)



{'TEST_NAME': ['CHLORIDE', '(Indirect)'], 'UNIT': ['mmol/L']}
____________________________________________________________

54 ASPARTATE AMINOTRANSFERASE	37.0	U/L	<50	IFCC



{'TEST_NAME': ['ASPARTATE AMINOTRANSFERASE'], 'UNIT': ['U/L']}
____________________________________________________________

55 BILIRUBIN, TOTAL	0.85	mg/dL	0.3–1.2	DPD



{'TEST_NAME': ['BILIRUBIN, TOTAL'], 'UNIT': ['mg/dL']}
____________________________________________________________

56 ALKALINE PHOSPHATASE	81.00	U/L	30-120	IFCC



{'TEST_NAME': ['ALKALINE PHOSPHATASE'], 'UNIT': ['U/L']}
____________________________________________________________

57 FERRITIN	172.00	High	13 - 150	ng/mL



{'TEST_NAME': ['FERRITIN'], 'UNIT': ['ng/mL']}
____________________________________________________________

58 BP	110/60MMHG(SUPINE)	mm/Hg



{'TEST_NAME': ['BP'], 'UNIT': ['mm/Hg']}
____________________________________________________________

59 RED BLOOD CELL COUNT	4.60	3.8 - 4.8	mil/µL



{'TEST_NAME': ['RED BLOOD CELL COUNT'], 'UNIT': ['mil/µL']}
____________________________________________________________

60 HEMOGLOBIN	12.9	12.0 - 15.0	g/dL



{'TEST_NAME': ['HEMOGLOBIN'], 'UNIT': ['g/dL']}
____________________________________________________________

61 GAMMA GLUTAMYL TRANSFERASE (GGT)	23	0 - 40	U/L



{'TEST_NAME': ['GAMMA GLUTAMYL TRANSFERASE (GGT)'], 'UNIT': ['U/L']}
____________________________________________________________

62 BILIRUBIN, DIRECT	0.2	< 0.30	mg/dL



{'TEST_NAME': ['BILIRUBIN, DIRECT'], 'UNIT': ['mg/dL']}
____________________________________________________________

63 HEMATOCRIT	39.4	36.0 - 46.0	%



{'TEST_NAME': ['HEMATOCRIT'], 'UNIT': ['%']}
____________________________________________________________

64 MEAN CORPUSCULAR VOLUME	85.6	83.0 - 101.0	fL



{'TEST_NAME': ['MEAN CORPUSCULAR VOLUME'], 'UNIT': ['fL']}
____________________________________________________________

65 MEAN CORPUSCULAR HEMOGLOBIN	28.0	27.0 - 32.0	pg



{'TEST_NAME': ['MEAN CORPUSCULAR HEMOGLOBIN'], 'UNIT': ['pg']}
____________________________________________________________

66 RED CELL DISTRIBUTION WIDTH	14.8	High	11.6 - 14.0	%



{'TEST_NAME': ['RED CELL DISTRIBUTION WIDTH'], 'UNIT': ['High', '%']}
____________________________________________________________

67 WHITE BLOOD CELL COUNT	5.43	4 - 10	thou/µL



{'TEST_NAME': ['WHITE BLOOD CELL COUNT'], 'UNIT': ['thou/µL']}
____________________________________________________________

68 NEUTROPHILS	43	40 - 80	%



{'TEST_NAME': ['NEUTROPHILS'], 'UNIT': ['%']}
____________________________________________________________

69 LYMPHOCYTES	50	High	20 - 40	%



{'TEST_NAME': ['LYMPHOCYTES'], 'UNIT': ['High', '%']}
____________________________________________________________

70 MONOCYTES	05	2.0 - 10.0	%



{'TEST_NAME': ['MONOCYTES'], 'UNIT': ['%']}
____________________________________________________________

71 EOSINOPHILS	02	1 - 6	%



{'TEST_NAME': ['EOSINOPHILS'], 'UNIT': ['%']}
____________________________________________________________

72 BASOPHILS	00	0 - 2	%



{'TEST_NAME': ['BASOPHILS'], 'UNIT': ['%']}
____________________________________________________________

73 BILIRUBIN, INDIRECT	0.3	0.1 - 1.0	mg/dL



{'TEST_NAME': ['BILIRUBIN, INDIRECT'], 'UNIT': ['mg/dL']}
____________________________________________________________

74 ALBUMIN	4.1	3.97 - 4.94	g/dL



{'TEST_NAME': ['ALBUMIN'], 'UNIT': ['g/dL']}
____________________________________________________________

75 GLOBULIN	2.7	2.0 - 3.5	g/dL



{'TEST_NAME': ['GLOBULIN'], 'UNIT': ['g/dL']}
____________________________________________________________

76 TOTAL PROTEIN	6.9	6.0 - 8.0	g/dL



{'TEST_NAME': ['TOTAL PROTEIN'], 'UNIT': ['g/dL']}
____________________________________________________________

77 MEAN CORPUSCULAR HEMOGLOBIN	28.0	27.0 - 32.0	pg



{'TEST_NAME': ['MEAN CORPUSCULAR HEMOGLOBIN'], 'UNIT': ['pg']}
____________________________________________________________

78 VERY LOW DENSITY LIPOPROTEIN	31.4	mg/dL



{'TEST_NAME': ['VERY LOW DENSITY LIPOPROTEIN'], 'UNIT': ['mg/dL']}
____________________________________________________________

79 PLATELET COUNT	197	150 - 410	thou/µL



{'TEST_NAME': ['PLATELET COUNT'], 'UNIT': ['thou/µL']}
____________________________________________________________

80 ERYTHRO SEDIMENTATION RATE, BLOOD



{'TEST_NAME': ['ERYTHRO SEDIMENTATION RATE, BLOOD\n'], 'UNIT': []}
____________________________________________________________

81 GLUCOSE, FASTING, PLASMA	288	High	Normal 75 - 99	mg/dL



{'TEST_NAME': ['GLUCOSE, FASTING, PLASMA'], 'UNIT': ['mg/dL']}
____________________________________________________________

82 GLUCOSE, POST-PRANDIAL, PLASMA	367	High	70 - 139	mg/dL



{'TEST_NAME': ['GLUCOSE, POST-PRANDIAL, PLASMA'], 'UNIT': ['High', 'mg/dL']}
____________________________________________________________

83 GLYCOSYLATED HEMOGLOBIN (HBA1C)	13.8	High	Non-diabetic: < 5.7	%



{'TEST_NAME': ['GLYCOSYLATED HEMOGLOBIN (HBA1C)'], 'UNIT': ['%']}
____________________________________________________________

84 HDL CHOLESTEROL	56	Low HDL Cholesterol <40	mg/dL



{'TEST_NAME': ['HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

85 CHOLESTEROL	274	High	Desirable cholesterol level	mg/dL



{'TEST_NAME': ['CHOLESTEROL', 'cholesterol level'], 'UNIT': ['mg/dL']}
____________________________________________________________

86 CHOLESTEROL LDL	211	mg/dL



{'TEST_NAME': ['CHOLESTEROL LDL'], 'UNIT': ['mg/dL']}
____________________________________________________________

87 TRIGLYCERIDES	157	High	Normal: < 150	mg/dL



{'TEST_NAME': ['TRIGLYCERIDES'], 'UNIT': ['mg/dL']}
____________________________________________________________

88 C-REACTIVE PROTEIN	13.9	High	< 5.0	mg/L



{'TEST_NAME': ['C-REACTIVE PROTEIN'], 'UNIT': ['High', 'mg/L']}
____________________________________________________________

89 NON HDL CHOLESTEROL	218	High	Desirable : < 130	mg/dL



{'TEST_NAME': ['NON HDL CHOLESTEROL'], 'UNIT': ['mg/dL']}
____________________________________________________________

90 T3	112.0	80 - 200	ng/dL



{'TEST_NAME': ['T3'], 'UNIT': ['ng/dL']}
____________________________________________________________

91 T4	7.00	5.1 - 14.1	µg/dL



{'TEST_NAME': ['T4'], 'UNIT': ['µg/dL']}
____________________________________________________________

92 TSH 3RD GENERATION	2.700	0.27 - 4.2	µIU/mL



{'TEST_NAME': ['TSH 3RD GENERATION'], 'UNIT': ['µIU/mL']}
____________________________________________________________

93 BLOOD UREA NITROGEN	9.1	6 - 20	mg/dL



{'TEST_NAME': ['BLOOD UREA NITROGEN'], 'UNIT': ['mg/dL']}
____________________________________________________________

94 CREATININE	0.60	0.5 - 0.9	mg/dL



{'TEST_NAME': ['CREATININE'], 'UNIT': ['mg/dL']}
____________________________________________________________

95 URIC ACID	3.8	2.4 - 5.7	mg/dL



{'TEST_NAME': ['URIC ACID'], 'UNIT': ['mg/dL']}
____________________________________________________________

96 ALANINE AMINOTRANSFERASE (ALT/SGPT)	46	High	< OR = 35	U/L



{'TEST_NAME': ['ALANINE AMINOTRANSFERASE (ALT/SGPT)'], 'UNIT': ['High', 'U/L']}
____________________________________________________________

97 ASPARTATE AMINOTRANSFERASE (AST/SGOT)	32	< OR = 35	U/L



{'TEST_NAME': ['ASPARTATE AMINOTRANSFERASE (AST/SGOT)'], 'UNIT': ['U/L']}
____________________________________________________________

98 BILIRUBIN, TOTAL	0.5	Upto 1.2	mg/dL



{'TEST_NAME': ['BILIRUBIN, TOTAL'], 'UNIT': ['mg/dL']}
____________________________________________________________

99 ALKALINE PHOSPHATASE	81	35 - 104	U/L



{'TEST_NAME': ['ALKALINE PHOSPHATASE'], 'UNIT': ['U/L']}
____________________________________________________________

100 MEAN CORPUSCULAR HEMOGLOBIN	32.7	31.5 - 34.5	g/dL


{'TEST_NAME': ['MEAN CORPUSCULAR HEMOGLOBIN'], 'UNIT': ['g/dL']}
____________________________________________________________

