In [1]:
import re

In [2]:
text = """Patient's phone number is 7321119999. 
Bill amount is 120$ and 45 dollars. 
Patient can go to our online portal www.Awesome1Hospital.com 
and pay the bill online."""

In [4]:
# find all numbers
pattern = r"\d+"

match = re.findall(pattern, text)
match

['7321119999', '120', '45', '1']

In [6]:
# Extract only phone number. Phone number is always 10 digits
pattern = r"\d{10}"

match = re.findall(pattern, text)
match

['7321119999']

In [13]:
# Extract phone number, which is either 10 continuous digits or follows this format (xxx)-xxx-xxxx

text = "Patient's phone no is (732)-111-9999, spouse phone number 7326664444. Bill amounts is 120$"
pattern = r"\(\d{3}\)-\d{3}-\d{4}|\d{10}"

match = re.findall(pattern, text)
match

['(732)-111-9999', '7326664444']

In [16]:
# Extract phone number and bill amount separately

text = "Patient's phone no is 7321564895. Bill amount is 120$"

pattern = r"(\d{10})\D+(\d+)\$"

match = re.search(pattern, text)
print(match)

<re.Match object; span=(22, 53), match='7321564895. Bill amount is 120$'>


In [17]:
match.groups()

('7321564895', '120')

In [18]:
phone_number, bill_amount = match.groups()
print(f"Phone no: {phone_number}")
print(f"Bill amount: {bill_amount}")

Phone no: 7321564895
Bill amount: 120


In [48]:
text = """
Dr John Smith, M.D

2 Non-Important street,
New York, Phone (900)-12123- ~2222

Name:  Virat Kohli Date: 2/05/2022

Address: 2 cricket blvd, New Delhi

| Omeprazole 40 mg

Directions: Use two tablets daily for three months

Refill: 3 times"""

In [26]:
# Extract name
pattern = r"Name:(.*)Date"

re.findall(pattern, text)[0].strip()

'Marta Sharapova'

In [28]:
# Extract address
pattern = r"Address:(.*)\n"

address = re.findall(pattern, text)
address

[' 9 tennis court, new Russia, DC']

In [49]:
# Extract medicines
pattern = "Address:[^\n]*(.*)Directions"
match = re.findall(pattern, text, flags=re.DOTALL)
print(match[0].strip())

| Omeprazole 40 mg


In [50]:
# Extract directions
pattern = r"Directions:\n(.*)Refill"
match = re.findall(pattern, text, flags=re.DOTALL)
print(match[0].strip())

IndexError: list index out of range

In [47]:
# Extract refill info
pattern = r"Refill:.*(\d).*times"
match = re.findall(pattern, text)
print(eval(match[0].strip()))

2


In [26]:
text_1 = """
17/12/2020

Patient Medical Record

Patient Information Birth Date
Jerry Lucas May 2 1998
(279) 920-8204 " Weight:
4218 Wheeler Ridge Dr $7
anaes 14201 Height:

In Case of Emergency
meee

Joe Lucas 4218 Wheeler Ridge Dr
Buffalo, New York, 14201
Home phone United States
Work phone

General Medical History

Chicken Pox (Varicelia): Measles:

IMMUNE NOT IMMUNE
Have you had the Hepatitis B vaccination?
Yes ,

List any Medical Problems (asthma, seizures, headaches):
N/A

abc"""

text_2 = """
17/12/2020

Patient Medical Record

Patient Information Birth Date

Kathy Crawford May 6 1972

(737) 988-0851 Weight’

9264 Ash Dr 95

New York City, 10005 ‘

United States Height
190

In Case of Emergency
ee
Simeone Crawford 9266 Ash Dr
H New York City, New York, 10005
ome phone United States
(990) 375-4621
Work phone
Genera! Medical History
_

eS I ee

ne

a enna

Chicken Pox (Varicella): Measies:

IMMUNE IMMUNE

Have you had the Hepatitis B vaccination?

No

List any Medical Problems (asthma, seizures, headaches):

Migraine

abc"""

In [5]:
# name
name_pattern = "Date\n+([a-zA-Z]+\s+[a-zA-Z]+).\D{3}"
matches = re.findall(name_pattern, text_2)
print(matches)

['Kathy Crawford']


In [6]:
name_pattern = "Date\n+([a-zA-Z]+\s+[a-zA-Z]+).\D{3}"
matches = re.findall(name_pattern, text_1)
print(matches)

['Jerry Lucas']


In [8]:
# phone number
phone_pattern = "(\(\d{3}\).\d{3}.\d{4}).+Weight"
for text in [text_1, text_2]:
    matches = re.findall(phone_pattern, text)
    print(matches)

['(279) 920-8204']
['(737) 988-0851']


In [10]:
# vaccination status
v_status_pattern = "vaccination\?\n+(Yes|No)"
for text in [text_1, text_2]:
    matches = re.findall(v_status_pattern, text)
    print(matches)

['Yes']
['No']


In [30]:
# Medical problem
med_problem_pattern = "headaches\):\n+(\D+)\n"
for text in [text_1, text_2]:
    matches = re.findall(med_problem_pattern, text)
    print(matches)

['N/A\n']
['Migraine\n']
