# Task 1: Create a Prescription Parser using CRF
This task tests your ability to build a Doctor Prescription Parser with the help of CRF model

Your job is to build a Prescription Parser that takes a prescription (sentence) as an input and find / label the words in that sentence with one of the already pre-defined labels

### Problem: SEQUENCE PREDICTION - Label words in a sentence
#### Input : Doctor Prescription in the form of a sentence split into tokens
- Ex: Take 2 tablets once a day for 10 days

#### Output : FHIR Labels
- ('Take', 'Method')
- ('2', 'Qty') 
- ('tablets', 'Form')
- ('once', 'Frequency')
- ('a', 'Period') 
- ('day', 'PeriodUnit')
- ('for', 'FOR')
- ('10', 'Duration')
- ('days', 'DurationUnit') 

### Major Steps
- Install necessary library
- Import the libraries
- Create training data with labels
    - Split the sentence into tokens
    - Compute POS tags
    - Create triples
- Extract features
- Split the data into training and testing set
- Create CRF model
- Save the CRF model
- Load the CRF model
- Predict on test data
- Accuracy

#### Install necesaary library

In [591]:
!pip install scikit-learn numpy pandas nltk




#### Import the necessary libraries

In [593]:
import numpy as np
import pandas as pd
import nltk
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

### Input data (GIVEN)
#### Creating the inputs to the ML model in the following form:
- sigs --> ['take 3 tabs for 10 days']       INPUT SIG
- input_sigs --> [['take', '3', 'tabs', 'for', '10', 'days']]      TOKENS
- output_labels --> [['Method','Qty', 'Form', 'FOR', 'Duration', 'DurationUnit']]       LABELS

In [595]:
sigs = ["for 5 to 6 days", "inject 2 units", "x 2 weeks", "x 3 days", "every day", "every 2 weeks", "every 3 days", "every 1 to 2 months", "every 2 to 6 weeks", "every 4 to 6 days", "take two to four tabs", "take 2 to 4 tabs", "take 3 tabs orally bid for 10 days at bedtime", "swallow three capsules tid orally", "take 2 capsules po every 6 hours", "take 2 tabs po for 10 days", "take 100 caps by mouth tid for 10 weeks", "take 2 tabs after an hour", "2 tabs every 4-6 hours", "every 4 to 6 hours", "q46h", "q4-6h", "2 hours before breakfast", "before 30 mins at bedtime", "30 mins before bed", "and 100 tabs twice a month", "100 tabs twice a month", "100 tabs once a month", "100 tabs thrice a month", "3 tabs daily for 3 days then 1 tab per day at bed", "30 tabs 10 days tid", "take 30 tabs for 10 days three times a day", "qid q6h", "bid", "qid", "30 tabs before dinner and bedtime", "30 tabs before dinner & bedtime", "take 3 tabs at bedtime", "30 tabs thrice daily for 10 days ", "30 tabs for 10 days three times a day", "Take 2 tablets a day", "qid for 10 days", "every day", "take 2 caps at bedtime", "apply 3 drops before bedtime", "take three capsules daily", "swallow 3 pills once a day", "swallow three pills thrice a day", "apply daily", "apply three drops before bedtime", "every 6 hours", "before food", "after food", "for 20 days", "for twenty days", "with meals"]
input_sigs = [['for', '5', 'to', '6', 'days'], ['inject', '2', 'units'], ['x', '2', 'weeks'], ['x', '3', 'days'], ['every', 'day'], ['every', '2', 'weeks'], ['every', '3', 'days'], ['every', '1', 'to', '2', 'months'], ['every', '2', 'to', '6', 'weeks'], ['every', '4', 'to', '6', 'days'], ['take', 'two', 'to', 'four', 'tabs'], ['take', '2', 'to', '4', 'tabs'], ['take', '3', 'tabs', 'orally', 'bid', 'for', '10', 'days', 'at', 'bedtime'], ['swallow', 'three', 'capsules', 'tid', 'orally'], ['take', '2', 'capsules', 'po', 'every', '6', 'hours'], ['take', '2', 'tabs', 'po', 'for', '10', 'days'], ['take', '100', 'caps', 'by', 'mouth', 'tid', 'for', '10', 'weeks'], ['take', '2', 'tabs', 'after', 'an', 'hour'], ['2', 'tabs', 'every', '4-6', 'hours'], ['every', '4', 'to', '6', 'hours'], ['q46h'], ['q4-6h'], ['2', 'hours', 'before', 'breakfast'], ['before', '30', 'mins', 'at', 'bedtime'], ['30', 'mins', 'before', 'bed'], ['and', '100', 'tabs', 'twice', 'a', 'month'], ['100', 'tabs', 'twice', 'a', 'month'], ['100', 'tabs', 'once', 'a', 'month'], ['100', 'tabs', 'thrice', 'a', 'month'], ['3', 'tabs', 'daily', 'for', '3', 'days', 'then', '1', 'tab', 'per', 'day', 'at', 'bed'], ['30', 'tabs', '10', 'days', 'tid'], ['take', '30', 'tabs', 'for', '10', 'days', 'three', 'times', 'a', 'day'], ['qid', 'q6h'], ['bid'], ['qid'], ['30', 'tabs', 'before', 'dinner', 'and', 'bedtime'], ['30', 'tabs', 'before', 'dinner', '&', 'bedtime'], ['take', '3', 'tabs', 'at', 'bedtime'], ['30', 'tabs', 'thrice', 'daily', 'for', '10', 'days'], ['30', 'tabs', 'for', '10', 'days', 'three', 'times', 'a', 'day'], ['take', '2', 'tablets', 'a', 'day'], ['qid', 'for', '10', 'days'], ['every', 'day'], ['take', '2', 'caps', 'at', 'bedtime'], ['apply', '3', 'drops', 'before', 'bedtime'], ['take', 'three', 'capsules', 'daily'], ['swallow', '3', 'pills', 'once', 'a', 'day'], ['swallow', 'three', 'pills', 'thrice', 'a', 'day'], ['apply', 'daily'], ['apply', 'three', 'drops', 'before', 'bedtime'], ['every', '6', 'hours'], ['before', 'food'], ['after', 'food'], ['for', '20', 'days'], ['for', 'twenty', 'days'], ['with', 'meals']]
output_labels = [['FOR', 'Duration', 'TO', 'DurationMax', 'DurationUnit'], ['Method', 'Qty', 'Form'], ['FOR', 'Duration', 'DurationUnit'], ['FOR', 'Duration', 'DurationUnit'], ['EVERY', 'Period'], ['EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['Method', 'Qty', 'TO', 'Qty', 'Form'], ['Method', 'Qty', 'TO', 'Qty', 'Form'], ['Method', 'Qty', 'Form', 'PO', 'BID', 'FOR', 'Duration', 'DurationUnit', 'AT', 'WHEN'], ['Method', 'Qty', 'Form', 'TID', 'PO'], ['Method', 'Qty', 'Form', 'PO', 'EVERY', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'PO', 'FOR', 'Duration', 'DurationUnit'], ['Method', 'Qty', 'Form', 'BY', 'PO', 'TID', 'FOR', 'Duration', 'DurationUnit'], ['Method', 'Qty', 'Form', 'AFTER', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'EVERY', 'Period', 'PeriodUnit'], ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'], ['Q46H'], ['Q4-6H'], ['Qty', 'PeriodUnit', 'BEFORE', 'WHEN'], ['BEFORE', 'Qty', 'M', 'AT', 'WHEN'], ['Qty', 'M', 'BEFORE', 'WHEN'], ['AND', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Qty', 'Form', 'Frequency', 'FOR', 'Duration', 'DurationUnit', 'THEN', 'Qty', 'Form', 'Frequency', 'PeriodUnit', 'AT', 'WHEN'], ['Qty', 'Form', 'Duration', 'DurationUnit', 'TID'], ['Method', 'Qty', 'Form', 'FOR', 'Duration', 'DurationUnit', 'Qty', 'TIMES', 'Period', 'PeriodUnit'], ['QID', 'Q6H'], ['BID'], ['QID'],['Qty', 'Form', 'BEFORE', 'WHEN', 'AND', 'WHEN'], ['Qty', 'Form', 'BEFORE', 'WHEN', 'AND', 'WHEN'], ['Method', 'Qty', 'Form', 'AT', 'WHEN'], ['Qty', 'Form', 'Frequency', 'DAILY', 'FOR', 'Duration', 'DurationUnit'], ['Qty', 'Form', 'FOR', 'Duration', 'DurationUnit', 'Frequency', 'TIMES', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'Period', 'PeriodUnit'], ['QID', 'FOR', 'Duration', 'DurationUnit'], ['EVERY', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'AT', 'WHEN'], ['Method', 'Qty', 'Form', 'BEFORE', 'WHEN'], ['Method', 'Qty', 'Form', 'DAILY'], ['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Method', 'Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'], ['Method', 'DAILY'], ['Method', 'Qty', 'Form', 'BEFORE', 'WHEN'], ['EVERY', 'Period', 'PeriodUnit'], ['BEFORE', 'FOOD'], ['AFTER', 'FOOD'], ['FOR', 'Duration', 'DurationUnit'], ['FOR', 'Duration', 'DurationUnit'], ['WITH', 'FOOD']]

In [596]:
len(sigs), len(input_sigs) , len(output_labels)

(56, 56, 56)

### Creating a Tuples Maker method
Create the tuples as given below by writing a function **tuples_maker(input_sigs, output_labels)** and returns **output** as given below

Input(s): 
- input_sigs
- output_lables

Output:

[[('for', 'FOR'),
  ('5', 'Duration'),
  ('to', 'TO'),
  ('6', 'DurationMax'),
  ('days', 'DurationUnit')], [second sentence], ...]

In [598]:
def tuples_maker(input_sigs, output_labels):
    
    sample_data = []
    for tokens, labels in zip(input_sigs, output_labels):
        sentence_tuples = [(token, label) for token, label in zip(tokens, labels)]
        sample_data.append(sentence_tuples)

    return sample_data

In [599]:
result = tuples_maker(input_sigs, output_labels)
result


[[('for', 'FOR'),
  ('5', 'Duration'),
  ('to', 'TO'),
  ('6', 'DurationMax'),
  ('days', 'DurationUnit')],
 [('inject', 'Method'), ('2', 'Qty'), ('units', 'Form')],
 [('x', 'FOR'), ('2', 'Duration'), ('weeks', 'DurationUnit')],
 [('x', 'FOR'), ('3', 'Duration'), ('days', 'DurationUnit')],
 [('every', 'EVERY'), ('day', 'Period')],
 [('every', 'EVERY'), ('2', 'Period'), ('weeks', 'PeriodUnit')],
 [('every', 'EVERY'), ('3', 'Period'), ('days', 'PeriodUnit')],
 [('every', 'EVERY'),
  ('1', 'Period'),
  ('to', 'TO'),
  ('2', 'PeriodMax'),
  ('months', 'PeriodUnit')],
 [('every', 'EVERY'),
  ('2', 'Period'),
  ('to', 'TO'),
  ('6', 'PeriodMax'),
  ('weeks', 'PeriodUnit')],
 [('every', 'EVERY'),
  ('4', 'Period'),
  ('to', 'TO'),
  ('6', 'PeriodMax'),
  ('days', 'PeriodUnit')],
 [('take', 'Method'),
  ('two', 'Qty'),
  ('to', 'TO'),
  ('four', 'Qty'),
  ('tabs', 'Form')],
 [('take', 'Method'),
  ('2', 'Qty'),
  ('to', 'TO'),
  ('4', 'Qty'),
  ('tabs', 'Form')],
 [('take', 'Method'),
  ('3', 

### Creating the triples_maker( ) for feature extraction
- input: tuples_maker_output
- output: 
[[('for', 'IN', 'FOR'),
  ('5', 'CD', 'Duration'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'DurationMax'),
  ('days', 'NNS', 'DurationUnit')], [second sentence], ... ]

In [601]:

def triples_maker(whole_data):
    sample_data = []
    
    for sentence in whole_data:
        # Extract tokens and labels separately
        tokens, labels = zip(*sentence)
        
        # Get POS tags for the tokens
        pos_tags = nltk.pos_tag(tokens)
        
        # Form triples of (token, POS, label) and append to sample_data
        sentence_data = [(token, pos, label) for (token, pos), label in zip(pos_tags, labels)]
        sample_data.append(sentence_data)    
    return sample_data 



In [602]:
whole_data = tuples_maker(input_sigs, output_labels)

sample_data = triples_maker(whole_data)
sample_data


[[('for', 'IN', 'FOR'),
  ('5', 'CD', 'Duration'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'DurationMax'),
  ('days', 'NNS', 'DurationUnit')],
 [('inject', 'JJ', 'Method'), ('2', 'CD', 'Qty'), ('units', 'NNS', 'Form')],
 [('x', 'RB', 'FOR'),
  ('2', 'CD', 'Duration'),
  ('weeks', 'NNS', 'DurationUnit')],
 [('x', 'RB', 'FOR'),
  ('3', 'CD', 'Duration'),
  ('days', 'NNS', 'DurationUnit')],
 [('every', 'DT', 'EVERY'), ('day', 'NN', 'Period')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('3', 'CD', 'Period'),
  ('days', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('1', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('2', 'CD', 'PeriodMax'),
  ('months', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('2', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('weeks', 'NNS', 'PeriodUnit')],
 [('every', 'DT', 'EVERY'),
  ('4', 'CD', 'Period'),
  ('to', 'TO', 'TO'),
  ('6', 'CD', 'PeriodMax'),
  ('

### Creating the features extractor method (GIVEN as a BASELINE)
#### The features used are:
- SOS, EOS, lowercase, uppercase, title, digit, postag, previous_tag, next_tag
#### Feel free to include more features

In [604]:
def token_to_features(doc, i):
    word = doc[i][0]
    postag = doc[i][1]

    # Common features for all words
    features = [
        'bias',
        'word.lower=' + word.lower(),
        'word[-3:]=' + word[-3:],
        'word[-2:]=' + word[-2:],
        'word.isupper=%s' % word.isupper(),
        'word.istitle=%s' % word.istitle(),
        'word.isdigit=%s' % word.isdigit(),
        'postag=' + postag
    ]

    # Features for words that are not
    # at the beginning of a document
    if i > 0:
        word1 = doc[i-1][0]
        postag1 = doc[i-1][1]
        features.extend([
            '-1:word.lower=' + word1.lower(),
            '-1:word.istitle=%s' % word1.istitle(),
            '-1:word.isupper=%s' % word1.isupper(),
            '-1:word.isdigit=%s' % word1.isdigit(),
            '-1:postag=' + postag1
        ])
    else:
        # Indicate that it is the 'beginning of a document'
        features.append('BOS')

    # Features for words that are not
    # at the end of a document
    if i < len(doc)-1:
        word1 = doc[i+1][0]
        postag1 = doc[i+1][1]
        features.extend([
            '+1:word.lower=' + word1.lower(),
            '+1:word.istitle=%s' % word1.istitle(),
            '+1:word.isupper=%s' % word1.isupper(),
            '+1:word.isdigit=%s' % word1.isdigit(),
            '+1:postag=' + postag1
        ])
    else:
        # Indicate that it is the 'end of a document'
        features.append('EOS')

    return features

### Running the feature extractor on the training data 
- Feature extraction
- Train-test-split

In [606]:

def get_features(sentence):
    # Extract features from tokens and labels in the sentence
    features = []
    for i, token in enumerate(sentence):
        token_features = {
            'word': token[0],
            'POS': token[1],
            'label': token[2],  # Assuming token is a tuple (word, POS, label)
        }
        features.append(token_features)
    return features

In [607]:

# Function to prepare the dataset with features
def prepare_dataset(input_sigs, output_labels):
    whole_data = []
    for sig, labels in zip(input_sigs, output_labels):
        sentence = [(sig[i], labels[i]) for i in range(len(sig))]
        features = get_features(sentence)
        whole_data.append(features)
    return whole_data


X = [get_features(doc) for doc in sample_data]
y = [get_labels(doc) for doc in sample_data]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


### Training the CRF model with the features extracted using the feature extractor method

In [609]:
import pycrfsuite


In [610]:
# Initialise the trainer
trainer = pycrfsuite.Trainer(algorithm='lbfgs')

# Submit training data to the trainer
for x, y in zip(X_train, y_train):
    trainer.append(x, y)

# Set the parameters of the model
trainer.set_params({'c1': 0.1,'c2': 0.1,'max_iterations': 1000,'feature.possible_transitions': True, 'feature.possible_transitions': True,})

# Providing a file name as a parameter to the train function, such that the model will be saved to the file when training is finished
trainer.train('crf_m_o_d_e_l.crfsuite')


Feature generation
type: CRF1d
feature.minfreq: 0.000000
feature.possible_states: 0
feature.possible_transitions: 1
0....1....2....3....4....5....6....7....8....9....10
Number of features: 1054
Seconds required: 0.001

L-BFGS optimization
c1: 0.100000
c2: 0.100000
num_memories: 6
max_iterations: 1000
epsilon: 0.000010
stop: 10
delta: 0.000010
linesearch: MoreThuente
linesearch.max_iterations: 20

***** Iteration #1 *****
Loss: 620.263290
Feature norm: 1.000000
Error norm: 82.630721
Active features: 1054
Line search trials: 1
Line search step: 0.011798
Seconds required for this iteration: 0.000

***** Iteration #2 *****
Loss: 600.499614
Feature norm: 53.850437
Error norm: 29.746222
Active features: 1054
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 0.000

***** Iteration #3 *****
Loss: 343.860543
Feature norm: 48.130195
Error norm: 17.299463
Active features: 490
Line search trials: 1
Line search step: 1.000000
Seconds required for this iteration: 

### Predicting the test data with the built model

In [612]:

# Load the trained model
crf_model = pycrfsuite.Tagger()
crf_model.open('crf_m_o_d_e_l.crfsuite')


<contextlib.closing at 0x315f8ca10>

In [613]:
# Predict the labels for the test data
y_pred = [crf_model.tag(x) for x in X_test]

# Flatten the predictions and true labels for evaluation
y_pred_flat = [item for sublist in y_pred for item in sublist]
y_test_flat = [item for sublist in y_test for item in sublist]
y_pred

[['FOR', 'Duration', 'TO', 'Duration', 'DurationUnit'],
 ['EVERY', 'Period', 'PeriodUnit'],
 ['WHEN'],
 ['Method', 'Qty', 'Form', 'TID', 'PO'],
 ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit'],
 ['EVERY', 'Period', 'PeriodUnit'],
 ['Qty', 'Form', 'BEFORE', 'WHEN', 'AND', 'WHEN'],
 ['Qty', 'Form', 'Frequency', 'Period', 'PeriodUnit'],
 ['Method', 'Qty', 'Form', 'BEFORE', 'WHEN'],
 ['Method',
  'Qty',
  'Form',
  'PO',
  'TID',
  'FOR',
  'Duration',
  'DurationUnit',
  'AT',
  'WHEN'],
 ['FOR', 'Duration', 'DurationUnit'],
 ['FOR', 'Duration', 'DurationUnit']]

In [614]:

# Calculate accuracy
accuracy = accuracy_score(y_test_flat, y_pred_flat)
accuracy


0.9444444444444444

In [615]:
# Generate classification report with zero_division parameter to handle undefined metrics

report = classification_report(y_test_flat, y_pred_flat, zero_division=1)

# Print the classification report
print("Classification Report:")
print(report)



Classification Report:
              precision    recall  f1-score   support

         AND       1.00      1.00      1.00         1
          AT       1.00      1.00      1.00         1
      BEFORE       1.00      1.00      1.00         2
         BID       1.00      0.00      0.00         2
    Duration       0.80      1.00      0.89         4
 DurationMax       1.00      0.00      0.00         1
DurationUnit       1.00      1.00      1.00         4
       EVERY       1.00      1.00      1.00         3
         FOR       1.00      1.00      1.00         4
        Form       1.00      1.00      1.00         5
   Frequency       1.00      1.00      1.00         1
      Method       1.00      1.00      1.00         3
          PO       1.00      1.00      1.00         2
      Period       1.00      1.00      1.00         4
   PeriodMax       1.00      1.00      1.00         1
  PeriodUnit       1.00      1.00      1.00         4
         Qty       1.00      1.00      1.00         5
    

The classification report shows that model performs well with an overall accuracy of 94%. It has perfect precision and recall for most labels (like "AND," "AT," "Form"), but struggles with some labels like "BID" and "DurationMax" (recall of 0.00), meaning it didn't correctly identify any instances of those labels. The model's F1-score is generally high, except for certain labels. 

### Putting all the prediction logic inside a predict method

In [618]:
def get_features_from_sentence(sentence):
    """
    Converts a sentence into features using the feature extraction method for CRF.
    @param sentence: The sentence to be processed
    @return: List of feature dictionaries for each word in the sentence
    """
    words = sentence.split()  # Split sentence into words
    features = []
    for word in words:
        word_features = {
            'word': word,
            'is_first': word == words[0],
            'is_last': word == words[-1],
            'word_len': len(word),
        }
        features.append(word_features)
    return features

def predict(sig):
    """
    predict(sig)
    Purpose: Labels the given sig into corresponding labels
    @param sig: A Sentence (A medical prescription sig written by a doctor)
    @return: A list with predicted labels (first level of labeling)
    """
   # Load the pre-trained model
    crf_model = pycrfsuite.Tagger()
    crf_model.open('crf_m_o_d_e_l.crfsuite')
    
    # Extract features from the input sentence
    features = get_features_from_sentence(sig)
    
    # Predict the labels using the trained CRF model
    predicted_labels = crf_model.tag(features)
    
    # Format the output in the desired format
    predictions = [[sig.split()] + [predicted_labels]]  # Format the output as a list of list with sentence and labels

    return predictions


### Sample predictions

In [620]:
predictions = predict("take 2 tabs every 6 hours x 10 days")
predictions

[[['take', '2', 'tabs', 'every', '6', 'hours', 'x', '10', 'days'],
  ['Method',
   'Qty',
   'Form',
   'EVERY',
   'Period',
   'PeriodUnit',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [621]:
predictions = predict("2 capsu for 10 day at bed")
predictions

[[['2', 'capsu', 'for', '10', 'day', 'at', 'bed'],
  ['Duration',
   'DurationUnit',
   'FOR',
   'Duration',
   'DurationUnit',
   'AT',
   'WHEN']]]

In [622]:
predictions = predict("2 capsu for 10 days at bed")
predictions

[[['2', 'capsu', 'for', '10', 'days', 'at', 'bed'],
  ['Duration',
   'DurationUnit',
   'FOR',
   'Duration',
   'DurationUnit',
   'AT',
   'WHEN']]]

In [623]:
predictions = predict("5 days 2 tabs at bed")
predictions

[[['5', 'days', '2', 'tabs', 'at', 'bed'],
  ['Duration', 'DurationUnit', 'Qty', 'Form', 'AT', 'WHEN']]]

In [624]:
predictions = predict("3 tabs qid x 10 weeks")
predictions

[[['3', 'tabs', 'qid', 'x', '10', 'weeks'],
  ['Qty', 'Form', 'QID', 'FOR', 'Duration', 'DurationUnit']]]

In [625]:
predictions = predict("x 30 days")
predictions

[[['x', '30', 'days'], ['FOR', 'Duration', 'DurationUnit']]]

In [626]:
predictions = predict("x 20 months")
predictions

[[['x', '20', 'months'], ['FOR', 'Duration', 'DurationUnit']]]

In [627]:
predictions = predict("take 2 tabs po tid for 10 days")
predictions

[[['take', '2', 'tabs', 'po', 'tid', 'for', '10', 'days'],
  ['Method', 'Qty', 'Form', 'PO', 'TID', 'FOR', 'Duration', 'DurationUnit']]]

In [628]:
predictions = predict("take 2 capsules po every 6 hours")
predictions

[[['take', '2', 'capsules', 'po', 'every', '6', 'hours'],
  ['Method', 'Qty', 'Form', 'PO', 'EVERY', 'Period', 'PeriodUnit']]]

In [629]:
predictions = predict("inject 2 units pu tid")
predictions

[[['inject', '2', 'units', 'pu', 'tid'],
  ['FOR', 'Duration', 'DurationUnit', 'Duration', 'DurationUnit']]]

In [630]:
predictions = predict("swallow 3 caps tid by mouth")
predictions

[[['swallow', '3', 'caps', 'tid', 'by', 'mouth'],
  ['FOR', 'Duration', 'DurationUnit', 'TID', 'BY', 'PO']]]

In [631]:
predictions = predict("inject 3 units orally")
predictions

[[['inject', '3', 'units', 'orally'],
  ['FOR', 'Duration', 'DurationUnit', 'THEN']]]

In [632]:
predictions = predict("orally take 3 tabs tid")
predictions

[[['orally', 'take', '3', 'tabs', 'tid'],
  ['FOR', 'Duration', 'DurationUnit', 'Duration', 'DurationUnit']]]

In [633]:
predictions = predict("by mouth take three caps")
predictions

[[['by', 'mouth', 'take', 'three', 'caps'],
  ['BY', 'PO', 'FOR', 'Duration', 'DurationUnit']]]

In [634]:
predictions = predict("take 3 tabs orally three times a day for 10 days at bedtime")
predictions

[[['take',
   '3',
   'tabs',
   'orally',
   'three',
   'times',
   'a',
   'day',
   'for',
   '10',
   'days',
   'at',
   'bedtime'],
  ['Method',
   'Qty',
   'Form',
   'Duration',
   'DurationUnit',
   'TIMES',
   'Period',
   'PeriodUnit',
   'FOR',
   'Duration',
   'DurationUnit',
   'AT',
   'WHEN']]]

In [635]:
predictions = predict("take 3 tabs orally bid for 10 days at bedtime")
predictions

[[['take', '3', 'tabs', 'orally', 'bid', 'for', '10', 'days', 'at', 'bedtime'],
  ['Method',
   'Qty',
   'Form',
   'Duration',
   'DurationUnit',
   'FOR',
   'Duration',
   'DurationUnit',
   'AT',
   'WHEN']]]

In [636]:
predictions = predict("take 3 tabs bid orally at bed")
predictions

[[['take', '3', 'tabs', 'bid', 'orally', 'at', 'bed'],
  ['Method', 'Qty', 'Form', 'Frequency', 'FOR', 'Duration', 'DurationUnit']]]

In [637]:
predictions = predict("take 10 capsules by mouth qid")
predictions

[[['take', '10', 'capsules', 'by', 'mouth', 'qid'],
  ['FOR', 'Duration', 'DurationUnit', 'FOR', 'Duration', 'DurationUnit']]]

In [638]:
predictions = predict("inject 10 units orally qid x 3 months")
predictions

[[['inject', '10', 'units', 'orally', 'qid', 'x', '3', 'months'],
  ['FOR',
   'Duration',
   'DurationUnit',
   'THEN',
   'QID',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [639]:
prediction = predict("please take 2 tablets per day for a month in the morning and evening each day")
predictions

[[['inject', '10', 'units', 'orally', 'qid', 'x', '3', 'months'],
  ['FOR',
   'Duration',
   'DurationUnit',
   'THEN',
   'QID',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [640]:
prediction = predict("Amoxcicillin QID 30 tablets")
predictions

[[['inject', '10', 'units', 'orally', 'qid', 'x', '3', 'months'],
  ['FOR',
   'Duration',
   'DurationUnit',
   'THEN',
   'QID',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [641]:
prediction = predict("take 3 tabs TID for 90 days with food")
prediction

[[['take', '3', 'tabs', 'TID', 'for', '90', 'days', 'with', 'food'],
  ['Method',
   'Qty',
   'Form',
   'Frequency',
   'FOR',
   'Duration',
   'DurationUnit',
   'WITH',
   'FOOD']]]

In [642]:
prediction = predict("with food take 3 tablets per day for 90 days")
prediction

[[['with', 'food', 'take', '3', 'tablets', 'per', 'day', 'for', '90', 'days'],
  ['WITH',
   'FOOD',
   'FOR',
   'Duration',
   'DurationUnit',
   'Duration',
   'DurationUnit',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [643]:
prediction = predict("with food take 3 tablets per week for 90 weeks")
print(prediction)

[[['with', 'food', 'take', '3', 'tablets', 'per', 'week', 'for', '90', 'weeks'], ['WITH', 'FOOD', 'FOR', 'Duration', 'DurationUnit', 'Duration', 'DurationUnit', 'FOR', 'Duration', 'DurationUnit']]]


In [644]:
prediction = predict("take 2-4 tabs")
print(prediction)

[[['take', '2-4', 'tabs'], ['Method', 'Qty', 'Form']]]


In [645]:
prediction = predict("take 2 to 4 tabs")
prediction

[[['take', '2', 'to', '4', 'tabs'],
  ['FOR', 'Duration', 'DurationUnit', 'Qty', 'Form']]]

In [646]:
prediction = predict("take two to four tabs")
prediction

[[['take', 'two', 'to', 'four', 'tabs'],
  ['FOR', 'Duration', 'DurationUnit', 'Qty', 'Form']]]

In [647]:
prediction = predict("take 2-4 tabs for 8 to 9 days")
prediction

[[['take', '2-4', 'tabs', 'for', '8', 'to', '9', 'days'],
  ['Method',
   'Qty',
   'Form',
   'FOR',
   'Duration',
   'DurationUnit',
   'Duration',
   'DurationUnit']]]

In [648]:
prediction = predict("take 20 tabs every 6 to 8 days")
prediction

[[['take', '20', 'tabs', 'every', '6', 'to', '8', 'days'],
  ['Method',
   'Qty',
   'Form',
   'EVERY',
   'Period',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [649]:
prediction = predict("take 2 tabs every 4 to 6 days")
prediction

[[['take', '2', 'tabs', 'every', '4', 'to', '6', 'days'],
  ['Method',
   'Qty',
   'Form',
   'EVERY',
   'Period',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [650]:
prediction = predict("take 2 tabs every 2 to 10 weeks")
prediction

[[['take', '2', 'tabs', 'every', '2', 'to', '10', 'weeks'],
  ['Method',
   'Qty',
   'Form',
   'EVERY',
   'Period',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [651]:
prediction = predict("take 2 tabs every 4 to 6 days")
prediction

[[['take', '2', 'tabs', 'every', '4', 'to', '6', 'days'],
  ['Method',
   'Qty',
   'Form',
   'EVERY',
   'Period',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [652]:
prediction = predict("take 2 tabs every 2 to 10 months")
prediction

[[['take', '2', 'tabs', 'every', '2', 'to', '10', 'months'],
  ['Method',
   'Qty',
   'Form',
   'EVERY',
   'Period',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [653]:
prediction = predict("every 60 mins")
prediction

[[['every', '60', 'mins'], ['EVERY', 'Period', 'PeriodUnit']]]

In [654]:
prediction = predict("every 10 mins")
prediction

[[['every', '10', 'mins'], ['FOR', 'Duration', 'DurationUnit']]]

In [655]:
prediction = predict("every two to four months")
prediction

[[['every', 'two', 'to', 'four', 'months'],
  ['EVERY', 'Period', 'TO', 'PeriodMax', 'PeriodUnit']]]

In [656]:
prediction = predict("take 2 tabs every 3 to 4 days")
prediction

[[['take', '2', 'tabs', 'every', '3', 'to', '4', 'days'],
  ['Method',
   'Qty',
   'Form',
   'EVERY',
   'Period',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [657]:
prediction = predict("every 3 to 4 days take 20 tabs")
prediction

[[['every', '3', 'to', '4', 'days', 'take', '20', 'tabs'],
  ['EVERY',
   'Period',
   'FOR',
   'Duration',
   'DurationUnit',
   'Method',
   'Qty',
   'Form']]]

In [658]:
prediction = predict("once in every 3 days take 3 tabs")
prediction

[[['once', 'in', 'every', '3', 'days', 'take', '3', 'tabs'],
  ['FOR',
   'Duration',
   'DurationUnit',
   'Duration',
   'DurationUnit',
   'Method',
   'Qty',
   'Form']]]

In [659]:
prediction = predict("take 3 tabs once in every 3 days")
prediction

[[['take', '3', 'tabs', 'once', 'in', 'every', '3', 'days'],
  ['Method',
   'Qty',
   'Form',
   'FOR',
   'Duration',
   'DurationUnit',
   'Duration',
   'DurationUnit']]]

In [660]:
prediction = predict("orally take 20 tabs every 4-6 weeks")
prediction

[[['orally', 'take', '20', 'tabs', 'every', '4-6', 'weeks'],
  ['FOR',
   'Duration',
   'DurationUnit',
   'TID',
   'FOR',
   'Duration',
   'DurationUnit']]]

In [661]:
prediction = predict("10 tabs x 2 days")
prediction

[[['10', 'tabs', 'x', '2', 'days'],
  ['Duration', 'DurationUnit', 'FOR', 'Duration', 'DurationUnit']]]

In [662]:
prediction = predict("3 capsule x 15 days")
prediction

[[['3', 'capsule', 'x', '15', 'days'],
  ['Duration', 'DurationUnit', 'FOR', 'Duration', 'DurationUnit']]]

In [663]:
prediction = predict("10 tabs")
prediction

[[['10', 'tabs'], ['Duration', 'DurationUnit']]]