In [1]:
import re
import os
import csv
import torch
os.environ["USE_TORCH"] = "1"
import numpy as np
import pandas as pd
from transformers import DistilBertTokenizerFast
from sklearn.model_selection import train_test_split

In [2]:
tokens, tags = [], []

with open(r'C:\Users\anu10961\Work\POC\submission\autonomiq\data\tokens.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader)
    for row in csv_reader:
        if len(row): tokens.append(row)

In [3]:
with open(r'C:\Users\anu10961\Work\POC\submission\autonomiq\data\tags.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader)
    for row in csv_reader:
        if len(row): tags.append(row)

In [4]:
len(tokens), len(tags)

(434, 434)

In [5]:
def sanity(tokens, tags, diff=0):
    for l1, l2 in zip(tokens, tags):
        if abs(len(l1) - len(l2)) != diff: raise Exception
            
sanity(tokens, tags)

In [6]:
train_tokens, valid_tokens, tr_tags, val_tags = train_test_split(tokens, tags, test_size=.1)

In [7]:
unique_tags = ["action", "label", "data", "O"]

tag2id = {tag: id for id, tag in enumerate(unique_tags)}
id2tag = {id: tag for tag, id in tag2id.items()}

In [8]:
tag2id, id2tag

({'action': 0, 'label': 1, 'data': 2, 'O': 3},
 {0: 'action', 1: 'label', 2: 'data', 3: 'O'})

In [9]:
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

In [10]:
def get_tokenized_token_tags(tokens_list, tags_list):
    tokenized_ids, tokenized_tags = [], []
    for tokens, tags in zip(tokens_list, tags_list):
        target_ids, target_tags = list(), list()
        for index, token in enumerate(tokens):
            token_breakup_list = tokenizer.encode(token, add_special_tokens=False)
            num_items = len(token_breakup_list)
            target_ids.extend(token_breakup_list)
            tag = tags[index]
            target_tags.extend([tag] * num_items)
        tokenized_ids.append(target_ids)
        tokenized_tags.append(["O"] + target_tags + ["O"])
        
    return tokenized_ids, tokenized_tags

train_ids, train_tags = get_tokenized_token_tags(train_tokens, tr_tags)
valid_ids, valid_tags = get_tokenized_token_tags(valid_tokens, val_tags)

In [44]:
str1 = " ".join(['navigate', 'to', 'payables', '-', '>', 'payments', 'and', 'deselect', 'manage', 'payments'])

In [45]:
print(str1)
print(len(str1.split(" ")))

navigate to payables - > payments and deselect manage payments
10


In [43]:
tok_ids = tokenizer("payables").get("input_ids")

tokenizer.convert_ids_to_tokens(tok_ids, skip_special_tokens=True)

['pay', '##able', '##s']

In [46]:
print(tokenizer.convert_ids_to_tokens(train_ids[0]))
print(len(tokenizer.convert_ids_to_tokens(train_ids[0])))
print(train_tags[0])

['navigate', 'to', 'pay', '##able', '##s', '-', '>', 'payments', 'and', 'des', '##ele', '##ct', 'manage', 'payments']
14
['O', 'action', 'O', 'label', 'label', 'label', 'O', 'O', 'label', 'O', 'action', 'action', 'action', 'label', 'label', 'O']


In [12]:
sanity(train_ids, train_tags, 2)

In [13]:
sanity(valid_ids, valid_tags, 2)

In [14]:
train_encodings = tokenizer(train_tokens, is_split_into_words=True, return_offsets_mapping=True, padding=True, truncation=True)
val_encodings = tokenizer(valid_tokens, is_split_into_words=True, return_offsets_mapping=True, padding=True, truncation=True)

In [15]:
train_encodings[0]

Encoding(num_tokens=110, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])

In [16]:
print(train_encodings[0].attention_mask)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [17]:
max_len_train = max([len(x) for x in train_encodings["input_ids"]])
max_len_val = max([len(x) for x in val_encodings["input_ids"]])

In [18]:
max_len_train, max_len_val

(110, 67)

In [19]:
def encode_tags(tags, max_len):
    #labels = [[tag2id[tag] for tag in doc] for doc in tags]
    encoded_labels, otag_id = [], tag2id["O"]
    for doc_labels in tags:
        tmp_list = [-100 for i in range(max_len)]
        for index, item in enumerate(doc_labels):
            tmp_list[index] = tag2id[item]
        encoded_labels.append(tmp_list)
    return encoded_labels

train_labels = encode_tags(train_tags, max_len_train)
val_labels = encode_tags(valid_tags, max_len_val)

In [20]:
import torch

class AutonomIQDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_encodings.pop("offset_mapping") # we don't want to pass this to the model
val_encodings.pop("offset_mapping")

train_dataset = AutonomIQDataset(train_encodings, train_labels)
val_dataset = AutonomIQDataset(val_encodings, val_labels)

In [21]:
from transformers import DistilBertForTokenClassification
model = DistilBertForTokenClassification.from_pretrained('distilbert-base-uncased', num_labels=len(unique_tags))
model = model.cuda()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN t

In [34]:
model.config

DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "vocab_size": 30522
}

In [22]:
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

average = "micro"

def compute_metrics(p):
    average = "micro"
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Remove ignored index (special tokens)
    true_predictions = [
        p for prediction, label in zip(predictions, labels) for (p, l) in zip(prediction, label) if l != -100 
    ]
    true_labels = [
        l for prediction, label in zip(predictions, labels) for (p, l) in zip(prediction, label) if l != -100
    ]

    #results = classification_report(true_labels, true_predictions, target_names=tag2id, output_dict=True)
    results = {
        "accuracy": accuracy_score(true_labels, true_predictions),
        "precision": precision_score(true_labels, true_predictions, average=average),
        "recall": recall_score(true_labels, true_predictions, average=average),
        "f1": f1_score(true_labels, true_predictions, average=average)
    }
    return results

In [23]:
def compute_metrics_per_label(labels, predictions):
    # Remove ignored index (special tokens)
    true_predictions = [
        p for prediction, label in zip(predictions, labels) for (p, l) in zip(prediction, label) if l != -100 
    ]
    true_labels = [
        l for prediction, label in zip(predictions, labels) for (p, l) in zip(prediction, label) if l != -100
    ]
    
    print(classification_report(true_labels, true_predictions, target_names=tag2id))

In [24]:
def evaluate(dataset):
    y_true, y_pred = [], []
    for item in dataset:
        val_labels, input_ids, attention_mask = item.get("labels").tolist(), item.get("input_ids"), item.get("attention_mask")
        input_ids, attention_mask = input_ids.resize(1, input_ids.size()[0]).cuda(), attention_mask.resize(1, input_ids.size()[0]).cuda()
        output = model.forward(input_ids, attention_mask)
        tokens = input_ids.tolist()
        pred_labels = output[0].argmax(2).tolist()
        
        y_true.append(val_labels)
        y_pred.append(pred_labels[0])
    return y_true, y_pred

In [25]:
y_true, y_pred = evaluate(val_dataset)
compute_metrics_per_label(y_true, y_pred)



              precision    recall  f1-score   support

      action       0.17      0.69      0.27       125
       label       0.21      0.10      0.13       191
        data       0.13      0.28      0.17        25
           O       0.47      0.14      0.22       456

    accuracy                           0.22       797
   macro avg       0.24      0.30      0.20       797
weighted avg       0.35      0.22      0.21       797



In [26]:
from transformers import Trainer, TrainingArguments

In [27]:
# tag2id.pop("O", None)

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=5,              # total number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=40,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    do_eval=True,
    evaluate_during_training=True,
    evaluation_strategy="epoch"
)

trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,             # evaluation dataset
    compute_metrics=compute_metrics
)

trainer.train()



HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value='Iteration'), FloatProgress(value=0.0, max=25.0), HTML(value='')))

{'loss': 1.3458271026611328, 'learning_rate': 1.25e-05, 'epoch': 0.4, 'total_flos': 7008244953600, 'step': 10}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 1.2045083045959473, 'eval_accuracy': 0.5784190715181933, 'eval_precision': 0.5784190715181933, 'eval_recall': 0.5784190715181933, 'eval_f1': 0.5784190715181933, 'epoch': 0.4, 'total_flos': 7008244953600, 'step': 10}
{'loss': 1.0510946273803712, 'learning_rate': 2.5e-05, 'epoch': 0.8, 'total_flos': 14016489907200, 'step': 20}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.8882801532745361, 'eval_accuracy': 0.5759096612296111, 'eval_precision': 0.5759096612296111, 'eval_recall': 0.5759096612296111, 'eval_f1': 0.5759096612296111, 'epoch': 0.8, 'total_flos': 14016489907200, 'step': 20}



HBox(children=(HTML(value='Iteration'), FloatProgress(value=0.0, max=25.0), HTML(value='')))

{'loss': 0.7429004669189453, 'learning_rate': 3.7500000000000003e-05, 'epoch': 1.2, 'total_flos': 20586719551200, 'step': 30}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.5230689644813538, 'eval_accuracy': 0.823086574654956, 'eval_precision': 0.823086574654956, 'eval_recall': 0.823086574654956, 'eval_f1': 0.823086574654956, 'epoch': 1.2, 'total_flos': 20586719551200, 'step': 30}
{'loss': 0.4461969375610352, 'learning_rate': 5e-05, 'epoch': 1.6, 'total_flos': 27594964504800, 'step': 40}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.3081311285495758, 'eval_accuracy': 0.8820577164366374, 'eval_precision': 0.8820577164366374, 'eval_recall': 0.8820577164366374, 'eval_f1': 0.8820577164366374, 'epoch': 1.6, 'total_flos': 27594964504800, 'step': 40}
{'loss': 0.2697273254394531, 'learning_rate': 4.411764705882353e-05, 'epoch': 2.0, 'total_flos': 34165194148800, 'step': 50}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.22805634140968323, 'eval_accuracy': 0.917189460476788, 'eval_precision': 0.917189460476788, 'eval_recall': 0.917189460476788, 'eval_f1': 0.917189460476788, 'epoch': 2.0, 'total_flos': 34165194148800, 'step': 50}



HBox(children=(HTML(value='Iteration'), FloatProgress(value=0.0, max=25.0), HTML(value='')))

{'loss': 0.20343360900878907, 'learning_rate': 3.8235294117647055e-05, 'epoch': 2.4, 'total_flos': 41173439102400, 'step': 60}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.22505517303943634, 'eval_accuracy': 0.9360100376411543, 'eval_precision': 0.9360100376411543, 'eval_recall': 0.9360100376411543, 'eval_f1': 0.9360100376411543, 'epoch': 2.4, 'total_flos': 41173439102400, 'step': 60}
{'loss': 0.13460731506347656, 'learning_rate': 3.235294117647059e-05, 'epoch': 2.8, 'total_flos': 48181684056000, 'step': 70}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.19071367383003235, 'eval_accuracy': 0.9435382685069009, 'eval_precision': 0.9435382685069009, 'eval_recall': 0.9435382685069009, 'eval_f1': 0.9435382685069009, 'epoch': 2.8, 'total_flos': 48181684056000, 'step': 70}



HBox(children=(HTML(value='Iteration'), FloatProgress(value=0.0, max=25.0), HTML(value='')))

{'loss': 0.10393409729003907, 'learning_rate': 2.647058823529412e-05, 'epoch': 3.2, 'total_flos': 54751913700000, 'step': 80}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.18758872151374817, 'eval_accuracy': 0.9473023839397742, 'eval_precision': 0.9473023839397742, 'eval_recall': 0.9473023839397742, 'eval_f1': 0.9473023839397742, 'epoch': 3.2, 'total_flos': 54751913700000, 'step': 80}
{'loss': 0.08570594787597656, 'learning_rate': 2.058823529411765e-05, 'epoch': 3.6, 'total_flos': 61760158653600, 'step': 90}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.18657055497169495, 'eval_accuracy': 0.9523212045169385, 'eval_precision': 0.9523212045169385, 'eval_recall': 0.9523212045169385, 'eval_f1': 0.9523212045169385, 'epoch': 3.6, 'total_flos': 61760158653600, 'step': 90}
{'loss': 0.07507057189941406, 'learning_rate': 1.4705882352941177e-05, 'epoch': 4.0, 'total_flos': 68330388297600, 'step': 100}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.17002694308757782, 'eval_accuracy': 0.9510664993726474, 'eval_precision': 0.9510664993726474, 'eval_recall': 0.9510664993726474, 'eval_f1': 0.9510664993726474, 'epoch': 4.0, 'total_flos': 68330388297600, 'step': 100}



HBox(children=(HTML(value='Iteration'), FloatProgress(value=0.0, max=25.0), HTML(value='')))

{'loss': 0.04829330444335937, 'learning_rate': 8.823529411764707e-06, 'epoch': 4.4, 'total_flos': 75338633251200, 'step': 110}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.16724401712417603, 'eval_accuracy': 0.958594730238394, 'eval_precision': 0.958594730238394, 'eval_recall': 0.958594730238394, 'eval_f1': 0.958594730238394, 'epoch': 4.4, 'total_flos': 75338633251200, 'step': 110}
{'loss': 0.050584030151367185, 'learning_rate': 2.9411764705882355e-06, 'epoch': 4.8, 'total_flos': 82346878204800, 'step': 120}


HBox(children=(HTML(value='Evaluation'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


{'eval_loss': 0.16824127733707428, 'eval_accuracy': 0.958594730238394, 'eval_precision': 0.958594730238394, 'eval_recall': 0.958594730238394, 'eval_f1': 0.958594730238394, 'epoch': 4.8, 'total_flos': 82346878204800, 'step': 120}




TrainOutput(global_step=125, training_loss=0.3667749328613281)

In [28]:
y_true, y_pred = evaluate(val_dataset)
compute_metrics_per_label(y_true, y_pred)

              precision    recall  f1-score   support

      action       0.98      0.98      0.98       125
       label       0.92      0.92      0.92       191
        data       1.00      0.96      0.98        25
           O       0.96      0.96      0.96       456

    accuracy                           0.96       797
   macro avg       0.97      0.96      0.96       797
weighted avg       0.96      0.96      0.96       797



In [29]:
def clean_data(text):
    text = re.sub('([!?,\'".\n\-\:\*\/])', r' \1 ', text)
    text = re.sub(' +', ' ', text)
    return text.strip()


def generate_inference(text):
    if isinstance(text, str): text=[text]
    text_list = [clean_data(x) for x in text]
    token_list = [x.split(" ") for x in text_list]
    encodings = tokenizer.batch_encode_plus(token_list, max_length=max_len_train, padding=True, truncation=True,
                                                is_split_into_words=True, return_tensors="pt")

    input_ids, attention_mask = encodings.get("input_ids").cuda(), encodings.get("attention_mask").cuda()
    output = model.forward(input_ids=input_ids, attention_mask=attention_mask)

    batch_token_ids = input_ids.tolist()
    batch_label_ids = output[0].argmax(2).tolist()

    tokens = [tokenizer.convert_ids_to_tokens(x) for x in batch_token_ids]
    labels = [[id2tag[i] for i in labels] for labels in batch_label_ids]
    return tokens, labels

In [30]:
text = """Verify for Lab Batch Type selection, the following are displayed:
- Reason for this Batch*: displays: Select Reason for this Batch
- Optional Equipment: displays: Select Optional Equipment

Capture and attach screen print: Equipment Information section"""

batch_tokens, batch_labels = generate_inference(text)
print(batch_tokens);print(batch_labels)

for token, label in zip(batch_tokens, batch_labels):
    for t, l in (zip(token, label)):
        print(f"{t:<20} ------->{l:>20}")
    print("\n")

[['[CLS]', 'verify', 'for', 'lab', 'batch', 'type', 'selection', ',', 'the', 'following', 'are', 'displayed', ':', '-', 'reason', 'for', 'this', 'batch', '*', ':', 'displays', ':', 'select', 'reason', 'for', 'this', 'batch', '-', 'optional', 'equipment', ':', 'displays', ':', 'select', 'optional', 'equipment', 'capture', 'and', 'attach', 'screen', 'print', ':', 'equipment', 'information', 'section', '[SEP]']]
[['O', 'action', 'O', 'data', 'label', 'label', 'label', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'label', 'O', 'label', 'label', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'label', 'O', 'O', 'O', 'O', 'O', 'O', 'action', 'O', 'action', 'label', 'label', 'O', 'O', 'O', 'O', 'O']]
[CLS]                ------->                   O
verify               ------->              action
for                  ------->                   O
lab                  ------->                data
batch                ------->               label
type                 ------->               label


In [31]:
id2tag

{0: 'action', 1: 'label', 2: 'data', 3: 'O'}

In [32]:
def get_results(text):
    result = []
    if isinstance(text, str): text=[text]
    batch_tokens, batch_labels = generate_inference(text)
    for index, (tokens, labels) in enumerate(zip(batch_tokens, batch_labels)):
        prev_label, token_list, entities = labels[0], [tokens[0]], []
        for token_index, (token, label) in enumerate(list(zip(tokens[1:], labels[1:]))):
            label = label.split("-")[-1]
            if label == 'O':
                if prev_label != 'O': entities.append((token_list, prev_label))
                token_list, prev_label = list(), label

            if prev_label == label:
                token_list.append( token)
            else:
                if prev_label != 'O': entities.append((token_list, prev_label))
                token_list = list()
                prev_label = label
                token_list.append(token)
        if token_list:
            entities.append((token_list, prev_label))

        #print(entities)
        entity_dict = {}
        _ = [entity_dict.setdefault(x[1], []).append(" ".join(x[0])) for x in entities if x[0]]
        entity_dict.pop("O", None)
        result.append(entity_dict)
        print(text[index], "\n")
        print(entity_dict, "\n")
        print("=" * 50)
get_results(text)

Verify for Lab Batch Type selection, the following are displayed:
- Reason for this Batch*: displays: Select Reason for this Batch
- Optional Equipment: displays: Select Optional Equipment

Capture and attach screen print: Equipment Information section 

{'action': ['verify', 'capture', 'attach'], 'data': ['lab'], 'label': ['batch type selection', 'reason for this batch', 'optional equipment', 'screen print']} 



In [47]:
text_list = ["""Select the following for the Equipment Information section:
a. - Site
b. - Legal Product Category""",
"""Click 'Actions' dropdown and select 'Post to Ledger'""",
"""Navigate to Manage Users Page [Overview->Manage Users] and click on Add User button"""]

get_results(text_list)

Select the following for the Equipment Information section:
a. - Site
b. - Legal Product Category 

{'action': ['select'], 'label': ['site', 'legal product category']} 

Click 'Actions' dropdown and select 'Post to Ledger' 

{'action': ['click', 'select'], 'label': ['actions', 'post to ledger', '[PAD] [PAD]']} 

Navigate to Manage Users Page [Overview->Manage Users] and click on Add User button 

{'action': ['navigate', 'click'], 'label': ['manage users', 'add user']} 



In [65]:
text_list = ["Click on please accept this invitation",
            "Navigate to website by clicking on login button",
            "I want to Login to the website", 
             "This website has to be login", 
             "open the app drawer"]
get_results(text_list)

Click on please accept this invitation 

{'action': ['click', 'accept'], 'label': ['invitation']} 

Navigate to website by clicking on login button 

{'action': ['navigate', 'clicking'], 'label': ['website', 'log ##in']} 

I want to Login to the website 

{'action': ['log ##in']} 

This website has to be login 

{'action': ['log ##in']} 

open the app drawer 

{'action': ['open'], 'label': ['app drawer', '[PAD]', '[PAD]']} 

