# Collator with Bidirectional Encoder Representations from Transformers (BERT)

In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import seaborn as sns
from wordcloud import WordCloud,STOPWORDS
import missingno as msno
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from keras.preprocessing import text
import keras
from keras.models import Sequential
from keras.layers import Dense,Embedding,LSTM,Dropout
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk import word_tokenize
from nltk.stem import PorterStemmer
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer,BertTokenizerFast, BertForSequenceClassification
from transformers import TrainingArguments, Trainer, pipeline


2024-10-21 17:35:49.530125: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Prepare Dataset

Load JSON file

In [2]:
def load_json_file(filename):
    """Read a JSON document from disk and return the parsed object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.
    """
    # Open explicitly as UTF-8 so parsing does not depend on the platform's
    # default text encoding.
    with open(filename, encoding="utf-8") as f:
        return json.load(f)
    
# Load the intent definitions; `intents` is reused later by chat() to pick responses.
filename="intents.json"
intents=load_json_file(filename)
# NOTE(review): this prints the entire parsed file, which makes for a very long cell output.
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Is anyone there?', 'Hello', 'Good day', "What's up", 'whatsup', 'Hi there, can you help me with something?', 'Hello! I have a question. Can you assist me?', 'Hey, I need your help with something.', 'Good morning/afternoon/evening! Can you answer a question for me?', 'Greetings! I need your assistance with something.', 'Hi,can you provide me with some information?', 'Hello, can you help me with a task?', "Hey,I'm looking for some guidance. Can you help?", 'Good day! Can you assist me with a query?', 'Hi,I would like to ask you a question.', 'Hello', 'Greetings', 'Good morning', 'Good afternoon', 'Good evening', 'Salutations', 'Pleasure to chat with you', "It's great to talk with you", 'Hi chatbot', 'Hello chatbot', 'Hey chatbot', "What's new", "How's it going", "What's happening", 'What can you do for me', 'Can you help me', 'I need your help', 'I have a question', "I'm here to chat", "Let's talk"], 'responses': ['Hello!', 'Hi there, 

Create dataframe for patterns and tags

In [3]:
def create_df():
    """Return an empty DataFrame with the columns 'Pattern' and 'Tag'."""
    empty_columns = {'Pattern': [], 'Tag': []}
    return pd.DataFrame(empty_columns)
    
# Start from an empty Pattern/Tag frame and display it.
df=create_df()
df

Unnamed: 0,Pattern,Tag


Extract data from file

In [4]:
def extract_json_info(json_file,df):
    """Append one (pattern, tag) row to ``df`` for every pattern of every intent.

    Args:
        json_file: Mapping with an 'intents' list; each intent provides
            'patterns' (iterable of strings) and 'tag'.
        df: Two-column DataFrame that is extended in place.

    Returns:
        The same ``df`` object, after the rows have been appended.
    """
    for entry in json_file['intents']:
        for utterance in entry['patterns']:
            # The next free integer position is used as the new row label.
            next_row = len(df.index)
            df.loc[next_row] = [utterance, entry['tag']]
    return df
# Fill the frame with every pattern/tag pair found in the intents file.
# NOTE(review): extract_json_info appends to `df` in place, so re-running this cell adds the rows again.
df=extract_json_info(intents,df)

## Explore data

In [5]:
# Preview the first rows of the extracted pattern/tag pairs.
df.head()

Unnamed: 0,Pattern,Tag
0,Hi,greeting
1,Is anyone there?,greeting
2,Hello,greeting
3,Good day,greeting
4,What's up,greeting


In [6]:
# (rows, columns) of the extracted frame.
df.shape

(109, 2)

In [7]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 109 entries, 0 to 108
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Pattern  109 non-null    object
 1   Tag      109 non-null    object
dtypes: object(2)
memory usage: 2.6+ KB


In [8]:
# Collect the distinct tags once, then report how many there are and what they are.
unique_tags = df['Tag'].unique()
x = len(unique_tags)
print(f"{x} classes in or tags")

print(unique_tags)

38 classes in or tags
['greeting' 'thanks' 'goodbye' 'can_do' 'attendance_count'
 'attendance_list' 'finance_count' 'finance_list' 'testimony_count'
 'testimony_list' 'report_submission' 'report_sample' 'countrystate_list'
 'region_list' 'group_list' 'location_list' 'countrystate_show'
 'region_show' 'group_show' 'location_show' 'countrystate_admin_list'
 'region_admin_list' 'group_admin_list' 'location_admin_list'
 'countrystate_admin_add' 'region_admin_add' 'group_admin_add'
 'location_admin_add' 'countrystate_admin_remove' 'region_admin_remove'
 'group_admin_remove' 'location_admin_remove' 'meeting_list' 'user_show'
 'user_signup' 'password_reset' 'helpdesk' 'unrecognized_input']


In [9]:
# Count missing values per column.
df.isna().sum()

Pattern    0
Tag        0
dtype: int64

Understand the corpus

In [10]:
def get_corpus(series):
    """Split every string in ``series`` on whitespace and return all tokens as one flat list.

    Args:
        series: Iterable of strings.

    Returns:
        List of tokens in their original order.
    """
    return [token.strip() for sentence in series for token in sentence.split()]

# Flatten all patterns into whitespace-separated tokens and preview the first five.
corpus = get_corpus(df.Pattern)
corpus[:5]

['Hi', 'Is', 'anyone', 'there?', 'Hello']

In [11]:
from collections import Counter
# Count token occurrences across the corpus (case-sensitive: 'List' and 'list' are separate keys).
counter = Counter(corpus)
# Ten most frequent tokens as (token, count) pairs.
most_common = counter.most_common(10)
print(most_common)
# NOTE: `most_common` is rebound here from a list of pairs to a dict.
most_common = dict(most_common)
most_common

[('with', 31), ('my', 17), ('you', 14), ('List', 13), ('locationid', 12), ('a', 10), ('to', 10), ('at', 10), ('list', 9), ('me', 8)]


{'with': 31,
 'my': 17,
 'you': 14,
 'List': 13,
 'locationid': 12,
 'a': 10,
 'to': 10,
 'at': 10,
 'list': 9,
 'me': 8}

In [22]:
def get_top_text_ngrams(corpus, n,g):
    """Return the ``n`` most frequent ``g``-grams in ``corpus``.

    Args:
        corpus: Iterable of text documents passed to ``CountVectorizer``.
        n: Number of (ngram, count) pairs to return.
        g: N-gram size (1 for unigrams, 2 for bigrams, ...).

    Returns:
        List of ``(ngram, count)`` tuples sorted by descending count.
    """
    # Use `g` for both bounds so only g-grams are counted; previously the
    # range was fixed at (1, 1) and `g` was silently ignored.
    vec = CountVectorizer(ngram_range=(g, g)).fit(corpus)
    bag_of_words = vec.transform(corpus)
    # Sum over documents to get one total count per vocabulary entry.
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    words_freq = sorted(words_freq, key=lambda pair: pair[1], reverse=True)
    return words_freq[:n]

Create another dataframe for patterns, tags and labels

In [12]:
# Work on a copy so the numeric label column added later does not touch `df`.
df2 = df.copy()
df2.head()

Unnamed: 0,Pattern,Tag
0,Hi,greeting
1,Is anyone there?,greeting
2,Hello,greeting
3,Good day,greeting
4,What's up,greeting


Create list of unique tags as labels

In [13]:
# Distinct tags in first-appearance order, with surrounding whitespace removed.
labels = [tag.strip() for tag in df2['Tag'].unique().tolist()]
labels[:5]

['greeting', 'thanks', 'goodbye', 'can_do', 'attendance_count']

Create the `id` → `label` mapping and vice versa

In [14]:
# Position in `labels` is the class id; build both lookup directions from it.
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}
num_labels = len(labels)

In [16]:
# Display the id -> tag mapping.
id2label

{0: 'greeting',
 1: 'thanks',
 2: 'goodbye',
 3: 'can_do',
 4: 'attendance_count',
 5: 'attendance_list',
 6: 'finance_count',
 7: 'finance_list',
 8: 'testimony_count',
 9: 'testimony_list',
 10: 'report_submission',
 11: 'report_sample',
 12: 'countrystate_list',
 13: 'region_list',
 14: 'group_list',
 15: 'location_list',
 16: 'countrystate_show',
 17: 'region_show',
 18: 'group_show',
 19: 'location_show',
 20: 'countrystate_admin_list',
 21: 'region_admin_list',
 22: 'group_admin_list',
 23: 'location_admin_list',
 24: 'countrystate_admin_add',
 25: 'region_admin_add',
 26: 'group_admin_add',
 27: 'location_admin_add',
 28: 'countrystate_admin_remove',
 29: 'region_admin_remove',
 30: 'group_admin_remove',
 31: 'location_admin_remove',
 32: 'meeting_list',
 33: 'user_show',
 34: 'user_signup',
 35: 'password_reset',
 36: 'helpdesk',
 37: 'unrecognized_input'}

In [30]:
# Display the tag -> id mapping.
label2id

{'greeting': 0,
 'thanks': 1,
 'goodbye': 2,
 'can_do': 3,
 'attendance_count': 4,
 'attendance_list': 5,
 'finance_count': 6,
 'finance_list': 7,
 'testimony_count': 8,
 'testimony_list': 9,
 'report_submission': 10,
 'report_sample': 11,
 'countrystate_list': 12,
 'region_list': 13,
 'group_list': 14,
 'location_list': 15,
 'countrystate_show': 16,
 'region_show': 17,
 'group_show': 18,
 'location_show': 19,
 'countrystate_admin_list': 20,
 'region_admin_list': 21,
 'group_admin_list': 22,
 'location_admin_list': 23,
 'countrystate_admin_add': 24,
 'region_admin_add': 25,
 'group_admin_add': 26,
 'location_admin_add': 27,
 'countrystate_admin_remove': 28,
 'region_admin_remove': 29,
 'group_admin_remove': 30,
 'location_admin_remove': 31,
 'meeting_list': 32,
 'user_show': 33,
 'user_signup': 34,
 'password_reset': 35,
 'helpdesk': 36,
 'unrecognized_input': 37}

Add a numeric `labels` column to df2 by mapping each tag through `label2id`

In [11]:
def _tag_to_id(tag):
    """Look up the integer class id for a tag, ignoring surrounding whitespace."""
    return label2id[tag.strip()]

# Add the integer class id alongside each tag.
df2['labels'] = df2['Tag'].map(_tag_to_id)
df2.head()

Unnamed: 0,Pattern,Tag,labels
0,Hi,greeting,0
1,Is anyone there?,greeting,0
2,Hello,greeting,0
3,Good day,greeting,0
4,What's up,greeting,0


In [12]:
# Model inputs: the raw pattern strings, in frame order.
X=list(df2['Pattern'])
X[:5]

['Hi', 'Is anyone there?', 'Hello', 'Good day', "What's up"]

In [13]:
# Targets: integer class ids aligned with X.
y=list(df2['labels'])
y[:5]

[0, 0, 0, 0, 0]

Define training and test datasets

In [14]:
# Split into train/test partitions; random_state pins the shuffle and, since no
# test_size is passed, the library default split ratio applies.
# NOTE(review): no `stratify` argument is given, so a tag is not guaranteed to
# appear in both partitions.
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=100)

Define base model

In [15]:
# Checkpoint used for both the tokenizer and the classifier.
model_name="bert-base-uncased"
# Intended upper bound on tokenized sequence length (passed to the tokenizer below).
max_len=256

Load tokenizer

In [37]:
# `model_max_length` is the tokenizer setting that truncation=True honours; the
# previous `max_length=` keyword is not a tokenizer length limit, so `max_len`
# had no effect on the encodings.
tokenizer=BertTokenizer.from_pretrained(model_name,model_max_length=max_len)



Load model

In [39]:
# Load the pretrained encoder with a classification head sized to `num_labels`.
# The id2label/label2id maps are passed so the model config can translate class
# indices to tag names (predict() later reads config.id2label).
model=BertForSequenceClassification.from_pretrained(model_name,num_labels=num_labels,id2label=id2label,label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Tokenize the training and test texts

In [40]:
# Tokenize each partition separately, with truncation and padding enabled.
train_encoding=tokenizer(X_train,truncation=True,padding=True)
test_encoding=tokenizer(X_test,truncation=True,padding=True)

In [41]:
# Tokenize every pattern (train and test together).
full_data = tokenizer(X, truncation=True, padding=True)

Dataset wrapper (the class is named `DataLoader`, but it subclasses `torch.utils.data.Dataset`)

In [42]:
class DataLoader(Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    Despite its name this is a ``torch.utils.data.Dataset`` subclass, not a
    batch loader.
    """

    def __init__(self, encodings, labels):
        """Store the encodings mapping and the label sequence.

        Args:
            encodings: Mapping from field name to an indexable of per-sample values.
            labels: Indexable of labels; its length defines the dataset size.
        """
        self.encodings, self.labels = encodings, labels

    def __len__(self):
        """Number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of tensors, including a 'labels' entry."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [43]:
# Wrap each partition's encodings and labels in the Dataset subclass defined in this notebook.
train_dataloader = DataLoader(train_encoding, y_train)
test_dataloader = DataLoader(test_encoding, y_test)

In [44]:
# `full_data` encodes every pattern in X, so it must be paired with the full
# label list `y`; pairing it with `y_test` misaligned texts and labels and
# made the dataset length that of the test split.
fullDataLoader = DataLoader(full_data, y)

In [45]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for an evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and ``predictions``
            (per-class scores; the arg-max over the last axis is the predicted class).

    Returns:
        Dict with the keys 'Accuracy', 'F1', 'Precision' and 'Recall'.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    # zero_division=0 keeps the score at 0 for classes with no predicted or no
    # true samples (the same value as the default) but silences the warning
    # that was otherwise emitted on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    return {
        'Accuracy': acc,
        'F1': f1,
        'Precision': precision,
        'Recall': recall
    }


Define training arguments

In [46]:
training_args = TrainingArguments(
    # Output locations for checkpoints and logs.
    output_dir='./output',
    logging_dir='./logs',
    # Enable both the training and the evaluation loop.
    do_train=True,
    do_eval=True,
    # Optimisation schedule.
    num_train_epochs=100,
    warmup_steps=100,
    weight_decay=0.05,
    # Per-device batch sizes.
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    # Log and evaluate every 50 steps; checkpoints are also saved on a step
    # schedule so the best one can be restored when training finishes.
    logging_strategy='steps',
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    load_best_model_at_end=True,
)



In [47]:
# Bind the model, training configuration, both datasets and the metric callback together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataloader,
    eval_dataset=test_dataloader,
    compute_metrics= compute_metrics
)

In [48]:
# Fine-tune the classifier; evaluation metrics are reported periodically per training_args.
trainer.train()

  0%|          | 0/800 [00:00<?, ?it/s]

{'loss': 3.2895, 'grad_norm': 6.290635585784912, 'learning_rate': 2.5e-05, 'epoch': 6.25}


  0%|          | 0/6 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 2.4870944023132324, 'eval_Accuracy': 0.43902439024390244, 'eval_F1': 0.10977413665585709, 'eval_Precision': 0.11655145929339479, 'eval_Recall': 0.13978494623655913, 'eval_runtime': 1.2433, 'eval_samples_per_second': 65.955, 'eval_steps_per_second': 4.826, 'epoch': 6.25}
{'loss': 1.6858, 'grad_norm': 3.3381083011627197, 'learning_rate': 5e-05, 'epoch': 12.5}


  0%|          | 0/6 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.2049498558044434, 'eval_Accuracy': 0.7560975609756098, 'eval_F1': 0.4643578643578643, 'eval_Precision': 0.452020202020202, 'eval_Recall': 0.48484848484848486, 'eval_runtime': 0.2927, 'eval_samples_per_second': 280.135, 'eval_steps_per_second': 20.498, 'epoch': 12.5}
{'loss': 0.6521, 'grad_norm': 1.7712889909744263, 'learning_rate': 4.642857142857143e-05, 'epoch': 18.75}


  0%|          | 0/6 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.5323908925056458, 'eval_Accuracy': 0.9878048780487805, 'eval_F1': 0.9615384615384616, 'eval_Precision': 0.9591836734693878, 'eval_Recall': 0.9642857142857143, 'eval_runtime': 0.3197, 'eval_samples_per_second': 256.496, 'eval_steps_per_second': 18.768, 'epoch': 18.75}
{'loss': 0.2315, 'grad_norm': 0.9312476515769958, 'learning_rate': 4.2857142857142856e-05, 'epoch': 25.0}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.21531929075717926, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2887, 'eval_samples_per_second': 284.067, 'eval_steps_per_second': 20.785, 'epoch': 25.0}
{'loss': 0.0918, 'grad_norm': 0.3334563076496124, 'learning_rate': 3.928571428571429e-05, 'epoch': 31.25}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.07996250689029694, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2798, 'eval_samples_per_second': 293.1, 'eval_steps_per_second': 21.446, 'epoch': 31.25}
{'loss': 0.0465, 'grad_norm': 0.211528018116951, 'learning_rate': 3.571428571428572e-05, 'epoch': 37.5}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.04615364596247673, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2704, 'eval_samples_per_second': 303.225, 'eval_steps_per_second': 22.187, 'epoch': 37.5}
{'loss': 0.0311, 'grad_norm': 0.16053956747055054, 'learning_rate': 3.2142857142857144e-05, 'epoch': 43.75}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.03280305489897728, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2819, 'eval_samples_per_second': 290.843, 'eval_steps_per_second': 21.281, 'epoch': 43.75}
{'loss': 0.0244, 'grad_norm': 0.2031094878911972, 'learning_rate': 2.857142857142857e-05, 'epoch': 50.0}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.02576862834393978, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2828, 'eval_samples_per_second': 289.989, 'eval_steps_per_second': 21.219, 'epoch': 50.0}
{'loss': 0.0199, 'grad_norm': 0.1286749243736267, 'learning_rate': 2.5e-05, 'epoch': 56.25}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.0211139228194952, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2784, 'eval_samples_per_second': 294.588, 'eval_steps_per_second': 21.555, 'epoch': 56.25}
{'loss': 0.017, 'grad_norm': 0.09523440152406693, 'learning_rate': 2.1428571428571428e-05, 'epoch': 62.5}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.018190089613199234, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2873, 'eval_samples_per_second': 285.373, 'eval_steps_per_second': 20.881, 'epoch': 62.5}
{'loss': 0.0152, 'grad_norm': 0.11290749907493591, 'learning_rate': 1.785714285714286e-05, 'epoch': 68.75}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.016140209510922432, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2934, 'eval_samples_per_second': 279.459, 'eval_steps_per_second': 20.448, 'epoch': 68.75}
{'loss': 0.0135, 'grad_norm': 0.10391589254140854, 'learning_rate': 1.4285714285714285e-05, 'epoch': 75.0}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.014780910685658455, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.294, 'eval_samples_per_second': 278.916, 'eval_steps_per_second': 20.409, 'epoch': 75.0}
{'loss': 0.0128, 'grad_norm': 0.0917266309261322, 'learning_rate': 1.0714285714285714e-05, 'epoch': 81.25}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.013751944527029991, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2724, 'eval_samples_per_second': 300.985, 'eval_steps_per_second': 22.023, 'epoch': 81.25}
{'loss': 0.0121, 'grad_norm': 0.07534883916378021, 'learning_rate': 7.142857142857143e-06, 'epoch': 87.5}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.013061584904789925, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2748, 'eval_samples_per_second': 298.422, 'eval_steps_per_second': 21.836, 'epoch': 87.5}
{'loss': 0.0114, 'grad_norm': 0.08161803334951401, 'learning_rate': 3.5714285714285714e-06, 'epoch': 93.75}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.012680296786129475, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2863, 'eval_samples_per_second': 286.42, 'eval_steps_per_second': 20.958, 'epoch': 93.75}
{'loss': 0.0112, 'grad_norm': 0.06197451055049896, 'learning_rate': 0.0, 'epoch': 100.0}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.012555195018649101, 'eval_Accuracy': 1.0, 'eval_F1': 1.0, 'eval_Precision': 1.0, 'eval_Recall': 1.0, 'eval_runtime': 0.2736, 'eval_samples_per_second': 299.689, 'eval_steps_per_second': 21.928, 'epoch': 100.0}
{'train_runtime': 311.8438, 'train_samples_per_second': 78.565, 'train_steps_per_second': 2.565, 'train_loss': 0.38535389192402364, 'epoch': 100.0}


TrainOutput(global_step=800, training_loss=0.38535389192402364, metrics={'train_runtime': 311.8438, 'train_samples_per_second': 78.565, 'train_steps_per_second': 2.565, 'total_flos': 214103859186000.0, 'train_loss': 0.38535389192402364, 'epoch': 100.0})

Evaluate trainer

In [53]:
# Evaluate on both partitions and tabulate the first five reported metrics side by side.
q = [trainer.evaluate(eval_dataset=split) for split in (train_dataloader, test_dataloader)]
pd.DataFrame(q, index=["train", "test"]).iloc[:, :5]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Unnamed: 0,eval_loss,eval_Accuracy,eval_F1,eval_Precision,eval_Recall
train,0.008246,1.0,1.0,1.0,1.0
test,0.012555,1.0,1.0,1.0,1.0


Save model and tokenizer

In [23]:
# Directory that receives (and is later read for) the fine-tuned model and tokenizer.
model_path = "bert-model"

In [3]:
# Display the save directory.
model_path

'bert-model'

In [50]:
# Write the trained model and the tokenizer files into the same directory so
# they can be reloaded together.
trainer.save_model(model_path)
tokenizer.save_pretrained(model_path)

('bert-model/tokenizer_config.json',
 'bert-model/special_tokens_map.json',
 'bert-model/vocab.txt',
 'bert-model/added_tokens.json')

Load model and tokenizer

In [29]:
# Reload the saved artifacts from disk.
# NOTE: the tokenizer is reloaded as BertTokenizerFast, whereas training used BertTokenizer.
new_model = BertForSequenceClassification.from_pretrained(model_path)
new_tokenizer= BertTokenizerFast.from_pretrained(model_path)

In [25]:
# Choose the inference device at run time instead of hard-coding Apple's MPS
# backend, so the cell also runs on CUDA or CPU-only machines.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    inference_device = "mps"
elif torch.cuda.is_available():
    inference_device = "cuda"
else:
    inference_device = "cpu"

pipe= pipeline("sentiment-analysis", model=new_model, tokenizer=new_tokenizer, device=inference_device)

In [26]:
# Smoke test: classify a greeting and show the predicted tag with its score.
pipe("Hello")

[{'label': 'greeting', 'score': 0.9989669322967529}]

## Predict

In [27]:
def predict(text):
    """Classify a single text with the reloaded model.

    Args:
        text: Input string to classify.

    Returns:
        Tuple ``(probs, pred_label_idx, pred_label)``: the softmax
        probabilities (on CPU), the arg-max index as a tensor, and the tag
        name looked up in the model config.
    """
    # Put the inputs on whatever device the model currently lives on. The model
    # may have been moved off the CPU (e.g. when a pipeline was built around
    # it), and mismatched devices make the forward pass fail.
    device = new_model.device
    inputs = new_tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = new_model(**inputs)

    # Softmax over the class axis; bring the result back to the CPU for the caller.
    probs = outputs[0].softmax(1).cpu()

    # Index of the highest probability (taken over the flattened tensor, so
    # this is meaningful for a single input text).
    pred_label_idx = probs.argmax()

    # Translate the class index into its tag name via the model configuration.
    pred_label = new_model.config.id2label[pred_label_idx.item()]

    return probs, pred_label_idx, pred_label


In [30]:
# Try the helper on a greeting.
# NOTE: this rebinds the module-level name `text`, which the import cell bound to keras.preprocessing.text.
text = "Hello"
predict(text)



(tensor([[9.9897e-01, 4.0382e-05, 5.0594e-05, 2.0533e-05, 4.7807e-05, 1.3049e-05,
          1.7133e-05, 2.4188e-05, 1.1959e-05, 2.3613e-05, 3.7294e-05, 3.9787e-05,
          4.8652e-05, 4.1899e-05, 1.0683e-05, 3.7709e-05, 1.7957e-05, 2.8868e-05,
          3.0518e-05, 2.7206e-05, 1.1888e-05, 3.5707e-05, 2.1373e-05, 1.7299e-05,
          1.1532e-05, 1.4921e-05, 1.5354e-05, 5.2564e-05, 9.3532e-06, 1.2270e-05,
          2.2896e-05, 4.4233e-05, 4.9501e-05, 2.9216e-05, 4.3846e-05, 3.2940e-05,
          2.5992e-05, 1.2399e-05]], grad_fn=<SoftmaxBackward0>),
 tensor(0),
 'greeting')

In [31]:
def chat(pipe, threshold=0.8):
    """Run an interactive console chat until the user types 'quit'.

    Each user message is stripped and lower-cased, then classified with
    ``pipe``. If the top score is below ``threshold`` a fallback message is
    printed; otherwise a random response of the predicted intent is printed.

    Args:
        pipe: Callable returning a list whose first element is a dict with
            'label' and 'score' keys for the given text.
        threshold: Minimum score required to answer (default 0.8).
    """
    print("CHATBOT: Hi! I am your virtual assistance,Feel free to ask, and I'll do my best to provide you with answers and assistance..")
    print("Type 'quit' to exit the chat\n\n")

    text = input("User: ").strip().lower()

    while text != 'quit':
        # Classify once per turn and reuse the result for both the confidence
        # check and the label lookup (previously the model was run twice).
        prediction = pipe(text)[0]

        if prediction['score'] < threshold:
            print("Chatbot: Sorry I can't answer that\n\n")
            text = input("User: ").strip().lower()
            continue

        # The class id doubles as the position of the intent in the intents
        # list, which is how the response pool is selected here.
        label = label2id[prediction['label']]
        response = random.choice(intents['intents'][label]['responses'])

        print(f"CHATBOT: {response}\n\n")

        text = input("YOU: ").strip().lower()


In [32]:
chat(pipe)

CHATBOT: Hi! I am your virtual assistance,Feel free to ask, and I'll do my best to provide you with answers and assistance..
Type 'quit' to exit the chat


CHATBOT: How may I be of service to you today?


