# Base model with Bidirectional Encoder Representations from Transformers (BERT)

In [1]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import seaborn as sns
from wordcloud import WordCloud,STOPWORDS
import missingno as msno
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from keras.preprocessing import text
import keras
from keras.models import Sequential
from keras.layers import Dense,Embedding,LSTM,Dropout
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk import word_tokenize
from nltk.stem import PorterStemmer
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer,BertTokenizerFast, BertForSequenceClassification
from transformers import TrainingArguments, Trainer, pipeline


2024-10-23 18:59:01.345295: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Prepare Dataset

Load JSON file

In [11]:
def load_json_file(filename):
    with open(filename) as f:
        file=json.load(f)
    return file   
    
filename="intents.json"
intents=load_json_file(filename)
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Hello', 'Hey', 'How are you', "What's up"], 'responses': ['Hi there', 'Hello', 'Hey', "I'm fine, thank you", 'Nothing much']}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye', 'Take care'], 'responses': ['Goodbye', 'See you later', 'Take care']}, {'tag': 'thanks', 'patterns': ['Thank you', 'Thanks', 'Thanks a lot', 'I appreciate it'], 'responses': ["You're welcome", 'No problem', 'Glad I could help']}, {'tag': 'about', 'patterns': ['What can you do', 'Who are you', 'What are you', 'What is your purpose'], 'responses': ['I am a chatbot', 'My purpose is to assist you', 'I can answer questions and provide assistance']}, {'tag': 'help', 'patterns': ['Help', 'I need help', 'Can you help me', 'What should I do'], 'responses': ['Sure, what do you need help with?', "I'm here to help. What's the problem?", 'How can I assist you?']}, {'tag': 'age', 'patterns': ['How old are you', "What's your age"], 'responses': ["I don't hav

Create dataframe for patterns and tags

In [12]:
def create_df():
    df=pd.DataFrame({'Pattern':[],'Tag':[]})
    return df
    
df=create_df()
df

Unnamed: 0,Pattern,Tag


Extract data from file

In [13]:
def extract_json_info(json_file,df):
    for intent in json_file['intents']:
        for pattern in intent['patterns']:
            sentence_tag=[pattern,intent['tag']]
            df.loc[len(df.index)] = sentence_tag
    return df        
df=extract_json_info(intents,df)

## Explore data

In [14]:
df.head()

Unnamed: 0,Pattern,Tag
0,Hi,greeting
1,Hello,greeting
2,Hey,greeting
3,How are you,greeting
4,What's up,greeting


In [15]:
df.shape

(850, 2)

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 850 entries, 0 to 849
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Pattern  850 non-null    object
 1   Tag      850 non-null    object
dtypes: object(2)
memory usage: 19.9+ KB


In [17]:
x=len(df['Tag'].unique())
print(f"{x} classes in or tags")

print(df['Tag'].unique())

278 classes in or tags
['greeting' 'goodbye' 'thanks' 'about' 'help' 'age' 'weather' 'budget'
 'credit_score' 'name' 'favorite_color' 'hobby' 'time' 'joke' 'food'
 'movies' 'technology' 'compliment' 'meaning_of_life' 'sports' 'pets'
 'travel' 'books' 'education' 'health' 'coding' 'art' 'career'
 'technology_help' 'history' 'music' 'exercise' 'mindfulness' 'science'
 'gaming' 'positivity' 'cooking' 'relationship' 'nature' 'productivity'
 'travel_tips' 'languages' 'inspiration' 'finance_tips'
 'artificial_intelligence' 'motivation' 'future' 'movies_series'
 'self_improvement' 'robotics' 'philosophy' 'coding_languages'
 'virtual_reality' 'space_exploration' 'emotional_intelligence'
 'cybersecurity' 'creativity' 'futuristic_technology' 'entrepreneurship'
 'internet_of_things' 'universe' 'social_media' 'cuisine' 'happiness'
 'self_care' 'augmented_reality' 'global_warming' 'data_privacy'
 'positivity_quotes' 'virtual_assistant' 'emerging_technologies'
 'philanthropy' 'sustainability' 'gene_

In [18]:
df.isna().sum()

Pattern    0
Tag        0
dtype: int64

Understand the corpus

In [19]:
def get_corpus(series):
    words = []
    for text in series:
        for word in text.split():
            words.append(word.strip())
    return words

corpus = get_corpus(df.Pattern)
corpus[:5]
# print(corpus)

['Hi', 'Hello', 'Hey', 'How', 'are']

In [20]:
from collections import Counter
counter = Counter(corpus)
most_common = counter.most_common(10)
print(most_common)
most_common = dict(most_common)
most_common

[('me', 198), ('Tell', 188), ('about', 184), ('to', 131), ('How', 121), ('of', 121), ('a', 99), ('the', 68), ('and', 67), ('in', 49)]


{'me': 198,
 'Tell': 188,
 'about': 184,
 'to': 131,
 'How': 121,
 'of': 121,
 'a': 99,
 'the': 68,
 'and': 67,
 'in': 49}

Create another dataframe for patterns, tags and labels

In [21]:
df2 = df.copy()
df2.head()

Unnamed: 0,Pattern,Tag
0,Hi,greeting
1,Hello,greeting
2,Hey,greeting
3,How are you,greeting
4,What's up,greeting


Create list of unique tags as labels

In [22]:
labels=df2['Tag'].unique().tolist()
labels=[s.strip() for s in labels]
labels[:5]

['greeting', 'goodbye', 'thanks', 'about', 'help']

Create `id` to `label` and vise versa

In [23]:
id2label={id:label for id,label in enumerate(labels)}
label2id={label:id for id,label in enumerate(labels)}
num_labels=len(labels)

In [24]:
id2label

{0: 'greeting',
 1: 'goodbye',
 2: 'thanks',
 3: 'about',
 4: 'help',
 5: 'age',
 6: 'weather',
 7: 'budget',
 8: 'credit_score',
 9: 'name',
 10: 'favorite_color',
 11: 'hobby',
 12: 'time',
 13: 'joke',
 14: 'food',
 15: 'movies',
 16: 'technology',
 17: 'compliment',
 18: 'meaning_of_life',
 19: 'sports',
 20: 'pets',
 21: 'travel',
 22: 'books',
 23: 'education',
 24: 'health',
 25: 'coding',
 26: 'art',
 27: 'career',
 28: 'technology_help',
 29: 'history',
 30: 'music',
 31: 'exercise',
 32: 'mindfulness',
 33: 'science',
 34: 'gaming',
 35: 'positivity',
 36: 'cooking',
 37: 'relationship',
 38: 'nature',
 39: 'productivity',
 40: 'travel_tips',
 41: 'languages',
 42: 'inspiration',
 43: 'finance_tips',
 44: 'artificial_intelligence',
 45: 'motivation',
 46: 'future',
 47: 'movies_series',
 48: 'self_improvement',
 49: 'robotics',
 50: 'philosophy',
 51: 'coding_languages',
 52: 'virtual_reality',
 53: 'space_exploration',
 54: 'emotional_intelligence',
 55: 'cybersecurity',
 56

In [25]:
label2id

{'greeting': 0,
 'goodbye': 1,
 'thanks': 2,
 'about': 3,
 'help': 4,
 'age': 5,
 'weather': 6,
 'budget': 7,
 'credit_score': 8,
 'name': 9,
 'favorite_color': 10,
 'hobby': 11,
 'time': 12,
 'joke': 13,
 'food': 14,
 'movies': 15,
 'technology': 16,
 'compliment': 17,
 'meaning_of_life': 18,
 'sports': 19,
 'pets': 20,
 'travel': 21,
 'books': 22,
 'education': 23,
 'health': 24,
 'coding': 25,
 'art': 26,
 'career': 27,
 'technology_help': 28,
 'history': 29,
 'music': 30,
 'exercise': 31,
 'mindfulness': 32,
 'science': 33,
 'gaming': 34,
 'positivity': 35,
 'cooking': 36,
 'relationship': 37,
 'nature': 38,
 'productivity': 39,
 'travel_tips': 40,
 'languages': 41,
 'inspiration': 42,
 'finance_tips': 43,
 'artificial_intelligence': 44,
 'motivation': 45,
 'future': 46,
 'movies_series': 47,
 'self_improvement': 48,
 'robotics': 49,
 'philosophy': 50,
 'coding_languages': 51,
 'virtual_reality': 52,
 'space_exploration': 53,
 'emotional_intelligence': 54,
 'cybersecurity': 55,
 'c

Add converted `label2id` to df2

In [26]:
df2['labels'] = df2['Tag'].map(lambda x: label2id[x.strip()])
df2.head()

Unnamed: 0,Pattern,Tag,labels
0,Hi,greeting,0
1,Hello,greeting,0
2,Hey,greeting,0
3,How are you,greeting,0
4,What's up,greeting,0


In [27]:
X=list(df2['Pattern'])
X[:5]

['Hi', 'Hello', 'Hey', 'How are you', "What's up"]

In [28]:
y=list(df2['labels'])
y[:5]

[0, 0, 0, 0, 0]

Define training and test datasets

In [29]:
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=100)

Define base model

In [30]:
model_name="bert-base-uncased"
max_len=256

Load tokenizer

In [31]:
tokenizer=BertTokenizer.from_pretrained(model_name,max_length=max_len)



Load model

In [32]:
model=BertForSequenceClassification.from_pretrained(model_name,num_labels=num_labels,id2label=id2label,label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Define encoders

In [33]:
train_encoding=tokenizer(X_train,truncation=True,padding=True)
test_encoding=tokenizer(X_test,truncation=True,padding=True)

In [34]:
full_data = tokenizer(X, truncation=True, padding=True)

Data loader

In [35]:
class DataLoader(Dataset):
    
    def __init__(self, encodings, labels):
        
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
               
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):

        return len(self.labels)

In [36]:
train_dataloader = DataLoader(train_encoding, y_train)
test_dataloader = DataLoader(test_encoding, y_test)

In [37]:
fullDataLoader = DataLoader(full_data, y_test)

In [38]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(pred):
    labels = pred.label_ids  # True labels
    preds = pred.predictions.argmax(-1)  # Predicted labels (argmax along the last axis)

    # Using precision_recall_fscore_support to compute precision, recall, F1 score
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')

    # Using accuracy_score to compute accuracy
    acc = accuracy_score(labels, preds)
    
    # Returning a dictionary containing the computed metrics
    return {
        'Accuracy': acc,
        'F1': f1,
        'Precision': precision,
        'Recall': recall
    }


Define training arguments

In [39]:
training_args = TrainingArguments(
    output_dir='./output', 
    do_train=True,
    do_eval=True,
    num_train_epochs=100,              
    per_device_train_batch_size=32,  
    per_device_eval_batch_size=16,
    warmup_steps=100,                
    weight_decay=0.05,
    logging_strategy='steps',
    logging_dir='./logs',            
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps", 
    load_best_model_at_end=True
)



In [40]:
trainer = Trainer(
    model=model,
    args=training_args,                 
    train_dataset=train_dataloader,         
    eval_dataset=test_dataloader,            
    compute_metrics= compute_metrics
)

In [41]:
%%time
trainer.train()

  0%|          | 0/2000 [00:00<?, ?it/s]

{'loss': 5.6529, 'grad_norm': 4.828703880310059, 'learning_rate': 2.5e-05, 'epoch': 2.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 5.6662702560424805, 'eval_Accuracy': 0.004694835680751174, 'eval_F1': 0.0006303183107469272, 'eval_Precision': 0.00033266799733865603, 'eval_Recall': 0.005988023952095809, 'eval_runtime': 1.2009, 'eval_samples_per_second': 177.361, 'eval_steps_per_second': 11.658, 'epoch': 2.5}
{'loss': 5.499, 'grad_norm': 5.296258449554443, 'learning_rate': 5e-05, 'epoch': 5.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 5.601845741271973, 'eval_Accuracy': 0.0, 'eval_F1': 0.0, 'eval_Precision': 0.0, 'eval_Recall': 0.0, 'eval_runtime': 0.6633, 'eval_samples_per_second': 321.113, 'eval_steps_per_second': 21.106, 'epoch': 5.0}
{'loss': 5.0541, 'grad_norm': 5.023128509521484, 'learning_rate': 4.868421052631579e-05, 'epoch': 7.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 5.38176965713501, 'eval_Accuracy': 0.046948356807511735, 'eval_F1': 0.021049519586104952, 'eval_Precision': 0.01781649245063879, 'eval_Recall': 0.03902439024390244, 'eval_runtime': 0.608, 'eval_samples_per_second': 350.357, 'eval_steps_per_second': 23.028, 'epoch': 7.5}
{'loss': 4.4643, 'grad_norm': 4.903512954711914, 'learning_rate': 4.736842105263158e-05, 'epoch': 10.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 5.156608581542969, 'eval_Accuracy': 0.06572769953051644, 'eval_F1': 0.03813872580995868, 'eval_Precision': 0.038457635717909694, 'eval_Recall': 0.04794520547945205, 'eval_runtime': 0.6069, 'eval_samples_per_second': 350.981, 'eval_steps_per_second': 23.069, 'epoch': 10.0}
{'loss': 3.8917, 'grad_norm': 5.0684967041015625, 'learning_rate': 4.605263157894737e-05, 'epoch': 12.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.925370693206787, 'eval_Accuracy': 0.107981220657277, 'eval_F1': 0.0764069264069264, 'eval_Precision': 0.07534722222222223, 'eval_Recall': 0.08705357142857142, 'eval_runtime': 0.6041, 'eval_samples_per_second': 352.604, 'eval_steps_per_second': 23.176, 'epoch': 12.5}
{'loss': 3.3742, 'grad_norm': 5.112164497375488, 'learning_rate': 4.473684210526316e-05, 'epoch': 15.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.728433132171631, 'eval_Accuracy': 0.13145539906103287, 'eval_F1': 0.09049707602339181, 'eval_Precision': 0.08689083820662767, 'eval_Recall': 0.10526315789473684, 'eval_runtime': 0.6177, 'eval_samples_per_second': 344.817, 'eval_steps_per_second': 22.664, 'epoch': 15.0}
{'loss': 2.9015, 'grad_norm': 4.806368827819824, 'learning_rate': 4.342105263157895e-05, 'epoch': 17.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.566895484924316, 'eval_Accuracy': 0.17370892018779344, 'eval_F1': 0.1329798294863797, 'eval_Precision': 0.13027656477438138, 'eval_Recall': 0.1462882096069869, 'eval_runtime': 0.6127, 'eval_samples_per_second': 347.656, 'eval_steps_per_second': 22.851, 'epoch': 17.5}
{'loss': 2.4711, 'grad_norm': 5.764880180358887, 'learning_rate': 4.210526315789474e-05, 'epoch': 20.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.3949737548828125, 'eval_Accuracy': 0.19718309859154928, 'eval_F1': 0.15183792815371763, 'eval_Precision': 0.14875730994152045, 'eval_Recall': 0.16666666666666666, 'eval_runtime': 0.576, 'eval_samples_per_second': 369.802, 'eval_steps_per_second': 24.306, 'epoch': 20.0}
{'loss': 2.0636, 'grad_norm': 3.9272406101226807, 'learning_rate': 4.078947368421053e-05, 'epoch': 22.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.245105266571045, 'eval_Accuracy': 0.2300469483568075, 'eval_F1': 0.18128654970760233, 'eval_Precision': 0.17595029239766083, 'eval_Recall': 0.19956140350877194, 'eval_runtime': 0.5821, 'eval_samples_per_second': 365.89, 'eval_steps_per_second': 24.049, 'epoch': 22.5}
{'loss': 1.7428, 'grad_norm': 4.090250492095947, 'learning_rate': 3.9473684210526316e-05, 'epoch': 25.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.10429048538208, 'eval_Accuracy': 0.25821596244131456, 'eval_F1': 0.20162241887905602, 'eval_Precision': 0.19690265486725664, 'eval_Recall': 0.21902654867256638, 'eval_runtime': 0.5775, 'eval_samples_per_second': 368.848, 'eval_steps_per_second': 24.244, 'epoch': 25.0}
{'loss': 1.4172, 'grad_norm': 3.1947741508483887, 'learning_rate': 3.815789473684211e-05, 'epoch': 27.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.9979069232940674, 'eval_Accuracy': 0.2535211267605634, 'eval_F1': 0.20520833333333335, 'eval_Precision': 0.2051339285714286, 'eval_Recall': 0.21651785714285715, 'eval_runtime': 0.5853, 'eval_samples_per_second': 363.945, 'eval_steps_per_second': 23.921, 'epoch': 27.5}
{'loss': 1.176, 'grad_norm': 3.528109550476074, 'learning_rate': 3.6842105263157895e-05, 'epoch': 30.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.855703353881836, 'eval_Accuracy': 0.2863849765258216, 'eval_F1': 0.22612612612612615, 'eval_Precision': 0.22447447447447447, 'eval_Recall': 0.24324324324324326, 'eval_runtime': 0.5821, 'eval_samples_per_second': 365.947, 'eval_steps_per_second': 24.053, 'epoch': 30.0}
{'loss': 0.9468, 'grad_norm': 2.6590030193328857, 'learning_rate': 3.5526315789473684e-05, 'epoch': 32.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.7513043880462646, 'eval_Accuracy': 0.28169014084507044, 'eval_F1': 0.22790346907993966, 'eval_Precision': 0.22473604826546, 'eval_Recall': 0.24434389140271492, 'eval_runtime': 0.5741, 'eval_samples_per_second': 371.007, 'eval_steps_per_second': 24.385, 'epoch': 32.5}
{'loss': 0.7724, 'grad_norm': 2.8158140182495117, 'learning_rate': 3.421052631578947e-05, 'epoch': 35.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.6536316871643066, 'eval_Accuracy': 0.29577464788732394, 'eval_F1': 0.23546423135464228, 'eval_Precision': 0.23668188736681892, 'eval_Recall': 0.2534246575342466, 'eval_runtime': 0.5862, 'eval_samples_per_second': 363.365, 'eval_steps_per_second': 23.883, 'epoch': 35.0}
{'loss': 0.6213, 'grad_norm': 2.3893933296203613, 'learning_rate': 3.289473684210527e-05, 'epoch': 37.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.564413070678711, 'eval_Accuracy': 0.29577464788732394, 'eval_F1': 0.2362654320987654, 'eval_Precision': 0.23202160493827162, 'eval_Recall': 0.25925925925925924, 'eval_runtime': 0.5834, 'eval_samples_per_second': 365.081, 'eval_steps_per_second': 23.996, 'epoch': 37.5}
{'loss': 0.5129, 'grad_norm': 2.0033841133117676, 'learning_rate': 3.157894736842105e-05, 'epoch': 40.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.500619411468506, 'eval_Accuracy': 0.3333333333333333, 'eval_F1': 0.26635367762128326, 'eval_Precision': 0.2644757433489828, 'eval_Recall': 0.29107981220657275, 'eval_runtime': 0.5945, 'eval_samples_per_second': 358.285, 'eval_steps_per_second': 23.549, 'epoch': 40.0}
{'loss': 0.4012, 'grad_norm': 1.4194780588150024, 'learning_rate': 3.0263157894736844e-05, 'epoch': 42.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.413306474685669, 'eval_Accuracy': 0.3333333333333333, 'eval_F1': 0.2655712050078247, 'eval_Precision': 0.26447574334898283, 'eval_Recall': 0.2863849765258216, 'eval_runtime': 0.5863, 'eval_samples_per_second': 363.311, 'eval_steps_per_second': 23.88, 'epoch': 42.5}
{'loss': 0.3352, 'grad_norm': 1.497567057609558, 'learning_rate': 2.8947368421052634e-05, 'epoch': 45.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.381288528442383, 'eval_Accuracy': 0.3380281690140845, 'eval_F1': 0.26124031007751936, 'eval_Precision': 0.2616279069767442, 'eval_Recall': 0.2837209302325581, 'eval_runtime': 0.5921, 'eval_samples_per_second': 359.722, 'eval_steps_per_second': 23.644, 'epoch': 45.0}
{'loss': 0.2623, 'grad_norm': 1.0221730470657349, 'learning_rate': 2.7631578947368426e-05, 'epoch': 47.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.3313190937042236, 'eval_Accuracy': 0.352112676056338, 'eval_F1': 0.2787596899224806, 'eval_Precision': 0.28062015503875976, 'eval_Recall': 0.29534883720930233, 'eval_runtime': 0.6076, 'eval_samples_per_second': 350.539, 'eval_steps_per_second': 23.04, 'epoch': 47.5}
{'loss': 0.2171, 'grad_norm': 1.2945233583450317, 'learning_rate': 2.6315789473684212e-05, 'epoch': 50.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.309480905532837, 'eval_Accuracy': 0.3474178403755869, 'eval_F1': 0.26999999999999996, 'eval_Precision': 0.2674603174603175, 'eval_Recall': 0.2976190476190476, 'eval_runtime': 0.6055, 'eval_samples_per_second': 351.798, 'eval_steps_per_second': 23.123, 'epoch': 50.0}
{'loss': 0.1762, 'grad_norm': 0.9702227711677551, 'learning_rate': 2.5e-05, 'epoch': 52.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.269791841506958, 'eval_Accuracy': 0.3427230046948357, 'eval_F1': 0.27452830188679245, 'eval_Precision': 0.27712264150943394, 'eval_Recall': 0.29245283018867924, 'eval_runtime': 0.5979, 'eval_samples_per_second': 356.254, 'eval_steps_per_second': 23.416, 'epoch': 52.5}
{'loss': 0.1508, 'grad_norm': 0.7599645256996155, 'learning_rate': 2.368421052631579e-05, 'epoch': 55.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.275515079498291, 'eval_Accuracy': 0.3568075117370892, 'eval_F1': 0.2777777777777778, 'eval_Precision': 0.27582159624413144, 'eval_Recall': 0.3028169014084507, 'eval_runtime': 0.5955, 'eval_samples_per_second': 357.701, 'eval_steps_per_second': 23.511, 'epoch': 55.0}
{'loss': 0.1258, 'grad_norm': 0.5189951658248901, 'learning_rate': 2.236842105263158e-05, 'epoch': 57.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2483155727386475, 'eval_Accuracy': 0.3568075117370892, 'eval_F1': 0.2859649122807017, 'eval_Precision': 0.284688995215311, 'eval_Recall': 0.30861244019138756, 'eval_runtime': 0.6286, 'eval_samples_per_second': 338.863, 'eval_steps_per_second': 22.273, 'epoch': 57.5}
{'loss': 0.114, 'grad_norm': 0.6564291715621948, 'learning_rate': 2.105263157894737e-05, 'epoch': 60.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.239880084991455, 'eval_Accuracy': 0.36619718309859156, 'eval_F1': 0.28773584905660377, 'eval_Precision': 0.29127358490566035, 'eval_Recall': 0.3089622641509434, 'eval_runtime': 0.6126, 'eval_samples_per_second': 347.698, 'eval_steps_per_second': 22.853, 'epoch': 60.0}
{'loss': 0.1013, 'grad_norm': 0.3476957678794861, 'learning_rate': 1.9736842105263158e-05, 'epoch': 62.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2256810665130615, 'eval_Accuracy': 0.3615023474178404, 'eval_F1': 0.2842767295597484, 'eval_Precision': 0.28537735849056606, 'eval_Recall': 0.30424528301886794, 'eval_runtime': 0.6345, 'eval_samples_per_second': 335.675, 'eval_steps_per_second': 22.063, 'epoch': 62.5}
{'loss': 0.0921, 'grad_norm': 0.41436076164245605, 'learning_rate': 1.8421052631578947e-05, 'epoch': 65.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2238659858703613, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.29162717219589257, 'eval_Precision': 0.2950236966824645, 'eval_Recall': 0.3127962085308057, 'eval_runtime': 0.7138, 'eval_samples_per_second': 298.386, 'eval_steps_per_second': 19.612, 'epoch': 65.0}
{'loss': 0.0827, 'grad_norm': 0.4520709812641144, 'learning_rate': 1.7105263157894737e-05, 'epoch': 67.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2327513694763184, 'eval_Accuracy': 0.3615023474178404, 'eval_F1': 0.2859399684044234, 'eval_Precision': 0.2863349131121643, 'eval_Recall': 0.3080568720379147, 'eval_runtime': 0.6861, 'eval_samples_per_second': 310.451, 'eval_steps_per_second': 20.405, 'epoch': 67.5}
{'loss': 0.0805, 'grad_norm': 0.42651626467704773, 'learning_rate': 1.5789473684210526e-05, 'epoch': 70.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2280473709106445, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.29544025157232706, 'eval_Precision': 0.29874213836477986, 'eval_Recall': 0.3136792452830189, 'eval_runtime': 0.5878, 'eval_samples_per_second': 362.349, 'eval_steps_per_second': 23.816, 'epoch': 70.0}
{'loss': 0.0726, 'grad_norm': 0.2804950177669525, 'learning_rate': 1.4473684210526317e-05, 'epoch': 72.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2283058166503906, 'eval_Accuracy': 0.36619718309859156, 'eval_F1': 0.2895734597156398, 'eval_Precision': 0.2938388625592417, 'eval_Recall': 0.3080568720379147, 'eval_runtime': 0.6297, 'eval_samples_per_second': 338.278, 'eval_steps_per_second': 22.234, 'epoch': 72.5}
{'loss': 0.0702, 'grad_norm': 0.7277926802635193, 'learning_rate': 1.3157894736842106e-05, 'epoch': 75.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.235275983810425, 'eval_Accuracy': 0.36619718309859156, 'eval_F1': 0.28773584905660377, 'eval_Precision': 0.29127358490566035, 'eval_Recall': 0.3089622641509434, 'eval_runtime': 0.6056, 'eval_samples_per_second': 351.693, 'eval_steps_per_second': 23.116, 'epoch': 75.0}
{'loss': 0.0653, 'grad_norm': 0.33961716294288635, 'learning_rate': 1.1842105263157895e-05, 'epoch': 77.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.229926586151123, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.29126984126984123, 'eval_Precision': 0.2916666666666667, 'eval_Recall': 0.3142857142857143, 'eval_runtime': 0.5989, 'eval_samples_per_second': 355.622, 'eval_steps_per_second': 23.374, 'epoch': 77.5}
{'loss': 0.0631, 'grad_norm': 0.37561655044555664, 'learning_rate': 1.0526315789473684e-05, 'epoch': 80.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2308387756347656, 'eval_Accuracy': 0.3755868544600939, 'eval_F1': 0.29523809523809524, 'eval_Precision': 0.2972222222222222, 'eval_Recall': 0.31666666666666665, 'eval_runtime': 0.5891, 'eval_samples_per_second': 361.588, 'eval_steps_per_second': 23.766, 'epoch': 80.0}
{'loss': 0.0604, 'grad_norm': 0.3354123532772064, 'learning_rate': 9.210526315789474e-06, 'epoch': 82.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2324984073638916, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.29539682539682544, 'eval_Precision': 0.298015873015873, 'eval_Recall': 0.31666666666666665, 'eval_runtime': 0.6379, 'eval_samples_per_second': 333.91, 'eval_steps_per_second': 21.947, 'epoch': 82.5}
{'loss': 0.0587, 'grad_norm': 0.8634827136993408, 'learning_rate': 7.894736842105263e-06, 'epoch': 85.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.233325481414795, 'eval_Accuracy': 0.3755868544600939, 'eval_F1': 0.2944444444444444, 'eval_Precision': 0.29642857142857143, 'eval_Recall': 0.31666666666666665, 'eval_runtime': 0.6286, 'eval_samples_per_second': 338.865, 'eval_steps_per_second': 22.273, 'epoch': 85.0}
{'loss': 0.0563, 'grad_norm': 0.2906375527381897, 'learning_rate': 6.578947368421053e-06, 'epoch': 87.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2343297004699707, 'eval_Accuracy': 0.3755868544600939, 'eval_F1': 0.29654088050314464, 'eval_Precision': 0.3007075471698113, 'eval_Recall': 0.3160377358490566, 'eval_runtime': 0.6857, 'eval_samples_per_second': 310.647, 'eval_steps_per_second': 20.418, 'epoch': 87.5}
{'loss': 0.0555, 'grad_norm': 0.6995247006416321, 'learning_rate': 5.263157894736842e-06, 'epoch': 90.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.233208656311035, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.2928571428571428, 'eval_Precision': 0.29642857142857143, 'eval_Recall': 0.3142857142857143, 'eval_runtime': 0.6112, 'eval_samples_per_second': 348.517, 'eval_steps_per_second': 22.907, 'epoch': 90.0}
{'loss': 0.0535, 'grad_norm': 0.2603815793991089, 'learning_rate': 3.9473684210526315e-06, 'epoch': 92.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2339847087860107, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.2936507936507936, 'eval_Precision': 0.2972222222222222, 'eval_Recall': 0.3142857142857143, 'eval_runtime': 0.6611, 'eval_samples_per_second': 322.178, 'eval_steps_per_second': 21.176, 'epoch': 92.5}
{'loss': 0.0541, 'grad_norm': 0.26633894443511963, 'learning_rate': 2.631578947368421e-06, 'epoch': 95.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.233236789703369, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.2936507936507936, 'eval_Precision': 0.2972222222222222, 'eval_Recall': 0.3142857142857143, 'eval_runtime': 0.5917, 'eval_samples_per_second': 359.966, 'eval_steps_per_second': 23.66, 'epoch': 95.0}
{'loss': 0.0528, 'grad_norm': 0.2787396311759949, 'learning_rate': 1.3157894736842106e-06, 'epoch': 97.5}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.233506679534912, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.2928571428571428, 'eval_Precision': 0.29642857142857143, 'eval_Recall': 0.3142857142857143, 'eval_runtime': 0.591, 'eval_samples_per_second': 360.412, 'eval_steps_per_second': 23.689, 'epoch': 97.5}
{'loss': 0.0526, 'grad_norm': 0.23745191097259521, 'learning_rate': 0.0, 'epoch': 100.0}


  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.2338368892669678, 'eval_Accuracy': 0.37089201877934275, 'eval_F1': 0.2928571428571428, 'eval_Precision': 0.29642857142857143, 'eval_Recall': 0.3142857142857143, 'eval_runtime': 0.6177, 'eval_samples_per_second': 344.843, 'eval_steps_per_second': 22.666, 'epoch': 100.0}
{'train_runtime': 830.2487, 'train_samples_per_second': 76.724, 'train_steps_per_second': 2.409, 'train_loss': 1.1354074995517731, 'epoch': 100.0}
CPU times: user 12min 5s, sys: 48.4 s, total: 12min 53s
Wall time: 13min 50s


TrainOutput(global_step=2000, training_loss=1.1354074995517731, metrics={'train_runtime': 830.2487, 'train_samples_per_second': 76.724, 'train_steps_per_second': 2.409, 'total_flos': 459421689199200.0, 'train_loss': 1.1354074995517731, 'epoch': 100.0})

Evaluate trainer

In [42]:
q=[trainer.evaluate(eval_dataset=df2) for df2 in [train_dataloader, test_dataloader]]
pd.DataFrame(q, index=["train","test"]).iloc[:,:5]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,eval_loss,eval_Accuracy,eval_F1,eval_Precision,eval_Recall
train,0.036079,0.99843,0.998384,0.999273,0.998182
test,3.233837,0.370892,0.292857,0.296429,0.314286


Save model and tokenizer

In [43]:
model_path = "base"

In [44]:
trainer.save_model(model_path)
tokenizer.save_pretrained(model_path)

('base/tokenizer_config.json',
 'base/special_tokens_map.json',
 'base/vocab.txt',
 'base/added_tokens.json')

Load model and tokenizer

In [50]:
new_model = BertForSequenceClassification.from_pretrained(model_path)
new_tokenizer= BertTokenizerFast.from_pretrained(model_path)

In [46]:
pipe= pipeline("sentiment-analysis", model=new_model, tokenizer=new_tokenizer, device='mps')

In [47]:
pipe("Hello")

[{'label': 'greeting', 'score': 0.9877205491065979}]

## Predict

In [48]:
def predict(text):
    # Tokenize the input text and convert it to PyTorch tensors
    inputs = new_tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt").to("cpu")

    # Pass the input tensors through the model
    outputs = new_model(**inputs)

    # Softmax the output to get probabilities
    probs = outputs[0].softmax(1)

    # Get the predicted label index
    pred_label_idx = probs.argmax()

    # Convert the predicted label index to the actual label using model configuration
    pred_label = new_model.config.id2label[pred_label_idx.item()]

    return pred_label


In [51]:
text = "Hello"
predict(text)



'greeting'

In [52]:
def model_response(text):
    response = ''
    score = pipe(text)[0]['score']
    tag = pipe(text)[0]['label']
    if score < 0.8:
        response += "Don't understand"
    else:
        label = new_model.config.label2id[pipe(text)[0]['label']]
        response = random.choice(intents['intents'][label]['responses'])
    return response, tag

In [58]:
model_response('hi')

('Hello', 'greeting')