# Base model with Bidirectional Encoder Representations from Transformers (BERT)

In [1]:
import json
import random
from collections import Counter

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud,STOPWORDS
import missingno as msno
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from keras.preprocessing import text
import keras
from keras.models import Sequential
from keras.layers import Dense,Embedding,LSTM,Dropout
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk import word_tokenize
from nltk.stem import PorterStemmer
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer,BertTokenizerFast, BertForSequenceClassification
from transformers import TrainingArguments, Trainer, pipeline


2024-10-23 15:15:24.474018: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Prepare Dataset

Load JSON file

In [2]:
def load_json_file(filename):
    """Read a JSON document from disk and return the parsed Python object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content
        (a dict for the intents file used in this notebook).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; decode explicitly instead of relying on the
    # platform's default encoding, which differs between operating systems.
    with open(filename, encoding="utf-8") as f:
        return json.load(f)
    
# The path is relative, so it is resolved against the kernel's working directory.
filename="intents.json"
intents=load_json_file(filename)
# Prints the whole parsed structure; expect a long output for a large intents file.
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Hey', 'Is anyone there?', 'Hello', 'Hey there', 'hello there'], 'responses': ['Hello How may I help You?', 'Hi,How can I help You?', 'Hi there,What Can I do for You Today?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye'], 'responses': ['See you later', 'Have a nice day', 'Bye! Come back again'], 'context_set': ''}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful", 'Thanks for the help'], 'responses': ['Happy to help!', 'Any time!', 'My pleasure', "You're most welcome!"], 'context_set': ''}, {'tag': 'about', 'patterns': ['Who are you?', 'What are you?', 'Who you are?'], 'responses': ["I'm MMCOE's intelligent bot assistant, here to assist you!"], 'context_set': ''}, {'tag': 'name', 'patterns': ['what is your name', 'what should I call you', 'whats your name?'], 'responses': ['A bot has no name. But I have a purpose, and that is to help you. How can I help you?'], 'context_s

Create dataframe for patterns and tags

In [3]:
def create_df():
    """Return a new, empty DataFrame with the columns ``Pattern`` and ``Tag``."""
    empty_columns = {'Pattern': [], 'Tag': []}
    return pd.DataFrame(empty_columns)


df=create_df()
df

Unnamed: 0,Pattern,Tag


Extract data from file

In [4]:
def extract_json_info(json_file,df):
    """Append one (pattern, tag) row per example utterance to ``df``.

    Args:
        json_file: Parsed intents document. It must have an ``'intents'`` list
            whose entries each carry a ``'tag'`` and a list of ``'patterns'``.
        df: Frame with two columns (pattern text, tag) to extend.

    Returns:
        A new DataFrame holding the rows of ``df`` followed by the extracted
        rows, indexed 0..n-1. ``df`` itself is not modified.

    Raises:
        KeyError: If ``'intents'``, ``'tag'`` or ``'patterns'`` is missing.
    """
    # Collect all rows first and build the frame once; growing a frame one
    # ``.loc`` assignment at a time is quadratic and, when the index is not
    # 0..n-1, ``len(df.index)`` can address (and overwrite) an existing row.
    rows = [
        (pattern, intent['tag'])
        for intent in json_file['intents']
        for pattern in intent['patterns']
    ]
    new_rows = pd.DataFrame(rows, columns=df.columns)
    if df.empty:
        # Nothing to keep from the input; skipping the concat also avoids
        # mixing the empty frame's placeholder dtype into the result.
        return new_rows
    return pd.concat([df, new_rows], ignore_index=True)
# Start from a fresh empty frame so that re-running this cell does not append
# the same patterns a second time.
df=extract_json_info(intents,create_df())

## Explore data

In [5]:
df.head()

Unnamed: 0,Pattern,Tag
0,Hi,greeting
1,Hey,greeting
2,Is anyone there?,greeting
3,Hello,greeting
4,Hey there,greeting


In [6]:
df.shape

(914, 2)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 914 entries, 0 to 913
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Pattern  914 non-null    object
 1   Tag      914 non-null    object
dtypes: object(2)
memory usage: 21.4+ KB


In [8]:
# Distinct tags, computed once and reused for both the count and the listing.
unique_tags = df['Tag'].unique()
x = len(unique_tags)
print(f"{x} classes in or tags")

print(unique_tags)

134 classes in or tags
['greeting' 'goodbye' 'thanks' 'about' 'name' 'complaint' 'contact'
 'social connect' 'working-hours' 'location' 'why-choose-mmcoe'
 'what-we-offer' 'admission-enquiry' 'admission-after-allotment'
 'admission_in_charge_FE' 'admission_in_charge_DSE'
 'admission_in_charge_ME_MBA' 'Computer-Intake' 'IT-Intake'
 'Mechanical-Intake' 'ENTC-Intake' 'Electrical-Intake'
 'DSE-computer-Intake' 'DSE-IT-Intake' 'DSE-Mechanical-Intake'
 'DSE-ENTC-Intake' 'DSE-Electrical-Intake' 'ME-Intake' 'MBA-Intake'
 'FE-Eligibility' 'DSE-Eligibility' 'ME-Eligibility' 'MBA-Eligibility'
 'Documents-FE' 'fee-payment-process' 'Fe-FEES' 'DSE-FEES' 'MBA-FEES'
 'ME-FEES' 'scholarships' 'Faculty-College-Computer' 'Faculty-College-IT'
 'Faculty-College-Mechanical' 'Faculty-College-E&TC'
 'Faculty-College-Electrical' 'Faculty-College-MBA'
 'Faculty-College-Engineering-Sciences' 'Research-Projects'
 'Student-projects' 'About-MMCOE' 'About-Marathwada Mitramandal'
 'About In general' "Principal's mess

In [9]:
df.isna().sum()

Pattern    0
Tag        0
dtype: int64

Understand the corpus

In [10]:
def get_corpus(series):
    """Flatten an iterable of sentences into one list of whitespace-separated tokens.

    Args:
        series: Iterable of strings (e.g. a pandas Series of patterns).

    Returns:
        list of str: every token of every sentence, in order of appearance.
    """
    return [token.strip() for sentence in series for token in sentence.split()]

# Tokens of every pattern in one flat list; preview only the first few.
corpus = get_corpus(df.Pattern)
corpus[:5]

['Hi', 'Hey', 'Is', 'anyone', 'there?']

In [11]:
# Token frequencies over the whole corpus (Counter comes from the import cell).
counter = Counter(corpus)
# (token, count) pairs for the ten most frequent tokens, most frequent first.
top_tokens = counter.most_common(10)
print(top_tokens)
# Keep the dict under its own name instead of rebinding the list's name to a
# different type.
most_common = dict(top_tokens)
most_common

[('the', 449), ('for', 416), ('is', 308), ('What', 287), ('of', 183), ('are', 140), ('what', 128), ('cutoff', 119), ('admission', 112), ('department', 92)]


{'the': 449,
 'for': 416,
 'is': 308,
 'What': 287,
 'of': 183,
 'are': 140,
 'what': 128,
 'cutoff': 119,
 'admission': 112,
 'department': 92}

Create another dataframe for patterns, tags and labels

In [13]:
# Work on a copy so that the `labels` column added further down leaves `df` unchanged.
df2 = df.copy()
df2.head()

Unnamed: 0,Pattern,Tag
0,Hi,greeting
1,Hey,greeting
2,Is anyone there?,greeting
3,Hello,greeting
4,Hey there,greeting


Create list of unique tags as labels

In [14]:
# Strip whitespace BEFORE de-duplicating: stripping afterwards could leave the
# same tag in the list twice (e.g. 'x' and 'x '), which would inflate the class
# count and give one tag two different ids.
labels=df2['Tag'].str.strip().unique().tolist()
labels[:5]

['greeting', 'goodbye', 'thanks', 'about', 'name']

Create the `id` → `label` mapping and vice versa

In [15]:
# Two-way lookup between integer class ids and tag names; ids follow the order of `labels`.
num_labels = len(labels)
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}

In [16]:
id2label

{0: 'greeting',
 1: 'goodbye',
 2: 'thanks',
 3: 'about',
 4: 'name',
 5: 'complaint',
 6: 'contact',
 7: 'social connect',
 8: 'working-hours',
 9: 'location',
 10: 'why-choose-mmcoe',
 11: 'what-we-offer',
 12: 'admission-enquiry',
 13: 'admission-after-allotment',
 14: 'admission_in_charge_FE',
 15: 'admission_in_charge_DSE',
 16: 'admission_in_charge_ME_MBA',
 17: 'Computer-Intake',
 18: 'IT-Intake',
 19: 'Mechanical-Intake',
 20: 'ENTC-Intake',
 21: 'Electrical-Intake',
 22: 'DSE-computer-Intake',
 23: 'DSE-IT-Intake',
 24: 'DSE-Mechanical-Intake',
 25: 'DSE-ENTC-Intake',
 26: 'DSE-Electrical-Intake',
 27: 'ME-Intake',
 28: 'MBA-Intake',
 29: 'FE-Eligibility',
 30: 'DSE-Eligibility',
 31: 'ME-Eligibility',
 32: 'MBA-Eligibility',
 33: 'Documents-FE',
 34: 'fee-payment-process',
 35: 'Fe-FEES',
 36: 'DSE-FEES',
 37: 'MBA-FEES',
 38: 'ME-FEES',
 39: 'scholarships',
 40: 'Faculty-College-Computer',
 41: 'Faculty-College-IT',
 42: 'Faculty-College-Mechanical',
 43: 'Faculty-College-E&

In [17]:
label2id

{'greeting': 0,
 'goodbye': 1,
 'thanks': 2,
 'about': 3,
 'name': 4,
 'complaint': 5,
 'contact': 6,
 'social connect': 7,
 'working-hours': 8,
 'location': 9,
 'why-choose-mmcoe': 10,
 'what-we-offer': 11,
 'admission-enquiry': 12,
 'admission-after-allotment': 13,
 'admission_in_charge_FE': 14,
 'admission_in_charge_DSE': 15,
 'admission_in_charge_ME_MBA': 16,
 'Computer-Intake': 17,
 'IT-Intake': 18,
 'Mechanical-Intake': 19,
 'ENTC-Intake': 20,
 'Electrical-Intake': 21,
 'DSE-computer-Intake': 22,
 'DSE-IT-Intake': 23,
 'DSE-Mechanical-Intake': 24,
 'DSE-ENTC-Intake': 25,
 'DSE-Electrical-Intake': 26,
 'ME-Intake': 27,
 'MBA-Intake': 28,
 'FE-Eligibility': 29,
 'DSE-Eligibility': 30,
 'ME-Eligibility': 31,
 'MBA-Eligibility': 32,
 'Documents-FE': 33,
 'fee-payment-process': 34,
 'Fe-FEES': 35,
 'DSE-FEES': 36,
 'MBA-FEES': 37,
 'ME-FEES': 38,
 'scholarships': 39,
 'Faculty-College-Computer': 40,
 'Faculty-College-IT': 41,
 'Faculty-College-Mechanical': 42,
 'Faculty-College-E&TC':

Add converted `label2id` to df2

In [18]:
def _tag_to_id(tag):
    """Look up the integer class id of a tag, ignoring surrounding whitespace."""
    return label2id[tag.strip()]


df2['labels'] = df2['Tag'].map(_tag_to_id)
df2.head()

Unnamed: 0,Pattern,Tag,labels
0,Hi,greeting,0
1,Hey,greeting,0
2,Is anyone there?,greeting,0
3,Hello,greeting,0
4,Hey there,greeting,0


In [19]:
# Model inputs: the raw pattern strings as a plain Python list.
X=list(df2['Pattern'])
X[:5]

['Hi', 'Hey', 'Is anyone there?', 'Hello', 'Hey there']

In [20]:
# Targets: the integer class id of each pattern, in the same order as X.
y=list(df2['labels'])
y[:5]

[0, 0, 0, 0, 0]

Define training and test datasets

In [21]:
# Split patterns and labels into train and test parts; random_state pins the
# shuffle so the same split is produced on every run. No test_size is given,
# so scikit-learn's default proportion applies.
# NOTE(review): the split is not stratified, so some tags may end up on only
# one side — consider `stratify=y` if every tag has enough examples.
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=100)

Define base model

In [22]:
# Checkpoint name shared by the tokenizer and the classifier loaded below.
model_name="bert-base-uncased"
# Intended maximum sequence length; it is handed to the tokenizer loader below.
max_len=256

Load tokenizer

In [23]:
# `model_max_length` is the tokenizer setting that `truncation=True` falls back
# to when no per-call `max_length` is given; a `max_length` keyword passed to
# `from_pretrained` does not set that limit, so `max_len` had no effect before.
tokenizer=BertTokenizer.from_pretrained(model_name,model_max_length=max_len)



Load model

In [24]:
# Load the pretrained encoder with a classification head sized to the tag set
# (num_labels) and pass the id/label lookups along with it. The head is newly
# initialised, as the load message below points out, so it needs fine-tuning.
model=BertForSequenceClassification.from_pretrained(model_name,num_labels=num_labels,id2label=id2label,label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Tokenize the training and test sentences

In [25]:
# Tokenize each split in one call. padding=True pads to the longest sequence
# within that call, so the two splits may end up with different padded lengths;
# truncation=True clips inputs that exceed the tokenizer's length limit.
train_encoding=tokenizer(X_train,truncation=True,padding=True)
test_encoding=tokenizer(X_test,truncation=True,padding=True)

In [26]:
# Encode every pattern (train and test together) with the same settings.
full_data = tokenizer(X, truncation=True, padding=True)

Dataset wrapper (the class is named `DataLoader`, but it subclasses `torch.utils.data.Dataset`)

In [27]:
class DataLoader(Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    Despite its name this is a ``Dataset`` subclass: it serves one example per
    index and does no batching itself.
    """

    def __init__(self, encodings, labels):
        """Store the inputs without copying.

        Args:
            encodings: Mapping from field name to a per-example sequence of
                values (anything exposing ``.items()``).
            labels: Indexable sequence of class ids, one per example.
        """
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The dataset size is taken from the labels alone; encodings beyond
        # that length are never served.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, with its label under ``'labels'``."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [28]:
# Wrap each split's encodings together with its labels. These objects are
# datasets (see the class above) and are passed to the Trainer further down.
train_dataloader = DataLoader(train_encoding, y_train)
test_dataloader = DataLoader(test_encoding, y_test)

In [29]:
# `full_data` encodes every pattern in X, so it must be paired with the full
# label list y; pairing it with y_test mislabels the examples and cuts the
# dataset down to the test-set length.
fullDataLoader = DataLoader(full_data, y)

In [30]:
def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision, recall and F1 for one evaluation pass.

    Args:
        pred: Prediction object handed over by the Trainer. This function reads
            ``pred.label_ids`` (true class ids) and ``pred.predictions``
            (per-class scores, reduced with ``argmax`` over the last axis).

    Returns:
        dict with the keys ``'Accuracy'``, ``'F1'``, ``'Precision'`` and ``'Recall'``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    # zero_division=0 makes the score of a class that is never predicted (or
    # has no true samples in this split) an explicit 0. That is the same value
    # the default setting falls back to, but without a warning on every
    # evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    return {
        'Accuracy': acc,
        'F1': f1,
        'Precision': precision,
        'Recall': recall
    }


Define training arguments

In [31]:
# Fine-tuning configuration for the Hugging Face Trainer.
training_args = TrainingArguments(
    output_dir='./output',           # checkpoints are written here
    do_train=True,
    do_eval=True,
    num_train_epochs=100,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    warmup_steps=100,                # learning-rate warm-up length, in optimizer steps
    weight_decay=0.05,
    logging_strategy='steps',
    logging_dir='./logs',
    logging_steps=50,                # log the training loss every 50 steps
    # NOTE(review): newer transformers releases rename this argument to
    # `eval_strategy` — confirm against the installed version.
    evaluation_strategy="steps",
    eval_steps=50,                   # evaluate every 50 steps
    # save_steps is not set here, so the library default applies — TODO confirm
    # it lines up with eval_steps the way load_best_model_at_end requires.
    save_strategy="steps",
    # Restore the best checkpoint when training ends. No metric_for_best_model
    # is given, so the library's default selection metric is used — TODO confirm.
    load_best_model_at_end=True
)



In [32]:
# Bundle model, arguments, datasets and the metric function into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataloader,
    # NOTE(review): the held-out test split doubles as the evaluation set, so
    # checkpoint selection (load_best_model_at_end) sees the test data —
    # consider a separate validation split.
    eval_dataset=test_dataloader,
    compute_metrics= compute_metrics
)

In [33]:
%%time
trainer.train()

  0%|          | 0/2200 [00:00<?, ?it/s]

{'loss': 4.8776, 'grad_norm': 5.240514278411865, 'learning_rate': 2.5e-05, 'epoch': 2.27}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.767058849334717, 'eval_Accuracy': 0.034934497816593885, 'eval_F1': 0.004294139864609663, 'eval_Precision': 0.0026765009523630214, 'eval_Recall': 0.021212121212121213, 'eval_runtime': 1.3475, 'eval_samples_per_second': 169.945, 'eval_steps_per_second': 11.132, 'epoch': 2.27}
{'loss': 4.514, 'grad_norm': 5.23051118850708, 'learning_rate': 5e-05, 'epoch': 4.55}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 4.323123931884766, 'eval_Accuracy': 0.10480349344978165, 'eval_F1': 0.04019463431305536, 'eval_Precision': 0.050501830978246065, 'eval_Recall': 0.07083333333333333, 'eval_runtime': 0.6937, 'eval_samples_per_second': 330.12, 'eval_steps_per_second': 21.624, 'epoch': 4.55}
{'loss': 3.8599, 'grad_norm': 5.501142501831055, 'learning_rate': 4.880952380952381e-05, 'epoch': 6.82}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 3.6803367137908936, 'eval_Accuracy': 0.22707423580786026, 'eval_F1': 0.13135537768379152, 'eval_Precision': 0.12932425793155625, 'eval_Recall': 0.19336569579288027, 'eval_runtime': 0.6483, 'eval_samples_per_second': 353.221, 'eval_steps_per_second': 23.137, 'epoch': 6.82}
{'loss': 3.0407, 'grad_norm': 4.611559867858887, 'learning_rate': 4.761904761904762e-05, 'epoch': 9.09}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 2.956373453140259, 'eval_Accuracy': 0.44541484716157204, 'eval_F1': 0.316137037565609, 'eval_Precision': 0.35515632515632517, 'eval_Recall': 0.3712018140589569, 'eval_runtime': 0.6988, 'eval_samples_per_second': 327.717, 'eval_steps_per_second': 21.466, 'epoch': 9.09}
{'loss': 2.2375, 'grad_norm': 4.4825239181518555, 'learning_rate': 4.642857142857143e-05, 'epoch': 11.36}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 2.36291241645813, 'eval_Accuracy': 0.5633187772925764, 'eval_F1': 0.43003429592214637, 'eval_Precision': 0.4323134549770064, 'eval_Recall': 0.48348909657320865, 'eval_runtime': 0.6703, 'eval_samples_per_second': 341.641, 'eval_steps_per_second': 22.378, 'epoch': 11.36}
{'loss': 1.6221, 'grad_norm': 3.1862645149230957, 'learning_rate': 4.523809523809524e-05, 'epoch': 13.64}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.9014685153961182, 'eval_Accuracy': 0.6899563318777293, 'eval_F1': 0.553119822564267, 'eval_Precision': 0.5570987654320988, 'eval_Recall': 0.5864417989417989, 'eval_runtime': 0.6921, 'eval_samples_per_second': 330.891, 'eval_steps_per_second': 21.674, 'epoch': 13.64}
{'loss': 1.1538, 'grad_norm': 3.063471555709839, 'learning_rate': 4.404761904761905e-05, 'epoch': 15.91}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.5489848852157593, 'eval_Accuracy': 0.7467248908296943, 'eval_F1': 0.6338009861374346, 'eval_Precision': 0.6399755229194481, 'eval_Recall': 0.6563417890520695, 'eval_runtime': 0.6742, 'eval_samples_per_second': 339.652, 'eval_steps_per_second': 22.248, 'epoch': 15.91}
{'loss': 0.8042, 'grad_norm': 2.3797619342803955, 'learning_rate': 4.2857142857142856e-05, 'epoch': 18.18}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.2834683656692505, 'eval_Accuracy': 0.7685589519650655, 'eval_F1': 0.6545359252906423, 'eval_Precision': 0.6631027253668763, 'eval_Recall': 0.6758984725965859, 'eval_runtime': 0.6543, 'eval_samples_per_second': 350.017, 'eval_steps_per_second': 22.927, 'epoch': 18.18}
{'loss': 0.5807, 'grad_norm': 2.1765284538269043, 'learning_rate': 4.166666666666667e-05, 'epoch': 20.45}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.1259993314743042, 'eval_Accuracy': 0.7729257641921398, 'eval_F1': 0.6801679452622849, 'eval_Precision': 0.6887690925426775, 'eval_Recall': 0.6983827493261455, 'eval_runtime': 0.6605, 'eval_samples_per_second': 346.706, 'eval_steps_per_second': 22.71, 'epoch': 20.45}
{'loss': 0.4084, 'grad_norm': 1.2104610204696655, 'learning_rate': 4.047619047619048e-05, 'epoch': 22.73}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.0223029851913452, 'eval_Accuracy': 0.7991266375545851, 'eval_F1': 0.7206470345893423, 'eval_Precision': 0.7353708791208791, 'eval_Recall': 0.7468177655677656, 'eval_runtime': 0.7478, 'eval_samples_per_second': 306.228, 'eval_steps_per_second': 20.059, 'epoch': 22.73}
{'loss': 0.3176, 'grad_norm': 2.4511306285858154, 'learning_rate': 3.928571428571429e-05, 'epoch': 25.0}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.9291858673095703, 'eval_Accuracy': 0.8165938864628821, 'eval_F1': 0.7499649756931309, 'eval_Precision': 0.7611650485436894, 'eval_Recall': 0.7678224687933426, 'eval_runtime': 0.6933, 'eval_samples_per_second': 330.309, 'eval_steps_per_second': 21.636, 'epoch': 25.0}
{'loss': 0.2394, 'grad_norm': 1.4110783338546753, 'learning_rate': 3.809523809523809e-05, 'epoch': 27.27}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8902250528335571, 'eval_Accuracy': 0.8165938864628821, 'eval_F1': 0.7370507799079229, 'eval_Precision': 0.7476190476190476, 'eval_Recall': 0.7490022675736961, 'eval_runtime': 0.7157, 'eval_samples_per_second': 319.98, 'eval_steps_per_second': 20.959, 'epoch': 27.27}
{'loss': 0.1956, 'grad_norm': 1.6297471523284912, 'learning_rate': 3.690476190476191e-05, 'epoch': 29.55}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8631446957588196, 'eval_Accuracy': 0.8209606986899564, 'eval_F1': 0.7607815931345342, 'eval_Precision': 0.7648692810457516, 'eval_Recall': 0.7824696545284782, 'eval_runtime': 0.787, 'eval_samples_per_second': 290.97, 'eval_steps_per_second': 19.059, 'epoch': 29.55}
{'loss': 0.1591, 'grad_norm': 1.2028167247772217, 'learning_rate': 3.571428571428572e-05, 'epoch': 31.82}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8242508172988892, 'eval_Accuracy': 0.8296943231441049, 'eval_F1': 0.7718133718133717, 'eval_Precision': 0.7803454715219422, 'eval_Recall': 0.7873716153127919, 'eval_runtime': 0.7191, 'eval_samples_per_second': 318.435, 'eval_steps_per_second': 20.858, 'epoch': 31.82}
{'loss': 0.144, 'grad_norm': 1.435449481010437, 'learning_rate': 3.4523809523809526e-05, 'epoch': 34.09}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7966517806053162, 'eval_Accuracy': 0.8384279475982532, 'eval_F1': 0.7755439549557196, 'eval_Precision': 0.7893323996265174, 'eval_Recall': 0.7922735760971056, 'eval_runtime': 0.6848, 'eval_samples_per_second': 334.427, 'eval_steps_per_second': 21.906, 'epoch': 34.09}
{'loss': 0.1191, 'grad_norm': 1.031904935836792, 'learning_rate': 3.3333333333333335e-05, 'epoch': 36.36}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7989042401313782, 'eval_Accuracy': 0.8384279475982532, 'eval_F1': 0.7787632846456375, 'eval_Precision': 0.7949813258636789, 'eval_Recall': 0.7930905695611576, 'eval_runtime': 0.6992, 'eval_samples_per_second': 327.504, 'eval_steps_per_second': 21.452, 'epoch': 36.36}
{'loss': 0.1093, 'grad_norm': 0.39999523758888245, 'learning_rate': 3.2142857142857144e-05, 'epoch': 38.64}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7953278422355652, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.7817842288430524, 'eval_Precision': 0.7911297852474323, 'eval_Recall': 0.7979925303454715, 'eval_runtime': 0.6912, 'eval_samples_per_second': 331.297, 'eval_steps_per_second': 21.701, 'epoch': 38.64}
{'loss': 0.1055, 'grad_norm': 1.3252975940704346, 'learning_rate': 3.095238095238095e-05, 'epoch': 40.91}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7934222221374512, 'eval_Accuracy': 0.8384279475982532, 'eval_F1': 0.7791535806241688, 'eval_Precision': 0.7916666666666669, 'eval_Recall': 0.7930905695611576, 'eval_runtime': 0.7215, 'eval_samples_per_second': 317.383, 'eval_steps_per_second': 20.789, 'epoch': 40.91}
{'loss': 0.1016, 'grad_norm': 0.5957934856414795, 'learning_rate': 2.9761904761904762e-05, 'epoch': 43.18}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7864994406700134, 'eval_Accuracy': 0.8384279475982532, 'eval_F1': 0.7962196219621962, 'eval_Precision': 0.8095238095238096, 'eval_Recall': 0.8083686940122583, 'eval_runtime': 0.7384, 'eval_samples_per_second': 310.149, 'eval_steps_per_second': 20.315, 'epoch': 43.18}
{'loss': 0.0978, 'grad_norm': 1.2242313623428345, 'learning_rate': 2.857142857142857e-05, 'epoch': 45.45}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8056041598320007, 'eval_Accuracy': 0.834061135371179, 'eval_F1': 0.7887510179589388, 'eval_Precision': 0.8014380009429516, 'eval_Recall': 0.8063884959924564, 'eval_runtime': 0.6761, 'eval_samples_per_second': 338.687, 'eval_steps_per_second': 22.185, 'epoch': 45.45}
{'loss': 0.0781, 'grad_norm': 1.0922659635543823, 'learning_rate': 2.7380952380952383e-05, 'epoch': 47.73}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8024114370346069, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8020187733059021, 'eval_Precision': 0.8110089580386611, 'eval_Recall': 0.8157944365865157, 'eval_runtime': 0.6954, 'eval_samples_per_second': 329.328, 'eval_steps_per_second': 21.572, 'epoch': 47.73}
{'loss': 0.09, 'grad_norm': 0.27247709035873413, 'learning_rate': 2.6190476190476192e-05, 'epoch': 50.0}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8115273118019104, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8027538468132527, 'eval_Precision': 0.8123290900518624, 'eval_Recall': 0.8187647336162188, 'eval_runtime': 0.7525, 'eval_samples_per_second': 304.316, 'eval_steps_per_second': 19.933, 'epoch': 50.0}
{'loss': 0.0821, 'grad_norm': 1.272147297859192, 'learning_rate': 2.5e-05, 'epoch': 52.27}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7964934706687927, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.7924907336672044, 'eval_Precision': 0.8015873015873017, 'eval_Recall': 0.8053454715219421, 'eval_runtime': 0.7016, 'eval_samples_per_second': 326.397, 'eval_steps_per_second': 21.38, 'epoch': 52.27}
{'loss': 0.0825, 'grad_norm': 0.9612154960632324, 'learning_rate': 2.380952380952381e-05, 'epoch': 54.55}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8023630976676941, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.8048174603174604, 'eval_Precision': 0.8166190476190477, 'eval_Recall': 0.8236190476190477, 'eval_runtime': 0.6598, 'eval_samples_per_second': 347.058, 'eval_steps_per_second': 22.733, 'epoch': 54.55}
{'loss': 0.0825, 'grad_norm': 1.474337100982666, 'learning_rate': 2.261904761904762e-05, 'epoch': 56.82}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.7981274127960205, 'eval_Accuracy': 0.8384279475982532, 'eval_F1': 0.7962196219621962, 'eval_Precision': 0.8095238095238096, 'eval_Recall': 0.8083686940122583, 'eval_runtime': 0.6625, 'eval_samples_per_second': 345.675, 'eval_steps_per_second': 22.642, 'epoch': 56.82}
{'loss': 0.077, 'grad_norm': 0.9130083322525024, 'learning_rate': 2.1428571428571428e-05, 'epoch': 59.09}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.807162880897522, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8022587973083021, 'eval_Precision': 0.8116690240452618, 'eval_Recall': 0.8157944365865157, 'eval_runtime': 0.6616, 'eval_samples_per_second': 346.141, 'eval_steps_per_second': 22.673, 'epoch': 59.09}
{'loss': 0.0698, 'grad_norm': 0.5593658089637756, 'learning_rate': 2.023809523809524e-05, 'epoch': 61.36}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8058935403823853, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.7972725844012972, 'eval_Precision': 0.808038661008958, 'eval_Recall': 0.8133191890617633, 'eval_runtime': 0.6649, 'eval_samples_per_second': 344.426, 'eval_steps_per_second': 22.561, 'epoch': 61.36}
{'loss': 0.0789, 'grad_norm': 0.7050065994262695, 'learning_rate': 1.9047619047619046e-05, 'epoch': 63.64}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8012343645095825, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8083039183039183, 'eval_Precision': 0.8175, 'eval_Recall': 0.8264523809523808, 'eval_runtime': 0.6889, 'eval_samples_per_second': 332.436, 'eval_steps_per_second': 21.775, 'epoch': 63.64}
{'loss': 0.07, 'grad_norm': 1.1650397777557373, 'learning_rate': 1.785714285714286e-05, 'epoch': 65.91}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8152254223823547, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8072972582972582, 'eval_Precision': 0.8167857142857143, 'eval_Recall': 0.8264523809523808, 'eval_runtime': 0.6651, 'eval_samples_per_second': 344.314, 'eval_steps_per_second': 22.553, 'epoch': 65.91}
{'loss': 0.0695, 'grad_norm': 0.564784824848175, 'learning_rate': 1.6666666666666667e-05, 'epoch': 68.18}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8106096386909485, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8119083694083693, 'eval_Precision': 0.8187857142857143, 'eval_Recall': 0.826952380952381, 'eval_runtime': 0.665, 'eval_samples_per_second': 344.377, 'eval_steps_per_second': 22.557, 'epoch': 68.18}
{'loss': 0.0755, 'grad_norm': 0.6537875533103943, 'learning_rate': 1.5476190476190476e-05, 'epoch': 70.45}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8079487085342407, 'eval_Accuracy': 0.8558951965065502, 'eval_F1': 0.8027368670933027, 'eval_Precision': 0.8123762376237623, 'eval_Recall': 0.8207449316360208, 'eval_runtime': 0.715, 'eval_samples_per_second': 320.295, 'eval_steps_per_second': 20.98, 'epoch': 70.45}
{'loss': 0.0649, 'grad_norm': 0.5554403066635132, 'learning_rate': 1.4285714285714285e-05, 'epoch': 72.73}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8125206828117371, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.8089352314352314, 'eval_Precision': 0.8151666666666667, 'eval_Recall': 0.8244523809523809, 'eval_runtime': 0.6654, 'eval_samples_per_second': 344.133, 'eval_steps_per_second': 22.541, 'epoch': 72.73}
{'loss': 0.0659, 'grad_norm': 0.15411686897277832, 'learning_rate': 1.3095238095238096e-05, 'epoch': 75.0}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8087052702903748, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.8089718614718615, 'eval_Precision': 0.8152857142857144, 'eval_Recall': 0.8244523809523809, 'eval_runtime': 0.6871, 'eval_samples_per_second': 333.277, 'eval_steps_per_second': 21.83, 'epoch': 75.0}
{'loss': 0.0667, 'grad_norm': 0.9630918502807617, 'learning_rate': 1.1904761904761905e-05, 'epoch': 77.27}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8131391406059265, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.8089718614718615, 'eval_Precision': 0.8152857142857144, 'eval_Recall': 0.8244523809523809, 'eval_runtime': 0.6782, 'eval_samples_per_second': 337.638, 'eval_steps_per_second': 22.116, 'epoch': 77.27}
{'loss': 0.0665, 'grad_norm': 0.605758547782898, 'learning_rate': 1.0714285714285714e-05, 'epoch': 79.55}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8141095042228699, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8003009092118003, 'eval_Precision': 0.8094059405940595, 'eval_Recall': 0.8182696841112683, 'eval_runtime': 0.6781, 'eval_samples_per_second': 337.694, 'eval_steps_per_second': 22.12, 'epoch': 79.55}
{'loss': 0.0607, 'grad_norm': 0.9121922254562378, 'learning_rate': 9.523809523809523e-06, 'epoch': 81.82}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8173481822013855, 'eval_Accuracy': 0.8384279475982532, 'eval_F1': 0.7947266155186948, 'eval_Precision': 0.8045733144743047, 'eval_Recall': 0.8083686940122583, 'eval_runtime': 0.6674, 'eval_samples_per_second': 343.116, 'eval_steps_per_second': 22.475, 'epoch': 81.82}
{'loss': 0.0662, 'grad_norm': 0.6751923561096191, 'learning_rate': 8.333333333333334e-06, 'epoch': 84.09}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8187943696975708, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.80001928764305, 'eval_Precision': 0.8146393210749647, 'eval_Recall': 0.8138142385667139, 'eval_runtime': 0.6708, 'eval_samples_per_second': 341.368, 'eval_steps_per_second': 22.36, 'epoch': 84.09}
{'loss': 0.0638, 'grad_norm': 0.7941254377365112, 'learning_rate': 7.142857142857143e-06, 'epoch': 86.36}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8225769400596619, 'eval_Accuracy': 0.8427947598253275, 'eval_F1': 0.7860601816484168, 'eval_Precision': 0.7952147525676938, 'eval_Recall': 0.8033846872082167, 'eval_runtime': 0.7314, 'eval_samples_per_second': 313.104, 'eval_steps_per_second': 20.509, 'epoch': 86.36}
{'loss': 0.0637, 'grad_norm': 1.0268417596817017, 'learning_rate': 5.9523809523809525e-06, 'epoch': 88.64}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8226656317710876, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8032838998185532, 'eval_Precision': 0.8088637435172089, 'eval_Recall': 0.8182696841112683, 'eval_runtime': 0.6656, 'eval_samples_per_second': 344.049, 'eval_steps_per_second': 22.536, 'epoch': 88.64}
{'loss': 0.0637, 'grad_norm': 0.7988002896308899, 'learning_rate': 4.7619047619047615e-06, 'epoch': 90.91}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8271474838256836, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.8009259717180509, 'eval_Precision': 0.8070957095709571, 'eval_Recall': 0.8162894860914663, 'eval_runtime': 0.674, 'eval_samples_per_second': 339.755, 'eval_steps_per_second': 22.255, 'epoch': 90.91}
{'loss': 0.0641, 'grad_norm': 0.9412129521369934, 'learning_rate': 3.5714285714285714e-06, 'epoch': 93.18}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8253958225250244, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.8009622390810509, 'eval_Precision': 0.8072135785007072, 'eval_Recall': 0.8162894860914663, 'eval_runtime': 0.697, 'eval_samples_per_second': 328.569, 'eval_steps_per_second': 21.522, 'epoch': 93.18}
{'loss': 0.0657, 'grad_norm': 0.8275387287139893, 'learning_rate': 2.3809523809523808e-06, 'epoch': 95.45}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8262022733688354, 'eval_Accuracy': 0.8471615720524017, 'eval_F1': 0.8009622390810509, 'eval_Precision': 0.8072135785007072, 'eval_Recall': 0.8162894860914663, 'eval_runtime': 0.6676, 'eval_samples_per_second': 343.04, 'eval_steps_per_second': 22.47, 'epoch': 95.45}
{'loss': 0.0592, 'grad_norm': 0.08362942188978195, 'learning_rate': 1.1904761904761904e-06, 'epoch': 97.73}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8259606957435608, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8036012392448035, 'eval_Precision': 0.8094059405940596, 'eval_Recall': 0.8182696841112683, 'eval_runtime': 0.6932, 'eval_samples_per_second': 330.348, 'eval_steps_per_second': 21.638, 'epoch': 97.73}
{'loss': 0.0609, 'grad_norm': 0.11326982080936432, 'learning_rate': 0.0, 'epoch': 100.0}


  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.8264638781547546, 'eval_Accuracy': 0.851528384279476, 'eval_F1': 0.8027695626705528, 'eval_Precision': 0.808038661008958, 'eval_Recall': 0.8182696841112683, 'eval_runtime': 0.6698, 'eval_samples_per_second': 341.908, 'eval_steps_per_second': 22.396, 'epoch': 100.0}
{'train_runtime': 966.3213, 'train_samples_per_second': 70.887, 'train_steps_per_second': 2.277, 'train_loss': 0.5994519558819857, 'epoch': 100.0}
CPU times: user 13min 34s, sys: 55.9 s, total: 14min 30s
Wall time: 16min 6s


TrainOutput(global_step=2200, training_loss=0.5994519558819857, metrics={'train_runtime': 966.3213, 'train_samples_per_second': 70.887, 'train_steps_per_second': 2.277, 'total_flos': 881077530450000.0, 'train_loss': 0.5994519558819857, 'epoch': 100.0})

Evaluate the fine-tuned model on the train and test splits

In [34]:
# Run the Trainer's evaluation once per split, then tabulate the results side by side.
# Only the first five columns are displayed (loss, accuracy, F1, precision, recall).
q = [
    trainer.evaluate(eval_dataset=split)
    for split in (train_dataloader, test_dataloader)
]
pd.DataFrame(q, index=["train", "test"]).iloc[:, :5]

  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,eval_loss,eval_Accuracy,eval_F1,eval_Precision,eval_Recall
train,0.073348,0.972263,0.98124,0.986588,0.980843
test,0.805604,0.834061,0.788751,0.801438,0.806388


Save model and tokenizer

In [35]:
# Directory the fine-tuned model and tokenizer are saved to and later reloaded from.
model_path = "base"

In [36]:
# Write the model via the Trainer and the tokenizer files into the same directory,
# so both can be restored together with from_pretrained(model_path).
trainer.save_model(model_path)
tokenizer.save_pretrained(model_path)

('base/tokenizer_config.json',
 'base/special_tokens_map.json',
 'base/vocab.txt',
 'base/added_tokens.json')

Load model and tokenizer

In [42]:
# Reload the classifier and its tokenizer from the directory written above;
# these reloaded objects are the ones used for inference below.
new_model = BertForSequenceClassification.from_pretrained(model_path)
new_tokenizer= BertTokenizerFast.from_pretrained(model_path)

In [38]:
# Build an inference pipeline around the reloaded model.
# - "text-classification" is the canonical task name ("sentiment-analysis" is an alias of it).
# - Pick an accelerator that is actually available instead of hard-coding Apple's MPS
#   backend, so the cell also runs on CUDA or CPU-only machines.
pipe = pipeline(
    "text-classification",
    model=new_model,
    tokenizer=new_tokenizer,
    device=(
        "mps" if torch.backends.mps.is_available()
        else "cuda" if torch.cuda.is_available()
        else "cpu"
    ),
)

In [39]:
# Smoke test: the pipeline returns a list with one {'label', 'score'} dict per input.
pipe("Hello")

[{'label': 'greeting', 'score': 0.9863651990890503}]

## Predict

In [43]:
def predict(text):
    """Classify one utterance (or a batch of utterances) into an intent label.

    Args:
        text: A single string, or a list/tuple of strings to classify as a batch.

    Returns:
        The predicted label name (``str``) when ``text`` is a string; otherwise a
        list of label names, one per input, in input order.
    """
    # Tokenize and put the tensors on the device the model currently lives on.
    # The model may have been moved to an accelerator (e.g. by the pipeline cell),
    # so hard-coding "cpu" here would cause a device mismatch.
    inputs = new_tokenizer(
        text, padding=True, truncation=True, max_length=512, return_tensors="pt"
    ).to(new_model.device)

    # Inference only: no need to track gradients.
    with torch.no_grad():
        outputs = new_model(**inputs)

    # Softmax over the class dimension to get per-class probabilities.
    probs = outputs[0].softmax(dim=-1)

    # Arg-max per row; a bare argmax() would flatten across the batch and
    # yield a wrong index whenever more than one text is passed.
    pred_label_idx = probs.argmax(dim=-1).tolist()

    # Map class indices to label names using the model configuration.
    pred_labels = [new_model.config.id2label[idx] for idx in pred_label_idx]

    return pred_labels[0] if isinstance(text, str) else pred_labels


In [44]:
# Quick check of predict() on a single utterance.
text = "Hello"
predict(text)



'greeting'

In [50]:
def model_response(text, min_score=0.8):
    """Pick a canned reply for ``text`` based on the classifier's predicted intent.

    Args:
        text: The user utterance to answer.
        min_score: Minimum pipeline confidence required to answer; below it the
            fallback reply "Don't understand" is returned. Defaults to 0.8.

    Returns:
        A ``(response, tag)`` tuple: the reply string and the predicted label
        (the label is returned even when the fallback reply is used).
    """
    # Run the classifier once and reuse the result instead of re-running
    # inference for every field that is read from it.
    prediction = pipe(text)[0]
    tag = prediction['label']

    if prediction['score'] < min_score:
        return "Don't understand", tag

    # NOTE(review): this indexes intents['intents'] by the model's label id, which
    # assumes label ids follow the order of the intents list — confirm against
    # the cell that builds label2id.
    label = new_model.config.label2id[tag]
    response = random.choice(intents['intents'][label]['responses'])
    return response, tag

In [56]:
# Example query; model_response returns a (response, tag) tuple.
model_response('have a gym')

("The college has excellent gym facilities with a large amount of training equipment, 'For more information please visit - 'https://www.mmcoe.edu.in/index.php/campus/gymnasium'",
 'Campus-Gymnasium')