In [42]:
import numpy as np
from datasets import Dataset,DatasetDict,load_dataset
from transformers import Trainer,TrainingArguments,AutoTokenizer,AutoModelForSequenceClassification,DataCollatorWithPadding,AutoConfig
import evaluate

In [43]:
dataset_dict=load_dataset("bitext/Bitext-events-ticketing-llm-chatbot-training-dataset")

tokenizer=AutoTokenizer.from_pretrained("bert-base-uncased")

label2id={"CANCELLATIONS":0,"CONTACT":1,"DELIVERY":2,"EVENTS":3,"PAYMENT":4,"POLICY":5,"REFUNDS":6,"TICKETS":7}
id2label={0:"CANCELLATIONS",1:"CONTACT",2:"DELIVERY",3:"EVENTS",4:"PAYMENT",5:"POLICY",6:"REFUNDS",7:"TICKETS"}

def norms(example):
    example['label']=label2id[example['category']]
    return example




In [44]:
num_labels=8
classification=AutoModelForSequenceClassification.from_pretrained("bert-base-uncased",num_labels=num_labels,label2id=label2id,id2label=id2label,ignore_mismatched_sizes=True)

for name, param in classification.base_model.named_parameters():
    param.requires_grad=False
for name, param in classification.base_model.named_parameters():
    if "pooler" in name:
     param.requires_grad=True

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [45]:
def preprocess(examples):
    return tokenizer(examples["instruction"],truncation=True)

dat=dataset_dict.map(preprocess,batched=True)
dat=dat.map(norms)
dat=dat['train'].train_test_split(test_size=0.2,seed=42)
dats=DataCollatorWithPadding(tokenizer= tokenizer)
print(dat)

DatasetDict({
    train: Dataset({
        features: ['instruction', 'intent', 'category', 'tags', 'response', 'input_ids', 'token_type_ids', 'attention_mask', 'label'],
        num_rows: 19761
    })
    test: Dataset({
        features: ['instruction', 'intent', 'category', 'tags', 'response', 'input_ids', 'token_type_ids', 'attention_mask', 'label'],
        num_rows: 4941
    })
})


In [46]:
acc=evaluate.load('accuracy')
f1=evaluate.load('f1')
def pred(eval_pred):
    prediction,label=eval_pred
    probabilities=np.exp(prediction)/np.exp(prediction).sum(-1,keepdims=True)
    ap=np.argmax(probabilities,axis=1)
    accuracy=acc.compute(predictions=ap,references=label)
    F1s=f1.compute(predictions=ap,references=label,average='macro')
    auu=round(accuracy['accuracy'],3)
    au=round(F1s['f1'],3)
    print('accuracy:',auu)
    print('F1:',au)
    return{'accuracy':auu,'F1':au}


In [47]:
l=2e-4
batch=4
eps=10
training_args=TrainingArguments(
    output_dir="berts",
    save_safetensors=False,
    learning_rate=l,
    per_device_train_batch_size=batch, 
    per_device_eval_batch_size=batch,
    num_train_epochs=eps,
    eval_strategy='epoch',
    logging_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    max_steps=1000
    
    
)

In [48]:
trainer=Trainer(
    model=classification,
    args=training_args,
    data_collator=dats,
    processing_class=tokenizer,
    compute_metrics=pred,
    train_dataset=dat['train'],
    eval_dataset=dat['test'],
)
trainer.train()

trainer.evaluate()



Epoch,Training Loss,Validation Loss,Accuracy,F1
0,1.2394,0.790495,0.778,0.698


accuracy: 0.778
F1: 0.698




accuracy: 0.778
F1: 0.698


{'eval_loss': 0.7904946208000183,
 'eval_accuracy': 0.778,
 'eval_F1': 0.698,
 'eval_runtime': 301.4008,
 'eval_samples_per_second': 16.393,
 'eval_steps_per_second': 4.101,
 'epoch': 0.20238818053025703}

In [49]:
trainer.save_model('berts')
model=trainer.model
model.save_pretrained('berts',safe_serialization=False)
tokenizer.save_pretrained('berts')

('berts\\tokenizer_config.json',
 'berts\\special_tokens_map.json',
 'berts\\vocab.txt',
 'berts\\added_tokens.json',
 'berts\\tokenizer.json')

In [12]:
model_path='berts'
config=AutoConfig.from_pretrained(model_path,num_labels=8)
tokenizer=AutoTokenizer.from_pretrained(model_path,config=config)
classification=AutoModelForSequenceClassification.from_pretrained(model_path,config=config,ignore_mismatched_sizes=True)
classification.eval()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at berts and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([8]) in the model instantiated
- classifier.weight: found shape torch.Size([4, 768]) in the checkpoint and torch.Size([8, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [62]:
import torch
def rec(text):
    input=tokenizer(text,return_tensors='pt',truncation=True,padding=True)
    with torch.no_grad():
     output=classification(**input)
     probab=torch.nn.functional.softmax(output.logits,dim=-1).numpy()[0]
     actual=torch.argmax(output.logits,dim=-1).item()
     acc=id2label[actual]
     top3=np.argsort(-probab)[:3]
     print('predicted class:')
     for idx in top3:
       idx=int(idx)
       print(f"{id2label[idx]}:{probab[idx]:.3f}")
     
     print('actual pred:',acc)

In [63]:
text=input("enter your ticket=")
rec(text)

enter your ticket= I'd like to send an email to client service could uhelp me


predicted class:
CONTACT:0.590
CANCELLATIONS:0.201
POLICY:0.059
actual pred: CONTACT
