In [2]:
import numpy as np
from datasets import Dataset,DatasetDict,load_dataset
from transformers import Trainer,TrainingArguments,AutoTokenizer,AutoModelForSequenceClassification,DataCollatorWithPadding
import evaluate

  from .autonotebook import tqdm as notebook_tqdm





In [3]:
# Open the AG News splits in streaming mode (streaming=True), so examples are
# read lazily instead of being materialised up front.
dataset_dict = load_dataset("sh0416/ag_news", streaming=True)

In [4]:
# Tokenizer matching the "bert-base-uncased" checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
def norms(example):
    """Shift the ``label`` field of one example down by one.

    The mapping is updated in place and the same object is returned, which is
    the shape ``Dataset.map`` expects from a per-example function.

    Args:
        example: Mutable mapping holding a numeric ``'label'`` entry.

    Returns:
        The same ``example`` object with ``example['label']`` decreased by 1.
    """
    original_label = example['label']
    example['label'] = original_label - 1
    return example

In [5]:
# Class-name <-> integer-id lookup tables handed to the model config.
# Ids follow the order of the names listed here, starting at 0.
label2id = {
    label_name: label_idx
    for label_idx, label_name in enumerate(("World", "Sports", "Business", "Sci/Tech"))
}
id2label = {label_idx: label_name for label_name, label_idx in label2id.items()}

# BERT encoder plus a sequence-classification head sized to the label maps
# (four classes); the maps are stored on the model so ids can be rendered as names.
classification = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Freeze every parameter of the base model except those whose name contains
# "pooler". Parameters that live outside `base_model` are not touched here.
for name, param in classification.base_model.named_parameters():
    param.requires_grad = "pooler" in name

In [7]:
def preprocess(examples):
    """Tokenize the ``title`` column of ``examples`` with truncation enabled.

    Args:
        examples: Mapping with a ``"title"`` entry; the caller in this
            notebook passes batches (``map(..., batched=True)``).

    Returns:
        Whatever the module-level ``tokenizer`` returns for those titles.
    """
    titles = examples["title"]
    return tokenizer(titles, truncation=True)

# Tokenize in batches first, then shift the labels one example at a time.
dat = dataset_dict.map(preprocess, batched=True).map(norms)

# Collator built from the same tokenizer; passed to the Trainer as `data_collator`.
dats = DataCollatorWithPadding(tokenizer=tokenizer)

In [8]:
# Metric objects used by the `pred` metrics callback.
acc = evaluate.load("accuracy")
f1 = evaluate.load("f1")
def pred(eval_pred):
    """Compute accuracy and macro-averaged F1 for one evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` is
            indexed along axis 1 by class, so it is presumably an array of
            per-class scores (logits) with one row per example.

    Returns:
        dict with keys ``'accuracy'`` and ``'F1'``, each rounded to three
        decimals. The same two values are also printed.
    """
    prediction, label = eval_pred

    # Take the arg-max directly on the raw scores. Softmax is strictly
    # monotonic, so it never changes which class wins, while the previous
    # hand-written np.exp(x) / np.exp(x).sum() produced inf/inf (or 0/0) = NaN
    # for large-magnitude scores and np.argmax then picked the first NaN slot.
    ap = np.argmax(prediction, axis=1)

    accuracy = acc.compute(predictions=ap, references=label)
    F1s = f1.compute(predictions=ap, references=label, average='macro')

    auu = round(accuracy['accuracy'], 3)
    au = round(F1s['f1'], 3)
    print('accuracy:', auu)
    print('F1:', au)
    return {'accuracy': auu, 'F1': au}

In [25]:
# Tunable hyper-parameters for the run.
l = 2e-4    # learning rate
batch = 8   # per-device batch size, used for both training and evaluation
eps = 10    # requested number of epochs

training_args = TrainingArguments(
    # Where checkpoints are written, and in which format.
    output_dir="berts_new",
    save_safetensors=False,
    # Optimisation schedule.
    # NOTE(review): both num_train_epochs and max_steps are set; the recorded
    # run ended at global_step=1000, so max_steps appears to take precedence.
    # Confirm against the TrainingArguments documentation.
    learning_rate=l,
    num_train_epochs=eps,
    max_steps=1000,
    per_device_train_batch_size=batch,
    per_device_eval_batch_size=batch,
    # Evaluate, log and checkpoint on the same cadence, then restore the best
    # checkpoint when training finishes.
    eval_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [19]:
# Wire the model, data, collator and metrics callback into a Trainer and run
# the fine-tuning loop.
trainer = Trainer(
    model=classification,
    args=training_args,
    data_collator=dats,
    # `tokenizer=` is the deprecated spelling of this argument (the warning
    # recorded under this cell points at this call); `processing_class=` is
    # its replacement.
    # NOTE(review): requires a transformers release that accepts
    # `processing_class` - confirm the installed version.
    processing_class=tokenizer,
    compute_metrics=pred,
    train_dataset=dat['train'],
    eval_dataset=dat['test'],
)
# Last expression of the cell, so the returned TrainOutput is displayed.
trainer.train()

  trainer=Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.6837,0.624123,0.777,0.775


accuracy: 0.777
F1: 0.775


TrainOutput(global_step=1000, training_loss=0.6837337036132812, metrics={'train_runtime': 960.702, 'train_samples_per_second': 8.327, 'train_steps_per_second': 1.041, 'total_flos': 72455507943936.0, 'train_loss': 0.6837337036132812, 'epoch': 1.0})

In [11]:
# Evaluate with no arguments (the Trainer was built with eval_dataset=dat['test']);
# the returned metrics dict is shown as the cell output.
trainer.evaluate()



accuracy: 0.77
F1: 0.768


{'eval_loss': 0.6337381601333618,
 'eval_accuracy': 0.77,
 'eval_F1': 0.768,
 'eval_runtime': 323.6095,
 'eval_samples_per_second': 23.485,
 'eval_steps_per_second': 2.936,
 'epoch': 1.0}

In [35]:
import os
import shutil
import time  # needed by time.sleep below; it was previously missing (NameError)

# NOTE(review): nothing visible in this notebook reads SAFE_SERIALIZATION.
# If the goal is to skip the safetensors format, the documented route is the
# `safe_serialization` argument of `save_pretrained` - confirm and adjust.
os.environ['SAFE_SERIALIZATION']="false"

# One name for the export directory, so the cleanup and both save calls always
# refer to the same path (the cleanup used to check 'bert_v123' while the
# saves wrote to 'berts_v123', so an old export was never removed).
SAVE_DIR = 'berts_v123'

# Remove any previous export so stale files cannot linger next to the new ones.
if os.path.exists(SAVE_DIR):
    shutil.rmtree(SAVE_DIR)
    # Short pause before the directory is recreated - presumably to let the
    # filesystem release it (recorded output shows Windows-style paths); confirm.
    time.sleep(1)

classification.save_pretrained(SAVE_DIR)
# Last expression of the cell: the tuple of written tokenizer files is displayed.
tokenizer.save_pretrained(SAVE_DIR)

('berts_v123\\tokenizer_config.json',
 'berts_v123\\special_tokens_map.json',
 'berts_v123\\vocab.txt',
 'berts_v123\\added_tokens.json',
 'berts_v123\\tokenizer.json')