In [None]:
import torch
from torch.utils.data import DataLoader
from torch.optim import SGD

import datasets
from datasets import load_dataset

import numpy as np

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [None]:
device=('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
print(device)

In [None]:
model_identifier='google-bert/bert-base-cased'

tokenizer=AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_identifier
                                      )
### loading 

dataset_name='yelp_review_full'

dataset=load_dataset(dataset_name)
num_label=np.unique(dataset['train']['label']).shape[0]
def tokenize_example(example):
    return (tokenizer(example['text'],padding=True,truncation=True)) ## it will radd features like input_ids,token_type_ids, attention_mask

In [None]:
dataset

In [None]:
dataset['train'].features

In [None]:
import random
seed=101
dataset['train']=dataset['train'].shuffle(seed=seed).select(range(700))
dataset['test']=dataset['test'].shuffle(seed=seed).select(range(500))

In [None]:
tokenized_dataset=dataset.map(tokenize_example,batched=True)

In [None]:
tokenized_dataset=tokenized_dataset.remove_columns(['text'])

In [None]:
tokenized_dataset=tokenized_dataset.rename_column("label","labels")

In [None]:
tokenized_dataset.set_format('torch')

In [None]:
train_dataset=tokenized_dataset['train']
test_dataset=tokenized_dataset['test']



In [None]:
from torch.utils.data import Dataset

In [None]:
### Now creating a dataloader 
batch_size=16
train_dataloader=DataLoader(dataset=train_dataset,
                            shuffle=True,
                            batch_size=batch_size)
test_dataloader=DataLoader(dataset=test_dataset,
                           shuffle=False,
                           batch_size=batch_size)

In [None]:
## defining model 
model=AutoModelForSequenceClassification.from_pretrained(model_identifier,num_labels=num_label).to(device)

In [None]:
## defining optimizer and lr_schedular
lr=1e-5
optimizer=torch.optim.AdamW(params=model.parameters(),lr=lr)

In [None]:
## Creating training parameters 
epochs=3
num_training_steps=epochs*len(train_dataloader)

In [None]:
from transformers import get_scheduler
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

In [47]:
len(train_dataloader)

44

In [46]:
## time to train the model
from tqdm.auto import tqdm
progress_bar=tqdm(range(num_training_steps))
model.train()

for epoch in range(epochs):
    for step,batch in enumerate(train_dataloader):
        optimizer.zero_grad()
        batch={k:v.to(device) for k,v in batch.items()}
        outputs=model(**batch)
        loss=outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        print(f'Epoch: {epoch+1}|Steps: {(epoch+1)*(step+1)}|loss:{loss.item():.4f}')
        progress_bar.update(1)

  3%|▎         | 4/132 [01:18<41:41, 19.55s/it]


Epoch: 1|Steps: 1|loss:1.6797




Epoch: 1|Steps: 2|loss:1.6844




Epoch: 1|Steps: 3|loss:1.7292




Epoch: 1|Steps: 4|loss:1.7062




Epoch: 1|Steps: 5|loss:1.6636




Epoch: 1|Steps: 6|loss:1.5089




Epoch: 1|Steps: 7|loss:1.7223




Epoch: 1|Steps: 8|loss:1.6530




Epoch: 1|Steps: 9|loss:1.6670




Epoch: 1|Steps: 10|loss:1.6494




Epoch: 1|Steps: 11|loss:1.5908




Epoch: 1|Steps: 12|loss:1.6643




Epoch: 1|Steps: 13|loss:1.5934




Epoch: 1|Steps: 14|loss:1.6381




Epoch: 1|Steps: 15|loss:1.6304




Epoch: 1|Steps: 16|loss:1.6298




Epoch: 1|Steps: 17|loss:1.6005




Epoch: 1|Steps: 18|loss:1.5290




Epoch: 1|Steps: 19|loss:1.6485




Epoch: 1|Steps: 20|loss:1.5871




Epoch: 1|Steps: 21|loss:1.5784




Epoch: 1|Steps: 22|loss:1.5760




Epoch: 1|Steps: 23|loss:1.5496




Epoch: 1|Steps: 24|loss:1.5958




Epoch: 1|Steps: 25|loss:1.5583




Epoch: 1|Steps: 26|loss:1.5768




Epoch: 1|Steps: 27|loss:1.5082




Epoch: 1|Steps: 28|loss:1.6421




Epoch: 1|Steps: 29|loss:1.5608




Epoch: 1|Steps: 30|loss:1.5488




Epoch: 1|Steps: 31|loss:1.5907




Epoch: 1|Steps: 32|loss:1.5608




Epoch: 1|Steps: 33|loss:1.6484




Epoch: 1|Steps: 34|loss:1.6534




Epoch: 1|Steps: 35|loss:1.5084




Epoch: 1|Steps: 36|loss:1.5125




Epoch: 1|Steps: 37|loss:1.5483




Epoch: 1|Steps: 38|loss:1.6067




Epoch: 1|Steps: 39|loss:1.6345




Epoch: 1|Steps: 40|loss:1.6160




Epoch: 1|Steps: 41|loss:1.5564




Epoch: 1|Steps: 42|loss:1.5632




Epoch: 1|Steps: 43|loss:1.5469




Epoch: 1|Steps: 44|loss:1.5480




Epoch: 2|Steps: 2|loss:1.6520




Epoch: 2|Steps: 4|loss:1.5283




Epoch: 2|Steps: 6|loss:1.5284




Epoch: 2|Steps: 8|loss:1.6260




Epoch: 2|Steps: 10|loss:1.4773




Epoch: 2|Steps: 12|loss:1.5924




Epoch: 2|Steps: 14|loss:1.5197




Epoch: 2|Steps: 16|loss:1.5186




Epoch: 2|Steps: 18|loss:1.5710




Epoch: 2|Steps: 20|loss:1.5067




Epoch: 2|Steps: 22|loss:1.5985




Epoch: 2|Steps: 24|loss:1.5974




Epoch: 2|Steps: 26|loss:1.5073




Epoch: 2|Steps: 28|loss:1.4334




Epoch: 2|Steps: 30|loss:1.4904




Epoch: 2|Steps: 32|loss:1.5081




Epoch: 2|Steps: 34|loss:1.5353




Epoch: 2|Steps: 36|loss:1.5506




Epoch: 2|Steps: 38|loss:1.5110




Epoch: 2|Steps: 40|loss:1.4285




Epoch: 2|Steps: 42|loss:1.4178




Epoch: 2|Steps: 44|loss:1.4710




Epoch: 2|Steps: 46|loss:1.3693




Epoch: 2|Steps: 48|loss:1.5110




Epoch: 2|Steps: 50|loss:1.4704




Epoch: 2|Steps: 52|loss:1.3855




Epoch: 2|Steps: 54|loss:1.4692




Epoch: 2|Steps: 56|loss:1.5562




Epoch: 2|Steps: 58|loss:1.5815




Epoch: 2|Steps: 60|loss:1.5812




Epoch: 2|Steps: 62|loss:1.5648




Epoch: 2|Steps: 64|loss:1.5892




Epoch: 2|Steps: 66|loss:1.4079




Epoch: 2|Steps: 68|loss:1.3738




Epoch: 2|Steps: 70|loss:1.4898




Epoch: 2|Steps: 72|loss:1.5808




Epoch: 2|Steps: 74|loss:1.4950




Epoch: 2|Steps: 76|loss:1.4703




Epoch: 2|Steps: 78|loss:1.4828




Epoch: 2|Steps: 80|loss:1.4036




Epoch: 2|Steps: 82|loss:1.4964




Epoch: 2|Steps: 84|loss:1.4768




Epoch: 2|Steps: 86|loss:1.4173




Epoch: 2|Steps: 88|loss:1.5359




Epoch: 3|Steps: 3|loss:1.3695




Epoch: 3|Steps: 6|loss:1.4860




Epoch: 3|Steps: 9|loss:1.5225




Epoch: 3|Steps: 12|loss:1.3217




Epoch: 3|Steps: 15|loss:1.4290




Epoch: 3|Steps: 18|loss:1.4658




Epoch: 3|Steps: 21|loss:1.5297




Epoch: 3|Steps: 24|loss:1.4295




Epoch: 3|Steps: 27|loss:1.3636




Epoch: 3|Steps: 30|loss:1.4179




Epoch: 3|Steps: 33|loss:1.4565




Epoch: 3|Steps: 36|loss:1.4338




Epoch: 3|Steps: 39|loss:1.3771




Epoch: 3|Steps: 42|loss:1.3461




Epoch: 3|Steps: 45|loss:1.4448




Epoch: 3|Steps: 48|loss:1.4709




Epoch: 3|Steps: 51|loss:1.4325




Epoch: 3|Steps: 54|loss:1.4728




Epoch: 3|Steps: 57|loss:1.4851




Epoch: 3|Steps: 60|loss:1.4959




Epoch: 3|Steps: 63|loss:1.4395




Epoch: 3|Steps: 66|loss:1.3597




Epoch: 3|Steps: 69|loss:1.4317




Epoch: 3|Steps: 72|loss:1.3364




Epoch: 3|Steps: 75|loss:1.4768




Epoch: 3|Steps: 78|loss:1.3874




Epoch: 3|Steps: 81|loss:1.3543




Epoch: 3|Steps: 84|loss:1.3799




Epoch: 3|Steps: 87|loss:1.3615




Epoch: 3|Steps: 90|loss:1.3960




Epoch: 3|Steps: 93|loss:1.5627




Epoch: 3|Steps: 96|loss:1.4631




Epoch: 3|Steps: 99|loss:1.4432




Epoch: 3|Steps: 102|loss:1.3169




Epoch: 3|Steps: 105|loss:1.4789




Epoch: 3|Steps: 108|loss:1.3764




Epoch: 3|Steps: 111|loss:1.3266




Epoch: 3|Steps: 114|loss:1.3739




Epoch: 3|Steps: 117|loss:1.4442




Epoch: 3|Steps: 120|loss:1.4650




Epoch: 3|Steps: 123|loss:1.5198




Epoch: 3|Steps: 126|loss:1.4388




Epoch: 3|Steps: 129|loss:1.3329




Epoch: 3|Steps: 132|loss:1.3388


In [70]:
import evaluate 
model.eval()
metrics=evaluate.load('accuracy')

for batch in test_dataloader:
    batch={k:v.to(device) for k,v in batch.items()}
    with torch.no_grad():
        outputs=model(**batch)

    logits=outputs.logits
    prediction=torch.argmax(logits,dim=-1)
    metrics.add_batch(predictions=prediction,references=batch['labels'])

metrics.compute()


{'accuracy': 0.402}