This notebook was executed in Google Colab using an A100 GPU.

### Start of execution

In [1]:
import time

In [2]:
# Record the wall-clock start time; the last cell subtracts it to report total runtime.
start = time.time()

# 1. Setting the environment

In [3]:
!pip install -q datasets==2.20.0

# 2. Import libraries

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# 3. Preparation

In [6]:
# Mount Google Drive at /content/drive; the model directory used later lives under this mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Project folder on the mounted Drive. Anchored at the absolute mount point passed to
# drive.mount('/content/drive') so it resolves regardless of the current working directory.
path_general = '/content/drive/MyDrive/Profesional_Academico/Github_Personal/ML_AI_Contents/09.Deep_Learning/50.BERT_HF_Trainer'

# 3. Load Dataset

In [8]:
# Load the "imdb" dataset with `load_dataset`; its 'train' and 'test' splits are used below.
imdb = load_dataset("imdb")

In [9]:
# Training split as a pandas DataFrame; its 'text' and 'label' columns are used below.
df_train = imdb['train'].to_pandas()

In [10]:
# Test split as a pandas DataFrame; its 'text' and 'label' columns are used below.
df_test = imdb['test'].to_pandas()

# 4. Load model

In [11]:
# Directory the classification model is loaded from (the 'model' subfolder of the project folder).
path_model = f'{path_general}/model'

In [12]:
# Load the sequence-classification model stored under `path_model`, requesting the 'cuda' device map.
# NOTE(review): with every weight mapped to 'cuda', `offload_folder` is presumably never used — confirm.
model = AutoModelForSequenceClassification.from_pretrained(
    path_model,
    device_map='cuda',
    offload_folder=path_model,
)

In [13]:
# The tokenizer is loaded from the "bert-base-uncased" checkpoint name, not from `path_model`.
# NOTE(review): presumably this matches the tokenizer the saved model was trained with — confirm.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# 5. Inference

In [14]:
def predict_text(text, tokenizer):
  """Classify one text with the globally loaded `model` and return the predicted label id.

  Args:
    text: Input passed to `tokenizer`.
    tokenizer: Callable invoked as `tokenizer(text, return_tensors='pt', truncation=True)`;
      its result must support `.to(device)` and `**` unpacking into the model call.

  Returns:
    int: 0 when the logit at index 0 is strictly greater than the logit at index 1,
    otherwise 1 (so a tie resolves to 1).
  """
  # Send the inputs to the device the model lives on instead of assuming 'cuda'.
  encoded_input = tokenizer(text, return_tensors='pt', truncation = True).to(model.device)

  # Inference only: disable autograd so no computation graph is built for each call.
  with torch.no_grad():
    output = model(**encoded_input)

  # Only the first row of the logits is read (one text is passed per call).
  logits_array = output.logits.to('cpu').detach().numpy()[0]

  return 0 if logits_array[0] > logits_array[1] else 1

In [15]:
# Classify every training review one row at a time and store the predicted label id.
df_train['pred'] = df_train['text'].apply(predict_text, args=(tokenizer,))

In [16]:
# Classify every test review one row at a time and store the predicted label id.
df_test['pred'] = df_test['text'].apply(predict_text, args=(tokenizer,))

# 6. Performance

In [17]:
from sklearn.metrics import recall_score, precision_score, f1_score

### a. Train

In [18]:
# Macro-averaged precision on the training split, as a percentage rounded to 2 decimals.
round(100 * precision_score(y_true=df_train['label'], y_pred=df_train['pred'], average='macro'), 2)

96.14

In [19]:
# Macro-averaged recall on the training split, as a percentage rounded to 2 decimals.
round(100 * recall_score(y_true=df_train['label'], y_pred=df_train['pred'], average='macro'), 2)

96.14

In [20]:
# Macro-averaged F1 on the training split, as a percentage rounded to 2 decimals.
round(100 * f1_score(y_true=df_train['label'], y_pred=df_train['pred'], average='macro'), 2)

96.14

In [21]:
# Accuracy on the training split: share of rows whose prediction equals the label, as a percentage.
round(100 * df_train['label'].eq(df_train['pred']).mul(1).mean(), 2)

96.14

### b. Test

In [22]:
# Macro-averaged precision on the test split, as a percentage rounded to 2 decimals.
round(100 * precision_score(y_true=df_test['label'], y_pred=df_test['pred'], average='macro'), 2)

93.62

In [23]:
# Macro-averaged recall on the test split, as a percentage rounded to 2 decimals.
round(100 * recall_score(y_true=df_test['label'], y_pred=df_test['pred'], average='macro'), 2)

93.62

In [24]:
# Macro-averaged F1 on the test split, as a percentage rounded to 2 decimals.
round(100 * f1_score(y_true=df_test['label'], y_pred=df_test['pred'], average='macro'), 2)

93.62

In [25]:
# Accuracy on the test split: share of rows whose prediction equals the label, as a percentage.
round(100 * df_test['label'].eq(df_test['pred']).mul(1).mean(), 2)

93.62

### End of execution

In [26]:
# Stop the wall-clock timer that was started in the `start = time.time()` cell.
end = time.time()
delta = end - start

# Split the elapsed seconds into whole hours, minutes and seconds (each truncated by int()).
hours = int(delta / 3_600)
leftover = delta - hours * 3_600
mins = int(leftover / 60)
secs = int(leftover - mins * 60)

print(f'Hours: {hours}, Minutes: {mins}, Seconds: {secs}')

Hours: 0, Minutes: 9, Seconds: 7
