In [None]:
!pip install transformers datasets evaluate accelerate

In [None]:
from datasets import load_dataset

# Load the dataset registered under the name "imdb"; the result is indexed by
# split name ("train" / "test") further down in the notebook.
imdb = load_dataset("imdb")

In [None]:
# Show the first record of the test split.
#
# The lookup has to be the LAST expression of the cell: Jupyter only renders the
# value of the final expression, so a literal placed after it would be displayed
# instead of the real record. The expected output is therefore kept as a comment:
#
# {
#     "label":0,
#     "text":"I love sci-fi and am willing to put up with a lot.Sci-fi movies/TV are usually underfunded,under-appreciated and misunderstood."
# }
imdb["test"][0]

In [None]:
from transformers import AutoTokenizer

# Tokenizer matching the DistilBERT checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

In [None]:
def preprocess_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with the notebook-level tokenizer.

    Args:
        examples: Mapping that exposes the raw review text under the key ``"text"``.
            It is called through ``Dataset.map(..., batched=True)`` in this notebook,
            so ``examples["text"]`` is presumably a list of strings.

    Returns:
        Whatever ``tokenizer(...)`` returns for those texts, with truncation enabled.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)

In [None]:
# Apply the tokenization to every split; batched=True hands the function
# several examples per call instead of one at a time.
tokenized_imdb = imdb.map(preprocess_function, batched=True)

In [None]:
from transformers import DataCollatorWithPadding

# Collator built around the same tokenizer; it is handed to both the Trainer and
# the TensorFlow dataset preparation later in the notebook.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
import evaluate

# Accuracy metric object from the `evaluate` library.
# NOTE: the variable name is spelled "accuaracy"; compute_metrics() looks the
# metric up under exactly this spelling, so the two must be renamed together.
accuaracy = evaluate.load("accuracy")

In [None]:
import numpy as np


def compute_metrics(eval_pred):
    """Score a batch of model outputs with the notebook-level accuracy metric.

    Args:
        eval_pred: A pair ``(predictions, labels)``. ``predictions`` is reduced with
            ``argmax`` over axis 1, so it is assumed to be a 2-D array of per-class
            scores (TODO confirm against the caller).

    Returns:
        The result of ``accuaracy.compute(...)`` for the predicted class ids and
        the reference labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuaracy.compute(predictions=predicted_ids, references=references)

In [None]:
# Class index -> human-readable label, and the inverse lookup derived from it so
# the two mappings cannot drift apart.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: index for index, name in id2label.items()}

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Pretrained DistilBERT with a two-class sequence-classification head; the label
# mappings are stored on the model config so predictions can be turned into names.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
import os

import huggingface_hub

# SECURITY: an access token used to be hardcoded in this cell. Any token that was
# ever saved in the notebook must be treated as leaked and revoked in the Hub
# account settings.
# The token is now read from the HF_TOKEN environment variable; when it is unset,
# login() receives None and falls back to its interactive prompt.
huggingface_hub.login(token=os.environ.get("HF_TOKEN"))

# Hyperparameters and bookkeeping for the fine-tuning run.
# `eval_strategy` is the current name of the argument formerly called
# `evaluation_strategy`; the old spelling is deprecated/removed in the releases
# that provide the `processing_class` argument used by the Trainer below.
training_args = TrainingArguments(
    output_dir="my_awesome_model",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    # Evaluate and checkpoint on the same cadence so that the best checkpoint
    # can be restored when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Requires an authenticated Hub session (see the login call above).
    push_to_hub=True,
)

# Wire the model, arguments, data, collator and metric function together.
# The "test" split doubles as the evaluation set in this notebook.
trainer = Trainer(
    model=model,
    args=training_args,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_imdb["train"],
    eval_dataset=tokenized_imdb["test"],
)

# Start fine-tuning.
trainer.train()

In [None]:
# Call the trainer's push_to_hub() — presumably uploads the fine-tuned model to the
# Hugging Face Hub; needs the authenticated session created by the login call.
trainer.push_to_hub()

In [None]:
from transformers import create_optimizer
import tensorflow as tf

# TensorFlow path: the optimizer and its learning-rate schedule need the total
# number of training steps up front.
batch_size = 16
num_epochs = 5

# Whole batches per pass over the training split (a partial final batch is not counted).
batches_per_epoch = len(tokenized_imdb["train"]) // batch_size
total_train_steps = int(batches_per_epoch * num_epochs)

# The schedule is sized for `num_epochs` epochs; training for a different number
# of epochs leaves it mismatched with the actual run length.
optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_warmup_steps=0,
    num_train_steps=total_train_steps,
)

In [None]:
from transformers import TFAutoModelForSequenceClassification

# TensorFlow counterpart of the classifier above. Note that this rebinds the
# notebook-level name `model`.
model = TFAutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# Turn both tokenized splits into datasets the Keras model can consume.
# `batch_size` is the notebook-level value that was also used to size the
# learning-rate schedule; reusing it here (instead of repeating the literal 16)
# keeps the number of optimizer steps and the schedule in agreement.
tf_train_set = model.prepare_tf_dataset(
    tokenized_imdb["train"],
    shuffle=True,  # reshuffle the training examples
    batch_size=batch_size,
    collate_fn=data_collator,
)
tf_validation_set = model.prepare_tf_dataset(
    tokenized_imdb["test"],
    shuffle=False,  # keep evaluation order fixed
    batch_size=batch_size,
    collate_fn=data_collator,
)


In [None]:
import tensorflow as tf
# Only the optimizer is supplied; no loss argument is passed here.
# NOTE(review): presumably this relies on the model's built-in default loss — confirm.
model.compile(optimizer=optimizer)

In [None]:
from transformers.keras_callbacks import KerasMetricCallback

# Callback that feeds the validation set through compute_metrics().
metric_callback = KerasMetricCallback(
    metric_fn=compute_metrics,
    eval_dataset=tf_validation_set,
)



In [None]:
from transformers.keras_callbacks import PushToHubCallback

# Callback configured with the output directory and the tokenizer to publish
# alongside the model.
push_to_hub_callback = PushToHubCallback(
    output_dir="my_awesome_model",
    tokenizer=tokenizer,
)

In [None]:
# Callbacks passed to model.fit(), in this order.
callbacks = [
    metric_callback,
    push_to_hub_callback,
]

In [None]:
# Train for exactly the number of epochs the learning-rate schedule was sized
# for: `total_train_steps` was computed from `num_epochs`, so a different literal
# here would stop training before the schedule has finished decaying.
model.fit(
    tf_train_set,
    validation_data=tf_validation_set,
    epochs=num_epochs,
    callbacks=callbacks,
)

In [None]:
# Sample review used by every inference example below.
text = "This was a masterpiece.Not completely faithful to books,but enthralling from beginning to end."

In [None]:
from transformers import pipeline

# High-level inference: a sentiment-analysis pipeline backed by the published
# checkpoint. The last expression displays the prediction for the sample text.
classifier = pipeline(
    "sentiment-analysis",
    model="stevhliu/my_awesome_model",
)
classifier(text)

In [None]:
# Manual PyTorch inference, step 1: tokenize the sample text into PyTorch tensors.
# (The package name is `transformers`; the earlier misspelling made this cell
# fail with ModuleNotFoundError.)
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_model")
inputs = tokenizer(text, return_tensors="pt")

In [None]:
# Manual PyTorch inference, step 2: run the model and keep the raw logits.
# `torch` is imported here because this cell uses torch.no_grad() and no earlier
# cell imports it; without the import the cell raises NameError on a fresh kernel.
import torch
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("stevhliu/my_awesome_model")

# Gradients are not needed for a forward-only pass.
with torch.no_grad():
    logits = model(**inputs).logits

In [None]:
# Manual PyTorch inference, step 3: take the index of the largest logit as a
# plain Python number and look up its label in the model config.
predicted_class_id = logits.argmax().item()
model.config.id2label[predicted_class_id]

In [None]:
from transformers import AutoTokenizer

# Manual TensorFlow inference, step 1: tokenize the sample text into TensorFlow tensors.
tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_model")
inputs = tokenizer(text, return_tensors="tf")

In [None]:
from transformers import TFAutoModelForSequenceClassification

# Manual TensorFlow inference, step 2: load the published checkpoint and keep the
# raw logits of a forward pass over the tokenized sample.
model = TFAutoModelForSequenceClassification.from_pretrained("stevhliu/my_awesome_model")
logits = model(**inputs).logits

In [None]:
# Manual TensorFlow inference, step 3: argmax over the last axis, take the entry
# for the first (only) input, convert it to int and look up its label.
# `tf` is the TensorFlow module imported in an earlier cell.
predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
model.config.id2label[predicted_class_id]