<a href="https://colab.research.google.com/github/futugyou/pyproject/blob/master/google_colab/generation_representation_model_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required dependencies
%pip install datasets
%pip install sentence_transformers
%pip install transformers
%pip install torch
%pip install tqdm
%pip install scikit-learn

In [None]:
from datasets import load_dataset

# Load the "rotten_tomatoes" dataset and keep named handles to the two splits
# used below: "train" for fitting and "test" for evaluation.
tomatoes = load_dataset("rotten_tomatoes")
train_dataset = tomatoes["train"]
test_dataset = tomatoes["test"]

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, set_seed

# Seed Python's `random`, NumPy and PyTorch before the model is built. The
# sequence-classification head requested below (num_labels=2) is newly created
# when the base checkpoint is loaded, so without a fixed seed its initial
# weights - and therefore the results of this notebook - differ on every run.
SEED = 42
set_seed(SEED)

# Load the pretrained checkpoint with a 2-label classification head, together
# with the matching tokenizer.
model_id = "bert-base-cased"
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Print every parameter name and leave only the classification head trainable:
# a parameter keeps its gradient exactly when its name starts with 'classifier';
# all other layers are frozen.
for name, param in model.named_parameters():
    print(name)
    param.requires_grad = name.startswith('classifier')

In [None]:
from transformers import DataCollatorWithPadding

# Batch collator built around the tokenizer; it is later handed to the Trainer
# as `data_collator`. Presumably it pads each batch at collation time (see the
# DataCollatorWithPadding docs), which is why no padding is requested during
# tokenization below.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

def preprocess_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with truncation enabled.

    Intended for use with ``Dataset.map(..., batched=True)``, as done in this
    cell. Relies on the notebook-level ``tokenizer``. No padding is requested
    here.

    Args:
        examples: A mapping with a ``"text"`` entry holding the raw text(s).

    Returns:
        Whatever the tokenizer returns for those texts.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded

# Tokenize both splits in batches; the train split is processed first, then test.
tokenized_train, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset)
)

In [None]:
import numpy as np

# NOTE: `datasets.load_metric` is no longer available in current `datasets`
# releases, so the F1 score is computed directly with NumPy instead of loading
# a metric object on every evaluation call.


def compute_metrics(eval_pred):
    """Compute the binary F1 score (positive class = label 1) for an evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` holds one row of
            class scores per example; ``labels`` holds the reference class ids.

    Returns:
        A dict ``{"f1": value}``. The value is ``0.0`` when there are no true
        positives, false positives, or false negatives to score.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    labels = np.asarray(labels)

    predicted_pos = predictions == 1
    actual_pos = labels == 1

    true_pos = int(np.sum(predicted_pos & actual_pos))
    false_pos = int(np.sum(predicted_pos & ~actual_pos))
    false_neg = int(np.sum(~predicted_pos & actual_pos))

    # F1 = 2*TP / (2*TP + FP + FN); guard the empty case to avoid dividing by zero.
    denominator = 2 * true_pos + false_pos + false_neg
    f1 = 2 * true_pos / denominator if denominator else 0.0

    # Must be a dict (metric name -> value), not a set.
    return {"f1": f1}

In [None]:
from transformers import TrainingArguments

# Training configuration: one epoch, with outputs written under "model".
training_args = TrainingArguments(
    output_dir="model",
    # Optimisation settings
    num_train_epochs=1,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Batch sizes per device
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Checkpoint once per epoch; do not report to any experiment tracker
    save_strategy="epoch",
    report_to="none",
)

In [None]:
from transformers import Trainer

import inspect

# Recent transformers releases renamed Trainer's `tokenizer` argument to
# `processing_class` and deprecated the old name, while older releases only
# accept `tokenizer`. Because the install cell does not pin a version, pick
# whichever keyword the installed Trainer actually accepts.
trainer_params = inspect.signature(Trainer.__init__).parameters
tokenizer_kwarg = "processing_class" if "processing_class" in trainer_params else "tokenizer"

# Wire together the (mostly frozen) model, the tokenized splits, the padding
# collator and the F1 metric function.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    **{tokenizer_kwarg: tokenizer},
)

# Run the training loop configured by `training_args`.
trainer.train()

In [None]:
# Evaluate on the `eval_dataset` given to the Trainer (the tokenized test split).
# As the last expression of the cell, the returned metrics are displayed as output.
trainer.evaluate()