#### 📦 Import dependencies

In [1]:
from babybert.data import load_dataset, LanguageModelingDataset
from babybert.tokenizer import WordPieceTokenizer
from babybert.model import BabyBERTConfig, BabyBERT, BabyBERTForSentimentAnalysis
from babybert.trainer import TrainerConfig, Trainer

#### ⬆️ Loading our pretrained tokenizer and model

In [2]:
# Load the WordPiece tokenizer from the local toy-model checkpoint directory
# (the same path the model cell loads from).
tokenizer = WordPieceTokenizer.from_pretrained("./checkpoints/toy-model")

In [3]:
# Load the pretrained BabyBERT model from the local toy-model checkpoint
# directory; its config (e.g. `block_size`) is read later when encoding text.
model = BabyBERT.from_pretrained("./checkpoints/toy-model")

#### 📚 Building our training dataset 

In [4]:
# Load the sentiment-analysis data file. Later cells index the result with
# the "text" and "label" keys.
dataset = load_dataset("./data/sentiment_analysis.txt")

In [5]:
# Encode every text sample in one batch, passing the model's configured
# block size as the padding length.
encoded = tokenizer.batch_encode(
    dataset["text"],
    padding_length=model.config.block_size,
)

In [6]:
# Combine the tokenizer output with the sentiment labels under a "labels"
# key and build the training dataset from the merged mapping.
training_dataset = LanguageModelingDataset.from_dict(
    {**encoded, "labels": dataset["label"]}
)

#### 💭 Setting up the sentiment analysis head

In [7]:
# Wrap the pretrained BabyBERT model in the sentiment-analysis model class.
# NOTE(review): presumably this adds a classification head on top of the
# pretrained encoder — confirm in babybert.model.
sentiment_analysis_model = BabyBERTForSentimentAnalysis(model)

#### 💪 Instantiating the trainer

In [8]:
# Fine-tuning settings for this run.
# NOTE(review): `num_samples` presumably caps how many samples are drawn for
# training — confirm against TrainerConfig.
trainer_cfg = TrainerConfig(
    batch_size=16,
    num_workers=4,
    num_samples=1000,
)

# Pair the sentiment-analysis model with the configuration above.
trainer = Trainer(sentiment_analysis_model, trainer_cfg)

#### 🏋️ Fine-tuning BabyBERT for sentiment analysis

In [9]:
# Fine-tune on the encoded, labelled dataset.
# NOTE(review): the recorded run ends at loss=0.6802, which is close to
# ln(2) ≈ 0.693 — chance level if the task is binary classification. Confirm
# the model is actually learning (label encoding, learning rate, sample count).
trainer.run(training_dataset)

Training: 100%|[33m██████████[0m| 1008/1008 [03:40<00:00,  4.57samples/s, loss=0.6802]


In [None]:
# Hand-written sample sentences; presumably used for a quick qualitative
# check of the fine-tuned model (usage is not visible in this section).
example_input = [
    "I love this food!", "The weather today is great.", "I'm pretty upset."
]