# SENTIMENT ANALYSIS USING ROMANIAN BERT

In [None]:
# Authenticate the Colab user, then mount Google Drive into the runtime.
from google.colab import auth, drive

# Sign the current Colab session in with the user's Google account.
auth.authenticate_user()

# Mounting prompts for authorization before Drive is exposed at /content/drive.
drive.mount('/content/drive')

In [None]:
# Fetch the Romanian-Transformers repository; git places the checkout in a
# `Romanian-Transformers/` folder under the current working directory.
!git clone https://github.com/dumitrescustefan/Romanian-Transformers.git

In [None]:
# Switch the working directory to the sentiment-analysis example on the mounted Drive.
# NOTE(review): presumably this is where `training_module.py` lives (it is imported
# further down); the path does not match the location of the clone above — confirm
# it exists on your Drive.
%cd "/content/drive/My Drive/sentiment/examples/sentiment_analysis"

In [None]:
# Install the third-party libraries imported by the notebook.
# NOTE(review): versions are unpinned, while the Trainer call later in the notebook
# uses `gpus=` and `progress_bar_refresh_rate=`, which newer pytorch-lightning
# releases appear to have dropped — consider pinning a compatible version (confirm).
!pip3 install transformers tokenizers pytorch-lightning torch

In [None]:
# Print the version of the CUDA compiler (nvcc) available in this runtime.
!nvcc --version

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from transformers import *
import logging
import os
from functools import lru_cache
from tokenizers import ByteLevelBPETokenizer
from tokenizers.processors import BertProcessing
import pytorch_lightning as pl
import pandas as pd
from sklearn.metrics import classification_report
from training_module import TrainingModule
import matplotlib.pyplot as plt
from transformers import *
from argparse import Namespace
import matplotlib
matplotlib.style.use('ggplot')

### BERT MODEL ###

In [None]:
# Hugging Face Hub identifier of the pretrained checkpoint (cased Romanian BERT, per its name).
name = "dumitrescustefan/bert-base-romanian-cased-v1"
# Tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(name)
# Configure the model to return the hidden states of its layers as well.
config = BertConfig.from_pretrained(name, output_hidden_states=True)
# Load the pretrained weights using the configuration above.
bert_model = AutoModel.from_pretrained(name, config=config)

# Hyper-parameters for the run, collected in a dict and exposed as attributes
# (hparams.batch_size, hparams.lr, ...) through an argparse Namespace.
_hparam_values = {
    "batch_size": 16,
    "warmup_steps": 100,
    "epochs": 1,
    "lr": 5e-4,
    "accumulate_grad_batches": 1,
}
hparams = Namespace(**_hparam_values)

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

### TRAINING MODULE ###

In [3]:
# Bundle the pretrained model, its tokenizer, the hyper-parameters and the target
# device string into the project's TrainingModule (imported from training_module).
# NOTE(review): presumably a LightningModule, since it is handed to trainer.fit() — confirm.
module = TrainingModule(bert_model=bert_model, tokenizer=tokenizer, hparams=hparams, device=device)

### TRAINING ###

The trainer is where the magic happens. We can feed the TrainingModule to the trainer's `fit` method and can very easily switch from CPU to GPU.

In [None]:
## Train for roughly 10-15 minutes with a GPU enabled.
# Request a GPU only when CUDA is actually available, so this cell falls back to
# the CPU instead of failing on a runtime without one.
n_gpus = 1 if torch.cuda.is_available() else 0
trainer = pl.Trainer(gpus=n_gpus, max_epochs=hparams.epochs, progress_bar_refresh_rate=10,
                     accumulate_grad_batches=hparams.accumulate_grad_batches)

# Run the training loop on the module.
trainer.fit(module)

### TESTING ###

In [None]:
# Run the trainer's test loop. No model is passed, so this relies on the trainer's
# state from the preceding fit() call — presumably the module trained there (confirm).
trainer.test()

In [None]:
# Score the trained module on the test set and print a per-class report.
module.eval()  # inference mode for the module
true_y, pred_y = [], []
with torch.no_grad():  # gradients are not needed while evaluating
    for X, y in module.test_dataloader():
        # Predicted class = index of the largest score along dim 1.
        # assumes module(X) returns (batch, n_classes) scores — TODO confirm
        y_pred = torch.argmax(module(X), dim=1)
        # Bring both tensors to the CPU before converting, so this also works
        # when the dataloader yields labels that live on the GPU.
        true_y.extend(y.cpu().numpy())
        pred_y.extend(y_pred.cpu().numpy())
print("\n" + "_" * 80)

print(classification_report(true_y, pred_y,  digits=2))

### PLOT RESULTS ###

In [None]:
# Load the TensorBoard notebook extension and open it on the `lightning_logs/`
# directory (presumably where the Lightning trainer wrote its logs — confirm).
%load_ext tensorboard
%tensorboard --logdir lightning_logs/