# Neural Networks in NLP HW 4

## Дарья Родионова

In [1]:
! pip install -q -U watermark

In [2]:
! pip install transformers

In [3]:
%reload_ext watermark
%watermark -v -p numpy,pandas,torch,transformers

Скачиваем нужные модули.

In [4]:
import transformers
from transformers import BertModel, AutoModel,AutoTokenizer, BertTokenizer, PreTrainedTokenizerFast, AdamW, get_linear_schedule_with_warmup
import torch.nn.functional as F

import torch
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import Dataset, DataLoader

In [7]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 8, 6

In [8]:
! pip install datasets

### Downloading Data

Загружаем данные и делим их сразу на train/val/test.

In [9]:
from datasets import load_dataset

imdb_datasets = load_dataset('imdb', 
                             split={'train': 'train[:2000]+train[-2000:]', 
                                    'test': 'test[:1000]+test[-1000:]', 
                                    'validation': 'test[1000:2000]+test[-2000:-1000]'})

In [10]:
imdb_datasets

Решила выбрать модель bert-small, так как она маленькая и быстрая. 

In [11]:
MODEL_NAME = 'prajjwal1/bert-small'

In [12]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

### Preprocessing

Токенизируем данные. 

In [13]:
def preprocess(texts, tokenizer):
    result = tokenizer(texts['text'], 
                       max_length=512, 
                       padding='max_length', 
                       truncation=True)
    
    result['label'] = texts['label']
    
    return result

In [14]:
from functools import partial

tokenized_datasets = imdb_datasets.map(
    partial(preprocess, tokenizer=tokenizer),
    batched=True)

In [15]:
tokenized_datasets

Фукнция для подсчёта метрик. 

In [16]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

Задаём нужные параметры дляя обучения моделей.

In [17]:
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='eval_f1'
)

In [18]:
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8)

In [19]:
from torch.nn import CrossEntropyLoss

### 1. SentimentClassifier

Немного видоизменённый класс SentimentClassifer с семинара. 

In [20]:
class SentimentClassifier(nn.Module):
    
    def __init__(self, n_classes):
        super().__init__()
        self.bert = AutoModel.from_pretrained(MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
        self.n_classes = n_classes
  
    def forward(self, input_ids, attention_mask, labels=None):
        last_hidden_state, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False)
       
        logits = self.out(self.drop(pooled_output))

        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, self.n_classes), labels.view(-1))

        output = (logits,)
        return ((loss,) + output) if loss is not None else output

In [21]:
model = SentimentClassifier(2)
model = model.to('cuda')

Обучаем.

In [22]:
trainer = Trainer(
    model=model,                        
    args=training_args,                  
    train_dataset=tokenized_datasets['train'],         
    eval_dataset=tokenized_datasets['validation'],           
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    tokenizer=tokenizer
)

trainer_result = trainer.train()

In [23]:
trainer_result.metrics

Проверяем на данных из тестовой выборки.

In [24]:
tester_result = trainer.predict(test_dataset=tokenized_datasets['test'])

In [25]:
tester_result.metrics

### 2. SentimentClassifier with CLS

Добавляем эмбеддинг [CLS] токенов с последнего слоя.

In [26]:
class SentimentClassifierCLS(nn.Module):
    
    def __init__(self, n_classes):
        super().__init__()
        self.bert = AutoModel.from_pretrained(MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size*2, n_classes)
        self.n_classes = n_classes
  
    def forward(self, input_ids, attention_mask, labels=None):
        last_hidden_state, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False)
       

        cls = last_hidden_state[:,0,:] # [CLS]
        stacked_layers = torch.hstack([cls, pooled_output])

        logits = self.out(self.drop(stacked_layers))

        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, self.n_classes), labels.view(-1))

        output = (logits,)
        
        return ((loss,) + output) if loss is not None else output

In [27]:
model = SentimentClassifierCLS(2)
model = model.to('cpu')

Обучаем. 

In [28]:
trainer = Trainer(
    model=model,                        
    args=training_args,                  
    train_dataset=tokenized_datasets['train'],         
    eval_dataset=tokenized_datasets['validation'],           
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    tokenizer=tokenizer
)

trainer_result_cls = trainer.train()

In [29]:
trainer_result_cls.metrics

Проверяем на данных из тестовой выборки.

In [30]:
tester_result_cls = trainer.predict(test_dataset=tokenized_datasets['test'])

In [31]:
tester_result_cls.metrics

### 3. BertForSequenceClassification

Всё просто, скачиваем предобученный БЕРТ для классификации, аналогично обучаем и тестируем. 

In [34]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained(MODEL_NAME)
model = model.to('cpu')

In [35]:
trainer = Trainer(
    model=model,                        
    args=training_args,                  
    train_dataset=tokenized_datasets['train'],         
    eval_dataset=tokenized_datasets['validation'],           
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    tokenizer=tokenizer
)

trainer_result_pre = trainer.train()

In [36]:
trainer_result_pre.metrics

In [37]:
tester_results_pre = trainer.predict(test_dataset=tokenized_datasets['test'])

In [39]:
tester_results_pre.metrics

### 4. SentimentClassifier with CLS tokens from all layers

Агрегируем [CLS] токены для всех слоёв и находим среднее.

In [40]:
class SentimentClassifierPooledCLS(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes
        self.bert = AutoModel.from_pretrained(MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size*2, n_classes)
  
    def forward(self, input_ids, attention_mask, labels=None):
        _, pooled_output, hidden_states = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
            output_hidden_states=True)
       
        hidden_states = torch.stack(hidden_states)
        hidden_cls = hidden_states[:,:,1,:]
        hidden_cls = hidden_cls.mean(axis=0)

        stacked_layers = torch.hstack([hidden_cls, pooled_output])

        logits = self.out(self.drop(stacked_layers))

        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, self.n_classes), labels.view(-1))

        output = (logits,)
        return ((loss,) + output) if loss is not None else output

In [41]:
model = SentimentClassifierPooledCLS(2)
model = model.to('cpu')

In [42]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    tokenizer=tokenizer
)

trainer_results_pooled = trainer.train()

In [43]:
trainer_results_pooled.metrics

In [44]:
tester_results_pooled = trainer.predict(test_dataset=tokenized_datasets['test'])

In [45]:
tester_results_pooled.metrics

### Model Comparison

1. SentimentClassifier: 0.8655
2. SentimentClassifier with CSL: 0.874
3. <b>BertForSequenceClassification: 0.876</b>
4. SentimentClassifier with CLS tokens from all layers: 0.8695

В целом все модели показали достаточно высокие результаты. Как мы видим, лучшие результаты показала готовая модель для классификации. 

# GooglePlay Reviews

Посмотрим, как работает последняя модель на отзывах из GooglePlay.

In [46]:
! pip install gdown

In [47]:
! gdown --id 1zdmewp7ayS4js4VtrJEHzAheSW-5NBZv

Посмотрим на датасет с отзывами. 

In [48]:
df = pd.read_csv('reviews.csv')
df

In [52]:
negative_review = df[df.score<=2]

In [55]:
negative_review.content[3]

In [56]:
negative = '''It seems OK, but very basic. Recurring tasks need 
some work to be actually useful. 
For example, it would be nice to be able to set a task to be recurring on the first of every month, 
without only being able to set that up on the first of the month. 
Edit; I also just noticed that there is no dark theme. 
Both may be available as paid for options, but I'll never know, 
since they are basic options and without them, 
I have no reason to try this app, and thus will never pay for actual premium options.'''

encoding = tokenizer.encode_plus(
  negative,
  max_length=512,
  add_special_tokens=True, 
  return_token_type_ids=True,
  padding='max_length',
  return_attention_mask=True,
  return_tensors='pt',  
  truncation=True
).to('cuda')

In [57]:
model(
  input_ids=encoding['input_ids'], 
  attention_mask=encoding['attention_mask'])[0].cpu().detach().numpy().argmax()

Отрицательная полярность для отрицательного отзыва. 

In [58]:
neutral_review = df[df.score==3]

In [61]:
neutral_review.content[400]

In [62]:
neutral = '''I love the concept of the app I'm just not digging the functionality. 
Trying to set a custom date and time is a complete pain in the you know what. 
Every time I select pm it's stays at am. I have to do it multiple 
times before it catches. Same issue with the clock. 
I try to rotate the dial to select an hour and it hangs and 
throws me over to minutes. I go back to hours and it does the same thing. 
I finally just gave up. Back to the old string around the finger. ☹️'''

encoding = tokenizer.encode_plus(
  neutral,
  max_length=512,
  add_special_tokens=True, 
  return_token_type_ids=True,
  padding='max_length',
  return_attention_mask=True,
  return_tensors='pt',  
  truncation=True
).to('cuda')

In [72]:
model(
  input_ids=encoding['input_ids'], 
  attention_mask=encoding['attention_mask'])[0].cpu().detach().numpy().argmax()

Положительная полярность для нейтральной оценки.

In [64]:
positive_review = df[df.score>3]

In [66]:
positive_review.content[15741]

In [67]:
positive = '''I believe that this is by far the best app with helping 
ppl keep track of a lot of daily task and reminders. 
It's easy to function understand & it syncs with my Google calendar which is an A+ in my book.'''

encoding = tokenizer.encode_plus(
  positive,
  max_length=512,
  add_special_tokens=True, 
  return_token_type_ids=True,
  padding='max_length',
  return_attention_mask=True,
  return_tensors='pt',  
  truncation=True
).to('cuda')

In [68]:
model(
  input_ids=encoding['input_ids'], 
  attention_mask=encoding['attention_mask'])[0].cpu().detach().numpy().argmax()


Положительная полярность для положительного отзыва. 

В целом модель работает корректно, пусть и отзывы рассчитаны на 3 класса.