# Reappraisal Training For Linguistic Distancing and Emotion Regulation

Runs model training

## Setup
1. Create virtual environment and download required packages (use pipenv).

**Notes**
- Attention: which words are important for the decoder to focus on at a specific timestep?
    - Q = Query
    - K = Key
    - V = Value
- Self-attention: What if Q and K are both the same sentence.
- Multi-head Self-Attention: self-attention calculated independently and concurrently (allows transformers to learn representations at different positional encodings)

**Sources**
-  [Sentiment Analysis Text Classification Tutorial](https://www.youtube.com/watch?v=8N-nM3QW7O0)
- [Using Catalyst for Training Organization](https://github.com/catalyst-team/catalyst)



In [1]:
# %%capture
# !pip install wandb -qqq
# import wandb
# !wandb login

## Sample code for tracking model training runs in wandb 
# see: https://colab.research.google.com/github/wandb/examples/blob/master/colabs/intro/Intro_to_Weights_%26_Biases.ipynb#scrollTo=-VE3MabfZAcx
# import math
# import random

# # 1️⃣ Start a new run, tracking config metadata
# wandb.init(project="test-drive", config={
#     "learning_rate": 0.02,
#     "dropout": 0.2,
#     "architecture": "CNN",
#     "dataset": "CIFAR-100",
# })
# config = wandb.config

# # Simulating a training or evaluation loop
# for x in range(50):
#     acc = math.log(1 + x + random.random() * config.learning_rate) + random.random()
#     loss = 10 - math.log(1 + x + random.random() + config.learning_rate * x) + random.random()
#     # 2️⃣ Log metrics from your script to W&B
#     wandb.log({"acc":acc, "loss":loss})

# wandb.finish()

In [2]:
# TODO: Add Open in Colab Button
# TODO: Write scripts for running as CLI in pipfile
# TODO: hyperparameter search

In [3]:
# Imports
from datasets import Dataset, load_dataset
from transformers import AutoTokenizer
import torch

In [4]:
# Constants and environment setup
#TODO: Set up env files for dev and "prod"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # macOS incompatible with NVIDIA GPUs
#Casing can matter for sentiment analysis ("bad" vs. "BAD")
PRETRAINED_MODEL_NAME = 'distilbert-base-cased' 
RANDOM_SEED = 42
MAX_LEN = 160
BATCH_SIZE = 16
EPOCHS = 3

# Code

In [5]:
from torch import nn, optim
from torch.utils.data import DataLoader
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast, AdamW, get_linear_schedule_with_warmup

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-cased')
train_ds, eval_ds = load_dataset('imdb', split=[
    'train[:20%]', 'test[:5%]'
])

train_test_ds = train_ds.train_test_split(test_size=0.15)
encoded_ds= train_test_ds.map(
    lambda batch: tokenizer(
        batch['text'],
        add_special_tokens=True,
        padding=True,
        truncation=True), 
    batched=True, batch_size=64, remove_columns=['text'])
encoded_ds.set_format(type='torch')
encoded_ds['train'].features

Reusing dataset imdb (/Users/danielpham/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3)
Loading cached split indices for dataset at /Users/danielpham/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3/cache-8eed4330d393b9e5.arrow and /Users/danielpham/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3/cache-dccf1be34b3e016f.arrow
Loading cached processed dataset at /Users/danielpham/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3/cache-b978c13e2344aa93.arrow
Loading cached processed dataset at /Users/danielpham/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3/cache-bbe3f9b8a5cd625f.arrow


{'attention_mask': Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None),
 'input_ids': Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None),
 'label': ClassLabel(num_classes=2, names=['neg', 'pos'], names_file=None, id=None)}

In [6]:
from runUtils import train_model
import transformers

transformers.logging.set_verbosity_error()
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-cased')
transformers.logging.set_verbosity_warning()
loss_fn = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(encoded_ds['train']))

In [7]:
train_dl = DataLoader(encoded_ds['train'])

from torch.nn import functional as F

test_iters = 0
model.train()

i = 0
for data in train_dl:
    input_ids = data['input_ids']
    attention_mask = data['attention_mask']
    labels = data['label']
    loss, logits = model(input_ids, attention_mask=attention_mask, labels=labels)
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
    i +=1
    if i >= 10:
        break


  return torch.tensor(x, **format_kwargs)


AttributeError: 'str' object has no attribute 'backward'

In [8]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # total # of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
)

trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=encoded_ds['train'],         # training dataset
    eval_dataset=encoded_ds['test']            # evaluation dataset
)


In [9]:
trainer.train()

Step,Training Loss


KeyboardInterrupt: 