<a href="https://colab.research.google.com/github/gfx73/PML-DL/blob/main/Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install allennlp
!pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 torchdata==0.4.1 torchtext==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu113
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting allennlp
  Downloading allennlp-2.10.1-py3-none-any.whl (730 kB)
[K     |████████████████████████████████| 730 kB 17.6 MB/s 
[?25hCollecting requests>=2.28
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.5 MB/s 
[?25hCollecting fairscale==0.4.6
  Downloading fairscale-0.4.6.tar.gz (248 kB)
[K     |████████████████████████████████| 248 kB 56.9 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting jsonnet>=0.10.0
  Downloading jsonnet-0.19.1.tar.gz (593 kB)
[K     |████████████████████████████████| 593 kB 57.5 MB/s 
Collecting transformers<4.21,>=4.1
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |█████████████████

In [None]:
import sys
from torchtext.datasets import IMDB
from tqdm import tqdm
from torchtext.data.utils import get_tokenizer
import gc
import random
from torch.utils.data import Dataset, DataLoader, Subset
from allennlp.modules.elmo import Elmo, batch_to_ids
import torch
from torch import nn
from torch.nn import functional as F
import torchmetrics

In [None]:
CLASSIFIER_PRETRAINED = False

IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
  PATH_TO_SAVE_ELMO_CLASSIFIER = '/content/drive/MyDrive/PML&DL/Assignment2/elmo_classifier.pt'
else:
  PATH_TO_SAVE_ELMO_CLASSIFIER = 'elmo_classifier.pt'

In [None]:
if IN_COLAB:
  from google.colab import drive
  drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
IMDB_train_iter, IMDB_test_iter = IMDB()

In [None]:
random.seed(11)
tokenizer = get_tokenizer('basic_english')

def get_tokens_and_labels(datasplit):
  tokens, labels = [], []
  for label, text in tqdm(datasplit):
    tokens.append(tokenizer(text))
    labels.append(label=='pos')
  return tokens, labels

train_tokens, train_labels = get_tokens_and_labels(IMDB_train_iter)
test_tokens, test_labels = get_tokens_and_labels(IMDB_test_iter)

sample_tokens_and_labels = lambda tokens, labels: zip(*random.sample(list(zip(tokens, labels)), len(labels)))

train_tokens, train_labels = sample_tokens_and_labels(train_tokens, train_labels)
test_tokens, test_labels = sample_tokens_and_labels(test_tokens, test_labels)


del IMDB_train_iter
del IMDB_test_iter
gc.collect()

25000it [00:30, 824.96it/s] 
25000it [00:16, 1559.43it/s]


0

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

class dataset(Dataset):
  def __init__(self, tokens, labels):
    self.tokens = tokens
    self.labels = torch.tensor(labels, dtype=torch.float32)
    self.length = self.labels.shape[0]

  def __getitem__(self, idx):
    return self.tokens[idx], self.labels[idx]

  def __len__(self):
    return self.length


trainset = dataset(train_tokens, train_labels)
testset = dataset(test_tokens, test_labels)

valset_size = int(len(testset) * 0.02)
testset_size = len(testset) - valset_size
valset = Subset(testset, range(valset_size))
testset = Subset(testset, range(valset_size, valset_size + testset_size))

class CollateBatch(object):
  def __init__(self, batch_to_ids):
    self.batch_to_ids = batch_to_ids

  def __call__(self, batch):
    tokens_batch, labels_batch = [tokens_and_label[0] for tokens_and_label in batch], [tokens_and_label[1] for tokens_and_label in batch]
    tok_ids = self.batch_to_ids(tokens_batch).to(device)
    labels_batch = torch.tensor(labels_batch, dtype=torch.float32, device=device)
    return tok_ids, labels_batch

collateBatch = CollateBatch(batch_to_ids)

BATCH_SIZE = 32
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collateBatch)
valloader = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collateBatch)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collateBatch)

In [None]:
class Classifier(nn.Module):
  def __init__(self, embed_size, elmo):
    super(Classifier, self).__init__()
    self.embed_size = embed_size
    self.elmo = elmo
    self.fc1 = nn.Linear(embed_size, 1)
    
  def forward(self, input):
    embs = self.elmo(input)['elmo_representations'][0]
    mean = embs.mean(dim=1)
    x = torch.sigmoid(self.fc1(mean))
    return x


if CLASSIFIER_PRETRAINED:
  classifier = torch.load(PATH_TO_SAVE_ELMO_CLASSIFIER)
else:
  if IN_COLAB:
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
  else:
    options_file = "options.json"
    weight_file = "weights.hdf5"
  elmo = Elmo(options_file, weight_file, dropout=0, requires_grad=False, num_output_representations=1).to(device)
  classifier = Classifier(1024, elmo=elmo).to(device)

Output()

Output()

In [None]:
sum(p.numel() for p in classifier.elmo.parameters() if p.requires_grad)

4

In [None]:
learning_rate = 0.003
epochs = 1
# l2_penalty = 0.001
l2_penalty = 0

optimizer = torch.optim.RMSprop(classifier.parameters(), lr=learning_rate, weight_decay=l2_penalty)
loss_fn = F.binary_cross_entropy_with_logits

In [None]:
def eval_model(model, data, loss_fn):
  acc_metric = torchmetrics.Accuracy().to(device)
  prec_metric = torchmetrics.Precision().to(device)
  rec_metric = torchmetrics.Recall().to(device)
  f1_metric = torchmetrics.F1Score().to(device)
  running_loss = 0
  for x, y in tqdm(data):
    with torch.no_grad():
      y = y.reshape(-1, 1)
      with torch.autocast(device_type=device, dtype=torch.float16):
        preds = model(x)
        loss = loss_fn(preds, y)


      running_loss += loss.item()
      
      y = y.type(torch.int8)
      acc_metric(preds.round(), y)
      prec_metric(preds.round(), y)
      rec_metric(preds.round(), y)
      f1_metric(preds.round(), y)

      # print(y)
      # print(preds.round())
      # print(acc_metric.compute())

  loss = running_loss / len(data)
  acc = acc_metric.compute().item()
  prec = prec_metric.compute().item()
  rec = rec_metric.compute().item()
  f1 = f1_metric.compute().item()
  return loss, acc, prec, rec, f1

In [None]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [None]:
torch.cuda.empty_cache()
train_losses = []
train_accs = []
val_metrics = []

best_val_loss = 1e+8
for epoch in range(epochs):
  running_loss, correct, total = 0, 0, 0
  for iteration, (x_train ,y_train) in tqdm(enumerate(trainloader), total=len(trainloader)):
    optimizer.zero_grad()
    y_train = y_train.reshape(-1,1)
    with torch.autocast(device_type=device, dtype=torch.float16):
      preds = classifier(x_train)
      loss = loss_fn(preds, y_train)

    running_loss += loss.item()
    total += y_train.shape[0]
    correct += preds.round().eq(y_train).sum().item()

    loss.backward()
    optimizer.step()

    if iteration % 50 == 0:
      _loss = running_loss / (iteration + 1)
      acc = correct / total
      print("epoch: {}\titeration: {}\tloss: {}\tthis iteration loss: {}\taccuracy: {}".format(epoch, iteration, _loss, loss, acc))
      writer.add_scalar(f"ElmoClassifier/train/loss/epoch{epoch}/iteration{iteration}", loss, iteration)
      writer.add_scalar(f"ElmoClassifier/train/acc/epoch{epoch}/iteration{iteration}", acc, iteration)

      loss, acc, prec, rec, f1 = eval_model(classifier, valloader, loss_fn)
      writer.add_scalar(f"ElmoClassifier/val/loss/epoch{epoch}/iteration{iteration}", loss, iteration)
      writer.add_scalar(f"ElmoClassifier/val/acc/epoch{epoch}/iteration{iteration}", acc, iteration)
      writer.add_scalar(f"ElmoClassifier/val/prec/epoch{epoch}/iteration{iteration}", prec, iteration)
      writer.add_scalar(f"ElmoClassifier/val/rec/epoch{epoch}/iteration{iteration}", rec, iteration)
      writer.add_scalar(f"ElmoClassifier/val/f1/epoch{epoch}/iteration{iteration}", f1, iteration)


  loss = running_loss / len(trainloader)
  acc = correct / total
  train_losses.append(loss)
  train_accs.append(acc)
  print("epoch {}\ttrain loss : {}\ttrain accuracy : {}".format(epoch, loss, acc))

  loss, acc, prec, rec, f1 = eval_model(classifier, valloader, loss_fn)
  val_metrics.append([loss, acc, prec, rec, f1])
  print("epoch: {}\tval loss: {}\tval acc: {}\tval prec: {}\tval rec: {}\tval f1: {}".format(epoch, loss, acc, prec, rec, f1))
  if best_val_loss > loss:
    torch.save(classifier, PATH_TO_SAVE_ELMO_CLASSIFIER)
    best_val_loss = loss
  if not IN_COLAB:
    torch.save(classifier, f'classifier{epoch}.pt')

  0%|          | 1/782 [00:07<1:32:22,  7.10s/it]

epoch: 0	iteration: 0	loss: 0.6283023357391357	this iteration loss: 0.6283023357391357	accuracy: 0.78125


  7%|▋         | 51/782 [02:48<34:46,  2.85s/it]

epoch: 0	iteration: 50	loss: 0.6828417275466171	this iteration loss: 0.6961194276809692	accuracy: 0.5490196078431373


 13%|█▎        | 101/782 [05:36<38:19,  3.38s/it]

epoch: 0	iteration: 100	loss: 0.673536499538044	this iteration loss: 0.6901473999023438	accuracy: 0.5962252475247525


 19%|█▉        | 151/782 [08:32<42:03,  4.00s/it]

epoch: 0	iteration: 150	loss: 0.662827395050731	this iteration loss: 0.5992738604545593	accuracy: 0.6276903973509934


 26%|██▌       | 201/782 [11:18<34:09,  3.53s/it]

epoch: 0	iteration: 200	loss: 0.6553210992718217	this iteration loss: 0.6513580083847046	accuracy: 0.6449004975124378


 32%|███▏      | 251/782 [14:08<30:16,  3.42s/it]

epoch: 0	iteration: 250	loss: 0.6500494074061572	this iteration loss: 0.6181433200836182	accuracy: 0.6655876494023905


 38%|███▊      | 301/782 [17:00<30:57,  3.86s/it]

epoch: 0	iteration: 300	loss: 0.6445627172919999	this iteration loss: 0.6432056427001953	accuracy: 0.6795058139534884


 45%|████▍     | 351/782 [19:40<25:45,  3.59s/it]

epoch: 0	iteration: 350	loss: 0.6408058236806821	this iteration loss: 0.5683448314666748	accuracy: 0.6915064102564102


 51%|█████▏    | 401/782 [22:46<21:07,  3.33s/it]

epoch: 0	iteration: 400	loss: 0.6372700468857687	this iteration loss: 0.6406313180923462	accuracy: 0.6993453865336658


 58%|█████▊    | 451/782 [25:31<18:53,  3.42s/it]

epoch: 0	iteration: 450	loss: 0.63376255627482	this iteration loss: 0.6117323040962219	accuracy: 0.7084950110864745


 64%|██████▍   | 501/782 [28:19<19:13,  4.11s/it]

epoch: 0	iteration: 500	loss: 0.6316656812936247	this iteration loss: 0.6761535406112671	accuracy: 0.7153817365269461


 70%|███████   | 551/782 [31:22<12:21,  3.21s/it]

epoch: 0	iteration: 550	loss: 0.6297325337214392	this iteration loss: 0.5612467527389526	accuracy: 0.7206783121597096


 77%|███████▋  | 601/782 [34:07<08:21,  2.77s/it]

epoch: 0	iteration: 600	loss: 0.6277472713029325	this iteration loss: 0.6071460843086243	accuracy: 0.7255615640599001


 83%|████████▎ | 651/782 [37:07<07:25,  3.40s/it]

epoch: 0	iteration: 650	loss: 0.6259071945960987	this iteration loss: 0.6241366267204285	accuracy: 0.7307027649769585


 90%|████████▉ | 701/782 [39:57<04:08,  3.07s/it]

epoch: 0	iteration: 700	loss: 0.623754105537322	this iteration loss: 0.6158937215805054	accuracy: 0.734218972895863


 96%|█████████▌| 751/782 [43:02<01:49,  3.54s/it]

epoch: 0	iteration: 750	loss: 0.6216627938372794	this iteration loss: 0.5869169235229492	accuracy: 0.7382656458055925


100%|██████████| 782/782 [44:54<00:00,  3.45s/it]


epoch 0	train loss : 0.6209402933831105	train accuracy : 0.7414


100%|██████████| 16/16 [00:47<00:00,  2.98s/it]


epoch: 0	val loss: 0.5759647786617279	val acc: 0.7940000295639038	val prec: 0.8930232524871826	val rec: 0.7058823704719543	val f1: 0.7885010838508606


In [None]:
%load_ext tensorboard
%tensorboard --logdir runs

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 5.2 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 34.9 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 33.9 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.24.0


In [2]:
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
inputs = tokenizer(["Hello world!", 'this is the end'], padding=True, truncation=True, return_tensors="pt")
inputs['input_ids']

tensor([[ 101, 7592, 2088,  999,  102,    0],
        [ 101, 2023, 2003, 1996, 2203,  102]])

In [12]:
outputs = model(**inputs)
outputs.pooler_output

tensor([[-0.8976, -0.3304, -0.7694,  ..., -0.6560, -0.6200,  0.9095],
        [-0.8040, -0.1418,  0.5634,  ...,  0.5303, -0.5168,  0.8233]],
       grad_fn=<TanhBackward0>)

In [None]:
if IN_COLAB:
  !kill $(ps aux | awk '{print $2}')