In [1]:
#!conda install tensorflow
#!conda install pytorch
#!pip install pytorch-lightning
#!pip install datasets
#!pip install transformers
#!conda install keras

In [2]:
from datasets import list_datasets
datasets_list = list_datasets()
len(datasets_list)
print(', '.join(dataset for dataset in datasets_list))

acronym_identification, ade_corpus_v2, adversarial_qa, aeslc, afrikaans_ner_corpus, ag_news, ai2_arc, air_dialogue, ajgt_twitter_ar, allegro_reviews, allocine, alt, amazon_polarity, amazon_reviews_multi, amazon_us_reviews, ambig_qa, amttl, anli, app_reviews, aqua_rat, aquamuse, ar_cov19, ar_res_reviews, ar_sarcasm, arabic_billion_words, arabic_pos_dialect, arabic_speech_corpus, arcd, arsentd_lev, art, arxiv_dataset, ascent_kb, aslg_pc12, asnq, asset, assin, assin2, atomic, autshumato, babi_qa, banking77, bbaw_egyptian, bbc_hindi_nli, bc2gm_corpus, beans, best2009, bianet, bible_para, big_patent, billsum, bing_coronavirus_query_set, biomrc, biosses, blended_skill_talk, blimp, blog_authorship_corpus, bn_hate_speech, bookcorpus, bookcorpusopen, boolq, bprec, break_data, brwac, bsd_ja_en, bswac, c3, c4, cail2018, caner, capes, casino, catalonia_independence, cats_vs_dogs, cawac, cbt, cc100, cc_news, ccaligned_multilingual, cdsc, cdt, cedr, cfq, chr_en, cifar10, cifar100, circa, civil_comme

In [3]:
from datasets import load_dataset

In [4]:
cola_dataset = load_dataset("glue", "cola")
print(cola_dataset)

Reusing dataset glue (C:\Users\jhseo\.cache\huggingface\datasets\glue\cola\1.0.0\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
100%|██████████| 3/3 [00:00<00:00, 200.60it/s]

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 8551
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1043
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1063
    })
})





In [5]:
train_dataset = cola_dataset['train']
print(train_dataset[0])

{'sentence': "Our friends won't buy this analysis, let alone the next one we propose.", 'label': 1, 'idx': 0}


In [6]:
import pytorch_lightning as pl

In [7]:
class DataModule(pl.LightningDataModule):
    def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", batch_size=32):
        super().__init__()

        self.batch_size = batch_size
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def prepare_data(self):
        cola_dataset = load_dataset("glue", "cola")
        self.train_data = cola_dataset["train"]
        self.val_data = cola_dataset["validation"]

    def tokenize_data(self, example):
        # processing the data
        return self.tokenizer(
            example["sentence"],
            truncation=True,
            padding="max_length",
            max_length=256,
        )

    def setup(self, stage=None):
        if stage == "fit" or stage is None:
            self.train_data = self.train_data.map(self.tokenize_data, batched=True)
            self.train_data.set_format(
                type="torch", columns=["input_ids", "attention_mask", "label"]
            )

            self.val_data = self.val_data.map(self.tokenize_data, batched=True)
            self.val_data.set_format(
                type="torch", columns=["input_ids", "attention_mask", "label"]
            )

    def train_dataloader(self):
        return torch.utils.data.DataLoader(
            self.train_data, batch_size=self.batch_size, shuffle=True
        )

    def val_dataloader(self):
        return torch.utils.data.DataLoader(
            self.val_data, batch_size=self.batch_size, shuffle=False
        )

In [8]:
class ColaModel(pl.LightningModule):
    def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=1e-2):
        super(ColaModel, self).__init__()
        self.save_hyperparameters()

        self.bert = AutoModel.from_pretrained(model_name)
        self.W = nn.Linear(self.bert.config.hidden_size, 2)
        self.num_classes = 2

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        h_cls = outputs.last_hidden_state[:, 0]
        logits = self.W(h_cls)
        return logits

    def training_step(self, batch, batch_idx):
        logits = self.forward(batch["input_ids"], batch["attention_mask"])
        loss = F.cross_entropy(logits, batch["label"])
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        logits = self.forward(batch["input_ids"], batch["attention_mask"])
        loss = F.cross_entropy(logits, batch["label"])
        _, preds = torch.max(logits, dim=1)
        val_acc = accuracy_score(preds.cpu(), batch["label"].cpu())
        val_acc = torch.tensor(val_acc)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", val_acc, prog_bar=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"])

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics import accuracy_score
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint


In [10]:
cola_data = DataModule()
cola_model = ColaModel()

checkpoint_callback = ModelCheckpoint(
    dirpath="./models", monitor="val_loss", mode="min"
)

early_stopping_callback = EarlyStopping(
    monitor="val_loss", patience=3, verbose=True, mode="min"
)

trainer = pl.Trainer(
    default_root_dir="logs",
    gpus=(1 if torch.cuda.is_available() else 0),
    max_epochs=1,
    fast_dev_run=False,
    logger=pl.loggers.TensorBoardLogger("logs/", name="cola", version=1),
    callbacks=[checkpoint_callback, early_stopping_callback],
)
trainer.fit(cola_model, cola_data)

Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 I

Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|██████████| 301/301 [08:59<00:00,  1.79s/it, loss=0.641, v_num=1, train_loss=0.834, val_loss=0.621, val_acc=0.691]

Metric val_loss improved. New best score: 0.621


Epoch 0: 100%|██████████| 301/301 [09:00<00:00,  1.79s/it, loss=0.641, v_num=1, train_loss=0.834, val_loss=0.621, val_acc=0.691]


In [11]:
class ColaPredictor:
    def __init__(self, model_path):
        self.model_path = model_path
        # loading the trained model
        self.model = ColaModel.load_from_checkpoint(model_path)
        # keep the model in eval mode
        self.model.eval()
        self.model.freeze()
        self.processor = DataModule()
        self.softmax = torch.nn.Softmax(dim=0)
        self.lables = ["unacceptable", "acceptable"]

    def predict(self, text):
        # text => run time input
        inference_sample = {"sentence": text}
        # tokenizing the input
        processed = self.processor.tokenize_data(inference_sample)
        # predictions
        logits = self.model(
            torch.tensor([processed["input_ids"]]),
            torch.tensor([processed["attention_mask"]]),
        )
        scores = self.softmax(logits[0]).tolist()
        predictions = []
        for score, label in zip(scores, self.lables):
            predictions.append({"label": label, "score": score})
        return predictions

In [12]:
import wandb

In [13]:
from pytorch_lightning.loggers import WandbLogger
wandb_logger = WandbLogger(project="MLOps Basics")

In [14]:
trainer = pl.Trainer(
        max_epochs=3,
        logger=wandb_logger,
        callbacks=[checkpoint_callback],
    )

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [15]:
import torchmetrics

In [16]:
class ColaModel(pl.LightningModule):
    def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5):
        self.train_accuracy_metric = torchmetrics.Accuracy()
        self.val_accuracy_metric = torchmetrics.Accuracy()
        self.f1_metric = torchmetrics.F1(num_classes=self.num_classes)
        self.precision_macro_metric = torchmetrics.Precision(
            average="macro", num_classes=self.num_classes
        )
        self.recall_macro_metric = torchmetrics.Recall(
            average="macro", num_classes=self.num_classes
        )
        self.precision_micro_metric = torchmetrics.Precision(average="micro")
        self.recall_micro_metric = torchmetrics.Recall(average="micro")

In [17]:
def training_step(self, batch, batch_idx):
    outputs = self.forward(
        batch["input_ids"], batch["attention_mask"], labels=batch["label"]
    )
    # loss = F.cross_entropy(logits, batch["label"])
    preds = torch.argmax(outputs.logits, 1)
    train_acc = self.train_accuracy_metric(preds, batch["label"])
    self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True)
    self.log("train/acc", train_acc, prog_bar=True, on_epoch=True)
    return outputs.loss

In [18]:
def validation_step(self, batch, batch_idx):
    labels = batch["label"]
    outputs = self.forward(
        batch["input_ids"], batch["attention_mask"], labels=batch["label"]
    )
    preds = torch.argmax(outputs.logits, 1)

    # Metrics
    valid_acc = self.val_accuracy_metric(preds, labels)
    precision_macro = self.precision_macro_metric(preds, labels)
    recall_macro = self.recall_macro_metric(preds, labels)
    precision_micro = self.precision_micro_metric(preds, labels)
    recall_micro = self.recall_micro_metric(preds, labels)
    f1 = self.f1_metric(preds, labels)

    # Logging metrics
    self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True)
    self.log("valid/acc", valid_acc, prog_bar=True)
    self.log("valid/precision_macro", precision_macro, prog_bar=True)
    self.log("valid/recall_macro", recall_macro, prog_bar=True)
    self.log("valid/precision_micro", precision_micro, prog_bar=True)
    self.log("valid/recall_micro", recall_micro, prog_bar=True)
    self.log("valid/f1", f1, prog_bar=True)
    return {"labels": labels, "logits": outputs.logits}

In [19]:
def validation_epoch_end(self, outputs):
    labels = torch.cat([x["labels"] for x in outputs])
    logits = torch.cat([x["logits"] for x in outputs])
    preds = torch.argmax(logits, 1)

    cm = confusion_matrix(labels.numpy(), preds.numpy())

In [31]:
class SamplesVisualisationLogger(pl.Callback):
    def __init__(self, datamodule):
        super().__init__()

        self.datamodule = datamodule

    def on_validation_end(self, trainer, pl_module):
        # can be done on complete dataset also
        val_batch = next(iter(self.datamodule.val_dataloader()))
        sentences = val_batch["sentence"]

        # get the predictions
        outputs = pl_module(val_batch["input_ids"], val_batch["attention_mask"])
        preds = torch.argmax(outputs.logits, 1)
        labels = val_batch["label"]

        # predicted and labelled data
        df = pd.DataFrame(
            {"Sentence": sentences, "Label": labels.numpy(), "Predicted": preds.numpy()}
        )

        # wrongly predicted data
        wrong_df = df[df["Label"] != df["Predicted"]]

        # Logging wrongly predicted dataframe as a table
        trainer.logger.experiment.log(
            {
                "examples": wandb.Table(dataframe=wrong_df, allow_mixed_types=True),
                "global_step": trainer.global_step,
            }
        )

In [32]:
trainer = pl.Trainer(
        max_epochs=3,
        logger=wandb_logger,
        callbacks=[checkpoint_callback, SamplesVisualisationLogger(cola_data), early_stopping_callback],
        log_every_n_steps=10,
        deterministic=True,
    )

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [33]:
trainer.fit(cola_model, cola_data)

  rank_zero_deprecation(

  | Name | Type      | Params
-----------------------------------
0 | bert | BertModel | 4.4 M 
1 | W    | Linear    | 258   
-----------------------------------
4.4 M     Trainable params
0         Non-trainable params
4.4 M     Total params
17.545    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Validation sanity check: 100%|██████████| 2/2 [00:03<00:00,  1.78s/it]

KeyError: 'sentence'

In [35]:

# class ColaModel(pl.LightningModule):
#     def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5):
#         super(ColaModel, self).__init__()
#         self.save_hyperparameters()

#         self.bert = AutoModelForSequenceClassification.from_pretrained(
#             model_name, num_labels=2
#         )
#         self.num_classes = 2
#         self.train_accuracy_metric = torchmetrics.Accuracy()
#         self.val_accuracy_metric = torchmetrics.Accuracy()
#         self.f1_metric = torchmetrics.F1(num_classes=self.num_classes)
#         self.precision_macro_metric = torchmetrics.Precision(
#             average="macro", num_classes=self.num_classes
#         )
#         self.recall_macro_metric = torchmetrics.Recall(
#             average="macro", num_classes=self.num_classes
#         )
#         self.precision_micro_metric = torchmetrics.Precision(average="micro")
#         self.recall_micro_metric = torchmetrics.Recall(average="micro")

#     def forward(self, input_ids, attention_mask, labels=None):
#         outputs = self.bert(
#             input_ids=input_ids, attention_mask=attention_mask, labels=labels
#         )
#         return outputs

#     def training_step(self, batch, batch_idx):
#         outputs = self.forward(
#             batch["input_ids"], batch["attention_mask"], labels=batch["label"]
#         )
#         # loss = F.cross_entropy(logits, batch["label"])
#         preds = torch.argmax(outputs.logits, 1)
#         train_acc = self.train_accuracy_metric(preds, batch["label"])
#         self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True)
#         self.log("train/acc", train_acc, prog_bar=True, on_epoch=True)
#         return outputs.loss

#     def validation_step(self, batch, batch_idx):
#         labels = batch["label"]
#         outputs = self.forward(
#             batch["input_ids"], batch["attention_mask"], labels=batch["label"]
#         )
#         preds = torch.argmax(outputs.logits, 1)

#         # Metrics
#         valid_acc = self.val_accuracy_metric(preds, labels)
#         precision_macro = self.precision_macro_metric(preds, labels)
#         recall_macro = self.recall_macro_metric(preds, labels)
#         precision_micro = self.precision_micro_metric(preds, labels)
#         recall_micro = self.recall_micro_metric(preds, labels)
#         f1 = self.f1_metric(preds, labels)

#         # Logging metrics
#         self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True)
#         self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True)
#         self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True)
#         self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True)
#         self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True)
#         self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True)
#         self.log("valid/f1", f1, prog_bar=True, on_epoch=True)
#         return {"labels": labels, "logits": outputs.logits}

#     def validation_epoch_end(self, outputs):
#         labels = torch.cat([x["labels"] for x in outputs])
#         logits = torch.cat([x["logits"] for x in outputs])
#         preds = torch.argmax(logits, 1)

#         ## There are multiple ways to track the metrics
#         # 1. Confusion matrix plotting using inbuilt W&B method
#         self.logger.experiment.log(
#             {
#                 "conf": wandb.plot.confusion_matrix(
#                     probs=logits.numpy(), y_true=labels.numpy()
#                 )
#             }
#         )

#         # 2. Confusion Matrix plotting using scikit-learn method
#         # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)})

#         # 3. Confusion Matric plotting using Seaborn
#         # data = confusion_matrix(labels.numpy(), preds.numpy())
#         # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels))
#         # df_cm.index.name = "Actual"
#         # df_cm.columns.name = "Predicted"
#         # plt.figure(figsize=(7, 4))
#         # plot = sns.heatmap(
#         #     df_cm, cmap="Blues", annot=True, annot_kws={"size": 16}
#         # )  # font size
#         # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)})

#         # self.logger.experiment.log(
#         #     {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())}
#         # )

#     def configure_optimizers(self):
#         return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"])

In [37]:
# from transformers import AutoModelForSequenceClassification
# cola_data = DataModule()
# cola_model = ColaModel()

# checkpoint_callback = ModelCheckpoint(
#     dirpath="./models",
#     filename="best-checkpoint.ckpt",
#     monitor="valid/loss",
#     mode="min",
# )

# early_stopping_callback = EarlyStopping(
#     monitor="valid/loss", patience=3, verbose=True, mode="min"
# )

# wandb_logger = WandbLogger(project="MLOps Basics", entity="raviraja")
# trainer = pl.Trainer(
#     max_epochs=1,
#     logger=wandb_logger,
#     callbacks=[checkpoint_callback, SamplesVisualisationLogger(cola_data), early_stopping_callback],
#     log_every_n_steps=10,
#     deterministic=True,
#     # limit_train_batches=0.25,
#     # limit_val_batches=0.25
# )
# trainer.fit(cola_model, cola_data)

Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Validation sanity check: 100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

KeyError: 'sentence'