<a href="https://colab.research.google.com/github/aegonwolf/reference_collection/blob/main/Lightning_Deberta_PatentSimilarity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pytorch_lightning
!pip install transformers
!pip install datasets
!pip install sacremoses
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_lightning
  Downloading pytorch_lightning-1.6.4-py3-none-any.whl (585 kB)
[K     |████████████████████████████████| 585 kB 13.2 MB/s 
Collecting pyDeprecate>=0.3.1
  Downloading pyDeprecate-0.3.2-py3-none-any.whl (10 kB)
Collecting PyYAML>=5.4
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 67.2 MB/s 
Collecting fsspec[http]!=2021.06.0,>=2021.05.0
  Downloading fsspec-2022.5.0-py3-none-any.whl (140 kB)
[K     |████████████████████████████████| 140 kB 82.7 MB/s 
[?25hCollecting torchmetrics>=0.4.1
  Downloading torchmetrics-0.9.1-py3-none-any.whl (419 kB)
[K     |████████████████████████████████| 419 kB 88.6 MB/s 
Collecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manyl

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import transformers
import torch
import transformers
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import os
import re
import json
import cv2
from sklearn.model_selection import StratifiedKFold

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

import warnings
warnings.simplefilter('ignore')

In [None]:
from transformers import AutoTokenizer, AutoModel, AutoConfig

In [None]:
# Hugging Face hub identifier of the pretrained checkpoint used throughout the notebook.
model_ckpt = 'microsoft/deberta-v3-large'


class Config:
    """Central place for the hyper-parameters and shared objects of this notebook."""

    # Checkpoint and the tokenizer that belongs to it (loaded once, at class creation).
    MODEL_NAME = model_ckpt
    TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

    # Optimisation settings.
    NB_EPOCHS = 5
    LR = 2e-5

    # Every input is padded / truncated to this many tokens.
    MAX_LEN = 128

    # Cross-validation and data-loading settings.
    N_SPLITS = 5
    TRAIN_BS = 8
    VALID_BS = 8
    NUM_WORKERS = 2

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/580 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.35M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Load the training table. Later cells read its 'input', 'score' and 'fold'
# columns, so the CSV is expected to already contain them.
train = pd.read_csv('train.csv')

In [None]:
# Name of the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
class PPPMDataset(Dataset):
    """Map-style dataset that tokenizes the 'input' column of a DataFrame.

    Args:
        df: DataFrame with an 'input' text column and, unless ``is_test`` is
            set, a 'score' column holding the regression target.
        is_test: When True no targets are read and items carry no 'targets' key.

    Each item is a dict with:
        'ids':     LongTensor of token ids, length ``Config.MAX_LEN``.
        'mask':    LongTensor attention mask, same length.
        'targets': scalar FloatTensor with the score (training / validation only).
    """

    def __init__(self, df, is_test=False):
        self.is_test = is_test
        self.texts = df['input'].values
        if not self.is_test:
            self.scores = df['score'].values

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Call the tokenizer directly and request fixed-length padding with
        # padding='max_length'; this replaces the deprecated
        # encode_plus(..., pad_to_max_length=True) form.
        encoded = Config.TOKENIZER(
            self.texts[idx],
            truncation=True,
            add_special_tokens=True,
            max_length=Config.MAX_LEN,
            padding='max_length',
        )

        item = {
            'ids': torch.tensor(encoded['input_ids'], dtype=torch.long),
            'mask': torch.tensor(encoded['attention_mask'], dtype=torch.long),
        }
        if not self.is_test:
            item['targets'] = torch.tensor(self.scores[idx], dtype=torch.float)
        return item

In [None]:
class Model(pl.LightningModule):
    """Pretrained transformer encoder + BiLSTM + linear head for score regression.

    The module owns its data: it receives the training and validation frames,
    wraps them in ``PPPMDataset`` and serves the loaders to the Trainer.

    Args:
        train_df: DataFrame used to build the training set.
        valid_df: DataFrame used to build the validation set.
    """

    def __init__(self, train_df, valid_df) -> None:
        super().__init__()
        # Kept as an attribute for backward compatibility; it is not passed to
        # the encoder, which loads the checkpoint's own configuration.
        self.config = AutoConfig.from_pretrained(Config.MODEL_NAME, output_hidden_states=True)
        # Let AutoModel pick the architecture that matches the checkpoint.
        # Forcing DebertaModel onto a deberta-v3 checkpoint triggered the
        # "model of type deberta-v2 ... type deberta" warning and left the
        # attention projection weights unused, i.e. randomly initialised.
        self.model = AutoModel.from_pretrained(Config.MODEL_NAME)

        lstm_hidden = 20
        # Input width follows the encoder instead of a hard-coded 1024.
        self.lstm = nn.LSTM(
            self.model.config.hidden_size,
            lstm_hidden,
            1,
            batch_first=True,
            bidirectional=True,
        )
        self.drop = nn.Dropout(0.3)
        # Bidirectional LSTM -> forward and backward states are concatenated.
        self.out = nn.Linear(lstm_hidden * 2, 1)

        # Targets seen during the current validation epoch.
        self.all_targets = []
        self.train_loss_fn = nn.MSELoss()
        self.valid_loss_fn = nn.MSELoss()

        self.train_df = train_df
        self.valid_df = valid_df

    def forward(self, ids, mask) -> torch.Tensor:
        """Return one prediction per sequence, shape (batch, 1)."""
        encoded = self.model(ids, attention_mask=mask, return_dict=False)
        # encoded[0] is the per-token output of the encoder.
        sequence, _ = self.lstm(encoded[0])
        # NOTE(review): index 1 selects the second sequence position; confirm
        # this is the intended pooling (rather than position 0 or -1).
        pooled = self.drop(sequence[:, 1, :])
        return self.out(pooled)

    def prepare_data(self) -> None:
        """Build the training and validation datasets from the stored frames."""
        self.training_set = PPPMDataset(self.train_df)
        self.validation_set = PPPMDataset(self.valid_df)

    def train_dataloader(self):
        """Shuffled loader over the training set."""
        return DataLoader(
            self.training_set,
            batch_size=Config.TRAIN_BS,
            shuffle=True,
            num_workers=Config.NUM_WORKERS,
            pin_memory=True,
        )

    def val_dataloader(self):
        """Ordered loader over the validation set."""
        return DataLoader(
            self.validation_set,
            batch_size=Config.VALID_BS,
            shuffle=False,
            num_workers=Config.NUM_WORKERS,
        )

    def training_step(self, batch, batch_idx):
        """Loss = negative Pearson correlation + MSE on the batch."""
        targets = batch['targets']
        outputs = self(ids=batch['ids'], mask=batch['mask'])

        ploss = self.pearson(outputs, targets)
        train_loss = self.train_loss_fn(outputs, targets.view(-1, 1))
        loss = ploss + train_loss
        self.log('train_loss', loss)
        return {'loss': loss}

    def on_validation_epoch_start(self) -> None:
        """Reset the per-epoch target buffer so it does not grow across epochs."""
        self.all_targets.clear()

    def validation_step(self, batch, batch_idx):
        """Compute the MSE on a validation batch and log it as 'val_loss'."""
        targets = batch['targets']
        outputs = self(ids=batch['ids'], mask=batch['mask'])

        self.all_targets.extend(targets.cpu().detach().numpy().tolist())

        valid_loss = self.valid_loss_fn(outputs, targets.view(-1, 1))
        # Logged from validation_step so the epoch-level value is reported by
        # Lightning itself; the legacy validation_end hook below was never
        # invoked during fit (no "val_loss:" line appeared in the run output).
        self.log('val_loss', valid_loss, prog_bar=True)
        return {'val_loss': valid_loss}

    def pearson(self, outputs, targets):
        """Return the negative Pearson correlation between predictions and targets.

        Both tensors are flattened first. ``outputs`` comes out of the head as
        (batch, 1) while ``targets`` is (batch,); multiplying them unflattened
        broadcasts to (batch, batch), and the sum of that matrix is
        sum(x) * sum(y), which is ~0 for mean-centred vectors, so the term
        would contribute nothing to the loss.
        """
        eps = 1e-6
        x = outputs.reshape(-1)
        y = targets.reshape(-1)
        x = x - x.mean()
        y = y - y.mean()
        # eps keeps the division finite for constant inputs (zero norm).
        x_unit = x / (torch.linalg.norm(x) + eps)
        y_unit = y / (torch.linalg.norm(y) + eps)
        return -(x_unit * y_unit).sum()

    def validation_end(self, outputs):
        """Average the 'val_loss' entries of ``outputs`` and print the result.

        Retained for callers that invoke it directly; epoch-level validation
        loss is logged from ``validation_step``.
        """
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        logs = {'val_loss': avg_loss}

        print(f"val_loss: {avg_loss}")
        return {'avg_val_loss': avg_loss, 'log': logs}

    def configure_optimizers(self):
        """AdamW with weight decay disabled for bias and LayerNorm parameters."""
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        decayed, undecayed = [], []
        for name, param in self.named_parameters():
            if any(marker in name for marker in no_decay):
                undecayed.append(param)
            else:
                decayed.append(param)

        optimizer_parameters = [
            {"params": decayed, "weight_decay": 0.001},
            {"params": undecayed, "weight_decay": 0.0},
        ]
        # torch.optim.AdamW replaces the deprecated transformers.AdamW;
        # eps=1e-6 keeps the epsilon that implementation used by default.
        return torch.optim.AdamW(optimizer_parameters, lr=Config.LR, eps=1e-6)

In [None]:
# Hold out one fold for validation and train on the remaining ones.
VALID_FOLD = 4
train_data = train[train.fold != VALID_FOLD]
valid_data = train[train.fold == VALID_FOLD]

model = Model(train_data, valid_data)
# accelerator='auto' uses the GPU when one is present and falls back to the CPU
# otherwise, instead of failing on a CPU-only runtime as a hard-coded gpus=1 does.
trainer = pl.Trainer(
    max_epochs=Config.NB_EPOCHS,
    accelerator='auto',
    devices=1,
    accumulate_grad_batches=4,
)
trainer.fit(model)

You are using a model of type deberta-v2 to instantiate a model of type deberta. This is not supported for all configurations of models and can yield errors.


Downloading:   0%|          | 0.00/833M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/deberta-v3-large were not used when initializing DebertaModel: ['lm_predictions.lm_head.bias', 'deberta.encoder.layer.0.attention.self.value_proj.weight', 'deberta.encoder.layer.9.attention.self.value_proj.bias', 'deberta.encoder.layer.14.attention.self.value_proj.weight', 'deberta.encoder.layer.8.attention.self.key_proj.bias', 'mask_predictions.classifier.bias', 'deberta.encoder.layer.13.attention.self.value_proj.bias', 'deberta.encoder.layer.16.attention.self.query_proj.bias', 'deberta.encoder.layer.11.attention.self.query_proj.bias', 'deberta.encoder.layer.22.attention.self.value_proj.bias', 'deberta.encoder.layer.19.attention.self.key_proj.weight', 'mask_predictions.LayerNorm.weight', 'deberta.encoder.layer.1.attention.self.value_proj.bias', 'deberta.encoder.layer.23.attention.self.key_proj.weight', 'deberta.encoder.layer.7.attention.self.value_proj.bias', 'deberta.encoder.layer.12.attention.self.key_proj.weight', 'mask_predictions.

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [None]:
# c = AutoConfig.from_pretrained(Config.MODEL_NAME, output_hidden_states=True)

In [None]:
# c.hidden_size

In [None]:
import gc

# Drop the notebook-level reference to the model, run a garbage-collection
# pass, then ask PyTorch to release its cached CUDA memory.
del model
gc.collect()
torch.cuda.empty_cache()