# Abstract

Train and prototype your models quickly by using GPUs. This notebook shows an easy and quick way to train 🤗 Transformers models on GPUs.

**This notebook trains the models for folds 4 and 5.**



# Versions
[Inference Notebook]()

* Version 3: DeBertaV3 Base on GPUs **CV:- 0. LB:- 0.**


# Imports

In [None]:
!pip uninstall transformers -y
!pip install transformers

In [None]:
import gc
import numpy as np 
import pandas as pd 

from scipy.stats import pearsonr

from transformers import TFAutoModel, AutoTokenizer,AutoModel

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.losses import SparseCategoricalCrossentropy 
from tensorflow.keras.activations import tanh, softmax
from tensorflow.keras.layers import Layer,Input, Dense, Flatten, Dropout, GlobalAveragePooling1D
from tensorflow.keras.models import Model, save_model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau , EarlyStopping
from tensorflow.keras.optimizers import Adam, SGD

import os

# Output directory for the tokenizer, the backbone snapshot and the per-fold checkpoints.
# exist_ok=True keeps the cell idempotent and avoids the check-then-create race.
os.makedirs("./result", exist_ok=True)


In [None]:
# Show the installed transformers version as the cell output.
import transformers
transformers.__version__

# Configs

In [None]:
class config:
    """Paths and hyper-parameters read by the other cells of this notebook."""

    # --- input files ---
    train_dataset_path = "../input/ppmpcleaned/train.csv"
    test_dataset_path = "../input/ppmpcleaned/test.csv"
    sample_submission_path = "../input/ppmpcleaned/sample_submission.csv"
    cpc_path = "../input/ppmpcleaned/titles.csv"

    # --- output directory ---
    save_dir = "./result"

    # Passed to tf.data map/prefetch so TensorFlow picks the parallelism itself.
    AUTOTUNE = tf.data.AUTOTUNE

    # --- tokenizer ---
    truncation = True
    padding = "max_length"
    max_length = 75

    # --- model ---
    model_name = "microsoft/deberta-v3-base"

    # --- training ---
    learning_rate = 1e-5
    batch_size = 16
    epochs = 15

# PreProcessing

In [None]:
# Read the four input tables: train pairs, test pairs, sample submission and CPC titles.
df_train, df_test, df_ss, df_cpc = (
    pd.read_csv(csv_path)
    for csv_path in (
        config.train_dataset_path,
        config.test_dataset_path,
        config.sample_submission_path,
        config.cpc_path,
    )
)

# These training columns are not used by the rest of the notebook.
df_train.drop(columns=['context', 'anchor_length', 'target_length'], inplace=True)

In [None]:
# Lookup table from CPC code to its title; a repeated code keeps the last title seen.
context_mapping = dict(zip(df_cpc['code'], df_cpc['title']))

# Translate each test row's CPC code into the corresponding title text.
df_test['context_text'] = df_test['context'].map(context_mapping)


In [None]:
# Model input layout: "<anchor> [SEP] <target> [SEP] <context title>", built for both splits.
for _frame in (df_train, df_test):
    _frame['text'] = (
        _frame['anchor'] + " [SEP] " + _frame['target'] + " [SEP] " + _frame['context_text']
    )

In [None]:
# Display the training frame as the cell output.
df_train

In [None]:
# Display the per-row fold labels that the training loop filters on.
df_train['kfold']

# Tokenizer

In [None]:
# Load the tokenizer that belongs to the configured backbone and store a copy
# inside the output directory (path derived from config.save_dir instead of
# repeating the literal "./result").
tokenizer = AutoTokenizer.from_pretrained(config.model_name)
tokenizer.save_pretrained(f"{config.save_dir}/tokenizer/")

# Model

In [None]:
# Store a copy of the pretrained backbone inside the output directory
# (path derived from config.save_dir instead of repeating the literal "./result"),
# then release the temporary instance before the training models are built.
transformer = TFAutoModel.from_pretrained(config.model_name)
transformer.save_pretrained(f"{config.save_dir}/hf_model/")
del transformer
gc.collect()

In [None]:
class TransformerBlock(Layer):
    """Keras layer that wraps the pretrained transformer backbone.

    Calling the layer returns the backbone's ``last_hidden_state``.
    """

    def __init__(self):
        super().__init__()
        # Backbone weights come from the checkpoint named in ``config.model_name``.
        # (The previously declared Dense layer was never called and has been removed.)
        self.transformer_model = TFAutoModel.from_pretrained(config.model_name)

    def call(self, input_tensors):
        """Run the backbone on a pair of inputs.

        Args:
            input_tensors: Indexable whose element 0 is ``input_ids`` and
                element 1 is ``attention_mask``.

        Returns:
            The ``last_hidden_state`` attribute of the backbone output.
        """
        input_id = input_tensors[0]
        attention_mask = input_tensors[1]
        transformer_output = self.transformer_model(
            input_ids=input_id, attention_mask=attention_mask
        )
        return transformer_output.last_hidden_state

class RegressionHead(Layer):
    """Maps its input to a single ReLU-activated value through one Dense layer."""

    def __init__(self):
        super().__init__()
        self.dense = Dense(units=1, activation="relu")

    def call(self, input_tensors):
        """Apply the Dense projection to ``input_tensors`` and return the result."""
        return self.dense(input_tensors)

class AttentionHead(Layer):
    """Computes attention weights over axis 1 of its input.

    The input is projected to 512 units, passed through tanh, reduced to one
    score per position, and the scores are normalised with a softmax over
    axis 1.
    # assumes the input is (batch, seq_len, hidden) — TODO confirm against caller
    # NOTE(review): padded positions are not masked out before the softmax.
    """

    def __init__(self):
        super().__init__()
        self.dense1 = Dense(512)
        self.tanh = tanh
        self.softmax = softmax
        # Linear scoring unit. A softmax activation on a single unit normalises
        # over a size-1 axis and therefore always outputs 1.0, which made every
        # position receive the same weight and blocked gradients to both Dense
        # layers. The only softmax that should run is the one over axis 1 below.
        self.dense2 = Dense(1)

    def call(self, input_tensors):
        """Return softmax-normalised (axis 1) attention weights for ``input_tensors``."""
        x = self.dense1(input_tensors)
        x = self.tanh(x)
        scores = self.dense2(x)
        return self.softmax(scores, axis=1)
    
class PPPMModel(Model):
    """Backbone + attention pooling + regression head.

    The backbone output is weighted by the attention head, summed over axis 1,
    and the resulting vector is fed to the regression head.
    """

    def __init__(self):
        super().__init__()
        self.transformer_model = TransformerBlock()
        self.attentionhead = AttentionHead()
        self.regressionhead = RegressionHead()

    def call(self, input_tensors):
        """Compute the prediction for ``[input_ids, attention_mask]``.

        Args:
            input_tensors: Indexable holding ``input_ids`` at position 0 and
                ``attention_mask`` at position 1.

        Returns:
            Output of the regression head.
        """
        transformer_output = self.transformer_model(input_tensors)
        weights = self.attentionhead(transformer_output)
        # Attention-weighted sum over axis 1 of the backbone output.
        context_vector = tf.reduce_sum(weights * transformer_output, axis=1)
        return self.regressionhead(context_vector)

    def model(self):
        """Build a functional Keras model with named ``input_ids`` / ``attention_mask`` inputs."""
        # The shape must be a tuple: ``(n)`` is just the int ``n``, ``(n,)`` is a 1-tuple.
        input_id = Input(shape=(config.max_length,), dtype=tf.int32, name='input_ids')
        attention_mask = Input(shape=(config.max_length,), dtype=tf.int32, name='attention_mask')

        return Model(
            inputs=[input_id, attention_mask],
            outputs=self.call([input_id, attention_mask]),
        )

# Dataset Prep Function

In [None]:
@tf.function
def map_function(encodings, target):
    """Convert one (encodings, target) element into a (features, label) pair.

    Only ``input_ids`` and ``attention_mask`` are kept from ``encodings``;
    ``target`` is cast to float32.
    """
    features = {key: encodings[key] for key in ('input_ids', 'attention_mask')}
    label = tf.cast(target, tf.float32)
    return features, label

# Competition Metrics
Grabbed this snippet from [here](https://www.kaggle.com/code/mohamadmerchant/us-phrase-matching-tf-keras-train-tpu#Competition-Metrics)

In [None]:
class Pearsonr(tf.keras.callbacks.Callback):
    """Callback that reports the Pearson correlation on validation data after each epoch.

    Args:
        model: Model used to produce the validation predictions.
        val_data: Input passed to ``model.predict``.
        y_val: Ground-truth targets aligned with the predictions on ``val_data``.
    """

    def __init__(self, model, val_data, y_val):
        # Initialise the base Callback state before adding our own attributes.
        super().__init__()
        self.val_data = val_data
        self.y_val = y_val
        # Attach the model through the Callback API rather than assigning the attribute directly.
        self.set_model(model)

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the validation data, print the correlation and record it in ``logs``."""
        val_preds = self.model.predict(self.val_data, verbose=1)
        val_pearsonr = pearsonr(self.y_val, val_preds.ravel())[0]

        print(f"val_pearsonr: {val_pearsonr:.4f}\n")
        # ``logs`` may be omitted by a caller; only record the metric when a dict is supplied.
        if logs is not None:
            logs["val_pearsonr"] = val_pearsonr

# KFold Training

In [None]:
# Train one model per fold: the rows whose 'kfold' equals the fold are held out
# for validation, all remaining rows are used for training.
histories = []
scores = []
# NOTE(review): folds 4 and 5 are trained here — confirm both labels exist in 'kfold'.
for fold in range(4, 6):
    print(f"====== FOLD RUNNING {fold}======")

    X_train = df_train.loc[df_train['kfold'] != fold]['text']
    y_train = df_train.loc[df_train['kfold'] != fold]['score']

    X_test = df_train.loc[df_train['kfold'] == fold]['text']
    y_test = df_train.loc[df_train['kfold'] == fold]['score']

    print("Generating Tokens")
    train_embeddings = tokenizer(
        X_train.tolist(),
        truncation=config.truncation,
        padding=config.padding,
        max_length=config.max_length,
    )

    validation_embeddings = tokenizer(
        X_test.tolist(),
        truncation=config.truncation,
        padding=config.padding,
        max_length=config.max_length,
    )

    print("Generating Datasets")

    train = tf.data.Dataset.from_tensor_slices((train_embeddings, y_train))
    train = (
        train
        .map(map_function, num_parallel_calls=config.AUTOTUNE)
        # Keras does not shuffle a tf.data.Dataset itself, so shuffle the
        # training rows here (validation order must stay aligned with y_test).
        .shuffle(len(X_train))
        .batch(config.batch_size)
        .prefetch(config.AUTOTUNE)
    )

    val = tf.data.Dataset.from_tensor_slices((validation_embeddings, y_test))
    val = (
        val
        .map(map_function, num_parallel_calls=config.AUTOTUNE)
        .batch(config.batch_size)
        .prefetch(config.AUTOTUNE)
    )

    # Clearing backend session
    K.clear_session()
    print("Backend Cleared")

    print("Model Creation")
    model = PPPMModel().model()
    model.compile(
        optimizer=Adam(learning_rate=config.learning_rate),
        metrics=['mae'],
        loss=['mae'],
    )
    early_stopping = EarlyStopping(
        monitor="val_loss",
        min_delta=0,
        patience=3,
        verbose=1,
        mode="min",
        restore_best_weights=True,
    )
    pearson_corr = Pearsonr(model, val, y_test)

    # No steps_per_epoch / validation_steps: every epoch must iterate over the
    # whole training and validation datasets. Limiting both to a single step
    # trained and validated on just one batch per epoch.
    hist = model.fit(
        train,
        validation_data=val,
        epochs=config.epochs,
        callbacks=[early_stopping, pearson_corr],
    )

    # prediction on val
    print("prediction on validation data")
    preds = model.predict(val, verbose=1)
    score = pearsonr(preds.reshape(preds.shape[0],), y_test)[0]
    scores.append(score)

    print("saving model")
    # NOTE(review): config.model_name contains a '/', so this path points into a
    # sub-directory of config.save_dir — confirm save_model creates it.
    save_model(model, f'{config.save_dir}/{config.model_name}_{fold}.h5')

    del model, X_train, y_train, X_test, y_test, train, val
    gc.collect()

    histories.append(hist)

# The collected scores are Pearson correlations, not RMSE values.
print("the final average pearson correlation is ", np.mean(scores))





























Thanks for viewing! Drop your suggestions in the comments below. 🙂