In [1]:
import os
os.environ['KERAS_BACKEND'] = 'tensorflow'

import keras_nlp
import keras
import keras.backend as K
import tensorflow as tf

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib as mpl

cmap = mpl.colormaps['coolwarm']

2024-04-27 15:54:30.139384: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-27 15:54:30.139482: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-27 15:54:30.302324: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


### Configuration

In [2]:
class CFG:
    seed = 42
    preset = 'deberta_v3_extra_small_en'
    sequence_length = 512
    epochs = 2
    batch_size = 32
    scheduler = 'exp'
    cache = False

In [3]:
keras.utils.set_random_seed(CFG.seed)
keras.mixed_precision.set_global_policy('mixed_float16')

### Load Data

In [4]:
BASE_PATH = '/kaggle/input/learning-agency-lab-automated-essay-scoring-2'

df = pd.read_csv(f'{BASE_PATH}/train.csv')

In [5]:
from sklearn.model_selection import train_test_split
train_df, valid_df = train_test_split(df, test_size=0.2, stratify=df['score'])

In [6]:
train_df.score.value_counts()

score
3    5024
2    3778
4    3141
1    1001
5     776
6     125
Name: count, dtype: int64

### Preprocessing

In [7]:
preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
    preset=CFG.preset,
    sequence_length=CFG.sequence_length
)

for key, value in preprocessor("The quick fox jumped.").items():
    print(f'{key}: {value.shape}')

Attaching 'tokenizer.json' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...
Attaching 'tokenizer.json' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...
Attaching 'assets/tokenizer/vocabulary.spm' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...


token_ids: (512,)
padding_mask: (512,)


### Model Definition

In [8]:
classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
    CFG.preset, preprocessor=None, num_classes=6, activation="softmax"
)

inp = classifier.input
logit = classifier(inp)

model = keras.Model(inp, logit)
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=1e-5), loss=tf.keras.losses.SparseCategoricalCrossentropy())

model.summary()

Attaching 'config.json' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...
Attaching 'config.json' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...
Attaching 'model.weights.h5' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...


In [9]:
tmp = preprocessor(["hello my friend!"])
model(tmp)

<tf.Tensor: shape=(1, 6), dtype=float16, numpy=array([[0.1614, 0.193 , 0.163 , 0.1542, 0.1921, 0.1364]], dtype=float16)>

### Data Stream

In [10]:
def build_ds(texts, scores):
    inp = preprocessor(texts)
    ds = tf.data.Dataset.from_tensor_slices((inp, scores))
    ds = ds.prefetch(tf.data.AUTOTUNE)
    ds = ds.shuffle(256)
    ds = ds.cache()
    return ds

In [11]:
train_ds = build_ds(train_df.full_text.tolist(), np.asarray(train_df.score.tolist())-1)
train_ds = train_ds.batch(CFG.batch_size, drop_remainder=True)

val_ds = build_ds(train_df.full_text.tolist(), np.asarray(train_df.score.tolist())-1)
val_ds = val_ds.batch(CFG.batch_size, drop_remainder=False)

### Training

In [12]:
def rate_scheduler(epoch, lr):
    lr_max = 5e-4
    decay = 0.95
    ramp_ep = 3
    if epoch <= ramp_ep:
        return lr + (lr_max - lr) / (ramp_ep - epoch + 1)
    else:
        return lr * decay

In [13]:
checkpoint = keras.callbacks.ModelCheckpoint("/kaggle/working/v1-{epoch}.weights.h5", monitor="val_loss", save_weights_only=True)
best_checkpoint = keras.callbacks.ModelCheckpoint("/kaggle/working/v1.weights.h5", monitor="val_loss", save_weights_only=True, save_best_only=True)
rate_control = keras.callbacks.LearningRateScheduler(rate_scheduler)

In [14]:
history = model.fit(train_ds, validation_data=val_ds, epochs=5, callbacks=[checkpoint, rate_control, best_checkpoint])

Epoch 1/5


I0000 00:00:1714233443.155063      73 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m490s[0m 811ms/step - loss: 1.2641 - val_loss: 0.9383 - learning_rate: 1.3250e-04
Epoch 2/5
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m339s[0m 784ms/step - loss: 1.0539 - val_loss: 0.8199 - learning_rate: 2.5500e-04
Epoch 3/5
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m332s[0m 769ms/step - loss: 0.9957 - val_loss: 0.8782 - learning_rate: 3.7750e-04
Epoch 4/5
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m328s[0m 761ms/step - loss: 0.9651 - val_loss: 0.8745 - learning_rate: 5.0000e-04
Epoch 5/5
[1m432/432[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m330s[0m 764ms/step - loss: 0.9211 - val_loss: 0.7999 - learning_rate: 4.7500e-04


### Evaluate

In [15]:
def load_model(path):
    classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
        CFG.preset, preprocessor=None, num_classes=6, activation="softmax"
    )

    inp = classifier.input
    logit = classifier(inp)

    model = keras.Model(inp, logit)

    model.load_weights(path)
    
    return model

In [16]:
test_model = load_model("v1.weights.h5")

Attaching 'config.json' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...
Attaching 'config.json' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...
Attaching 'model.weights.h5' from model 'keras/deberta_v3/keras/deberta_v3_extra_small_en/2' to your Kaggle notebook...


In [17]:
test_df = pd.read_csv(f'{BASE_PATH}/test.csv')
test_inp = preprocessor(test_df.full_text.tolist())
prob = test_model(test_inp)
predict = np.argmax(prob, axis=-1) + 1

In [18]:
out_df = pd.DataFrame({
    'essay_id': test_df.essay_id,
    'score': predict
})

In [19]:
out_df.to_csv("submission.csv")

In [20]:
out_df

Unnamed: 0,essay_id,score
0,000d118,3
1,000fe60,3
2,001ab80,4
