In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
# Select the Keras Core backend; this is set before keras_core is imported below.
os.environ["KERAS_BACKEND"] = "tensorflow"  # or "jax" or "torch"
# os.environ["WANDB_SILENT"] = "false" # for wandb

import keras_nlp
import keras_core as keras 
import keras_core.backend as K


# import torch
# import jax
import tensorflow as tf
# from tensorflow import keras
# import tensorflow.keras.backend as K

import numpy as np 
import pandas as pd

In [4]:
# Report the library versions this notebook was executed with.
print(f"TensorFlow: {tf.__version__}")
print(f"Keras: {keras.__version__}")
print(f"KerasNLP: {keras_nlp.__version__}")

TensorFlow: 2.13.0
Keras: 0.1.4
KerasNLP: 0.6.1


In [5]:
class CFG:
    """Notebook-wide settings, accessed directly as class attributes (e.g. ``CFG.seed``)."""

    # --- Logging / bookkeeping ---
    verbose = 0  # Verbosity level
    wandb = True  # Enable Weights & Biases logging
    competition = 'kaggle-llm-science-exam'  # Competition name

    # --- Model ---
    preset = "deberta_v3_base_en"  # Pretrained model preset name
    sequence_length = 200  # Length of the input sequence

    # --- Hardware ---
    device = 'GPU'  # Target device

    # --- Reproducibility ---
    seed = 42  # Seed for random number generation

    # --- Cross-validation ---
    num_folds = 5  # Total number of folds
    selected_folds = [1, 2]  # Folds that are actually trained

    # --- Training / input pipeline ---
    epochs = 10  # Number of training epochs
    batch_size = 2  # Samples per batch
    drop_remainder = True  # Discard the final incomplete batch
    cache = True  # Cache data after the first pass; use only with `TPU` to avoid OOM

    augment = True  # Augmentation (shuffle the answer options)

    scheduler = 'cosine'  # Learning-rate schedule

    external_data = True  # Whether external data is used

    # --- Labels ---
    class_names = ["A", "B", "C", "D", "E"]  # Answer option names
    num_classes = len(class_names)  # How many answer options exist
    class_labels = [*range(num_classes)]  # Integer labels [0, 1, 2, 3, 4]
    label2name = dict(enumerate(class_names))  # Integer label -> option name
    name2label = dict(zip(class_names, class_labels))  # Option name -> integer label

In [7]:
def get_device():
    """Detect the available accelerator and build a matching distribution strategy.

    Tries to connect to a TPU first. If that fails, it falls back to the
    visible logical GPUs, and finally to TensorFlow's default strategy on CPU.

    Returns:
        tuple: ``(strategy, device)`` where ``strategy`` is the
        ``tf.distribute`` strategy to use and ``device`` is ``'TPU'``,
        ``'GPU'`` or ``'CPU'`` depending on what was actually detected.
    """
    try:
        # Connect to the TPU cluster and build a TPU strategy.
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
        strategy = tf.distribute.TPUStrategy(tpu)
        print('> Running on TPU', tpu.master(), end=' | ')
        print('Num of TPUs: ', strategy.num_replicas_in_sync)
        # Report the device that was actually detected, not a configured default.
        device = 'TPU'
    except Exception:
        # Only ordinary errors mean "no TPU"; KeyboardInterrupt/SystemExit
        # must still propagate so a hanging connect can be interrupted.
        gpus = tf.config.list_logical_devices('GPU')
        ngpu = len(gpus)
        if ngpu:
            # MirroredStrategy covers both the single-GPU and multi-GPU case.
            strategy = tf.distribute.MirroredStrategy(gpus)
            print("> Running on GPU", end=' | ')
            print("Num of GPUs: ", ngpu)
            device = 'GPU'
        else:
            # No accelerator found: use the default strategy on CPU.
            print("> Running on CPU")
            strategy = tf.distribute.get_strategy()
            device = 'CPU'
    return strategy, device


# Resolve the distribution strategy once and record the detected device on CFG.
strategy, CFG.device = get_device()
# Store the number of replicas the strategy keeps in sync on CFG as well.
CFG.replicas = strategy.num_replicas_in_sync

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
> Running on GPU | Num of GPUs:  1


In [8]:
# Load the training questions; the path is relative to the notebook's working directory.
train_df = pd.read_csv("../data/train_extra_data.csv")
# Preview the first rows.
# NOTE(review): the rendered output shows an `Unnamed: 0` column, which looks like a
# saved index — confirm whether it should be dropped or read with index_col=0.
train_df.head()

Unnamed: 0,prompt,A,B,C,D,E,answer
0,Who is the dean of the University of Central F...,John Doe,Michael Georgiopoulos,Jane Smith,Robert Johnson,David Brown,B
1,What degrees does the college offer?,Engineering and computer science,Business and economics,Psychology and sociology,Mathematics and statistics,Arts and humanities,A
2,How many undergraduate and graduate students w...,"Over 5,000","Over 10,000","Over 7,500","Over 3,000","Over 15,000",C
3,What is the University of Central Florida list...,A liberal arts college,A research university,An Ivy League institution,A community college,A vocational school,B
4,What are some of the research contributions ma...,Architecture and design,Medicine and healthcare,Environmental science,Modeling and simulation,Linguistics and literature,D
