### Imports

In [1]:
# General imports
import numpy as np
import pandas as pd
from IPython.display import display
import random
import shutil
import warnings
import logging
import os

# Neural network imports
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.callbacks import EarlyStopping
from transformers import TFAutoModel, AutoTokenizer

from utils import (move_to_working_folder, preprocess, PreTrainedModel,
                   build_deberta, ExpandDimsLayer, MaskedEmbeddingsLayer,
                   mcrmse, generate_predictions, seed_everything, Speller, CFG)


# Disabling unnecessary warnings
warnings.simplefilter("ignore")  # ignore every Python warning category
logging.disable(logging.ERROR)  # suppress log records at ERROR level and below, process-wide
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # presumably read by the Hugging Face tokenizers backend — TODO confirm

# Seed the random number generators from the shared config value so the run is repeatable
# (which libraries get seeded is decided inside utils.seed_everything — verify there).
seed_everything(random_seed=CFG.random_seed)

2024-07-05 17:07:52.027302: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-05 17:07:52.027455: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-05 17:07:52.197948: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  pid, fd = os.forkpty()


Processing /kaggle/input/autocorrect/autocorrect-2.6.1.tar
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: autocorrect
  Building wheel for autocorrect (setup.py) ... [?25ldone
[?25h  Created wheel for autocorrect: filename=autocorrect-2.6.1-py3-none-any.whl size=622364 sha256=41bef5459da4afb9eb5af6c8462c1b8c2bd76e728b5a0fc81b6f493fa89b7c78
  Stored in directory: /root/.cache/pip/wheels/db/69/42/0fb0421d2fe70d195a04665edc760cfe5fd341d7bb8d8e0aaa
Successfully built autocorrect
Installing collected packages: autocorrect
Successfully installed autocorrect-2.6.1


### Load Model

In [None]:
# Choose the model to load
# `epochs` is only interpolated into the checkpoint file name below; no training
# happens in the cells shown here. The resulting name is stored on the shared
# CFG object and read back by the loading cell.
epochs = 12
CFG.final_model_path = f'full_model_scaled-{epochs}.keras'

In [None]:
# Bring the selected checkpoint from the read-only input folder to the working
# folder (presumably a copy/move — see utils.move_to_working_folder), then
# restore the Keras model from the working-folder path.
model_path = f'/kaggle/working/{CFG.final_model_path}'
move_to_working_folder(f'/kaggle/input/models/{CFG.final_model_path}', model_path)
model = keras.models.load_model(model_path)

# `deberta.tokenizer` is what the scoring cell hands to preprocess().
deberta = build_deberta()

print(f'{model_path} loaded successfully')

### Sample Random Summary

In [3]:
# Read the LLM-generated test sheet and draw a single row at random.
df = pd.read_excel('/kaggle/input/llm-generate-test/LLM_Generate_Test.xlsx')
random_row = df.sample(n=1)

# The sheet is addressed by column position:
# 0 = student id, 1 = prompt text, 2 = prompt question, 3 = summary.
student_id, prompt_text, prompt_question, summary = (
    random_row.iloc[0, column] for column in range(4)
)

# Show the sampled record (the prompt text itself is not printed).
print(
    f'student_id: {student_id}\n',
    f'prompt_question: {prompt_question}\n',
    f'summary: {summary}',
    sep='\n',
)

student_id: 12148

prompt_question: Describe the process of photosynthesis and explain its importance in the plant kingdom. 



summary: Photosynthesis is the process through which plants produce oxygen and glucose. It happens in the chloroplasts of the plant cells which have a green pigment called chlorophyll. Sunlight, water, and carbon dioxide are the three critical components that the plant needs for photosynthesis. The first step of the process is absorbing sunlight and converting it into chemical energy. The plant then takes in water through the roots and split it into hydrogen ions and oxygen, which is then released into the atmosphere. Also, carbon dioxide combines with hydrogen ions to form glucose. This process is important to plants as it helps in the production of oxygen, source of energy through glucose production, and in maintaining balance in the atmosphere.


### Score The Summary

In [4]:
def run_demo(student_id, summary, prompt_question, prompt_text):
    """Score a single student summary for content and wording.

    Uses two notebook-level objects: ``deberta`` (for its ``tokenizer``) and
    ``model`` (passed to ``generate_predictions``).

    Args:
        student_id: Identifier of the student; forwarded with the model inputs.
        summary: The summary text to score.
        prompt_question: The question the summary responds to.
        prompt_text: The prompt text that accompanies the question.

    Returns:
        A ``(content_score, wording_score)`` tuple: element 0 of each of the two
        items that follow the first item returned by ``generate_predictions``.
    """
    token_ids, attn_mask, heads_mask = preprocess(
        summary, prompt_question, prompt_text, deberta.tokenizer, is_demo=True
    )

    model_inputs = dict(
        input_ids=token_ids,
        attention_mask=attn_mask,
        head_mask=heads_mask,
        student_id=[student_id],  # wrapped in a list — presumably a batch of one
    )

    predictions = generate_predictions(model, model_inputs)
    # Skip the first returned item; the next two are the content and wording scores.
    content_preds, wording_preds = predictions[1:]
    return content_preds[0], wording_preds[0]

# Score the sampled summary and report both grades for that student.
content_score, wording_score = run_demo(
    student_id, summary, prompt_question, prompt_text
)

print(
    f'Grade of student: {student_id}',
    f'Content score: {content_score}',
    f'Wording score: {wording_score}',
    sep='\n',
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 41s/step
Grade of student: 12148
Content score: 1.6796875
Wording score: 0.22265625
