### Imports

In [1]:
# General imports
import pandas as pd
import warnings
import logging
import os
from tensorflow import keras
from utils import (move_to_working_folder, preprocess, build_deberta, generate_predictions, seed_everything, CFG)

# Disabling unnecessary warnings so the demo output stays readable
# Ignore every Python warning raised from this point on.
warnings.simplefilter("ignore")
# Disable all logging calls of severity ERROR and below.
logging.disable(logging.ERROR)
# Environment flag presumably consumed by the Hugging Face tokenizers library
# to turn off its internal parallelism -- TODO confirm against the tokenizer in use.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# NOTE(review): seeding is currently disabled; re-enable the call below if
# reproducible runs are needed (assumes seed_everything seeds the RNG used by
# pandas sampling -- confirm in utils).
# seed_everything(random_seed=CFG.random_seed)

2024-07-28 20:23:06.777287: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-28 20:23:06.777427: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-28 20:23:06.914475: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Processing /kaggle/input/autocorrect/autocorrect-2.6.1.tar
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: autocorrect
  Building wheel for autocorrect (setup.py) ... [?25ldone
[?25h  Created wheel for autocorrect: filename=autocorrect-2.6.1-py3-none-any.whl size=622364 sha256=8aa6210c922b20af5e41ade854cb9095810c88b659baf8d4e8d4b2c0d965b136
  Stored in directory: /root/.cache/pip/wheels/db/69/42/0fb0421d2fe70d195a04665edc760cfe5fd341d7bb8d8e0aaa
Successfully built autocorrect
Installing collected packages: autocorrect
Successfully installed autocorrect-2.6.1


### Load Model

In [2]:
# Choose the model to load
# `epochs` only selects which saved model file name is built below
# (presumably the number of training epochs of that checkpoint -- confirm).
epochs = 12
# Store the file name on CFG (imported from utils) so the loading cell can
# build its source and destination paths from it.
CFG.final_model_path = f'full_model_scaled-{epochs}.keras'

In [3]:
# Load the model
# Destination path of the model file inside the Kaggle working directory.
model_path = '/kaggle/working/' + CFG.final_model_path
# Presumably copies the file from the input dataset to `model_path`
# -- confirm against move_to_working_folder in utils.
move_to_working_folder('/kaggle/input/models/' + CFG.final_model_path, model_path)

# Deserialize the saved Keras model from the working-directory path.
model = keras.models.load_model(model_path)
# build_deberta() comes from utils; the scoring cell uses its `.tokenizer` attribute.
deberta = build_deberta()
print(f'{model_path} loaded successfully')

/kaggle/working/full_model_scaled-12.keras loaded successfully


### Sample Random Summary

In [7]:
# Load the LLM-generated test set and draw a single row at random
df = pd.read_excel('/kaggle/input/llm-generate-test/LLM_Generate_Test.xlsx')
random_row = df.sample(n=1)

# Fields are read by column position:
# 0 = student id, 1 = prompt text, 2 = prompt question, 3 = summary
student_id, prompt_text, prompt_question, summary = (
    random_row.iat[0, col] for col in range(4)
)

# Show the sampled record (the prompt text itself is not printed)
print(f'student_id: {student_id}\n')
print(f'prompt_question: {prompt_question}\n')
print(f'summary: {summary}')

student_id: 20790

prompt_question: Summarize the role and contribution of Nikola Tesla in the field of electricity and magnetism. How did his inventions and theories revolutionize the way electricity is used today? Cite evidence from the text. 



summary: Nikola Tesla, a Serbian-American inventor, significantly revolutionized the field of electricity and magnetism. His crucial contributions include the design of the modern alternating current (AC) electrical supply system and the invention of the Tesla coil—transformers creating high voltage at low current. Tesla identified the limitations of Thomas Edison's direct current (DC) power system and developed the AC system as an efficient alternative. This innovation allowed electricity to be transmitted over longer distances, permitting the establishment of centralized power plants. Tesla's AC system, demonstrated during the Chicago World's Columbian Exposition in 1893, caught the attention of George Westinghouse, leading to a significan

### Score The Summary

In [8]:
def run_demo(student_id, summary, prompt_question, prompt_text):
    """Score a single student summary by content and wording.

    The summary, prompt question and prompt text are passed through
    ``preprocess`` with the notebook-level ``deberta.tokenizer``; the result
    is bundled with the student id and handed to ``generate_predictions``
    together with the notebook-level ``model``.

    Args:
        student_id: Identifier of the student; forwarded as a one-element list.
        summary: The student's summary text.
        prompt_question: The question the summary answers.
        prompt_text: The source text the summary is based on.

    Returns:
        A ``(content_score, wording_score)`` tuple holding the first element
        of each of the two prediction sequences.
    """
    token_ids, attn_mask, heads_mask = preprocess(
        summary, prompt_question, prompt_text, deberta.tokenizer, is_demo=True
    )

    model_inputs = dict(
        input_ids=token_ids,
        attention_mask=attn_mask,
        head_mask=heads_mask,
        student_id=[student_id],
    )

    # The first returned item is skipped; the remaining two are the
    # content and wording predictions, in that order.
    predictions = generate_predictions(model, model_inputs)
    content_preds, wording_preds = predictions[1:]
    return content_preds[0], wording_preds[0]

# Score the sampled summary with the loaded model; run_demo returns the
# (content, wording) pair for this single record.
content_score, wording_score = run_demo(student_id, summary, prompt_question, prompt_text)

# Report the two predicted scores alongside the student id.
print(f'Student id: {student_id}')
print(f'Content score: {content_score}')
print(f'Wording score: {wording_score}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 247ms/step
Student id: 20790
Content score: 2.16015625
Wording score: 0.8828125
