## Packages & Libraries

In [1]:
%%capture
# Install or upgrade the Hugging Face / quantization packages in the kernel's
# environment; the %%capture cell magic above hides pip's output.
# NOTE(review): -U without version pins means a re-run may pick up different
# releases than the ones this notebook was executed with — consider pinning.
%pip install -U bitsandbytes
%pip install -U transformers
%pip install -U accelerate
%pip install -U peft

In [2]:
import pandas as pd
import os

from huggingface_hub import login as hf_login
from kaggle_secrets import UserSecretsClient

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch

2025-04-12 22:05:22.824845: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744495523.064180      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744495523.126300      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## Functions & Configurations

In [3]:
# --- Paths and model identifiers -------------------------------------------
data_loc = "/kaggle/input/grading-questions"
base_model = "meta-llama/Llama-3.2-3B-Instruct"
fine_tuned_model = "/kaggle/input/nlp-short-answer-grading-llama-fine-tunning/llama-3.2-fine-tuned-model/checkpoint-1125/"
output_dir = "llama-3.2-fine-tuned-model"

# --- Hugging Face credentials ----------------------------------------------
# Tokens are read from Kaggle secrets rather than being written in the notebook.
user_secrets = UserSecretsClient()
access_token = user_secrets.get_secret("hf_read_token_access")
write_access_token = user_secrets.get_secret("hf_write_token_access")

# Authenticate this session with the write-scoped token.
hf_login(token=write_access_token)

In [4]:
# Reload the tokenizer and the base model identified by `base_model`.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# The base weights are loaded in float16 and placed automatically across the
# available devices. `trust_remote_code` is deliberately left at its default
# (disabled): the flag allows code shipped inside a model repository to run
# locally, and a Llama checkpoint is loaded through the architecture built
# into transformers, so enabling it only widens the attack surface.
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [5]:
# Load the fine-tuned adapter checkpoint on top of the reloaded base model and
# merge it in, so that `model` is the merged result returned by
# merge_and_unload().
model = PeftModel.from_pretrained(
    model=base_model_reload,
    model_id=fine_tuned_model,
).merge_and_unload()

In [6]:
def generate_test_prompt(data_point):
    """Build the grading prompt for a single test example.

    Args:
        data_point: Mapping-like record (for example a DataFrame row) that
            provides the 'Question', 'Response' and 'CorrectAnswer' fields.

    Returns:
        A newline-separated prompt: the grading instruction, the question, the
        student response, the reference answer, and a final open ``label:``
        line with no trailing whitespace.
    """
    instruction = (
        "You are a professor and need to grade the student response as "
        "Correct, Incorrect, or Partially Correct. Then return your decision "
        "with the corresponding grade label."
    )
    prompt_lines = [
        instruction,
        f"Question: {data_point['Question']}",
        f"Response: {data_point['Response']}",
        f"Correct Answer: {data_point['CorrectAnswer']}",
        "label:",
    ]
    return "\n".join(prompt_lines)

## Preparing Dataset

In [7]:
# Read the test split from the input dataset folder (file is Windows-1252 encoded)
df_test = pd.read_csv(
    os.path.join(data_loc, "test.csv"),
    encoding='windows-1252',
)

# Numeric grade -> text label used by the prompt
label_map = {-1: "Incorrect", 0: "Partially Correct", 1: "Correct"}
df_test['label_text'] = df_test['label'].map(label_map)

# Build one grading prompt per row
df_test['input_text'] = df_test.apply(generate_test_prompt, axis=1).tolist()

# Features (prompts) and targets (text labels) for evaluation
X_test = df_test['input_text'].to_frame()
y_test = df_test['label_text']

X_test.head()

Unnamed: 0,input_text
0,You are a professor and need to grade the stud...
1,You are a professor and need to grade the stud...
2,You are a professor and need to grade the stud...
3,You are a professor and need to grade the stud...
4,You are a professor and need to grade the stud...


## Inference

In [11]:
# Position of the test example to grade.
n = 11
prompt = X_test['input_text'].iloc[n]

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

# return_full_text=False makes the pipeline return only the newly generated
# continuation. The prompt ends with "label:" (no trailing space), so locating
# the answer by splitting the full text on "label: " only worked when the model
# happened to emit a leading space; otherwise the whole prompt was reported as
# the predicted label.
# NOTE(review): sampling is enabled and no seed is set in the visible cells, so
# repeated runs are not guaranteed to produce the same label.
outputs = pipe(
    prompt,
    max_new_tokens=120,
    do_sample=True,
    temperature=0.1,
    return_full_text=False,
)
pred_label = outputs[0]["generated_text"].strip()

print(f"Prompt:\n{prompt}\n")
print("Pred Label: ", pred_label)
print("True Label: ", y_test.iloc[n])

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Prompt:
You are a professor and need to grade the student response as Correct, Incorrect, or Partially Correct. Then return your decision with the corresponding grade label.
Question: College admissions offices often keep records of admission standards over time.  You want to know if your college's admission standards have changed in the last 20 years, what kind of study is this?
Response: A retrospective study
Correct Answer: Retrospective
label:

Pred Label:  Correct
True Label:  Correct


## Push the model and tokenizer to the Hugging Face Hub.

In [9]:
# Local folder name for the exported model and tokenizer files.
model_dir = "Llama-3.2-3B-Instruct-Short-Answer-Classification"

# Write the model first, then the tokenizer, into the same folder.
model.save_pretrained(save_directory=model_dir)
tokenizer.save_pretrained(save_directory=model_dir)

('Llama-3.2-3B-Instruct-Short-Answer-Classification/tokenizer_config.json',
 'Llama-3.2-3B-Instruct-Short-Answer-Classification/special_tokens_map.json',
 'Llama-3.2-3B-Instruct-Short-Answer-Classification/tokenizer.json')

In [10]:
# Upload the model and then the tokenizer to the Hub repository named by
# `model_dir`, without going through a temporary directory.
model.push_to_hub(repo_id=model_dir, use_temp_dir=False)
tokenizer.push_to_hub(repo_id=model_dir, use_temp_dir=False)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/Gabriel-Ferreira/Llama-3.2-3B-Instruct-Short-Answer-Classification/commit/a74980b7341bad6893294bf4187d500915274c3f', commit_message='Upload tokenizer', commit_description='', oid='a74980b7341bad6893294bf4187d500915274c3f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Gabriel-Ferreira/Llama-3.2-3B-Instruct-Short-Answer-Classification', endpoint='https://huggingface.co', repo_type='model', repo_id='Gabriel-Ferreira/Llama-3.2-3B-Instruct-Short-Answer-Classification'), pr_revision=None, pr_num=None)