In [None]:
import os
from google.colab import userdata, drive

# Notebook-level switches and repository settings.
COLAB = True  # gates the Drive mount and GitHub setup performed later in this cell
KAGGLE = True  # NOTE(review): not read in the visible cells; confirm where it is consumed
DOWNLOAD_DATA = True  # NOTE(review): not read in the visible cells; confirm where it is consumed
SAVE_TO_GITHUB = True  # NOTE(review): not read in the visible cells; confirm where it is consumed
GIT_REPOSITORY = "CS221-project"  # repository name used in the clone URL and the local project path
FILE_NAME = "colab_tuning_legacy.ipynb"  # NOTE(review): presumably this notebook's file name; not read in the visible cells

if COLAB:
    %cd / content
    drive.mount('/content/drive', force_remount=True)
if COLAB:
    PARENT_DIRECTORY_PATH = "/content"
    # In case you want to clone in your drive:
    PARENT_DIRECTORY_PATH = "/content/drive/MyDrive"
    PROJECT_PATH = PARENT_DIRECTORY_PATH + "/" + GIT_REPOSITORY
    %cd "{PARENT_DIRECTORY_PATH}"
if COLAB:
    import json
    import os

    with open(f"{PARENT_DIRECTORY_PATH}/Git/git.json", "r") as f:
        parsed_json = json.load(f)

    GIT_USER_NAME = parsed_json["GIT_USER_NAME"]
    GIT_TOKEN = parsed_json["GIT_TOKEN"]
    GIT_USER_EMAIL = parsed_json["GIT_USER_EMAIL"]

    GIT_PATH = (
        f"https://{GIT_TOKEN}@github.com/{GIT_USER_NAME}/{GIT_REPOSITORY}.git"
    )

    %cd "{PARENT_DIRECTORY_PATH}"

    if os.path.exists(f"{PARENT_DIRECTORY_PATH}/{GIT_REPOSITORY}"):
        %cd "{PROJECT_PATH}"
        !git pull
    else:
        !git clone "{GIT_PATH}"  # Clone the github repository
        %cd "{PROJECT_PATH}"

In [None]:
# Install Keras 3 last. See https://keras.io/getting_started/ for more details.
!pip install -q -U keras-nlp
!pip install -q -U keras>=3
### Select a backend
import os
os.environ["KERAS_BACKEND"] = "jax"  # Or "torch" or "tensorflow".
# Avoid memory fragmentation on JAX backend.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "1.00"

In [None]:
import keras
import keras_nlp

In [None]:
# KAGGLE_CONFIG_DIR names the *directory* that contains kaggle.json, not the
# credentials file itself, so point it at the Kaggle folder.
os.environ["KAGGLE_CONFIG_DIR"] = f"{PARENT_DIRECTORY_PATH}/Kaggle"

In [None]:
# Build the Gemma causal language model from the "gemma_2b_en" preset.
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset(
    "gemma_2b_en",
)

In [None]:
from utils import preprocess_qa_data, compute_rouge_l

# Read the raw QA test file and hand its full text to the project parser.
with open("qa_test_data.txt") as qa_file:
    content = qa_file.read()

data = preprocess_qa_data(content)

In [None]:
# Generate responses and compute ROUGE-L metric
# Imported here so this cell does not rely on `json` having been imported by
# another cell.
import json

template = "Instruction:\n{instruction}\n\nResponse:\n{response}"
results = []
rouge_l_scores = []

for item in data:
    # Leave the response slot empty so the model completes it.
    prompt = template.format(instruction=item["instruction"], response="")
    # generate() returns a single string when given a single string prompt,
    # so the result must not be indexed: `[0]` would keep only its first
    # character and every score would be computed on that one character.
    # NOTE(review): the generated text may include the prompt itself; confirm
    # whether it should be stripped before scoring.
    model_response = gemma_lm.generate(prompt, max_length=256)

    rouge_l_score = compute_rouge_l(item["response"], model_response)
    rouge_l_scores.append(rouge_l_score)

    results.append(
        {
            "instruction": item["instruction"],
            "model_response": model_response,
            "original_response": item["response"],
            "rouge_l_score": rouge_l_score,
        }
    )

# Save results to a file
with open("baseline_evaluation.json", "w") as outfile:
    json.dump(results, outfile, indent=4)


In [None]:
# Report the mean ROUGE-L score over all evaluated items.
num_scores = len(rouge_l_scores)
average_rouge_l = sum(rouge_l_scores) / num_scores
print(f"Average ROUGE-L Metric: {average_rouge_l}")

In [6]:
import json
def load_rouge_l_scores(path):
    """Return the "rouge_l_score" value of every record in a JSON results file.

    Args:
        path: Path to a JSON file holding a list of records, each of which
            has a "rouge_l_score" key.

    Returns:
        A list with one score per record, in file order.
    """
    with open(path, "r") as results_file:
        records = json.load(results_file)
    return [record["rouge_l_score"] for record in records]


# Both result files go through the same helper, which keeps the loaded
# records local instead of rebinding the notebook-level name `data` that the
# evaluation loop iterates over.
ft_scores = load_rouge_l_scores("fine_tuned_evaluation.json")
base_scores = load_rouge_l_scores("baseline_evaluation.json")

In [8]:
# histogram of ROUGE-L scores
import matplotlib.pyplot as plt

# Bin both histograms over one common value range so their bin edges line up;
# with independent binning the overlaid bars are not directly comparable.
all_scores = [*base_scores, *ft_scores]
shared_range = (min(all_scores), max(all_scores)) if all_scores else None

fig, ax = plt.subplots()
ax.hist(base_scores, bins=20, range=shared_range, alpha=0.5, label="Baseline")
ax.hist(ft_scores, bins=20, range=shared_range, alpha=0.5, label="Fine-tuned")
ax.legend(loc="upper right")
ax.set_xlabel("ROUGE-L Score")
ax.set_ylabel("Frequency")
ax.set_title("ROUGE-L Score Distribution")
plt.show()

In [7]:
# Quietly install/upgrade matplotlib, which the histogram cell imports.
!pip install -q -U matplotlib