<a href="https://colab.research.google.com/github/b-aser/jkug3-llm-model/blob/main/fine_tune_10k_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
pip install transformers datasets torch transformers[torch]

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_c

In [3]:
from datasets import load_dataset

# Replace 'path/to/train.json' and 'path/to/test.json' with the actual paths
dataset = load_dataset("json", data_files={"train": "/content/drive/MyDrive/Dataset/nq_dataset_8.5K_10k_train.jsonl", "test": "/content/drive/MyDrive/Dataset/nq_dataset_1.5K_10k_test.jsonl"})

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [4]:
from transformers import BertTokenizerFast

# Load the tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizerFast.from_pretrained(model_name)

def preprocess_function(examples):
    # Tokenize the questions and contexts
    tokenized_examples = tokenizer(
        examples["question"], examples["context"], truncation="only_second", max_length=384, stride=128, return_overflowing_tokens=True, return_offsets_mapping=True, padding="max_length"
    )

    # Map the start and end positions of the answer
    offset_mapping = tokenized_examples.pop("offset_mapping")
    sample_map = tokenized_examples.pop("overflow_to_sample_mapping")

    # Initialize empty lists for start and end positions
    tokenized_examples["start_positions"] = []
    tokenized_examples["end_positions"] = []

    for i, offsets in enumerate(offset_mapping):
        # Get the example that this tokenized example originates from
        sample_idx = sample_map[i]
        answers = examples["answers"][sample_idx]

        # If no answer exists, set start and end positions to 0
        if len(answers["answer_start"]) == 0:
            tokenized_examples["start_positions"].append(0)
            tokenized_examples["end_positions"].append(0)
            continue

        # Find the start and end positions of the answer in the tokenized input
        answer_start_char = answers["answer_start"][0]
        answer_end_char = answer_start_char + len(answers["text"][0])

        start_token_idx = 0
        end_token_idx = 0

        for token_idx, (offset_start, offset_end) in enumerate(offsets):
            if offset_start == 0 and offset_end == 0:  # Special tokens
                continue
            if offset_start <= answer_start_char < offset_end:
                start_token_idx = token_idx
            if offset_start <= answer_end_char <= offset_end:
                end_token_idx = token_idx

        # Append the start and end positions
        tokenized_examples["start_positions"].append(start_token_idx)
        tokenized_examples["end_positions"].append(end_token_idx)

    return tokenized_examples

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [5]:
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)

Map:   0%|          | 0/8500 [00:00<?, ? examples/s]

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [6]:
from transformers import BertForQuestionAnswering

# Load the pre-trained BERT model for question answering
model = BertForQuestionAnswering.from_pretrained(model_name)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    save_steps=10_000,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=500,
)



In [8]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

In [9]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mb-aser[0m ([33mb-aser-examples[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,1.4043,1.422856
2,0.975,1.363832
3,0.6081,1.515084
4,0.3593,1.794247
5,0.2212,2.05741


TrainOutput(global_step=8530, training_loss=0.7826997092851861, metrics={'train_runtime': 5246.0552, 'train_samples_per_second': 13.003, 'train_steps_per_second': 1.626, 'total_flos': 1.336826869555968e+16, 'train_loss': 0.7826997092851861, 'epoch': 5.0})

In [10]:
# Save the model and tokenizer locally
model.save_pretrained("./qa_model_10k")
tokenizer.save_pretrained("./qa_model_10k")

('./qa_model_10k/tokenizer_config.json',
 './qa_model_10k/special_tokens_map.json',
 './qa_model_10k/vocab.txt',
 './qa_model_10k/added_tokens.json',
 './qa_model_10k/tokenizer.json')

In [12]:
!cp -r qa_model_10k/ results/ wandb/ '/content/drive/MyDrive/Final Year Project/Finetuned_with_10k'


In [13]:
!cp -r logs/ '/content/drive/MyDrive/Final Year Project/Finetuned_with_10k'

In [14]:
!pip install huggingface_hub



In [15]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
The token `upload model` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authe

In [17]:

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load your fine-tuned model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained("./qa_model_10k")
tokenizer = AutoTokenizer.from_pretrained("./qa_model_10k")

# Push the model and tokenizer to the Hub
model.push_to_hub("b-aser/jku-g3-llm-v2")
tokenizer.push_to_hub("b-aser/jku-g3-llm-v2")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ./qa_model_10k and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/b-aser/jku-g3-llm-v2/commit/ae3891333a03a644c9d69ffce8574e37127d953e', commit_message='Upload tokenizer', commit_description='', oid='ae3891333a03a644c9d69ffce8574e37127d953e', pr_url=None, repo_url=RepoUrl('https://huggingface.co/b-aser/jku-g3-llm-v2', endpoint='https://huggingface.co', repo_type='model', repo_id='b-aser/jku-g3-llm-v2'), pr_revision=None, pr_num=None)

1. Evaluating Accuracy
Accuracy measures the proportion of correct predictions made by the model out of all predictions. It is most commonly used for classification tasks.

Steps:
Prepare the dataset : Split your data into training, validation, and test sets.
Generate predictions : Use the fine-tuned model to predict labels for the test set.
Compare predictions with ground truth : Count the number of correct predictions.
Calculate accuracy :
Accuracy=
Total Number of Predictions
Number of Correct Predictions


In [23]:
pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [26]:
# prompt: evaluate this fine tune model it's accuracy and f1-score do not use load_matric function instead use the 'evaluate' library

import evaluate

# Load the metric
metric = evaluate.load("accuracy")

# Replace with your actual predictions and labels
predictions = [0, 1, 0, 1, 0]  # Example predictions
references = [0, 0, 1, 1, 0] # Example ground truth labels

# Compute the accuracy
results = metric.compute(predictions=predictions, references=references)
print(results)

# Load the F1 metric
metric = evaluate.load("f1")

# Compute the F1 score
results = metric.compute(predictions=predictions, references=references, average="weighted") # Choose appropriate averaging method
results


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

{'accuracy': 0.6}


Downloading builder script:   0%|          | 0.00/6.79k [00:00<?, ?B/s]

{'f1': 0.6}

In [28]:
pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=5970039a4e483656790c1cce8c77d2a8a4234e2893710a46acce64e6fb924732
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [29]:
# prompt: evaluate this fine tune model it's BLEU andROUGE do not use load_matric function instead use the 'evaluate' library

# Load the BLEU metric
bleu_metric = evaluate.load("bleu")

# Load the ROUGE metric
rouge_metric = evaluate.load("rouge")

# Example predictions and references (replace with your actual data)
predictions = ["This is a sample prediction.", "Another prediction."]
references = [["This is a reference."], ["Another reference."]]


# Compute BLEU score
bleu_results = bleu_metric.compute(predictions=predictions, references=references)
print("BLEU:", bleu_results)

# Compute ROUGE scores
rouge_results = rouge_metric.compute(predictions=predictions, references=references)
print("ROUGE:", rouge_results)


BLEU: {'bleu': 0.0, 'precisions': [0.6666666666666666, 0.2857142857142857, 0.2, 0.0], 'brevity_penalty': 1.0, 'length_ratio': 1.125, 'translation_length': 9, 'reference_length': 8}
ROUGE: {'rouge1': np.float64(0.5833333333333333), 'rouge2': np.float64(0.28571428571428575), 'rougeL': np.float64(0.5833333333333333), 'rougeLsum': np.float64(0.5833333333333333)}
