In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install -q  torch peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 accelerate

import torch
import datasets
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig
from trl import SFTTrainer

## **Load dataset from HuggingFace**

In [3]:
# Dataset
data_name = "jiuyuan/policy_AI"
training_data = load_dataset(data_name, split="train")
test_data = load_dataset(data_name, split="test")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
training_data

Dataset({
    features: ['input', 'output', '__index_level_0__'],
    num_rows: 75
})

In [5]:
df_test = test_data.to_pandas()
df_test

Unnamed: 0,input,output,__index_level_0__
0,Why is international cooperation on AI important?,International cooperation on AI is crucial due...,70
1,Can you describe the approach taken by the res...,The research team prepared generic interview p...,21
2,What concerns do critics have regarding the EU...,"Critics, including EU businesses, have express...",37
3,How does the European Union classify AI system...,The European Union's AI Act classifies AI syst...,36
4,What are the Universal Guidelines for Artifici...,The Universal Guidelines include rights to tra...,58
5,What role do whistleblowers and complaints pla...,Whistleblowers and individual complaints are c...,48
6,How's UK's approach different from that of the...,"Unlike the European Union (EU), the UK’s appro...",14
7,How does Japan's approach to AI regulation dif...,"Japan focuses on a risk-based, agile, and mult...",31
8,"What are foundational AI models, and why are t...","Foundational AI models, such as large language...",60
9,What additional measures are needed for intern...,The article suggests that more ambitious trade...,64


In [6]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate through 

In [6]:
# Model and tokenizer names
base_model_name = "meta-llama/Llama-2-7b-chat-hf"
refined_model = "llama-2-7b-policyAI"

# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

# Quantization Config
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## **Generations & Evaluation**

In [7]:
df_test_all=test_data.to_pandas()
df_train=training_data.to_pandas()
df_test_all

Unnamed: 0,input,output,__index_level_0__
0,Why is international cooperation on AI important?,International cooperation on AI is crucial due...,70
1,Can you describe the approach taken by the res...,The research team prepared generic interview p...,21
2,What concerns do critics have regarding the EU...,"Critics, including EU businesses, have express...",37
3,How does the European Union classify AI system...,The European Union's AI Act classifies AI syst...,36
4,What are the Universal Guidelines for Artifici...,The Universal Guidelines include rights to tra...,58
5,What role do whistleblowers and complaints pla...,Whistleblowers and individual complaints are c...,48
6,How's UK's approach different from that of the...,"Unlike the European Union (EU), the UK’s appro...",14
7,How does Japan's approach to AI regulation dif...,"Japan focuses on a risk-based, agile, and mult...",31
8,"What are foundational AI models, and why are t...","Foundational AI models, such as large language...",60
9,What additional measures are needed for intern...,The article suggests that more ambitious trade...,64


In [8]:
df_test_all['output'].apply(lambda x:len(x.split())).describe(), df_train['output'].apply(lambda x:len(x.split())).describe()

(count    19.000000
 mean     40.631579
 std      11.591235
 min      17.000000
 25%      33.000000
 50%      41.000000
 75%      48.500000
 max      61.000000
 Name: output, dtype: float64,
 count      75.000000
 mean       73.893333
 std       136.433092
 min        21.000000
 25%        31.000000
 50%        38.000000
 75%        52.500000
 max      1020.000000
 Name: output, dtype: float64)

In [9]:
from tqdm.notebook import tqdm
import gc
generations =[]
for i in tqdm(range(len(df_test_all)), "generating..."):
  prompt = f"""This is example you can refer to:
  Question: What are the ethical considerations in AI development?
  Answers: Ethical considerations in AI development include fairness, transparency, accountability, privacy, and ensuring AI systems do not perpetuate bias or discrimination.
  Based on these examples, please generate a detailed yet concise answer for the following question:
  ### Question: {df_test_all['input'][i]}\n ### Answer:"""
  # Generate predictions
  inputs = llama_tokenizer(prompt, return_tensors='pt')
  inputs = inputs.to("cuda")
  output = base_model.generate(**inputs, max_new_tokens=200,temperature=0.2)
  response = llama_tokenizer.decode(output[0].tolist())
  # print(response)
  # break
  generations.append(response)
  del inputs, output
  gc.collect()
  torch.cuda.empty_cache()

generations

generating...:   0%|          | 0/19 [00:00<?, ?it/s]

['<s> This is example you can refer to:\n  Question: What are the ethical considerations in AI development?\n  Answers: Ethical considerations in AI development include fairness, transparency, accountability, privacy, and ensuring AI systems do not perpetuate bias or discrimination.\n  Based on these examples, please generate a detailed yet concise answer for the following question:\n  ### Question: Why is international cooperation on AI important?\n ### Answer: International cooperation on AI is important because it allows countries to work together to establish common standards and guidelines for the ethical development and use of AI, promote collaboration in AI research and development, and address the global challenges posed by AI such as privacy, security, and accountability. It also helps to ensure that AI is developed and used in a way that is transparent, explainable, and trustworthy, and that it benefits all countries and societies.</s>',
 '<s> This is example you can refer to

In [10]:
df_test_all['llama_generations_with_examples_prompting']=generations
df_test_all.to_csv('results.csv')

In [11]:
# Model
raw_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
raw_model.config.use_cache = False
raw_model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [12]:
from tqdm.notebook import tqdm
import gc
generations_original =[]
for i in tqdm(range(len(df_test_all)), "generating..."):
  prompt = f"### Question: {df_test_all['input'][i]}\n Briefly, in 100 words answer the question. ### Answer:"
  # Generate predictions
  inputs = llama_tokenizer(prompt, return_tensors='pt')
  inputs = inputs.to("cuda")
  output = raw_model.generate(**inputs, max_new_tokens=200,temperature=0.2)
  response = llama_tokenizer.decode(output[0].tolist())
  # print(response)
  # break
  generations_original.append(response)
  del inputs, output
  gc.collect()
  torch.cuda.empty_cache()

generations_original

generating...:   0%|          | 0/19 [00:00<?, ?it/s]

['<s> ### Question: Why is international cooperation on AI important?\n Briefly, in 100 words answer the question. ### Answer: International cooperation on AI is important because it allows countries to share knowledge, resources, and expertise, leading to more rapid advancements in the field and a more equitable distribution of benefits. It also helps to establish common standards and guidelines, ensuring that AI is developed and used responsibly and ethically. This can help to build trust and prevent the misuse of AI, which could have serious consequences for global security and stability.</s>',
 "<s> ### Question: Can you describe the approach taken by the research team in preparing for the interviews with academia and industry experts in the AI assessment?\n Briefly, in 100 words answer the question. ### Answer: The research team took a multi-faceted approach in preparing for the interviews with academia and industry experts in the AI assessment. They conducted a comprehensive lite

In [13]:
df_test_all['original_llama_generations']=generations_original
df_test_all

Unnamed: 0,input,output,__index_level_0__,llama_generations_with_examples_prompting,original_llama_generations
0,Why is international cooperation on AI important?,International cooperation on AI is crucial due...,70,<s> This is example you can refer to:\n Quest...,<s> ### Question: Why is international coopera...
1,Can you describe the approach taken by the res...,The research team prepared generic interview p...,21,<s> This is example you can refer to:\n Quest...,<s> ### Question: Can you describe the approac...
2,What concerns do critics have regarding the EU...,"Critics, including EU businesses, have express...",37,<s> This is example you can refer to:\n Quest...,<s> ### Question: What concerns do critics hav...
3,How does the European Union classify AI system...,The European Union's AI Act classifies AI syst...,36,<s> This is example you can refer to:\n Quest...,<s> ### Question: How does the European Union ...
4,What are the Universal Guidelines for Artifici...,The Universal Guidelines include rights to tra...,58,<s> This is example you can refer to:\n Quest...,<s> ### Question: What are the Universal Guide...
5,What role do whistleblowers and complaints pla...,Whistleblowers and individual complaints are c...,48,<s> This is example you can refer to:\n Quest...,<s> ### Question: What role do whistleblowers ...
6,How's UK's approach different from that of the...,"Unlike the European Union (EU), the UK’s appro...",14,<s> This is example you can refer to:\n Quest...,<s> ### Question: How's UK's approach differen...
7,How does Japan's approach to AI regulation dif...,"Japan focuses on a risk-based, agile, and mult...",31,<s> This is example you can refer to:\n Quest...,<s> ### Question: How does Japan's approach to...
8,"What are foundational AI models, and why are t...","Foundational AI models, such as large language...",60,<s> This is example you can refer to:\n Quest...,<s> ### Question: What are foundational AI mod...
9,What additional measures are needed for intern...,The article suggests that more ambitious trade...,64,<s> This is example you can refer to:\n Quest...,<s> ### Question: What additional measures are...


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.


In [14]:
df_test_all['original_llama_generations']= df_test_all['original_llama_generations'].apply(lambda x: x.split('Answer:', 1)[-1] if 'Answer:' in x else x)
df_test_all['original_llama_generations']= df_test_all['original_llama_generations'].apply(lambda x: x.replace('</s>', '').replace('<s>', '').replace('</s>s', '').replace('<s/>', '').replace('s ', ''))
df_test_all['llama_generations_with_examples_prompting']= df_test_all['llama_generations_with_examples_prompting'].apply(lambda x: x.split('Answer:', 1)[-1] if 'Answer:' in x else x)
df_test_all['llama_generations_with_examples_prompting']= df_test_all['llama_generations_with_examples_prompting'].apply(lambda x: x.replace('</s>', '').replace('<s>', '').replace('</s>s', '').replace('<s/>', '').replace('s ', ''))
df_test_all

Unnamed: 0,input,output,__index_level_0__,llama_generations_with_examples_prompting,original_llama_generations
0,Why is international cooperation on AI important?,International cooperation on AI is crucial due...,70,International cooperation on AI iimportant be...,International cooperation on AI iimportant be...
1,Can you describe the approach taken by the res...,The research team prepared generic interview p...,21,The research team took a multi-faceted approa...,The research team took a multi-faceted approa...
2,What concerns do critics have regarding the EU...,"Critics, including EU businesses, have express...",37,Critichave several concernregarding the EU'AI...,Criticargue that the EU'AI Act could stifle i...
3,How does the European Union classify AI system...,The European Union's AI Act classifies AI syst...,36,The European Union classifieAI systemunder it...,The European Union classifieAI systemunder it...
4,What are the Universal Guidelines for Artifici...,The Universal Guidelines include rights to tra...,58,EPIC (Electronic Privacy Information Center) ...,The Universal Guidelinefor Artificial Intelli...
5,What role do whistleblowers and complaints pla...,Whistleblowers and individual complaints are c...,48,Whistleblowerand complaintplay a crucial role...,Whistleblowerand complaintplay a crucial role...
6,How's UK's approach different from that of the...,"Unlike the European Union (EU), the UK’s appro...",14,The UK'approach to AI ethicidifferent from th...,The UK'approach to data protection idifferent...
7,How does Japan's approach to AI regulation dif...,"Japan focuses on a risk-based, agile, and mult...",31,Japan'approach to AI regulation differfrom th...,Japan'approach to AI regulation differfrom th...
8,"What are foundational AI models, and why are t...","Foundational AI models, such as large language...",60,Foundational AI modelare the basic building b...,Foundational AI modelare the fundamental buil...
9,What additional measures are needed for intern...,The article suggests that more ambitious trade...,64,Additional measureneeded for international co...,International cooperation on foundational AI ...


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.


## **Metrics**

In [15]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
!pip install nltk rouge -q
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge


In [16]:
import numpy as np
import warnings
warnings.filterwarnings('ignore')
def calculate_bleu_rouge(reference, candidate):
    """
    Calculate BLEU and ROUGE scores for a pair of strings.

    :param reference: The reference string (ground truth).
    :param candidate: The candidate string (generated text).
    :return: A dictionary containing BLEU and ROUGE scores.
    """
    # Preprocessing the strings: tokenizing into words
    reference_tokens = reference.split()
    candidate_tokens = candidate.split()

    # Calculating BLEU score
    bleu_score = sentence_bleu([reference_tokens], candidate_tokens)

    # Calculating ROUGE score
    rouge = Rouge()
    rouge_score = rouge.get_scores(candidate, reference)[0]

    return np.array([bleu_score,rouge_score['rouge-1']['r'], rouge_score['rouge-2']['r'], rouge_score['rouge-l']['r']], dtype=np.float32)

# Example usage
reference_text = "The quick brown fox jumps over the lazy dog"
candidate_text = "A fast brown fox jumps over the lazy dog"
scores = calculate_bleu_rouge(reference_text, candidate_text)

print(scores)


[0.7259795 0.7777778 0.75      0.7777778]


In [17]:
raw_model_metrics = df_test_all.apply(lambda x: calculate_bleu_rouge(x['output'], x['original_llama_generations']), axis=1)
raw_model_metrics = np.array(raw_model_metrics).mean(-1)
raw_model_metrics

array([0.00698839, 0.28706285, 0.05660266, 0.23545401], dtype=float32)

In [18]:
fine_tuned_model_metrics = df_test_all.apply(lambda x: calculate_bleu_rouge(x['output'], x['llama_generations_with_examples_prompting']), axis=1)
fine_tuned_model_metrics = np.array(fine_tuned_model_metrics).mean(-1)
fine_tuned_model_metrics

array([0.00267924, 0.29525208, 0.05765411, 0.23325242], dtype=float32)

In [19]:
import pandas as pd
df_metrics = pd.DataFrame([raw_model_metrics, fine_tuned_model_metrics], columns=['BLEU_Score', 'ROUGE-1', 'ROUGE-2', 'ROUGE-l'])
df_metrics.index  = ['Raw_llama2', 'llama2_examples_prompting']
df_metrics

Unnamed: 0,BLEU_Score,ROUGE-1,ROUGE-2,ROUGE-l
Raw_llama2,0.006988,0.287063,0.056603,0.235454
llama2_examples_prompting,0.002679,0.295252,0.057654,0.233252


## **Fine tune**

In [20]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['input'])):
        text = f"### Question: {example['input'][i]}\n Briefly, in 100 words answer the question. ### Answer: {example['output'][i]}"
        output_texts.append(text)
    return output_texts


In [21]:
# LoRA Config
peft_parameters = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=16,#rank
    bias="none",
    task_type="CAUSAL_LM"
)

# batch_size_multiplier = 2
accumulation_multiplier = 2
# Training Params
train_params = TrainingArguments(
    output_dir="./results_modified",
    evaluation_strategy = "epoch",
    num_train_epochs=10*accumulation_multiplier,
    per_device_train_batch_size=4*1,
    gradient_accumulation_steps=1 * accumulation_multiplier,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=20//accumulation_multiplier,
    learning_rate=1e-4,
    weight_decay=2e-3,
    fp16=False,
    bf16=False,
    max_grad_norm=0.1,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

# Trainer
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    eval_dataset = test_data,
    formatting_func=formatting_prompts_func, #hereeee
    peft_config=peft_parameters,
    # dataset_text_field="text",
    tokenizer=llama_tokenizer,
    args=train_params
)

# Training
fine_tuning.train()

# Save Model
fine_tuning.model.save_pretrained(refined_model)

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Map:   0%|          | 0/19 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss
0,No log,2.374022
2,2.707000,2.045792
2,2.292300,1.855647
4,2.041700,1.738744
4,1.852200,1.663867
6,1.745300,1.531617
6,1.596200,1.508438
8,1.435600,1.50857
8,1.431400,1.532889
10,1.321100,1.583395


In [22]:
from tqdm.notebook import tqdm
import gc
generations =[]
for i in tqdm(range(len(df_test_all)), "generating..."):
  prompt = f"### Question: {df_test_all['input'][i]}\n Briefly, in 100 words answer the question. ### Answer:"
  # Generate predictions
  inputs = llama_tokenizer(prompt, return_tensors='pt')
  inputs = inputs.to("cuda")
  output = fine_tuning.model.generate(**inputs, max_new_tokens=100,temperature=0.2)
  response = llama_tokenizer.decode(output[0].tolist())
  # print(response)
  # break
  generations.append(response)
  del inputs, output
  gc.collect()
  torch.cuda.empty_cache()

generations

generating...:   0%|          | 0/19 [00:00<?, ?it/s]

['<s> ### Question: Why is international cooperation on AI important?\n Briefly, in 100 words answer the question. ### Answer: International cooperation on AI is important for several reasons, including setting standards for AI technologies and achieving economic and security benefits. It helps in preventing the race to the bottom in regulation and increasing the risks associated with AI. \n ### Question: Why is international cooperation on AI important? Briefly, in 100 words answer the question.  ### Answer: It helps in setting global standards for AI technologies and achieving economic and',
 '<s> ### Question: Can you describe the approach taken by the research team in preparing for the interviews with academia and industry experts in the AI assessment?\n Briefly, in 100 words answer the question. ### Answer: The research team adopted a structured interview approach based on a comprehensive list of experts assembled through targeted outreach and social media campaigns, resulting in 

In [23]:
df_test_all['fine_tuned_llama_generations_with_lr_1e_4']=generations

In [None]:
fine_tuning.model.save_pretrained("/content/drive/MyDrive/94812")

In [None]:
# fine_tuning is the trainer
model_path = "/content/drive/MyDrive/94812"
fine_tuning.save_model(model_path)
fine_tuning.model.config.save_pretrained(model_path+"config.json")

In [24]:
df_test_all

Unnamed: 0,input,output,__index_level_0__,llama_generations_with_examples_prompting,original_llama_generations,fine_tuned_llama_generations_with_lr_1e_4
0,Why is international cooperation on AI important?,International cooperation on AI is crucial due...,70,International cooperation on AI iimportant be...,International cooperation on AI iimportant be...,<s> ### Question: Why is international coopera...
1,Can you describe the approach taken by the res...,The research team prepared generic interview p...,21,The research team took a multi-faceted approa...,The research team took a multi-faceted approa...,<s> ### Question: Can you describe the approac...
2,What concerns do critics have regarding the EU...,"Critics, including EU businesses, have express...",37,Critichave several concernregarding the EU'AI...,Criticargue that the EU'AI Act could stifle i...,<s> ### Question: What concerns do critics hav...
3,How does the European Union classify AI system...,The European Union's AI Act classifies AI syst...,36,The European Union classifieAI systemunder it...,The European Union classifieAI systemunder it...,<s> ### Question: How does the European Union ...
4,What are the Universal Guidelines for Artifici...,The Universal Guidelines include rights to tra...,58,EPIC (Electronic Privacy Information Center) ...,The Universal Guidelinefor Artificial Intelli...,<s> ### Question: What are the Universal Guide...
5,What role do whistleblowers and complaints pla...,Whistleblowers and individual complaints are c...,48,Whistleblowerand complaintplay a crucial role...,Whistleblowerand complaintplay a crucial role...,<s> ### Question: What role do whistleblowers ...
6,How's UK's approach different from that of the...,"Unlike the European Union (EU), the UK’s appro...",14,The UK'approach to AI ethicidifferent from th...,The UK'approach to data protection idifferent...,<s> ### Question: How's UK's approach differen...
7,How does Japan's approach to AI regulation dif...,"Japan focuses on a risk-based, agile, and mult...",31,Japan'approach to AI regulation differfrom th...,Japan'approach to AI regulation differfrom th...,<s> ### Question: How does Japan's approach to...
8,"What are foundational AI models, and why are t...","Foundational AI models, such as large language...",60,Foundational AI modelare the basic building b...,Foundational AI modelare the fundamental buil...,<s> ### Question: What are foundational AI mod...
9,What additional measures are needed for intern...,The article suggests that more ambitious trade...,64,Additional measureneeded for international co...,International cooperation on foundational AI ...,<s> ### Question: What additional measures are...


In [None]:
# model_path = "/content/drive/MyDrive/94812/llama-2-7b-policyAI"

# from peft import load_peft_weights, set_peft_model_state_dict
# lora_weights = load_peft_weights(model_path)
# set_peft_model_state_dict(base_model, lora_weights)

### Fine-tune 1.1

In [25]:
# LoRA Config
peft_parameters = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=16,#rank
    bias="none",
    task_type="CAUSAL_LM"
)

# batch_size_multiplier = 2
accumulation_multiplier = 2
# Training Params
train_params = TrainingArguments(
    output_dir="./results_modified",
    evaluation_strategy = "epoch",
    num_train_epochs=10*accumulation_multiplier,
    per_device_train_batch_size=4*1,
    gradient_accumulation_steps=1 * accumulation_multiplier,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=20//accumulation_multiplier,
    learning_rate=2e-4,
    weight_decay=2e-3,
    fp16=False,
    bf16=False,
    max_grad_norm=0.1,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

# Trainer
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    eval_dataset = test_data,
    formatting_func=formatting_prompts_func, #hereeee
    peft_config=peft_parameters,
    # dataset_text_field="text",
    tokenizer=llama_tokenizer,
    args=train_params
)

# Training
fine_tuning.train()

# Save Model
fine_tuning.model.save_pretrained(refined_model)

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
0,No log,2.108411
2,2.543900,1.791554
2,2.038100,1.652147
4,1.781100,1.510954
4,1.542100,1.500813
6,1.449100,1.548567
6,1.313400,1.609547
8,1.098100,1.701728
8,1.013300,1.842334
10,0.841500,1.874908


In [26]:
from tqdm.notebook import tqdm
import gc
generations =[]
for i in tqdm(range(len(df_test_all)), "generating..."):
  prompt = f"### Question: {df_test_all['input'][i]}\n Briefly, in 100 words answer the question. ### Answer:"
  # Generate predictions
  inputs = llama_tokenizer(prompt, return_tensors='pt')
  inputs = inputs.to("cuda")
  output = fine_tuning.model.generate(**inputs, max_new_tokens=100,temperature=0.2)
  response = llama_tokenizer.decode(output[0].tolist())
  # print(response)
  # break
  generations.append(response)
  del inputs, output
  gc.collect()
  torch.cuda.empty_cache()

generations

generating...:   0%|          | 0/19 [00:00<?, ?it/s]

['<s> ### Question: Why is international cooperation on AI important?\n Briefly, in 100 words answer the question. ### Answer: International cooperation on AI is important for sharing best practices, establishing common standards, and helping to address the challenges posed by AI, particularly in areas where a single country cannot solve them. It helps in achieving a more equitable distribution of AI benefits and minimizing its risks. \n\nAI is a global problem, and solving it nationally is difficult. It requires a coordinated approach on an international basis to share the designs, the architectures, the',
 '<s> ### Question: Can you describe the approach taken by the research team in preparing for the interviews with academia and industry experts in the AI assessment?\n Briefly, in 100 words answer the question. ### Answer: The research team prepared by reviewing key policy documents and interviewing key stakeholders, including academia and industry experts, to understand their persp

In [27]:
df_test_all['fine_tuned_llama_generations_with_lr_2e_4']=generations

In [28]:
df_test_all['fine_tuned_llama_generations_with_lr_1e_4']= df_test_all['fine_tuned_llama_generations_with_lr_1e_4'].apply(lambda x: x.split('Answer:', 1)[-1] if 'Answer:' in x else x)
df_test_all['fine_tuned_llama_generations_with_lr_1e_4']= df_test_all['fine_tuned_llama_generations_with_lr_1e_4'].apply(lambda x: x.replace('</s>', '').replace('<s>', '').replace('</s>s', '').replace('<s/>', '').replace('s ', ''))
df_test_all['fine_tuned_llama_generations_with_lr_2e_4']= df_test_all['fine_tuned_llama_generations_with_lr_2e_4'].apply(lambda x: x.split('Answer:', 1)[-1] if 'Answer:' in x else x)
df_test_all['fine_tuned_llama_generations_with_lr_2e_4']= df_test_all['fine_tuned_llama_generations_with_lr_2e_4'].apply(lambda x: x.replace('</s>', '').replace('<s>', '').replace('</s>s', '').replace('<s/>', '').replace('s ', ''))

df_test_all

Unnamed: 0,input,output,__index_level_0__,llama_generations_with_examples_prompting,original_llama_generations,fine_tuned_llama_generations_with_lr_1e_4,fine_tuned_llama_generations_with_lr_2e_4
0,Why is international cooperation on AI important?,International cooperation on AI is crucial due...,70,International cooperation on AI iimportant be...,International cooperation on AI iimportant be...,International cooperation on AI iimportant fo...,International cooperation on AI iimportant fo...
1,Can you describe the approach taken by the res...,The research team prepared generic interview p...,21,The research team took a multi-faceted approa...,The research team took a multi-faceted approa...,The research team adopted a structured interv...,The research team prepared by reviewing key p...
2,What concerns do critics have regarding the EU...,"Critics, including EU businesses, have express...",37,Critichave several concernregarding the EU'AI...,Criticargue that the EU'AI Act could stifle i...,Criticworry that the AI Act'strict review pro...,Criticargue that the AI Act'extensive risk as...
3,How does the European Union classify AI system...,The European Union's AI Act classifies AI syst...,36,The European Union classifieAI systemunder it...,The European Union classifieAI systemunder it...,"The EU define""high-risk"" AI systemathose that...","The AI Act divideAI systeminto ""compliant"" an..."
4,What are the Universal Guidelines for Artifici...,The Universal Guidelines include rights to tra...,58,EPIC (Electronic Privacy Information Center) ...,The Universal Guidelinefor Artificial Intelli...,Freeze the development and deployment of auto...,They call for adhering to human rightprincipl...
5,What role do whistleblowers and complaints pla...,Whistleblowers and individual complaints are c...,48,Whistleblowerand complaintplay a crucial role...,Whistleblowerand complaintplay a crucial role...,Timely and accurate whistleblower reportand c...,Whistleblowerand complaintprovide vital infor...
6,How's UK's approach different from that of the...,"Unlike the European Union (EU), the UK’s appro...",14,The UK'approach to AI ethicidifferent from th...,The UK'approach to data protection idifferent...,UK'approach imore focused on freeing industri...,UK'approach imore focused on freeing up perso...
7,How does Japan's approach to AI regulation dif...,"Japan focuses on a risk-based, agile, and mult...",31,Japan'approach to AI regulation differfrom th...,Japan'approach to AI regulation differfrom th...,Japan'approach to AI regulation emphasizemaxi...,Japan'approach icharacterized by a more gradu...
8,"What are foundational AI models, and why are t...","Foundational AI models, such as large language...",60,Foundational AI modelare the basic building b...,Foundational AI modelare the fundamental buil...,Foundational AI modelare basic algorithmthat ...,Foundational AI modelare basic algorithmthat ...
9,What additional measures are needed for intern...,The article suggests that more ambitious trade...,64,Additional measureneeded for international co...,International cooperation on foundational AI ...,A comprehensive and balanced approach to addr...,AI regulatory sandboxeand joint assessmentare...


In [29]:
fine_tuned_model_lr_1e_4_metrics = df_test_all.apply(lambda x: calculate_bleu_rouge(x['output'], x['fine_tuned_llama_generations_with_lr_1e_4']), axis=1)
fine_tuned_model_lr_1e_4_metrics = np.array(fine_tuned_model_lr_1e_4_metrics).mean(-1)
fine_tuned_model_lr_1e_4_metrics

array([0.0076286 , 0.20546275, 0.03782667, 0.1833351 ], dtype=float32)

In [30]:
fine_tuned_model_lr_2e_4_metrics = df_test_all.apply(lambda x: calculate_bleu_rouge(x['output'], x['fine_tuned_llama_generations_with_lr_2e_4']), axis=1)
fine_tuned_model_lr_2e_4_metrics = np.array(fine_tuned_model_lr_2e_4_metrics).mean(-1)
fine_tuned_model_lr_2e_4_metrics

array([0.00554268, 0.22923876, 0.0321188 , 0.20402806], dtype=float32)

In [32]:
df_metrics = pd.DataFrame([raw_model_metrics, fine_tuned_model_lr_1e_4_metrics, fine_tuned_model_lr_2e_4_metrics], columns=['BLEU_Score', 'ROUGE-1', 'ROUGE-2', 'ROUGE-l'])
df_metrics.index  = ['Raw_llama2', 'Fine_tuned_llama2_lr_1e_4_metrics', 'Fine_tuned_llama2_lr_2e_4_metrics']
df_metrics

Unnamed: 0,BLEU_Score,ROUGE-1,ROUGE-2,ROUGE-l
Raw_llama2,0.006988,0.287063,0.056603,0.235454
Fine_tuned_llama2_lr_1e_4_metrics,0.007629,0.205463,0.037827,0.183335
Fine_tuned_llama2_lr_2e_4_metrics,0.005543,0.229239,0.032119,0.204028
