# **LoRAfrica: Scaling LLM Fine Tuning for African History**

## **Baseline BERT SCORE**

In [None]:
import os
import numpy as np
import wandb
from datasets import load_dataset, Dataset
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from bert_score import score as bert_score
from random import randint

In [None]:
# Login to Weights & Biases
wandb.login()

In [None]:
# Create W&B project and run
wandb.init(project="phi4_african_history", name="phi4_baseline")

### **Dataset**

In [None]:
# Load the full dataset from Hugging Face Hub
full_dataset = load_dataset("DannyAI/African-History-QA-Dataset")

In [None]:
full_dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 2114
    })
    validation: Dataset({
        features: ['question', 'answer'],
        num_rows: 200
    })
    test: Dataset({
        features: ['question', 'answer'],
        num_rows: 100
    })
})

In [None]:
# Extract questions and answers from the test set
questions = [question for question in full_dataset['test']['question']]
answers = [answer for answer in full_dataset['test']['answer']]

In [47]:
questions[0]

'How did European traders impact the textile industry in the Kingdom of Kongo?'

In [48]:
answers[0]

'European traders, particularly the Portuguese, played a significant role in the textile industry in the Kingdom of Kongo, with the Portuguese trading raffia cloth and other textiles with the kingdom and re-exporting them to other regions.'

In [None]:
# Extract the test dataset
test_data = full_dataset["test"]

In [53]:
def random_examples_selector(questions,answers):
   if len(questions) == len(answers):
      question = questions[randint(0, len(questions))]
      answer = answers[randint(0, len(answers))]
      print(f"Question: \n{question}\nAnswer: \n{answer}")

In [54]:
random_examples_selector(questions,answers)

Question: 
What is the significance of African feminist scholarly activism in contemporary resistance movements?
Answer: 
The notable kings of the Mangbetu kingdom included King Manziga, King Nabiembali, King Tuba, King Mbunza, and King Yangala, each contributing to the kingdom's expansion, military organization, and cultural development.


### **Define the Model and Tokeniser**

In [None]:
# Load model and tokenizer
model_id = "microsoft/Phi-4-mini-instruct"
torch.random.manual_seed(0)
model = AutoModelForCausalLM.from_pretrained(model_id)
tokeniser = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Create text generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokeniser,
    device_map = "auto",
    dtype="auto"
    )

In [None]:
def generate_answer(question) -> str:
    """
    Docstring for generate_answer

    :param question: The question to be answered by the model.
    :return: The generated answer as a string.
    """
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant specialised in African history which gives concise answers to questions asked."},
        {"role": "user", "content": question}
    ]
    
    # pipeline() returns a list of dicts; 
    # return_full_text=False gives only the assistant's reply
    output = generator(
        messages, 
        max_new_tokens=2048, 
        temperature=0.1, 
        do_sample=False,
        return_full_text=False
    )
    return output[0]['generated_text'].strip()

In [None]:
# Generate predictions on the test set
print("--- Generating Predictions on Test Set ---")
test_predictions = []
# Assuming test_data is a list of dicts with "question" and "answer" keys
test_references = [item["answer"] for item in test_data]

for i, item in enumerate(test_data):
    pred = generate_answer(item["question"])
    test_predictions.append(pred)
    
    if i < 2: # Sample output for verification
        print(f"\nSample Q: {item['question']}")
        print(f"Sample A (Base Model): {pred}")
        print(f"Sample A (Ref): {item['answer']}\n")

# Metrics Calculation using BERTScore
print("--- Calculating BERTScore ---")
# P = Precision, R = Recall, F1 = F1 Score
P, R, F1 = bert_score(test_predictions, test_references, lang="en", verbose=True)

avg_f1 = F1.mean().item()
print(f"\nFinal Evaluation Results:")
print(f"Average BERTScore F1: {avg_f1:.4f}")

In [61]:
questions[8]

'What role do international investments play in African e-commerce development?'

In [62]:
answers[8]

"International investments, such as Amazon's entry into South Africa and various venture capital investments in platforms like Jumia, are crucial for the development of African e-commerce, bringing technology, expertise, and capital to the market."

In [63]:
test_predictions[8]

'International investments play a crucial role in African e-commerce development by providing the necessary capital, technology, and expertise to build and expand online businesses. They help in creating a robust digital infrastructure, enhancing internet connectivity, and fostering innovation. Additionally, these investments can lead to job creation, skill development, and increased market access for African entrepreneurs.'

In [64]:
# Finalize W&B
wandb.log({"Final_Test_BERTScore": avg_f1})
wandb.finish()

0,1
Final_Test_BERTScore,▁

0,1
Final_Test_BERTScore,0.88868
