# **LoRAfrica: Scaling LLM Fine-Tuning for African History**

In [1]:
import os

import numpy as np
import torch
from datasets import load_dataset, Dataset
from huggingface_hub import login
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import pipeline
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments
)

# Read the access token from the HF_TOKEN environment variable so the secret
# never has to be pasted into (and saved with) the notebook. The placeholder is
# only a fallback for when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", "your_token_here")  # Replace with your actual token

### **Dataset**

In [None]:
# Setup - authenticate with the Hugging Face Hub.
# HF_TOKEN must hold your own access token before this cell is run.
login(token=HF_TOKEN)

In [None]:
# Load the full dataset from Hugging Face Hub.
# Later cells read the 'question' and 'answer' columns of its 'test' split.
full_dataset = load_dataset("DannyAI/African-History-QA-Dataset")

In [None]:
# Show the loaded dataset object as this cell's output
full_dataset

In [5]:
# Materialise the test split's question and answer columns as plain Python lists
# (index i of `questions` pairs with index i of `answers`).
questions, answers = (
    list(full_dataset['test'][column_name])
    for column_name in ('question', 'answer')
)

### **Define the Model and Tokeniser**

In [None]:
# Load base model and tokenizer.
# `base_model` is used directly by the base pipeline and is also the model that
# `lora_model_generator` wraps with an adapter; `tokeniser` is shared by both.
model_id = "microsoft/Phi-4-mini-instruct"
base_model = AutoModelForCausalLM.from_pretrained(model_id)
tokeniser = AutoTokenizer.from_pretrained(model_id)

In [7]:
# Create text generation pipeline for base_model
# NOTE(review): `base_model` is already instantiated when it is passed in, so
# `device_map` / `dtype` may have no effect here — confirm against the
# transformers `pipeline` documentation.
base_generator = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokeniser,
    device_map = "auto",
    dtype="auto"
    )

Device set to use cuda:0


In [8]:
def base_generate_answer(question) -> str:
    """
    Answer a question with the base model pipeline (``base_generator``).

    The question is sent as the user turn of a two-message conversation whose
    first message is a fixed system prompt.

    :param question: The question to be answered by the model.
    :return: The generated answer as a string, with surrounding whitespace stripped.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful AI assistant specialised in African history which gives concise answers to questions asked.",
    }
    user_turn = {"role": "user", "content": question}

    # NOTE(review): with do_sample=False the temperature value presumably has no
    # effect on decoding — confirm before relying on it.
    generations = base_generator(
        [system_turn, user_turn],
        max_new_tokens=2048,
        temperature=0.1,
        do_sample=False,
        return_full_text=False,  # only the newly generated text is wanted
    )
    first_generation = generations[0]
    return first_generation['generated_text'].strip()

In [9]:
# Lora models: repository ids of the LoRA adapters to compare against the base model.
# `random_examples_selector` picks one of them by its position in this list.
fined_tuned_list = [
    "DannyAI/phi4_african_history_lora",
    "DannyAI/phi4_lora_axolotl",
    "DannyAI/phi4_african_history_lora_ds2",
    "DannyAI/phi4_african_history_lora_ds2_axolotl",
]

In [10]:
# Build a text generation pipeline for base_model combined with one LoRA adapter
def lora_model_generator(lora_model_id:str):
    """
    Create a text-generation pipeline for the base model with a LoRA adapter applied.

    :param lora_model_id: Identifier handed to ``PeftModel.from_pretrained`` to locate the adapter.
    :return: The pipeline built around the adapter-wrapped model and the shared tokeniser.
    """
    # NOTE(review): the adapter is attached to the module-level `base_model`
    # object itself, not to a copy — confirm whether that also changes what
    # `base_generator` produces after this function has been called.
    adapted_model = PeftModel.from_pretrained(base_model, lora_model_id)

    pipeline_options = {
        "model": adapted_model,
        "tokenizer": tokeniser,
        "device_map": "auto",
        "dtype": "auto",
    }
    return pipeline("text-generation", **pipeline_options)

In [11]:
def lora_generate_answer(question, lora_model_id):
    """
    Answer a question with the base model plus one LoRA adapter.

    A pipeline for ``lora_model_id`` is built via ``lora_model_generator`` and
    queried with the same system prompt and generation settings as the base
    model. PEFT injects the adapter layers into the shared ``base_model`` in
    place, so the adapter is unloaded again once generation finishes. Without
    that, every later call to the base pipeline, and every adapter loaded
    afterwards, would silently run on top of this adapter.

    :param question: The question to be answered by the model.
    :param lora_model_id: Repository id of the LoRA adapter to apply.
    :return: The generated answer as a string, with surrounding whitespace stripped.
    """
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant specialised in African history which gives concise answers to questions asked."},
        {"role": "user", "content": question}
    ]

    # Create text generation pipeline (this attaches the adapter to base_model)
    lora_generator = lora_model_generator(lora_model_id)

    try:
        output = lora_generator(
            messages,
            max_new_tokens=2048,
            temperature=0.1,
            do_sample=False,
            return_full_text=False
        )
    finally:
        # Remove the injected LoRA layers without merging them, even when
        # generation fails, so `base_model` is left in its original state.
        lora_generator.model.unload()

    return output[0]['generated_text'].strip()

In [12]:
from random import randint
def random_examples_selector(questions,answers,lora_model:int):
    """
    Print the reference, base-model and LoRA-model answers for one random question.

    :param questions: Sequence of questions to sample from.
    :param answers: Reference answers, aligned index-for-index with ``questions``.
    :param lora_model: Index into ``fined_tuned_list`` selecting the adapter to compare.
    :raises IndexError: If ``questions`` is empty or ``lora_model`` is out of range.
    """
    if len(questions) == 0:
        raise IndexError("cannot choose a random example from an empty question list")

    # randint includes BOTH endpoints, so the upper bound has to be len - 1;
    # using len(questions) would occasionally index one past the end.
    example_index = randint(0, len(questions) - 1)
    question = questions[example_index]
    # Reuse the sampled index instead of questions.index(question), which would
    # return the first match and mispair duplicated questions.
    ref_answer = answers[example_index]

    adapter_id = fined_tuned_list[lora_model]
    base_model_answer = base_generate_answer(question)
    lora_model_answer = lora_generate_answer(question, adapter_id)
    print(f"Question:\n{question}\n\nReference: \n{ref_answer}\n\nBase Model:\n{base_model_answer}\n\nLoRA Model: ({adapter_id}):\n{lora_model_answer}")

In [21]:
# Compare the base model with the adapter at index 1 of fined_tuned_list
# ("DannyAI/phi4_lora_axolotl") on one randomly chosen test question.
random_examples_selector(questions,answers,1)

adapter_config.json: 0.00B [00:00, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/6.30M [00:00<?, ?B/s]

Device set to use cuda:0


Question:
What economic losses is Africa facing due to climate change?

Reference: 
According to the African Development Bank, climate change is causing significant economic losses in Africa, estimated at between 3-5% of GDP annually. By 2050, adaptation costs could reach $50 billion annually if global temperatures rise by 2°C, and up to $100 billion if temperatures rise by 4°C.

Base Model:
Africa is facing economic losses of $50 billion annually due to climate change, with the agricultural sector being particularly affected.

LoRA Model: (DannyAI/phi4_lora_axolotl):
Africa is facing significant economic losses due to climate change, including reduced agricultural productivity, increased costs of disaster response, loss of biodiversity, and damage to infrastructure. These losses are estimated to be in the billions of dollars annually, with the agricultural sector being particularly vulnerable.
