## Fine-tuning Kinyarwanda - Inference



In [1]:
# Import libraries

# Standard library
import json
import os

# Third-party. unsloth is kept ahead of torch/transformers, as in the original cell order.
from unsloth import FastLanguageModel
import torch

from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from unsloth import UnslothTrainer, UnslothTrainingArguments

from datasets import load_dataset
from datasets import Dataset

import pandas as pd




🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


## Inference before continued pretraining

In [2]:
# Checkpoint used for the "before continued pretraining" runs.
xmodel = 'unsloth/llama-3-8b-bnb-4bit'

# Loader settings.
max_seq_length = 2048   # can be raised for longer context
dtype = None            # None -> auto-detected: float16 on Tesla T4/V100, bfloat16 on Ampere+
load_in_4bit = True     # 4-bit quantization to reduce memory usage

# from_pretrained hands back the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=xmodel,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)


==((====))==  Unsloth: Fast Llama patching release 2024.6
   \\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.647 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
# Put the freshly loaded model into Unsloth's inference mode before the
# generate() calls made by the cells below.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

In [4]:
def complete_prompt(xprompt, max_new_tokens = 1000, llm = None, tok = None):
    """Generate a continuation of ``xprompt`` and return the decoded text.

    Parameters
    ----------
    xprompt : str
        Text the model should continue.
    max_new_tokens : int, optional
        Upper bound on the number of newly generated tokens. Defaults to
        1000, the value that used to be hard-coded here.
    llm : optional
        Model to generate with. Defaults to the notebook-level ``model``,
        looked up at call time so a model loaded later is picked up.
    tok : optional
        Tokenizer used to encode the prompt and decode the output. Defaults
        to the notebook-level ``tokenizer``.

    Returns
    -------
    list of str
        One decoded string per generated sequence. The prompt is part of the
        decoded text and special tokens are not stripped.
    """
    llm = model if llm is None else llm
    tok = tokenizer if tok is None else tok

    # Follow the model's own device instead of assuming "cuda".
    inputs = tok(xprompt, return_tensors = "pt").to(llm.device)

    outputs = llm.generate(
        **inputs,
        max_new_tokens = max_new_tokens,
        use_cache = True,
        # generate() falls back to the EOS id for padding and warns about it on
        # every call; passing the same id explicitly keeps the behaviour and
        # silences the warning.
        pad_token_id = tok.eos_token_id,
    )

    return tok.batch_decode(outputs)

In [None]:
# Open-ended prompts to complete: sentence openers, plus one English prompt.
xprompts = [
    "Imana ",
    "Umugabo yaraje abwira abantu ati ",
    "mu gihugu cy'ubufaransa",
    "amateka ya Afurika",
    "Ejo bundi umugabo yaje nk'iya Gatera ",
    "the history of the persian empire  ",
    "umwana wange yarambwiye   ",
    "Ejo bundi umwana yagiye ",
]

for xprompt in xprompts:
    # complete_prompt returns a list of decoded sequences; keep the first one.
    xresp = complete_prompt(xprompt)[0]

    # Layout: prompt line, two blank lines, completion, separator line.
    print(f"prompt: {xprompt}", "\n", xresp, "-----", sep="\n")

#### Test translation abilities

In [6]:
# Word-pair prompt in "english ; translation" form, ending on an open slot
# after "woman ;" for the model to fill in.
xprompt = 'president ; presida, country ; igihugu , children ; abana, woman ; '
xresp = complete_prompt(xprompt)[0]

# Layout: prompt line, two blank lines, then the completion.
print(f'prompt: {xprompt}', '\n', xresp, sep='\n')

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


prompt: president ; presida, country ; igihugu , children ; abana, woman ; 


<|begin_of_text|>president ; presida, country ; igihugu, children ; abana, woman ; iganje, man ; igihugu, child ; abana, man ; igihugu, woman ; iganje, country ; igihugu, man ; iganje, woman ; iganje, child ; abana, country ; igihugu, woman ; iganje, country ; igihugu, child ; abana, woman ; iganje, country ; igihugu, man ; iganje, child ; abana, country ; igihugu, man ; iganje, woman ; igihugu, child ; abana, country ; igihugu, man ; iganje, child ; abana, country ; igihugu, child ; abana, country ; igihugu, woman ; iganje, child ; abana, country ; igihugu, child ; abana, country ; igihugu, child ; abana, country ; igihugu, woman ; iganje, child ; abana, country ; igihugu, child ; abana, country ; igihugu, man ; iganje, child ; abana, country ; igihugu, child ; abana, country ; igihugu, child ; abana, country ; igihugu, man ; iganje, child ; abana, country ; igihugu, man ; iganje, child ; abana, country ; ig

## Inference after continued pretraining

In [None]:


max_seq_length = 2048 # this can be adapted for longer context
dtype = None # the datatype will be auto-detected : Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # we use 4bit quantization to reduce memory usage.

# Checkpoint directory to load (presumably the output of the continued-pretraining
# step, going by its path). The default is the original author's local directory;
# set the KINYARWANDA_FT_MODEL_DIR environment variable to load it from another
# location without editing this cell.
xmodel = os.environ.get(
    'KINYARWANDA_FT_MODEL_DIR',
    '/home/mike/xGitHubRepos/kinyarwanda_ft_llm/02_continue_pretraining/llamarwanda_rw_v002',
)

# Rebinds the notebook-level `model` and `tokenizer`, so every later cell
# generates with this checkpoint.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = xmodel,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

In [None]:
# The model was just reloaded, so switch the new instance into Unsloth's
# inference mode as well before generating with it.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference


In [None]:
# A single hard-coded text wrapped in <|begin_of_text|> / <|end_of_text|> markers,
# stored as a one-element list.
# NOTE(review): looks like a saved sample (a generated completion or a training
# document) — confirm where it came from.
q1 = ['<|begin_of_text|>Imana izarinda umugore wanjye n’umwana wanjye- Perezida Kagame abwira umwamikazi w’u Bwongereza Mu butumwa yanditse kuri twitter, Perezida Kagame yagize ati “Nyiricyubahiro umwamikazi w’u Bwongereza, nshimiye cyane ibyo twaganiriye byose. Imana izarinda umugore wanjye n’umwana wanjye.” Umwamikazi Elizabeth II ni we muyobozi ukuru wa Commonwealth. Umuryango uhuza ibihugu 54 bigize Commonwealth byo ku migabane yose igize isi, ukaba ufite abaturage barenga miliyari 2.4. Umuryango wa Commonwealth washinzwe mu 1949, u Rwanda rwinjiyemo mu 2009. U Rwanda rufitemo imyanya ibiri, aho Minisitiri w’Intebe Dr Edouard Ngirente ari mu bagize inama y’Abaminisitiri y’uyu muryango, naho Minisitiri w’Ububanyi n’Amahanga Dr Vincent Biruta akaba ari mu kanama k’ubuyobozi. Umunyamakuru @ CharlesRUZINDA2 <|end_of_text|>']
# print() of a list shows its repr, so the text appears quoted inside brackets.
print(q1)

In [None]:
def complete_prompt(xprompt, max_new_tokens = 1000, llm = None, tok = None):
    """Generate a continuation of ``xprompt`` and return the decoded text.

    Parameters
    ----------
    xprompt : str
        Text the model should continue.
    max_new_tokens : int, optional
        Upper bound on the number of newly generated tokens. Defaults to
        1000, the value that used to be hard-coded here.
    llm : optional
        Model to generate with. Defaults to the notebook-level ``model``,
        looked up at call time so the most recently loaded model is used.
    tok : optional
        Tokenizer used to encode the prompt and decode the output. Defaults
        to the notebook-level ``tokenizer``.

    Returns
    -------
    list of str
        One decoded string per generated sequence. The prompt is part of the
        decoded text and special tokens are not stripped.
    """
    llm = model if llm is None else llm
    tok = tokenizer if tok is None else tok

    # Follow the model's own device instead of assuming "cuda".
    inputs = tok(xprompt, return_tensors = "pt").to(llm.device)

    outputs = llm.generate(
        **inputs,
        max_new_tokens = max_new_tokens,
        use_cache = True,
        # generate() falls back to the EOS id for padding and warns about it on
        # every call; passing the same id explicitly keeps the behaviour and
        # silences the warning.
        pad_token_id = tok.eos_token_id,
    )

    return tok.batch_decode(outputs)
    


In [None]:
# Open-ended prompts to complete: sentence openers, plus one English prompt.
xprompts = [
    "Imana ",
    "Umugabo yaraje abwira abantu ati ",
    "mu gihugu cy'ubufaransa",
    "amateka ya Afurika",
    "Ejo bundi umugabo yaje nk'iya Gatera ",
    "the history of the persian empire  ",
    "umwana wange yarambwiye   ",
    "Ejo bundi umwana yagiye ",
]

for xprompt in xprompts:
    # Only the first decoded sequence is of interest for a single prompt.
    xresp = complete_prompt(xprompt)[0]

    # One report per prompt: prompt line, two blank lines, completion, separator.
    report_lines = [f"prompt: {xprompt}", "\n", str(xresp), "-----"]
    print("\n".join(report_lines))
            
            
            

## Test the "translation capabilities"

In [None]:
# Word-pair prompt in "english ; translation" form, ending on an open slot
# after "woman ;" for the model to fill in.
xprompt = 'president ; presida, country ; igihugu , children ; abana, woman ; '
xresp = complete_prompt(xprompt)[0]

# Layout: prompt line, two blank lines, then the completion.
print(f'prompt: {xprompt}', '\n', xresp, sep='\n')

In [None]:
# English prompt, completed with whichever model is currently loaded.
xprompt = 'yesterday, in a surprising turn of events, the president of Mexico '
xresp = complete_prompt(xprompt)[0]

# Layout: prompt line, two blank lines, then the completion.
print(f'prompt: {xprompt}', '\n', xresp, sep='\n')