# Demo

In [None]:
# Install llama-cpp

!set LLAMA_CUBLAS=1
!set CMAKE_ARGS=-DLLAMA_CUBLAS=on
!set FORCE_CMAKE=1

!python -m pip install llama-cpp-python==0.2.7 --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122

Looking in indexes: https://pypi.org/simple, https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122
Collecting llama-cpp-python==0.2.7
  Downloading https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/wheels/llama_cpp_python-0.2.7%2Bcu122-cp310-cp310-manylinux_2_31_x86_64.whl (14.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m64.7 MB/s[0m eta [36m0:00:00[0m
Collecting diskcache>=5.6.1 (from llama-cpp-python==0.2.7)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: diskcache, llama-cpp-python
Successfully installed diskcache-5.6.3 llama-cpp-python-0.2.7+cu122


In [None]:
# Download Airavata.gguf

import os
from huggingface_hub import hf_hub_download

def read_token(file_path):
    """Return the first line of ``file_path`` with surrounding whitespace removed.

    Args:
        file_path: Path to a text file whose first line holds the token.

    Returns:
        The stripped first line (an empty string if the file is empty).

    Raises:
        ValueError: If ``file_path`` does not exist. The underlying
            ``FileNotFoundError`` is attached as ``__cause__``.
    """
    try:
        with open(file_path, 'r') as file:
            return file.readline().strip()
    except FileNotFoundError as err:
        # Chain explicitly so the traceback presents this as a deliberate
        # translation of the error, not a second failure during handling.
        raise ValueError(f"Token file not found: {file_path}") from err

# Hugging Face repository id and the GGUF file to fetch from it
model_name = "ai4bharat/Airavata"
model_file = "Airavata.gguf"

# Fetch the file from the Hub into the local "models" folder and keep the returned path
model_path = hf_hub_download(repo_id=model_name, filename=model_file, local_dir='models/')

print("Model path:", model_path)  # e.g. models/Airavata.gguf

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Airavata.gguf:   0%|          | 0.00/13.7G [00:00<?, ?B/s]

Model path: models/Airavata.gguf


In [None]:
from llama_cpp import Llama

# Module-level chat history: a list of {"role": ..., "content": ...} dicts
# that process_input() appends to.
MESSAGES = []

def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
    """Render a chat history as a single prompt string.

    Each message becomes ``<|role|>\\n<content>\\n``. Assistant content is
    stripped and followed by ``eos``. An open ``<|assistant|>\\n`` header is
    always appended so the model continues with the next assistant turn.

    Args:
        messages: Iterable of dicts with ``"role"`` and ``"content"`` keys.
        bos: Marker prepended to the prompt when ``add_bos`` is true.
        eos: Marker appended after every assistant message.
        add_bos: Whether to prepend ``bos``.

    Returns:
        The formatted prompt text.

    Raises:
        ValueError: If a message role is not 'system', 'user' or 'assistant'.
    """
    segments = []
    for turn in messages:
        role = turn["role"]
        if role not in ("system", "user", "assistant"):
            raise ValueError(
                "Tulu chat template only supports 'system', 'user' and 'assistant' roles. Invalid role: {}.".format(
                    role
                )
            )
        body = turn["content"]
        if role == "assistant":
            # Completed assistant turns are trimmed and closed with eos.
            body = body.strip() + eos
        segments.append("<|" + role + "|>\n" + body + "\n")

    # Leave the final assistant turn open for generation.
    segments.append("<|assistant|>\n")
    prompt = "".join(segments)
    if add_bos:
        return bos + prompt
    return prompt


def select_llm() -> Llama:
    """Build the ``Llama`` instance used by the demo.

    Loads ``models/Airavata.gguf`` with ``n_gpu_layers=-1``, two threads,
    a 4096-token context and verbose logging enabled.
    """
    llama_options = dict(
        model_path="models/Airavata.gguf",
        n_gpu_layers=-1,
        n_threads=2,
        n_ctx=4096,
        verbose=True,
    )
    return Llama(**llama_options)

def get_answer(llm, messages, memory=5) -> str:
    """Generate the assistant's next reply for a conversation.

    Args:
        llm: Callable model; it is invoked with the prompt text plus the
            generation keyword arguments defined below.
        messages: Chat history as a list of ``{"role", "content"}`` dicts,
            oldest first.
        memory: Number of most recent messages to include in the prompt.
            Must be at least 1.

    Returns:
        The text of the first completion choice.

    Raises:
        ValueError: If ``memory`` is smaller than 1.
    """
    if memory < 1:
        # messages[-0:] would silently select the *whole* history.
        raise ValueError("memory must be at least 1, got {}".format(memory))

    generation_kwargs = {
        "max_tokens": 200,
        "stop": ["</s>"],
        "echo": False,
        "top_k": 50,
        "top_p": 0.5,
    }
    # Honour the `memory` argument (it used to be ignored in favour of a
    # hard-coded window of 5 messages).
    recent_messages = messages[-memory:]
    prompt = create_prompt_with_chat_format(recent_messages, add_bos=False)
    res = llm(prompt, **generation_kwargs)

    return res["choices"][0]["text"]

In [None]:
# Instantiate the model once; process_input() passes this object to get_answer().
llm = select_llm()

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [None]:
from IPython.display import display, HTML

!wget https://raw.githubusercontent.com/AI4Bharat/IndicInstruct/main/demo/airavata_html.py
from airavata_html import chat_html, js_code, update_html

MESSAGES = []

# Display the HTML and JavaScript
display(HTML(chat_html + js_code))


from IPython.display import display, HTML

# Define the Python function to process the input
def process_input(user_input):
    """Handle one chat turn submitted from the notebook's HTML front end.

    Appends the user message to ``MESSAGES``, asks the model for a reply via
    ``get_answer``, appends the reply, prints both, and displays
    ``update_html`` filled in with escaped copies of the two texts.

    Args:
        user_input: Text typed by the user.
    """

    def _escape(text):
        # Backslashes are doubled *before* quotes are escaped; otherwise a
        # backslash already present in the text pairs with the one added here
        # and un-escapes the quote. Newlines become <br> for both speakers.
        # NOTE(review): assumes update_html places these values inside quoted
        # JavaScript strings (the quote escaping suggests so) — confirm.
        return (
            text.replace("\\", "\\\\")
            .replace("\n", "<br>")
            .replace("'", "\\'")
            .replace('"', '\\"')
        )

    print("User:", user_input, "\n")
    MESSAGES.append({"role": "user", "content": user_input})
    response = get_answer(llm, MESSAGES)
    # The history keeps the raw reply; only the displayed copy is escaped.
    MESSAGES.append({"role": "assistant", "content": response})
    print("Airavata:", response, "\n")

    display(HTML(update_html.format(user_input=_escape(user_input), response=_escape(response))))

# Register process_input with the notebook under the name 'process_input'
# (presumably so the JavaScript front end can invoke it — confirm in airavata_html.py)
from google.colab import output
output.register_callback('process_input', process_input)
# NOTE(review): looks like a leftover debug marker — confirm whether it is still needed.
print("[Debug]\n")

# Load Airavata from 🤗 Hugging Face Transformers

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository id of the model
model_name = "ai4bharat/Airavata"

# Load the causal-LM model and the tokenizer from the same repository
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


[2024-08-06 04:43:05,043] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


  def forward(ctx, input, weight, bias=None):
  def backward(ctx, grad_output):


# IndicInstruct SFT Dataset

🤗 https://huggingface.co/datasets/ai4bharat/indic-instruct-data-v0.1

# Evaluations and Benchmarks

In [15]:
# Clone the IndicInstruct repository into ./IndicInstruct
!git clone https://github.com/AI4Bharat/IndicInstruct/

Cloning into 'IndicInstruct'...
remote: Enumerating objects: 1122, done.[K
remote: Counting objects: 100% (495/495), done.[K
remote: Compressing objects: 100% (96/96), done.[K
remote: Total 1122 (delta 424), reused 411 (delta 399), pack-reused 627[K
Receiving objects: 100% (1122/1122), 30.69 MiB | 37.37 MiB/s, done.
Resolving deltas: 100% (691/691), done.


In [16]:
# Make the cloned repo the working directory; the cells below use paths relative to it
%cd IndicInstruct

/content/IndicInstruct


In [None]:
!pip install -r requirements.txt

In [None]:
# Run the entire evaluation suite: arg1 is the model to evaluate, arg2 is the directory where the results are stored
!bash full_eval.sh ai4bharat/Airavata results

In [None]:
# Scripts for individual evaluations live in scripts/<eval>/<name_of_the_task>.sh

# E.g. HellaSwag translated to Indic languages:
!bash scripts/indic_eval/hellaswag.sh

# E.g. ARC in English:
!bash scripts/english_eval/arc.sh

# E.g. XLSum translate-test in English:
!bash scripts/translate_test_eval/xlsum.sh

# Feel free to modify the bash scripts to suit your requirements (language, model name, few-shot examples, etc.).

In [None]:
# Evaluate on IndicSentiment (Hindi) in a 5-shot setting, calling the eval module directly
# instead of going through a wrapper script
!python3 -m eval.indicsentiment.run_eval \
    --ntrain 5 \
    --save_dir "results/indicsentiment/airavata-5shot" \
    --model_name_or_path "ai4bharat/Airavata" \
    --tokenizer_name_or_path "ai4bharat/Airavata" \
    --eval_batch_size 4

# Finetune Model

In [None]:
# Clone the IndicInstruct repository.
# NOTE(review): this section has no `%cd IndicInstruct` of its own; the cells below use
# repo-relative paths (requirements.txt, scripts/...), so they rely on the `%cd` from the
# "Evaluations and Benchmarks" section having run — confirm the intended working directory.
!git clone https://github.com/AI4Bharat/IndicInstruct/

In [None]:
!pip install -r requirements.txt

In [None]:
# Run the repository's fine-tuning script (presumably LoRA via Accelerate, judging by its name — confirm)
!bash scripts/finetune_lora_with_accelerate.sh