# English (CapybaraHermes-2.5-Mistral-7B-GPTQ)

In [None]:
! pip install optimum
! pip install auto-gptq
! pip install -U langsmith
! pip install langchain-community
! pip install optimum
! pip install auto-gptq
! pip install python-dotenv

In [None]:
from huggingface_hub import login
from google.colab import userdata
# Read the value stored under the Colab secret named 'HuggingFace'.
key = userdata.get('HuggingFace')

# Log in to the Hugging Face Hub with that value.
login(key)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub repository id of the checkpoint used in the English section.
model_name_or_path = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GPTQ"
# Load the causal LM from the "main" revision with automatic device mapping
# and with execution of repository-provided code disabled.
model_english = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

# Matching tokenizer, requesting the fast implementation.
tokenizer_english = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

In [None]:
# Print two architecture figures taken from the loaded model's configuration.
config = model_english.config

num_layers = config.num_hidden_layers
num_attention_heads = config.num_attention_heads

print(f"Number of layers: {num_layers}")
print(f"Number of attention heads per layer: {num_attention_heads}")

In [None]:
prompt = "What is the capital of France"
system_message = "You are a fact database"
# Chat prompt laid out with <|im_start|>/<|im_end|> role markers; it ends with
# an open assistant turn so that generation continues from there.
prompt_template=f'''<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
'''

print("\n\n*** Generate:")

# Move the encoded prompt to the device the model actually lives on instead of
# hard-coding 'cuda', so inputs and model are always on the same device.
# NOTE: despite its name, `input_ids` holds the full tokenizer output
# (it is unpacked with ** below), not just the id tensor.
input_ids = tokenizer_english(prompt_template, return_tensors='pt').to(model_english.device)
# NOTE(review): output_attentions=True is requested here but the attentions are
# not used in the cells visible in this notebook — confirm it is still needed.
output = model_english.generate(**input_ids, return_dict_in_generate=True, max_new_tokens=512 ,output_attentions=True)

In [None]:
# Decode the first returned sequence back to text, dropping special tokens.
output_text = tokenizer_english.decode(output.sequences[0], skip_special_tokens=True)
print(output_text)

# Inference

In [None]:
import os

from google.colab import userdata

# Read the value stored under the Colab secret named 'LangSmith'.
lkey = userdata.get('LangSmith')

# `! export ...` runs in a short-lived subshell, so the variables never reach
# this Python process, and `LANGCHAIN_API_KEY=lkey` would assign the literal
# text "lkey" rather than the secret. Set them on os.environ instead.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = lkey

In [None]:
from huggingface_hub import login
# `userdata` is not imported in this cell; it must already be in scope from an earlier cell.
key = userdata.get('HuggingFace')
login(key)

In [None]:
from langsmith import Client
# Client built with no explicit arguments.
# NOTE(review): presumably it reads its credentials from the environment — confirm.
client = Client()

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [None]:
# Wrap the already-loaded English model and tokenizer in a text-generation pipeline.
llm = pipeline("text-generation", model=model_english, tokenizer=tokenizer_english)

In [None]:
from datasets import load_dataset

# Load the "squad" dataset; later cells read its "validation" split.
dataset = load_dataset("squad")

In [None]:
from langsmith import evaluate


def evaluate_text_generation(model, dataset, max_examples=2):
    """Generate answers for the first validation questions and pair them with references.

    Args:
        model: Callable invoked as ``model(prompt, max_length=100,
            num_return_sequences=1)``; it must return a sequence whose first
            item is a mapping with a ``"generated_text"`` entry.
        dataset: Mapping with a ``"validation"`` iterable of examples, each
            providing ``example["question"]`` and ``example["answers"]["text"]``.
        max_examples: How many validation examples to process. Defaults to 2,
            the limit that used to be hard-coded; ``None`` processes the
            whole split. Must not be negative.

    Returns:
        dict: ``{"generated_texts": [...], "expected_texts": [...]}`` with one
        entry per processed example, in dataset order.
    """
    from itertools import islice

    generated_texts = []
    expected_texts = []
    # islice stops after `max_examples` items without a manual counter.
    for example in islice(dataset["validation"], max_examples):
        prompt = example["question"]
        # Only the first listed reference answer is kept.
        expected_answer = example["answers"]["text"][0]

        generated_text = model(prompt, max_length=100, num_return_sequences=1)[0]["generated_text"]
        generated_texts.append(generated_text)
        expected_texts.append(expected_answer)
    return {"generated_texts": generated_texts, "expected_texts": expected_texts}


In [None]:
# Display the validation example at index 1.
dataset['validation'][1]

In [None]:
# Run the evaluation helper with the pipeline and dataset defined above.
evaluate_text_generation(llm, dataset)

# Token-Based Evaluation

In [None]:
from collections import Counter
import math

In [None]:
def ngram_counts(tokens, n):
    """Count every contiguous run of `n` items in `tokens`.

    Each n-gram is stored as a tuple key in the returned ``Counter``. A
    sequence shorter than `n` yields an empty counter.
    """
    counts = Counter()
    window_starts = range(len(tokens) - n + 1)
    for start in window_starts:
        counts[tuple(tokens[start:start + n])] += 1
    return counts

In [None]:
def clipped_ngram_counts(reference, prediction, n):
    """Return ``(clipped_matches, total_predicted)`` for n-grams of order `n`.

    An n-gram of `prediction` is credited at most as many times as it occurs
    in `reference`; the second value is the total number of n-grams in
    `prediction`.
    """
    allowed = ngram_counts(reference, n)
    predicted = ngram_counts(prediction, n)

    # Counter intersection keeps, for every n-gram, the smaller of the two counts.
    overlap = predicted & allowed
    return sum(overlap.values()), sum(predicted.values())

In [None]:
def brevity_penalty(reference_tokens, prediction_tokens):
    """Return the length penalty applied to a prediction shorter than its reference.

    Returns 0 for an empty prediction, 1 when the prediction is longer than
    the reference, and ``exp(1 - ref_len / pred_len)`` otherwise.
    """
    expected_len, produced_len = len(reference_tokens), len(prediction_tokens)

    if not produced_len:
        return 0
    return 1 if produced_len > expected_len else math.exp(1 - expected_len / produced_len)

In [None]:
def compute_bleu(reference, prediction, max_n=4, weights=(0.25, 0.25, 0.25, 0.25)):
    """Score `prediction` against `reference` with a hand-rolled BLEU variant.

    Clipped n-gram precisions are computed for orders 1..`max_n`. Orders whose
    precision is zero are left out of the weighted log-average; if every
    order is zero the score is 0. The result is multiplied by the brevity
    penalty of the two sequences.
    """
    precision_scores = []
    for order in range(1, max_n + 1):
        matched, total = clipped_ngram_counts(reference, prediction, order)
        precision_scores.append(matched / total if total else 0)

    if any(precision_scores):
        # Only strictly positive precisions contribute a log term.
        log_terms = (w * math.log(p) for w, p in zip(weights, precision_scores) if p > 0)
        geometric_mean = math.exp(sum(log_terms))
    else:
        geometric_mean = 0

    return geometric_mean * brevity_penalty(reference, prediction)


In [None]:
def compute_tbleu(reference, prediction, reference_answer, prediction_answer, max_n=4, weights=(0.25, 0.25, 0.25, 0.25)):
    """BLEU variant that takes precisions and the length penalty from different inputs.

    N-gram precisions (orders 1..`max_n`) are computed over `reference_answer`
    and `prediction_answer`, while the brevity penalty uses the lengths of
    `reference` and `prediction`. Zero precisions are left out of the weighted
    log-average; if every order is zero the score is 0.
    """
    precision_scores = []
    for order in range(1, max_n + 1):
        matched, total = clipped_ngram_counts(reference_answer, prediction_answer, order)
        precision_scores.append(matched / total if total else 0)

    if any(precision_scores):
        # Only strictly positive precisions contribute a log term.
        log_terms = (w * math.log(p) for w, p in zip(weights, precision_scores) if p > 0)
        geometric_mean = math.exp(sum(log_terms))
    else:
        geometric_mean = 0

    return geometric_mean * brevity_penalty(reference, prediction)


In [None]:
import math
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [None]:
def compute_bleu(reference_tokens, generated_tokens):
    """Return NLTK's sentence-level BLEU of `generated_tokens` against one reference.

    Uses ``SmoothingFunction().method1`` as the smoothing function. Defining
    this function replaces any earlier ``compute_bleu`` in the session.
    """
    return sentence_bleu(
        [reference_tokens],
        generated_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

In [None]:
def compute_token_probabilities(model, tokenizer, generated_text, device="cpu"):
    """Return the probability the model assigns to each token of `generated_text`.

    For decoder-only (causal) models the logits at position ``i`` score the
    token at position ``i + 1``, so logits and targets are shifted by one; the
    first token has no preceding context and gets no probability. For
    encoder-decoder models the token ids are passed as ``labels`` and the
    output at position ``i`` is read as the score of label token ``i``.

    Args:
        model: Model exposing ``.to(device)``, optionally
            ``.config.is_encoder_decoder``, and returning an object with
            ``.logits`` when called with the tokenizer output.
        tokenizer: Callable turning text into a batch containing ``input_ids``.
        generated_text: Text whose tokens are scored.
        device: Device the model and inputs are moved to (note: the model
            itself is moved, which also affects later cells using it).

    Returns:
        list[float]: ``len(tokens) - 1`` probabilities for causal models,
        ``len(tokens)`` for encoder-decoder models.
    """
    model = model.to(device)
    inputs = tokenizer(generated_text, return_tensors="pt").to(device)

    # Some tokenizers emit token_type_ids that the model's forward() does not accept.
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]

    token_ids = inputs["input_ids"]
    is_seq2seq = getattr(getattr(model, "config", None), "is_encoder_decoder", False)

    with torch.no_grad():
        if is_seq2seq:
            # Labels drive the decoder, so position i lines up with label i.
            step_logits = model(**inputs, labels=token_ids).logits[0]
            targets = token_ids[0]
        else:
            # Next-token prediction: drop the last logit row and the first target.
            step_logits = model(**inputs).logits[0, :-1]
            targets = token_ids[0, 1:]

    # Softmax in float32 to avoid half-precision rounding on quantized/fp16 models.
    probabilities = torch.nn.functional.softmax(step_logits.float(), dim=-1)
    return probabilities.gather(-1, targets.unsqueeze(-1)).squeeze(-1).tolist()

In [None]:
def compute_perplexity(probabilities):
    """Return the perplexity implied by a sequence of per-token probabilities.

    Probabilities are clipped to ``[1e-10, 1.0]`` before taking logs, and the
    result is ``exp`` of the negated average log-probability.
    """
    floor = 1e-10
    clipped = np.clip(probabilities, floor, 1.0)
    total_log_prob = np.log(clipped).sum()
    return math.exp(-total_log_prob / len(clipped))

In [None]:
def combined_metric(reference_tokens, generated_tokens, token_probabilities, alpha=0.5):
    """Blend a BLEU score with an inverse-perplexity term.

    Returns 1.0 outright when the two token sequences compare equal.
    Otherwise returns ``alpha * bleu + (1 - alpha) * 1 / (1 + perplexity)``,
    where BLEU comes from ``compute_bleu`` and perplexity from
    ``compute_perplexity(token_probabilities)``.
    """
    if reference_tokens == generated_tokens:
        return 1.0

    overlap = compute_bleu(reference_tokens, generated_tokens)
    # Map perplexity (>= 1) into (0, 0.5] so that lower perplexity scores higher.
    fluency = 1 / (1 + compute_perplexity(token_probabilities))

    return alpha * overlap + (1 - alpha) * fluency

In [None]:
prompt = "What is the capital of France"
system_message = "You are a fact database"
# Chat prompt laid out with <|im_start|>/<|im_end|> role markers; it ends with
# an open assistant turn so that generation continues from there.
prompt_template=f'''<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
'''

print("\n\n*** Generate:")

# Move the encoded prompt to the device the model actually lives on instead of
# hard-coding 'cuda', so inputs and model are always on the same device.
input_ids = tokenizer_english(prompt_template, return_tensors='pt').to(model_english.device)
output = model_english.generate(**input_ids, return_dict_in_generate=True, max_new_tokens=512 ,output_attentions=True)
# Decode the first returned sequence back to text, dropping special tokens.
output_text = tokenizer_english.decode(output.sequences[0], skip_special_tokens=True)
print(output_text)

In [None]:
def compute_token_probabilities(model, tokenizer, generated_text, device="cpu"):
    """Return the probability the model assigns to each token of `generated_text`.

    For decoder-only (causal) models the logits at position ``i`` score the
    token at position ``i + 1``, so logits and targets are shifted by one; the
    first token has no preceding context and gets no probability. For
    encoder-decoder models the token ids are passed as ``labels`` and the
    output at position ``i`` is read as the score of label token ``i``.

    Args:
        model: Model exposing ``.to(device)``, optionally
            ``.config.is_encoder_decoder``, and returning an object with
            ``.logits`` when called with the tokenizer output.
        tokenizer: Callable turning text into a batch containing ``input_ids``.
        generated_text: Text whose tokens are scored.
        device: Device the model and inputs are moved to (note: the model
            itself is moved, which also affects later cells using it).

    Returns:
        list[float]: ``len(tokens) - 1`` probabilities for causal models,
        ``len(tokens)`` for encoder-decoder models.
    """
    model = model.to(device)
    # Tokenize the input text
    inputs = tokenizer(generated_text, return_tensors="pt").to(device)

    # Remove 'token_type_ids' if it exists
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]

    token_ids = inputs["input_ids"]
    is_seq2seq = getattr(getattr(model, "config", None), "is_encoder_decoder", False)

    with torch.no_grad():
        if is_seq2seq:
            # Labels drive the decoder, so position i lines up with label i.
            step_logits = model(**inputs, labels=token_ids).logits[0]
            targets = token_ids[0]
        else:
            # Next-token prediction: drop the last logit row and the first target.
            step_logits = model(**inputs).logits[0, :-1]
            targets = token_ids[0, 1:]

    # Softmax in float32 to avoid half-precision rounding on quantized/fp16 models.
    probabilities = torch.nn.functional.softmax(step_logits.float(), dim=-1)
    # Pick, for every scored position, the probability of the token that actually occurs.
    return probabilities.gather(-1, targets.unsqueeze(-1)).squeeze(-1).tolist()


In [None]:
generated_text = "Shakespeare wrote Romeo and Juliet"

# The metrics below compare token sequences. Calling the tokenizer directly
# returns an encoding object (ids, attention mask, ...) rather than a list of
# tokens, so use .tokenize() to get the token strings themselves.
reference = tokenizer_english.tokenize("Shakespeare")
generated = tokenizer_english.tokenize(generated_text)

In [None]:
# Per-token probabilities of the generated text under the English model;
# `device` is left at the helper's default.
probabilities = compute_token_probabilities(model_english, tokenizer_english, generated_text)

# Blend BLEU and inverse perplexity, weighting the BLEU term with alpha=0.7.
score = combined_metric(reference, generated, probabilities, alpha=0.7)
print("Combined Metric Score:", score)


In [None]:
reference_answer = "Denver Broncos"
prediction_answer = "The Denver Broncos represented the AFC at Super Bowl 50"
# Use the English section's tokenizer: a bare `tokenizer` is only created
# later, in the German section, and belongs to a different model.
reference_tokens = tokenizer_english.tokenize(reference_answer)
prediction_tokens = tokenizer_english.tokenize(prediction_answer)



In [None]:
# Display the tokenized prediction.
prediction_tokens

In [None]:
# compute_tbleu is the helper that takes both the token lists and the raw
# answers; compute_bleu does not accept this four-argument form.
tbleu = compute_tbleu(reference_tokens, prediction_tokens, reference_answer, prediction_answer)
print(f"Token-Based Score: {tbleu:.4f}")

In [None]:
# Plain BLEU on the same answer pair, mirroring the Hindi and German sections
# (the previous call passed tokenizer encodings to compute_tbleu).
bleu = compute_bleu(reference_answer, prediction_answer)
print(f"bleu Score: {bleu:.4f}")

# Hindi (varta-t5)

In [None]:
! pip install transformers torch
! pip install transformers huggingface_hub
! pip install -U langsmith
! pip install langchain-community

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [None]:
# Tokenizer and seq2seq model for the Hindi section, both from "rahular/varta-t5".
tokenizer_hindi = AutoTokenizer.from_pretrained("rahular/varta-t5")
model_hindi = AutoModelForSeq2SeqLM.from_pretrained("rahular/varta-t5")

In [None]:
import torch
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_hindi.to(device)

In [None]:
input_text = "Answer in hindi. फ्रांस की राजधानी क्या है?"
# Keep the input ids on the same device as the model rather than hard-coding
# "cuda", which fails when the model was placed on the CPU.
input_ids = tokenizer_hindi(input_text, return_tensors="pt").input_ids.to(model_hindi.device)

outputs = model_hindi.generate(input_ids)
# Decode the first generated sequence (special tokens are kept in the output).
print(tokenizer_hindi.decode(outputs[0]))

# Inference

In [None]:
from langsmith import Client
# Client built with no explicit arguments.
# NOTE(review): presumably it reads its credentials from the environment — confirm.
client = Client()

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from datasets import load_dataset
# Use the Hindi model and tokenizer loaded above (bare `model`/`tokenizer` are
# only created in the German section). The model is a seq2seq one, which needs
# the "text2text-generation" task; "text-generation" is for causal LMs.
llm = pipeline("text2text-generation", model=model_hindi, tokenizer=tokenizer_hindi)
# Hindi configuration of the xquad dataset.
dataset = load_dataset("xquad", "xquad.hi")

In [None]:
from langsmith import evaluate

def evaluate_text_generation(model, dataset, max_examples=1, prompt_prefix="answer in Hindi "):
    """Generate answers for the first validation questions and pair them with references.

    Args:
        model: Callable invoked as ``model(prompt, max_length=100,
            num_return_sequences=1)``; it must return a sequence whose first
            item is a mapping with a ``"generated_text"`` entry.
        dataset: Mapping with a ``"validation"`` iterable of examples, each
            providing ``example["question"]`` and ``example["answers"]["text"]``.
        max_examples: How many validation examples to process. Defaults to 1,
            matching the previous behaviour of stopping after the first
            example; ``None`` processes the whole split. Must not be negative.
        prompt_prefix: Text prepended to every question before generation.

    Returns:
        dict: ``{"generated_texts": [...], "expected_texts": [...]}`` with one
        entry per processed example, in dataset order.
    """
    from itertools import islice

    generated_texts = []
    expected_texts = []

    # islice replaces the unconditional `break` that limited the loop to one item.
    for example in islice(dataset["validation"], max_examples):
        prompt = prompt_prefix + example["question"]
        # Only the first listed reference answer is kept.
        expected_answer = example["answers"]["text"][0]

        generated_text = model(prompt, max_length=100, num_return_sequences=1)[0]["generated_text"]

        generated_texts.append(generated_text)
        expected_texts.append(expected_answer)

    return {"generated_texts": generated_texts, "expected_texts": expected_texts}

In [None]:
# Display the first validation example.
dataset['validation'][0]

In [None]:
# Run the evaluation helper with the pipeline and dataset defined above.
evaluate_text_generation(llm, dataset)

# Token-Based Evaluation

In [None]:
reference_answer = "308"
prediction_answer = "1 से 5 तक वर्ड्स ऑफ वर्ड्स"
# Tokenize with the Hindi section's tokenizer; a bare `tokenizer` is only
# created later, in the German section, and belongs to a different model.
reference_tokens = tokenizer_hindi.tokenize(reference_answer)
prediction_tokens = tokenizer_hindi.tokenize(prediction_answer)

In [None]:
# Token lists feed the brevity penalty; the raw answer strings feed the n-gram precisions.
tbleu = compute_tbleu(reference_tokens, prediction_tokens,  reference_answer, prediction_answer)
print(f"Token-Based Score: {tbleu:.4f}")

In [None]:
# BLEU on the raw answer strings (not the token lists).
# NOTE(review): which compute_bleu definition runs depends on cell execution order — confirm.
bleu = compute_bleu(reference_answer, prediction_answer)
print(f"bleu Score: {bleu:.4f}")

In [None]:
# Display the tokenized reference answer.
reference_tokens

In [None]:
# Display the tokenized prediction.
prediction_tokens

In [None]:
generated_text = "फ्रांस की राजधानी पेरिस में एक बार फिर से कोरोना वायरस के नए वेरिएंट के मामले सामने आए हैं"

# combined_metric compares token sequences. Calling the tokenizer directly
# returns an encoding object rather than a list of tokens, so use .tokenize().
reference = tokenizer_hindi.tokenize("पेरिस")
generated = tokenizer_hindi.tokenize(generated_text)

# Per-token probabilities of the generated text under the Hindi model;
# `device` is left at the helper's default.
probabilities = compute_token_probabilities(model_hindi, tokenizer_hindi, generated_text)

# Blend BLEU and inverse perplexity with equal weight (alpha=0.5).
score = combined_metric(reference, generated, probabilities, alpha=0.5)
print("Combined Metric Score:", score)


# German (flan-t5-base)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM

In [None]:
# Tokenizer and seq2seq model for the German section, both from "google/flan-t5-base".
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

In [None]:
import torch
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

In [None]:
# Display which device string was selected.
device

In [None]:
input_text = "Answer in German. Was ist die Hauptstadt von Frankreich?"
# Keep the input ids on the same device as the model rather than hard-coding
# "cuda", which fails when the model was placed on the CPU.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

outputs = model.generate(input_ids)
# Decode the first generated sequence (special tokens are kept in the output).
print(tokenizer.decode(outputs[0]))

# Inference

In [None]:
from langsmith import Client
# Client built with no explicit arguments.
# NOTE(review): presumably it reads its credentials from the environment — confirm.
client = Client()

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from datasets import load_dataset
# `model` is a seq2seq (T5-family) model, which needs the
# "text2text-generation" task; "text-generation" is for causal LMs.
llm = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
# German configuration of the xquad dataset.
dataset = load_dataset("xquad", "xquad.de")

In [None]:
from langsmith import evaluate

def evaluate_text_generation(model, dataset, max_examples=1):
    """Generate answers for the first validation questions and pair them with references.

    Args:
        model: Callable invoked as ``model(prompt, max_length=100,
            num_return_sequences=1)``; it must return a sequence whose first
            item is a mapping with a ``"generated_text"`` entry.
        dataset: Mapping with a ``"validation"`` iterable of examples, each
            providing ``example["question"]`` and ``example["answers"]["text"]``.
        max_examples: How many validation examples to process. Defaults to 1,
            matching the previous behaviour of stopping after the first
            example; ``None`` processes the whole split. Must not be negative.

    Returns:
        dict: ``{"generated_texts": [...], "expected_texts": [...]}`` with one
        entry per processed example, in dataset order.
    """
    from itertools import islice

    generated_texts = []
    expected_texts = []

    # islice replaces the unconditional `break` that limited the loop to one item.
    for example in islice(dataset["validation"], max_examples):
        prompt = example["question"]
        # Only the first listed reference answer is kept.
        expected_answer = example["answers"]["text"][0]

        generated_text = model(prompt, max_length=100, num_return_sequences=1)[0]["generated_text"]

        generated_texts.append(generated_text)
        expected_texts.append(expected_answer)

    return {"generated_texts": generated_texts, "expected_texts": expected_texts}

In [None]:
# Display the first validation example.
dataset['validation'][0]

In [None]:
# Run the evaluation helper with the pipeline and dataset defined above.
evaluate_text_generation(llm, dataset)

# Evaluation

In [None]:
# Hand-written reference/prediction pair; the prediction is a prefix of the reference.
reference_answer = "Die Verteidigung der Panthers gab nur 308 Punkte ab und belegte"
prediction_answer = "Die Verteidigung der Panthers gab"
# Token lists produced by the German section's tokenizer.
reference_tokens = tokenizer.tokenize(reference_answer)
prediction_tokens = tokenizer.tokenize(prediction_answer)

In [None]:
# Score the token lists with compute_bleu and print the result under the "Token-Based Score" label.
# NOTE(review): the Hindi section uses compute_tbleu for this label — confirm which is intended.
tbleu = compute_bleu(reference_tokens, prediction_tokens)
print(f"Token-Based Score: {tbleu:.4f}")

In [None]:
# BLEU on the raw answer strings (not the token lists).
# NOTE(review): which compute_bleu definition runs depends on cell execution order — confirm.
bleu = compute_bleu(reference_answer, prediction_answer)
print(f"bleu Score: {bleu:.4f}")