In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
from tqdm import tqdm
import torch
import json
import math
import time
import os

MODEL_PATH='../../Llama-3.2-3B-Instruct'
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

QUESTIONS_PATH = "../rag_questions_json"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, torch_dtype=torch.bfloat16).to(DEVICE).eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 23.84it/s]


In [3]:
def create_chat(question):
    return [
        {"role": "user", "content": f"{question}"},
    ]

### Example question

In [4]:
question = "What are the main indicators that were chosen to study in order to understand and forecast the evolution of carbon emissions on a country-scale, and why were they chosen?"
prompt_with_context = tokenizer.apply_chat_template(
    create_chat(question), 
    tokenize=False, 
    add_generation_prompt=True
)
print(prompt_with_context)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 21 Apr 2025

<|eot_id|><|start_header_id|>user<|end_header_id|>

What are the main indicators that were chosen to study in order to understand and forecast the evolution of carbon emissions on a country-scale, and why were they chosen?<|eot_id|><|start_header_id|>assistant<|end_header_id|>




In [5]:
streamer = TextStreamer(tokenizer, skip_prompt=True)

input_ids = tokenizer.encode(prompt_with_context, return_tensors="pt").to(DEVICE)
outputs = model.generate(
    input_ids,
    max_new_tokens=500,
    pad_token_id=128004,
    eos_token_id=128009,
    streamer=streamer,
    do_sample=True,
)

To study and forecast the evolution of carbon emissions on a country-scale, researchers and policymakers have identified several key indicators that provide insights into the factors driving greenhouse gas emissions. Some of the main indicators used to study carbon emissions include:

1. **Energy consumption**: This indicator measures the amount of energy used by a country, including fossil fuels, renewable energy, and energy efficiency. It's a key driver of carbon emissions, as most energy is converted into carbon dioxide (CO2) through combustion.
2. **Population growth and density**: A growing population and increasing urbanization can lead to higher energy demand, transportation emissions, and other carbon-intensive activities.
3. **Economic growth**: Economic growth is often associated with increased energy consumption, industrial production, and transportation, which can lead to higher carbon emissions.
4. **Industrial production**: The production of goods and services, particular

### Evaluation (Perplexity)

In [6]:
def create_target_chat(question, answer):
    return [
        {"role": "user", "content": f"{question}"},
        {"role": "assistant", "content": f"{answer}"},
    ]

In [7]:
loss = 0
losses = list()
question_filenames = os.listdir(QUESTIONS_PATH)

for question_filename in tqdm(question_filenames):
    with open(os.path.join(QUESTIONS_PATH, question_filename), 'r', encoding='utf-8') as f:
        qa = json.load(f)

    target_chat = tokenizer.apply_chat_template(
        create_target_chat(qa["question"], qa["answer"]), 
        tokenize=False, 
        add_generation_prompt=False
    )
    tokens = tokenizer(target_chat, return_tensors="pt", truncation=False, padding=False, add_special_tokens=False).to(DEVICE)
    tokens = {k: v.to(DEVICE) for k, v in tokens.items()}

    prompt = tokenizer.apply_chat_template(
        create_chat(qa["question"]), 
        tokenize=False, 
        add_generation_prompt=True
    )
    ignore_idx = tokenizer.encode(prompt, return_tensors="pt", truncation=False, padding=False, add_special_tokens=False).shape[1]

    filtered_labels = tokens["input_ids"].clone()
    ignore_mask = torch.zeros_like(filtered_labels, dtype=torch.bool)
    ignore_mask[0, :ignore_idx] = True
    filtered_labels[ignore_mask] = -100

    with torch.no_grad():
        outputs = model(**tokens, labels=filtered_labels)
        losses.append(outputs.loss.item())
        loss += outputs.loss.item()

perplexity = math.exp(loss/len(question_filenames))
print(f"Perplexity: {perplexity:.2f}")

100%|██████████| 747/747 [00:29<00:00, 25.36it/s]

Perplexity: 12.84





### Evaluation (Average time per request)

In [8]:
begin = time.time()

for question_filename in tqdm(question_filenames):
    with open(os.path.join(QUESTIONS_PATH, question_filename), 'r', encoding='utf-8') as f:
        qa = json.load(f)

    prompt = tokenizer.apply_chat_template(
        create_chat(qa["question"]), 
        tokenize=False, 
        add_generation_prompt=True
    )
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(DEVICE)
    model.generate(
        input_ids,
        max_new_tokens=500,
        pad_token_id=128004,
        eos_token_id=128009,
        do_sample=False,
        top_p=1.0,
    )

print(f"Average time per request: {(time.time() - begin) / len(question_filenames):.2f} seconds")

100%|██████████| 747/747 [2:43:26<00:00, 13.13s/it]  

Average time per request: 13.13 seconds



