### Imports

In [1]:
# Native
import os
import sys
import logging
from pathlib import Path

# Third Party
import torch
import pandas as pd
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Add project root to sys.path so the local `shared` package can be imported.
# Walk upward from the current working directory and stop at the first
# directory that contains an entry named "shared".
p = Path.cwd()
for d in (p, *p.parents):
    if (d / "shared").exists():
        project_root = str(d)
        # Remove copies left behind by earlier runs of this cell before
        # inserting, so re-running never grows sys.path while the root still
        # ends up first in the search order.
        sys.path[:] = [entry for entry in sys.path if entry != project_root]
        sys.path.insert(0, project_root)
        print("Inserted to sys.path:", d)
        break
else:
    # for/else: reached only when the loop finished without hitting `break`.
    print("Warning: 'shared' folder not found in parent dirs")

# Local
from shared.prompts import PRIORITIZATION_PROMPT

# Load environment variables from .env file; presumably this is what makes
# HUGGINGFACE_HUB_TOKEN visible to the os.getenv call in the constants cell
# (verify the .env contents).
# The call is deliberately left as the cell's last bare expression: its return
# value is what the notebook shows as the cell output.
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


Inserted to sys.path: /home/luccasabbatini/github/uff/2025/genai_para_es/ai-requirements-priorization


True

### Constants

In [2]:
# Secret is taken from the process environment; None when the variable is unset.
HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")

# Model identifier and the token budget used for generation.
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
MAX_TOKENS = 8000

# Dataset identifier; the project name is the part after the last "/".
DATASET = "pure/2001 - space fractions"
SOFTWARE_PROJECT = DATASET.rsplit("/", 1)[-1]

# Directories, relative to the notebook's working directory.
# Requirements are read from and results are written to the same folder.
DATASET_PATH = f"../data/{DATASET}/"
REQUIREMENTS_PATH = f"../results/{DATASET}/"
OUTPUT_PATH = f"../results/{DATASET}/"

### Setup

In [3]:
# Configure logging (safe for notebook re-runs)
root_logger = logging.getLogger()

if root_logger.handlers:
    # Handlers are already attached (e.g. the cell ran before): keep them and
    # only make sure INFO records pass both the logger and every handler.
    root_logger.setLevel(logging.INFO)
    for handler in root_logger.handlers:
        handler.setLevel(logging.INFO)
    root_logger.propagate = False
else:
    # Nothing configured yet: let basicConfig set the level and the
    # timestamped message format.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    
# Login to Hugging Face Hub using the token read from the environment.
# NOTE(review): os.getenv yields None when HUGGINGFACE_HUB_TOKEN is unset; how
# login() reacts to token=None is not visible here — confirm the variable is
# defined before running this cell.
login(token=HUGGINGFACE_HUB_TOKEN)

### Verify GPU Availability and Info

In [4]:
# Report whether PyTorch can see a CUDA device; when it can, log the CUDA
# version and the name of device index 0.
if not torch.cuda.is_available():
    logging.info("No GPU found, training on CPU")
else:
    logging.info(
        f"Torch CUDA version: {torch.version.cuda}; GPU: {torch.cuda.get_device_name(0)}"
    )

2025-12-01 18:59:16,246 - INFO - Torch CUDA version: 12.4; GPU: NVIDIA GeForce RTX 4060 Ti


### Setup Model

In [5]:
# Load the tokenizer and the causal-LM weights identified by MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # The installed transformers release reports "`torch_dtype` is deprecated!
    # Use `dtype` instead!", so the precision is passed through `dtype`.
    dtype=torch.bfloat16,
    device_map="auto",
)

# Check if model is using GPU or CPU. Only the first parameter is inspected.
# NOTE(review): with device_map="auto" other parameters might live on a
# different device — confirm if that matters for this run.
logging.info(f"Model device: {next(model.parameters()).device}")

`torch_dtype` is deprecated! Use `dtype` instead!
2025-12-01 18:59:18,292 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|██████████| 4/4 [00:25<00:00,  6.37s/it]
2025-12-01 18:59:44,182 - INFO - Model device: cuda:0


### Prioritize Requirements

In [6]:
# Extended requirements: "<project> extended.csv" under REQUIREMENTS_PATH,
# read as raw UTF-8 text (the CSV is not parsed here).
extended_requirements_file = os.path.join(
    REQUIREMENTS_PATH, f"{SOFTWARE_PROJECT} extended.csv"
)
extended_requirements = Path(extended_requirements_file).read_text(encoding="utf-8")

# Software description: "<project>.md" under DATASET_PATH, also raw UTF-8 text.
software_description_file = os.path.join(DATASET_PATH, f"{SOFTWARE_PROJECT}.md")
software_description = Path(software_description_file).read_text(encoding="utf-8")

# Fill the prompt template's `software_description` placeholder.
prompt = PRIORITIZATION_PROMPT.format(software_description=software_description)

# Prepare input for model: the formatted prompt is sent as the system turn and
# the raw requirements text as the user turn.
system_turn = {"role": "system", "content": prompt}
user_turn = {"role": "user", "content": extended_requirements}
messages = [system_turn, user_turn]

# Render the conversation through the tokenizer's chat template as text
# (tokenize=False), then tokenize it as a batch of one on the model's device.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate response from model, capped at MAX_TOKENS new tokens.
generated_ids = model.generate(**model_inputs, max_new_tokens=MAX_TOKENS)

# From every returned sequence drop the first len(prompt_ids) ids — presumably
# the echoed prompt — so only the newly generated tokens remain.
generated_ids = [
    sequence[len(prompt_ids):]
    for prompt_ids, sequence in zip(model_inputs.input_ids, generated_ids)
]

# Decode the single batch entry back to text without special tokens.
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Store the result
output_file = os.path.join(OUTPUT_PATH, f"{SOFTWARE_PROJECT} roadmap.csv")

# Persist the model's response verbatim; create the results directory first in
# case it does not exist yet.
# NOTE(review): the text is written exactly as generated — whether it is valid
# CSV depends on the model output; verify before consuming it downstream.
os.makedirs(OUTPUT_PATH, exist_ok=True)
with open(output_file, "w", encoding="utf-8") as f:
    f.write(response)

logging.info(f"Roadmap saved to {output_file}")

KeyboardInterrupt: 