### Imports

In [4]:
# Native
import os
import sys
import logging
from pathlib import Path

# Third Party
import torch
import pandas as pd
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Add project root to sys.path
p = Path.cwd()
for d in [p] + list(p.parents):
    if (d / "shared").exists():
        sys.path.insert(0, str(d))
        print("Inserted to sys.path:", d)
        break
else:
    print("Warning: 'shared' folder not found in parent dirs")

# Local
from shared.prompts import CALCULATE_PRIORITIZATION_PROMPT

# Load environment variables from .env file
load_dotenv()

Inserted to sys.path: /home/luccasabbatini/github/uff/2025/genai_para_es/ai-requirements-priorization


True

### Constants

In [5]:
HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
MAX_TOKENS = 1000
DATASET = "ralic"
SOFTWARE_PROJECT = DATASET.split("/")[-1]
DATASET_PATH = f"../data/{DATASET}/"
OUTPUT_PATH = f"../results/{DATASET}/"

### Setup

In [6]:
# Configure logging (safe for notebook re-runs)
root_logger = logging.getLogger()

if not root_logger.handlers:
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )
else:
    root_logger.setLevel(logging.INFO)
    for h in root_logger.handlers:
        h.setLevel(logging.INFO)
    root_logger.propagate = False
    
# Login to Hugging Face Hub
login(token=HUGGINGFACE_HUB_TOKEN)

### Verify GPU Availability and Info

In [7]:
# Log GPU info
if torch.cuda.is_available():
    logging.info(
        f"Torch CUDA version: {torch.version.cuda}; GPU: {torch.cuda.get_device_name(0)}"
    )
else:
    logging.info("No GPU found, training on CPU")

2025-12-01 22:55:32,775 - INFO - Torch CUDA version: 12.4; GPU: NVIDIA GeForce RTX 4060 Ti


### Setup Model

In [8]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Check if model is using GPU or CPU
logging.info(f"Model device: {next(model.parameters()).device}")

`torch_dtype` is deprecated! Use `dtype` instead!
2025-12-01 22:55:34,452 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|██████████| 4/4 [00:23<00:00,  5.85s/it]
2025-12-01 22:55:58,312 - INFO - Model device: cuda:0


### Calculate Cost, Risk, and Value for Requirements

In [9]:
# Load requirements csv into pandas DataFrame
requirements_df = pd.read_csv(os.path.join(DATASET_PATH, f"{SOFTWARE_PROJECT}.csv"))
software_description_file = os.path.join(DATASET_PATH, f"{SOFTWARE_PROJECT}.md")

with open(software_description_file, "r", encoding="utf-8") as f:
		software_description = f.read()

# Calculate Cost, Risk, and Value for each requirement
calculation_results = ["ID, Requirement, Description, Cost, Risk, Value"]

for index, row in requirements_df.iterrows():
    logging.info(f"Processing requirement {index + 1} of {len(requirements_df)}")

    requirement_id = row['ID']
    requirement_text = row['Requirement']
    requirement_description = row['Description']

    prompt = CALCULATE_PRIORITIZATION_PROMPT.format(
				software_description=software_description,
		)

    # Prepare input for model
    messages = [
				{
						"role": "system",
						"content": prompt,
				},
				{
						"role": "user",
						"content": f"ID,Requirement,Description\n\"{requirement_id}\",\"{requirement_text}\",\"{requirement_description}\"",
				},
		]

    # Generate response from model
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=MAX_TOKENS)
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Store the result
    calculation_results.append(response.strip())

    logging.info(f"Completed requirement {index + 1}")

# Save results to CSV file
output_file = os.path.join(OUTPUT_PATH, f"{SOFTWARE_PROJECT} extended.csv")
with open(output_file, "w", encoding="utf-8") as f:
		f.write("\n".join(calculation_results))

2025-12-01 22:55:58,348 - INFO - Processing requirement 1 of 50
2025-12-01 22:56:12,857 - INFO - Completed requirement 1
2025-12-01 22:56:12,860 - INFO - Processing requirement 2 of 50
2025-12-01 22:56:19,585 - INFO - Completed requirement 2
2025-12-01 22:56:19,587 - INFO - Processing requirement 3 of 50
2025-12-01 22:56:25,885 - INFO - Completed requirement 3
2025-12-01 22:56:25,887 - INFO - Processing requirement 4 of 50
2025-12-01 22:56:34,415 - INFO - Completed requirement 4
2025-12-01 22:56:34,418 - INFO - Processing requirement 5 of 50
2025-12-01 22:56:40,976 - INFO - Completed requirement 5
2025-12-01 22:56:40,977 - INFO - Processing requirement 6 of 50
2025-12-01 22:56:47,360 - INFO - Completed requirement 6
2025-12-01 22:56:47,362 - INFO - Processing requirement 7 of 50
2025-12-01 22:56:55,623 - INFO - Completed requirement 7
2025-12-01 22:56:55,625 - INFO - Processing requirement 8 of 50
2025-12-01 22:57:07,555 - INFO - Completed requirement 8
2025-12-01 22:57:07,557 - INFO -

### Clean Up GPU Memory

In [10]:
if torch.cuda.is_available():
		torch.cuda.empty_cache()