### Imports

In [8]:
# Native
import csv
import io
import logging
import os
import sys
from pathlib import Path

# Third Party
import pandas as pd
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Add project root to sys.path
# Search the working directory and then each of its ancestors, nearest first,
# for the first directory that contains a "shared" entry. That directory is
# put at the front of sys.path so `shared.*` modules can be imported.
p = Path.cwd()
project_root = next(
    (candidate for candidate in (p, *p.parents) if (candidate / "shared").exists()),
    None,
)
if project_root is None:
    print("Warning: 'shared' folder not found in parent dirs")
else:
    sys.path.insert(0, str(project_root))
    print("Inserted to sys.path:", project_root)

# Local
from shared.prompts import CALCULATE_PRIORITIZATION_PROMPT

# Load environment variables from .env file
# This is the last expression of the cell, so the value it returns is what
# the notebook displays as the cell output.
load_dotenv()

Inserted to sys.path: /home/luccasabbatini/github/uff/2025/genai_para_es/ai-requirements-priorization


True

### Constants

In [9]:
# Hugging Face access token taken from the environment; None when the variable is unset.
HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")

# Checkpoint to load and the cap on newly generated tokens per requirement.
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
MAX_TOKENS = 1000

# Dataset identifier; its last "/"-separated component names the project files.
DATASET = "ralic"
SOFTWARE_PROJECT = DATASET.rsplit("/", 1)[-1]

# Input and output directories, relative to the notebook's working directory.
DATASET_PATH = "../data/" + DATASET + "/"
OUTPUT_PATH = "../results/" + DATASET + "/"

### Setup

In [10]:
# Configure logging (safe for notebook re-runs)
root_logger = logging.getLogger()

if not root_logger.handlers:
    # No handlers yet: install the default stream handler with our format.
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )
else:
    # Handlers already exist (e.g. the cell was re-run), in which case
    # basicConfig would do nothing; just make sure INFO records get through.
    # The previous `root_logger.propagate = False` was dropped: the root
    # logger has no parent, so the flag had no effect.
    root_logger.setLevel(logging.INFO)
    for handler in root_logger.handlers:
        handler.setLevel(logging.INFO)

# Login to Hugging Face Hub
# Only log in when a token is actually configured. Passing token=None makes
# `login` fall back to an interactive prompt, which gets in the way of an
# unattended "Restart & Run All".
if HUGGINGFACE_HUB_TOKEN:
    login(token=HUGGINGFACE_HUB_TOKEN)
else:
    logging.warning(
        "HUGGINGFACE_HUB_TOKEN is not set; skipping Hugging Face Hub login"
    )

### Verify GPU Availability and Info

In [11]:
# Log GPU info
# Report the CUDA runtime and the name of device 0 when a GPU is visible to
# PyTorch; otherwise state that the notebook falls back to the CPU.
if torch.cuda.is_available():
    logging.info(
        "Torch CUDA version: %s; GPU: %s",
        torch.version.cuda,
        torch.cuda.get_device_name(0),
    )
else:
    # This notebook only runs generation, so the message says "inference"
    # rather than the previous (misleading) "training".
    logging.info("No GPU found, running inference on CPU")

2025-12-01 21:24:31,471 - INFO - Torch CUDA version: 12.4; GPU: NVIDIA GeForce RTX 4060 Ti


### Setup Model

In [12]:
# Load the tokenizer and the causal LM for MODEL_NAME. Weights are requested
# in bfloat16 and device_map="auto" leaves module placement to the library.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Check if model is using GPU or CPU
# The first parameter's device is also where the inputs are sent later on.
logging.info(f"Model device: {next(model.parameters()).device}")

# With device_map="auto" the modules may be spread over several devices, so a
# single parameter does not tell the whole story. Report the full placement
# when the loader recorded one.
model_device_map = getattr(model, "hf_device_map", None)
if model_device_map:
    placements = sorted({str(device) for device in model_device_map.values()})
    logging.info("Model modules are placed on: %s", ", ".join(placements))
    if torch.cuda.is_available() and any(
        placement in ("cpu", "disk") for placement in placements
    ):
        logging.warning(
            "Some or all model modules were placed on CPU/disk although a GPU "
            "is available; generation will be slow"
        )

2025-12-01 21:24:36,380 - INFO - Based on the current allocation process, no modules could be assigned to the following devices due to insufficient memory:
  - 0: 2179989504 bytes required
These minimum requirements are specific to this allocation attempt and may vary. Consider increasing the available memory for these devices to at least the specified minimum, or adjusting the model config.
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 37.61it/s]
2025-12-01 21:24:36,839 - INFO - Model device: cpu


### Calculate Cost, Risk, and Value for Requirements

In [None]:
def _format_requirement_row(requirement_id, requirement_text, requirement_description):
    """Return the user message for one requirement: a CSV header plus one data row.

    Every field is wrapped in double quotes and embedded double quotes are
    escaped by the csv module, so requirement texts that contain quotes or
    commas still form a single well-formed row.
    """
    buffer = io.StringIO()
    csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="").writerow(
        [requirement_id, requirement_text, requirement_description]
    )
    return "ID,Requirement,Description\n" + buffer.getvalue()


# Load requirements csv into pandas DataFrame
requirements_df = pd.read_csv(os.path.join(DATASET_PATH, f"{SOFTWARE_PROJECT}.csv"))
software_description_file = os.path.join(DATASET_PATH, f"{SOFTWARE_PROJECT}.md")

with open(software_description_file, "r", encoding="utf-8") as f:
    software_description = f.read()

# Make sure the destination exists BEFORE the slow generation loop, so a
# missing results directory cannot discard all results at the very end.
os.makedirs(OUTPUT_PATH, exist_ok=True)
output_file = os.path.join(OUTPUT_PATH, f"{SOFTWARE_PROJECT} extended.csv")

# The system prompt depends only on the software description, so build it once.
prompt = CALCULATE_PRIORITIZATION_PROMPT.format(
    software_description=software_description,
)

# Calculate Cost, Risk, and Value for each requirement
# NOTE(review): the header is kept exactly as before (spaces after the commas);
# confirm it matches the row format the prompt asks the model to produce.
calculation_results = ["ID, Requirement, Description, Cost, Risk, Value"]
total_requirements = len(requirements_df)

# Count positions with enumerate instead of doing arithmetic on the index
# label, which is only a 0-based integer for a default index.
for position, (_label, row) in enumerate(requirements_df.iterrows(), start=1):
    logging.info(f"Processing requirement {position} of {total_requirements}")

    requirement_id = row['ID']
    requirement_text = row['Requirement']
    requirement_description = row['Description']

    # Prepare input for model
    messages = [
        {
            "role": "system",
            "content": prompt,
        },
        {
            "role": "user",
            "content": _format_requirement_row(
                requirement_id, requirement_text, requirement_description
            ),
        },
    ]

    # Generate response from model
    # NOTE(review): generation settings other than max_new_tokens come from the
    # model's own defaults; if those enable sampling, results will differ
    # between runs unless a seed is set -- confirm.
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=MAX_TOKENS)
    # Drop the prompt tokens so only the newly generated continuation is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Store the result
    calculation_results.append(response.strip())

    logging.info(f"Completed requirement {position}")

# Save results to CSV file
with open(output_file, "w", encoding="utf-8") as f:
    f.write("\n".join(calculation_results))

2025-12-01 22:00:12,082 - INFO - Processing requirement 1 of 50


### Clean Up GPU Memory

In [None]:
# Ask PyTorch to empty its CUDA cache, but only when a GPU is present.
# NOTE(review): `model` is still referenced by the notebook at this point, so
# this presumably frees little memory unless those references are deleted
# first -- confirm whether that is intended.
if torch.cuda.is_available():
    torch.cuda.empty_cache()