In [4]:
import torch

# Sanity-check the CUDA setup before any model is loaded.
cuda_available = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()
print(cuda_available)  # Should return True
print(gpu_count)  # Should return the number of GPUs

# Query device names only when CUDA is usable: asking for device 0 on a
# machine without a working GPU raises instead of reporting the problem.
if cuda_available:
    for gpu_index in range(gpu_count):
        print(torch.cuda.get_device_name(gpu_index))  # Should show the GPU model
else:
    print("No CUDA device detected")

True
1
NVIDIA RTX 5000 Ada Generation


In [6]:
import transformers
import torch
import os
import json
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
import time
from datetime import timedelta
import pandas as pd
from dotenv import load_dotenv


# Load environment variables from the project's .env file; later cells read
# them through os.getenv (e.g. HUGGINGFACE_TOKEN).
# NOTE(review): the relative path is presumably resolved against the kernel's
# working directory rather than this notebook's location — confirm.
load_dotenv(dotenv_path="../../.env") # relative path, adjust as needed

True

In [7]:
# Authenticate against the Hugging Face Hub with the token taken from the environment.
login(token=os.getenv("HUGGINGFACE_TOKEN"))

# Point every Hugging Face cache location at one shared directory.
# NOTE(review): these variables are assigned after transformers and
# huggingface_hub were imported; if those libraries read them at import time
# the assignments may not take effect — confirm, or set them before the imports.
cache_dir = "D:/huggingface_cache"
cache_env_vars = ("HF_HOME", "TRANSFORMERS_CACHE", "HUGGINGFACE_HUB_CACHE")

for env_name in cache_env_vars:
    os.environ[env_name] = cache_dir

# Echo the effective values so the cell output documents the configuration.
for env_name in cache_env_vars:
    print(f"{env_name}:", os.getenv(env_name))

# Also override the module-level default held by transformers itself.
transformers.utils.hub.TRANSFORMERS_CACHE = cache_dir

HF_HOME: D:/huggingface_cache
TRANSFORMERS_CACHE: D:/huggingface_cache
HUGGINGFACE_HUB_CACHE: D:/huggingface_cache


In [8]:
# Checkpoint identifier shared by the tokenizer and the model weights.
model_name = "meta-llama/Meta-Llama-3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in half precision and place them on the CUDA device.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="cuda",
)


Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
# Sweep grid for the generation experiment (powers of two from 128 to 2048).
# chunk_sizes selects which pre-extracted chunk file is read for each run.
chunk_sizes = [2 ** exponent for exponent in range(7, 12)]
# Number of question-answer pairs requested from the model per chunk.
questions_num = 2
# Values passed as the generation budget (max_new_tokens) for each run.
max_token_list = [2 ** exponent for exponent in range(7, 12)]


In [10]:
# One row of metrics is appended to this frame per (chunk_size, max_tokens) run.
results_columns = [
    "chunk_size",
    "questions_num",
    "qa_count_mismatch",
    "total_questions",
    "token_Size",
    "total_chunks",
    "success_count",
    "fail_count",
    "elapsed_time",
]
results_df = pd.DataFrame(columns=results_columns)

In [11]:
# Checkpointing to avoid re-running completed configurations
checkpoint_path = "checkpoint.csv"

# Load existing checkpoint if it exists
if os.path.exists(checkpoint_path):
    completed_runs = pd.read_csv(checkpoint_path)
    completed_set = set(zip(
        completed_runs["chunk_size"].astype(int),
        completed_runs["max_tokens"].astype(int)
    ))
else:
    completed_set = set()
    pd.DataFrame(columns=["chunk_size", "max_tokens"]).to_csv(checkpoint_path, index=False)


# Convert logs to a pandas DataFrame


In [12]:
# Question-answer generation sweep.
#
# For every (chunk_size, max_tokens) combination that is neither in the
# checkpoint nor already has an output file, the model is prompted once per
# text chunk. Responses that parse as a JSON list are kept; everything else is
# counted as a failure. Each finished run writes a QA log (JSON), appends one
# metrics row to the results CSV, and appends one entry to the checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loop-invariant values, looked up once instead of once per chunk.
max_context = model.config.max_position_embeddings
# Passed explicitly so generate() does not have to pick a fallback padding
# token (and warn about it) on every single call.
pad_token_id = tokenizer.eos_token_id

csv_output_path = "../Yaman/Generated_Results/LLAMA3_1/results_log.csv"

for chunk_size in chunk_sizes:
    json_file_path = f"../Yaman/Generate_Paragraphs/Results/extracted_chunks_{chunk_size}_overlap.json"

    if not os.path.exists(json_file_path):
        print(f"Missing input file: {json_file_path}, skipping.")
        continue

    with open(json_file_path, "r", encoding="utf-8") as file:
        chunk_data = json.load(file)

    for max_tokens in max_token_list:
        if (chunk_size, max_tokens) in completed_set:
            print(f"Skipping completed run: Chunk={chunk_size}, Max Tokens={max_tokens}")
            continue

        output_file_path = f"../Yaman/Generated_Results/LLAMA3_1/generation_log_{chunk_size}_Token_{max_tokens}_Q{questions_num}.json"
        if os.path.exists(output_file_path):
            print(f"Output already exists: {output_file_path}, skipping.")
            continue

        print(f"Starting new run: Chunk={chunk_size}, Max Tokens={max_tokens}")

        qa_results = {}
        total_chunks = 0
        success_count = 0
        fail_count = 0
        # Chunks dropped because prompt + generation budget exceed the context
        # window; they are part of total_chunks but neither success nor failure.
        skipped_count = 0
        total_questions = 0
        qa_count_mismatch = 0

        start_time = time.time()

        for doc_name, chunks in chunk_data.items():
            qa_results[doc_name] = []

            for chunk in chunks:
                total_chunks += 1

                # NOTE(review): the example under "Response Format" ends with a
                # trailing comma, which is itself not valid JSON. The prompt is
                # left untouched so results stay comparable with earlier runs.
                prompt = f"""
Generate {questions_num} question-answer pairs based on the following text segment. 
Return the result in valid JSON format as a list of objects.

Text Segment:

{chunk}

Response Format:
[
    {{"question": "generated question", "answer": "generated Answer"}},
]

Question answers should be at least 250 words long.

Do NOT include any explanation or preamble before or after the JSON output.
Return ONLY valid JSON output.

Answer:
                """

                inputs = tokenizer(prompt, return_tensors="pt").to(device)

                try:
                    input_len = inputs["input_ids"].shape[1]
                    if input_len + max_tokens > max_context:
                        print(f"Skipping chunk (too long): input_len={input_len}")
                        skipped_count += 1
                        continue

                    with torch.no_grad():
                        output_tokens = model.generate(
                            **inputs,
                            max_new_tokens=max_tokens,
                            pad_token_id=pad_token_id,
                        )

                    # Drop the echoed prompt; keep only the newly generated tokens.
                    generated_tokens = output_tokens[0][input_len:]
                    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

                    try:
                        qa_pairs = json.loads(generated_text)
                        if isinstance(qa_pairs, list):
                            qa_results[doc_name].append({
                                "chunk": chunk,
                                "qa_pairs": qa_pairs
                            })
                            success_count += 1
                            total_questions += len(qa_pairs)

                            if len(qa_pairs) != questions_num:
                                qa_count_mismatch += 1
                        else:
                            # Valid JSON, but not the requested list of objects.
                            fail_count += 1
                    except json.JSONDecodeError:
                        fail_count += 1

                except Exception as e:
                    # Best effort: one failing chunk must not abort the whole run.
                    print(f"Error generating for chunk: {e}")
                    fail_count += 1

        # Save QA Output
        os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
        with open(output_file_path, "w", encoding="utf-8") as out_file:
            json.dump(qa_results, out_file, indent=4, ensure_ascii=False)

        end_time = time.time()
        elapsed_time = timedelta(seconds=end_time - start_time)

        # Save Results and Checkpoint
        run_row = [
            chunk_size, questions_num, qa_count_mismatch, total_questions,
            max_tokens, total_chunks, success_count, fail_count, str(elapsed_time)
        ]
        results_df.loc[len(results_df)] = run_row

        # Append only this run's row. results_df starts empty in every kernel
        # session, so rewriting the whole file from it would erase the metrics
        # of runs completed in earlier sessions when the sweep is resumed.
        os.makedirs(os.path.dirname(csv_output_path), exist_ok=True)
        needs_header = (
            not os.path.exists(csv_output_path)
            or os.path.getsize(csv_output_path) == 0
        )
        pd.DataFrame([run_row], columns=results_df.columns).to_csv(
            csv_output_path, mode="a", header=needs_header, index=False
        )

        pd.DataFrame([[chunk_size, max_tokens]], columns=["chunk_size", "max_tokens"]) \
            .to_csv(checkpoint_path, mode='a', header=False, index=False)

        if skipped_count:
            print(f"Skipped {skipped_count} chunk(s) that exceeded the context window.")
        print(f"✅ Saved: {output_file_path} | Time: {elapsed_time}")

Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_128_Token_128_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_128_Token_256_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_128_Token_512_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_128_Token_1024_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_128_Token_2048_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_256_Token_128_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_256_Token_256_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_256_Token_512_Q2.json, skipping.
Output already exists: ../Yaman/Generated_Results/LLAMA3_1/generation_log_256_Token_1024_Q2.json, skipping.
Output already exists: ../Yaman/Ge

# Power Analysis