In [1]:
import torch
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.device_count())  # Should return the number of GPUs
print(torch.cuda.get_device_name(0))  # Should show the GPU model

True
1
NVIDIA RTX 5000 Ada Generation


In [20]:
import transformers
import torch
import os
import json
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
import time
from datetime import timedelta

In [None]:

login("#")
# Don't forget to remove the Key when uploading to GitHub

os.environ["HF_HOME"] = "D:/huggingface_cache" 
os.environ["TRANSFORMERS_CACHE"] = "D:/huggingface_cache"
os.environ["HUGGINGFACE_HUB_CACHE"] = "D:/huggingface_cache"

print("HF_HOME:", os.getenv("HF_HOME"))
print("TRANSFORMERS_CACHE:", os.getenv("TRANSFORMERS_CACHE"))
print("HUGGINGFACE_HUB_CACHE:", os.getenv("HUGGINGFACE_HUB_CACHE"))

transformers.utils.hub.TRANSFORMERS_CACHE = "D:/huggingface_cache"

HF_HOME: D:/huggingface_cache
TRANSFORMERS_CACHE: D:/huggingface_cache
HUGGINGFACE_HUB_CACHE: D:/huggingface_cache


In [4]:
model_name = "meta-llama/Meta-Llama-3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="cuda")


Loading checkpoint shards: 100%|██████████| 4/4 [09:43<00:00, 145.76s/it]


In [25]:
json_file_path = "../Generate_Paragraphs/Results/extracted_chunks_1024_overlap.json"  
with open(json_file_path, "r", encoding="utf-8") as file:
    chunk_data = json.load(file)
    
qa_results = {}

In [26]:
log_file_path = "generation_log.txt"

# Start timing
start_time = time.time()

# Tracking counters
total_chunks = 0
success_count = 0
fail_count = 0
token_Size = "Default"
questions_num = 1

In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    filename=log_file_path,
    filemode='w',  
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)


for doc_name, chunks in chunk_data.items():
    qa_results[doc_name] = []

    for chunk in chunks[:5]:
        total_chunks += 1

        prompt = f"""
        Generate {questions_num} question-answer pairs based on the following text segment. 
        Return the result in valid JSON format as a list of objects.

        Text Segment:
        
        {chunk}

        Response Format:
        [
            {{"question": "What is ...?", "answer": "The answer is ..."}},
            {{"question": "How does ... work?", "answer": "It works by ..."}}
        ]

        Question answers should be at least 250 words long.

        Do NOT include any explanation or preamble before or after the JSON output.
        Return ONLY valid JSON output.

        Answer:
        """

        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            output_tokens = model.generate(**inputs)

        generated_tokens = output_tokens[0][len(inputs["input_ids"][0]):]
        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

        try:
            qa_pairs = json.loads(generated_text)
            if isinstance(qa_pairs, list):
                qa_results[doc_name].extend(qa_pairs)
                success_count += 1
            else:
                logging.warning(f"Invalid JSON object (not a list) in document '{doc_name}'")
                fail_count += 1
        except json.JSONDecodeError:
            logging.error(f"JSONDecodeError for document '{doc_name}'")
            fail_count += 1

# Save the QA results
output_file_path = "generated_qa_pairs.json"
with open(output_file_path, "w", encoding="utf-8") as out_file:
    json.dump(qa_results, out_file, indent=4, ensure_ascii=False)

# Logging summary
end_time = time.time()
elapsed_time = timedelta(seconds=end_time - start_time)

logging.info(f"Total chunks processed: {total_chunks}")
logging.info(f"Successful QA generations: {success_count}")
logging.info(f"Failed QA generations: {fail_count}")
logging.info(f"Total execution time: {elapsed_time}")
logging.info(f"Number of Questions: {questions_num}")
logging.info(f"Token Size: {token_Size}")

print(f"QA pairs saved to {output_file_path}")
print(f"Log file saved to {log_file_path}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

QA pairs saved to generated_qa_pairs.json
Log file saved to generation_log.txt


In [6]:
print(tokenizer.vocab_size)

128000


{}