In [1]:
import torch
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.device_count())  # Should return the number of GPUs
print(torch.cuda.get_device_name(0))  # Should show the GPU model

True
1
NVIDIA RTX 5000 Ada Generation


In [2]:
import transformers
import torch
import os
import json
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
import time
from datetime import timedelta
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [None]:

login("#")
# Don't forget to remove the Key when uploading to GitHub

os.environ["HF_HOME"] = "D:/huggingface_cache" 
os.environ["TRANSFORMERS_CACHE"] = "D:/huggingface_cache"
os.environ["HUGGINGFACE_HUB_CACHE"] = "D:/huggingface_cache"

print("HF_HOME:", os.getenv("HF_HOME"))
print("TRANSFORMERS_CACHE:", os.getenv("TRANSFORMERS_CACHE"))
print("HUGGINGFACE_HUB_CACHE:", os.getenv("HUGGINGFACE_HUB_CACHE"))

transformers.utils.hub.TRANSFORMERS_CACHE = "D:/huggingface_cache"

HF_HOME: D:/huggingface_cache
TRANSFORMERS_CACHE: D:/huggingface_cache
HUGGINGFACE_HUB_CACHE: D:/huggingface_cache


In [4]:
model_name = "meta-llama/Meta-Llama-3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="cuda")


Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.35s/it]


In [5]:
chunk_sizes = [128, 256, 512, 1024]
questions_num = 2
max_token_list = [128, 256, 512, 1024, 2048]


In [6]:
results_df = pd.DataFrame(columns=[
    "chunk_size", "questions_num", "token_Size",
    "total_chunks", "success_count", "fail_count",
    "elapsed_time"
])

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for chunk_size in chunk_sizes:
    json_file_path = f"../Generate_Paragraphs/Results/extracted_chunks_{chunk_size}_overlap.json"
    output_file_path = f"../Generated_Results/generation_log_{chunk_size}_Token_{token_Size}.json"
    log_file_path = f"../logs/generated_qa_pairs_{chunk_size}_Token_{token_Size}.txt"

    with open(json_file_path, "r", encoding="utf-8") as file:
        chunk_data = json.load(file)

    qa_results = {}
    total_chunks = 0
    success_count = 0
    fail_count = 0

    
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    logging.basicConfig(
        filename=log_file_path,
        filemode='w',
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )

    logging.info(f"Starting QA generation for chunk size: {chunk_size}")

    start_time = time.time()

    for doc_name, chunks in chunk_data.items():
        qa_results[doc_name] = []

        for chunk in chunks[:5]:  
            total_chunks += 1

            prompt = f"""
            Generate {questions_num} question-answer pairs based on the following text segment. 
            Return the result in valid JSON format as a list of objects.

            Text Segment:
            
            {chunk}

            Response Format:
            [
                {{"question": "What is ...?", "answer": "The answer is ..."}},
                {{"question": "How does ... work?", "answer": "It works by ..."}}
            ]

            Question answers should be at least 250 words long.

            Do NOT include any explanation or preamble before or after the JSON output.
            Return ONLY valid JSON output.

            Answer:
            """

            inputs = tokenizer(prompt, return_tensors="pt").to(device)

            with torch.no_grad():
                output_tokens = model.generate(**inputs, )

            generated_tokens = output_tokens[0][len(inputs["input_ids"][0]):]
            generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

            try:
                qa_pairs = json.loads(generated_text)
                if isinstance(qa_pairs, list):
                    qa_results[doc_name].append({
                        "chunk": chunk,
                        "qa_pairs": qa_pairs
                    })
                    success_count += 1
                else:
                    logging.warning(f"Invalid JSON object (not a list) in document '{doc_name}'")
                    fail_count += 1
            except json.JSONDecodeError:
                logging.error(f"JSONDecodeError for document '{doc_name}'")
                fail_count += 1

    
    with open(output_file_path, "w", encoding="utf-8") as out_file:
        json.dump(qa_results, out_file, indent=4, ensure_ascii=False)

    
    end_time = time.time()
    elapsed_time = timedelta(seconds=end_time - start_time)

    logging.info(f"Total chunks processed: {total_chunks}")
    logging.info(f"Successful QA generations: {success_count}")
    logging.info(f"Failed QA generations: {fail_count}")
    logging.info(f"Total execution time: {elapsed_time}")
    logging.info(f"Number of Questions: {questions_num}")
    logging.info(f"Token Size: {token_Size}")

    print(f"QA pairs saved to {output_file_path}")
    print(f"Log file saved to {log_file_path}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

QA pairs saved to ../Generated_Results/generation_log_128_Token_Default.json
Log file saved to ../logs/generated_qa_pairs_128_Token_Default.txt


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

QA pairs saved to ../Generated_Results/generation_log_256_Token_Default.json
Log file saved to ../logs/generated_qa_pairs_256_Token_Default.txt


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

QA pairs saved to ../Generated_Results/generation_log_512_Token_Default.json
Log file saved to ../logs/generated_qa_pairs_512_Token_Default.txt


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

QA pairs saved to ../Generated_Results/generation_log_1024_Token_Default.json
Log file saved to ../logs/generated_qa_pairs_1024_Token_Default.txt


# Convert logs to Panda


In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


for chunk_size in chunk_sizes:
    json_file_path = f"../Generate_Paragraphs/Results/extracted_chunks_{chunk_size}_overlap.json"
    with open(json_file_path, "r", encoding="utf-8") as file:
        chunk_data = json.load(file)


    for max_tokens in max_token_list:

        output_file_path = f"../Generated_Results/generation_log_{chunk_size}_Token_{max_tokens}_Q{questions_num}.json"

        qa_results = {}
        total_chunks = 0
        success_count = 0
        fail_count = 0

        print(f"Starting QA generation - Chunk: {chunk_size}, Tokens: {max_tokens}, Questions: {questions_num}")
        start_time = time.time()

        for doc_name, chunks in chunk_data.items():
            qa_results[doc_name] = []

            for chunk in chunks[:5]:  # Limit to first 5 chunks
                total_chunks += 1

                prompt = f"""
                Generate {questions_num} question-answer pairs based on the following text segment. 
                Return the result in valid JSON format as a list of objects.

                Text Segment:
                
                {chunk}

                Response Format:
                [
                    {{"question": "generated question", "answer": "generated Answer"}},
                ]

                Question answers should be at least 250 words long.

                Do NOT include any explanation or preamble before or after the JSON output.
                Return ONLY valid JSON output.

                Answer:
                """

                inputs = tokenizer(prompt, return_tensors="pt").to(device)

                with torch.no_grad():
                    output_tokens = model.generate(**inputs, max_new_tokens=max_tokens,)

                generated_tokens = output_tokens[0][len(inputs["input_ids"][0]):]
                generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

                try:
                    qa_pairs = json.loads(generated_text)
                    if isinstance(qa_pairs, list):
                        qa_results[doc_name].append({
                            "chunk": chunk,
                            "qa_pairs": qa_pairs
                        })
                        success_count += 1
                    else:
                        fail_count += 1
                except json.JSONDecodeError:
                    fail_count += 1

        
        with open(output_file_path, "w", encoding="utf-8") as out_file:
            json.dump(qa_results, out_file, indent=4, ensure_ascii=False)

        end_time = time.time()
        elapsed_time = timedelta(seconds=end_time - start_time)

        results_df.loc[len(results_df)] = [
            chunk_size, questions_num, max_tokens,
            total_chunks, success_count, fail_count,
            str(elapsed_time)
        ]

        print(f"Saved to {output_file_path} | Time: {elapsed_time}")

# Save summary CSV
csv_output_path = "../Generated_Results/qa_generation_results_summary.csv"
results_df.to_csv(csv_output_path, index=False)
print(f"\nSummary saved to {csv_output_path}")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Starting QA generation - Chunk: 128, Tokens: 128, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_128_Token_128_Q2.json | Time: 0:01:30.083195
Starting QA generation - Chunk: 128, Tokens: 256, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_128_Token_256_Q2.json | Time: 0:02:12.342854
Starting QA generation - Chunk: 128, Tokens: 512, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_128_Token_512_Q2.json | Time: 0:03:10.701812
Starting QA generation - Chunk: 128, Tokens: 1024, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_128_Token_1024_Q2.json | Time: 0:03:28.090425
Starting QA generation - Chunk: 128, Tokens: 2048, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_128_Token_2048_Q2.json | Time: 0:08:12.724445
Starting QA generation - Chunk: 256, Tokens: 128, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_256_Token_128_Q2.json | Time: 0:01:26.796037
Starting QA generation - Chunk: 256, Tokens: 256, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_256_Token_256_Q2.json | Time: 0:02:10.381159
Starting QA generation - Chunk: 256, Tokens: 512, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_256_Token_512_Q2.json | Time: 0:03:11.175790
Starting QA generation - Chunk: 256, Tokens: 1024, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_256_Token_1024_Q2.json | Time: 0:03:09.792984
Starting QA generation - Chunk: 256, Tokens: 2048, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

Saved to ../Generated_Results/generation_log_256_Token_2048_Q2.json | Time: 0:05:28.842361


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Starting QA generation - Chunk: 512, Tokens: 128, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_512_Token_128_Q2.json | Time: 0:00:37.541970
Starting QA generation - Chunk: 512, Tokens: 256, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_512_Token_256_Q2.json | Time: 0:00:57.301689
Starting QA generation - Chunk: 512, Tokens: 512, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_512_Token_512_Q2.json | Time: 0:01:02.619074
Starting QA generation - Chunk: 512, Tokens: 1024, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_512_Token_1024_Q2.json | Time: 0:01:06.136499
Starting QA generation - Chunk: 512, Tokens: 2048, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_512_Token_2048_Q2.json | Time: 0:00:52.762223
Starting QA generation - Chunk: 1024, Tokens: 128, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_1024_Token_128_Q2.json | Time: 0:00:25.574797
Starting QA generation - Chunk: 1024, Tokens: 256, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_1024_Token_256_Q2.json | Time: 0:00:31.402152
Starting QA generation - Chunk: 1024, Tokens: 512, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_1024_Token_512_Q2.json | Time: 0:00:39.805774
Starting QA generation - Chunk: 1024, Tokens: 1024, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_1024_Token_1024_Q2.json | Time: 0:01:17.177957
Starting QA generation - Chunk: 1024, Tokens: 2048, Questions: 2


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Saved to ../Generated_Results/generation_log_1024_Token_2048_Q2.json | Time: 0:01:30.669500

Summary saved to ../Generated_Results/qa_generation_results_summary.csv


# Power Analysis