In [1]:
import sys

sys.path.append('/scratch/gpfs/JHA/mb5157/repos/graphmert/transformers/src')

In [4]:
import os
import sys
import json
import ast
import re
import nest_asyncio, asyncio
import logging
from pathlib import Path
import pyarrow as pa
from enum import Enum, auto

from datasets import Dataset, Features, Sequence, Value, load_from_disk, concatenate_datasets
from transformers import AutoTokenizer
from google import genai
from google.genai import types

  from .autonotebook import tqdm as notebook_tqdm


In [26]:
from importlib import reload # reload after updating prompts

import prompts_scores
reload(prompts_scores)
from prompts_scores import system_prompt_fact_score_seq_only, system_prompt_fact_score_general, system_prompt_validity_score

class Prompt(Enum):
    """Scoring modes supported by this notebook; each member keys one system prompt."""
    FactScore_seq_only = 1
    FactScore_llm_knowledge_and_seq = 2
    ValidityScore = 3

# Lookup table: scoring mode -> system prompt text imported from prompts_scores.
PROMPTS = {
    Prompt.FactScore_seq_only: system_prompt_fact_score_seq_only,
    Prompt.FactScore_llm_knowledge_and_seq: system_prompt_fact_score_general,
    Prompt.ValidityScore: system_prompt_validity_score
}

# set here which score to run
PROMPT_MODE = Prompt.ValidityScore
# System prompt for the selected mode; gemini_call sends it with every request.
SYSTEM_CONTEXT = PROMPTS[PROMPT_MODE]

In [27]:
# The API key is read from the environment (never hard-coded); fail fast if it is missing.
GEMINI_API_KEY  = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY env var not set – please `export GEMINI_API_KEY=…` before launching.")

# Gemini model used for every request. To switch models change only this line
# (e.g. "gemini-2.5-flash").
MODEL = "gemini-2.0-flash"
# model_name appears in log messages and in saved-chunk directory names.
# It is derived from MODEL so saved results are always labelled with the
# model that was actually queried.
model_name = MODEL

tokenizer = AutoTokenizer.from_pretrained(
    "/scratch/gpfs/JHA/mb5157/tokenizers/biomedbert_fast_tokenizer"
)

SAVE_CHUNK      = 1000  # rows per saved chunk: results are written to disk every SAVE_CHUNK examples
PRINTOUT        = True  # print progress
TAKE_SUBSET     = False # run only on subset
SUBSET_SIZE      = 100 # size for test run
BATCH_SIZE      = 128   # how many concurrent Gemini calls

# Seconds to pause between queuing consecutive requests inside a batch.
# TIME_SLEEP varies across the models
TIME_SLEEP = 0.05
# TIME_SLEEP = 1

PRINT_RAW_RESPONSE = False # set True for test

In [28]:
# Root configuration (used by third-party loggers). force=True replaces any
# root handlers left over from a previous run of this cell.
logging.basicConfig(level=logging.INFO, force=True)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Re-running this cell must not stack another StreamHandler on the same
# logger (each extra handler prints every record once more), so detach
# whatever is already attached before adding ours.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
fmt = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
handler.setFormatter(fmt)
logger.addHandler(handler)
# This logger has its own handler; if records also propagated to the root
# handler installed by basicConfig, every message would be printed twice.
logger.propagate = False

# Quiet down other noisy loggers
logging.getLogger("google_genai.models").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)

In [29]:
def _extract_bracketed_answer(response, allowed):
    """
    Shared parser: take the text between the LAST '[' and the LAST ']' of
    `response`, strip and lower-case it, and return it only if it is one of
    `allowed`. Returns "" for anything else (no brackets, brackets in the
    wrong order, unexpected label, or a non-string response).
    """
    if not isinstance(response, str):
        return ""
    start, end = response.rfind('['), response.rfind(']')
    if start == -1 or end == -1 or start > end:
        return ""
    candidate = response[start+1:end].strip().lower()
    return candidate if candidate in allowed else ""


def extract_answer_fact_score(response: str) -> str:
    """
    Extracts the final bracketed verdict from an LLM response string.

    Returns "yes" or "no" when the text between the last '[' and the last ']'
    is one of those labels (case-insensitive, surrounding whitespace ignored);
    returns "" otherwise.
    """
    return _extract_bracketed_answer(response, ("yes", "no"))

def extract_answer_validty_score(response: str) -> str:
    """
    Extracts the final bracketed verdict from an LLM response string.

    Same parsing as extract_answer_fact_score, but "maybe" is also accepted:
    returns "yes", "no" or "maybe", or "" when no such label is found.
    """
    return _extract_bracketed_answer(response, ("yes", "no", "maybe"))

# Bind `extract_answer` to the response parser that matches the active scoring mode.
if PROMPT_MODE == Prompt.ValidityScore:
    extract_answer = extract_answer_validty_score
if PROMPT_MODE in (Prompt.FactScore_llm_knowledge_and_seq, Prompt.FactScore_seq_only):
    extract_answer = extract_answer_fact_score

In [30]:
async def gemini_call(client, user_query: str, count_only, max_retries: int=3) -> str:
    """
    Wrap Gemini call in a retrying try/except so it never blows up.

    Args:
        client: Gemini client; its `aio.models` async interface is used.
        user_query: user message text sent to the model.
        count_only: mock call for counting input tokens. When true,
            `count_tokens` is called instead of `generate_content` and the
            function always returns "".
        max_retries: maximum number of attempts before giving up.

    Returns:
        The response text, or "" when count_only is set, when the response
        has no text, or when every attempt failed.

    Side effects:
        Adds token usage to the module-level `input_tokens` / `output_tokens`
        counters.
    """
    global input_tokens, output_tokens
    # count_tokens is mock request, generate_content is actual paid request
    if count_only:
        call_fn = client.aio.models.count_tokens
        config = None
        # count_tokens gets the system prompt as an ordinary user part so it
        # is included in the counted tokens.
        contents = [
            {"role": "user", "parts": [{"text": SYSTEM_CONTEXT}]},
            {"role": "user", "parts": [{"text": user_query}]},
        ]
    else:
        call_fn = client.aio.models.generate_content
        config = types.GenerateContentConfig(system_instruction=SYSTEM_CONTEXT)
        contents = [{
                "role": "user",
                "parts": [{"text": user_query}]
            }
        ]

    for attempt in range(1, max_retries + 1):
        try:
            resp = await call_fn(model=MODEL, contents=contents, config=config)
            if count_only:
                # The response object carries the prompt token total; it may
                # be missing, which must not be treated as a failed call.
                input_tokens += resp.total_tokens or 0
                return ""

            # real response path
            # usage_metadata is optional: a missing value must not raise here,
            # otherwise a successful (paid) response would be retried and
            # eventually thrown away.
            metadata = resp.usage_metadata
            if metadata is not None:
                input_tokens += metadata.prompt_token_count or 0
                output_tokens += metadata.candidates_token_count or 0
            return getattr(resp, "text", "") or ""

        except Exception as e:
            # catch HTTPError, ValidationError, ServerError, etc.
            logger.warning(f"Gemini call attempt {attempt}/{max_retries} failed: {e!r}")
            if attempt < max_retries:
                await asyncio.sleep(0.5 * attempt)    # simple linear backoff
            else:
                logger.error(f"All {max_retries} Gemini attempts failed for query: {user_query[:50]!r}")
                return ""  # give up

    # Only reached when max_retries <= 0 (no attempt made); keep the str contract.
    return ""


async def process_batch(client, slice_of_examples, count_only, print_raw_response=PRINT_RAW_RESPONSE):
    """
    Fire off up to BATCH_SIZE concurrent requests.

    One request is built per example from its "head", "relation" and "tail"
    fields (plus "text" for the FactScore modes). Requests are started
    TIME_SLEEP seconds apart and then awaited together.

    Returns:
        The parsed answers (via `extract_answer`), in the same order as
        `slice_of_examples`.
    """
    tasks = []
    try:
        for example in slice_of_examples:
            head = example["head"]
            relation = example["relation"]
            tail = example["tail"]
            if PROMPT_MODE != Prompt.ValidityScore:
                sequence = example["text"]
                uq = f"Input:\n{sequence}\n\nhead: {head}\nrelation: {relation}\ntail: {tail}\n\nOutput:"
            else:
                # NOTE(review): unlike the FactScore prompt above, the head is
                # sent without a "head: " label - confirm this matches the
                # format the validity system prompt expects.
                uq = f"{head}\nrelation: {relation}\ntail: {tail}\n\nOutput:"

            # create_task schedules the request immediately. A bare coroutine
            # would not start until gather(), so the sleep below would only
            # delay the batch and then release every request at once.
            tasks.append(asyncio.create_task(gemini_call(client, uq, count_only)))
            await asyncio.sleep(TIME_SLEEP)

        # gather preserves task order, so answers line up with the examples.
        raw_resps = await asyncio.gather(*tasks)
    except BaseException:
        # Interrupted mid-batch: do not leave already-started requests running.
        for task in tasks:
            task.cancel()
        raise

    if print_raw_response:
        print("RAW RESPONSE")
        for raw_resp in raw_resps:
            print(raw_resp)

    cleaned = [extract_answer(r) for r in raw_resps]
    return cleaned
    

async def run_all(client, dataset: Dataset, chunk_start: int, chunk_end: int, count_only=True):
    """
    Score `dataset` in consecutive batches of at most BATCH_SIZE rows.

    Each batch is handed to process_batch and its answers are collected in
    row order. `chunk_start` and `chunk_end` are accepted but not used here.

    Returns:
        A list with one parsed answer per dataset row.
    """
    total = len(dataset)
    answers = []

    for lo in range(0, total, BATCH_SIZE):
        hi = min(lo + BATCH_SIZE, total)
        batch = dataset.select(range(lo, hi))
        # process_batch returns one answer per row of the batch, in order
        answers.extend(await process_batch(client, batch, count_only))
        if PRINTOUT:
            logger.info(f"Gemini: processed {hi}/{total}")

    return answers

In [31]:
def get_chunk_suffix(chunk_start, chunk_end):
    """
    Build the directory name under which one result chunk is saved.

    The name is "<prefix>_<model_name>[_subset_<SUBSET_SIZE>]_<chunk_start>-<chunk_end>",
    where the prefix depends on PROMPT_MODE and the subset tag is present only
    when TAKE_SUBSET is true.

    Raises:
        ValueError: if PROMPT_MODE has no prefix defined here.
    """
    if PROMPT_MODE == Prompt.ValidityScore:
        prefix = 'validated'
    elif PROMPT_MODE == Prompt.FactScore_seq_only:
        prefix = 'accepted_seq_only'
    elif PROMPT_MODE == Prompt.FactScore_llm_knowledge_and_seq:
        prefix = 'accepted'
    else:
        # Fail with a clear message rather than an UnboundLocalError on `prefix`
        # if a new Prompt member is added without a prefix.
        raise ValueError(f"No output prefix defined for prompt mode {PROMPT_MODE!r}")

    subset_tag = f"_subset_{SUBSET_SIZE}" if TAKE_SUBSET else ""
    return f"{prefix}_{model_name}{subset_tag}_{chunk_start}-{chunk_end}"


def load_dataset_with_triples(predictions_path):
    """
    Load the triples to score and normalise their column names.

    Args:
        predictions_path: a path (str or os.PathLike). Paths ending in ".csv"
            are read with Dataset.from_csv, anything else with
            Dataset.load_from_disk.

    Returns:
        The dataset, with 'relation_type' renamed to 'relation' and 'sequence'
        renamed to 'text' when those columns are present.
    """
    # Accept pathlib.Path as well as str; Path has no .endswith().
    predictions_path = str(predictions_path)
    if predictions_path.endswith('.csv'):
        ds_full = Dataset.from_csv(predictions_path)
    else:
        ds_full = Dataset.load_from_disk(predictions_path)

    # Map alternative column names onto the ones process_batch reads.
    for old_name, new_name in (('relation_type', 'relation'), ('sequence', 'text')):
        if old_name in ds_full.column_names:
            ds_full = ds_full.rename_column(old_name, new_name)

    return ds_full

In [32]:
async def main(predictions_path: str, path_to_save, count_only=True, start_idx: int = 0, end_idx=None):
    """
    Score the triples in `predictions_path` chunk by chunk and save each chunk.

    Args:
        predictions_path: CSV file or saved dataset containing the triples.
        path_to_save: directory under which one sub-directory per chunk is written.
        count_only: when true, only token-counting requests are made and
            nothing is saved.
        start_idx: first row to process.
        end_idx: row to stop at (exclusive); None means the end of the dataset.
            Not used when TAKE_SUBSET is true, where SUBSET_SIZE rows starting
            at start_idx are processed.
    """
    ds_full = load_dataset_with_triples(predictions_path)
    if TAKE_SUBSET:
        overall_end = min(start_idx + SUBSET_SIZE, len(ds_full))
    else:
        overall_end = len(ds_full) if end_idx is None else min(len(ds_full), end_idx)

    client = genai.Client(api_key=GEMINI_API_KEY)

    # Work in SAVE_CHUNK-sized pieces so finished results are on disk even if
    # a later chunk fails.
    for chunk_start in range(start_idx, overall_end, SAVE_CHUNK):
        chunk_end = min(chunk_start + SAVE_CHUNK, overall_end)
        ds_chunk = ds_full.select(range(chunk_start, chunk_end))
        entities = await run_all(client, ds_chunk, chunk_start, chunk_end, count_only=count_only)

        assert len(entities) == ds_chunk.num_rows, f"{len(entities)=} vs {ds_chunk.num_rows=}"
        if not count_only:
            # coerce any stray string into a single‐element list
            fixed = [[str(x) for x in t] if isinstance(t, list) else [str(t)] for t in entities]

            ds_chunk = ds_chunk.add_column("response", fixed)
            save_name = get_chunk_suffix(chunk_start, chunk_end)
            out_dir = os.path.join(str(path_to_save), save_name)
            ds_chunk.save_to_disk(out_dir)
            logger.info(f"Saved chunk {chunk_start}-{chunk_end} to {out_dir}")


In [33]:
# Running token totals for this run; gemini_call adds to these module-level
# counters through its `global` declaration, so they must exist before main() runs.
input_tokens = 0
output_tokens = 0

count_only = False # True means no real requests

# csv or dataset
predictions_path = '/scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/triples_with_chunks_sampled.csv'

# Result chunks are written next to the input file.
path_to_save = os.path.dirname(predictions_path)
start_idx = 0

logger.info(f'request with {model_name}')
# Top-level await: relies on the notebook's running event loop.
await main(predictions_path, path_to_save, count_only=count_only, start_idx=start_idx, end_idx=None)

print('===finished===')

2025-12-15 11:22:28,627 INFO request with gemini-2.0-flash
2025-12-15 11:22:28,627 INFO request with gemini-2.0-flash


INFO:__main__:request with gemini-2.0-flash


2025-12-15 11:22:36,916 INFO Gemini: processed 128/1000
2025-12-15 11:22:36,916 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:22:45,032 INFO Gemini: processed 256/1000
2025-12-15 11:22:45,032 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:22:53,252 INFO Gemini: processed 384/1000
2025-12-15 11:22:53,252 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:23:01,466 INFO Gemini: processed 512/1000
2025-12-15 11:23:01,466 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:23:09,825 INFO Gemini: processed 640/1000
2025-12-15 11:23:09,825 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:23:17,992 INFO Gemini: processed 768/1000
2025-12-15 11:23:17,992 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:23:26,170 INFO Gemini: processed 896/1000
2025-12-15 11:23:26,170 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:23:32,945 INFO Gemini: processed 1000/1000
2025-12-15 11:23:32,945 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 53982.14 examples/s]

2025-12-15 11:23:32,989 INFO Saved chunk 0-1000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_0-1000
2025-12-15 11:23:32,989 INFO Saved chunk 0-1000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_0-1000



INFO:__main__:Saved chunk 0-1000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_0-1000


2025-12-15 11:23:41,254 INFO Gemini: processed 128/1000
2025-12-15 11:23:41,254 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:23:49,495 INFO Gemini: processed 256/1000
2025-12-15 11:23:49,495 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:23:57,934 INFO Gemini: processed 384/1000
2025-12-15 11:23:57,934 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:24:05,993 INFO Gemini: processed 512/1000
2025-12-15 11:24:05,993 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:24:14,201 INFO Gemini: processed 640/1000
2025-12-15 11:24:14,201 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:24:22,325 INFO Gemini: processed 768/1000
2025-12-15 11:24:22,325 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:24:30,444 INFO Gemini: processed 896/1000
2025-12-15 11:24:30,444 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:24:37,112 INFO Gemini: processed 1000/1000
2025-12-15 11:24:37,112 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 76525.83 examples/s]

2025-12-15 11:24:37,140 INFO Saved chunk 1000-2000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_1000-2000
2025-12-15 11:24:37,140 INFO Saved chunk 1000-2000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_1000-2000



INFO:__main__:Saved chunk 1000-2000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_1000-2000


2025-12-15 11:24:45,204 INFO Gemini: processed 128/1000
2025-12-15 11:24:45,204 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:24:53,528 INFO Gemini: processed 256/1000
2025-12-15 11:24:53,528 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:25:01,589 INFO Gemini: processed 384/1000
2025-12-15 11:25:01,589 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:25:09,549 INFO Gemini: processed 512/1000
2025-12-15 11:25:09,549 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:25:17,564 INFO Gemini: processed 640/1000
2025-12-15 11:25:17,564 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:25:25,644 INFO Gemini: processed 768/1000
2025-12-15 11:25:25,644 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:25:33,952 INFO Gemini: processed 896/1000
2025-12-15 11:25:33,952 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:25:40,586 INFO Gemini: processed 1000/1000
2025-12-15 11:25:40,586 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 118026.40 examples/s]

2025-12-15 11:25:40,609 INFO Saved chunk 2000-3000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_2000-3000
2025-12-15 11:25:40,609 INFO Saved chunk 2000-3000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_2000-3000



INFO:__main__:Saved chunk 2000-3000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_2000-3000


2025-12-15 11:25:48,627 INFO Gemini: processed 128/1000
2025-12-15 11:25:48,627 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:25:56,653 INFO Gemini: processed 256/1000
2025-12-15 11:25:56,653 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:26:09,762 INFO Gemini: processed 384/1000
2025-12-15 11:26:09,762 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:26:17,851 INFO Gemini: processed 512/1000
2025-12-15 11:26:17,851 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:26:26,101 INFO Gemini: processed 640/1000
2025-12-15 11:26:26,101 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:26:34,070 INFO Gemini: processed 768/1000
2025-12-15 11:26:34,070 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:26:42,164 INFO Gemini: processed 896/1000
2025-12-15 11:26:42,164 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:26:48,565 INFO Gemini: processed 1000/1000
2025-12-15 11:26:48,565 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 78487.70 examples/s]

2025-12-15 11:26:48,596 INFO Saved chunk 3000-4000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_3000-4000
2025-12-15 11:26:48,596 INFO Saved chunk 3000-4000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_3000-4000



INFO:__main__:Saved chunk 3000-4000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_3000-4000


2025-12-15 11:26:57,159 INFO Gemini: processed 128/1000
2025-12-15 11:26:57,159 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:27:05,121 INFO Gemini: processed 256/1000
2025-12-15 11:27:05,121 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:27:13,451 INFO Gemini: processed 384/1000
2025-12-15 11:27:13,451 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:27:21,530 INFO Gemini: processed 512/1000
2025-12-15 11:27:21,530 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:27:29,482 INFO Gemini: processed 640/1000
2025-12-15 11:27:29,482 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:27:37,523 INFO Gemini: processed 768/1000
2025-12-15 11:27:37,523 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:27:45,746 INFO Gemini: processed 896/1000
2025-12-15 11:27:45,746 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:27:52,210 INFO Gemini: processed 1000/1000
2025-12-15 11:27:52,210 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 75240.90 examples/s]

2025-12-15 11:27:52,249 INFO Saved chunk 4000-5000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_4000-5000
2025-12-15 11:27:52,249 INFO Saved chunk 4000-5000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_4000-5000



INFO:__main__:Saved chunk 4000-5000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_4000-5000


2025-12-15 11:28:00,418 INFO Gemini: processed 128/1000
2025-12-15 11:28:00,418 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:28:08,468 INFO Gemini: processed 256/1000
2025-12-15 11:28:08,468 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:28:16,443 INFO Gemini: processed 384/1000
2025-12-15 11:28:16,443 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:28:24,821 INFO Gemini: processed 512/1000
2025-12-15 11:28:24,821 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:28:32,944 INFO Gemini: processed 640/1000
2025-12-15 11:28:32,944 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:28:41,044 INFO Gemini: processed 768/1000
2025-12-15 11:28:41,044 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:28:49,185 INFO Gemini: processed 896/1000
2025-12-15 11:28:49,185 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:28:55,685 INFO Gemini: processed 1000/1000
2025-12-15 11:28:55,685 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 97940.55 examples/s] 

2025-12-15 11:28:55,713 INFO Saved chunk 5000-6000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_5000-6000
2025-12-15 11:28:55,713 INFO Saved chunk 5000-6000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_5000-6000



INFO:__main__:Saved chunk 5000-6000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_5000-6000


2025-12-15 11:29:03,895 INFO Gemini: processed 128/1000
2025-12-15 11:29:03,895 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:29:11,979 INFO Gemini: processed 256/1000
2025-12-15 11:29:11,979 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:29:20,245 INFO Gemini: processed 384/1000
2025-12-15 11:29:20,245 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:29:28,721 INFO Gemini: processed 512/1000
2025-12-15 11:29:28,721 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:29:37,032 INFO Gemini: processed 640/1000
2025-12-15 11:29:37,032 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:31:28,442 INFO Gemini: processed 384/1000
2025-12-15 11:31:28,442 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:31:36,615 INFO Gemini: processed 512/1000
2025-12-15 11:31:36,615 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:31:44,935 INFO Gemini: processed 640/1000
2025-12-15 11:31:44,935 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:31:52,981 INFO Gemini: processed 768/1000
2025-12-15 11:31:52,981 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:32:01,279 INFO Gemini: processed 896/1000
2025-12-15 11:32:01,279 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:32:07,912 INFO Gemini: processed 1000/1000
2025-12-15 11:32:07,912 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 96425.21 examples/s] 

2025-12-15 11:32:07,939 INFO Saved chunk 8000-9000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_8000-9000
2025-12-15 11:32:07,939 INFO Saved chunk 8000-9000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_8000-9000



INFO:__main__:Saved chunk 8000-9000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_8000-9000


2025-12-15 11:32:16,077 INFO Gemini: processed 128/1000
2025-12-15 11:32:16,077 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:32:24,325 INFO Gemini: processed 256/1000
2025-12-15 11:32:24,325 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:32:32,501 INFO Gemini: processed 384/1000
2025-12-15 11:32:32,501 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:32:40,714 INFO Gemini: processed 512/1000
2025-12-15 11:32:40,714 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:32:49,062 INFO Gemini: processed 640/1000
2025-12-15 11:32:49,062 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:32:57,072 INFO Gemini: processed 768/1000
2025-12-15 11:32:57,072 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:33:05,423 INFO Gemini: processed 896/1000
2025-12-15 11:33:05,423 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:33:15,334 INFO Gemini: processed 1000/1000
2025-12-15 11:33:15,334 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 70696.87 examples/s]

2025-12-15 11:33:15,367 INFO Saved chunk 9000-10000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_9000-10000
2025-12-15 11:33:15,367 INFO Saved chunk 9000-10000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_9000-10000



INFO:__main__:Saved chunk 9000-10000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_9000-10000


2025-12-15 11:33:23,425 INFO Gemini: processed 128/1000
2025-12-15 11:33:23,425 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:33:31,766 INFO Gemini: processed 256/1000
2025-12-15 11:33:31,766 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:33:39,918 INFO Gemini: processed 384/1000
2025-12-15 11:33:39,918 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:33:48,195 INFO Gemini: processed 512/1000
2025-12-15 11:33:48,195 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:33:56,190 INFO Gemini: processed 640/1000
2025-12-15 11:33:56,190 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:34:04,255 INFO Gemini: processed 768/1000
2025-12-15 11:34:04,255 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:34:12,307 INFO Gemini: processed 896/1000
2025-12-15 11:34:12,307 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:34:18,827 INFO Gemini: processed 1000/1000
2025-12-15 11:34:18,827 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 75307.10 examples/s]

2025-12-15 11:34:18,856 INFO Saved chunk 10000-11000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_10000-11000
2025-12-15 11:34:18,856 INFO Saved chunk 10000-11000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_10000-11000



INFO:__main__:Saved chunk 10000-11000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_10000-11000


2025-12-15 11:34:27,427 INFO Gemini: processed 128/1000
2025-12-15 11:34:27,427 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:34:35,609 INFO Gemini: processed 256/1000
2025-12-15 11:34:35,609 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:34:43,710 INFO Gemini: processed 384/1000
2025-12-15 11:34:43,710 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:34:52,062 INFO Gemini: processed 512/1000
2025-12-15 11:34:52,062 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:35:00,144 INFO Gemini: processed 640/1000
2025-12-15 11:35:00,144 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:35:08,533 INFO Gemini: processed 768/1000
2025-12-15 11:35:08,533 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:35:16,624 INFO Gemini: processed 896/1000
2025-12-15 11:35:16,624 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:35:23,557 INFO Gemini: processed 1000/1000
2025-12-15 11:35:23,557 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62038.58 examples/s]

2025-12-15 11:35:23,590 INFO Saved chunk 11000-12000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_11000-12000
2025-12-15 11:35:23,590 INFO Saved chunk 11000-12000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_11000-12000



INFO:__main__:Saved chunk 11000-12000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_11000-12000


2025-12-15 11:35:31,676 INFO Gemini: processed 128/1000
2025-12-15 11:35:31,676 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:35:39,979 INFO Gemini: processed 256/1000
2025-12-15 11:35:39,979 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:35:48,201 INFO Gemini: processed 384/1000
2025-12-15 11:35:48,201 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:35:56,320 INFO Gemini: processed 512/1000
2025-12-15 11:35:56,320 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:36:04,978 INFO Gemini: processed 640/1000
2025-12-15 11:36:04,978 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:36:14,067 INFO Gemini: processed 768/1000
2025-12-15 11:36:14,067 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:36:22,361 INFO Gemini: processed 896/1000
2025-12-15 11:36:22,361 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:36:29,060 INFO Gemini: processed 1000/1000
2025-12-15 11:36:29,060 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62199.58 examples/s]

2025-12-15 11:36:29,093 INFO Saved chunk 12000-13000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_12000-13000
2025-12-15 11:36:29,093 INFO Saved chunk 12000-13000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_12000-13000



INFO:__main__:Saved chunk 12000-13000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_12000-13000


2025-12-15 11:36:37,302 INFO Gemini: processed 128/1000
2025-12-15 11:36:37,302 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:36:45,410 INFO Gemini: processed 256/1000
2025-12-15 11:36:45,410 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:36:53,428 INFO Gemini: processed 384/1000
2025-12-15 11:36:53,428 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:37:01,949 INFO Gemini: processed 512/1000
2025-12-15 11:37:01,949 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:37:10,270 INFO Gemini: processed 640/1000
2025-12-15 11:37:10,270 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:37:18,435 INFO Gemini: processed 768/1000
2025-12-15 11:37:18,435 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:37:26,766 INFO Gemini: processed 896/1000
2025-12-15 11:37:26,766 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:37:33,231 INFO Gemini: processed 1000/1000
2025-12-15 11:37:33,231 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 79999.69 examples/s]

2025-12-15 11:37:33,258 INFO Saved chunk 13000-14000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_13000-14000
2025-12-15 11:37:33,258 INFO Saved chunk 13000-14000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_13000-14000



INFO:__main__:Saved chunk 13000-14000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_13000-14000


2025-12-15 11:37:41,440 INFO Gemini: processed 128/1000
2025-12-15 11:37:41,440 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:37:49,379 INFO Gemini: processed 256/1000
2025-12-15 11:37:49,379 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:37:57,608 INFO Gemini: processed 384/1000
2025-12-15 11:37:57,608 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:38:05,613 INFO Gemini: processed 512/1000
2025-12-15 11:38:05,613 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:38:13,783 INFO Gemini: processed 640/1000
2025-12-15 11:38:13,783 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:38:22,056 INFO Gemini: processed 768/1000
2025-12-15 11:38:22,056 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:38:30,028 INFO Gemini: processed 896/1000
2025-12-15 11:38:30,028 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:38:36,729 INFO Gemini: processed 1000/1000
2025-12-15 11:38:36,729 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 87334.03 examples/s]

2025-12-15 11:38:36,755 INFO Saved chunk 14000-15000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_14000-15000
2025-12-15 11:38:36,755 INFO Saved chunk 14000-15000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_14000-15000



INFO:__main__:Saved chunk 14000-15000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_14000-15000


2025-12-15 11:38:44,653 INFO Gemini: processed 128/1000
2025-12-15 11:38:44,653 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:38:52,712 INFO Gemini: processed 256/1000
2025-12-15 11:38:52,712 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:39:01,040 INFO Gemini: processed 384/1000
2025-12-15 11:39:01,040 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:39:09,513 INFO Gemini: processed 512/1000
2025-12-15 11:39:09,513 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:39:18,173 INFO Gemini: processed 640/1000
2025-12-15 11:39:18,173 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:39:26,353 INFO Gemini: processed 768/1000
2025-12-15 11:39:26,353 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:39:34,446 INFO Gemini: processed 896/1000
2025-12-15 11:39:34,446 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:39:41,007 INFO Gemini: processed 1000/1000
2025-12-15 11:39:41,007 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 90834.95 examples/s]

2025-12-15 11:39:41,043 INFO Saved chunk 15000-16000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_15000-16000
2025-12-15 11:39:41,043 INFO Saved chunk 15000-16000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_15000-16000



INFO:__main__:Saved chunk 15000-16000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_15000-16000


2025-12-15 11:39:49,176 INFO Gemini: processed 128/1000
2025-12-15 11:39:49,176 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:39:57,395 INFO Gemini: processed 256/1000
2025-12-15 11:39:57,395 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:40:05,522 INFO Gemini: processed 384/1000
2025-12-15 11:40:05,522 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:40:13,582 INFO Gemini: processed 512/1000
2025-12-15 11:40:13,582 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:40:21,878 INFO Gemini: processed 640/1000
2025-12-15 11:40:21,878 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:40:30,348 INFO Gemini: processed 768/1000
2025-12-15 11:40:30,348 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:40:38,356 INFO Gemini: processed 896/1000
2025-12-15 11:40:38,356 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:40:44,891 INFO Gemini: processed 1000/1000
2025-12-15 11:40:44,891 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 76132.72 examples/s]

2025-12-15 11:40:44,930 INFO Saved chunk 16000-17000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_16000-17000
2025-12-15 11:40:44,930 INFO Saved chunk 16000-17000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_16000-17000



INFO:__main__:Saved chunk 16000-17000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_16000-17000


2025-12-15 11:40:53,274 INFO Gemini: processed 128/1000
2025-12-15 11:40:53,274 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:41:01,349 INFO Gemini: processed 256/1000
2025-12-15 11:41:01,349 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:41:09,531 INFO Gemini: processed 384/1000
2025-12-15 11:41:09,531 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:41:17,806 INFO Gemini: processed 512/1000
2025-12-15 11:41:17,806 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:41:25,911 INFO Gemini: processed 640/1000
2025-12-15 11:41:25,911 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:41:33,878 INFO Gemini: processed 768/1000
2025-12-15 11:41:33,878 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:41:42,103 INFO Gemini: processed 896/1000
2025-12-15 11:41:42,103 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:41:48,981 INFO Gemini: processed 1000/1000
2025-12-15 11:41:48,981 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 91915.85 examples/s]

2025-12-15 11:41:49,009 INFO Saved chunk 17000-18000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_17000-18000
2025-12-15 11:41:49,009 INFO Saved chunk 17000-18000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_17000-18000



INFO:__main__:Saved chunk 17000-18000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_17000-18000


2025-12-15 11:41:57,058 INFO Gemini: processed 128/1000
2025-12-15 11:41:57,058 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:42:05,113 INFO Gemini: processed 256/1000
2025-12-15 11:42:05,113 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:42:13,336 INFO Gemini: processed 384/1000
2025-12-15 11:42:13,336 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:42:21,604 INFO Gemini: processed 512/1000
2025-12-15 11:42:21,604 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:42:29,758 INFO Gemini: processed 640/1000
2025-12-15 11:42:29,758 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:42:37,914 INFO Gemini: processed 768/1000
2025-12-15 11:42:37,914 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:42:45,976 INFO Gemini: processed 896/1000
2025-12-15 11:42:45,976 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:42:52,700 INFO Gemini: processed 1000/1000
2025-12-15 11:42:52,700 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 78361.59 examples/s]

2025-12-15 11:42:52,733 INFO Saved chunk 18000-19000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_18000-19000
2025-12-15 11:42:52,733 INFO Saved chunk 18000-19000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_18000-19000



INFO:__main__:Saved chunk 18000-19000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_18000-19000


2025-12-15 11:43:01,111 INFO Gemini: processed 128/1000
2025-12-15 11:43:01,111 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:43:09,251 INFO Gemini: processed 256/1000
2025-12-15 11:43:09,251 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:43:17,507 INFO Gemini: processed 384/1000
2025-12-15 11:43:17,507 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:43:25,610 INFO Gemini: processed 512/1000
2025-12-15 11:43:25,610 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:43:33,543 INFO Gemini: processed 640/1000
2025-12-15 11:43:33,543 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:43:41,959 INFO Gemini: processed 768/1000
2025-12-15 11:43:41,959 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:43:50,116 INFO Gemini: processed 896/1000
2025-12-15 11:43:50,116 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:43:56,836 INFO Gemini: processed 1000/1000
2025-12-15 11:43:56,836 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 101257.88 examples/s]

2025-12-15 11:43:56,863 INFO Saved chunk 19000-20000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_19000-20000
2025-12-15 11:43:56,863 INFO Saved chunk 19000-20000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_19000-20000



INFO:__main__:Saved chunk 19000-20000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_19000-20000


2025-12-15 11:44:04,956 INFO Gemini: processed 128/1000
2025-12-15 11:44:04,956 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:44:13,003 INFO Gemini: processed 256/1000
2025-12-15 11:44:13,003 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:44:21,296 INFO Gemini: processed 384/1000
2025-12-15 11:44:21,296 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:44:29,260 INFO Gemini: processed 512/1000
2025-12-15 11:44:29,260 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:44:37,704 INFO Gemini: processed 640/1000
2025-12-15 11:44:37,704 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:44:45,983 INFO Gemini: processed 768/1000
2025-12-15 11:44:45,983 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:44:54,211 INFO Gemini: processed 896/1000
2025-12-15 11:44:54,211 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:45:00,897 INFO Gemini: processed 1000/1000
2025-12-15 11:45:00,897 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 72817.78 examples/s]

2025-12-15 11:45:00,927 INFO Saved chunk 20000-21000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_20000-21000
2025-12-15 11:45:00,927 INFO Saved chunk 20000-21000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_20000-21000



INFO:__main__:Saved chunk 20000-21000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_20000-21000


2025-12-15 11:45:09,198 INFO Gemini: processed 128/1000
2025-12-15 11:45:09,198 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:45:17,632 INFO Gemini: processed 256/1000
2025-12-15 11:45:17,632 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:45:25,740 INFO Gemini: processed 384/1000
2025-12-15 11:45:25,740 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:45:33,720 INFO Gemini: processed 512/1000
2025-12-15 11:45:33,720 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:45:41,800 INFO Gemini: processed 640/1000
2025-12-15 11:45:41,800 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:45:49,901 INFO Gemini: processed 768/1000
2025-12-15 11:45:49,901 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:45:58,121 INFO Gemini: processed 896/1000
2025-12-15 11:45:58,121 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:46:04,760 INFO Gemini: processed 1000/1000
2025-12-15 11:46:04,760 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 52200.42 examples/s]

2025-12-15 11:46:04,803 INFO Saved chunk 21000-22000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_21000-22000
2025-12-15 11:46:04,803 INFO Saved chunk 21000-22000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_21000-22000



INFO:__main__:Saved chunk 21000-22000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_21000-22000


2025-12-15 11:46:12,939 INFO Gemini: processed 128/1000
2025-12-15 11:46:12,939 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:46:21,058 INFO Gemini: processed 256/1000
2025-12-15 11:46:21,058 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:46:29,196 INFO Gemini: processed 384/1000
2025-12-15 11:46:29,196 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:46:37,627 INFO Gemini: processed 512/1000
2025-12-15 11:46:37,627 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:46:45,750 INFO Gemini: processed 640/1000
2025-12-15 11:46:45,750 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:46:54,102 INFO Gemini: processed 768/1000
2025-12-15 11:46:54,102 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:47:02,177 INFO Gemini: processed 896/1000
2025-12-15 11:47:02,177 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:47:08,704 INFO Gemini: processed 1000/1000
2025-12-15 11:47:08,704 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60663.05 examples/s]

2025-12-15 11:47:08,746 INFO Saved chunk 22000-23000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_22000-23000
2025-12-15 11:47:08,746 INFO Saved chunk 22000-23000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_22000-23000



INFO:__main__:Saved chunk 22000-23000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_22000-23000


2025-12-15 11:47:16,912 INFO Gemini: processed 128/1000
2025-12-15 11:47:16,912 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:47:24,930 INFO Gemini: processed 256/1000
2025-12-15 11:47:24,930 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:47:33,047 INFO Gemini: processed 384/1000
2025-12-15 11:47:33,047 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:47:40,967 INFO Gemini: processed 512/1000
2025-12-15 11:47:40,967 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:47:49,099 INFO Gemini: processed 640/1000
2025-12-15 11:47:49,099 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:47:57,338 INFO Gemini: processed 768/1000
2025-12-15 11:47:57,338 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:48:05,501 INFO Gemini: processed 896/1000
2025-12-15 11:48:05,501 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:48:12,262 INFO Gemini: processed 1000/1000
2025-12-15 11:48:12,262 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 78858.09 examples/s]

2025-12-15 11:48:12,292 INFO Saved chunk 23000-24000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_23000-24000
2025-12-15 11:48:12,292 INFO Saved chunk 23000-24000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_23000-24000



INFO:__main__:Saved chunk 23000-24000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_23000-24000


2025-12-15 11:48:20,303 INFO Gemini: processed 128/1000
2025-12-15 11:48:20,303 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:48:28,444 INFO Gemini: processed 256/1000
2025-12-15 11:48:28,444 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:48:36,751 INFO Gemini: processed 384/1000
2025-12-15 11:48:36,751 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:48:44,863 INFO Gemini: processed 512/1000
2025-12-15 11:48:44,863 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:48:53,008 INFO Gemini: processed 640/1000
2025-12-15 11:48:53,008 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:49:01,120 INFO Gemini: processed 768/1000
2025-12-15 11:49:01,120 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:49:09,307 INFO Gemini: processed 896/1000
2025-12-15 11:49:09,307 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:49:16,093 INFO Gemini: processed 1000/1000
2025-12-15 11:49:16,093 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 87024.17 examples/s]

2025-12-15 11:49:16,120 INFO Saved chunk 24000-25000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_24000-25000
2025-12-15 11:49:16,120 INFO Saved chunk 24000-25000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_24000-25000



INFO:__main__:Saved chunk 24000-25000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_24000-25000


2025-12-15 11:49:24,101 INFO Gemini: processed 128/1000
2025-12-15 11:49:24,101 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:49:32,108 INFO Gemini: processed 256/1000
2025-12-15 11:49:32,108 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:49:40,108 INFO Gemini: processed 384/1000
2025-12-15 11:49:40,108 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:49:48,883 INFO Gemini: processed 512/1000
2025-12-15 11:49:48,883 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:49:56,987 INFO Gemini: processed 640/1000
2025-12-15 11:49:56,987 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:50:05,033 INFO Gemini: processed 768/1000
2025-12-15 11:50:05,033 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:50:13,413 INFO Gemini: processed 896/1000
2025-12-15 11:50:13,413 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:50:19,989 INFO Gemini: processed 1000/1000
2025-12-15 11:50:19,989 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 72044.80 examples/s]

2025-12-15 11:50:20,020 INFO Saved chunk 25000-26000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_25000-26000
2025-12-15 11:50:20,020 INFO Saved chunk 25000-26000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_25000-26000



INFO:__main__:Saved chunk 25000-26000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_25000-26000


2025-12-15 11:50:28,131 INFO Gemini: processed 128/1000
2025-12-15 11:50:28,131 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:50:36,228 INFO Gemini: processed 256/1000
2025-12-15 11:50:36,228 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:50:44,695 INFO Gemini: processed 384/1000
2025-12-15 11:50:44,695 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:50:52,838 INFO Gemini: processed 512/1000
2025-12-15 11:50:52,838 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:51:00,872 INFO Gemini: processed 640/1000
2025-12-15 11:51:00,872 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:51:08,976 INFO Gemini: processed 768/1000
2025-12-15 11:51:08,976 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:51:16,999 INFO Gemini: processed 896/1000
2025-12-15 11:51:16,999 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:51:23,749 INFO Gemini: processed 1000/1000
2025-12-15 11:51:23,749 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 78024.85 examples/s]

2025-12-15 11:51:23,781 INFO Saved chunk 26000-27000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_26000-27000
2025-12-15 11:51:23,781 INFO Saved chunk 26000-27000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_26000-27000



INFO:__main__:Saved chunk 26000-27000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_26000-27000


2025-12-15 11:51:31,877 INFO Gemini: processed 128/1000
2025-12-15 11:51:31,877 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:51:39,982 INFO Gemini: processed 256/1000
2025-12-15 11:51:39,982 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:51:48,115 INFO Gemini: processed 384/1000
2025-12-15 11:51:48,115 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:51:56,151 INFO Gemini: processed 512/1000
2025-12-15 11:51:56,151 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:52:04,938 INFO Gemini: processed 640/1000
2025-12-15 11:52:04,938 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:52:12,947 INFO Gemini: processed 768/1000
2025-12-15 11:52:12,947 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:52:21,005 INFO Gemini: processed 896/1000
2025-12-15 11:52:21,005 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:52:27,591 INFO Gemini: processed 1000/1000
2025-12-15 11:52:27,591 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 45669.19 examples/s]

2025-12-15 11:52:27,630 INFO Saved chunk 27000-28000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_27000-28000
2025-12-15 11:52:27,630 INFO Saved chunk 27000-28000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_27000-28000



INFO:__main__:Saved chunk 27000-28000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_27000-28000


2025-12-15 11:52:35,629 INFO Gemini: processed 128/1000
2025-12-15 11:52:35,629 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:52:43,892 INFO Gemini: processed 256/1000
2025-12-15 11:52:43,892 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:52:52,545 INFO Gemini: processed 384/1000
2025-12-15 11:52:52,545 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:53:00,772 INFO Gemini: processed 512/1000
2025-12-15 11:53:00,772 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:53:08,727 INFO Gemini: processed 640/1000
2025-12-15 11:53:08,727 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:53:16,871 INFO Gemini: processed 768/1000
2025-12-15 11:53:16,871 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:53:24,979 INFO Gemini: processed 896/1000
2025-12-15 11:53:24,979 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:53:31,825 INFO Gemini: processed 1000/1000
2025-12-15 11:53:31,825 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61204.80 examples/s]

2025-12-15 11:53:31,857 INFO Saved chunk 28000-29000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_28000-29000
2025-12-15 11:53:31,857 INFO Saved chunk 28000-29000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_28000-29000



INFO:__main__:Saved chunk 28000-29000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_28000-29000


2025-12-15 11:53:40,054 INFO Gemini: processed 128/1000
2025-12-15 11:53:40,054 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:53:48,443 INFO Gemini: processed 256/1000
2025-12-15 11:53:48,443 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:53:56,686 INFO Gemini: processed 384/1000
2025-12-15 11:53:56,686 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:54:05,143 INFO Gemini: processed 512/1000
2025-12-15 11:54:05,143 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:54:13,635 INFO Gemini: processed 640/1000
2025-12-15 11:54:13,635 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:54:21,659 INFO Gemini: processed 768/1000
2025-12-15 11:54:21,659 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:54:29,744 INFO Gemini: processed 896/1000
2025-12-15 11:54:29,744 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:54:36,368 INFO Gemini: processed 1000/1000
2025-12-15 11:54:36,368 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 75663.02 examples/s]

2025-12-15 11:54:36,404 INFO Saved chunk 29000-30000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_29000-30000
2025-12-15 11:54:36,404 INFO Saved chunk 29000-30000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_29000-30000



INFO:__main__:Saved chunk 29000-30000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_29000-30000


2025-12-15 11:54:44,682 INFO Gemini: processed 128/1000
2025-12-15 11:54:44,682 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:54:52,774 INFO Gemini: processed 256/1000
2025-12-15 11:54:52,774 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:55:00,784 INFO Gemini: processed 384/1000
2025-12-15 11:55:00,784 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:55:08,765 INFO Gemini: processed 512/1000
2025-12-15 11:55:08,765 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:55:17,039 INFO Gemini: processed 640/1000
2025-12-15 11:55:17,039 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:55:25,542 INFO Gemini: processed 768/1000
2025-12-15 11:55:25,542 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:55:33,731 INFO Gemini: processed 896/1000
2025-12-15 11:55:33,731 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:55:40,220 INFO Gemini: processed 1000/1000
2025-12-15 11:55:40,220 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 51851.95 examples/s]

2025-12-15 11:55:40,255 INFO Saved chunk 30000-31000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_30000-31000
2025-12-15 11:55:40,255 INFO Saved chunk 30000-31000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_30000-31000



INFO:__main__:Saved chunk 30000-31000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_30000-31000


2025-12-15 11:55:48,401 INFO Gemini: processed 128/1000
2025-12-15 11:55:48,401 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:55:56,472 INFO Gemini: processed 256/1000
2025-12-15 11:55:56,472 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:56:04,692 INFO Gemini: processed 384/1000
2025-12-15 11:56:04,692 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:56:13,029 INFO Gemini: processed 512/1000
2025-12-15 11:56:13,029 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:56:21,180 INFO Gemini: processed 640/1000
2025-12-15 11:56:21,180 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:56:29,368 INFO Gemini: processed 768/1000
2025-12-15 11:56:29,368 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:56:37,404 INFO Gemini: processed 896/1000
2025-12-15 11:56:37,404 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:56:43,925 INFO Gemini: processed 1000/1000
2025-12-15 11:56:43,925 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 80935.18 examples/s]

2025-12-15 11:56:43,955 INFO Saved chunk 31000-32000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_31000-32000
2025-12-15 11:56:43,955 INFO Saved chunk 31000-32000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_31000-32000



INFO:__main__:Saved chunk 31000-32000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_31000-32000


2025-12-15 11:56:52,496 INFO Gemini: processed 128/1000
2025-12-15 11:56:52,496 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:57:00,992 INFO Gemini: processed 256/1000
2025-12-15 11:57:00,992 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:57:09,143 INFO Gemini: processed 384/1000
2025-12-15 11:57:09,143 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:57:17,185 INFO Gemini: processed 512/1000
2025-12-15 11:57:17,185 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:57:25,366 INFO Gemini: processed 640/1000
2025-12-15 11:57:25,366 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:57:33,537 INFO Gemini: processed 768/1000
2025-12-15 11:57:33,537 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:57:41,726 INFO Gemini: processed 896/1000
2025-12-15 11:57:41,726 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:57:48,414 INFO Gemini: processed 1000/1000
2025-12-15 11:57:48,414 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 53563.68 examples/s]

2025-12-15 11:57:48,496 INFO Saved chunk 32000-33000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_32000-33000
2025-12-15 11:57:48,496 INFO Saved chunk 32000-33000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_32000-33000



INFO:__main__:Saved chunk 32000-33000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_32000-33000


2025-12-15 11:57:56,834 INFO Gemini: processed 128/1000
2025-12-15 11:57:56,834 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:58:05,280 INFO Gemini: processed 256/1000
2025-12-15 11:58:05,280 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:58:13,546 INFO Gemini: processed 384/1000
2025-12-15 11:58:13,546 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:58:22,214 INFO Gemini: processed 512/1000
2025-12-15 11:58:22,214 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:58:30,189 INFO Gemini: processed 640/1000
2025-12-15 11:58:30,189 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:58:38,761 INFO Gemini: processed 768/1000
2025-12-15 11:58:38,761 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:58:46,867 INFO Gemini: processed 896/1000
2025-12-15 11:58:46,867 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:58:53,678 INFO Gemini: processed 1000/1000
2025-12-15 11:58:53,678 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 73858.98 examples/s]

2025-12-15 11:58:53,737 INFO Saved chunk 33000-34000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_33000-34000
2025-12-15 11:58:53,737 INFO Saved chunk 33000-34000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_33000-34000



INFO:__main__:Saved chunk 33000-34000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_33000-34000


2025-12-15 11:59:02,319 INFO Gemini: processed 128/1000
2025-12-15 11:59:02,319 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 11:59:10,361 INFO Gemini: processed 256/1000
2025-12-15 11:59:10,361 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 11:59:18,472 INFO Gemini: processed 384/1000
2025-12-15 11:59:18,472 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 11:59:26,802 INFO Gemini: processed 512/1000
2025-12-15 11:59:26,802 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 11:59:34,872 INFO Gemini: processed 640/1000
2025-12-15 11:59:34,872 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 11:59:43,048 INFO Gemini: processed 768/1000
2025-12-15 11:59:43,048 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 11:59:51,282 INFO Gemini: processed 896/1000
2025-12-15 11:59:51,282 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 11:59:57,864 INFO Gemini: processed 1000/1000
2025-12-15 11:59:57,864 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 34806.92 examples/s]

2025-12-15 11:59:58,016 INFO Saved chunk 34000-35000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_34000-35000
2025-12-15 11:59:58,016 INFO Saved chunk 34000-35000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_34000-35000



INFO:__main__:Saved chunk 34000-35000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_34000-35000


2025-12-15 12:00:06,089 INFO Gemini: processed 128/1000
2025-12-15 12:00:06,089 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:00:14,114 INFO Gemini: processed 256/1000
2025-12-15 12:00:14,114 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:00:22,395 INFO Gemini: processed 384/1000
2025-12-15 12:00:22,395 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:00:30,763 INFO Gemini: processed 512/1000
2025-12-15 12:00:30,763 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:00:39,270 INFO Gemini: processed 640/1000
2025-12-15 12:00:39,270 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:00:47,362 INFO Gemini: processed 768/1000
2025-12-15 12:00:47,362 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:00:55,586 INFO Gemini: processed 896/1000
2025-12-15 12:00:55,586 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:01:02,327 INFO Gemini: processed 1000/1000
2025-12-15 12:01:02,327 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 16366.16 examples/s]

2025-12-15 12:01:02,422 INFO Saved chunk 35000-36000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_35000-36000
2025-12-15 12:01:02,422 INFO Saved chunk 35000-36000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_35000-36000



INFO:__main__:Saved chunk 35000-36000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_35000-36000


2025-12-15 12:01:10,819 INFO Gemini: processed 128/1000
2025-12-15 12:01:10,819 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:01:19,030 INFO Gemini: processed 256/1000
2025-12-15 12:01:19,030 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:01:27,281 INFO Gemini: processed 384/1000
2025-12-15 12:01:27,281 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:01:35,298 INFO Gemini: processed 512/1000
2025-12-15 12:01:35,298 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:01:43,366 INFO Gemini: processed 640/1000
2025-12-15 12:01:43,366 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:01:51,718 INFO Gemini: processed 768/1000
2025-12-15 12:01:51,718 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:01:59,857 INFO Gemini: processed 896/1000
2025-12-15 12:01:59,857 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:02:06,555 INFO Gemini: processed 1000/1000
2025-12-15 12:02:06,555 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 49230.08 examples/s]

2025-12-15 12:02:06,597 INFO Saved chunk 36000-37000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_36000-37000
2025-12-15 12:02:06,597 INFO Saved chunk 36000-37000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_36000-37000



INFO:__main__:Saved chunk 36000-37000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_36000-37000


2025-12-15 12:02:14,705 INFO Gemini: processed 128/1000
2025-12-15 12:02:14,705 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:02:23,061 INFO Gemini: processed 256/1000
2025-12-15 12:02:23,061 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:02:31,406 INFO Gemini: processed 384/1000
2025-12-15 12:02:31,406 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:02:39,717 INFO Gemini: processed 512/1000
2025-12-15 12:02:39,717 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:02:47,820 INFO Gemini: processed 640/1000
2025-12-15 12:02:47,820 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:02:55,975 INFO Gemini: processed 768/1000
2025-12-15 12:02:55,975 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:03:04,181 INFO Gemini: processed 896/1000
2025-12-15 12:03:04,181 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:03:11,070 INFO Gemini: processed 1000/1000
2025-12-15 12:03:11,070 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 58990.79 examples/s]

2025-12-15 12:03:11,106 INFO Saved chunk 37000-38000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_37000-38000
2025-12-15 12:03:11,106 INFO Saved chunk 37000-38000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_37000-38000



INFO:__main__:Saved chunk 37000-38000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_37000-38000


2025-12-15 12:03:23,074 INFO Gemini: processed 128/1000
2025-12-15 12:03:23,074 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:03:31,190 INFO Gemini: processed 256/1000
2025-12-15 12:03:31,190 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:03:39,362 INFO Gemini: processed 384/1000
2025-12-15 12:03:39,362 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:03:47,530 INFO Gemini: processed 512/1000
2025-12-15 12:03:47,530 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:03:55,631 INFO Gemini: processed 640/1000
2025-12-15 12:03:55,631 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:04:03,886 INFO Gemini: processed 768/1000
2025-12-15 12:04:03,886 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:04:11,884 INFO Gemini: processed 896/1000
2025-12-15 12:04:11,884 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:04:18,493 INFO Gemini: processed 1000/1000
2025-12-15 12:04:18,493 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59290.99 examples/s]

2025-12-15 12:04:18,527 INFO Saved chunk 38000-39000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_38000-39000
2025-12-15 12:04:18,527 INFO Saved chunk 38000-39000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_38000-39000



INFO:__main__:Saved chunk 38000-39000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_38000-39000


2025-12-15 12:04:26,603 INFO Gemini: processed 128/1000
2025-12-15 12:04:26,603 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:04:41,753 INFO Gemini: processed 256/1000
2025-12-15 12:04:41,753 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:04:49,670 INFO Gemini: processed 384/1000
2025-12-15 12:04:49,670 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:04:57,746 INFO Gemini: processed 512/1000
2025-12-15 12:04:57,746 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:05:05,759 INFO Gemini: processed 640/1000
2025-12-15 12:05:05,759 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:05:13,699 INFO Gemini: processed 768/1000
2025-12-15 12:05:13,699 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:05:21,874 INFO Gemini: processed 896/1000
2025-12-15 12:05:21,874 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:05:28,535 INFO Gemini: processed 1000/1000
2025-12-15 12:05:28,535 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 36532.89 examples/s]

2025-12-15 12:05:28,587 INFO Saved chunk 39000-40000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_39000-40000
2025-12-15 12:05:28,587 INFO Saved chunk 39000-40000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_39000-40000



INFO:__main__:Saved chunk 39000-40000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_39000-40000


2025-12-15 12:05:36,552 INFO Gemini: processed 128/1000
2025-12-15 12:05:36,552 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:05:44,596 INFO Gemini: processed 256/1000
2025-12-15 12:05:44,596 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:05:52,995 INFO Gemini: processed 384/1000
2025-12-15 12:05:52,995 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:06:01,352 INFO Gemini: processed 512/1000
2025-12-15 12:06:01,352 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:06:09,386 INFO Gemini: processed 640/1000
2025-12-15 12:06:09,386 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:06:17,479 INFO Gemini: processed 768/1000
2025-12-15 12:06:17,479 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:06:25,559 INFO Gemini: processed 896/1000
2025-12-15 12:06:25,559 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:06:32,337 INFO Gemini: processed 1000/1000
2025-12-15 12:06:32,337 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 45767.36 examples/s]

2025-12-15 12:06:32,382 INFO Saved chunk 40000-41000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_40000-41000
2025-12-15 12:06:32,382 INFO Saved chunk 40000-41000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_40000-41000



INFO:__main__:Saved chunk 40000-41000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_40000-41000


2025-12-15 12:06:40,796 INFO Gemini: processed 128/1000
2025-12-15 12:06:40,796 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:06:48,978 INFO Gemini: processed 256/1000
2025-12-15 12:06:48,978 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:06:57,081 INFO Gemini: processed 384/1000
2025-12-15 12:06:57,081 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:07:05,494 INFO Gemini: processed 512/1000
2025-12-15 12:07:05,494 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:07:13,952 INFO Gemini: processed 640/1000
2025-12-15 12:07:13,952 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:07:22,364 INFO Gemini: processed 768/1000
2025-12-15 12:07:22,364 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:07:30,867 INFO Gemini: processed 896/1000
2025-12-15 12:07:30,867 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:07:37,565 INFO Gemini: processed 1000/1000
2025-12-15 12:07:37,565 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61817.30 examples/s]

2025-12-15 12:07:37,609 INFO Saved chunk 41000-42000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_41000-42000
2025-12-15 12:07:37,609 INFO Saved chunk 41000-42000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_41000-42000



INFO:__main__:Saved chunk 41000-42000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_41000-42000


2025-12-15 12:07:45,681 INFO Gemini: processed 128/1000
2025-12-15 12:07:45,681 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:07:53,881 INFO Gemini: processed 256/1000
2025-12-15 12:07:53,881 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:08:02,345 INFO Gemini: processed 384/1000
2025-12-15 12:08:02,345 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:08:10,759 INFO Gemini: processed 512/1000
2025-12-15 12:08:10,759 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:08:18,823 INFO Gemini: processed 640/1000
2025-12-15 12:08:18,823 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:08:26,894 INFO Gemini: processed 768/1000
2025-12-15 12:08:26,894 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:08:34,935 INFO Gemini: processed 896/1000
2025-12-15 12:08:34,935 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:08:41,416 INFO Gemini: processed 1000/1000
2025-12-15 12:08:41,416 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 74385.56 examples/s]

2025-12-15 12:08:41,448 INFO Saved chunk 42000-43000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_42000-43000
2025-12-15 12:08:41,448 INFO Saved chunk 42000-43000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_42000-43000



INFO:__main__:Saved chunk 42000-43000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_42000-43000


2025-12-15 12:08:49,731 INFO Gemini: processed 128/1000
2025-12-15 12:08:49,731 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:08:57,737 INFO Gemini: processed 256/1000
2025-12-15 12:08:57,737 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:09:05,781 INFO Gemini: processed 384/1000
2025-12-15 12:09:05,781 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:09:13,873 INFO Gemini: processed 512/1000
2025-12-15 12:09:13,873 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:09:21,919 INFO Gemini: processed 640/1000
2025-12-15 12:09:21,919 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:09:30,142 INFO Gemini: processed 768/1000
2025-12-15 12:09:30,142 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:09:38,571 INFO Gemini: processed 896/1000
2025-12-15 12:09:38,571 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:09:45,280 INFO Gemini: processed 1000/1000
2025-12-15 12:09:45,280 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 74369.73 examples/s]

2025-12-15 12:09:45,312 INFO Saved chunk 43000-44000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_43000-44000
2025-12-15 12:09:45,312 INFO Saved chunk 43000-44000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_43000-44000



INFO:__main__:Saved chunk 43000-44000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_43000-44000


2025-12-15 12:09:53,739 INFO Gemini: processed 128/1000
2025-12-15 12:09:53,739 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:10:01,747 INFO Gemini: processed 256/1000
2025-12-15 12:10:01,747 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:10:10,089 INFO Gemini: processed 384/1000
2025-12-15 12:10:10,089 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:10:18,235 INFO Gemini: processed 512/1000
2025-12-15 12:10:18,235 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:10:26,399 INFO Gemini: processed 640/1000
2025-12-15 12:10:26,399 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:10:34,391 INFO Gemini: processed 768/1000
2025-12-15 12:10:34,391 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:10:42,777 INFO Gemini: processed 896/1000
2025-12-15 12:10:42,777 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:10:49,603 INFO Gemini: processed 1000/1000
2025-12-15 12:10:49,603 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 82587.80 examples/s]

2025-12-15 12:10:49,637 INFO Saved chunk 44000-45000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_44000-45000
2025-12-15 12:10:49,637 INFO Saved chunk 44000-45000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_44000-45000



INFO:__main__:Saved chunk 44000-45000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_44000-45000


2025-12-15 12:10:57,799 INFO Gemini: processed 128/1000
2025-12-15 12:10:57,799 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:11:05,813 INFO Gemini: processed 256/1000
2025-12-15 12:11:05,813 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:11:13,873 INFO Gemini: processed 384/1000
2025-12-15 12:11:13,873 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:11:22,003 INFO Gemini: processed 512/1000
2025-12-15 12:11:22,003 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:11:30,216 INFO Gemini: processed 640/1000
2025-12-15 12:11:30,216 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:11:38,366 INFO Gemini: processed 768/1000
2025-12-15 12:11:38,366 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:11:46,540 INFO Gemini: processed 896/1000
2025-12-15 12:11:46,540 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:11:53,168 INFO Gemini: processed 1000/1000
2025-12-15 12:11:53,168 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 89541.52 examples/s]

2025-12-15 12:11:53,200 INFO Saved chunk 45000-46000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_45000-46000
2025-12-15 12:11:53,200 INFO Saved chunk 45000-46000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_45000-46000



INFO:__main__:Saved chunk 45000-46000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_45000-46000


2025-12-15 12:12:01,393 INFO Gemini: processed 128/1000
2025-12-15 12:12:01,393 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:12:09,523 INFO Gemini: processed 256/1000
2025-12-15 12:12:09,523 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:12:17,593 INFO Gemini: processed 384/1000
2025-12-15 12:12:17,593 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:12:25,636 INFO Gemini: processed 512/1000
2025-12-15 12:12:25,636 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:12:33,783 INFO Gemini: processed 640/1000
2025-12-15 12:12:33,783 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:12:41,840 INFO Gemini: processed 768/1000
2025-12-15 12:12:41,840 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:12:50,055 INFO Gemini: processed 896/1000
2025-12-15 12:12:50,055 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:12:56,568 INFO Gemini: processed 1000/1000
2025-12-15 12:12:56,568 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 66695.88 examples/s]

2025-12-15 12:12:56,602 INFO Saved chunk 46000-47000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_46000-47000
2025-12-15 12:12:56,602 INFO Saved chunk 46000-47000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_46000-47000



INFO:__main__:Saved chunk 46000-47000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_46000-47000


2025-12-15 12:13:04,682 INFO Gemini: processed 128/1000
2025-12-15 12:13:04,682 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:13:12,889 INFO Gemini: processed 256/1000
2025-12-15 12:13:12,889 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:13:21,063 INFO Gemini: processed 384/1000
2025-12-15 12:13:21,063 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:13:29,124 INFO Gemini: processed 512/1000
2025-12-15 12:13:29,124 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:13:37,335 INFO Gemini: processed 640/1000
2025-12-15 12:13:37,335 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:13:45,225 INFO Gemini: processed 768/1000
2025-12-15 12:13:45,225 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:13:53,250 INFO Gemini: processed 896/1000
2025-12-15 12:13:53,250 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:13:59,777 INFO Gemini: processed 1000/1000
2025-12-15 12:13:59,777 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 83676.89 examples/s]

2025-12-15 12:13:59,806 INFO Saved chunk 47000-48000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_47000-48000
2025-12-15 12:13:59,806 INFO Saved chunk 47000-48000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_47000-48000



INFO:__main__:Saved chunk 47000-48000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_47000-48000


2025-12-15 12:14:07,794 INFO Gemini: processed 128/1000
2025-12-15 12:14:07,794 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:14:15,906 INFO Gemini: processed 256/1000
2025-12-15 12:14:15,906 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:14:24,079 INFO Gemini: processed 384/1000
2025-12-15 12:14:24,079 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:14:32,173 INFO Gemini: processed 512/1000
2025-12-15 12:14:32,173 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:14:40,190 INFO Gemini: processed 640/1000
2025-12-15 12:14:40,190 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:14:48,392 INFO Gemini: processed 768/1000
2025-12-15 12:14:48,392 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:14:56,663 INFO Gemini: processed 896/1000
2025-12-15 12:14:56,663 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:15:03,183 INFO Gemini: processed 1000/1000
2025-12-15 12:15:03,183 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 56756.48 examples/s]

2025-12-15 12:15:03,217 INFO Saved chunk 48000-49000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_48000-49000
2025-12-15 12:15:03,217 INFO Saved chunk 48000-49000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_48000-49000



INFO:__main__:Saved chunk 48000-49000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_48000-49000


2025-12-15 12:15:11,553 INFO Gemini: processed 128/1000
2025-12-15 12:15:11,553 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:15:19,602 INFO Gemini: processed 256/1000
2025-12-15 12:15:19,602 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:15:27,550 INFO Gemini: processed 384/1000
2025-12-15 12:15:27,550 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:15:35,843 INFO Gemini: processed 512/1000
2025-12-15 12:15:35,843 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:15:43,986 INFO Gemini: processed 640/1000
2025-12-15 12:15:43,986 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:15:51,934 INFO Gemini: processed 768/1000
2025-12-15 12:15:51,934 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:16:00,162 INFO Gemini: processed 896/1000
2025-12-15 12:16:00,162 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:16:06,979 INFO Gemini: processed 1000/1000
2025-12-15 12:16:06,979 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60745.64 examples/s]

2025-12-15 12:16:07,013 INFO Saved chunk 49000-50000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_49000-50000
2025-12-15 12:16:07,013 INFO Saved chunk 49000-50000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_49000-50000



INFO:__main__:Saved chunk 49000-50000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_49000-50000


2025-12-15 12:16:15,480 INFO Gemini: processed 128/1000
2025-12-15 12:16:15,480 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:16:23,712 INFO Gemini: processed 256/1000
2025-12-15 12:16:23,712 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:16:31,624 INFO Gemini: processed 384/1000
2025-12-15 12:16:31,624 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:16:39,485 INFO Gemini: processed 512/1000
2025-12-15 12:16:39,485 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:16:47,488 INFO Gemini: processed 640/1000
2025-12-15 12:16:47,488 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:16:55,610 INFO Gemini: processed 768/1000
2025-12-15 12:16:55,610 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:17:03,641 INFO Gemini: processed 896/1000
2025-12-15 12:17:03,641 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:17:10,110 INFO Gemini: processed 1000/1000
2025-12-15 12:17:10,110 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 66698.00 examples/s]

2025-12-15 12:17:10,140 INFO Saved chunk 50000-51000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_50000-51000
2025-12-15 12:17:10,140 INFO Saved chunk 50000-51000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_50000-51000



INFO:__main__:Saved chunk 50000-51000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_50000-51000


2025-12-15 12:17:18,170 INFO Gemini: processed 128/1000
2025-12-15 12:17:18,170 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:17:26,341 INFO Gemini: processed 256/1000
2025-12-15 12:17:26,341 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:17:34,598 INFO Gemini: processed 384/1000
2025-12-15 12:17:34,598 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:17:42,567 INFO Gemini: processed 512/1000
2025-12-15 12:17:42,567 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:17:50,402 INFO Gemini: processed 640/1000
2025-12-15 12:17:50,402 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:17:58,298 INFO Gemini: processed 768/1000
2025-12-15 12:17:58,298 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:18:06,161 INFO Gemini: processed 896/1000
2025-12-15 12:18:06,161 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:18:12,857 INFO Gemini: processed 1000/1000
2025-12-15 12:18:12,857 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65711.57 examples/s]

2025-12-15 12:18:12,889 INFO Saved chunk 51000-52000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_51000-52000
2025-12-15 12:18:12,889 INFO Saved chunk 51000-52000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_51000-52000



INFO:__main__:Saved chunk 51000-52000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_51000-52000


2025-12-15 12:18:20,947 INFO Gemini: processed 128/1000
2025-12-15 12:18:20,947 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:18:29,095 INFO Gemini: processed 256/1000
2025-12-15 12:18:29,095 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:18:37,084 INFO Gemini: processed 384/1000
2025-12-15 12:18:37,084 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:18:45,003 INFO Gemini: processed 512/1000
2025-12-15 12:18:45,003 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:18:53,247 INFO Gemini: processed 640/1000
2025-12-15 12:18:53,247 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:19:01,170 INFO Gemini: processed 768/1000
2025-12-15 12:19:01,170 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:19:09,095 INFO Gemini: processed 896/1000
2025-12-15 12:19:09,095 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:19:15,642 INFO Gemini: processed 1000/1000
2025-12-15 12:19:15,642 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 67178.73 examples/s]

2025-12-15 12:19:15,673 INFO Saved chunk 52000-53000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_52000-53000
2025-12-15 12:19:15,673 INFO Saved chunk 52000-53000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_52000-53000



INFO:__main__:Saved chunk 52000-53000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_52000-53000


2025-12-15 12:19:23,725 INFO Gemini: processed 128/1000
2025-12-15 12:19:23,725 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:19:31,851 INFO Gemini: processed 256/1000
2025-12-15 12:19:31,851 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:19:39,861 INFO Gemini: processed 384/1000
2025-12-15 12:19:39,861 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:19:47,746 INFO Gemini: processed 512/1000
2025-12-15 12:19:47,746 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:19:55,800 INFO Gemini: processed 640/1000
2025-12-15 12:19:55,800 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:20:03,825 INFO Gemini: processed 768/1000
2025-12-15 12:20:03,825 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:20:12,297 INFO Gemini: processed 896/1000
2025-12-15 12:20:12,297 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:20:18,918 INFO Gemini: processed 1000/1000
2025-12-15 12:20:18,918 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59454.04 examples/s]

2025-12-15 12:20:18,950 INFO Saved chunk 53000-54000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_53000-54000
2025-12-15 12:20:18,950 INFO Saved chunk 53000-54000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_53000-54000



INFO:__main__:Saved chunk 53000-54000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_53000-54000


2025-12-15 12:20:26,984 INFO Gemini: processed 128/1000
2025-12-15 12:20:26,984 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:20:34,869 INFO Gemini: processed 256/1000
2025-12-15 12:20:34,869 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:20:42,909 INFO Gemini: processed 384/1000
2025-12-15 12:20:42,909 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:20:51,203 INFO Gemini: processed 512/1000
2025-12-15 12:20:51,203 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:20:59,374 INFO Gemini: processed 640/1000
2025-12-15 12:20:59,374 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:21:07,560 INFO Gemini: processed 768/1000
2025-12-15 12:21:07,560 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:21:15,554 INFO Gemini: processed 896/1000
2025-12-15 12:21:15,554 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:21:22,121 INFO Gemini: processed 1000/1000
2025-12-15 12:21:22,121 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 70849.73 examples/s]

2025-12-15 12:21:22,150 INFO Saved chunk 54000-55000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_54000-55000
2025-12-15 12:21:22,150 INFO Saved chunk 54000-55000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_54000-55000



INFO:__main__:Saved chunk 54000-55000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_54000-55000


2025-12-15 12:21:30,320 INFO Gemini: processed 128/1000
2025-12-15 12:21:30,320 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:21:38,677 INFO Gemini: processed 256/1000
2025-12-15 12:21:38,677 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:21:46,777 INFO Gemini: processed 384/1000
2025-12-15 12:21:46,777 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:21:54,673 INFO Gemini: processed 512/1000
2025-12-15 12:21:54,673 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:22:02,611 INFO Gemini: processed 640/1000
2025-12-15 12:22:02,611 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:22:10,602 INFO Gemini: processed 768/1000
2025-12-15 12:22:10,602 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:22:18,716 INFO Gemini: processed 896/1000
2025-12-15 12:22:18,716 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:22:25,200 INFO Gemini: processed 1000/1000
2025-12-15 12:22:25,200 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 68516.47 examples/s]

2025-12-15 12:22:25,228 INFO Saved chunk 55000-56000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_55000-56000
2025-12-15 12:22:25,228 INFO Saved chunk 55000-56000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_55000-56000



INFO:__main__:Saved chunk 55000-56000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_55000-56000


2025-12-15 12:22:33,263 INFO Gemini: processed 128/1000
2025-12-15 12:22:33,263 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:22:41,329 INFO Gemini: processed 256/1000
2025-12-15 12:22:41,329 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:22:49,582 INFO Gemini: processed 384/1000
2025-12-15 12:22:49,582 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:22:57,715 INFO Gemini: processed 512/1000
2025-12-15 12:22:57,715 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:23:05,771 INFO Gemini: processed 640/1000
2025-12-15 12:23:05,771 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:23:13,832 INFO Gemini: processed 768/1000
2025-12-15 12:23:13,832 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:23:21,900 INFO Gemini: processed 896/1000
2025-12-15 12:23:21,900 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:23:28,823 INFO Gemini: processed 1000/1000
2025-12-15 12:23:28,823 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 90269.97 examples/s]

2025-12-15 12:23:28,849 INFO Saved chunk 56000-57000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_56000-57000
2025-12-15 12:23:28,849 INFO Saved chunk 56000-57000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_56000-57000



INFO:__main__:Saved chunk 56000-57000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_56000-57000


2025-12-15 12:23:37,038 INFO Gemini: processed 128/1000
2025-12-15 12:23:37,038 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:23:44,997 INFO Gemini: processed 256/1000
2025-12-15 12:23:44,997 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:23:53,000 INFO Gemini: processed 384/1000
2025-12-15 12:23:53,000 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:24:00,990 INFO Gemini: processed 512/1000
2025-12-15 12:24:00,990 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:24:08,969 INFO Gemini: processed 640/1000
2025-12-15 12:24:08,969 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:24:17,042 INFO Gemini: processed 768/1000
2025-12-15 12:24:17,042 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:24:25,107 INFO Gemini: processed 896/1000
2025-12-15 12:24:25,107 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:24:32,009 INFO Gemini: processed 1000/1000
2025-12-15 12:24:32,009 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 53259.65 examples/s]

2025-12-15 12:24:32,042 INFO Saved chunk 57000-58000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_57000-58000
2025-12-15 12:24:32,042 INFO Saved chunk 57000-58000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_57000-58000



INFO:__main__:Saved chunk 57000-58000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_57000-58000


2025-12-15 12:24:39,993 INFO Gemini: processed 128/1000
2025-12-15 12:24:39,993 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:24:47,939 INFO Gemini: processed 256/1000
2025-12-15 12:24:47,939 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:24:55,996 INFO Gemini: processed 384/1000
2025-12-15 12:24:55,996 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:25:04,319 INFO Gemini: processed 512/1000
2025-12-15 12:25:04,319 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:25:12,300 INFO Gemini: processed 640/1000
2025-12-15 12:25:12,300 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:25:20,326 INFO Gemini: processed 768/1000
2025-12-15 12:25:20,326 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:25:28,472 INFO Gemini: processed 896/1000
2025-12-15 12:25:28,472 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:25:35,617 INFO Gemini: processed 1000/1000
2025-12-15 12:25:35,617 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 86366.53 examples/s]

2025-12-15 12:25:35,644 INFO Saved chunk 58000-59000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_58000-59000
2025-12-15 12:25:35,644 INFO Saved chunk 58000-59000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_58000-59000



INFO:__main__:Saved chunk 58000-59000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_58000-59000


2025-12-15 12:25:43,949 INFO Gemini: processed 128/1000
2025-12-15 12:25:43,949 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:25:51,859 INFO Gemini: processed 256/1000
2025-12-15 12:25:51,859 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:25:59,799 INFO Gemini: processed 384/1000
2025-12-15 12:25:59,799 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:26:07,847 INFO Gemini: processed 512/1000
2025-12-15 12:26:07,847 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:26:16,114 INFO Gemini: processed 640/1000
2025-12-15 12:26:16,114 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:26:24,082 INFO Gemini: processed 768/1000
2025-12-15 12:26:24,082 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:26:32,109 INFO Gemini: processed 896/1000
2025-12-15 12:26:32,109 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:26:38,629 INFO Gemini: processed 1000/1000
2025-12-15 12:26:38,629 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 79488.76 examples/s]

2025-12-15 12:26:38,656 INFO Saved chunk 59000-60000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_59000-60000
2025-12-15 12:26:38,656 INFO Saved chunk 59000-60000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_59000-60000



INFO:__main__:Saved chunk 59000-60000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_59000-60000


2025-12-15 12:26:46,651 INFO Gemini: processed 128/1000
2025-12-15 12:26:46,651 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:26:54,580 INFO Gemini: processed 256/1000
2025-12-15 12:26:54,580 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:27:02,542 INFO Gemini: processed 384/1000
2025-12-15 12:27:02,542 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:27:10,761 INFO Gemini: processed 512/1000
2025-12-15 12:27:10,761 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:27:18,828 INFO Gemini: processed 640/1000
2025-12-15 12:27:18,828 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:27:26,928 INFO Gemini: processed 768/1000
2025-12-15 12:27:26,928 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:27:34,953 INFO Gemini: processed 896/1000
2025-12-15 12:27:34,953 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:27:41,566 INFO Gemini: processed 1000/1000
2025-12-15 12:27:41,566 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 57338.40 examples/s]

2025-12-15 12:27:41,600 INFO Saved chunk 60000-61000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_60000-61000
2025-12-15 12:27:41,600 INFO Saved chunk 60000-61000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_60000-61000



INFO:__main__:Saved chunk 60000-61000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_60000-61000


2025-12-15 12:27:49,648 INFO Gemini: processed 128/1000
2025-12-15 12:27:49,648 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:27:57,931 INFO Gemini: processed 256/1000
2025-12-15 12:27:57,931 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:28:06,067 INFO Gemini: processed 384/1000
2025-12-15 12:28:06,067 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:28:14,125 INFO Gemini: processed 512/1000
2025-12-15 12:28:14,125 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:28:23,271 INFO Gemini: processed 640/1000
2025-12-15 12:28:23,271 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:28:31,312 INFO Gemini: processed 768/1000
2025-12-15 12:28:31,312 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:28:39,327 INFO Gemini: processed 896/1000
2025-12-15 12:28:39,327 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:28:45,910 INFO Gemini: processed 1000/1000
2025-12-15 12:28:45,910 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 84783.09 examples/s]

2025-12-15 12:28:45,936 INFO Saved chunk 61000-62000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_61000-62000
2025-12-15 12:28:45,936 INFO Saved chunk 61000-62000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_61000-62000



INFO:__main__:Saved chunk 61000-62000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_61000-62000


2025-12-15 12:28:53,879 INFO Gemini: processed 128/1000
2025-12-15 12:28:53,879 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:29:02,073 INFO Gemini: processed 256/1000
2025-12-15 12:29:02,073 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:29:10,027 INFO Gemini: processed 384/1000
2025-12-15 12:29:10,027 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:29:17,966 INFO Gemini: processed 512/1000
2025-12-15 12:29:17,966 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:29:25,998 INFO Gemini: processed 640/1000
2025-12-15 12:29:25,998 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:29:34,010 INFO Gemini: processed 768/1000
2025-12-15 12:29:34,010 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:29:42,023 INFO Gemini: processed 896/1000
2025-12-15 12:29:42,023 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:29:48,625 INFO Gemini: processed 1000/1000
2025-12-15 12:29:48,625 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 72786.19 examples/s]

2025-12-15 12:29:48,653 INFO Saved chunk 62000-63000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_62000-63000
2025-12-15 12:29:48,653 INFO Saved chunk 62000-63000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_62000-63000



INFO:__main__:Saved chunk 62000-63000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_62000-63000


2025-12-15 12:29:56,661 INFO Gemini: processed 128/1000
2025-12-15 12:29:56,661 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:30:04,730 INFO Gemini: processed 256/1000
2025-12-15 12:30:04,730 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:30:12,921 INFO Gemini: processed 384/1000
2025-12-15 12:30:12,921 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:30:21,066 INFO Gemini: processed 512/1000
2025-12-15 12:30:21,066 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:30:28,896 INFO Gemini: processed 640/1000
2025-12-15 12:30:28,896 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:30:37,167 INFO Gemini: processed 768/1000
2025-12-15 12:30:37,167 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:30:45,468 INFO Gemini: processed 896/1000
2025-12-15 12:30:45,468 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:30:52,094 INFO Gemini: processed 1000/1000
2025-12-15 12:30:52,094 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 68859.55 examples/s]

2025-12-15 12:30:52,132 INFO Saved chunk 63000-64000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_63000-64000
2025-12-15 12:30:52,132 INFO Saved chunk 63000-64000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_63000-64000



INFO:__main__:Saved chunk 63000-64000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_63000-64000


2025-12-15 12:31:00,636 INFO Gemini: processed 128/1000
2025-12-15 12:31:00,636 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:31:08,820 INFO Gemini: processed 256/1000
2025-12-15 12:31:08,820 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:31:17,027 INFO Gemini: processed 384/1000
2025-12-15 12:31:17,027 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:31:25,158 INFO Gemini: processed 512/1000
2025-12-15 12:31:25,158 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:31:33,259 INFO Gemini: processed 640/1000
2025-12-15 12:31:33,259 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:31:41,835 INFO Gemini: processed 768/1000
2025-12-15 12:31:41,835 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:31:50,115 INFO Gemini: processed 896/1000
2025-12-15 12:31:50,115 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:31:56,775 INFO Gemini: processed 1000/1000
2025-12-15 12:31:56,775 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63749.03 examples/s]

2025-12-15 12:31:56,806 INFO Saved chunk 64000-65000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_64000-65000
2025-12-15 12:31:56,806 INFO Saved chunk 64000-65000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_64000-65000



INFO:__main__:Saved chunk 64000-65000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_64000-65000


2025-12-15 12:32:04,879 INFO Gemini: processed 128/1000
2025-12-15 12:32:04,879 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:32:12,990 INFO Gemini: processed 256/1000
2025-12-15 12:32:12,990 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:32:21,709 INFO Gemini: processed 384/1000
2025-12-15 12:32:21,709 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:32:30,143 INFO Gemini: processed 512/1000
2025-12-15 12:32:30,143 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:32:38,698 INFO Gemini: processed 640/1000
2025-12-15 12:32:38,698 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:32:46,819 INFO Gemini: processed 768/1000
2025-12-15 12:32:46,819 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:32:55,078 INFO Gemini: processed 896/1000
2025-12-15 12:32:55,078 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:33:01,721 INFO Gemini: processed 1000/1000
2025-12-15 12:33:01,721 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 82376.93 examples/s]

2025-12-15 12:33:01,748 INFO Saved chunk 65000-66000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_65000-66000
2025-12-15 12:33:01,748 INFO Saved chunk 65000-66000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_65000-66000



INFO:__main__:Saved chunk 65000-66000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_65000-66000


2025-12-15 12:33:10,003 INFO Gemini: processed 128/1000
2025-12-15 12:33:10,003 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:33:18,283 INFO Gemini: processed 256/1000
2025-12-15 12:33:18,283 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:33:26,251 INFO Gemini: processed 384/1000
2025-12-15 12:33:26,251 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:33:34,534 INFO Gemini: processed 512/1000
2025-12-15 12:33:34,534 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:33:43,234 INFO Gemini: processed 640/1000
2025-12-15 12:33:43,234 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:33:51,420 INFO Gemini: processed 768/1000
2025-12-15 12:33:51,420 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:33:59,729 INFO Gemini: processed 896/1000
2025-12-15 12:33:59,729 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:34:06,708 INFO Gemini: processed 1000/1000
2025-12-15 12:34:06,708 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 77913.25 examples/s]

2025-12-15 12:34:06,737 INFO Saved chunk 66000-67000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_66000-67000
2025-12-15 12:34:06,737 INFO Saved chunk 66000-67000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_66000-67000



INFO:__main__:Saved chunk 66000-67000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_66000-67000


2025-12-15 12:34:15,002 INFO Gemini: processed 128/1000
2025-12-15 12:34:15,002 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:34:23,220 INFO Gemini: processed 256/1000
2025-12-15 12:34:23,220 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:34:31,481 INFO Gemini: processed 384/1000
2025-12-15 12:34:31,481 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:34:39,731 INFO Gemini: processed 512/1000
2025-12-15 12:34:39,731 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:34:48,128 INFO Gemini: processed 640/1000
2025-12-15 12:34:48,128 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:34:56,389 INFO Gemini: processed 768/1000
2025-12-15 12:34:56,389 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:35:04,394 INFO Gemini: processed 896/1000
2025-12-15 12:35:04,394 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:35:11,080 INFO Gemini: processed 1000/1000
2025-12-15 12:35:11,080 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62251.27 examples/s]

2025-12-15 12:35:11,113 INFO Saved chunk 67000-68000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_67000-68000
2025-12-15 12:35:11,113 INFO Saved chunk 67000-68000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_67000-68000



INFO:__main__:Saved chunk 67000-68000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_67000-68000


2025-12-15 12:35:19,250 INFO Gemini: processed 128/1000
2025-12-15 12:35:19,250 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:35:27,360 INFO Gemini: processed 256/1000
2025-12-15 12:35:27,360 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:35:35,632 INFO Gemini: processed 384/1000
2025-12-15 12:35:35,632 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:35:43,885 INFO Gemini: processed 512/1000
2025-12-15 12:35:43,885 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:35:51,867 INFO Gemini: processed 640/1000
2025-12-15 12:35:51,867 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:35:59,922 INFO Gemini: processed 768/1000
2025-12-15 12:35:59,922 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:36:07,869 INFO Gemini: processed 896/1000
2025-12-15 12:36:07,869 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:36:14,460 INFO Gemini: processed 1000/1000
2025-12-15 12:36:14,460 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 66424.43 examples/s]

2025-12-15 12:36:14,498 INFO Saved chunk 68000-69000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_68000-69000
2025-12-15 12:36:14,498 INFO Saved chunk 68000-69000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_68000-69000



INFO:__main__:Saved chunk 68000-69000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_68000-69000


2025-12-15 12:36:22,696 INFO Gemini: processed 128/1000
2025-12-15 12:36:22,696 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:36:30,910 INFO Gemini: processed 256/1000
2025-12-15 12:36:30,910 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:36:39,130 INFO Gemini: processed 384/1000
2025-12-15 12:36:39,130 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:36:47,581 INFO Gemini: processed 512/1000
2025-12-15 12:36:47,581 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:36:56,055 INFO Gemini: processed 640/1000
2025-12-15 12:36:56,055 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:37:04,215 INFO Gemini: processed 768/1000
2025-12-15 12:37:04,215 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:37:12,245 INFO Gemini: processed 896/1000
2025-12-15 12:37:12,245 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:37:19,185 INFO Gemini: processed 1000/1000
2025-12-15 12:37:19,185 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 73325.71 examples/s]

2025-12-15 12:37:19,214 INFO Saved chunk 69000-70000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_69000-70000
2025-12-15 12:37:19,214 INFO Saved chunk 69000-70000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_69000-70000



INFO:__main__:Saved chunk 69000-70000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_69000-70000


2025-12-15 12:37:27,376 INFO Gemini: processed 128/1000
2025-12-15 12:37:27,376 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:37:35,395 INFO Gemini: processed 256/1000
2025-12-15 12:37:35,395 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:37:43,394 INFO Gemini: processed 384/1000
2025-12-15 12:37:43,394 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:37:51,542 INFO Gemini: processed 512/1000
2025-12-15 12:37:51,542 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:37:59,714 INFO Gemini: processed 640/1000
2025-12-15 12:37:59,714 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:38:07,663 INFO Gemini: processed 768/1000
2025-12-15 12:38:07,663 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:38:15,785 INFO Gemini: processed 896/1000
2025-12-15 12:38:15,785 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:38:22,432 INFO Gemini: processed 1000/1000
2025-12-15 12:38:22,432 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 86542.95 examples/s]

2025-12-15 12:38:22,459 INFO Saved chunk 70000-71000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_70000-71000
2025-12-15 12:38:22,459 INFO Saved chunk 70000-71000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_70000-71000



INFO:__main__:Saved chunk 70000-71000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_70000-71000


2025-12-15 12:38:30,698 INFO Gemini: processed 128/1000
2025-12-15 12:38:30,698 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:38:38,779 INFO Gemini: processed 256/1000
2025-12-15 12:38:38,779 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:38:47,012 INFO Gemini: processed 384/1000
2025-12-15 12:38:47,012 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:38:55,027 INFO Gemini: processed 512/1000
2025-12-15 12:38:55,027 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:39:03,254 INFO Gemini: processed 640/1000
2025-12-15 12:39:03,254 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:39:11,419 INFO Gemini: processed 768/1000
2025-12-15 12:39:11,419 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:39:19,519 INFO Gemini: processed 896/1000
2025-12-15 12:39:19,519 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:39:26,222 INFO Gemini: processed 1000/1000
2025-12-15 12:39:26,222 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 68807.59 examples/s]

2025-12-15 12:39:26,252 INFO Saved chunk 71000-72000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_71000-72000
2025-12-15 12:39:26,252 INFO Saved chunk 71000-72000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_71000-72000



INFO:__main__:Saved chunk 71000-72000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_71000-72000


2025-12-15 12:39:34,431 INFO Gemini: processed 128/1000
2025-12-15 12:39:34,431 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:39:42,356 INFO Gemini: processed 256/1000
2025-12-15 12:39:42,356 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:39:50,656 INFO Gemini: processed 384/1000
2025-12-15 12:39:50,656 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:39:58,621 INFO Gemini: processed 512/1000
2025-12-15 12:39:58,621 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:40:06,870 INFO Gemini: processed 640/1000
2025-12-15 12:40:06,870 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:40:14,940 INFO Gemini: processed 768/1000
2025-12-15 12:40:14,940 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:40:22,896 INFO Gemini: processed 896/1000
2025-12-15 12:40:22,896 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:40:35,921 INFO Gemini: processed 1000/1000
2025-12-15 12:40:35,921 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 81096.37 examples/s]

2025-12-15 12:40:35,952 INFO Saved chunk 72000-73000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_72000-73000
2025-12-15 12:40:35,952 INFO Saved chunk 72000-73000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_72000-73000



INFO:__main__:Saved chunk 72000-73000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_72000-73000


2025-12-15 12:40:43,937 INFO Gemini: processed 128/1000
2025-12-15 12:40:43,937 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:40:51,936 INFO Gemini: processed 256/1000
2025-12-15 12:40:51,936 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:40:59,926 INFO Gemini: processed 384/1000
2025-12-15 12:40:59,926 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:41:08,206 INFO Gemini: processed 512/1000
2025-12-15 12:41:08,206 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:41:16,715 INFO Gemini: processed 640/1000
2025-12-15 12:41:16,715 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:41:24,865 INFO Gemini: processed 768/1000
2025-12-15 12:41:24,865 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:41:32,963 INFO Gemini: processed 896/1000
2025-12-15 12:41:32,963 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:41:39,585 INFO Gemini: processed 1000/1000
2025-12-15 12:41:39,585 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 76965.36 examples/s]

2025-12-15 12:41:39,616 INFO Saved chunk 73000-74000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_73000-74000
2025-12-15 12:41:39,616 INFO Saved chunk 73000-74000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_73000-74000



INFO:__main__:Saved chunk 73000-74000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_73000-74000


2025-12-15 12:41:47,814 INFO Gemini: processed 128/1000
2025-12-15 12:41:47,814 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:41:55,895 INFO Gemini: processed 256/1000
2025-12-15 12:41:55,895 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:42:03,993 INFO Gemini: processed 384/1000
2025-12-15 12:42:03,993 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:42:12,042 INFO Gemini: processed 512/1000
2025-12-15 12:42:12,042 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:42:20,186 INFO Gemini: processed 640/1000
2025-12-15 12:42:20,186 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:42:28,373 INFO Gemini: processed 768/1000
2025-12-15 12:42:28,373 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:42:36,444 INFO Gemini: processed 896/1000
2025-12-15 12:42:36,444 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:42:43,334 INFO Gemini: processed 1000/1000
2025-12-15 12:42:43,334 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 94093.32 examples/s]

2025-12-15 12:42:43,360 INFO Saved chunk 74000-75000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_74000-75000
2025-12-15 12:42:43,360 INFO Saved chunk 74000-75000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_74000-75000



INFO:__main__:Saved chunk 74000-75000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_74000-75000


2025-12-15 12:42:51,702 INFO Gemini: processed 128/1000
2025-12-15 12:42:51,702 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:42:59,652 INFO Gemini: processed 256/1000
2025-12-15 12:42:59,652 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:43:07,906 INFO Gemini: processed 384/1000
2025-12-15 12:43:07,906 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:43:15,998 INFO Gemini: processed 512/1000
2025-12-15 12:43:15,998 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:43:24,348 INFO Gemini: processed 640/1000
2025-12-15 12:43:24,348 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:43:32,339 INFO Gemini: processed 768/1000
2025-12-15 12:43:32,339 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:43:40,196 INFO Gemini: processed 896/1000
2025-12-15 12:43:40,196 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:43:46,694 INFO Gemini: processed 1000/1000
2025-12-15 12:43:46,694 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 76758.30 examples/s]

2025-12-15 12:43:46,724 INFO Saved chunk 75000-76000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_75000-76000
2025-12-15 12:43:46,724 INFO Saved chunk 75000-76000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_75000-76000



INFO:__main__:Saved chunk 75000-76000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_75000-76000


2025-12-15 12:43:54,740 INFO Gemini: processed 128/1000
2025-12-15 12:43:54,740 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:44:02,856 INFO Gemini: processed 256/1000
2025-12-15 12:44:02,856 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:44:10,866 INFO Gemini: processed 384/1000
2025-12-15 12:44:10,866 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:44:18,958 INFO Gemini: processed 512/1000
2025-12-15 12:44:18,958 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:44:26,927 INFO Gemini: processed 640/1000
2025-12-15 12:44:26,927 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:44:34,923 INFO Gemini: processed 768/1000
2025-12-15 12:44:34,923 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:44:42,983 INFO Gemini: processed 896/1000
2025-12-15 12:44:42,983 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:44:49,672 INFO Gemini: processed 1000/1000
2025-12-15 12:44:49,672 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 45384.06 examples/s]

2025-12-15 12:44:49,715 INFO Saved chunk 76000-77000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_76000-77000
2025-12-15 12:44:49,715 INFO Saved chunk 76000-77000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_76000-77000



INFO:__main__:Saved chunk 76000-77000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_76000-77000


2025-12-15 12:44:57,765 INFO Gemini: processed 128/1000
2025-12-15 12:44:57,765 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:45:05,890 INFO Gemini: processed 256/1000
2025-12-15 12:45:05,890 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:45:13,868 INFO Gemini: processed 384/1000
2025-12-15 12:45:13,868 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:45:22,008 INFO Gemini: processed 512/1000
2025-12-15 12:45:22,008 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:45:30,132 INFO Gemini: processed 640/1000
2025-12-15 12:45:30,132 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:45:38,307 INFO Gemini: processed 768/1000
2025-12-15 12:45:38,307 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:45:46,331 INFO Gemini: processed 896/1000
2025-12-15 12:45:46,331 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:45:53,379 INFO Gemini: processed 1000/1000
2025-12-15 12:45:53,379 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 68934.24 examples/s]

2025-12-15 12:45:53,411 INFO Saved chunk 77000-78000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_77000-78000
2025-12-15 12:45:53,411 INFO Saved chunk 77000-78000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_77000-78000



INFO:__main__:Saved chunk 77000-78000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_77000-78000


2025-12-15 12:46:01,614 INFO Gemini: processed 128/1000
2025-12-15 12:46:01,614 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:46:09,853 INFO Gemini: processed 256/1000
2025-12-15 12:46:09,853 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:46:17,887 INFO Gemini: processed 384/1000
2025-12-15 12:46:17,887 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:46:25,956 INFO Gemini: processed 512/1000
2025-12-15 12:46:25,956 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:46:34,047 INFO Gemini: processed 640/1000
2025-12-15 12:46:34,047 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:46:42,220 INFO Gemini: processed 768/1000
2025-12-15 12:46:42,220 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:46:50,243 INFO Gemini: processed 896/1000
2025-12-15 12:46:50,243 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:46:58,491 INFO Gemini: processed 1000/1000
2025-12-15 12:46:58,491 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 88141.55 examples/s]

2025-12-15 12:46:58,519 INFO Saved chunk 78000-79000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_78000-79000
2025-12-15 12:46:58,519 INFO Saved chunk 78000-79000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_78000-79000



INFO:__main__:Saved chunk 78000-79000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_78000-79000


2025-12-15 12:47:06,590 INFO Gemini: processed 128/1000
2025-12-15 12:47:06,590 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:47:14,891 INFO Gemini: processed 256/1000
2025-12-15 12:47:14,891 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:47:23,245 INFO Gemini: processed 384/1000
2025-12-15 12:47:23,245 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:47:31,251 INFO Gemini: processed 512/1000
2025-12-15 12:47:31,251 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:47:39,239 INFO Gemini: processed 640/1000
2025-12-15 12:47:39,239 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:47:47,279 INFO Gemini: processed 768/1000
2025-12-15 12:47:47,279 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:47:55,400 INFO Gemini: processed 896/1000
2025-12-15 12:47:55,400 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:48:02,051 INFO Gemini: processed 1000/1000
2025-12-15 12:48:02,051 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 76043.00 examples/s]

2025-12-15 12:48:02,080 INFO Saved chunk 79000-80000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_79000-80000
2025-12-15 12:48:02,080 INFO Saved chunk 79000-80000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_79000-80000



INFO:__main__:Saved chunk 79000-80000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_79000-80000


2025-12-15 12:48:10,159 INFO Gemini: processed 128/1000
2025-12-15 12:48:10,159 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:48:18,215 INFO Gemini: processed 256/1000
2025-12-15 12:48:18,215 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:48:26,165 INFO Gemini: processed 384/1000
2025-12-15 12:48:26,165 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:48:34,268 INFO Gemini: processed 512/1000
2025-12-15 12:48:34,268 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:48:42,460 INFO Gemini: processed 640/1000
2025-12-15 12:48:42,460 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:48:50,496 INFO Gemini: processed 768/1000
2025-12-15 12:48:50,496 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:48:58,463 INFO Gemini: processed 896/1000
2025-12-15 12:48:58,463 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:49:05,068 INFO Gemini: processed 1000/1000
2025-12-15 12:49:05,068 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 79663.89 examples/s]

2025-12-15 12:49:05,099 INFO Saved chunk 80000-81000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_80000-81000
2025-12-15 12:49:05,099 INFO Saved chunk 80000-81000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_80000-81000



INFO:__main__:Saved chunk 80000-81000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_80000-81000


2025-12-15 12:49:13,006 INFO Gemini: processed 128/1000
2025-12-15 12:49:13,006 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:49:20,931 INFO Gemini: processed 256/1000
2025-12-15 12:49:20,931 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:49:29,153 INFO Gemini: processed 384/1000
2025-12-15 12:49:29,153 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:49:37,371 INFO Gemini: processed 512/1000
2025-12-15 12:49:37,371 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:49:45,289 INFO Gemini: processed 640/1000
2025-12-15 12:49:45,289 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:49:53,251 INFO Gemini: processed 768/1000
2025-12-15 12:49:53,251 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:50:01,107 INFO Gemini: processed 896/1000
2025-12-15 12:50:01,107 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:50:07,847 INFO Gemini: processed 1000/1000
2025-12-15 12:50:07,847 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 66361.37 examples/s]

2025-12-15 12:50:07,883 INFO Saved chunk 81000-82000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_81000-82000
2025-12-15 12:50:07,883 INFO Saved chunk 81000-82000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_81000-82000



INFO:__main__:Saved chunk 81000-82000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_81000-82000


2025-12-15 12:50:15,778 INFO Gemini: processed 128/1000
2025-12-15 12:50:15,778 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:50:23,748 INFO Gemini: processed 256/1000
2025-12-15 12:50:23,748 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:50:31,764 INFO Gemini: processed 384/1000
2025-12-15 12:50:31,764 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:50:39,740 INFO Gemini: processed 512/1000
2025-12-15 12:50:39,740 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:50:47,873 INFO Gemini: processed 640/1000
2025-12-15 12:50:47,873 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:50:55,963 INFO Gemini: processed 768/1000
2025-12-15 12:50:55,963 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:51:04,060 INFO Gemini: processed 896/1000
2025-12-15 12:51:04,060 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:51:10,832 INFO Gemini: processed 1000/1000
2025-12-15 12:51:10,832 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 78471.54 examples/s]

2025-12-15 12:51:10,864 INFO Saved chunk 82000-83000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_82000-83000
2025-12-15 12:51:10,864 INFO Saved chunk 82000-83000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_82000-83000



INFO:__main__:Saved chunk 82000-83000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_82000-83000


2025-12-15 12:51:19,299 INFO Gemini: processed 128/1000
2025-12-15 12:51:19,299 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:51:27,233 INFO Gemini: processed 256/1000
2025-12-15 12:51:27,233 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:51:35,141 INFO Gemini: processed 384/1000
2025-12-15 12:51:35,141 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:51:43,092 INFO Gemini: processed 512/1000
2025-12-15 12:51:43,092 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:51:51,363 INFO Gemini: processed 640/1000
2025-12-15 12:51:51,363 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:51:59,322 INFO Gemini: processed 768/1000
2025-12-15 12:51:59,322 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:52:07,527 INFO Gemini: processed 896/1000
2025-12-15 12:52:07,527 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:52:14,232 INFO Gemini: processed 1000/1000
2025-12-15 12:52:14,232 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 99935.76 examples/s] 

2025-12-15 12:52:14,258 INFO Saved chunk 83000-84000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_83000-84000
2025-12-15 12:52:14,258 INFO Saved chunk 83000-84000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_83000-84000



INFO:__main__:Saved chunk 83000-84000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_83000-84000


2025-12-15 12:52:22,217 INFO Gemini: processed 128/1000
2025-12-15 12:52:22,217 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:52:30,330 INFO Gemini: processed 256/1000
2025-12-15 12:52:30,330 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:52:38,492 INFO Gemini: processed 384/1000
2025-12-15 12:52:38,492 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:52:46,701 INFO Gemini: processed 512/1000
2025-12-15 12:52:46,701 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:52:54,912 INFO Gemini: processed 640/1000
2025-12-15 12:52:54,912 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:53:03,070 INFO Gemini: processed 768/1000
2025-12-15 12:53:03,070 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:53:11,255 INFO Gemini: processed 896/1000
2025-12-15 12:53:11,255 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:53:17,829 INFO Gemini: processed 1000/1000
2025-12-15 12:53:17,829 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 67706.85 examples/s]

2025-12-15 12:53:17,868 INFO Saved chunk 84000-85000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_84000-85000
2025-12-15 12:53:17,868 INFO Saved chunk 84000-85000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_84000-85000



INFO:__main__:Saved chunk 84000-85000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_84000-85000


2025-12-15 12:53:30,992 INFO Gemini: processed 128/1000
2025-12-15 12:53:30,992 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:53:39,313 INFO Gemini: processed 256/1000
2025-12-15 12:53:39,313 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:53:47,437 INFO Gemini: processed 384/1000
2025-12-15 12:53:47,437 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:53:55,409 INFO Gemini: processed 512/1000
2025-12-15 12:53:55,409 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:54:03,533 INFO Gemini: processed 640/1000
2025-12-15 12:54:03,533 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:54:11,790 INFO Gemini: processed 768/1000
2025-12-15 12:54:11,790 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:54:19,888 INFO Gemini: processed 896/1000
2025-12-15 12:54:19,888 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:54:26,359 INFO Gemini: processed 1000/1000
2025-12-15 12:54:26,359 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63549.10 examples/s]

2025-12-15 12:54:26,399 INFO Saved chunk 85000-86000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_85000-86000
2025-12-15 12:54:26,399 INFO Saved chunk 85000-86000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_85000-86000



INFO:__main__:Saved chunk 85000-86000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_85000-86000


2025-12-15 12:54:34,390 INFO Gemini: processed 128/1000
2025-12-15 12:54:34,390 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:54:42,564 INFO Gemini: processed 256/1000
2025-12-15 12:54:42,564 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:54:50,879 INFO Gemini: processed 384/1000
2025-12-15 12:54:50,879 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:54:58,892 INFO Gemini: processed 512/1000
2025-12-15 12:54:58,892 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:55:07,107 INFO Gemini: processed 640/1000
2025-12-15 12:55:07,107 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:55:15,279 INFO Gemini: processed 768/1000
2025-12-15 12:55:15,279 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:55:23,452 INFO Gemini: processed 896/1000
2025-12-15 12:55:23,452 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:55:30,107 INFO Gemini: processed 1000/1000
2025-12-15 12:55:30,107 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 95742.88 examples/s] 

2025-12-15 12:55:30,144 INFO Saved chunk 86000-87000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_86000-87000
2025-12-15 12:55:30,144 INFO Saved chunk 86000-87000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_86000-87000



INFO:__main__:Saved chunk 86000-87000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_86000-87000


2025-12-15 12:55:38,608 INFO Gemini: processed 128/1000
2025-12-15 12:55:38,608 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:55:47,183 INFO Gemini: processed 256/1000
2025-12-15 12:55:47,183 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:55:55,412 INFO Gemini: processed 384/1000
2025-12-15 12:55:55,412 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:56:03,529 INFO Gemini: processed 512/1000
2025-12-15 12:56:03,529 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:56:11,546 INFO Gemini: processed 640/1000
2025-12-15 12:56:11,546 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:56:19,756 INFO Gemini: processed 768/1000
2025-12-15 12:56:19,756 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:56:27,638 INFO Gemini: processed 896/1000
2025-12-15 12:56:27,638 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:56:34,071 INFO Gemini: processed 1000/1000
2025-12-15 12:56:34,071 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 76521.64 examples/s]

2025-12-15 12:56:34,119 INFO Saved chunk 87000-88000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_87000-88000
2025-12-15 12:56:34,119 INFO Saved chunk 87000-88000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_87000-88000



INFO:__main__:Saved chunk 87000-88000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_87000-88000


2025-12-15 12:56:42,107 INFO Gemini: processed 128/1000
2025-12-15 12:56:42,107 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:56:50,131 INFO Gemini: processed 256/1000
2025-12-15 12:56:50,131 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:56:58,448 INFO Gemini: processed 384/1000
2025-12-15 12:56:58,448 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:57:06,451 INFO Gemini: processed 512/1000
2025-12-15 12:57:06,451 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:57:14,480 INFO Gemini: processed 640/1000
2025-12-15 12:57:14,480 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:57:22,527 INFO Gemini: processed 768/1000
2025-12-15 12:57:22,527 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:57:30,685 INFO Gemini: processed 896/1000
2025-12-15 12:57:30,685 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:57:37,502 INFO Gemini: processed 1000/1000
2025-12-15 12:57:37,502 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 73865.49 examples/s]

2025-12-15 12:57:37,535 INFO Saved chunk 88000-89000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_88000-89000
2025-12-15 12:57:37,535 INFO Saved chunk 88000-89000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_88000-89000



INFO:__main__:Saved chunk 88000-89000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_88000-89000


2025-12-15 12:57:45,525 INFO Gemini: processed 128/1000
2025-12-15 12:57:45,525 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:57:53,561 INFO Gemini: processed 256/1000
2025-12-15 12:57:53,561 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:58:01,596 INFO Gemini: processed 384/1000
2025-12-15 12:58:01,596 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:58:09,732 INFO Gemini: processed 512/1000
2025-12-15 12:58:09,732 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:58:17,988 INFO Gemini: processed 640/1000
2025-12-15 12:58:17,988 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:58:26,008 INFO Gemini: processed 768/1000
2025-12-15 12:58:26,008 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:58:34,119 INFO Gemini: processed 896/1000
2025-12-15 12:58:34,119 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:58:40,625 INFO Gemini: processed 1000/1000
2025-12-15 12:58:40,625 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 45223.07 examples/s]

2025-12-15 12:58:40,670 INFO Saved chunk 89000-90000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_89000-90000
2025-12-15 12:58:40,670 INFO Saved chunk 89000-90000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_89000-90000



INFO:__main__:Saved chunk 89000-90000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_89000-90000


2025-12-15 12:58:48,722 INFO Gemini: processed 128/1000
2025-12-15 12:58:48,722 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:58:57,005 INFO Gemini: processed 256/1000
2025-12-15 12:58:57,005 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 12:59:05,069 INFO Gemini: processed 384/1000
2025-12-15 12:59:05,069 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 12:59:13,099 INFO Gemini: processed 512/1000
2025-12-15 12:59:13,099 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 12:59:21,055 INFO Gemini: processed 640/1000
2025-12-15 12:59:21,055 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 12:59:29,101 INFO Gemini: processed 768/1000
2025-12-15 12:59:29,101 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 12:59:37,463 INFO Gemini: processed 896/1000
2025-12-15 12:59:37,463 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 12:59:43,910 INFO Gemini: processed 1000/1000
2025-12-15 12:59:43,910 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 69963.37 examples/s]

2025-12-15 12:59:43,941 INFO Saved chunk 90000-91000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_90000-91000
2025-12-15 12:59:43,941 INFO Saved chunk 90000-91000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_90000-91000



INFO:__main__:Saved chunk 90000-91000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_90000-91000


2025-12-15 12:59:51,866 INFO Gemini: processed 128/1000
2025-12-15 12:59:51,866 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 12:59:59,972 INFO Gemini: processed 256/1000
2025-12-15 12:59:59,972 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:00:08,151 INFO Gemini: processed 384/1000
2025-12-15 13:00:08,151 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:00:16,700 INFO Gemini: processed 512/1000
2025-12-15 13:00:16,700 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:00:24,662 INFO Gemini: processed 640/1000
2025-12-15 13:00:24,662 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:00:32,698 INFO Gemini: processed 768/1000
2025-12-15 13:00:32,698 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:00:40,855 INFO Gemini: processed 896/1000
2025-12-15 13:00:40,855 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:00:47,415 INFO Gemini: processed 1000/1000
2025-12-15 13:00:47,415 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64980.62 examples/s]

2025-12-15 13:00:47,456 INFO Saved chunk 91000-92000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_91000-92000
2025-12-15 13:00:47,456 INFO Saved chunk 91000-92000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_91000-92000



INFO:__main__:Saved chunk 91000-92000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_91000-92000


2025-12-15 13:00:55,757 INFO Gemini: processed 128/1000
2025-12-15 13:00:55,757 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:01:03,880 INFO Gemini: processed 256/1000
2025-12-15 13:01:03,880 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:01:11,859 INFO Gemini: processed 384/1000
2025-12-15 13:01:11,859 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:01:19,889 INFO Gemini: processed 512/1000
2025-12-15 13:01:19,889 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:01:28,894 INFO Gemini: processed 640/1000
2025-12-15 13:01:28,894 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:01:37,222 INFO Gemini: processed 768/1000
2025-12-15 13:01:37,222 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:01:45,517 INFO Gemini: processed 896/1000
2025-12-15 13:01:45,517 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:01:52,138 INFO Gemini: processed 1000/1000
2025-12-15 13:01:52,138 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 36946.41 examples/s]

2025-12-15 13:01:52,188 INFO Saved chunk 92000-93000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_92000-93000
2025-12-15 13:01:52,188 INFO Saved chunk 92000-93000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_92000-93000



INFO:__main__:Saved chunk 92000-93000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_92000-93000


2025-12-15 13:02:00,321 INFO Gemini: processed 128/1000
2025-12-15 13:02:00,321 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:02:08,424 INFO Gemini: processed 256/1000
2025-12-15 13:02:08,424 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:02:16,692 INFO Gemini: processed 384/1000
2025-12-15 13:02:16,692 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:02:25,239 INFO Gemini: processed 512/1000
2025-12-15 13:02:25,239 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:02:33,327 INFO Gemini: processed 640/1000
2025-12-15 13:02:33,327 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:02:41,444 INFO Gemini: processed 768/1000
2025-12-15 13:02:41,444 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:02:49,586 INFO Gemini: processed 896/1000
2025-12-15 13:02:49,586 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:02:56,371 INFO Gemini: processed 1000/1000
2025-12-15 13:02:56,371 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65077.41 examples/s]

2025-12-15 13:02:56,408 INFO Saved chunk 93000-94000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_93000-94000
2025-12-15 13:02:56,408 INFO Saved chunk 93000-94000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_93000-94000



INFO:__main__:Saved chunk 93000-94000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_93000-94000


2025-12-15 13:03:04,580 INFO Gemini: processed 128/1000
2025-12-15 13:03:04,580 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:03:12,720 INFO Gemini: processed 256/1000
2025-12-15 13:03:12,720 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:03:20,949 INFO Gemini: processed 384/1000
2025-12-15 13:03:20,949 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:03:29,124 INFO Gemini: processed 512/1000
2025-12-15 13:03:29,124 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:03:37,384 INFO Gemini: processed 640/1000
2025-12-15 13:03:37,384 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:03:45,483 INFO Gemini: processed 768/1000
2025-12-15 13:03:45,483 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:03:53,881 INFO Gemini: processed 896/1000
2025-12-15 13:03:53,881 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:04:00,722 INFO Gemini: processed 1000/1000
2025-12-15 13:04:00,722 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 80877.44 examples/s]

2025-12-15 13:04:00,755 INFO Saved chunk 94000-95000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_94000-95000
2025-12-15 13:04:00,755 INFO Saved chunk 94000-95000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_94000-95000



INFO:__main__:Saved chunk 94000-95000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_94000-95000


2025-12-15 13:04:08,913 INFO Gemini: processed 128/1000
2025-12-15 13:04:08,913 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:04:17,381 INFO Gemini: processed 256/1000
2025-12-15 13:04:17,381 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:04:25,449 INFO Gemini: processed 384/1000
2025-12-15 13:04:25,449 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:04:33,737 INFO Gemini: processed 512/1000
2025-12-15 13:04:33,737 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:04:41,966 INFO Gemini: processed 640/1000
2025-12-15 13:04:41,966 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:04:50,264 INFO Gemini: processed 768/1000
2025-12-15 13:04:50,264 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:04:58,399 INFO Gemini: processed 896/1000
2025-12-15 13:04:58,399 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:05:05,334 INFO Gemini: processed 1000/1000
2025-12-15 13:05:05,334 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 73410.41 examples/s]

2025-12-15 13:05:05,366 INFO Saved chunk 95000-96000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_95000-96000
2025-12-15 13:05:05,366 INFO Saved chunk 95000-96000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_95000-96000



INFO:__main__:Saved chunk 95000-96000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_95000-96000


2025-12-15 13:05:13,545 INFO Gemini: processed 128/1000
2025-12-15 13:05:13,545 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:05:21,794 INFO Gemini: processed 256/1000
2025-12-15 13:05:21,794 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:05:29,928 INFO Gemini: processed 384/1000
2025-12-15 13:05:29,928 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:05:37,935 INFO Gemini: processed 512/1000
2025-12-15 13:05:37,935 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:05:46,149 INFO Gemini: processed 640/1000
2025-12-15 13:05:46,149 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:05:54,253 INFO Gemini: processed 768/1000
2025-12-15 13:05:54,253 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:06:02,381 INFO Gemini: processed 896/1000
2025-12-15 13:06:02,381 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:06:09,031 INFO Gemini: processed 1000/1000
2025-12-15 13:06:09,031 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65458.27 examples/s]

2025-12-15 13:06:09,065 INFO Saved chunk 96000-97000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_96000-97000
2025-12-15 13:06:09,065 INFO Saved chunk 96000-97000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_96000-97000



INFO:__main__:Saved chunk 96000-97000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_96000-97000


2025-12-15 13:06:17,419 INFO Gemini: processed 128/1000
2025-12-15 13:06:17,419 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:06:25,527 INFO Gemini: processed 256/1000
2025-12-15 13:06:25,527 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:06:33,814 INFO Gemini: processed 384/1000
2025-12-15 13:06:33,814 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:06:42,344 INFO Gemini: processed 512/1000
2025-12-15 13:06:42,344 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:06:50,374 INFO Gemini: processed 640/1000
2025-12-15 13:06:50,374 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:06:58,749 INFO Gemini: processed 768/1000
2025-12-15 13:06:58,749 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:07:07,146 INFO Gemini: processed 896/1000
2025-12-15 13:07:07,146 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:07:13,851 INFO Gemini: processed 1000/1000
2025-12-15 13:07:13,851 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64845.00 examples/s]

2025-12-15 13:07:13,886 INFO Saved chunk 97000-98000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_97000-98000
2025-12-15 13:07:13,886 INFO Saved chunk 97000-98000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_97000-98000



INFO:__main__:Saved chunk 97000-98000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_97000-98000


2025-12-15 13:07:22,031 INFO Gemini: processed 128/1000
2025-12-15 13:07:22,031 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:07:30,468 INFO Gemini: processed 256/1000
2025-12-15 13:07:30,468 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:07:38,800 INFO Gemini: processed 384/1000
2025-12-15 13:07:38,800 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:07:47,302 INFO Gemini: processed 512/1000
2025-12-15 13:07:47,302 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:07:55,583 INFO Gemini: processed 640/1000
2025-12-15 13:07:55,583 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:08:03,830 INFO Gemini: processed 768/1000
2025-12-15 13:08:03,830 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:08:12,148 INFO Gemini: processed 896/1000
2025-12-15 13:08:12,148 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:08:18,918 INFO Gemini: processed 1000/1000
2025-12-15 13:08:18,918 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 94947.46 examples/s]

2025-12-15 13:08:18,944 INFO Saved chunk 98000-99000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_98000-99000
2025-12-15 13:08:18,944 INFO Saved chunk 98000-99000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_98000-99000



INFO:__main__:Saved chunk 98000-99000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_98000-99000


2025-12-15 13:08:27,408 INFO Gemini: processed 128/1000
2025-12-15 13:08:27,408 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-12-15 13:08:35,510 INFO Gemini: processed 256/1000
2025-12-15 13:08:35,510 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-12-15 13:08:43,662 INFO Gemini: processed 384/1000
2025-12-15 13:08:43,662 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-12-15 13:08:51,835 INFO Gemini: processed 512/1000
2025-12-15 13:08:51,835 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-12-15 13:09:00,819 INFO Gemini: processed 640/1000
2025-12-15 13:09:00,819 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-12-15 13:09:08,864 INFO Gemini: processed 768/1000
2025-12-15 13:09:08,864 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-12-15 13:09:16,937 INFO Gemini: processed 896/1000
2025-12-15 13:09:16,937 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-12-15 13:09:23,550 INFO Gemini: processed 1000/1000
2025-12-15 13:09:23,550 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 54059.37 examples/s]

2025-12-15 13:09:23,587 INFO Saved chunk 99000-100000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_99000-100000
2025-12-15 13:09:23,587 INFO Saved chunk 99000-100000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_99000-100000



INFO:__main__:Saved chunk 99000-100000 to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_99000-100000


===finished===


In [34]:
# Show the token counters; `input_tokens` and `output_tokens` are defined by
# earlier cells (not visible in this section of the notebook).
print(f'input_tokens: {input_tokens}')
print(f'output_tokens: {output_tokens}')

input_tokens: 5203369
output_tokens: 2312982


In [35]:
# Scale each token counter to millions of tokens and multiply by a per-million
# rate (0.1 for input, 0.4 for output); one value is printed per line.
# NOTE(review): the rates are presumably USD per 1M tokens for the model set in
# MODEL -- confirm against the provider's current pricing before relying on them.
print(
    input_tokens / 1_000_000 * 0.1,
    output_tokens / 1_000_000 * 0.4,
    sep='\n',
)


0.5203369000000001
0.9251928


## Unite chunks

In [36]:
# Reload every per-chunk dataset from disk, in index order, and concatenate
# them into one dataset.
datasets = []

# Chunk directories are looked up next to `predictions_path`.
dataset_path = os.path.dirname(predictions_path)
dataset_len = len(load_dataset_with_triples(predictions_path))

# Step by the configured SAVE_CHUNK instead of a hard-coded 1000 so this loop
# keeps matching the chunk directories if the save interval is changed.
# NOTE(review): assumes the cell that wrote the chunks also used SAVE_CHUNK -- confirm.
for i in range(0, dataset_len, SAVE_CHUNK):
    # The final chunk is clipped to the dataset length.
    last = min(i + SAVE_CHUNK, dataset_len)
    suffix = get_chunk_suffix(i, last)
    path = os.path.join(dataset_path, suffix)
    datasets.append(load_from_disk(path))

united_dataset = concatenate_datasets(datasets)
print(len(united_dataset))
# Sanity check: the merged dataset must contain exactly one row per input example.
assert len(united_dataset) == dataset_len, (
    f"merged dataset has {len(united_dataset)} rows, expected {dataset_len}"
)

100000


In [37]:
# Build the output directory name from the chunk naming scheme: take a chunk
# suffix, drop its last "_"-separated component and append "_all".
# The index range passed to get_chunk_suffix is discarded by the next line, so
# use the full span (0, dataset_len) rather than loop variables (`i`, `last`)
# left over from a different cell -- that hidden state breaks if cells are
# re-run out of order.
# NOTE(review): assumes get_chunk_suffix places the index range after the last
# underscore, as in the logged chunk paths ("..._99000-100000") -- confirm.
suffix = get_chunk_suffix(0, dataset_len)
suffix = '_'.join(suffix.split('_')[:-1]) + '_all'
output_path = os.path.join(dataset_path, suffix)
united_dataset.save_to_disk(output_path)
print(f'saved to {output_path}')

Saving the dataset (2/2 shards): 100%|██████████| 100000/100000 [00:02<00:00, 47762.72 examples/s]

saved to /scratch/gpfs/JHA/mb5157/large_data/graphrag_results/qwen14b_fixed_rel_kg/validated_gemini-2.0-flash_all





In [38]:
import shutil

# Delete the per-chunk dataset directories once the merged dataset is on disk.
dataset_path = os.path.dirname(predictions_path)

# Safety guard: this cell is destructive, so refuse to run unless the merged
# dataset directory (`output_path`, written by the save cell) actually exists.
if not os.path.isdir(output_path):
    raise FileNotFoundError(
        f"merged dataset not found at {output_path}; refusing to delete chunks"
    )

# Step by the configured SAVE_CHUNK instead of a hard-coded 1000.
# NOTE(review): assumes the chunks were written with the same SAVE_CHUNK -- confirm.
for i in range(0, dataset_len, SAVE_CHUNK):
    # The final chunk is clipped to the dataset length.
    last = min(i + SAVE_CHUNK, dataset_len)
    suffix = get_chunk_suffix(i, last)
    path = os.path.join(dataset_path, suffix)
    # Skip chunks that are already gone so the cell can be re-run safely.
    if os.path.isdir(path):
        shutil.rmtree(path)