In [1]:
import sys

sys.path.append('/scratch/gpfs/mb5157/repos/graphmert/transformers/src')

In [2]:
import os
import sys
import json
import ast
import re
import nest_asyncio, asyncio
import logging
from pathlib import Path
import pyarrow as pa

from datasets import Dataset, Features, Sequence, Value, load_from_disk, concatenate_datasets
from transformers import AutoTokenizer
from google import genai
from google.genai import types

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from importlib import reload

import prompt_library.entity_discovery_prompts as prompts_module
reload(prompts_module)

from prompt_library.entity_discovery_prompts import (
    SYSTEM_CONTEXT,
    example_user_1, example_assistant_1, example_explanation_1,
    example_user_2, example_assistant_2, example_explanation_2,
    example_user_3, example_assistant_3, example_explanation_3,
    example_user_4, example_assistant_4, example_explanation_4,
    example_user_5, example_assistant_5, example_explanation_5,
    example_user_6, example_assistant_6, example_explanation_6,
    example_user_7, example_assistant_7, example_explanation_7,
    example_user_8, example_assistant_8, example_explanation_8,

    example_user_negative_1, example_assistant_negative_1, example_explanation_negative_1,
    example_user_negative_2, example_assistant_negative_2, example_explanation_negative_2,
    example_user_negative_3, example_assistant_negative_3, example_explanation_negative_3,
)

In [4]:
GEMINI_API_KEY  = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY env var not set – please `export GEMINI_API_KEY=…` before launching.")
    
MODEL = "gemini-2.0-flash"
model_name = "gemini-2.0"

tokenizer = AutoTokenizer.from_pretrained(
    "/scratch/gpfs/mb5157/tokenizers/biomedbert_fast_tokenizer"
)


SAVE_CHUNK      = 1000               # save result each save every BATCH_SIZE examples
PRINTOUT        = True
TAKE_SUBSET     = False
SUBSET_SIZE      = 100
BATCH_SIZE      = 128               # how many concurrent Gemini calls
TIME_SLEEP = 0.05

TTL = '259200s' # for cache
PRINT_RAW_RESPONSE = False

In [5]:
logging.basicConfig(level=logging.INFO, force=True)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
fmt = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
handler.setFormatter(fmt)
logger.addHandler(handler)

# Quiet down other noisy loggers
logging.getLogger("google_genai.models").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)

In [6]:
few_shot_examples = [
    {"user_query": example_user_1, "model_answer": example_assistant_1, "model_explanation": example_explanation_1},
    {"user_query": example_user_2, "model_answer": example_assistant_2, "model_explanation": example_explanation_2},
    {"user_query": example_user_3, "model_answer": example_assistant_3, "model_explanation": example_explanation_3},
    {"user_query": example_user_4, "model_answer": example_assistant_4,  "model_explanation": example_explanation_4},
    {"user_query": example_user_5, "model_answer": example_assistant_5,  "model_explanation": example_explanation_5},
    {"user_query": example_user_6, "model_answer": example_assistant_6,  "model_explanation": example_explanation_6},
    {"user_query": example_user_7, "model_answer": example_assistant_7,  "model_explanation": example_explanation_7},
    {"user_query": example_user_8, "model_answer": example_assistant_8,  "model_explanation": example_explanation_8},
    
    # negative examples
    {"user_query": example_user_negative_1, "model_answer": example_assistant_negative_1, "model_explanation_negative": example_explanation_negative_1},
    {"user_query": example_user_negative_2, "model_answer": example_assistant_negative_2, "model_explanation_negative": example_explanation_negative_2},
    {"user_query": example_user_negative_3, "model_answer": example_assistant_negative_3, "model_explanation_negative": example_explanation_negative_3},
]

STRUCTURED_CONTENT_FEW_SHOTS = [
    {"role": "user", "parts": [{"text": "I will provide you with examples"}]},
    {"role": "model", "parts": [{"text": "Understood — send the sample and I’ll output entities"}]},
]

for ex in few_shot_examples:
    STRUCTURED_CONTENT_FEW_SHOTS.append({
        "role": "user",
        "parts": [{"text": ex["user_query"]}]
    })
    # example model answer
    STRUCTURED_CONTENT_FEW_SHOTS.append({
        "role": "model",
        "parts": [{"text": ex["model_answer"]}]
    })

    if "model_explanation" in ex:
        STRUCTURED_CONTENT_FEW_SHOTS.append({
            "role": "user",
            "parts": [{"text": "Explanation of the previous output:"}]
        })
        STRUCTURED_CONTENT_FEW_SHOTS.append({
            "role": "model",
            "parts": [{"text": ex["model_explanation"]}]
        })

    if "model_explanation_negative" in ex:
        STRUCTURED_CONTENT_FEW_SHOTS.append({
            "role": "user",
            "parts": [{"text": "Explanation of what is wrong with the previous output:"}]
        })
        STRUCTURED_CONTENT_FEW_SHOTS.append({
            "role": "model",
            "parts": [{"text": ex["model_explanation_negative"]}]
        })

STRUCTURED_CONTENT_FEW_SHOTS.append({
    "role": "user",
    "parts": [{"text": "**End of examples**.\nNow read the actual input:"}]
})

In [7]:
def extract_rightmost_list(response: str) -> list:
    # 1) Strip any lines that start with ``` (with or without language tag)
    #    but kepp what's between them.
    response = re.sub(r'(?m)^```.*\n?', "", response)

    # 2) Find all [ ... ] spans (non-greedy, across newlines)
    matches = re.findall(r"\[.*?\]", response, flags=re.DOTALL)
    if not matches:
        return []
    candidate = matches[-1]

    # 3) Try JSON, then Python literal
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        pass
    try:
        return ast.literal_eval(candidate)
    except Exception:
        return []

In [12]:
async def gemini_call(client, user_query: str, count_only, cache=None, cache_system_prompt=True, max_retries: int=3) -> str:
    """
    Wrap Gemini call in a retrying try/except so it never blows up.
    count_only: mock call for counting input tokens.
    """
    global input_tokens, output_tokens
    # count_tokens is mock request, generate_content is actual paid request
    call_fn = client.aio.models.count_tokens if count_only else client.aio.models.generate_content

    config = types.GenerateContentConfig(cached_content=cache.name) if cache is not None else None
    if cache_system_prompt:
        contents = [{
                "role": "user",
                "parts": [{"text": user_query}]
            }]
    else:
        contents = []
        contents.extend(STRUCTURED_CONTENT_FEW_SHOTS)
        contents.append({
                "role": "user",
                "parts": [{"text": user_query}]
            })
        
    for attempt in range(1, max_retries + 1):
        try:            
            resp = await call_fn(model=MODEL, contents=contents, config=config)
            if count_only:
                # count_tokens returns an int of prompt tokens
                input_tokens += resp.total_tokens
                return ""

            # real response path
            metadata = resp.usage_metadata
            pt = metadata.prompt_token_count or 0
            cc = metadata.cached_content_token_count or 0
            ct = metadata.candidates_token_count or 0
            input_tokens += pt - cc
            output_tokens += ct
            return getattr(resp, "text", "") or ""

        except Exception as e:
            # catch HTTPError, ValidationError, ServerError, etc.
            logger.warning(f"Gemini call attempt {attempt}/{max_retries} failed: {e!r}")
            if attempt < max_retries:
                await asyncio.sleep(0.5 * attempt)    # simple backoff
            else:
                logger.error(f"All {max_retries} Gemini attempts failed for query: {user_query[:50]!r}")
                return ""  # give up


async def process_batch(client, cache, slice_of_examples, count_only, cache_system_prompt, print_raw_response=PRINT_RAW_RESPONSE):
    """
    Fire off up to BATCH_SIZE concurrent requests.
    """
    tasks = []
    for ex in slice_of_examples:
        seq  = tokenizer.decode(ex["input_ids"], skip_special_tokens=True)
        uq = f"Input:\n{seq}" 
        tasks.append(gemini_call(client, uq, count_only, cache, cache_system_prompt))
        await asyncio.sleep(TIME_SLEEP)
        
    raw_resps = await asyncio.gather(*tasks)

    if print_raw_response:
        for raw_resp in raw_resps:
            print(raw_resp)
    
    cleaned = [extract_rightmost_list(r) for r in raw_resps]
    return cleaned
        

async def create_cache(client, count_only):
    global input_tokens, cached_tokens

    if count_only: # free, only for input token count
        cache_contents_mock = [{"role": "user", "parts": [{"text": SYSTEM_CONTEXT}]}]
        cache_contents_mock.extend(STRUCTURED_CONTENT_FEW_SHOTS)
        
        cache_call_tokens = await client.aio.models.count_tokens(
            model=MODEL,
            contents=cache_contents_mock,
        )
        cached_tokens += cache_call_tokens.total_tokens
        return None
        
    else: # paid call
        cache_config = types.CreateCachedContentConfig(
                system_instruction=SYSTEM_CONTEXT,
                contents=STRUCTURED_CONTENT_FEW_SHOTS,
                ttl=TTL
            )
        cache = await client.aio.caches.create(
            model=MODEL,
            config=cache_config,  
        )
        logger.info("created cache")
        # count actual tokens cached in a paid request 
        cached_tokens_call = cache.usage_metadata.total_token_count
        cached_tokens += cached_tokens_call
        return cache


async def run_all(client, dataset: Dataset, chunk_start: int, chunk_end: int, cache=None, count_only=True):
    """
    Split into batches
    """
    n = len(dataset)
    all_resp = [None] * n

    for start in range(0, n, BATCH_SIZE):
        end = min(start + BATCH_SIZE, n)
        slice_ds = dataset.select(range(start, end))
        cleaned  = await process_batch(client, cache, slice_ds, count_only, cache_system_prompt)
        all_resp[start:end] = cleaned
        if PRINTOUT:
            logger.info(f"Gemini: processed {end}/{n}")
            
    return all_resp

In [9]:
async def main(dataset_dir: str, path_to_save, count_only=True, cache_system_prompt=True, start_idx: int = 0, end_idx=None):
    ds_full = Dataset.load_from_disk(str(dataset_dir))
    if TAKE_SUBSET:
        overall_end = min(start_idx + SUBSET_SIZE, len(ds_full))
    else:
        overall_end = len(ds_full) if end_idx is None else min(len(ds_full), end_idx)

    client = genai.Client(api_key=GEMINI_API_KEY)
    cache = None

    try:
        if cache_system_prompt:
            cache = await create_cache(client, count_only)
            
        for chunk_start in range(start_idx, overall_end, SAVE_CHUNK):
            chunk_end = min(chunk_start + SAVE_CHUNK, overall_end)
            ds_chunk = ds_full.select(range(chunk_start, chunk_end))
            entities = await run_all(client, ds_chunk, chunk_start, chunk_end, cache, count_only=count_only)
            
            assert len(entities) == ds_chunk.num_rows, f"{len(entities)=} vs {ds_chunk.num_rows=}"
            if not count_only:
                # coerce any stray string into a single‐element list
                fixed = [[str(x) for x in t] if isinstance(t, list) else [str(t)] for t in entities]
                entities_arr = pa.array(fixed, type=pa.list_(pa.string()))
                ds_chunk = ds_chunk.add_column("response", fixed)
                if TAKE_SUBSET:
                    save_name = f"{model_name}_subset_{SUBSET_SIZE}_{chunk_start}-{chunk_end}"
                else:
                    save_name = f"{model_name}_{chunk_start}-{chunk_end}"
        
                out_dir = os.path.join(str(path_to_save), save_name)
                ds_chunk.save_to_disk(out_dir)
                logger.info(f"Saved chunk {chunk_start}-{chunk_end} to {out_dir}")
    finally:
        if cache is not None:
            await client.aio.caches.delete(name=cache.name)

In [None]:
input_tokens = 0
output_tokens = 0
cached_tokens = 0

count_only = False # True means no real requests
cache_system_prompt = True

dataset_dir = Path('/scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_eval')
path_to_save = Path('/scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads')
logger.info(f'request with {model_name}')
await main(dataset_dir, path_to_save, count_only=count_only, cache_system_prompt=cache_system_prompt, start_idx=0, end_idx=None)

print('===finished===')
print('input_tokens:', input_tokens)
print('output_tokens:', output_tokens)
print('cached_tokens', cached_tokens)

2025-07-10 09:17:29,262 INFO request with gemini-2.0


INFO:__main__:request with gemini-2.0


2025-07-10 09:17:30,225 INFO created cache


INFO:__main__:created cache


2025-07-10 09:17:39,159 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:17:48,054 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:17:57,236 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:18:06,389 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:18:15,397 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:18:24,425 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:18:33,259 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:18:40,635 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 79695.68 examples/s]

2025-07-10 09:18:40,665 INFO Saved chunk 0-1000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_0-1000



INFO:__main__:Saved chunk 0-1000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_0-1000


2025-07-10 09:18:49,699 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:19:10,290 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:19:19,153 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:19:28,164 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:19:37,429 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:19:46,401 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:19:55,665 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:20:02,887 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 75880.67 examples/s]

2025-07-10 09:20:02,917 INFO Saved chunk 1000-2000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_1000-2000



INFO:__main__:Saved chunk 1000-2000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_1000-2000


2025-07-10 09:20:17,490 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:20:28,799 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:20:37,761 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:20:46,708 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:20:55,664 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:21:04,368 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:21:13,294 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:21:20,492 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 79633.64 examples/s]

2025-07-10 09:21:20,522 INFO Saved chunk 2000-3000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_2000-3000



INFO:__main__:Saved chunk 2000-3000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_2000-3000


2025-07-10 09:21:29,326 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:21:38,423 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:21:47,280 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:21:57,348 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:22:06,413 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:22:15,264 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:22:24,464 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:22:31,612 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 81499.77 examples/s]

2025-07-10 09:22:31,640 INFO Saved chunk 3000-4000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_3000-4000



INFO:__main__:Saved chunk 3000-4000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_3000-4000


2025-07-10 09:22:40,663 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:22:49,823 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:22:58,760 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:23:07,885 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:23:16,741 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:23:25,529 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:23:34,778 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:23:42,102 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 77314.36 examples/s]

2025-07-10 09:23:42,130 INFO Saved chunk 4000-5000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_4000-5000



INFO:__main__:Saved chunk 4000-5000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_4000-5000


2025-07-10 09:23:51,400 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:24:00,489 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:24:09,557 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:24:18,915 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:24:27,940 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:24:37,043 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:24:45,768 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:24:52,912 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 83545.22 examples/s]

2025-07-10 09:24:52,941 INFO Saved chunk 5000-6000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_5000-6000



INFO:__main__:Saved chunk 5000-6000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_5000-6000


2025-07-10 09:25:02,672 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:25:12,218 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:25:21,345 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:25:30,492 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:25:39,675 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:25:48,980 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:25:58,176 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:26:05,546 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 76183.89 examples/s]

2025-07-10 09:26:05,577 INFO Saved chunk 6000-7000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_6000-7000



INFO:__main__:Saved chunk 6000-7000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_6000-7000


2025-07-10 09:26:14,664 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:26:23,862 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:26:33,011 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:26:41,971 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:26:50,811 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:27:00,172 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:27:09,308 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:27:16,518 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 79713.86 examples/s]

2025-07-10 09:27:16,546 INFO Saved chunk 7000-8000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_7000-8000



INFO:__main__:Saved chunk 7000-8000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_7000-8000


2025-07-10 09:27:25,499 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:27:35,499 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:27:45,021 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:27:53,872 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:28:02,973 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:28:12,416 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:28:21,518 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:28:28,760 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 84951.37 examples/s]

2025-07-10 09:28:28,789 INFO Saved chunk 8000-9000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_8000-9000



INFO:__main__:Saved chunk 8000-9000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_8000-9000


2025-07-10 09:28:37,945 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:28:47,261 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:28:56,374 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:29:05,132 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:29:14,531 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:29:23,543 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:29:32,333 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:29:39,997 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 78966.47 examples/s]

2025-07-10 09:29:40,028 INFO Saved chunk 9000-10000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_9000-10000



INFO:__main__:Saved chunk 9000-10000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_9000-10000


2025-07-10 09:29:48,739 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:29:57,769 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:30:06,758 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:30:21,143 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:30:30,246 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:30:39,007 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:30:48,065 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:30:56,607 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 86908.77 examples/s]

2025-07-10 09:30:56,635 INFO Saved chunk 10000-11000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_10000-11000



INFO:__main__:Saved chunk 10000-11000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_10000-11000


2025-07-10 09:31:06,053 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:31:15,327 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:31:24,514 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:31:33,505 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:31:43,173 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:31:53,084 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:32:02,469 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:32:09,651 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 81069.72 examples/s]

2025-07-10 09:32:09,680 INFO Saved chunk 11000-12000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_11000-12000



INFO:__main__:Saved chunk 11000-12000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_11000-12000


2025-07-10 09:32:18,650 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:32:27,489 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:32:36,796 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:32:45,729 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:32:54,797 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:33:03,721 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:33:12,662 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:33:20,106 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 88723.27 examples/s]

2025-07-10 09:33:20,134 INFO Saved chunk 12000-13000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_12000-13000



INFO:__main__:Saved chunk 12000-13000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_12000-13000


2025-07-10 09:33:29,111 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:33:38,348 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:33:47,280 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:33:56,187 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:34:05,340 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:34:14,297 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:34:28,145 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:34:35,610 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 88834.14 examples/s]

2025-07-10 09:34:35,638 INFO Saved chunk 13000-14000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_13000-14000



INFO:__main__:Saved chunk 13000-14000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_13000-14000


2025-07-10 09:34:45,465 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:34:54,482 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:35:03,606 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:35:12,672 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:35:21,687 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:35:30,895 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:35:39,919 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:35:47,139 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 84457.01 examples/s]

2025-07-10 09:35:47,166 INFO Saved chunk 14000-15000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_14000-15000



INFO:__main__:Saved chunk 14000-15000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_14000-15000


2025-07-10 09:35:56,731 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:36:12,250 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:36:21,101 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:36:30,392 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:36:39,355 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:36:48,202 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:36:57,710 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:37:05,061 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65101.65 examples/s]

2025-07-10 09:37:05,092 INFO Saved chunk 15000-16000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_15000-16000



INFO:__main__:Saved chunk 15000-16000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_15000-16000


2025-07-10 09:37:14,212 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:37:23,636 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:37:32,824 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:37:42,139 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:37:50,938 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:37:59,803 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:38:08,764 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:38:15,949 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61500.96 examples/s]

2025-07-10 09:38:15,981 INFO Saved chunk 16000-17000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_16000-17000



INFO:__main__:Saved chunk 16000-17000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_16000-17000


2025-07-10 09:38:24,880 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:38:33,935 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:38:42,715 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:38:51,755 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:39:00,742 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:39:09,658 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:39:18,990 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:39:26,050 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64742.90 examples/s]

2025-07-10 09:39:26,081 INFO Saved chunk 17000-18000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_17000-18000



INFO:__main__:Saved chunk 17000-18000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_17000-18000


2025-07-10 09:39:35,503 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:39:44,929 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:39:53,920 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:40:03,152 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:40:17,585 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:40:27,793 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000






2025-07-10 09:40:46,161 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:40:53,293 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 66340.38 examples/s]

2025-07-10 09:40:53,322 INFO Saved chunk 18000-19000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_18000-19000



INFO:__main__:Saved chunk 18000-19000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_18000-19000


2025-07-10 09:41:02,132 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:41:10,922 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:41:19,479 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:41:28,623 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:41:37,252 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:41:46,547 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:41:55,432 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:42:02,624 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62107.47 examples/s]

2025-07-10 09:42:02,653 INFO Saved chunk 19000-20000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_19000-20000



INFO:__main__:Saved chunk 19000-20000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_19000-20000


2025-07-10 09:42:11,919 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:42:21,028 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:42:29,946 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:42:38,863 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:42:48,014 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:42:57,857 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000






2025-07-10 09:43:24,086 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:43:31,557 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64242.12 examples/s]

2025-07-10 09:43:31,588 INFO Saved chunk 20000-21000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_20000-21000



INFO:__main__:Saved chunk 20000-21000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_20000-21000


2025-07-10 09:43:40,866 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:43:49,690 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:43:58,814 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:44:07,606 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:44:16,448 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:44:25,803 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:44:35,297 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:44:42,583 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63829.56 examples/s]

2025-07-10 09:44:42,615 INFO Saved chunk 21000-22000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_21000-22000



INFO:__main__:Saved chunk 21000-22000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_21000-22000






2025-07-10 09:44:56,595 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:45:05,736 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:45:14,837 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:45:23,815 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:45:32,728 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:45:41,806 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:45:50,919 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:45:58,296 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63784.91 examples/s]

2025-07-10 09:45:58,327 INFO Saved chunk 22000-23000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_22000-23000



INFO:__main__:Saved chunk 22000-23000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_22000-23000


2025-07-10 09:46:07,124 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:46:16,134 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:46:25,296 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:46:34,661 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:46:43,613 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:46:52,655 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:47:02,081 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:47:09,275 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59896.38 examples/s]

2025-07-10 09:47:09,307 INFO Saved chunk 23000-24000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_23000-24000



INFO:__main__:Saved chunk 23000-24000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_23000-24000


2025-07-10 09:47:18,828 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:47:27,857 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:47:36,807 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:47:45,629 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:47:54,521 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:48:03,474 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:48:12,851 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:48:20,172 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60620.97 examples/s]

2025-07-10 09:48:20,205 INFO Saved chunk 24000-25000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_24000-25000



INFO:__main__:Saved chunk 24000-25000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_24000-25000


2025-07-10 09:48:31,719 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:48:40,783 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:48:49,745 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:49:00,630 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:49:09,703 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:49:18,567 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:49:27,461 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:49:34,661 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60529.98 examples/s]

2025-07-10 09:49:34,693 INFO Saved chunk 25000-26000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_25000-26000



INFO:__main__:Saved chunk 25000-26000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_25000-26000


2025-07-10 09:49:43,900 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:49:53,218 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:50:02,237 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:50:11,495 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:50:20,852 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:50:30,032 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:50:39,426 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:50:47,069 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63973.64 examples/s]

2025-07-10 09:50:47,101 INFO Saved chunk 26000-27000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_26000-27000



INFO:__main__:Saved chunk 26000-27000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_26000-27000


2025-07-10 09:50:56,232 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:51:05,477 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:51:14,651 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:51:23,940 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:51:33,003 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:51:41,926 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:51:50,678 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:51:57,987 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62482.18 examples/s]

2025-07-10 09:51:58,018 INFO Saved chunk 27000-28000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_27000-28000



INFO:__main__:Saved chunk 27000-28000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_27000-28000


2025-07-10 09:52:07,246 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:52:16,176 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:52:25,477 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:52:34,370 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:52:44,109 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:52:53,193 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:53:02,677 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:53:09,964 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61827.33 examples/s]

2025-07-10 09:53:09,995 INFO Saved chunk 28000-29000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_28000-29000



INFO:__main__:Saved chunk 28000-29000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_28000-29000


2025-07-10 09:53:19,255 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:53:28,566 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:53:37,501 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:53:46,478 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:53:55,486 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:54:04,498 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:54:13,515 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:54:21,861 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65572.88 examples/s]

2025-07-10 09:54:21,892 INFO Saved chunk 29000-30000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_29000-30000



INFO:__main__:Saved chunk 29000-30000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_29000-30000


2025-07-10 09:54:30,833 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:54:39,793 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:54:48,535 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:54:57,691 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:55:06,804 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:55:15,664 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:55:24,609 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:55:31,904 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61428.90 examples/s]

2025-07-10 09:55:31,935 INFO Saved chunk 30000-31000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_30000-31000



INFO:__main__:Saved chunk 30000-31000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_30000-31000


2025-07-10 09:55:41,093 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:55:50,045 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:55:59,730 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:56:09,086 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:56:18,054 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:56:27,928 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:56:36,972 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:56:44,554 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62947.29 examples/s]

2025-07-10 09:56:44,586 INFO Saved chunk 31000-32000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_31000-32000



INFO:__main__:Saved chunk 31000-32000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_31000-32000


2025-07-10 09:56:53,758 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:57:02,623 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:57:12,790 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:57:46,167 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:57:55,297 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:58:04,559 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:58:13,582 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:58:20,782 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64477.16 examples/s]

2025-07-10 09:58:20,813 INFO Saved chunk 32000-33000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_32000-33000



INFO:__main__:Saved chunk 32000-33000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_32000-33000


2025-07-10 09:58:30,310 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:58:39,190 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:58:47,902 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 09:58:56,994 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 09:59:06,021 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 09:59:15,130 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 09:59:24,258 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 09:59:31,432 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59265.86 examples/s]

2025-07-10 09:59:31,465 INFO Saved chunk 33000-34000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_33000-34000



INFO:__main__:Saved chunk 33000-34000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_33000-34000


2025-07-10 09:59:41,585 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 09:59:50,792 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 09:59:59,997 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:00:09,640 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:00:18,614 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:00:27,883 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:00:36,713 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:00:43,805 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 66686.34 examples/s]

2025-07-10 10:00:43,947 INFO Saved chunk 34000-35000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_34000-35000



INFO:__main__:Saved chunk 34000-35000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_34000-35000


2025-07-10 10:00:53,154 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:01:02,326 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:01:12,253 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:01:21,184 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:01:30,134 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:01:39,131 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:01:48,209 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:01:55,461 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61218.20 examples/s]

2025-07-10 10:01:55,493 INFO Saved chunk 35000-36000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_35000-36000



INFO:__main__:Saved chunk 35000-36000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_35000-36000


2025-07-10 10:02:04,945 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:02:13,800 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:02:23,051 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:02:32,471 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:02:41,493 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:02:50,498 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:02:59,753 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:03:07,059 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63251.06 examples/s]

2025-07-10 10:03:07,090 INFO Saved chunk 36000-37000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_36000-37000



INFO:__main__:Saved chunk 36000-37000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_36000-37000


2025-07-10 10:03:16,170 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:03:25,062 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:03:34,078 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:03:42,861 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:03:51,937 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:04:01,760 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:04:10,806 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:04:18,338 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62684.82 examples/s]

2025-07-10 10:04:18,370 INFO Saved chunk 37000-38000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_37000-38000



INFO:__main__:Saved chunk 37000-38000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_37000-38000


2025-07-10 10:04:27,469 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:04:36,472 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:04:46,139 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:04:55,093 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:05:04,215 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:05:13,234 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:05:22,525 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:05:29,794 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61116.51 examples/s]

2025-07-10 10:05:29,826 INFO Saved chunk 38000-39000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_38000-39000



INFO:__main__:Saved chunk 38000-39000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_38000-39000


2025-07-10 10:05:38,832 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:05:48,161 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:05:57,224 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:06:06,697 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:06:15,636 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:06:25,126 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:06:34,043 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:06:41,406 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59028.98 examples/s]

2025-07-10 10:06:41,439 INFO Saved chunk 39000-40000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_39000-40000



INFO:__main__:Saved chunk 39000-40000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_39000-40000


2025-07-10 10:06:50,943 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:07:14,365 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:07:23,650 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:07:32,779 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:07:41,727 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:07:50,984 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:08:00,190 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:08:07,652 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60474.13 examples/s]

2025-07-10 10:08:07,686 INFO Saved chunk 40000-41000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_40000-41000



INFO:__main__:Saved chunk 40000-41000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_40000-41000


2025-07-10 10:08:16,846 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:08:25,838 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:08:34,874 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:08:43,925 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:08:53,173 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:09:07,078 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:09:16,229 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:09:23,565 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59897.24 examples/s]

2025-07-10 10:09:23,598 INFO Saved chunk 41000-42000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_41000-42000



INFO:__main__:Saved chunk 41000-42000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_41000-42000


2025-07-10 10:09:32,741 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:09:41,802 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:09:51,107 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:10:00,155 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:10:09,873 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:10:19,770 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:10:28,795 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:10:36,518 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63967.79 examples/s]

2025-07-10 10:10:36,550 INFO Saved chunk 42000-43000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_42000-43000



INFO:__main__:Saved chunk 42000-43000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_42000-43000


2025-07-10 10:10:45,719 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:10:54,944 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:11:03,829 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:11:12,663 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:11:21,473 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:11:30,458 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:11:39,533 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:11:46,838 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 58892.22 examples/s]

2025-07-10 10:11:46,872 INFO Saved chunk 43000-44000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_43000-44000



INFO:__main__:Saved chunk 43000-44000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_43000-44000


2025-07-10 10:11:56,072 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:12:05,979 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:12:15,122 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:12:24,149 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:12:33,605 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:12:42,558 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:12:56,623 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:13:05,342 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64789.90 examples/s]

2025-07-10 10:13:05,374 INFO Saved chunk 44000-45000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_44000-45000



INFO:__main__:Saved chunk 44000-45000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_44000-45000


2025-07-10 10:13:14,469 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:13:23,598 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:13:33,337 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:13:42,403 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:13:51,466 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:14:00,352 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:14:09,388 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:14:16,697 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60473.25 examples/s]

2025-07-10 10:14:16,728 INFO Saved chunk 45000-46000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_45000-46000



INFO:__main__:Saved chunk 45000-46000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_45000-46000


2025-07-10 10:14:25,634 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:14:34,723 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:14:43,556 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:14:52,630 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:15:01,731 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:15:11,038 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:15:20,098 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:15:27,488 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61201.23 examples/s]

2025-07-10 10:15:27,521 INFO Saved chunk 46000-47000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_46000-47000



INFO:__main__:Saved chunk 46000-47000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_46000-47000


2025-07-10 10:15:36,388 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:15:45,845 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:15:54,793 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:16:03,951 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:16:12,945 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:16:21,873 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:16:30,966 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:16:38,260 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60447.11 examples/s]

2025-07-10 10:16:38,294 INFO Saved chunk 47000-48000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_47000-48000



INFO:__main__:Saved chunk 47000-48000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_47000-48000


2025-07-10 10:16:47,472 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:16:56,885 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:17:05,948 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:17:15,467 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:17:24,565 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:17:33,861 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:17:42,969 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:17:50,161 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61789.07 examples/s]

2025-07-10 10:17:50,194 INFO Saved chunk 48000-49000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_48000-49000



INFO:__main__:Saved chunk 48000-49000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_48000-49000


2025-07-10 10:17:59,252 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:18:08,474 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:18:17,480 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:18:26,890 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:18:36,128 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:18:46,367 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:18:55,466 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:19:02,922 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 58341.73 examples/s]

2025-07-10 10:19:02,956 INFO Saved chunk 49000-50000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_49000-50000



INFO:__main__:Saved chunk 49000-50000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_49000-50000


2025-07-10 10:19:12,773 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:19:21,720 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:19:30,690 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:19:39,669 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:19:48,857 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:20:04,764 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:20:21,276 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:20:30,035 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 55274.76 examples/s]

2025-07-10 10:20:30,070 INFO Saved chunk 50000-51000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_50000-51000



INFO:__main__:Saved chunk 50000-51000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_50000-51000


2025-07-10 10:20:39,655 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:20:48,791 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:20:58,286 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:21:15,350 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:21:24,425 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:21:33,997 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:21:43,126 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:21:50,544 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62697.00 examples/s]

2025-07-10 10:21:50,576 INFO Saved chunk 51000-52000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_51000-52000



INFO:__main__:Saved chunk 51000-52000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_51000-52000


2025-07-10 10:22:02,709 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:22:17,241 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:22:26,223 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:22:35,379 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:22:44,672 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:22:53,743 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:23:03,132 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:23:10,722 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60964.61 examples/s]

2025-07-10 10:23:10,754 INFO Saved chunk 52000-53000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_52000-53000



INFO:__main__:Saved chunk 52000-53000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_52000-53000


2025-07-10 10:23:20,121 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:23:29,367 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:23:38,376 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:23:47,477 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:23:56,518 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:24:11,411 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:24:20,638 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:24:27,905 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61674.59 examples/s]

2025-07-10 10:24:27,939 INFO Saved chunk 53000-54000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_53000-54000



INFO:__main__:Saved chunk 53000-54000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_53000-54000


2025-07-10 10:24:36,915 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:24:46,456 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:24:55,417 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:25:07,562 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:25:16,603 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:25:25,509 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:25:36,366 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:25:43,964 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 56939.86 examples/s]

2025-07-10 10:25:43,999 INFO Saved chunk 54000-55000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_54000-55000



INFO:__main__:Saved chunk 54000-55000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_54000-55000


2025-07-10 10:25:53,188 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:26:02,601 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:26:11,553 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:26:20,985 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:26:39,107 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:26:50,381 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:26:58,423 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61797.27 examples/s]

2025-07-10 10:26:58,455 INFO Saved chunk 55000-56000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_55000-56000



INFO:__main__:Saved chunk 55000-56000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_55000-56000


2025-07-10 10:27:13,639 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:27:23,083 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:27:32,581 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:27:41,755 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:27:51,044 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:28:02,320 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:28:11,698 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:28:18,972 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65057.22 examples/s]

2025-07-10 10:28:19,003 INFO Saved chunk 56000-57000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_56000-57000



INFO:__main__:Saved chunk 56000-57000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_56000-57000


2025-07-10 10:28:28,390 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:28:37,298 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:28:46,664 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:28:55,806 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:29:04,844 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:29:13,933 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:29:23,016 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:29:30,115 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61469.41 examples/s]

2025-07-10 10:29:30,148 INFO Saved chunk 57000-58000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_57000-58000



INFO:__main__:Saved chunk 57000-58000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_57000-58000


2025-07-10 10:29:39,390 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:29:48,459 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:29:57,323 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:30:22,157 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:30:31,277 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:30:43,361 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:30:52,556 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:31:00,697 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64485.09 examples/s]

2025-07-10 10:31:00,729 INFO Saved chunk 58000-59000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_58000-59000



INFO:__main__:Saved chunk 58000-59000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_58000-59000


2025-07-10 10:31:10,333 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:31:19,823 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:31:28,908 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:31:38,049 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:31:46,998 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:31:56,523 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:32:06,021 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:32:13,494 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60777.33 examples/s]

2025-07-10 10:32:13,528 INFO Saved chunk 59000-60000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_59000-60000



INFO:__main__:Saved chunk 59000-60000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_59000-60000


2025-07-10 10:32:22,611 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:32:31,552 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:32:40,767 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:32:49,997 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:32:59,896 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:33:08,933 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:33:18,954 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:33:26,174 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60387.93 examples/s]

2025-07-10 10:33:26,207 INFO Saved chunk 60000-61000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_60000-61000



INFO:__main__:Saved chunk 60000-61000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_60000-61000


2025-07-10 10:33:35,365 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:33:44,831 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:33:53,984 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:34:03,308 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:34:12,975 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:34:21,818 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:34:31,922 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:34:39,214 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64383.14 examples/s]

2025-07-10 10:34:39,246 INFO Saved chunk 61000-62000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_61000-62000



INFO:__main__:Saved chunk 61000-62000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_61000-62000


2025-07-10 10:34:48,337 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:34:57,774 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:35:08,671 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:35:17,705 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:35:26,948 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:35:36,095 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:35:45,259 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:35:52,812 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60355.78 examples/s]

2025-07-10 10:35:52,849 INFO Saved chunk 62000-63000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_62000-63000



INFO:__main__:Saved chunk 62000-63000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_62000-63000


2025-07-10 10:36:02,669 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:36:17,895 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:36:27,259 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:36:36,715 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:36:45,892 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:36:54,951 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:37:04,338 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:37:11,558 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 39809.26 examples/s]

2025-07-10 10:37:11,601 INFO Saved chunk 63000-64000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_63000-64000



INFO:__main__:Saved chunk 63000-64000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_63000-64000


2025-07-10 10:37:20,801 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:37:29,727 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:37:38,610 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:37:47,608 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:37:56,772 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:38:06,303 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:38:15,380 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:38:23,907 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65009.83 examples/s]

2025-07-10 10:38:23,939 INFO Saved chunk 64000-65000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_64000-65000



INFO:__main__:Saved chunk 64000-65000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_64000-65000


2025-07-10 10:38:32,985 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:38:41,882 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:38:50,623 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:39:00,310 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:39:09,381 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:39:20,495 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:39:37,458 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:39:44,995 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59056.41 examples/s]

2025-07-10 10:39:45,030 INFO Saved chunk 65000-66000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_65000-66000



INFO:__main__:Saved chunk 65000-66000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_65000-66000


2025-07-10 10:39:54,024 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:40:03,075 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:40:13,064 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:40:22,089 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:40:30,850 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:40:40,206 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:40:49,395 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:40:56,565 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 58895.53 examples/s]

2025-07-10 10:40:56,597 INFO Saved chunk 66000-67000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_66000-67000



INFO:__main__:Saved chunk 66000-67000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_66000-67000


2025-07-10 10:41:05,943 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:41:15,519 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:41:24,690 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:41:33,787 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:41:42,775 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:41:51,768 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:42:00,596 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:42:08,347 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64800.91 examples/s]

2025-07-10 10:42:08,378 INFO Saved chunk 67000-68000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_67000-68000



INFO:__main__:Saved chunk 67000-68000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_67000-68000


2025-07-10 10:42:17,480 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:42:26,783 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:42:36,576 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:42:46,033 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:42:55,027 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:43:04,397 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:43:13,306 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:43:20,840 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 54453.09 examples/s]

2025-07-10 10:43:20,875 INFO Saved chunk 68000-69000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_68000-69000



INFO:__main__:Saved chunk 68000-69000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_68000-69000


2025-07-10 10:43:29,645 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:43:38,655 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:43:47,953 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:43:56,950 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:44:06,313 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:44:15,912 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:44:25,385 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:44:32,881 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 55024.58 examples/s]

2025-07-10 10:44:32,916 INFO Saved chunk 69000-70000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_69000-70000



INFO:__main__:Saved chunk 69000-70000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_69000-70000


2025-07-10 10:44:42,285 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:44:51,534 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:45:07,690 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:45:16,781 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:45:26,049 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:45:35,409 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:45:44,825 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:45:52,270 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 56045.86 examples/s]

2025-07-10 10:45:52,305 INFO Saved chunk 70000-71000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_70000-71000



INFO:__main__:Saved chunk 70000-71000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_70000-71000


2025-07-10 10:46:06,442 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:46:16,154 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:46:25,697 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:46:34,946 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:46:44,444 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:46:54,715 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:47:04,103 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:47:11,531 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 63663.88 examples/s]

2025-07-10 10:47:11,562 INFO Saved chunk 71000-72000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_71000-72000



INFO:__main__:Saved chunk 71000-72000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_71000-72000


2025-07-10 10:47:20,468 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:47:29,561 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:47:38,610 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:47:47,627 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:47:56,464 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:48:05,595 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:48:14,799 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:48:22,289 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60969.04 examples/s]

2025-07-10 10:48:22,321 INFO Saved chunk 72000-73000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_72000-73000



INFO:__main__:Saved chunk 72000-73000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_72000-73000


2025-07-10 10:48:31,322 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:48:40,708 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:48:49,843 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:48:58,804 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:49:07,758 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:49:17,126 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:49:26,362 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:49:33,869 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62102.88 examples/s]

2025-07-10 10:49:33,901 INFO Saved chunk 73000-74000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_73000-74000



INFO:__main__:Saved chunk 73000-74000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_73000-74000


2025-07-10 10:49:43,132 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:49:53,963 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:50:03,133 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:50:12,386 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:50:21,812 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:50:30,924 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:50:40,669 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:50:48,142 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 56750.34 examples/s]

2025-07-10 10:50:48,176 INFO Saved chunk 74000-75000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_74000-75000



INFO:__main__:Saved chunk 74000-75000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_74000-75000


2025-07-10 10:50:57,394 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:51:06,543 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:51:15,705 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:51:24,578 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:51:33,699 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:51:43,228 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:51:52,894 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:52:00,492 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61253.97 examples/s]

2025-07-10 10:52:00,524 INFO Saved chunk 75000-76000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_75000-76000



INFO:__main__:Saved chunk 75000-76000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_75000-76000


2025-07-10 10:52:09,485 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:52:19,433 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:52:28,975 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:52:38,468 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:52:48,066 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:52:57,290 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:53:06,833 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:53:14,100 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 59843.40 examples/s]

2025-07-10 10:53:14,135 INFO Saved chunk 76000-77000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_76000-77000



INFO:__main__:Saved chunk 76000-77000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_76000-77000


2025-07-10 10:53:23,344 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:53:32,331 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:53:41,640 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:53:50,825 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:54:00,392 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:54:09,321 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:54:19,470 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:54:42,925 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64123.28 examples/s]

2025-07-10 10:54:42,957 INFO Saved chunk 77000-78000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_77000-78000



INFO:__main__:Saved chunk 77000-78000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_77000-78000


2025-07-10 10:54:51,947 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:55:01,270 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:55:10,288 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:55:19,228 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:55:28,188 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:55:44,383 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:55:54,015 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:56:01,315 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 58043.01 examples/s]

2025-07-10 10:56:01,348 INFO Saved chunk 78000-79000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_78000-79000



INFO:__main__:Saved chunk 78000-79000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_78000-79000


2025-07-10 10:56:10,870 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:56:22,978 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:56:32,464 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:56:42,025 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:56:51,194 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:57:00,789 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:57:10,297 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:57:27,005 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:57:36,176 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:57:45,166 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:57:54,128 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:58:03,292 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:58:12,768 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:58:21,945 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:58:28,958 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 58702.65 examples/s]

2025-07-10 10:58:28,991 INFO Saved chunk 80000-81000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_80000-81000



INFO:__main__:Saved chunk 80000-81000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_80000-81000


2025-07-10 10:58:38,046 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:58:47,015 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 10:58:55,939 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 10:59:05,088 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 10:59:14,628 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 10:59:23,705 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 10:59:32,591 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 10:59:40,093 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62063.36 examples/s]

2025-07-10 10:59:40,125 INFO Saved chunk 81000-82000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_81000-82000



INFO:__main__:Saved chunk 81000-82000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_81000-82000


2025-07-10 10:59:48,833 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 10:59:58,079 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:00:13,410 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:00:22,638 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:00:31,896 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:00:40,891 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:00:50,085 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:00:58,922 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 62159.94 examples/s]

2025-07-10 11:00:58,954 INFO Saved chunk 82000-83000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_82000-83000



INFO:__main__:Saved chunk 82000-83000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_82000-83000


2025-07-10 11:01:08,047 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 11:01:17,264 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:01:26,768 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:01:36,214 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:01:46,161 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:01:55,620 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:02:07,333 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:02:14,501 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60093.76 examples/s]

2025-07-10 11:02:14,534 INFO Saved chunk 83000-84000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_83000-84000



INFO:__main__:Saved chunk 83000-84000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_83000-84000


2025-07-10 11:02:24,051 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 11:02:33,982 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:02:43,182 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:02:52,462 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:03:17,242 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:03:36,546 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:03:52,395 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:04:17,371 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 64918.26 examples/s]

2025-07-10 11:04:17,403 INFO Saved chunk 84000-85000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_84000-85000



INFO:__main__:Saved chunk 84000-85000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_84000-85000


2025-07-10 11:04:35,274 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 11:04:57,515 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:05:19,182 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:05:41,938 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:06:00,032 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:06:09,622 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:06:18,997 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:06:30,204 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 61225.35 examples/s]

2025-07-10 11:06:30,237 INFO Saved chunk 85000-86000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_85000-86000



INFO:__main__:Saved chunk 85000-86000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_85000-86000


2025-07-10 11:06:40,032 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 11:06:49,282 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:06:59,067 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:07:07,968 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:07:17,022 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:07:26,117 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:07:35,363 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:07:42,565 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 65228.20 examples/s]

2025-07-10 11:07:42,597 INFO Saved chunk 86000-87000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_86000-87000



INFO:__main__:Saved chunk 86000-87000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_86000-87000


2025-07-10 11:07:52,077 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 11:08:01,433 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:08:11,663 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:08:21,225 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:08:30,689 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:08:40,176 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:08:49,486 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:08:56,650 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 60250.00 examples/s]

2025-07-10 11:08:56,682 INFO Saved chunk 87000-88000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_87000-88000



INFO:__main__:Saved chunk 87000-88000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_87000-88000


2025-07-10 11:09:06,067 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 11:10:04,523 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:10:13,720 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:10:23,484 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:10:36,727 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:10:46,524 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:10:55,822 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:11:03,060 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000
Saving the dataset (1/1 shards): 100%|██████████| 1000/1000 [00:00<00:00, 56871.15 examples/s]

2025-07-10 11:11:03,094 INFO Saved chunk 88000-89000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_88000-89000



INFO:__main__:Saved chunk 88000-89000 to /scratch/gpfs/mb5157/repos/graphmert/graphmert/datasets/dataset_with_heads/39k_abstracts_gemini_heads/gemini-2.0_88000-89000


2025-07-10 11:11:12,159 INFO Gemini: processed 128/1000


INFO:__main__:Gemini: processed 128/1000


2025-07-10 11:11:21,580 INFO Gemini: processed 256/1000


INFO:__main__:Gemini: processed 256/1000


2025-07-10 11:11:30,465 INFO Gemini: processed 384/1000


INFO:__main__:Gemini: processed 384/1000


2025-07-10 11:11:43,250 INFO Gemini: processed 512/1000


INFO:__main__:Gemini: processed 512/1000


2025-07-10 11:11:52,044 INFO Gemini: processed 640/1000


INFO:__main__:Gemini: processed 640/1000


2025-07-10 11:12:01,202 INFO Gemini: processed 768/1000


INFO:__main__:Gemini: processed 768/1000


2025-07-10 11:12:11,358 INFO Gemini: processed 896/1000


INFO:__main__:Gemini: processed 896/1000


2025-07-10 11:12:18,616 INFO Gemini: processed 1000/1000


INFO:__main__:Gemini: processed 1000/1000


In [14]:
print(input_tokens)
print(output_tokens)
print(cached_tokens)

143462700
16092648
4101


## Unite chunks

In [155]:
def unite_output(dataset_path, original_dataset_size, chunk_size, llm_name, start_idx=0):
    """
    original_dataset_size -- how many records in the original dataset;
    it's also in the filename in the folder -- the largest end index
    """
    output_path = os.path.join(dataset_path, f'{llm_name}_307000_989666_all')

    start_idx = start_idx
    end_idx = 0

    datasets = []
    while end_idx < original_dataset_size:
        end_idx = min(start_idx + chunk_size, original_dataset_size)
        path = os.path.join(dataset_path, f"{llm_name}_{start_idx}-{end_idx}")
        dataset = load_from_disk(path)
        print(f'loaded from {path}')
        datasets.append(dataset)
        start_idx += chunk_size
    
    united_dataset = concatenate_datasets(datasets)
    united_datasetc
    print('saved to', output_path)
    return output_path

In [156]:
prefix = '../'

dataset_path = 'datasets/dataset_with_heads/350k_abstracts_gemini_heads'
dataset_path = os.path.join(prefix, dataset_path)

original_dataset_size = 989666
saved_to_path = unite_output(dataset_path, original_dataset_size, chunk_size=1_000, llm_name=model_name, start_idx=307000)

loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_307000-308000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_308000-309000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_309000-310000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_310000-311000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_311000-312000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_312000-313000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_313000-314000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_314000-315000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_315000-316000
loaded from ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_316000-317000
loaded from ../datasets/dataset_with_hea

Saving the dataset (41/41 shards): 100%|██████████| 682666/682666 [00:35<00:00, 19056.23 examples/s]


saved to ../datasets/dataset_with_heads/350k_abstracts_gemini_heads/gemini-2.0_307000_989666_all
