In [None]:
# Mount Google Drive at /content/drive (Colab only); the input and output
# folders used later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Re-imports after kernel reset
import os
import time
import json
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

In [3]:
# ---------------------------
# Model setup
# ---------------------------
# The objects keep the `_aryabhata` suffix used throughout the notebook, but the
# checkpoint that is actually loaded is whichever `model_id` line is left
# uncommented below. Alternative checkpoints (enable exactly one):
# model_id = "PhysicsWallahAI/Aryabhata-1.0"
model_id = "Qwen/Qwen2.5-Math-7B"
# model_id = "Qwen/Qwen2.5-0.5B"
# model_id = "MiniLLM/Pretrain-Qwen-200M"
tokenizer_aryabhata = AutoTokenizer.from_pretrained(model_id)
# The model is moved to the GPU when one is available, otherwise to the CPU.
# NOTE(review): no dtype argument is passed to from_pretrained — confirm that the
# library's default precision fits in memory for the selected checkpoint.
model_aryabhata = AutoModelForCausalLM.from_pretrained(model_id).to("cuda" if torch.cuda.is_available() else "cpu")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [4]:
# ---------------------------
# Stop strings and cleaner
# ---------------------------
stop_strings = ["<|im_end|>", "<|end|>", "<im_start|>", "⁠```python\n", "⁠<|im_start|>", "]}}]}}]"]

def strip_bad_tokens(s, stop_strings):
    """Remove a single trailing stop string from ``s``.

    The entries of ``stop_strings`` are tried in order; the first one that ``s``
    ends with is cut off and the shortened string is returned. At most one
    suffix is removed per call.

    Args:
        s: Text to clean.
        stop_strings: Iterable of candidate suffixes. Empty entries are ignored.

    Returns:
        ``s`` without the matched suffix, or ``s`` unchanged when none matches.
    """
    for suffix in stop_strings:
        # An empty suffix "matches" every string and `s[:-0]` would wipe `s`
        # entirely, so empty entries must be skipped.
        if suffix and s.endswith(suffix):
            return s[:-len(suffix)]
    return s

# Shared generation settings used by every inference helper in this notebook:
# at most 4096 newly generated tokens, with `stop_strings` passed through as the
# stop criteria.
# NOTE(review): a recorded run of this notebook logs "the current text generation
# call will exceed the model's predefined maximum length (4096)" — prompt tokens
# plus 4096 new tokens may be over the model's limit; confirm and lower
# max_new_tokens if so.
generation_config = GenerationConfig(
    max_new_tokens=4096,
    stop_strings=stop_strings
)

In [5]:
# ---------------------------
# Prompt formatter (matches website)
# ---------------------------
def create_chat_prompt_options(question, options):
    """Build the chat messages for a question, optionally listing answer options.

    When ``options`` is non-empty, each option is labelled with consecutive
    capital letters starting at "A" and appended to the question under an
    "Options:" heading. When ``options`` is empty or None, the question is used
    as the user message unchanged.

    Returns:
        A two-element list of ``{'role', 'content'}`` dicts: the fixed system
        instruction followed by the user turn.
    """
    user_text = question
    if options:
        labelled = (f"{chr(ord('A') + pos)}. {choice}" for pos, choice in enumerate(options))
        user_text = f"{question}\n\nOptions:\n" + "\n".join(labelled)

    system_turn = {'role': 'system', 'content': 'Think step-by-step; put only the final answer inside \\boxed{}.'}
    user_turn = {'role': 'user', 'content': user_text}
    return [system_turn, user_turn]


In [6]:
# ---------------------------
# Prompt formatter (matches website)
# ---------------------------
def create_chat_prompt_question(question):
    """Build the chat messages for a question asked without answer options.

    Returns:
        A two-element list of ``{'role', 'content'}`` dicts: the fixed system
        instruction (step-by-step reasoning, final answer in a boxed expression)
        followed by a user turn holding the string form of ``question``.
    """
    system_turn = {'role': 'system', 'content': 'Think step-by-step; put only the final answer inside \\boxed{}.'}
    user_turn = {'role': 'user', 'content': "{}".format(question)}
    return [system_turn, user_turn]

In [7]:
# ---------------------------
# Prompt formatter (matches website)
# ---------------------------
def create_chat_prompt_system_mes_modified(question):
    """Build the chat messages using the alternative system instruction.

    Unlike the other prompt builders in this notebook, the system message here
    does not ask for a boxed final answer.

    Returns:
        A two-element list of ``{'role', 'content'}`` dicts: the system
        instruction followed by a user turn holding the string form of
        ``question``.
    """
    system_turn = {'role': 'system', 'content': 'Think step-by-step to come up to a final answer.'}
    user_turn = {'role': 'user', 'content': "{}".format(question)}
    return [system_turn, user_turn]

In [8]:
# ---------------------------
# Inference function
# ---------------------------
def run_aryabhata_prompt(messages, tokenizer, model):
    """Generate a completion for one chat conversation and return the decoded text.

    Args:
        messages: Chat messages (list of ``{'role', 'content'}`` dicts).
        tokenizer: Tokenizer providing ``apply_chat_template``, ``__call__`` and
            ``decode``.
        model: Model providing ``device`` and ``generate``.

    Returns:
        The first sequence returned by ``model.generate`` decoded with special
        tokens skipped. The sequence is decoded as returned; no prompt tokens
        are sliced off here.
    """
    chat_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    encoded = tokenizer([chat_text], return_tensors="pt")
    encoded = encoded.to(model.device)
    # Uses the notebook-level `generation_config`; the tokenizer is passed so the
    # configured stop strings can be applied during generation.
    generated = model.generate(**encoded, generation_config=generation_config, tokenizer=tokenizer)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


In [9]:
def return_assistant_part(tokenizer, outputs):
    """Return only the assistant's reply from the first generated sequence.

    The sequence is decoded WITH special tokens kept so that the
    ``<|im_start|>assistant`` marker is still present to split on (decoding with
    ``skip_special_tokens=True`` removes the marker, which made the extraction
    branch unreachable). The text after the last marker, up to the next
    ``<|im_end|>``, is returned with any remaining special tokens removed and
    surrounding whitespace stripped.

    Args:
        tokenizer: Tokenizer providing ``decode``; its ``all_special_tokens``
            attribute, when present, is used to clean the extracted reply.
        outputs: Generated sequences; only ``outputs[0]`` is used.

    Returns:
        The assistant reply, or — when no assistant marker is found — the whole
        sequence decoded without special tokens and stripped.
    """
    marker = "<|im_start|>assistant"
    raw = tokenizer.decode(outputs[0], skip_special_tokens=False)
    if marker not in raw:
        # No chat markup to split on: fall back to the plain decoded text.
        return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    reply = raw.split(marker)[-1].split("<|im_end|>")[0]
    # Generation may end on a different special token (e.g. an EOS/pad token)
    # instead of <|im_end|>; drop whatever special tokens are left.
    for special in getattr(tokenizer, "all_special_tokens", ()):
        if special:
            reply = reply.replace(special, "")
    return reply.strip()

In [10]:
from tqdm import tqdm
from datetime import datetime

# Modified version with tqdm progress bar per document and per batch
def _generate_batch_responses(prompts):
    """Generate completions for a batch of chat prompts with the notebook model.

    Args:
        prompts: List of chat-message lists (as built by the
            ``create_chat_prompt_*`` helpers).

    Returns:
        list[str]: One decoded string per prompt, in input order. The sequences
        returned by ``generate`` are decoded as-is with special tokens skipped;
        no prompt tokens are sliced off here.
    """
    # Pad on the left and reuse EOS as the pad token so that every row of the
    # batch ends with its real prompt tokens.
    tokenizer_aryabhata.padding_side = "left"
    tokenizer_aryabhata.pad_token = tokenizer_aryabhata.eos_token
    formatted_prompts = [
        tokenizer_aryabhata.apply_chat_template(p, tokenize=False, add_generation_prompt=True)
        for p in prompts
    ]
    inputs = tokenizer_aryabhata(
        formatted_prompts, return_tensors="pt", padding=True, truncation=True
    ).to(model_aryabhata.device)
    with torch.no_grad():
        outputs = model_aryabhata.generate(
            **inputs,
            generation_config=generation_config,
            tokenizer=tokenizer_aryabhata,
            do_sample=False,
            pad_token_id=tokenizer_aryabhata.pad_token_id
        )
    return tokenizer_aryabhata.batch_decode(outputs, skip_special_tokens=True)


def _write_json_synced(payload, path):
    """Write ``payload`` to ``path`` as indented JSON and fsync it to disk."""
    # ensure_ascii=False emits raw non-ASCII characters, so pin the encoding
    # instead of depending on the platform default.
    with open(path, "w", encoding="utf-8") as fout:
        json.dump(payload, fout, indent=2, ensure_ascii=False)
        fout.flush()
        os.fsync(fout.fileno())


# Modified version with tqdm progress bar per document and per batch
def process_all_json_files_batched(input_folder, output_folder, list_json=None, only_question=False, with_options=False, w_sys_prompt_mod=False, local=False, batch_size=5):
    """Run batched inference over question files and save the annotated questions.

    Each input file must contain a JSON list of dicts with a ``"question"`` key
    and an optional ``"options"`` list. For every enabled prompt mode the
    questions are sent to the model in batches; each question dict receives the
    response under the mode's key and the batch-averaged time under
    ``inference_time_seconds_<mode>``. After every batch the whole question list
    is rewritten to disk, so partial results survive an interruption.

    Results go to ``<output_folder>/<modes>_<timestamp>/pred_<modes>_<timestamp>_<filename>``
    and, when ``local`` is true, to the same relative path under ``./output``.
    If no mode flag is enabled, the run folder is created but no file is written.

    Args:
        input_folder: Directory holding the question JSON files.
        output_folder: Directory under which the run sub-folder is created.
        list_json: File names to process; when empty or None, every ``.json``
            file in ``input_folder`` is processed.
        only_question: Enable the question-only prompt.
        with_options: Enable the question-plus-options prompt.
        w_sys_prompt_mod: Enable the prompt with the modified system message.
        local: Also keep a copy of every output file under ``./output``.
        batch_size: Number of prompts per generation call.
    """
    os.makedirs(output_folder, exist_ok=True)
    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")

    suffix_parts = []
    if only_question:
        suffix_parts.append("only_question")
    if with_options:
        suffix_parts.append("with_options")
    if w_sys_prompt_mod:
        suffix_parts.append("w_sys_prompt_mod")
    suffix_str = "_".join(suffix_parts) if suffix_parts else "default"
    run_tag = f"{suffix_str}_{timestamp_str}"

    subfolder = os.path.join(output_folder, run_tag)
    os.makedirs(subfolder, exist_ok=True)

    local_subfolder = None
    if local:
        local_subfolder = os.path.join("./output", run_tag)
        os.makedirs(local_subfolder, exist_ok=True)

    input_files = list_json if list_json else [f for f in os.listdir(input_folder) if f.endswith(".json")]

    # (response key, enabled flag, prompt builder), in execution order.
    modes = [
        ("qwen-2.5-math_response_Woptions", with_options,
         lambda question, options: create_chat_prompt_options(question, options)),
        ("qwen-2.5-math_response_Wquestion", only_question,
         lambda question, options: create_chat_prompt_question(question)),
        ("qwen-2.5-math_response_sys_mes_modified", w_sys_prompt_mod,
         lambda question, options: create_chat_prompt_system_mes_modified(question)),
    ]

    for filename in tqdm(input_files, desc="Processing JSON files"):
        input_path = os.path.join(input_folder, filename)
        output_name = f"pred_{run_tag}_{filename}"
        target_paths = [os.path.join(subfolder, output_name)]
        if local:
            target_paths.append(os.path.join(local_subfolder, output_name))

        with open(input_path, "r", encoding="utf-8") as f:
            questions = json.load(f)

        total_time_taken = 0
        last_index = len(questions) - 1

        for mode, flag, build_prompt in modes:
            if not flag:
                continue

            print(f"→ Running mode: {mode}")
            prompts = []
            idx_map = []

            mode_bar = tqdm(total=len(questions), desc=f"{mode}", leave=False)
            try:
                for i, q in enumerate(questions):
                    question_text = q["question"]
                    options = q.get("options", [])

                    prompts.append(build_prompt(question_text, options))
                    idx_map.append(i)

                    # Flush when the batch is full or the file is exhausted.
                    if len(prompts) != batch_size and i != last_index:
                        continue

                    start_time = time.time()
                    responses = _generate_batch_responses(prompts)
                    elapsed = time.time() - start_time
                    total_time_taken += elapsed

                    # The batch is timed as a whole; each question is credited
                    # with an equal share.
                    per_item = round(elapsed / len(responses), 2) if responses else 0.0
                    for idx, response in zip(idx_map, responses):
                        questions[idx][mode] = response
                        questions[idx][f"inference_time_seconds_{mode}"] = per_item

                    # Rewrite the full snapshot after every batch (incremental save).
                    for path in target_paths:
                        _write_json_synced(questions, path)

                    # Advance by the real batch size: the last batch may be
                    # shorter than `batch_size`.
                    mode_bar.update(len(responses))
                    prompts = []
                    idx_map = []
            finally:
                # Close the bar even when generation is interrupted.
                mode_bar.close()

        print(f"\n✅ Total Time taken for {filename}: {total_time_taken:.2f} sec ({total_time_taken / 60:.2f} min)")


In [12]:
# Question papers to evaluate and the Drive folder that receives the predictions.
input_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/Jee_questions/mains2025Jan"
output_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/llm_generated_answers/qwen-2.5-math"

# Sorted for a stable order; this run covers the first four papers only.
Jee_questions = sorted(name for name in os.listdir(input_folder) if name.endswith(".json"))
list_json = Jee_questions[:4]

list_json

['jan_22_shift_1_questions.json',
 'jan_22_shift_2_questions.json',
 'jan_23_shift_1_questions.json',
 'jan_23_shift_2_questions.json']

In [13]:
# Question-only batched run over the selected papers; results are written to
# Drive and mirrored under ./output.
process_all_json_files_batched(
    input_folder=input_folder,
    output_folder=output_folder,
    list_json=list_json,
    only_question=True,
    with_options=False,
    w_sys_prompt_mod=False,
    local=True,
    batch_size=5,  # or 10, etc.
)

Processing JSON files:   0%|          | 0/4 [00:00<?, ?it/s]

→ Running mode: aryabhata_response_Wquestion



aryabhata_response_Wquestion:   0%|          | 0/25 [00:00<?, ?it/s][AThis is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.

aryabhata_response_Wquestion:  20%|██        | 5/25 [05:09<20:36, 61.85s/it][A
aryabhata_response_Wquestion:  40%|████      | 10/25 [10:14<15:20, 61.37s/it][A
aryabhata_response_Wquestion:  60%|██████    | 15/25 [15:21<10:13, 61.35s/it][A
aryabhata_response_Wquestion:  80%|████████  | 20/25 [20:27<05:06, 61.30s/it][A
aryabhata_response_Wquestion: 100%|██████████| 25/25 [25:34<00:00, 61.33s/it][A
Processing JSON files:  25%|██▌       | 1/4 [25:34<1:16:43, 1534.40s/it]


✅ Total Time taken for jan_22_shift_1_questions.json: 1534.03 sec (25.57 min)
→ Running mode: aryabhata_response_Wquestion



aryabhata_response_Wquestion:   0%|          | 0/25 [00:00<?, ?it/s][A
aryabhata_response_Wquestion:  20%|██        | 5/25 [05:06<20:26, 61.32s/it][A
aryabhata_response_Wquestion:  40%|████      | 10/25 [10:12<15:18, 61.26s/it][A
aryabhata_response_Wquestion:  60%|██████    | 15/25 [15:18<10:11, 61.20s/it][A
aryabhata_response_Wquestion:  80%|████████  | 20/25 [20:25<05:06, 61.32s/it][A
aryabhata_response_Wquestion: 100%|██████████| 25/25 [22:47<00:00, 49.44s/it][A
Processing JSON files:  50%|█████     | 2/4 [48:22<47:53, 1436.53s/it]  


✅ Total Time taken for jan_22_shift_2_questions.json: 1367.63 sec (22.79 min)
→ Running mode: aryabhata_response_Wquestion



aryabhata_response_Wquestion:   0%|          | 0/25 [00:00<?, ?it/s][A
aryabhata_response_Wquestion:  20%|██        | 5/25 [05:06<20:26, 61.32s/it][A
aryabhata_response_Wquestion:  40%|████      | 10/25 [10:12<15:18, 61.22s/it][A
aryabhata_response_Wquestion:  60%|██████    | 15/25 [15:17<10:11, 61.14s/it][A
aryabhata_response_Wquestion:  80%|████████  | 20/25 [20:23<05:05, 61.18s/it][A
aryabhata_response_Wquestion: 100%|██████████| 25/25 [25:30<00:00, 61.22s/it][A
Processing JSON files:  75%|███████▌  | 3/4 [1:13:52<24:39, 1479.45s/it]


✅ Total Time taken for jan_23_shift_1_questions.json: 1530.20 sec (25.50 min)
→ Running mode: aryabhata_response_Wquestion



aryabhata_response_Wquestion:   0%|          | 0/25 [00:00<?, ?it/s][A
aryabhata_response_Wquestion:  20%|██        | 5/25 [01:57<07:51, 23.58s/it][A
aryabhata_response_Wquestion:  40%|████      | 10/25 [07:03<11:25, 45.70s/it][A
aryabhata_response_Wquestion:  60%|██████    | 15/25 [12:09<08:47, 52.76s/it][A
aryabhata_response_Wquestion:  80%|████████  | 20/25 [17:14<04:40, 56.01s/it][A
aryabhata_response_Wquestion: 100%|██████████| 25/25 [22:19<00:00, 57.81s/it][A
Processing JSON files: 100%|██████████| 4/4 [1:36:12<00:00, 1443.22s/it]


✅ Total Time taken for jan_23_shift_2_questions.json: 1339.56 sec (22.33 min)





In [None]:
# ---------------------------
# Main processing function
# ---------------------------

from datetime import datetime

def process_all_json_files(input_folder, output_folder, list_json=None, only_question=False, with_options= False, w_sys_prompt_mod=False, local=False):
    """Run one-question-at-a-time inference over question files and save results.

    Each input file must contain a JSON list of dicts with a ``"question"`` key
    and an optional ``"options"`` list. For every question, each enabled prompt
    variant is sent to the model via ``run_aryabhata_prompt``; the response is
    stored on the question dict under ``aryabhata_response_<variant>`` and the
    elapsed seconds under ``inference_time_seconds_<variant>``. After every
    question the list of questions processed so far is rewritten to disk, so
    partial results survive an interruption.

    Results go to ``<output_folder>/<modes>_<timestamp>/pred_<modes>_<timestamp>_<filename>``
    and, when ``local`` is true, to the same relative path under ``./output``.

    Args:
        input_folder: Directory holding the question JSON files.
        output_folder: Directory under which the run sub-folder is created.
        list_json: File names to process; when None, every ``.json`` file in
            ``input_folder`` is processed.
        only_question: Enable the question-only prompt (``Wquestion``).
        with_options: Enable the question-plus-options prompt (``Woptions``).
        w_sys_prompt_mod: Enable the modified-system-message prompt
            (``sys_mes_modified``).
        local: Also keep a copy of every output file under ``./output``.
    """
    os.makedirs(output_folder, exist_ok=True)
    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Build dynamic suffix based on active flags
    suffix_parts = []
    if only_question:
        suffix_parts.append("only_question")
    if with_options:
        suffix_parts.append("with_options")
    if w_sys_prompt_mod:
        suffix_parts.append("w_sys_prompt_mod")
    suffix_str = "_".join(suffix_parts) if suffix_parts else "default"
    run_tag = f"{suffix_str}_{timestamp_str}"

    # Create subfolder with timestamp + mode suffix
    subfolder = os.path.join(output_folder, run_tag)
    os.makedirs(subfolder, exist_ok=True)

    local_subfolder = None
    if local:
        # makedirs also creates the parent ./output directory when needed.
        local_subfolder = os.path.join("./output", run_tag)
        os.makedirs(local_subfolder, exist_ok=True)

    if list_json is not None:
        input_files = list_json
    else:
        input_files = [f for f in os.listdir(input_folder) if f.endswith(".json")]

    # (enabled flag, result-key suffix, prompt builder), in execution order.
    variants = [
        (with_options, "Woptions",
         lambda question, options: create_chat_prompt_options(question, options)),
        (only_question, "Wquestion",
         lambda question, options: create_chat_prompt_question(question)),
        (w_sys_prompt_mod, "sys_mes_modified",
         lambda question, options: create_chat_prompt_system_mes_modified(question)),
    ]
    active_variants = [(key, build) for enabled, key, build in variants if enabled]

    def write_snapshot(records, path):
        # ensure_ascii=False emits raw non-ASCII characters, so pin the encoding;
        # fsync forces the data out so that partial results are durable.
        with open(path, "w", encoding="utf-8") as fout:
            json.dump(records, fout, indent=2, ensure_ascii=False)
            fout.flush()
            os.fsync(fout.fileno())

    for filename in input_files:
        input_path = os.path.join(input_folder, filename)

        # Final output path(s): the Drive copy and, optionally, the local mirror.
        output_name = f"pred_{run_tag}_{filename}"
        target_paths = [os.path.join(subfolder, output_name)]
        if local:
            target_paths.append(os.path.join(local_subfolder, output_name))

        with open(input_path, "r", encoding="utf-8") as f:
            questions = json.load(f)

        results = []
        total_time_taken = 0

        for i, q in enumerate(tqdm(questions, desc=f"Processing {filename}")):
            question_text = q["question"]
            options = q.get("options", [])

            for key, build_messages in active_variants:
                # Only prompts that are actually used get built.
                messages = build_messages(question_text, options)

                start_time = time.time()
                response = run_aryabhata_prompt(messages, tokenizer_aryabhata, model_aryabhata)
                elapsed = time.time() - start_time
                total_time_taken += elapsed

                print(f"→ [{i+1}/{len(questions)}] Time taken: {elapsed:.2f} sec")

                # Save response with original question
                q[f"aryabhata_response_{key}"] = response
                q[f"inference_time_seconds_{key}"] = round(elapsed, 2)

            results.append(q)

            # Save incrementally after every question.
            for path in target_paths:
                write_snapshot(results, path)

        print(f"\n✅ Total Time taken for {filename}: {total_time_taken:.2f} sec ({total_time_taken/60:.2f} min)")



In [None]:
def gen_response(input_folder, output_folder, list_json=None, only_question=False, with_options= False, w_sys_prompt_mod=False, local=False):
    """Report how many questions will be processed, then run the inference pass.

    Prints the number of questions in each selected file and the overall total,
    then delegates to ``process_all_json_files`` with the same settings.

    Args:
        input_folder: Directory holding the question JSON files.
        output_folder: Directory for the generated answers (created if missing).
        list_json: File names to process; when None, every ``.json`` file in
            ``input_folder`` is used, in sorted order.
        only_question, with_options, w_sys_prompt_mod, local: Forwarded unchanged
            to ``process_all_json_files``.
    """
    os.makedirs(output_folder, exist_ok=True)

    # The default None used to crash in the loop below; resolve it to the full
    # folder listing so the counted files are exactly the processed files.
    if list_json is None:
        list_json = sorted(f for f in os.listdir(input_folder) if f.endswith(".json"))

    total = 0
    for ques_paper in list_json:
        with open(os.path.join(input_folder, ques_paper), "r", encoding="utf-8") as f:
            data = json.load(f)
        total += len(data)
        print(f"total question in {ques_paper}: ", len(data))
    print("total questions to gen response: ", total)

    process_all_json_files(input_folder, output_folder, list_json=list_json, only_question=only_question, with_options=with_options, w_sys_prompt_mod=w_sys_prompt_mod, local=local)




In [None]:
# Question papers to evaluate and the Drive folder that receives the predictions.
input_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/Jee_questions/mains2025Jan"
output_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/llm_generated_answers/aryabhata1.0"

# Sorted for a stable order; this run covers the last three papers only.
Jee_questions = sorted(name for name in os.listdir(input_folder) if name.endswith(".json"))
list_json = Jee_questions[-3:]
list_json

['jan_28_shift_2_questions.json',
 'jan_29_shift_1_questions.json',
 'jan_29_shift_2_questions.json']

In [None]:
# Question-only, one-question-at-a-time run over the selected papers; results are
# written to Drive and mirrored under ./output.
gen_response(
    input_folder,
    output_folder,
    list_json=list_json,
    only_question=True,
    with_options=False,
    w_sys_prompt_mod=False,
    local=True,
)

total question in jan_28_shift_2_questions.json:  25
total question in jan_29_shift_1_questions.json:  25
total question in jan_29_shift_2_questions.json:  25
total questions to gen response:  75


Processing jan_28_shift_2_questions.json:   0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Processing jan_28_shift_2_questions.json:   4%|▍         | 1/25 [00:29<11:39, 29.14s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [1/25] Time taken: 29.13 sec


Processing jan_28_shift_2_questions.json:   8%|▊         | 2/25 [01:06<12:59, 33.88s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [2/25] Time taken: 37.18 sec


Processing jan_28_shift_2_questions.json:  12%|█▏        | 3/25 [01:54<14:49, 40.41s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [3/25] Time taken: 48.18 sec


Processing jan_28_shift_2_questions.json:  16%|█▌        | 4/25 [04:31<30:14, 86.39s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [4/25] Time taken: 156.86 sec


Processing jan_28_shift_2_questions.json:  20%|██        | 5/25 [07:07<37:11, 111.57s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [5/25] Time taken: 156.23 sec


Processing jan_28_shift_2_questions.json:  24%|██▍       | 6/25 [07:31<25:54, 81.84s/it] Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [6/25] Time taken: 24.11 sec


Processing jan_28_shift_2_questions.json:  28%|██▊       | 7/25 [08:09<20:10, 67.27s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [7/25] Time taken: 37.27 sec


Processing jan_28_shift_2_questions.json:  32%|███▏      | 8/25 [10:45<27:05, 95.64s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [8/25] Time taken: 156.36 sec


Processing jan_28_shift_2_questions.json:  36%|███▌      | 9/25 [13:21<30:32, 114.53s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [9/25] Time taken: 156.07 sec


Processing jan_28_shift_2_questions.json:  40%|████      | 10/25 [16:02<32:13, 128.89s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [10/25] Time taken: 161.02 sec


Processing jan_28_shift_2_questions.json:  44%|████▍     | 11/25 [18:38<32:02, 137.30s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [11/25] Time taken: 156.36 sec


Processing jan_28_shift_2_questions.json:  48%|████▊     | 12/25 [19:14<23:00, 106.23s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [12/25] Time taken: 35.15 sec


Processing jan_28_shift_2_questions.json:  52%|█████▏    | 13/25 [19:48<16:54, 84.51s/it] Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [13/25] Time taken: 34.54 sec


Processing jan_28_shift_2_questions.json:  56%|█████▌    | 14/25 [20:39<13:37, 74.28s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [14/25] Time taken: 50.63 sec


Processing jan_28_shift_2_questions.json:  60%|██████    | 15/25 [23:14<16:28, 98.85s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [15/25] Time taken: 155.76 sec


Processing jan_28_shift_2_questions.json:  64%|██████▍   | 16/25 [24:57<14:58, 99.88s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [16/25] Time taken: 102.27 sec


Processing jan_28_shift_2_questions.json:  68%|██████▊   | 17/25 [25:50<11:26, 85.76s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [17/25] Time taken: 52.92 sec


Processing jan_28_shift_2_questions.json:  72%|███████▏  | 18/25 [27:05<09:37, 82.53s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [18/25] Time taken: 75.00 sec


Processing jan_28_shift_2_questions.json:  76%|███████▌  | 19/25 [29:42<10:29, 104.95s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [19/25] Time taken: 157.15 sec


Processing jan_28_shift_2_questions.json:  80%|████████  | 20/25 [30:41<07:35, 91.16s/it] Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [20/25] Time taken: 59.02 sec


Processing jan_28_shift_2_questions.json:  84%|████████▍ | 21/25 [31:16<04:57, 74.43s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [21/25] Time taken: 35.40 sec


Processing jan_28_shift_2_questions.json:  88%|████████▊ | 22/25 [31:50<03:07, 62.34s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [22/25] Time taken: 34.14 sec


Processing jan_28_shift_2_questions.json:  92%|█████████▏| 23/25 [32:32<01:52, 56.02s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [23/25] Time taken: 41.27 sec


Processing jan_28_shift_2_questions.json:  96%|█████████▌| 24/25 [33:31<00:57, 57.04s/it]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


→ [24/25] Time taken: 59.39 sec


Processing jan_28_shift_2_questions.json: 100%|██████████| 25/25 [36:08<00:00, 86.73s/it]


→ [25/25] Time taken: 156.57 sec

✅ Total Time taken for jan_28_shift_2_questions.json: 2167.98 sec (36.13 min)


Processing jan_29_shift_1_questions.json:   0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Processing jan_29_shift_1_questions.json:   0%|          | 0/25 [00:39<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Replace with your actual paths
# !mv /content/output /content/drive/MyDrive/Aryabhata-1.0-Evals/


In [None]:
# Question papers to evaluate and the Drive folder that receives the predictions.
input_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/Jee_questions/mains2025Jan"
output_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/llm_generated_answers/aryabhata1.0"

# Sorted for a stable order; this selection covers the last two papers only.
Jee_questions = sorted(name for name in os.listdir(input_folder) if name.endswith(".json"))
list_json = Jee_questions[-2:]
list_json

['jan_29_shift_1_questions.json', 'jan_29_shift_2_questions.json']

In [None]:
# Question papers to evaluate and the Drive folder that receives the predictions.
input_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/Jee_questions/mains2025Jan"
output_folder = "/content/drive/MyDrive/Aryabhata-1.0-Evals/llm_generated_answers/aryabhata1.0"

Jee_questions = sorted([f for f in os.listdir(input_folder) if f.endswith(".json")])
# Select the papers in this cell instead of relying on a `list_json` left over
# from an earlier cell; the recorded run of this cell processed the last two papers.
list_json = Jee_questions[-2:]

process_all_json_files_batched(
    input_folder=input_folder,
    output_folder=output_folder,
    list_json=list_json,
    with_options=False,
    only_question=True,
    w_sys_prompt_mod=False,
    local=True,
    batch_size=5  # or 10, etc.
)


Processing JSON files:   0%|          | 0/2 [00:00<?, ?it/s]

→ Running mode: aryabhata_response_Wquestion



aryabhata_response_Wquestion:   0%|          | 0/25 [00:00<?, ?it/s][A`generation_config` default values have been modified to match model-specific defaults: {'bos_token_id': 151643, 'eos_token_id': 151643}. If this is not desired, please set these values explicitly.

aryabhata_response_Wquestion:  20%|██        | 5/25 [02:10<08:42, 26.12s/it][A
aryabhata_response_Wquestion:  40%|████      | 10/25 [07:16<11:41, 46.77s/it][A
aryabhata_response_Wquestion:  60%|██████    | 15/25 [12:22<08:53, 53.33s/it][A
aryabhata_response_Wquestion:  80%|████████  | 20/25 [17:29<04:42, 56.54s/it][A
aryabhata_response_Wquestion: 100%|██████████| 25/25 [19:27<00:00, 44.67s/it][A
Processing JSON files:  50%|█████     | 1/2 [19:28<19:28, 1168.11s/it]


✅ Total Time taken for jan_29_shift_1_questions.json: 1167.76 sec (19.46 min)
→ Running mode: aryabhata_response_Wquestion



aryabhata_response_Wquestion:   0%|          | 0/25 [00:00<?, ?it/s][A
aryabhata_response_Wquestion:  20%|██        | 5/25 [05:07<20:28, 61.43s/it][A
aryabhata_response_Wquestion:  40%|████      | 10/25 [10:13<15:19, 61.28s/it][A
aryabhata_response_Wquestion:  60%|██████    | 15/25 [15:18<10:12, 61.23s/it][A
aryabhata_response_Wquestion:  80%|████████  | 20/25 [20:24<05:05, 61.19s/it][A
aryabhata_response_Wquestion: 100%|██████████| 25/25 [25:30<00:00, 61.16s/it][A
Processing JSON files: 100%|██████████| 2/2 [44:58<00:00, 1349.22s/it]


✅ Total Time taken for jan_29_shift_2_questions.json: 1530.01 sec (25.50 min)





In [None]:
!mv /content/output/only_question_20250730_102733 /content/drive/MyDrive/Aryabhata-1.0-Evals/output/