In [1]:
import gc
import json
import os
from typing import Dict, List

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [2]:
# Hugging Face model ids to evaluate. Every uncommented entry is loaded and
# queried in turn; uncomment the ones you want to include.
MODELS = [
    "microsoft/phi-1_5",
    # "microsoft/phi-2",
    # "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    # "TinyLlama/TinyLlama_v1.1_math_code",
    # "Qwen/Qwen3-1.7B",
    # "Qwen/Qwen2.5-Math-1.5B",
    # "microsoft/rho-math-1b-v0.1",
    # "google/gemma-2b",
    # "tiiuae/falcon-rw-1b",
    # "stabilityai/stablelm-2-zephyr-1_6b",
    # "stabilityai/stablelm-zephyr-3b",
    # "openbmb/MiniCPM-2B-dpo-bf16",
    # "apple/OpenELM-3B",
    # "mtgv/MobileLLaMA-2.7B-Base",
]

In [8]:
def load_pipeline(model_name: str):
    """Build a text-generation pipeline for the given Hugging Face model id.

    The tokenizer and the causal-LM weights are both loaded with
    ``trust_remote_code=True``. When CUDA is available the weights are loaded
    as float16 and the pipeline is placed on GPU 0; otherwise float32 weights
    are used and the pipeline runs on CPU (``device=-1``).

    Args:
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        The object returned by ``transformers.pipeline("text-generation", ...)``.
    """
    on_gpu = torch.cuda.is_available()
    if on_gpu:
        weights_dtype, pipeline_device = torch.float16, 0
    else:
        weights_dtype, pipeline_device = torch.float32, -1

    tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    lm = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=weights_dtype,
        trust_remote_code=True,
    )
    return pipeline(
        "text-generation",
        model=lm,
        tokenizer=tok,
        device=pipeline_device,
    )

In [None]:
# Uncomment and run this cell only when evaluating apple/OpenELM-3B; it redefines load_pipeline.

# def load_pipeline(model_name: str):
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
#         trust_remote_code=True
#     )
#     return pipeline("text-generation", model=model, tokenizer=tokenizer, device=device.index if device.type == "cuda" else -1)

In [4]:
def evaluate(config: Dict, max_new_tokens: int = 1000) -> Dict[str, List[str]]:
    """Run every configured question through each model listed in ``MODELS``.

    For each model a pipeline is built with ``load_pipeline``, each question is
    wrapped as ``"{pre_text} {question}\\n\\n{post_text}"`` and generated
    greedily (``do_sample=False``). Every response is printed as it arrives and
    collected in the result.

    Args:
        config: Mapping read via ``.get``. Keys used: ``"questions"`` (iterable
            of questions, default ``[]``), ``"prompt_pre_text"`` and
            ``"prompt_post_text"`` (both default ``""``).
        max_new_tokens: Generation budget forwarded to the pipeline for each
            question. Defaults to 1000.

    Returns:
        Dict mapping each model name to the list of ``generated_text`` values,
        in question order.
    """
    questions = config.get("questions", [])
    pre_text = config.get("prompt_pre_text", "")
    post_text = config.get("prompt_post_text", "")

    responses: Dict[str, List[str]] = {}
    for model_name in MODELS:
        responses[model_name] = []
        model_pipe = load_pipeline(model_name)
        try:
            for count, question in enumerate(questions, start=1):
                prompt = f"{pre_text} {question}\n\n{post_text}"
                response = model_pipe(
                    prompt, max_new_tokens=max_new_tokens, do_sample=False
                )[0]["generated_text"]
                print("------------------------------------------------")
                print(f"Question: {count}")
                print(response)
                responses[model_name].append(response)
        finally:
            # Release this model before the next one is loaded; otherwise the
            # old pipeline stays referenced while load_pipeline() runs and two
            # models are held in memory at the same time.
            del model_pipe
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
    return responses

In [5]:
def get_maths_questions(questions_file):
    """Load and return the parsed contents of a JSON questions file.

    The file is decoded as UTF-8 explicitly so that non-ASCII characters
    (e.g. mathematical symbols) are read the same way on every platform,
    instead of depending on the locale's default encoding.

    Args:
        questions_file: Path to the JSON file to read.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(questions_file, "r", encoding="utf-8") as f:
        return json.load(f)

In [6]:
def main(questions_file: str = "maths.json"):
    """Load the questions file, evaluate it, and print the results as JSON.

    Args:
        questions_file: Path handed to ``get_maths_questions``. Defaults to
            ``"maths.json"``, the file the notebook originally hard-coded.

    Returns:
        None. The result of ``evaluate`` is printed with ``indent=2``.
    """
    config = get_maths_questions(questions_file)
    result = evaluate(config)
    print(json.dumps(result, indent=2))

In [None]:
# Entry point: runs the full evaluation. This loads every model listed in
# MODELS and generates an answer for each question, then prints the results.
main()