In [1]:
import sys
import os
from data import Database
from data.repositories import PreparedQuestionRepository, ModelResultRepository
from benchmarks.mmul import MMULDataProvider, MMULBenchmark, MMULTestPreparation
from benchmarks import BenchmarkFactory, BenchmarkRunner
from models import ModelFactory, TestModel

# --- Run configuration -------------------------------------------------------
# Both values are forwarded unchanged to BenchmarkRunner.run_benchmarks() at
# the bottom of this cell.
max_tests_per_benchmark = 1
num_few_shot = 5

# --- Persistence layer -------------------------------------------------------
# One Database instance is shared by both repositories and the data provider.
db = Database()
db.create_all_tables()
prepared_question_repo = PreparedQuestionRepository(db)
model_result_repo = ModelResultRepository(db)

# --- Factories ---------------------------------------------------------------
model_factory = ModelFactory()
benchmark_factory = BenchmarkFactory()

# --- Models ------------------------------------------------------------------
# Only the local TestModel is active. The OpenAI-backed registrations below are
# kept as disabled toggles; note that OpenAIModel is not among the imports
# visible in this cell, so it must be imported before re-enabling any of them.
# model_factory.register_model("gpt-4o", OpenAIModel, "gpt-4o", rpm_limit=500)
# model_factory.register_model("gpt-4o-mini", OpenAIModel, "gpt-4o-mini", rpm_limit=500)
# model_factory.register_model("gpt-4-turbo", OpenAIModel, "gpt-4-turbo", rpm_limit=500)
# model_factory.register_model("gpt-4", OpenAIModel, "gpt-4", rpm_limit=500)
# model_factory.register_model("gpt-3.5-turbo", OpenAIModel, "gpt-3.5-turbo", rpm_limit=500)
model_factory.register_model("test", TestModel)

# --- MMUL data provider and test preparation ---------------------------------
# NOTE(review): the captured run of this cell reported
# "'Database' object has no attribute 'get_by_data_type'" for both benchmarks.
# Presumably one of the objects built from `db` (MMULDataProvider is the likely
# candidate) expects a repository-like object rather than the raw Database --
# TODO confirm against the constructors before relying on the results.
mmul_data_provider = MMULDataProvider(db)
mmul_test_preparation = MMULTestPreparation(
    mmul_data_provider,
    prepared_question_repo,
)

# --- Benchmarks --------------------------------------------------------------
# Both MMUL variants are registered with the same class and the same
# collaborators; only the registered name differs.
# NOTE(review): nothing in this cell distinguishes 0-shot from 5-shot apart
# from the name -- verify how the runner/benchmark applies `num_few_shot`.
_mmul_benchmark_kwargs = {
    "model_result_repo": model_result_repo,
    "test_preparation": mmul_test_preparation,
}
for _benchmark_name in ("MMUL-0Shot", "MMUL-5Shot"):
    benchmark_factory.register_benchmark(
        _benchmark_name,
        MMULBenchmark,
        **_mmul_benchmark_kwargs,
    )

# --- Execute -----------------------------------------------------------------
# The call is left as the cell's final bare expression so that the notebook
# displays its return value.
runner = BenchmarkRunner(model_factory, benchmark_factory)
runner.run_benchmarks(
    max_tests_per_benchmark=max_tests_per_benchmark,
    num_few_shot=num_few_shot,
)


Running benchmarks...

Running mmul-0shot benchmark for all models...
Error running mmul-0shot for model test: 'Database' object has no attribute 'get_by_data_type'

Finished mmul-0shot benchmark for all models

Running mmul-5shot benchmark for all models...
Error running mmul-5shot for model test: 'Database' object has no attribute 'get_by_data_type'

Finished mmul-5shot benchmark for all models

All benchmarks completed.


{'mmul-0shot': {}, 'mmul-5shot': {}}