In [1]:
# Kernel-wide environment variables, set before any other cell runs.
# exec_command() copies os.environ for its child process by default, so the
# lm_eval subprocess launched later inherits these values as well.
# NOTE(review): the GPU index and the absolute cache path are machine-specific;
# adjust both when running on a different host.
%env CUDA_VISIBLE_DEVICES=3
%env HF_HOME=/mnt/LLM

env: CUDA_VISIBLE_DEVICES=3
env: HF_HOME=/mnt/LLM


In [2]:
import os
import subprocess


def exec_command(command, env=None) -> str:
    """Run ``command`` to completion and return its captured output.

    Args:
        command: Forwarded unchanged to ``subprocess.run`` (no ``shell=True``
            is requested, so this is normally an argument list).
        env: Environment mapping for the child process. When ``None``, a copy
            of the current ``os.environ`` is used.

    Returns:
        The decoded stdout, a newline, then the decoded stderr.

    Raises:
        RuntimeError: If the process exits with a non-zero return code. The
            message contains the command and both decoded output streams.
    """
    if env is None:
        env = os.environ.copy()

    process = subprocess.run(command, capture_output=True, env=env)

    # Decode leniently: a tool that emits bytes which are not valid UTF-8 must
    # not turn a successful run (or the real failure report) into an
    # unrelated UnicodeDecodeError.
    stdout = process.stdout.decode(errors='replace')
    stderr = process.stderr.decode(errors='replace')

    if process.returncode != 0:
        raise RuntimeError('command {} failed\nstdout:\n{}\nstderr:\n{}'.format(
            command,
            stdout,
            stderr,
        ))

    return stdout + '\n' + stderr

In [4]:
import json
import os
import tempfile
from transformers import AutoModelForCausalLM


def trim_lm_eval_output(output):
    """Return a new dict holding only four entries of ``output``.

    The kept keys are 'model_name', 'config', 'configs' and 'results', in that
    order. Values are referenced, not copied. A missing key raises ``KeyError``.
    """
    kept_keys = ('model_name', 'config', 'configs', 'results')
    return {key: output[key] for key in kept_keys}


def get_model_size(model):
    """Return the summed size of the model's parameters and buffers.

    Each tensor yielded by ``model.parameters()`` and ``model.buffers()``
    contributes ``nelement() * element_size()`` to the total.
    """
    def _total(tensors):
        # Element count times per-element size, accumulated over the iterable.
        return sum(tensor.nelement() * tensor.element_size() for tensor in tensors)

    return _total(model.parameters()) + _total(model.buffers())


def _only_dir_entry(path):
    """Return the name of the single entry inside directory ``path``.

    Raises:
        RuntimeError: If the directory does not contain exactly one entry.
    """
    entries = os.listdir(path)
    if len(entries) != 1:
        # Explicit error instead of `assert`: it survives `python -O` and
        # reports what was actually found.
        raise RuntimeError(
            'expected exactly one entry in {}, found {}: {}'.format(
                path, len(entries), entries,
            )
        )
    return entries[0]


def run_lm_eval(model_name, task, num_fewshot=1, batch_size=16):
    """Run the ``lm_eval`` CLI for one model/task and return trimmed results.

    The command writes its results below a temporary directory; the single
    file found two single-entry levels below that directory is parsed as JSON
    and reduced with ``trim_lm_eval_output``. The model is then loaded with
    ``AutoModelForCausalLM.from_pretrained`` to add its size.

    Args:
        model_name: Value used for ``pretrained=`` in ``--model_args`` and for
            loading the model afterwards.
        task: Value passed to ``--tasks``.
        num_fewshot: Value passed to ``--num_fewshot``.
        batch_size: Value passed to ``--batch_size``.

    Returns:
        The trimmed result dict with an extra ``'model_size'`` entry holding
        the value returned by ``get_model_size``.

    Raises:
        Exception: Propagated from ``exec_command`` when the command fails.
        RuntimeError: If the output directory layout is not one entry per
            level for the two levels that are descended.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        cmd = [
            'lm_eval',
            '--model', 'hf',
            '--model_args', f'pretrained={model_name}',
            '--tasks', task,
            '--batch_size', str(batch_size),
            '--output_path', tmpdirname,
            '--num_fewshot', str(num_fewshot),
        ]
        exec_command(cmd)

        # Descend two levels, each expected to hold exactly one entry, to
        # reach the results file.
        results_path = tmpdirname
        for _ in range(2):
            results_path = os.path.join(results_path, _only_dir_entry(results_path))

        # Explicit UTF-8 so parsing does not depend on the locale's default
        # encoding.
        with open(results_path, 'r', encoding='utf-8') as file:
            output = trim_lm_eval_output(json.load(file))

    # The temporary directory is no longer needed here, so the model is loaded
    # only after it has been cleaned up.
    # NOTE(review): trust_remote_code=True lets the model repository execute
    # its own code on this machine; only use it with trusted checkpoints.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True, low_cpu_mem_usage=True,
    )
    output['model_size'] = get_model_size(model)

    return output

# Evaluate one checkpoint on one task with the default few-shot count and
# batch size; as the final expression of the cell, the returned dict is shown
# as the cell output.
run_lm_eval(
    model_name='ISTA-DASLab/Meta-Llama-3-8B-AQLM-PV-2Bit-1x16',
    task='mmlu_continuation_high_school_government_and_politics',
)

{'model_name': 'ISTA-DASLab/Meta-Llama-3-8B-AQLM-PV-2Bit-1x16',
 'config': {'model': 'hf',
  'model_args': 'pretrained=ISTA-DASLab/Meta-Llama-3-8B-AQLM-PV-2Bit-1x16',
  'model_num_parameters': 2042171392,
  'model_dtype': 'torch.float16',
  'model_revision': 'main',
  'model_sha': 'ad1d994ac589738b729ece3c334edd6765bed365',
  'batch_size': '16',
  'batch_sizes': [],
  'device': None,
  'use_cache': None,
  'limit': None,
  'bootstrap_iters': 100000,
  'gen_kwargs': None,
  'random_seed': 0,
  'numpy_seed': 1234,
  'torch_seed': 1234,
  'fewshot_seed': 1234},
 'configs': {'mmlu_continuation_high_school_government_and_politics': {'task': 'mmlu_continuation_high_school_government_and_politics',
   'group': 'mmlu_continuation_social_sciences',
   'dataset_path': 'hails/mmlu_no_train',
   'dataset_name': 'high_school_government_and_politics',
   'dataset_kwargs': {'trust_remote_code': True},
   'test_split': 'test',
   'fewshot_split': 'dev',
   'doc_to_text': 'Question: {{question.strip()}