In [None]:
import json
from os import path, getcwd
import pandas as pd

In [None]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Working directory at the moment this cell runs; the `%cd` cells further down
# build their target paths from it.
# NOTE(review): re-running this cell after any `%cd` cell would capture the
# changed directory instead — run it once, from a fresh kernel.
TOP_LEVEL_DIR = getcwd()
TOP_LEVEL_DIR

In [None]:
# TCP port of the model server's API: passed to the server as --api-port and
# given to the connector that queries it.
API_PORT = 8691
API_PORT

In [None]:
# Model selection for this run (LLaVA 1.5-13B, GPTQ quantised).
#   model_name -> argument given to download-model.py
#   model      -> value of the server's --model flag (also used in file names)
#   pipeline   -> value of the server's --multimodal-pipeline flag
MODEL_PARAMS = dict(
    # 8 bits
    model_name="TheBloke/llava-v1.5-13B-GPTQ:gptq-8bit-32g-actorder_True",
    model="TheBloke_llava-v1.5-13B-GPTQ_gptq-8bit-32g-actorder_True",

    # 4 bits
    #model_name="TheBloke/llava-v1.5-13B-GPTQ:gptq-4bit-32g-actorder_True",
    #model="TheBloke_llava-v1.5-13B-GPTQ_gptq-4bit-32g-actorder_True",

    # 4 bits - 128g - actorder_True
    #model_name="TheBloke/llava-v1.5-13B-GPTQ",
    #model="TheBloke_llava-v1.5-13B-GPTQ",

    pipeline="llava-v1.5-13b",
)

In [None]:
# Command line that launches the model server through start_linux.sh.
# Every flag and every flag value is its own list element, so the list is a
# valid argv for subprocess as well as being joinable with spaces for the
# generated job script (the joined string is unchanged by this layout).
SERVER_CMD = [
    "bash",
    "start_linux.sh",
    "--model", MODEL_PARAMS['model'],
    "--multimodal-pipeline", MODEL_PARAMS['pipeline'],
    "--disable_exllama",
    "--loader", "autogptq",
    "--no_inject_fused_attention", # Fused attention causes an error
    "--api", "--api-port", f"{API_PORT}",
    "--no-cache"
]

In [None]:
# Base name shared by the generated job file (<name>.run) and the exported
# Python script (<name>.py); it embeds the model name so runs do not collide.
SCRIPT_NAME = f"run_seed_{MODEL_PARAMS['model']}"

In [None]:
# Text of the batch job script written to <SCRIPT_NAME>.run.
# The #SBATCH header requests 1 node / 1 task, 10 CPUs, 2 GPUs and 64G of
# memory for 36 hours on the `gpu` partition. The body then starts the model
# server in the background from ~/tgw (SERVER_CMD joined with spaces) and runs
# the exported script with ipython from the home directory.
# NOTE(review): the script hard-codes ~/tgw and ~/ rather than TOP_LEVEL_DIR —
# confirm the notebook is meant to be run from the home directory.
SCITAS_PARAMS = f"""#!/bin/bash -l

#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --time 36:00:00
#SBATCH --cpus-per-task=10
#SBATCH --partition=gpu
#SBATCH --qos=gpu
#SBATCH --gres=gpu:2
#SBATCH --mem 64G

cd ~/tgw
{' '.join(SERVER_CMD)} &
cd ~/
ipython {SCRIPT_NAME}.py
"""

In [None]:
# Generate SCITAS job script
with open(f'{SCRIPT_NAME}.run', 'w+') as job_file:
    job_file.write(SCITAS_PARAMS)

# Generate actual python script
!jupyter nbconvert --to script run_seed.ipynb --output {SCRIPT_NAME} \
    -TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags noconvert

# 1. Loading the questions

In [None]:
%cd {TOP_LEVEL_DIR}/qvlm

questions_path = 'datasets/SEED/SEED-Bench.json'
questions_json = json.load(open(questions_path))
questions_df = pd.DataFrame(questions_json['questions'])
questions_df = questions_df.loc[questions_df['question_type_id'] < 10] # Only image questions (dimensions 1-9)
questions_df

# 2. Launching the model server

In [None]:
# Switch to tgw/, the directory the server command is run from in the job
# script (presumably the text-generation-webui checkout — confirm).
%cd {TOP_LEVEL_DIR}/tgw

In [None]:
import threading
import subprocess
%cd {TOP_LEVEL_DIR}/tgw

def get_model_server_process(params: dict):
  if (not path.exists(params['model'])):
    !python download-model.py {params['model_name']}
  return lambda: subprocess.run(SERVER_CMD, check=True, shell=True, close_fds=True)

In [None]:
%cd {TOP_LEVEL_DIR}/qvlm

import socket,time
from eval.connectors import Connector

def wait_for_port(connector: "Connector", delay: int = 3, max_retries: int = 1000):
  """Block until a TCP connection to the connector's endpoint succeeds.

  One connection attempt is made immediately; while it fails, up to
  ``max_retries`` further attempts are made, sleeping ``delay`` seconds
  before each one.

  Args:
    connector: object exposing ``url`` (used as the host) and ``port``.
    delay: seconds to wait before each retry.
    max_retries: maximum number of retries after the first attempt.

  Raises:
    SystemExit: with exit code 1 if the port never accepted a connection.
  """
  conn_info = (connector.url, connector.port)

  def port_is_open() -> bool:
    # Use a fresh socket for every attempt: the state of a socket after a
    # failed connect is unspecified, so reusing it is not portable. The
    # try/finally guarantees that each socket is closed.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
      return sock.connect_ex(conn_info) == 0
    finally:
      sock.close()

  is_open = port_is_open()
  retries = 0
  # Exactly max_retries retries (the previous countdown performed one extra).
  while not is_open and retries < max_retries:
    retries += 1
    print(f"Port is not open, retrying in {delay}s...\t({retries}/{max_retries})")
    time.sleep(delay)
    is_open = port_is_open()

  if is_open:
    print("Port is open!")
  else:
    print(f"Port was not open after n={max_retries} max retries")
    # Raise SystemExit directly instead of calling the interactive `exit`
    # helper, whose behaviour depends on the shell the code runs in.
    raise SystemExit(1)

In [None]:
#%cd {TOP_LEVEL_DIR}/tgw
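# Do not uncomment: starting the server from inside the notebook does not work yet.
# (Note that `PARAMS` in the call below is undefined; the dict is named MODEL_PARAMS.)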
#threading.Thread(target=get_model_server_process(PARAMS), daemon=True).start()

# 3. Computing the responses

In [None]:
# Work from qvlm/: the relative datasets/ paths used by the evaluation cells
# are resolved against the current working directory.
%cd {TOP_LEVEL_DIR}/qvlm

In [None]:
#from eval.connectors.llamafile import LlamafileConnector
from eval.connectors.textgenerationwebui import TextGenerationWebUIConnector

# Connector aimed at the local machine on API_PORT, the same port the server
# command passes as --api-port.
connector = TextGenerationWebUIConnector('127.0.0.1', API_PORT)

In [None]:
from eval.evaluation.SEED import SEED1Evaluator

# Build the evaluator over the filtered image questions and attach the
# connector; img_dir is relative to the current working directory (qvlm/).
evaluator = SEED1Evaluator(questions_df, img_dir='datasets/SEED/SEED-Bench-image')
evaluator.connect(connector)

# Response generation is currently disabled. Presumably these lines wait for
# the server and then write the per-model responses file — confirm before
# re-enabling.
#wait_for_port(connector, delay=3, max_retries = 200)
#time.sleep(30) # The server can take some time to keep booting after the port has been opened...
#evaluator.get_responses(f'datasets/SEED/{MODEL_PARAMS["model"]}_responses.jsonl')

In [None]:
# Evaluate the responses file for the selected model.
# NOTE(review): the get_responses call in the previous cell is commented out,
# so this presumably relies on a responses file left by an earlier run —
# confirm it exists before running from a fresh kernel.
evaluator.eval(f'datasets/SEED/{MODEL_PARAMS["model"]}_responses.jsonl')