In [None]:
# Install the Google Generative AI client packages used by this notebook.
# NOTE(review): versions are not pinned, so each run installs whatever is current.
!pip install google-ai-generativelanguage
!pip install google-generativeai



In [None]:
import google.generativeai as genai
import inspect
import json
import os
import sys

from google.colab import drive
from google.colab import userdata
from io import StringIO

# Mount Google Drive at /content/drive so its files can be read and written.
drive.mount('/content/drive')

Mounted at /content/drive


# Load Papers

In [None]:
# @title Task Specific
# Colab form cell: defines the task, prompt and output locations that the
# helper functions in this notebook read as module-level constants.

# Root directory holding the `data/`, `prompts/` and `inference/` subfolders
# referenced in this notebook; the assert fails fast when it is missing.
BASEDIR = '/content/drive/MyDrive/benchmarks/public_release'  # @param {type:"string"}
assert(os.path.exists(BASEDIR))

TASK_NAME = "qecc_65" # @param {type:"string"}  # specify valid names
# NOTE: TASK_DIR already ends with '/', so INPUT_DIR below contains '//'.
TASK_DIR = f"{BASEDIR}/data/{TASK_NAME}/"

# Prompt file stem; run_eval_loop reads {BASEDIR}/prompts/{PROMPT_FULL_NAME}.
# NOTE(review): with the empty default this resolves to '.txt' and EXP_DIR gets
# an empty path component -- set PROMPT_NAME before running.
PROMPT_NAME = "" # @param {type:"string"}
PROMPT_FULL_NAME = PROMPT_NAME+".txt"

# Where inference outputs are written, and where per-paper input JSONs are read.
DIRPATH = f'{BASEDIR}/inference'
INPUT_DIR= f'{TASK_DIR}/inputs/'

# Results of this run are stored under .../{PROMPT_NAME}/gemini/{INFERENCE_TRIAL}/success/.
INFERENCE_TRIAL = "run_0" # @param {type:"string"}
EXP_DIR = f"{DIRPATH}/{TASK_NAME}/{PROMPT_NAME}/gemini/{INFERENCE_TRIAL}/success/"

# API key handed to genai.configure() in run_eval_loop.
# NOTE(review): avoid saving a real key in the notebook; `userdata` is imported
# above but unused in the cells shown here -- consider reading the key from it.
MY_API_KEY=None # @param




# Create the results directory the first time this configuration is used.
if not os.path.exists(EXP_DIR):
  os.makedirs(EXP_DIR)
  print(f"Created directory {EXP_DIR}")

print(f"The result of your inference is going to be saved in {EXP_DIR}")

The result of your inference is going to be saved in /content/drive/MyDrive/benchmarks/public_release/inference/qecc_65/describe_code_in_paper/gemini/run_0/success/


In [None]:
def get_paper_list(inputdir):
  """Return the identifiers of all papers stored as JSON files in a directory.

  Args:
    inputdir: Directory whose entries are scanned (non-recursively).

  Returns:
    Sorted list of the entry names that end in '.json', with that suffix
    removed. Empty list when no such entry exists.
  """
  suffix = '.json'
  # os.listdir() yields entries in arbitrary order; sorting makes the
  # processing order reproducible across runs and machines.
  return sorted(
      name[:-len(suffix)]
      for name in os.listdir(inputdir)
      if name.endswith(suffix)
  )

In [None]:
# Discover every paper that has an input JSON for the selected task.
papers = get_paper_list(INPUT_DIR)

print(
    f"{len(papers)} papers are loaded. \nHere is the list: \n{papers}"
)

65 papers are loaded. 
Here is the list: 
['1502.05267', 'quant-ph_9705052', '1802.07419', '1712.07666', '1709.08658', '2003.02717', '2203.16534', '1503.08800', 'quant-ph_9711049', '2209.11405', '2311.08653', '2007.09154', 'quant-ph_9703002', '1709.04471', '2007.12152', '1910.10746', '1801.05897', '2107.02194', '2303.04798', '1903.03937', '2311.07679', '2201.07802', '1707.02308', 'quant-ph_9810055', '2106.02649', 'cond-mat_0607736', '1503.06237', '2210.16957', '1505.02576', '2008.09495', '2311.13040', '1703.02973', '1809.09801', '1907.09528', '2306.11621', '2009.03921', '2402.07476', '2312.04522', '2212.09935', '2309.16503', '2010.06628', '1906.11394', '1603.04442', 'quant-ph_0008040', 'quant-ph_0502086', 'quant-ph_9711021', 'cond-mat_0010440', '2303.02432', '2210.10808', 'quant-ph_0702075', 'quant-ph_0701020', '1602.00008', '1710.04631', 'quant-ph_0605138', 'quant-ph_0210097', '2110.11510', '1604.07925', 'quant-ph_9906114', '2112.01446', '1501.07779', '2203.00103', 'cs_0509062', 'cond

# Run inference

In [None]:
#@title Helper Functions

def load_prompt(filename: str) -> str:
  """Read a prompt template from disk.

  Args:
    filename: Path of the text file holding the prompt.

  Returns:
    The file contents with leading and trailing whitespace removed.
  """
  with open(filename, 'r') as prompt_file:
    return prompt_file.read().strip()


def prepare_task_for_paper(paper: str,
                           prompt_path: str,
                           )-> dict[str, str]:
  """Build the inference record for one paper.

  Loads `{INPUT_DIR}/{paper}.json`, reads the prompt template found at
  `prompt_path`, and substitutes the paper's text for the `{{text}}`
  placeholder in that template.

  Args:
    paper: Paper identifier, i.e. the input file name without '.json'.
    prompt_path: Path of the prompt template file.

  Returns:
    Dict with 'record_id' (copied from the input JSON), 'prompt_text' (the
    filled-in prompt) and 'response_text' (empty string, for the caller to
    fill in).

  Raises:
    KeyError: If the input JSON has no 'text' or no 'record_id' entry.
  """
  paper_input = f'{INPUT_DIR}/{paper}.json'
  # Use a context manager so the input file handle is closed promptly.
  with open(paper_input, 'r') as input_file:
    inputs = json.load(input_file)

  # NOTE(review): assumes the paper body is stored under the 'text' key,
  # mirroring the '{{text}}' placeholder name -- confirm against the input files.
  if 'text' not in inputs:
    raise KeyError(f"'text' entry missing from {paper_input}")

  raw_prompt = load_prompt(prompt_path)
  # Insert the paper's content; previously the *path* of the JSON file was
  # substituted, so the model never saw the paper itself.
  prompt = raw_prompt.replace('{{text}}', inputs['text'])
  return {
          'record_id': inputs['record_id'],
          'prompt_text':  prompt,
          'response_text': ''
          }


def query_model(query_prompt: str,
                model_name: str = 'gemini-1.5-pro-latest'
                ) -> str:
    """Send one prompt to a generative model and return the reply text.

    Args:
      query_prompt: Full prompt passed to `generate_content`.
      model_name: Name given to `genai.GenerativeModel`.

    Returns:
      The `text` attribute of the model response.
    """
    gemini = genai.GenerativeModel(model_name=model_name)
    return gemini.generate_content(query_prompt).text


def run_eval_loop(paper_list,
                  results_dir: str,
                  ):
  """Query the model for every paper that has no saved result yet.

  For each paper the prompt is built with `prepare_task_for_paper`, sent to
  `query_model`, and the resulting record is written to
  `{results_dir}/{paper}.json`. Papers whose result file already exists are
  skipped, so an interrupted run can be resumed by calling this again.

  Args:
    paper_list: Iterable of paper identifiers.
    results_dir: Directory that receives one JSON file per paper.

  Returns:
    The record of the last paper actually processed by this call, or None
    when nothing was processed (empty list, or every paper was skipped).
  """
  genai.configure(api_key=MY_API_KEY)
  # Initialised up front: without this the final `return` raised
  # UnboundLocalError whenever no paper went through the processing branch.
  out_dict = None
  for paper in paper_list:
    print(paper)
    outpath = f'{results_dir}/{paper}.json'
    if os.path.exists(outpath):
      print(f'Skipping since result for {paper} already exists.')
      continue

    out_dict = prepare_task_for_paper(
        paper=paper,
        prompt_path=f'{BASEDIR}/prompts/{PROMPT_FULL_NAME}'
    )
    out_dict['response_text'] = query_model(out_dict['prompt_text'])
    # Context manager guarantees the result is flushed and the handle closed.
    with open(outpath, 'w') as result_file:
      json.dump(out_dict, result_file)
  return out_dict


In [None]:
# Run inference for all loaded papers, writing results into EXP_DIR; as the
# cell's last expression, the returned record is displayed below the log.
run_eval_loop(papers, EXP_DIR)

1502.05267
quant-ph_9705052
1802.07419
1712.07666
1709.08658
2003.02717
2203.16534
1503.08800
quant-ph_9711049
2209.11405
2311.08653
2007.09154
quant-ph_9703002
1709.04471
2007.12152
1910.10746
1801.05897
2107.02194
2303.04798
1903.03937
2311.07679
2201.07802
1707.02308
quant-ph_9810055
2106.02649
cond-mat_0607736
1503.06237
2210.16957
1505.02576
2008.09495
2311.13040
1703.02973
1809.09801
1907.09528
2306.11621
2009.03921
2402.07476
2312.04522
2212.09935
2309.16503
2010.06628
1906.11394
1603.04442
quant-ph_0008040
quant-ph_0502086
quant-ph_9711021
cond-mat_0010440
2303.02432
2210.10808
quant-ph_0702075
quant-ph_0701020
1602.00008
1710.04631
quant-ph_0605138
quant-ph_0210097
2110.11510
1604.07925
quant-ph_9906114
2112.01446
1501.07779
2203.00103
cs_0509062
cond-mat_9707273
1805.01474
1708.08474


{'record_id': '1708.08474',
 'prompt_text': 'Fill in a YAML file for the code described in the attached paper according to the prescription defined in the YAML template that starts on the next paragraph. Fields are to be filled only if they are directly relevant to the code introduced in the paper. Be sure to extract any quantitative data like thresholds and code rates. Above all, be concise! If you cannot explain something technical in detail, do not try to explain it. If something is not detailed in the paper, do not mention it.\n\n#######################################################\n## This is a code entry in the error correction zoo. ##\n##       https://github.com/errorcorrectionzoo       ##\n#######################################################\n\n# Use UTF-8 unicode encoding\n# AMS-TeX commands are rendered inside \\( ... \\) using MathJaX.\n# Allowed external bibliographic references are\n#    \\cite{arXiv:#.#} or \\cite{arXiv:quant-ph/#} (PREFERRED),\n#    \\cite{doi:#},