In [7]:
import google.generativeai as genai
import os
import pandas as pd
from os.path import basename
from pathlib import Path
from tqdm import tqdm

In [8]:
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# See https://ai.google.dev/api/python/google/generativeai/GenerativeModel
generation_config = {
  "temperature": 0.5,
  "top_p": 0.95,
  "top_k": 64,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

In [9]:
data_dir = "data/eutils_raw/"
api_docs = []

for _file in Path(data_dir).iterdir():
    with open(_file) as f:
        doc = f.readlines()
    doc = "".join(doc)
    api_docs.append([basename(_file), doc])

api_docs = pd.DataFrame(api_docs, columns=["file", "text"])

In [10]:
with open("data/prompts/apidoc2json.md") as f:
    base_prompt = f.readlines()
base_prompt = "".join(base_prompt)

In [11]:
api_docs

Unnamed: 0,file,text
0,efetch.txt,EFetch\nBase URL\nhttps://eutils.ncbi.nlm.nih....
1,epost.txt,EPost\nBase URL\nhttps://eutils.ncbi.nlm.nih.g...
2,egquery.txt,EGQuery\nBase URL\nhttps://eutils.ncbi.nlm.nih...
3,esummary.txt,ESummary\nBase URL\nhttps://eutils.ncbi.nlm.ni...
4,esearch.txt,ESearch\nBase URL\nhttps://eutils.ncbi.nlm.nih...
5,ecitmatch.txt,ECitMatch\nBase URL\nhttps://eutils.ncbi.nlm.n...
6,espell.txt,ESpell\nBase URL\nhttps://eutils.ncbi.nlm.nih....
7,einfo.txt,EInfo\nBase URL\nhttps://eutils.ncbi.nlm.nih.g...
8,elink.txt,ELink\nBase URL\nhttps://eutils.ncbi.nlm.nih.g...


In [None]:
model = genai.GenerativeModel(
  model_name="gemini-1.5-flash-latest",
  generation_config=generation_config,
)


responses = []
for doc in tqdm(api_docs["text"]):
  prompt = base_prompt.replace("{API_DOC}", doc)

  response = model.generate_content([prompt])
  try:
    responses.append(response.text)
  except ValueError:
    try: 
      print(response.candidates)
    except:
      print("Gemini is throwing a tantrum.")