In [None]:
import json
import os
import time
from typing import Dict, List

import requests

# Root of the AI71 REST API. Endpoint names are appended directly to this
# string (e.g. f"{BASE_URL}check_budget"), so the trailing slash is required.
BASE_URL = "https://api.ai71.ai/v1/"

In [None]:
# Read the API key from the environment instead of committing it to the notebook.
# Set it before starting Jupyter, e.g. `export AI71_API_KEY=<your key>`.
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# in the file history and should be revoked.
API_KEY = os.environ.get("AI71_API_KEY", "")
if not API_KEY:
    print("WARNING: AI71_API_KEY is not set; requests will be sent without a valid API key.")

In [None]:
def check_budget(timeout: float = 30.0) -> None:
    """Print the JSON body returned by the ``check_budget`` endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without one a stalled connection
            would hang the cell indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    resp = requests.get(
        f"{BASE_URL}check_budget",
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=timeout,
    )
    resp.raise_for_status()
    print(json.dumps(resp.json(), indent=4))

In [None]:
# Print the budget information the API reports for the configured key.
check_budget()

{
    "remaining_budget": 10000
}


In [None]:
def bulk_generate(n_questions: int, question_categorizations: List[Dict], user_categorizations: List[Dict],
                  timeout: float = 30.0):
    """Submit a bulk generation request and block until its results are ready.

    Args:
        n_questions: Value sent as ``n_questions`` in the request body.
        question_categorizations: Categorization dicts sent as
            ``question_categorizations``.
        user_categorizations: Categorization dicts sent as
            ``user_categorizations``.
        timeout: Seconds to wait for the submission request itself. ``requests``
            applies no timeout by default, so a stalled connection would
            otherwise hang the cell.

    Returns:
        Whatever ``wait_for_generation_to_finish`` returns for the new request.

    Raises:
        requests.HTTPError: If the submission is rejected with an error status.
        requests.Timeout: If the submission does not answer within ``timeout``.
        KeyError: If the submission response carries no ``request_id``.
    """
    resp = requests.post(
        f"{BASE_URL}bulk_generation",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "n_questions": n_questions,
            "question_categorizations": question_categorizations,
            "user_categorizations": user_categorizations,
        },
        timeout=timeout,
    )
    resp.raise_for_status()

    # Parse the body once, and print it before indexing so that an unexpected
    # payload is visible even when "request_id" is missing.
    submission = resp.json()
    print(json.dumps(submission, indent=4))
    request_id = submission["request_id"]

    return wait_for_generation_to_finish(request_id)


def wait_for_generation_to_finish(request_id: str, poll_interval: float = 5.0,
                                  max_wait: float = float("inf"), timeout: float = 30.0):
    """Poll ``fetch_generation_results`` until the request reports ``"completed"``.

    Args:
        request_id: Identifier sent as the ``request_id`` query parameter.
        poll_interval: Seconds to sleep between polls.
        max_wait: Upper bound, in seconds, on the total time spent polling.
            The default (infinity) keeps the original wait-forever behaviour;
            pass a finite value to avoid blocking the notebook indefinitely
            when a request never reaches ``"completed"``.
        timeout: Seconds to wait for each individual HTTP request.

    Returns:
        The parsed JSON body of the first response whose ``status`` is
        ``"completed"``.

    Raises:
        requests.HTTPError: If a poll answers with an error status.
        requests.Timeout: If a single poll does not answer within ``timeout``.
        TimeoutError: If ``max_wait`` elapses before completion.
    """
    deadline = time.monotonic() + max_wait
    first_print = True
    while True:
        resp = requests.get(
            f"{BASE_URL}fetch_generation_results",
            headers={"Authorization": f"Bearer {API_KEY}"},
            params={"request_id": request_id},
            timeout=timeout,
        )
        resp.raise_for_status()
        payload = resp.json()  # parse once and reuse below

        if payload["status"] == "completed":
            print('completed')
            print(json.dumps(payload, indent=4))
            return payload

        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Request {request_id} did not complete within {max_wait} seconds "
                f"(last status: {payload['status']!r})"
            )

        # Progress indicator: one header, then a dot per poll; flush so the
        # dots appear immediately instead of when the buffer happens to drain.
        if first_print:
            first_print = False
            print("Waiting for generation to finish...", end='', flush=True)
        else:
            print('.', end='', flush=True)
        time.sleep(poll_interval)

In [None]:
# User categorization passed to bulk_generate via `user_categorizations`:
# two equally weighted personas, expert and novice.
user_expertise_categorization = {
    "categorization_name": "user-expertise",
    "categories": [
        {
            "name": "expert",
            "description": "an expert on the subject discussed in the documents, therefore he asks complex questions.",
            "probability": 0.5
        },
        {
            "name": "novice",
            # Fixed typo: previously read "the topic discussed in the topic".
            "description": "a person with very basic knowledge on the topic discussed in the documents. Therefore, he asks very simple questions.",
            "probability": 0.5
        }
    ]
}

In [None]:
# Question categorization describing how a question is phrased: natural
# language (concise / verbose) or keyword-style search query (short / long).
question_formulation_categorization = {
    "categorization_name": "question-formulation",
    "categories": [
      {
        "name": "concise and natural",
        "description": "a concise direct natural question consisting of a few words.",
        "probability": 0.35
      },
      {
        "name": "verbose and natural",
        "description": "a relatively long question consisting of more than 9 words.",
        "probability": 0.35
      },
      {
        "name": "short search query",
        # Fixed: the opening "(" was missing, leaving an unbalanced ")" in the prompt.
        "description": ("phrased as a typed web query for search engines "
                        "(only keywords, without punctuation and without a natural-sounding structure)."
                        " It consists of less than 7 words."),
        "probability": 0.15
      },
      {
        "name": "long search query",
        # Fixed: the opening "(" was missing, leaving an unbalanced ")" in the prompt.
        "description": ("phrased as a typed web query for search engines "
                        "(only keywords, without punctuation and without a natural-sounding structure)."
                        " It consists of more than 6 words."),
        "probability": 0.15
      }
    ]
}

# Question categorization distinguishing questions with and without a short
# leading premise about the user.
premise_categorization = dict(
    categorization_name="premise-categorization",
    categories=[
        dict(
            name="without premise",
            description="a question that does not contain any premise or any information about the user.",
            probability=0.7,
        ),
        dict(
            name="with premise",
            description=(
                "a question starting with a very short premise, where the users reveal "
                "their needs or some information about themselves."
            ),
            probability=0.3,
        ),
    ],
)

In [None]:
# Question categorization by the kind of answer sought. Each category also
# carries an `is_multi_doc` flag; the two categories whose descriptions require
# information from two documents set it to True.
answer_type_categorization = dict(
    categorization_name="answer-type",
    categories=[
        dict(
            name="factoid",
            description="a question seeking a specific, concise piece of information or a short fact about a particular subject, such as a name, date, or number.",
            probability=0.2,
            is_multi_doc=False,
        ),
        dict(
            name="multi-aspect",
            description=(
                "A question about two different aspects of the same entity/concept. "
                "For example: 'What are the advantages of AI-powered diagnostics, and what are the associated risks of bias in medical decision-making?', "
                "'How do cryptocurrencies enable financial inclusion, and what are the security risks associated with them?'. "
                "The information required to answer the question needs to come from two documents, "
                "specifically, the first document must provide information about the first aspect, while the second must provide information about the second aspect."
            ),
            probability=0.3,
            is_multi_doc=True,
        ),
        dict(
            name="comparison",
            description=(
                "a comparison question that requires comparing two related concepts or entities. "
                "The comparison must be natural and reasonable, i.e., comparing two entities by a common attribute which is meaningful and relevant to both entities. "
                "For example: 'Who is older, Glenn Hughes or Ross Lynch?', 'Are Pizhou and Jiujiang in the same province?', "
                "'Pyotr Ilyich Tchaikovsky and Giuseppe Verdi have this profession in common'. "
                "The information required to answer the question needs to come from two documents, specifically, "
                "the first document must provide information about the first entity/concept, while the second must provide information about the second entity/concept."
            ),
            probability=0.5,
            is_multi_doc=True,
        ),
    ],
)

In [None]:
# Submit the generation job with all three question categorizations and the
# single user categorization; bulk_generate blocks until the job has finished.
results = bulk_generate(
    n_questions=20,
    question_categorizations=[
        question_formulation_categorization,
        premise_categorization,
        answer_type_categorization,
    ],
    user_categorizations=[user_expertise_categorization],
)

{
    "request_id": "d5b529c8-0c5e-43b6-a679-e01e01b1bba8",
    "type": "async"
}
Waiting for generation to finish..............................completed
{
    "status": "completed",
    "file": "https://s3.amazonaws.com/data.aiir/data_morgana/web_api/results_id_91b042e9-d65e-473e-b192-f5520912909c_user_id_bad19819-ee30-4bc3-995b-cb08b8ccc233.jsonl?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA2UC3AHBF6O75X7VR%2F20250415%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250415T124217Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEKT%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJGMEQCIBegswPFO4GuLBOM4YZ3axljQBRVYQWxEUGs5tuqMkFyAiAFZhPYsZGu4Dq6chm%2BjjRgxvcOECfpzIA%2Bx5e1inHOeyq6BQgtEAAaDDczMDMzNTI5NTU2MyIMwwJq%2FrZhZfxrx2%2BsKpcFfNyNL5Tfga5WDRNluPauV6NUCfNLx0HV2PiQZJz%2FRdbh8vJ7bbtmkk4DyZPsbppnuWmePTziUkWfbkdoXDo8UIauxKicUMGANgxlefCJWOh59jFPDgFOtar6C%2FNxb4i5%2FaCOHfmR5gCUiDsnIZDMmwGxySP5Zxm6hnUgEsimMRBDKgZx1SVeZbhAGg9JVgRcE7ceWwWaSuiO2MCNqe6ZIn4

In [None]:
# Download the results file referenced by results["file"]; it is parsed below
# as one JSON object per line.
response = requests.get(results["file"], timeout=60)
# Fail loudly on an HTTP error (e.g. an expired link) instead of trying to
# parse an error page as JSON.
response.raise_for_status()
# Skip blank lines so json.loads never receives an empty string.
qa_pairs = [json.loads(line) for line in response.text.splitlines() if line.strip()]

In [None]:
# Display the first parsed record as a sanity check of the downloaded data.
qa_pairs[0]

{'question': "I'm worried about environmental issues and want to know how biochar affects carbon sequestration and what are its effects on soil pollution?",
 'answer': 'Biochar is effective for carbon sequestration, as it can sequester about 489 kg of CO2 per 1,000 kg of organic material, storing carbon in a stable form for over 1,000 years and creating a carbon sink that helps slow down climate change. Regarding pollution, biochar is capable of binding with heavy metals and chemicals from agricultural and road runoff, making it useful for environmental remediation. It can bind toxic substances like heavy metals and pesticides, which is important not only for healthy plants but also for protecting clean water and groundwater. Research by various organizations including the USDA Forest Service and Environmental Protection Agency is ongoing to better understand its effectiveness in binding with these pollutants.',
 'context': ['Estimated reading time: 3 minutes\nBiochar has been used ove

In [None]:
def get_all_requests(timeout: float = 30.0):
    """Return the parsed JSON body of the ``get_all_requests`` endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without one a stalled connection
            would hang the cell indefinitely.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    resp = requests.get(
        f"{BASE_URL}get_all_requests",
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()

def print_request_summary(requests):
    """Print one ``<request_id> : <status>`` line per entry of a request listing.

    Args:
        requests: Listing whose ``'data'`` key holds dicts with ``request_id``
            and ``status`` keys, e.g. the value returned by
            ``get_all_requests()``. The parameter name shadows the ``requests``
            module inside this function only; it is kept so existing keyword
            callers continue to work.

    Prints ``There are no requests`` and returns when ``'data'`` is missing or
    empty. Previously the missing-key case printed the message and then raised
    ``KeyError`` on the loop below.
    """
    if 'data' not in requests or not requests['data']:
        print('There are no requests')
        return
    for entry in requests['data']:
        print(f"{entry['request_id']} : {entry['status']}")

In [None]:
# Deliberately NOT named `requests`: rebinding that name would replace the
# imported HTTP library with the returned listing and break every cell that
# calls requests.get / requests.post when it is (re-)run afterwards.
all_requests = get_all_requests()
print_request_summary(all_requests)

d5b529c8-0c5e-43b6-a679-e01e01b1bba8 : completed
