In [None]:
import asyncio
import aiohttp
import os
import json as js
from itertools import product
from itertools import cycle
import time
from pathlib import Path
import networkx as nx

import logging

In [2]:
class CustomError(RuntimeError):
    """Error type raised by the helpers in this notebook.

    Used for invalid arguments (unknown dataset name, unsupported requirement,
    wrongly typed flags) and to signal a content-filtered completion.
    """

In [None]:
# Logging setup: every record of level INFO or above goes both to
# logs/async_requests.log and to the console.
log_file = Path("logs/async_requests.log")
# Create the logs/ directory up front; FileHandler cannot open the file if it is missing.
log_file.parent.mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # mode="a" appends, so records from earlier runs are kept.
        logging.FileHandler(log_file, mode="a"),
        # StreamHandler() without an argument writes to sys.stderr.
        logging.StreamHandler()
    ]
)

In [None]:
# NOTE(review): logging.basicConfig() does nothing when the root logger already has
# handlers, so this call only has an effect if the logging-setup cell was not run first.
logging.basicConfig(level=logging.INFO)

# Number of attempts made for one request before it is reported as failed.
MAX_RETRIES = 3
# Base back-off in seconds; create_completion sleeps RETRY_DELAY * attempt between attempts.
RETRY_DELAY = 3 #DELAY BY SECONDS

# File that do_task overwrites with one line per failed request.
RETRY_FILE = "retry_gpt.txt"

# Upper bound on requests in flight; create_completion holds it around each POST.
SEMAPHORE = asyncio.Semaphore(50) # Decrease if toooo big

# Placeholders: replace with sequences (e.g. lists) of endpoint base URLs and API keys.
# cycle() over a plain string yields its characters one at a time, so the placeholder
# strings below are not usable as-is.
# NOTE(review): prefer loading the keys from the environment over hard-coding them here.
endpoints = "YOUR END POINTS"
api_keys = "YOUR API KEYS"
# Endless round-robin iterators over the configured endpoints and keys.
endpoint_cycle = cycle(endpoints)
api_key_cycle = cycle(api_keys)

def get_next_endpoint_and_key():
    """Return the (endpoint, API key) pair to use for the next request.

    Placeholder: it always raises NotImplementedError until an implementation
    is supplied. create_completion unpacks the result as ``model_url, key``.
    """
    # Round-robin version left by the author for when `endpoints` / `api_keys` are set:
    #     return next(endpoint_cycle), next(api_key_cycle)
    raise NotImplementedError


In [None]:
def find_graph_data_by_init(init:str) -> str:
    """Map a dataset name (INIT) to the path of its graph JSON file.

    Returns the relative path for "dolphins", "karate" or "lesmis".
    Raises CustomError for any other value.
    """
    if init == "dolphins":
        return "data/dolphins_transformed.json"
    if init == "karate":
        return "data/Karate club(34 nodes).json"
    if init == "lesmis":
        return "data/lesmis.json"
    # Anything else is not a known dataset.
    raise CustomError(f"Invalid INIT {init}. Init shall be one of \"dolphins\", \"karate\" or \"lesmis\".")

In [None]:
def read_graph_from_edge_list(filename:str):
    """Load a graph JSON file and return its edges as a sorted list.

    The value under the file's "edges" key is handed to nx.Graph, and the
    resulting graph's edges are returned in sorted order.
    NOTE(review): assumes "edges" holds an edge list that nx.Graph accepts
    and whose node labels are mutually comparable - confirm against the data files.
    """
    with open(filename, "r") as handle:
        payload = js.load(handle)

    undirected = nx.Graph(payload["edges"])
    ordered_edges = list(undirected.edges)
    ordered_edges.sort()
    return ordered_edges

In [None]:
def parse_graph_to_str_el(f):
    """Return the sorted edge list of graph file F rendered as text with str()."""
    return str(read_graph_from_edge_list(f))

async def perform_test(session, test_pks:list, debug=False, virtual=True, console=True):
    """Prepare the work item for one test configuration.

    TEST_PKS unpacks to (init, model, requirement, allow_existing).

    Returns a one-element list. With VIRTUAL false it holds the not-yet-awaited
    create_completion coroutine for the request; with VIRTUAL true it holds the
    payload dict built by v_completion, which is also printed when CONSOLE is true.

    Raises CustomError when the requirement is not one of 1, 2, 3, 4.
    """
    init, model, requirement, allow_existing = test_pks

    if requirement not in (1, 2, 3, 4):
        raise CustomError(f"Invalid requirement {requirement}.")

    graph_data = parse_graph_to_str_el(find_graph_data_by_init(init))

    # NOTE(review): VIRTUAL (not CONSOLE) is forwarded as the prompt builder's
    # `console` flag - confirm this is intended.
    content = generate_prompt_q2(graph_data, requirement, allow_existing, console=virtual)

    if virtual:
        # Dry run: build the payload only, nothing is sent.
        preview = v_completion(content, model)
        if console:
            print(preview)
        return [preview]

    # Real run: the coroutine is created here and awaited later by the caller.
    return [create_completion(session, content, init, graph_data, model, requirement, allow_existing, debug=debug)]

In [None]:
async def do_task(d_inits:list=None, d_model=None, d_requirements:list=None, d_allow_existing:bool=None, debug=False, virtual=True, console=True):
    """Run the tests for every combination of dataset, model, requirement and allow_existing.

    Parameters starting with "d_" are optional and restrict one dimension of the task:
        d_inits: dataset names to use; each must be "dolphins", "karate" or "lesmis".
        d_model: a single model name from the supported list.
        d_requirements: requirement numbers to use; each must be 2, 3 or 4.
        d_allow_existing: a bool fixing whether existing layout algorithms are allowed.
    Other parameters:
        debug: forwarded to the request function to select the debug output directories.
        virtual: when true, only the request payloads are built and nothing is sent.
        console: forwarded to perform_test, which prints each virtual payload when it
            is true. When it is false, the total request count is printed here instead.

    With VIRTUAL false the requests are awaited together, RETRY_FILE is rewritten with
    one line per failed request, and the failures are logged.

    Raises CustomError when one of the "d_" arguments is invalid.
    """
    inits = ["dolphins", "karate", "lesmis"]
    models = ["gpt-4o-2024-11-20", "gemini-2.0-flash-001", "deepseek-v3"]
    requirements = list(range(2, 5))
    allow_existings = [True, False]

    if d_inits is not None:
        # Validate the caller's names against the known datasets.
        for d_init in d_inits:
            if d_init not in inits:
                raise CustomError(f"Invalid d_init {d_init}.")
        inits = d_inits

    if d_model is not None:
        if d_model not in models:
            raise CustomError(f"Invalid d_model {d_model}.")
        models = [d_model]

    if d_requirements is not None:
        for d_req in d_requirements:
            if d_req not in requirements:
                raise CustomError(f"Invalid d_requirements {d_requirements}, which {d_req} not in [2, 4].")
        requirements = d_requirements

    if d_allow_existing is not None:
        if not isinstance(d_allow_existing, bool):
            raise CustomError("d_allow_existing must be BOOL")
        allow_existings = [d_allow_existing]

    # No overall deadline; only connecting and each socket read are time-limited.
    timeout_seconds = 180
    session_timeout = aiohttp.ClientTimeout(total=None, sock_connect=timeout_seconds, sock_read=timeout_seconds)
    tasks = []
    async with aiohttp.ClientSession(timeout=session_timeout) as session:
        for test_pk in product(inits, models, requirements, allow_existings):
            tasks.extend(await perform_test(session, test_pk, debug=debug, virtual=virtual, console=console))

        # NOTE(review): the count is printed only when CONSOLE is false, i.e. when the
        # per-payload printing is off - confirm this is the intended meaning of the flag.
        if not console:
            print(f"Request count {len(tasks)}.")

        if not virtual:
            results = await asyncio.gather(*tasks)

            failed_requests = [res for res in results if "error" in res]
            # Opening with "w" empties the file even when nothing failed.
            with open(RETRY_FILE, "w") as f:
                for req in failed_requests:
                    init, model, q, allow_existing = req["PK"]
                    f.write(f"Faild,{init}_{model}_{q}_{allow_existing}_results.txt\n")
            logging.info(f"Failed Requests: {failed_requests}")

            

In [None]:
def generate_prompt_q2(graph_data: str, requirement:int, allow_existing:bool, console=False):

    if type(requirement) != int:
        raise CustomError(f"REQUIREMENT should be int not {type(requirement)}, {requirement}")
    if type(allow_existing) != bool:
        raise CustomError(f"ALLOW_EXISTING must be BOOL {allow_existing}")
    
    match requirement:
        case 2:
            s_requirement = "maintain consistency between Euclidean distance and graph-theoretic distance."
        case 3:
            s_requirement = "minimize edge crossings."
        case 4:
            s_requirement = "keep the community structure clear. You can use module-based community detection algorithms to devide communities."

    if not allow_existing:
        s_allow_existing = f"Notice, you cannot use any existing graph layout algorithms, including but not limited to nx.springlayout(), nx.circularlayout(), et cetera.\n"
    else:
        s_allow_existing = f"Notice, you can use existing graph layout algorithms.\n"

    ss = f"I will provide you a graph with the format of edge_list. Write Python code to generate a layout for the graph. The layout should {s_requirement}\n"\
    f"{s_allow_existing}"\
    f"Do not use matplotlib or networkx to draw the graph visually after positions are determined. The final output of your code shall be the graph layout in json format. Your answer for the Python script shall be returned in a code block.\n"\
    f"<graph data>\n{graph_data}"
    content = [{
                    "type": "text",
                    "text": ss,
        }]

    return content

In [10]:
def parse_result(rs, storage_file):
    """Write the completion text RS to STORAGE_FILE, replacing any previous content.

    The file is written as UTF-8 so that non-ASCII characters in a model answer
    do not depend on the platform's default encoding.

    Raises TypeError when RS is not a string (for example None). The file has
    already been created or truncated at that point. create_completion relies on
    this TypeError to detect a response without content.
    """
    with open(storage_file, "w", encoding="utf-8") as f:
        f.write(rs)

In [None]:
async def create_completion(session, content:list, init:str, graph_data:str, model:str, req:int, allow_existing:bool, debug=False, attempt=1, official=False):
    """Send one chat-completion request, store the answer and retry on failure.

    Parameters:
        session: aiohttp client session used for the POST.
        content: user-message content placed in the request payload.
        init, model, req, allow_existing: identify the test case. They determine the
            output paths and are echoed back in the result as the "PK" tuple.
        graph_data: not used for the request itself; only passed on to retries.
        debug: when truthy, output goes to the "-debug" directories.
        attempt: 1-based attempt counter. A retry calls this function again with
            attempt + 1 after sleeping RETRY_DELAY * attempt seconds.
        official: not used; only passed on to retries.

    Returns:
        {"Success": PK} once the answer text has been written, otherwise
        {"error": <reason>, "PK": PK, "attempts": attempt}. A finish_reason other
        than "stop" (or "content_filter" for gpt-4o-2024-11-20) is reported
        immediately without a retry. All other failures are retried until
        MAX_RETRIES attempts have been made.

    Side effects: creates the output directories, writes the full JSON response and
    the answer text, and removes a stale answer file when a completion is filtered.
    """
    s_allow = "algpermitted" if allow_existing else "algrestricted"
    pk = (init, model, req, allow_existing)
    # Shared tail of every log line for this test case.
    tag = f"{init}, {model}, {req}, {s_allow}"
    exhausted_msg = f"Request failed after {MAX_RETRIES} attempts: {tag}"

    out_suffix = "-debug" if debug else ""
    dir_path = f"results{out_suffix}/{s_allow}/{model}/{req}" # modify structure if inappropriate
    full_res_dir_path = f"full_reses{out_suffix}/{s_allow}/{model}/{req}"

    model_url, key = get_next_endpoint_and_key()
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    # The deepseek-v3 model is requested under the name "deepseek-chat".
    model_alias = "deepseek-chat" if model == "deepseek-v3" else model

    payload = {
        "model": model_alias,
        "max_tokens": 8000,
        "temperature": 0,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": content},
        ],
    }

    os.makedirs(dir_path, exist_ok=True)
    os.makedirs(full_res_dir_path, exist_ok=True)

    storage_path = f"{dir_path}/{init}_{req}_results.txt"
    full_res_path = f"{full_res_dir_path}/{init}_{req}_results.json"

    async def back_off_or_give_up(error, final_log=exhausted_msg):
        """Sleep before the next attempt, or log and return the failure record when none is left."""
        if attempt < MAX_RETRIES:
            await asyncio.sleep(RETRY_DELAY * attempt)
            return None
        logging.error(final_log)
        return {"error": error, "PK": pk, "attempts": attempt}

    # The semaphore is held for the request and for the back-off sleep that follows a failure.
    async with SEMAPHORE:
        try:
            async with session.post(
                url=f"{model_url}/v1/chat/completions",
                json=payload,
                headers=headers
            ) as response:
                if response.status != 200:
                    logging.warning(f"Attempt {tag}, failed for {response.status}")
                    failure = await back_off_or_give_up("MAX_RETRY")
                    if failure is not None:
                        return failure
                else:
                    # Read the body line by line, dropping blank lines, then parse it as JSON.
                    chunks = []
                    async for line in response.content:
                        text = line.decode("utf-8").strip()
                        if text:
                            chunks.append(text)
                    try:
                        result = js.loads("\n".join(chunks))
                    except js.JSONDecodeError as e:
                        logging.warning(f"JSONDecodeError: {e}, {tag}")
                        failure = await back_off_or_give_up("None response")
                        if failure is not None:
                            return failure
                    else:
                        # Keep the raw response even when no usable answer follows.
                        with open(full_res_path, "w") as f:
                            js.dump(result, f)
                        try:
                            choice = result['choices'][0]
                            finish_reason = choice['finish_reason']
                            if finish_reason == "stop":
                                # parse_result raises TypeError when the content is not a string.
                                parse_result(choice['message']['content'], storage_path)
                                return {"Success": pk}
                            if model == "gpt-4o-2024-11-20" and finish_reason == "content_filter":
                                raise CustomError("Content filtered.")
                            print(finish_reason, tag)
                            return {"error": finish_reason, "PK": pk, "attempts": attempt}
                        except (TypeError, KeyError, IndexError) as e:
                            # Missing or empty "choices" / "message" is handled like missing
                            # content, so one malformed response cannot abort the whole batch.
                            logging.warning(f"{e}, {tag}")
                            failure = await back_off_or_give_up("No content in response")
                            if failure is not None:
                                return failure
                        except CustomError as e:
                            logging.warning(f"Content filtered: {tag}")
                            # Drop any answer file already present at this path.
                            if os.path.isfile(storage_path):
                                os.remove(storage_path)
                            failure = await back_off_or_give_up(str(e), f"Request failed for content filtered: {tag}")
                            if failure is not None:
                                return failure
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logging.warning(f"Attempt {tag}, excepts for {e}")
            failure = await back_off_or_give_up(str(e))
            if failure is not None:
                return failure

    # Reached only after a failed attempt that still has retries left.
    return await create_completion(session, content, init, graph_data, model, req, allow_existing, debug, attempt+1, official)
        

In [12]:
def v_completion(content:list, model: str):
    """Build the chat-completion payload for CONTENT and MODEL without sending it.

    "deepseek-v3" is replaced by the name "deepseek-chat"; every other model
    name is used unchanged. Returns the payload as a dict.
    """
    if model == "deepseek-v3":
        served_model = "deepseek-chat"
    else:
        served_model = model

    system_message = {"role": "system", "content": "You are a helpful assistant."}
    user_message = {"role": "user", "content": content}

    return {
        "model": served_model,
        "max_tokens": 8000,
        "temperature": 0,
        "messages": [system_message, user_message],
    }

In [None]:
# Example usage: send real requests (virtual=False) to the gpt-4o model for every
# dataset, requirement and allow_existing combination, writing to the non-debug
# output directories. With console=False only the request count is printed.
# The top-level `await` relies on the notebook's already running event loop.
# NOTE: get_next_endpoint_and_key() must be implemented first; as written it raises
# NotImplementedError as soon as the first request is started.
await do_task(d_model="gpt-4o-2024-11-20", debug=False, virtual=False, console=False)