In [None]:
import ast
import asyncio
import aiohttp
import os
import json as js
import glob
import re
from itertools import product
from itertools import cycle
import time
from pathlib import Path
import random

import logging

In [2]:
class CustomError(RuntimeError):
    """Error raised by the helpers in this file for invalid task arguments,
    missing graph data files and content-filtered responses."""

In [None]:
# Log every record both to a persistent file (appended across runs) and to the console.
log_file = Path("logs") / "async_requests.log"
# The file handler cannot create missing directories itself.
log_file.parent.mkdir(exist_ok=True, parents=True)

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[logging.FileHandler(log_file, mode="a"), logging.StreamHandler()],
)

In [None]:
# basicConfig does nothing when the root logger already has handlers,
# so this call only matters if no logging setup ran before this cell.
logging.basicConfig(level=logging.INFO)

# Maximum number of attempts per request before it is reported as failed.
MAX_RETRIES = 3
RETRY_DELAY = 3 # Base delay in seconds; create_completion sleeps RETRY_DELAY * attempt between attempts.

# File that do_task rewrites with one line per failed request.
RETRY_FILE = "retry_gpt.txt"

SEMAPHORE = asyncio.Semaphore(50) # Limit on concurrently running requests; decrease it if 50 is too many.

# Placeholders: replace with the real endpoints and API keys before sending requests.
# NOTE(review): cycle() over a plain string yields single characters, so these
# presumably have to be sequences (e.g. lists) of strings -- confirm before use.
endpoints = "YOUR END POINTS"
api_keys = "YOUR API KEYS"

# Endless round-robin iterators over the configured endpoints and keys.
endpoint_cycle = cycle(endpoints)
api_key_cycle = cycle(api_keys)

def get_next_endpoint_and_key():
    """Return the (endpoint, api_key) pair to use for the next request.

    This is a stub that always raises NotImplementedError. create_completion
    unpacks the return value as ``model_url, key``, so an implementation must
    return a 2-tuple.

    Raises:
        NotImplementedError: always, until an implementation is provided.
    """
    raise NotImplementedError()
    # Suggested round-robin implementation, left disabled by the author:
    #return next(endpoint_cycle), next(api_key_cycle)

In [5]:
def extract_ids(directory):
    """Collect the distinct integer prefixes of the entries in DIRECTORY.

    An entry contributes when its name starts with one or more digits followed
    by an underscore (``12_Star_5.txt`` -> 12); every other entry is ignored.

    Returns:
        The ids as a list sorted in ascending order, without duplicates.
    """
    leading_id = re.compile(r'^(\d+)_')
    hits = (leading_id.match(entry) for entry in os.listdir(directory))
    return sorted({int(hit.group(1)) for hit in hits if hit})

In [None]:
def find_graph_data_by_pk(fmt:str, p_alias:str, size:str, index:int) -> str:
    """Find the path of graph data file for (FMT, P_ALIAS, SIZE, INDEX).

    Files are looked up as
    ``data/{size}_{term}_graphs/{fmt}/{p_alias}/{index}_{p_alias}_*.txt`` where
    term is "general" for the "SBM" alias and "special" otherwise.

    Args:
        fmt: graph format; only "edge_list" is accepted.
        p_alias: pattern alias, used as directory name and in the file name.
        size: "exlarge" or "small".
        index: integer id that prefixes the file name.

    Returns:
        The path of the matching file as returned by glob. When several files
        match, the first one in sorted order is returned.

    Raises:
        CustomError: for an invalid size or fmt, or when no file matches.
    """
    if size not in ["exlarge", "small"]:
        raise CustomError(f"Invalid size {size}.")
    if fmt not in ["edge_list"]:
        raise CustomError(f"Invalid fmt {fmt}.")

    term = "general" if p_alias == "SBM" else "special"
    directory = f"data/{size}_{term}_graphs/{fmt}/{p_alias}"

    # Sort so the choice is deterministic when several files share the prefix.
    candidates = sorted(glob.glob(f"{directory}/{index}_{p_alias}_*.txt"))

    for file in candidates:
        # Check the id on the file name only: glob may return OS-specific path
        # separators, which a pattern written with "/" would never match.
        match = re.match(r"(\d+)_.*\.txt$", os.path.basename(file))
        if match and int(match.group(1)) == index:
            return file

    raise CustomError(f"Filename not found for {fmt} {p_alias} {size} {index}")

In [None]:
def read_graph_from_edge_list(filename:str):
    """Read a whitespace-separated edge list file.

    Every non-blank line must hold exactly two integers, the endpoints of one
    edge. Blank lines (for example a trailing empty line) are skipped.

    Args:
        filename: path of the edge list file.

    Returns:
        A tuple ``(nodes, edges)``: the sorted list of distinct node ids and
        the sorted list of ``(node1, node2)`` tuples in file orientation.

    Raises:
        ValueError: if a non-blank line does not consist of exactly two integers.
    """
    edges = []
    nodes = set()

    with open(filename, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # An empty line carries no edge; unpacking it would fail.
                continue
            node1, node2 = map(int, fields)

            edges.append((node1, node2))
            nodes.update((node1, node2))

    return sorted(nodes), sorted(edges)

In [8]:
def get_p_alias(pattern: str) -> str:
    """Translate a pattern name into its alias.

    "clustered graph" maps to "SBM"; "Cycle", "Star", "Path" and "Grid" map to
    themselves.

    Raises:
        CustomError: for any other pattern name.
    """
    if pattern == "clustered graph":
        return "SBM"
    if pattern in ("Cycle", "Star", "Path", "Grid"):
        return pattern
    raise CustomError(f"Invalid pattern {pattern}")

In [None]:
def parse_graph_to_str_el(f):
    """Load the edge list file F and return ``(nodes, edges_as_text)``.

    The nodes are returned as read; the edge list is converted with ``str()``
    so it can be embedded into a prompt.
    """
    node_list, edge_list = read_graph_from_edge_list(f)
    edge_text = str(edge_list)
    return node_list, edge_text

async def perform_test(session, test_pks:list, p_alias:str, ids:list, debug=False, virtual=True, console=True):
    """Build the requests for one test configuration.

    Args:
        session: aiohttp session handed on to create_completion.
        test_pks: one configuration ``(fmt, model, size, requirement, allow_existing)``.
        p_alias: pattern alias of the graphs to test.
        ids: mapping from size to the list of graph ids to use; only
            ``ids[size]`` for the size in TEST_PKS is read.
        debug: when true, only the first graph id is processed.
        virtual: when true, no request is prepared; the payload dicts from
            v_completion are collected instead.
        console: when true (and VIRTUAL is true), each payload is printed.

    Returns:
        A list with one entry per processed graph: un-awaited create_completion
        coroutines when VIRTUAL is false, payload dicts otherwise.

    Raises:
        CustomError: if the requirement is not valid for P_ALIAS (2-4 for
            "SBM", 2-3 otherwise), or from the helpers called here.
    """
    fmt, model, size, requirement, allow_existing = test_pks

    valid_requirements = range(2, 5) if p_alias == "SBM" else range(2, 4)
    if requirement not in valid_requirements:
        raise CustomError(f"Invalid requirement {requirement} for {p_alias}.")

    # Debug runs use the first graph only. Selecting it up front avoids looking
    # up (and possibly failing on) a second data file that is never used.
    selected_ids = ids[size][:1] if debug else ids[size]

    tasks = []
    for i in selected_ids:
        f = find_graph_data_by_pk(fmt, p_alias, size, i)
        _nodes, graph_data = parse_graph_to_str_el(f)

        content = generate_prompt_q2(graph_data, fmt, requirement, allow_existing, console=console)
        if not virtual:
            tasks.append(create_completion(session, content, p_alias, graph_data, fmt, model, size, i, requirement, allow_existing, debug=debug))
        else:
            v_c = v_completion(content, model)
            tasks.append(v_c)
            if console:
                print(v_c)
    return tasks

In [None]:
async def do_task(patterns: list, d_ids:list=None, d_fmt=None, d_model=None, d_size=None, d_requirements:list=None, d_allow_existing:bool=None, debug=False, virtual=True, console=True):
    """Perform tests on the task data determined by PATTERNS.

    Without the optional ``d_`` parameters every combination of format, model,
    size, requirement (2 and 3, plus 4 for clustered graphs) and
    allow-existing flag is tested. Each ``d_`` parameter pins one dimension.

    Args:
        patterns: list of pattern names accepted by get_p_alias.
        d_ids: graph ids to use for every size instead of the ids found on
            disk (for "SBM" up to 20 randomly sampled ids per size are used).
        d_fmt: restrict to this format.
        d_model: restrict to this model.
        d_size: restrict to this size ("exlarge" or "small").
        d_requirements: restrict to these requirements (values 2-4; 4 only
            when every pattern is "clustered graph").
        d_allow_existing: restrict to this allow-existing flag.
        debug: test with a single graph per configuration.
        virtual: build and inspect the payloads without sending any request.
        console: when true, perform_test prints each virtual payload; when
            false, the total request count is printed instead.

    Side effects (only when VIRTUAL is false): awaits all requests, rewrites
    RETRY_FILE with one line per failed request and logs the failures.

    Raises:
        CustomError: for invalid arguments.
    """
    fmts = ["edge_list"]
    models = ["gpt-4o-2024-11-20", "deepseek-v3", "gemini-2.0-flash-001"]
    sizes = ["exlarge","small"]
    requirements = list(range(2, 4))
    allow_existings = [True, False]

    if not isinstance(patterns, list):
        raise CustomError("Patterns must be list.")

    if d_fmt is not None:
        if d_fmt not in fmts:
            raise CustomError(f"Invalid d_fmt {d_fmt}.")
        fmts = [d_fmt]

    if d_model is not None:
        if d_model not in models:
            raise CustomError(f"Invalid d_model {d_model}.")
        models = [d_model]

    if d_size is not None:
        if d_size not in sizes:
            raise CustomError(f"Invalid d_size {d_size}.")
        sizes = [d_size]

    if d_requirements is not None:
        for d_req in d_requirements:
            if d_req not in range(2, 5):
                raise CustomError(f"Invalid d_requirements {d_requirements}, which {d_req} not in [2, 4].")
            # Requirement 4 is only defined for clustered graphs. Check every
            # pattern (not just the first) and tolerate an empty pattern list.
            if d_req == 4 and any(p != "clustered graph" for p in patterns):
                raise CustomError("Invalid requirement KEEP COMMUNITY STRUCTURE for non-clustered graph.")
        requirements = d_requirements

    if d_allow_existing is not None:
        if not isinstance(d_allow_existing, bool):
            raise CustomError("d_allow_existing must be BOOL")
        allow_existings = [d_allow_existing]

    timeout_seconds = 180
    session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout_seconds,sock_read=timeout_seconds)
    tasks = []
    async with aiohttp.ClientSession(timeout=session_timeout)as session:
        for p in patterns:
            p_alias = get_p_alias(p)
            term = "general" if p_alias == "ER" or p_alias == "SBM" else "special"

            # Only the requested sizes need ids, and explicit d_ids make the
            # directory listing unnecessary altogether.
            ids = dict()
            for size in sizes:
                if d_ids is not None:
                    ids[size] = sorted(d_ids)
                    continue
                found_ids = extract_ids(f"data/{size}_{term}_graphs/edge_list/{p_alias}")
                if p_alias == "SBM":
                    # Cap the sample size so fewer than 20 graphs do not raise.
                    found_ids = random.sample(found_ids, min(20, len(found_ids)))
                ids[size] = found_ids

            test_pks = list(product(fmts, models, sizes, requirements, allow_existings))
            if d_requirements is None and p_alias == "SBM":
                test_pks.extend(product(fmts, models, sizes, [4], allow_existings))
            for test_pk in test_pks:
                tasks.extend(await perform_test(session, test_pk,
                                                p_alias=p_alias, ids=ids,
                                                debug=debug, virtual=virtual, console=console))
        # NOTE(review): the count is printed when CONSOLE is false, i.e. when
        # the per-payload prints are off -- confirm this is the intended meaning.
        if not console:
            print(f"Request count {len(tasks)}.")
        if not virtual:
            results = await asyncio.gather(*tasks)

            failed_requests = [res for res in results if "error" in res]
            # Rewrite the retry file from scratch; it ends up empty when nothing failed.
            with open(RETRY_FILE, "w") as f:
                for req in failed_requests:
                    model, p_alias, fmt, size, i, q, allow_existing = req["PK"]
                    f.write(f"Faild,{q},{p_alias}_{fmt}_{size}_{i}_{allow_existing}_results.txt\n")
            logging.info(f"Failed Requests: {failed_requests}")

            

In [None]:
def generate_prompt_q2(graph_data: str, fmt: str, requirement:int, allow_existing:bool, console=False):
    """Compose the layout-generation prompt for one graph.

    Args:
        graph_data: textual graph data appended after the ``<graph data>`` marker.
        fmt: name of the graph format mentioned in the prompt.
        requirement: layout goal; 2 = distance consistency, 3 = few edge
            crossings, 4 = clear community structure. Must be exactly ``int``.
        allow_existing: whether the prompt permits existing layout algorithms.
            Must be exactly ``bool``.
        console: accepted but not used by this function.

    Returns:
        A one-element list ``[{"type": "text", "text": <prompt>}]``.

    Raises:
        CustomError: for a wrongly typed or unknown requirement, or a non-bool
            ALLOW_EXISTING.
    """
    if type(requirement) is not int:
        raise CustomError(f"REQUIREMENT should be int not {type(requirement)}, {requirement}")
    if type(allow_existing) is not bool:
        raise CustomError(f"ALLOW_EXISTING must be BOOL {allow_existing}")

    goals = {
        2: "maintain consistency between Euclidean distance and graph-theoretic distance.",
        3: "minimize edge crossings.",
        4: "keep the community structure clear. You can use module-based community detection algorithms to devide communities.",
    }
    if requirement not in goals:
        raise CustomError("Invalid requirement！")
    s_requirement = goals[requirement]

    if allow_existing:
        s_allow_existing = "Notice, you can use existing graph layout algorithms.\n"
    else:
        s_allow_existing = "Notice, you cannot use any existing graph layout algorithms, including but not limited to nx.springlayout(), nx.circularlayout(), et cetera.\n"

    # The prompt is assembled from four pieces: task, algorithm notice, output rules, data.
    pieces = [
        f"I will provide you a graph with the format of {fmt}. Write Python code to generate a layout for the graph. The layout should {s_requirement}\n",
        s_allow_existing,
        "Do not use matplotlib or networkx to draw the graph visually after positions are determined. The final output of your code shall be the graph layout in json format. Your answer for the Python script shall be returned in a code block.\n",
        f"<graph data>\n{graph_data}",
    ]
    return [{"type": "text", "text": "".join(pieces)}]

In [13]:
def parse_result(rs, storage_file):
    """Write the answer text RS to STORAGE_FILE, replacing any previous content.

    The file is written as UTF-8 so answers containing non-ASCII characters
    are stored regardless of the platform's default encoding.

    Raises:
        TypeError: if RS is not a string. The check happens before the file is
            opened, so an invalid answer never truncates an existing file or
            leaves an empty one behind.
    """
    if not isinstance(rs, str):
        raise TypeError(f"write() argument must be str, not {type(rs).__name__}")

    with open(storage_file, "w", encoding="utf-8") as f:
        f.write(rs)

In [None]:
async def create_completion(session, content:list, p_alias:str, graph_data:str, fmt:str, model:str, size:str, i:int, req:int, allow_existing:bool, debug=False, attempt=1):
    """Send one chat-completion request and store the answer, retrying on failure.

    The raw JSON response is written below ``full_reses_<mode>/`` and, when the
    model finished with finish_reason "stop", the answer text is written below
    ``results_<mode>/`` via parse_result. ``<mode>`` is "algpermitted" or
    "algrestricted" depending on ALLOW_EXISTING, followed by "-debug" when
    DEBUG is set.

    Retried (after sleeping ``RETRY_DELAY * attempt`` seconds, up to
    MAX_RETRIES attempts): non-200 statuses, bodies that are not valid JSON,
    responses with a missing or unexpected structure, content-filtered
    gpt-4o-2024-11-20 answers, and aiohttp client or timeout errors. Any other
    finish_reason is reported as an error without a retry.

    Args:
        session: aiohttp client session used for the POST request.
        content: user message content placed into the payload.
        p_alias, fmt, model, size, i, req, allow_existing: identify the test
            case; they determine the storage paths and the returned key.
        graph_data: only forwarded to the retry call.
        debug: store the results in the "-debug" directories.
        attempt: 1-based attempt counter, incremented on every retry.

    Returns:
        ``{"Success": pk}`` on success, otherwise
        ``{"error": <reason>, "PK": pk, "attempts": <attempt>}``, where pk is
        always ``(model, p_alias, fmt, size, i, req, <mode without -debug>)``.
    """

    s_allow = "algpermitted" if allow_existing else "algrestricted"
    dir_suffix = "-debug" if debug else ""

    dir_path = f"results_{s_allow}{dir_suffix}/{model}/{size}/{p_alias}/{req}" # modify structure if inappropriate
    full_res_dir_path = f"full_reses_{s_allow}{dir_suffix}/{model}/{size}/{p_alias}/{req}"

    # One key shape for every outcome, so callers can unpack "PK" uniformly.
    pk = (model, p_alias, fmt, size, i, req, s_allow)

    model_url, key = get_next_endpoint_and_key()
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    model_alias = "deepseek-chat" if model == "deepseek-v3" else model

    payload = {
        "model": model_alias,
        "max_tokens": 8000,
        "temperature": 0,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": content},
        ],
    }

    os.makedirs(dir_path, exist_ok=True)
    os.makedirs(full_res_dir_path, exist_ok=True)

    storage_path = f"{dir_path}/{p_alias}_{fmt}_{size}_{i}_{req}_results.txt"
    full_res_path = f"{full_res_dir_path}/{p_alias}_{fmt}_{size}_{i}_{req}_results.json"

    async with SEMAPHORE:
        try:
            async with session.post(
                url=f"{model_url}/v1/chat/completions",
                json=payload,
                headers=headers
            ) as response:
                if response.status == 200:
                    no_exception = True
                    # Collect the body line by line, dropping empty lines, then parse it as one JSON document.
                    buffer = []
                    async for line in response.content:
                        text = line.decode("utf-8").strip()
                        if text:
                            buffer.append(text)
                    full_response = "\n".join(buffer)
                    try:
                        result = js.loads(full_response)
                    except js.JSONDecodeError as e:
                        no_exception = False
                        logging.warning(f"JSONDecodeError: {e}, {p_alias}, {model}, {size}, {i}, {req}, {s_allow}")
                        if attempt < MAX_RETRIES:
                            await asyncio.sleep(RETRY_DELAY * attempt)
                        else:
                            logging.error(f"Request failed after {MAX_RETRIES} attempts: {model}, {p_alias}, {size}, {i}, {s_allow}")
                            return {"error": "None response", "PK": pk, "attempts": attempt}
                    if no_exception:
                        with open(full_res_path, "w") as f:
                            js.dump(result, f)
                        try:
                            finish_reason = result['choices'][0]['finish_reason']
                            if finish_reason == "stop":
                                rs = result['choices'][0]['message']['content']
                                parse_result(rs, storage_path)
                            elif model == "gpt-4o-2024-11-20" and finish_reason == "content_filter":
                                raise CustomError("Content filtered.")
                            else:
                                print(finish_reason, f"{model}, {p_alias}, {size}, {i}, {req}, {s_allow}")
                                return {"error": finish_reason, "PK": pk, "attempts": attempt}
                        except (TypeError, KeyError, IndexError) as e:
                            # TypeError: the answer is not text (e.g. content is null).
                            # KeyError/IndexError: the body lacks the expected "choices"
                            # structure. Both are retried instead of aborting the whole batch.
                            no_exception = False
                            logging.warning(f"{e}, {model}, {p_alias}, {size}, {i}, {req}, {s_allow}")
                            if attempt < MAX_RETRIES:
                                await asyncio.sleep(RETRY_DELAY * attempt)
                            else:
                                logging.error(f"Request failed after {MAX_RETRIES} attempts: {model}, {p_alias}, {size}, {i}, {req}, {s_allow}")
                                return {"error": "No content in response", "PK": pk, "attempts": attempt}
                        except CustomError as e:
                            no_exception = False
                            logging.warning(f"Content filtered: {model}, {p_alias}, {size}, {i}, {req}, {s_allow}")
                            # Remove a result file that may exist at this path so a filtered answer leaves none behind.
                            if os.path.isfile(storage_path):
                                os.remove(storage_path)
                            if attempt >= MAX_RETRIES:
                                logging.error(f"Request failed for content filtered: {model}, {p_alias}, {size}, {i}, {req}, {s_allow}")
                                return {"error": str(e), "PK": pk, "attempts": attempt}
                            else:
                                await asyncio.sleep(RETRY_DELAY * attempt)
                    if no_exception:
                        # The answer was already stored in the "stop" branch above.
                        return {"Success": pk}
                else:
                    logging.warning(f"Attempt  {model}, {p_alias}, {size}, {i}, {req}, {s_allow}, failed for {response.status}")

                    if attempt < MAX_RETRIES:
                        await asyncio.sleep(RETRY_DELAY * attempt)
                    else:
                        logging.error(f"Request failed after {MAX_RETRIES} attempts:  {model}, {p_alias}, {size}, {i}, {req}, {s_allow}")
                        return {"error": "MAX_RETRY", "PK": pk, "attempts": attempt}
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logging.warning(f"Attempt  {model}, {p_alias}, {size}, {i}, {req}, {s_allow}, excepts for {e}")

            if attempt < MAX_RETRIES:
                await asyncio.sleep(RETRY_DELAY * attempt)
            else:
                logging.error(f"Request failed after {MAX_RETRIES} attempts:  {model}, {p_alias}, {size}, {i}, {req}, {s_allow}")
                return {"error": str(e), "PK": pk, "attempts": attempt}
    # Every path that reaches this point is a retryable failure with attempts left.
    return await create_completion(session, content, p_alias, graph_data, fmt, model, size, i, req, allow_existing, debug, attempt+1)
        

In [15]:
def v_completion(content:list, model: str):
    """Build, without sending it, the chat-completion payload for CONTENT.

    The model name "deepseek-v3" is replaced by "deepseek-chat"; every other
    model name is used unchanged.

    Returns:
        The payload dict with the keys "model", "max_tokens", "temperature"
        and "messages" (a fixed system message followed by the user message).
    """
    if model == "deepseek-v3":
        model_alias = "deepseek-chat"
    else:
        model_alias = model

    system_message = {"role": "system", "content": "You are a helpful assistant."}
    user_message = {"role": "user", "content": content}

    return {
        "model": model_alias,
        "max_tokens": 8000,
        "temperature": 0,
        "messages": [system_message, user_message],
    }

In [None]:
async def redo_task_by_pks(p_ks:list, debug=False, virtual=True, console=True):
    """Re-run the test cases described by P_KS.

    Args:
        p_ks: list of dicts, one per test case, with the keys "model",
            "p_alias", "fmt", "size", "i" (graph id, int or numeric string),
            "req" (requirement, int or numeric string) and "allow_existing"
            (bool).
        debug, virtual, console: forwarded to perform_test.

    When VIRTUAL is false all requests are awaited and the failed ones are
    logged. When CONSOLE is false the total request count is printed.

    Raises:
        CustomError: if an entry has no boolean "allow_existing" value.
    """

    timeout_seconds = 300
    session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout_seconds,sock_read=timeout_seconds)
    tasks = []
    async with aiohttp.ClientSession(timeout=session_timeout)as session:
        for p_k in p_ks:
            allow_existing = p_k.get("allow_existing")
            if not isinstance(allow_existing, bool):
                raise CustomError(f"p_k must provide a bool 'allow_existing': {p_k}")

            size = p_k["size"]
            # perform_test reads ids[size], so a single-entry mapping is enough.
            ids = {size: [int(p_k["i"])]}

            # perform_test unpacks exactly these five values.
            test_pk = (p_k["fmt"], p_k["model"], size, int(p_k["req"]), allow_existing)
            tasks.extend(await perform_test(session, test_pk, p_k["p_alias"], ids, debug=debug, virtual=virtual, console=console))
        if not console:
            print(f"Request count {len(tasks)}.")
        if not virtual:
            results = await asyncio.gather(*tasks)

            failed_requests = [res for res in results if "error" in res]
            logging.info(f"Failed Requests: {failed_requests}")

            

In [17]:
async def redo_task_from_file(model:str, filename: str, virtual=True, console=False):
    """Re-run the failed test cases listed in the retry file FILENAME for MODEL.

    Each non-blank line has the form ``<status>,<req>,<name>`` where name is
    ``{p_alias}_{fmt}_{size}_{i}_{allow}_results.txt``. The ``{allow}`` field
    is optional; "True"/"algpermitted" mean existing layout algorithms are
    allowed, "False"/"algrestricted" mean they are not. The format name may
    itself contain underscores (e.g. "edge_list").

    Every line becomes a dict with the keys "model", "p_alias", "fmt", "size",
    "i", "req" and, when the line carries the flag, "allow_existing" (bool).
    The dicts are then passed to redo_task_by_pks.

    Raises:
        CustomError: if the name part of a line has too few fields.
        ValueError: if a line does not consist of exactly three comma-separated fields.
    """
    allow_tokens = {"True": True, "algpermitted": True, "False": False, "algrestricted": False}

    p_ks = []
    with open(filename, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            _, q, data_p_k = line.split(",")

            # Drop the leading alias and the trailing "results.txt"; what remains is
            # <fmt pieces...>, size, i and optionally the allow flag.
            fields = data_p_k.split("_")
            p_alias, middle = fields[0], fields[1:-1]

            d = {"model": model, "p_alias": p_alias}
            if middle and middle[-1] in allow_tokens:
                d["allow_existing"] = allow_tokens[middle.pop()]
            if len(middle) < 3:
                raise CustomError(f"Malformed retry line: {line}")

            # Parse from the right so an underscore inside the format name is harmless.
            *fmt_pieces, size, i = middle
            d.update({"fmt": "_".join(fmt_pieces), "size": size, "i": i, "req": q})
            p_ks.append(d)

    # The file is closed before the (potentially long) redo run starts.
    await redo_task_by_pks(p_ks, virtual=virtual, console=console)
        

In [None]:
# Example usage: send real requests (virtual=False) for the small clustered graphs.
# With console=False do_task prints the total request count.
# The top-level await only works in an environment that supports it, such as a notebook cell.
patterns = ["clustered graph"]
await do_task(patterns, d_size="small", debug=False, virtual=False, console=False)