In [None]:
import ast
import asyncio
import aiohttp
import os
import json as js
import glob
import re
from itertools import product
from itertools import cycle
from pathlib import Path
import random

import logging

In [2]:
class CustomError(RuntimeError):
    """Error raised by this notebook's helpers for invalid arguments or missing graph data."""

In [None]:
# Log to an append-mode file under logs/ (created on demand) and to the console.
log_file = Path("logs", "async_requests.log")
log_file.parent.mkdir(exist_ok=True, parents=True)

logging.basicConfig(
    handlers=[logging.FileHandler(log_file, mode="a"), logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

In [None]:
# Has no effect when the root logger already has handlers (e.g. the logging
# setup cell ran first); otherwise it installs a default INFO console handler.
logging.basicConfig(level=logging.INFO)

# Total number of attempts per request (first try included), not the number of
# extra retries: create_completion compares `attempt < MAX_RETRIES` with the
# attempt counter starting at 1.
MAX_RETRIES = 3
# Base back-off in seconds; the wait before the next attempt is RETRY_DELAY * attempt.
RETRY_DELAY = 3

# Upper bound on the number of requests in flight at the same time.
SEMAPHORE = asyncio.Semaphore(3)

# Placeholders: replace with real lists before use. cycle() over a plain string
# would iterate over its characters.
endpoints = "YOUR LIST OF ENDPOINTS"
api_keys = "YOUR LIST OF APIKEYS"

endpoint_cycle, api_key_cycle = cycle(endpoints), cycle(api_keys)

def get_next_endpoint_and_key():
    """Return the (endpoint, API key) pair to use for the next request.

    Placeholder: always raises NotImplementedError until the endpoint and key
    lists are configured and an implementation is supplied.
    """
    # Suggested round-robin implementation once real lists are in place:
    #return next(endpoint_cycle), next(api_key_cycle)
    raise NotImplementedError()


In [None]:
def extract_ids(directory: str):
    """Collect the graph data IDs found in DIRECTORY.

    An ID is the run of digits at the start of an entry name that is directly
    followed by an underscore. Returns the distinct IDs as a sorted list of ints;
    entries that do not follow that naming scheme are ignored.
    """
    leading_id = re.compile(r'^(\d+)_')
    hits = (leading_id.match(entry) for entry in os.listdir(directory))
    return sorted({int(hit.group(1)) for hit in hits if hit})

In [None]:
def find_graph_data_by_pk(fmt:str, p_alias:str, size:str, index:int) -> str:
    """Find the path of graph data file for (FMT, P_ALIAS, SIZE, INDEX)

    The file is searched under data/{size}_{general|special}_graphs/{fmt}/{p_alias}/
    ("general" for the SBM alias, "special" otherwise) and must be named
    "{index}_{p_alias}_*.txt".

    Returns the path as produced by glob. When several files match, the
    lexicographically first one is returned so the result is deterministic.

    Raises CustomError for an unknown SIZE or FMT, or when no file matches.
    """
    if size not in ["small", "exlarge"]:
        raise CustomError(f"Invalid size {size}.")
    if fmt not in ["adjacency_list", "edge_list"]:
        raise CustomError(f"Invalid fmt {fmt}.")

    term = "general" if p_alias == "SBM" else "special"
    directory = f"data/{size}_{term}_graphs/{fmt}/{p_alias}"

    # Check the ID on the bare file name rather than on the whole path: glob
    # joins the file name with the OS separator, so a pattern that hard-codes
    # "/" never matches on Windows.
    leading_id = re.compile(r"(\d+)_.*\.txt$")

    for file in sorted(glob.glob(f"{directory}/{index}_{p_alias}_*.txt")):
        match = leading_id.match(os.path.basename(file))
        if match and int(match.group(1)) == index:
            return file

    raise CustomError(f"Filename not found for {fmt} {p_alias} {size} {index}")

In [None]:
def read_graph_from_edge_list(filename:str):
    """Get graph data from file FILENAME, which stores the graph as an edge list.

    Each non-blank line must hold exactly two whitespace-separated integers
    (the two endpoints of an edge). Blank lines are skipped.

    Returns (nodes, edges): the sorted list of distinct node IDs and the sorted
    list of (node1, node2) tuples in the orientation found in the file.

    Raises ValueError if a non-blank line does not contain exactly two integers.
    """
    edges = []
    nodes = set()

    with open(filename, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Tolerate empty lines, e.g. a trailing newline at end of file.
                continue
            node1, node2 = map(int, fields)

            edges.append((node1, node2))
            nodes.update((node1, node2))

    return sorted(nodes), sorted(edges)

def read_graph_from_adj_list(filename:str):
    """Get graph data from file FILENAME, which stores the graph as an adjacency list.

    Each non-blank line has the form "<node>: <python literal>", where the
    literal is an iterable of neighbour IDs (e.g. "0: [1, 2]"). Blank lines are
    skipped.

    Returns (nodes, edges, graph): the sorted list of distinct node IDs, the
    sorted list of undirected edges (each stored once as an ordered tuple), and
    a dict mapping every listed node to its parsed adjacency literal.
    """
    edges = set()
    nodes = set()
    graph = {}

    with open(filename, 'r') as file:
        for line in file:
            if not line.strip():
                # Tolerate empty lines, e.g. a trailing newline at end of file.
                continue
            node, adj_list_str = line.split(':')
            node = int(node.strip())

            # literal_eval only accepts Python literals, so file content is not executed.
            adj_list = ast.literal_eval(adj_list_str.strip())

            graph[node] = adj_list
            nodes.add(node)

            for adj_node in adj_list:
                # Normalise the orientation so (a, b) and (b, a) collapse into one edge.
                edges.add(tuple(sorted((node, adj_node))))
                nodes.add(adj_node)

    return sorted(nodes), sorted(edges), graph

In [None]:
def get_p_alias(pattern: str) -> str:
    """Map PATTERN to its alias.

    "clustered graph" becomes "SBM"; every other supported pattern is its own
    alias. Raises CustomError for an unsupported pattern.
    """
    known_patterns = ("Cycle", "Star", "Path", "Grid", "clustered graph")
    if pattern not in known_patterns:
        raise CustomError(f"Invalid pattern {pattern}")
    return "SBM" if pattern == "clustered graph" else pattern

In [None]:
def parse_graph_to_str_el(f):
    """Read edge-list file F and return (node list, str() of the edge list)."""
    node_ids, edge_list = read_graph_from_edge_list(f)
    return node_ids, str(edge_list)

def parse_graph_to_str_al(f):
    """Read adjacency-list file F and return (node list, str() of the adjacency dict)."""
    node_ids, _edge_list, adjacency = read_graph_from_adj_list(f)
    return node_ids, str(adjacency)

async def perform_test(session, test_pks:list, p_alias:str, ids:list, d_questions:list=None, d_pair:list=None, debug=False, virtual=False, console=True):
    """Build the requests for one (format, model, size) combination of P_ALIAS graphs.

    Parameters:
        session: client session handed through to create_completion.
        test_pks: sequence unpacked as (fmt, model, size).
        p_alias: pattern alias; "SBM" additionally enables the community question.
        ids: indexed by size (ids[size]); yields the graph IDs to test.
        d_questions: optional list restricting the (question, minor) combinations;
            every entry must be one of the combinations valid for P_ALIAS.
        d_pair: optional node pair (list or tuple of length 2) used for the
            shortest-path question; a random pair of graph nodes is drawn otherwise.
        debug: when true, only the first graph ID is processed.
        virtual: when true, nothing is scheduled; the payload dicts built by
            v_completion are collected instead.
        console: when true (and VIRTUAL is set), each payload is printed.

    Returns a list of create_completion coroutines that the caller must await
    (e.g. with asyncio.gather), or a list of payload dicts when VIRTUAL is set.

    Raises CustomError for an invalid D_QUESTIONS or D_PAIR, or when a graph
    file cannot be found.
    """
    fmt, model, size = test_pks

    questions = ["minor", "minor", "minor", "minor"]
    minors = ["node_count", "edge_count", "shortest_path", "highest_degree"]
    if p_alias == "SBM":
        questions.append("community")
        minors.append(None)
    q_combinations = list(zip(questions, minors))

    if d_questions is not None:
        if type(d_questions) != list:
            raise CustomError("d_questions must be list")
        for d_q in d_questions:
            if d_q not in q_combinations:
                raise CustomError(f"Invalid d_question {d_q}.")
        q_combinations = d_questions

    tasks = []
    for position, i in enumerate(ids[size]):
        # Stop before touching the file system for graphs a debug run will not use.
        if debug and position >= 1:
            break

        f = find_graph_data_by_pk(fmt, p_alias, size, i)

        if fmt == "adjacency_list":
            nodes, graph_data = parse_graph_to_str_al(f)
        else:
            nodes, graph_data = parse_graph_to_str_el(f)

        for q in q_combinations:
            pair = None
            if q == ("minor", "shortest_path"):
                if d_pair is None:
                    pair = random.sample(nodes, 2)
                elif isinstance(d_pair, (list, tuple)) and len(d_pair) == 2:
                    # Accept both sequence kinds: the signature advertises a list.
                    pair = tuple(d_pair)
                else:
                    raise CustomError(f"Invalid D_PAIR {d_pair}")

            content = generate_prompt_q1(graph_data, fmt, q[0], q[1], pair)
            if not virtual:
                # The coroutine is only created here; it starts running once awaited.
                tasks.append(create_completion(session, content, p_alias, graph_data, fmt, model, size, i, q, pair, debug=debug))
            else:
                v_c = v_completion(content, model)
                tasks.append(v_c)
                if console:
                    print(v_c)

    return tasks


In [None]:
async def do_task(patterns: list, d_ids:list=None, d_fmt=None, d_model=None, d_size=None, d_questions:list=None, d_pair:list=None, debug=False, virtual=False, console=True):
    """Perform tests on the task data determined by PATTERNS.

    Every combination of format, model and size is tested for every pattern.
    Parameters starting with "d_" are optional; each one pins the matching
    dimension of the task to a single value (or, for D_IDS, to an explicit list
    of graph IDs instead of the IDs discovered in the edge_list data directory).

    Set DEBUG to True to test with a few runs.
    Set VIRTUAL to True to inspect the prompts generated without sending any
    actual requests.
    CONSOLE selects what is printed: when True, perform_test prints every
    virtual payload; when False, only the total request count is printed.

    When requests are sent, the failed ones are written to "retry.txt" (the
    file is overwritten on every run) and logged.

    Raises CustomError if PATTERNS is not a list or contains an unknown
    pattern, and RuntimeError for an invalid D_FMT, D_MODEL or D_SIZE.
    """
    #Define task variables here
    fmts = ["adjacency_list", "edge_list"]
    models = ["gpt-4o-2024-11-20", "deepseek-v3", "gemini-2.0-flash-001"]
    sizes = ["exlarge","small"]

    if type(patterns) != list:
        raise CustomError("Patterns must be list.")

    if d_fmt is not None:
        if d_fmt not in fmts:
            raise RuntimeError(f"Invalid d_fmt {d_fmt}.")
        fmts = [d_fmt]

    if d_model is not None:
        if d_model not in models:
            raise RuntimeError(f"Invalid d_model {d_model}.")
        models = [d_model]

    if d_size is not None:
        if d_size not in sizes:
            raise RuntimeError(f"Invalid d_size {d_size}.")
        sizes = [d_size]

    timeout_seconds = 120
    # No overall deadline; only connecting and each socket read are bounded.
    session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout_seconds,sock_read=timeout_seconds)
    tasks = []
    async with aiohttp.ClientSession(timeout=session_timeout) as session:
        for p in patterns:
            p_alias = get_p_alias(p)
            term = "general" if p_alias == "ER" or p_alias == "SBM" else "special"

            # Only touch the directories that are actually needed: listing the
            # data directory of an excluded size (or listing at all when the IDs
            # are given explicitly) would fail if that directory is absent.
            if d_ids is not None:
                ids = {size: sorted(d_ids) for size in sizes}
            else:
                ids = {
                    size: extract_ids(f"data/{size}_{term}_graphs/edge_list/{p_alias}")
                    for size in sizes
                }

            for test_pk in product(fmts, models, sizes):
                tasks.extend(await perform_test(session, test_pk, p_alias, ids, d_questions, d_pair, debug, virtual, console))
        if not console:
            print(f"Request count {len(tasks)}.")
        if not virtual:
            # The session must still be open while the requests run.
            results = await asyncio.gather(*tasks)

            failed_requests = [res for res in results if "error" in res]
            # Opening in "w" mode resets the file even when nothing failed.
            with open("retry.txt", "w") as f:
                for req in failed_requests:
                    model, p_alias, fmt, size, i, q = req["PK"]
                    f.write(f"Faild,{q},{p_alias}_{fmt}_{size}_{i}_results.txt\n")

            logging.info(f"Failed Requests: {failed_requests}")
            

In [None]:
def generate_prompt_q1(graph_data: str, fmt: str, question:str, minor=None, pair:list=None):
    """Build the user message content for one question about a graph.

    GRAPH_DATA is the textual graph and FMT the name of its format; both are
    embedded verbatim in the prompt. QUESTION is "community" or "minor"; for
    "minor", MINOR selects the concrete question ("node_count", "edge_count",
    "shortest_path" or "highest_degree"). PAIR supplies the two nodes for
    "shortest_path".

    Returns a one-element list holding a {"type": "text", "text": ...} dict.

    Raises CustomError for an unknown MINOR or QUESTION, or when "shortest_path"
    is requested without PAIR; raises RuntimeError when QUESTION is "minor" but
    MINOR is None. MINOR is validated before QUESTION.
    """
    # Step 1: resolve the minor question text (if any).
    if minor is None:
        pass
    elif minor == "node_count":
        question_minor = "count the number of nodes in the graph."
        answer_format = "Integer"
    elif minor == "edge_count":
        question_minor = "count the number of edges in the graph."
        answer_format = "Integer"
    elif minor == "shortest_path":
        if pair is None:
            raise CustomError("No pair of nodes selected for shortest path!")
        node0, node1 = pair
        question_minor = f"calculate the length of the shortest path bewteen node '{node0}' and '{node1}'."
        answer_format = "Integer"
    elif minor == "highest_degree":
        question_minor = "calculate the highest degree of all nodes in the graph."
        answer_format = "Integer"
    else:
        raise CustomError(f"Invalid minor question {minor}")

    # Step 2: pick the sentence that is actually asked.
    if question == "community":
        s_question = "It's a graph with community structure, count the number of communities in the graph."
        answer_format = "Integer"
    elif question == "minor":
        if minor is None:
            raise RuntimeError("Minor should not be None")
        s_question = question_minor
    else:
        raise CustomError(f"Invalid major question {question}")

    # Step 3: assemble the prompt text and wrap it in the message content structure.
    prompt_text = (
        f"I will provide you a graph with the format of {fmt}, {s_question}\n"
        f"<graph data>\n{graph_data}.\n Your response shall include a JSON wrapped in a code block in which a field \"answer\" represents your answer({answer_format})."
    )
    return [{"type": "text", "text": prompt_text}]

In [13]:
def parse_result(rs, storage_file):
    """Write the response text RS to STORAGE_FILE, replacing any previous content.

    The file is written as UTF-8 so that non-ASCII characters in a model answer
    cannot fail on platforms whose default text encoding is narrower.
    """
    with open(storage_file, "w", encoding="utf-8") as f:
        f.write(rs)

In [None]:
async def create_completion(session, content:list, p_alias:str, graph_data:str, fmt:str, model:str, size:str, i:int, q:tuple, pair:list=None, debug=False, attempt=1):
    """Send one chat-completion request and store the answer, retrying on failure.

    Parameters:
        session: client session used to POST the request.
        content: user message content (see generate_prompt_q1).
        p_alias, fmt, model, size, i: identify the graph and model under test;
            they determine the output paths and the logged/returned key.
        graph_data: not used for the request itself; only passed on to retries.
        q: (major, minor) question tuple.
        pair: the two nodes of a shortest-path question (required for it).
        debug: when true, output goes to the "-debug" result directories.
        attempt: 1-based attempt counter; callers should leave the default.

    The parsed response is dumped as JSON under "full_reses..." and the answer
    text is written under "results...". A non-200 status, a body that is not
    valid JSON, a body without a text answer, or a client/timeout error counts
    as a failed attempt. After a failed attempt the coroutine sleeps
    RETRY_DELAY * attempt seconds (while still holding its concurrency slot)
    and tries again, up to MAX_RETRIES attempts in total.

    Returns {"Success": key} on success, or
    {"error": reason, "PK": key, "attempts": n} once all attempts failed, where
    key is (model, p_alias, fmt, size, i, question name).

    Raises CustomError if a shortest-path question comes without PAIR.
    """
    # Any truthy DEBUG selects the debug directories, so the paths are always defined.
    suffix = "-debug" if debug else ""
    question_dir = f"{model}/{size}/{fmt}/{p_alias}/{q[0]}_{q[1]}" # modify structure if inappropriate
    dir_path = f"results{suffix}/{question_dir}"
    full_res_dir_path = f"full_reses{suffix}/{question_dir}"

    # Fetched on every attempt, so a retry may go to a different endpoint/key.
    model_url, key = get_next_endpoint_and_key()
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    model_alias = "deepseek-chat" if model == "deepseek-v3" else model #Deepseek offcial api provides deepseek-v3 by name deepseek-chat

    payload = {
        "model": model_alias,
        "max_completion_tokens":8000,
        "temperature": 0,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": content},
        ],
    }

    os.makedirs(dir_path, exist_ok=True)
    os.makedirs(full_res_dir_path, exist_ok=True)

    major, minor = q
    s_q = major if minor is None else f"{major}_{minor}"

    if s_q == "minor_shortest_path":
        if pair is None:
            raise CustomError("No pair of nodes selected for shortest path!")
        node0, node1 = pair
        file_stem = f"{p_alias}_{fmt}_{size}_{i}_{node0}_{node1}_results"
    else:
        file_stem = f"{p_alias}_{fmt}_{size}_{i}_results"
    storage_path = f"{dir_path}/{file_stem}.txt"
    full_res_path = f"{full_res_dir_path}/{file_stem}.json"

    pk = (model, p_alias, fmt, size, i, s_q)

    async with SEMAPHORE:
        # Reason reported if this turns out to be the last attempt.
        failure = None
        try:
            async with session.post(
                url=f"{model_url}/v1/chat/completions",
                json=payload,
                headers=headers
            ) as response:
                if response.status == 200:
                    buffer = []
                    async for line in response.content:
                        text = line.decode("utf-8").strip()
                        if text:
                            buffer.append(text)
                    full_response = "\n".join(buffer)
                    try:
                        result = js.loads(full_response)
                    except js.JSONDecodeError:
                        failure = "None response"
                    else:
                        # Keep the raw response even if no answer can be extracted from it.
                        with open(full_res_path, "w") as f:
                            js.dump(result, f)
                        try:
                            rs = result['choices'][0]['message']['content']
                            if not isinstance(rs, str):
                                raise TypeError(f"response content is {type(rs).__name__}, expected str")
                        except (KeyError, IndexError, TypeError) as e:
                            # A 200 body may still lack the expected structure
                            # (missing keys, empty "choices", null content).
                            logging.warning(f"{e}, {p_alias}, {fmt}, {size}, {i}, {s_q}")
                            failure = "No content in response"
                        else:
                            parse_result(rs, storage_path)
                            return {"Success": pk}
                else:
                    logging.warning(f"Attempt {model}, {p_alias}, {fmt}, {size}, {i}, {s_q}, failed for {response.status}")
                    failure = "MAX_RETRY"
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logging.warning(f"Attempt {model}, {p_alias}, {fmt}, {size}, {i}, {s_q}, excepts for {e}")
            failure = str(e)

        # Only failed attempts reach this point.
        if attempt < MAX_RETRIES:
            await asyncio.sleep(RETRY_DELAY * attempt)
        else:
            logging.error(f"Request failed after {MAX_RETRIES} attempts: {model}, {p_alias}, {fmt}, {size}, {i}, {s_q}")
            return {"error": failure, "PK": pk, "attempts": attempt}
    # Retry outside the semaphore so the next attempt queues for a slot again.
    return await create_completion(session, content, p_alias, graph_data, fmt, model, size, i, q, pair, debug, attempt+1)
        

In [None]:
def v_completion(content:list, model: str):
    """Return the request payload that would be sent to the LLM for CONTENT.

    The model name "deepseek-v3" is sent as "deepseek-chat"; any other name is
    used unchanged.
    """
    if model == "deepseek-v3":
        model_alias = "deepseek-chat"
    else:
        model_alias = model

    system_message = {"role": "system", "content": "You are a helpful assistant."}
    user_message = {"role": "user", "content": content}

    return {
        "model": model_alias,
        "max_completion_tokens": 8000,
        "temperature": 0,
        "messages": [system_message, user_message],
    }

In [None]:
async def redo_task_by_pks(p_ks:list, debug=False, virtual=True, console=True):
    """Re-run the individual tests described by P_KS.

    Each entry of P_KS is a dict with the keys "p_alias", "fmt", "size", "i"
    (graph ID, converted with int()), "q_combination" (a (major, minor) tuple)
    and "model". For a shortest-path question an optional "node_pair" entry
    fixes the node pair; without it a random pair is drawn by perform_test.

    Set VIRTUAL to False to actually send the requests (the default only builds
    the payloads). CONSOLE selects what is printed: when True, perform_test
    prints every virtual payload; when False, only the total request count.

    When requests are sent, the ones that fail again are logged and written to
    "retry4retry.txt" (the file is overwritten on every run).
    """
    timeout_seconds = 300
    # No overall deadline; only connecting and each socket read are bounded.
    session_timeout = aiohttp.ClientTimeout(total=None,sock_connect=timeout_seconds,sock_read=timeout_seconds)
    tasks = []
    async with aiohttp.ClientSession(timeout=session_timeout) as session:
        for p_k in p_ks:
            p_alias = p_k["p_alias"]
            fmt = p_k["fmt"]
            size = p_k["size"]
            # perform_test looks the IDs up by size, so provide just this one graph.
            ids = {size: [int(p_k["i"])]}
            d_questions = [p_k["q_combination"]]
            model = p_k["model"]

            if d_questions[0][1] == "shortest_path":
                # The pair is optional: None lets perform_test draw a random one.
                d_pair = p_k.get("node_pair")
            else:
                d_pair = None

            test_pk = (fmt, model, size)
            tasks.extend(await perform_test(session, test_pk, p_alias, ids, d_questions, d_pair, debug, virtual, console))
        if not console:
            print(f"Request count {len(tasks)}.")
        if not virtual:
            # The session must still be open while the requests run.
            results = await asyncio.gather(*tasks)

            failed_requests = [res for res in results if "error" in res]
            logging.info(f"Failed Requests: {failed_requests}")

            # Opening in "w" mode resets the file even when nothing failed.
            with open("retry4retry.txt", "w") as f:
                for req in failed_requests:
                    model, p_alias, fmt, size, i, q = req["PK"]
                    f.write(f"Faild,{q},{p_alias}_{fmt}_{size}_{i}_results.txt\n")

            

In [17]:
async def redo_task_from_file(model, filename: str, virtual=True, console=False):
    """Re-run with MODEL the failed tests listed in the retry file FILENAME.

    Every non-blank line must have three comma-separated fields:
    "<status>,<question>,<result file name>". The status field is ignored.
    <question> is "<major>_<minor>", or just "<major>" for a question without a
    minor part (a literal minor of "None" is treated the same way).
    <result file name> is "<p_alias>_<fmt>_<size>_<id>_results.txt", where <fmt>
    ends in "_list"; for a shortest-path question it may also carry the node
    pair: "<p_alias>_<fmt>_<size>_<id>_<node0>_<node1>_results.txt".

    The parsed entries are handed to redo_task_by_pks together with VIRTUAL and
    CONSOLE. Raises ValueError for a line that does not follow this layout.
    """
    with open(filename, "r") as f:
        lines = f.readlines()

    p_ks = []
    for l in lines:
        l = l.strip()
        if not l:
            # Ignore blank lines such as a trailing newline.
            continue

        _, q, data_p_k = l.split(",")

        # partition() copes with questions that have no minor part
        # (e.g. "community"), for which a two-way split cannot be unpacked.
        major_q, _, minor_q = q.partition("_")
        if minor_q in ("", "None"):
            minor_q = None

        parts = data_p_k.split("_")
        node_pair = None
        if minor_q == "shortest_path" and len(parts) == 8:
            p_alias, fmt_1st_half, _, size, i, node0, node1, _ = parts
            node_pair = (node0, node1)
        else:
            p_alias, fmt_1st_half, _, size, i, _ = parts

        d = {
            "model": model,
            "p_alias": p_alias,
            "fmt": f"{fmt_1st_half}_list",
            "size": size,
            "i": i,
            "q_combination": (major_q, minor_q)
        }
        if node_pair is not None:
            d["node_pair"] = node_pair
        p_ks.append(d)

    await redo_task_by_pks(p_ks, virtual=virtual, console=console)
        

In [None]:
#Example usage
patterns = ["Cycle"]
await do_task(patterns, d_size="exlarge", d_model="deepseek-v3", debug=False, virtual=True, console=False)