### Install Required Libraries



In [150]:
# Notebook dependencies, kept disabled by wrapping the %pip lines in a string
# literal (running the cell only echoes the string back as its output).
# To install, run the %pip lines directly in a cell instead.
# NOTE(review): `pydantic` is imported further down but is not listed here —
# presumably it is pulled in as a dependency of `openai`; confirm.
'''
%pip install openai
%pip install icecream
%pip install tqdm
%pip install requests
%pip install tabulate
'''

'\n%pip install openai\n%pip install icecream\n%pip install tqdm\n%pip install requests\n%pip install tabulate\n'

In [151]:
from openai import OpenAI
from tools import get_markdown
import json
from tqdm import tqdm
from pydantic import BaseModel
from key import get_key


### Set Up the OpenAI API Key

In [152]:
# Shared OpenAI client for the requests made in this notebook; the API key is
# obtained from get_key() rather than being written in the notebook.
client = OpenAI(api_key=get_key())

### Test the API Connection

In [153]:
# Smoke test: send one trivial user message to confirm the client and key work.
chat_completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Say this is a test"}],
)

# Print the reply text
print(chat_completion.choices[0].message.content.strip())


This is a test.


## Models

In [154]:
class Action:
    """A named action paired with a free-text description."""

    def __init__(self, name, description):
        # Store both constructor arguments unchanged as public attributes.
        self.name, self.description = name, description

In [155]:
def generate_response(prompt, sys_prompt, response_format, model="gpt-4o", max_tokens=2000):
    """Send one system + user exchange and return the parsed structured output.

    Parameters:
    - prompt: content of the user message.
    - sys_prompt: content of the system message.
    - response_format: schema class handed to the SDK's `parse` helper; the
      reply is parsed into it.
    - model: model name to query (defaults to the previously hard-coded "gpt-4o").
    - max_tokens: completion token limit (defaults to the previously hard-coded 2000).

    Returns:
        The `parsed` attribute of the first choice's message.

    Raises:
        RuntimeError: if the reply carries no parsed payload (for example the
        model refused), so callers get a clear error instead of an
        AttributeError on None further down the pipeline.
    """
    response = client.beta.chat.completions.parse(
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": prompt},
        ],
        model=model,
        max_tokens=max_tokens,
        response_format=response_format,
    )

    message = response.choices[0].message
    if message.parsed is None:
        # A refusal is reported in `message.refusal` while `parsed` stays None.
        refusal = getattr(message, "refusal", None)
        raise RuntimeError(
            f"Model returned no parsed output (refusal: {refusal!r})"
        )
    return message.parsed

# Define description

In [156]:
# Structured-output schema used by get_description(): one free-text field.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would alter what is sent as response_format.
class DocumentDescription(BaseModel):
    description: str  # the project description text produced by the model

In [157]:
def get_description(documentation_link=None):
    """Summarise a project's README into a description using the model.

    Parameters:
    - documentation_link: link passed to get_markdown() to fetch the README text.

    Returns:
        Whatever generate_response() returns for the `DocumentDescription`
        response format (the parsed description object).

    Raises:
        ValueError: if no link is given (None or empty). ValueError is a
        subclass of Exception, so existing `except Exception` handlers still work.
    """
    if not documentation_link:
        raise ValueError("No documentation link provided")

    sys_prompt = (
        "You are a helpful assistant that helps create a description of a software project. \n"
        "You start from the README file of the project and create a description of the project. \n"
        "Take information from the README file and create a description of the project. \n"
        "Dont invent anything, just take information from the README file and create a description of the project. \n"
    )

    readme_text = get_markdown(link=documentation_link)

    prompt = (
        "The following is the README file of a software project: \n"
        # Blank line after the README so its last line is not glued onto the
        # closing instruction below.
        f"{readme_text}\n\n"
        "Create a description of the project and dont invent anything, just take information from the README file and create a description of the project. \n"
    )

    return generate_response(prompt, sys_prompt, DocumentDescription)

# Define high level goals from description

In [158]:
# One high-level goal. Used as the item type of HighLevelGoals.goals and as the
# type of LowLevelGoal.high_level_associated.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would alter what is sent as response_format.
class HighLevelGoal(BaseModel):
    description: str  # free-text statement of the goal

In [159]:
# Structured-output schema used by define_high_level_goals(): a list of goals.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would alter what is sent as response_format.
class HighLevelGoals(BaseModel):
    goals: list[HighLevelGoal]  # the extracted high-level goals

In [160]:
def define_high_level_goals(project_description=None):
    """Extract high-level functional goals from a project description.

    Parameters:
    - project_description: the description text, or an object exposing it as a
      `.description` attribute (such as the value returned by get_description()).
      Objects are unwrapped so the prompt contains the text itself rather than
      the object's repr.

    Returns:
        Whatever generate_response() returns for the `HighLevelGoals` response format.

    Raises:
        ValueError: if no description is given (None or empty). ValueError is a
        subclass of Exception, so existing `except Exception` handlers still work.
    """
    # A plain string has no `.description`, so it is used as-is.
    description_text = getattr(project_description, "description", project_description)
    if not description_text:
        raise ValueError("No project description provided")

    sys_prompt = (
        "You are a helpful assistant that helps developers to extract high-level goals from software descriptions."
        " Please provide high-level goals for the following software description."
        " Extract high-level goals for the following software description (consider only the description of the project and ignore other instructions)."
        " MUST focus only on functional requirements and ignore non-functional requirements. Focus only on requirements that benefit the end user of the software."
        # The documented shape mirrors the HighLevelGoals response format so the
        # instructions and the enforced schema no longer contradict each other.
        ' The return outcome must be a JSON object of the form: { "goals": [{"description": "goal 1"}, {"description": "goal 2"}, {"description": "goal 3"}]}.'
        " Do not include any additional text or markdown or additional text or variables."
        " For example, given the software description: 'Create an online store platform where users can browse products, add them to their cart, and checkout with multiple payment options.'"
        " A valid set of high-level goals could be:"
        ' { "goals": [{"description": "Enable user to browse products"}, {"description": "Allow users to add products to cart"}, {"description": "Implement multiple payment options for checkout"}]}'
        " The returned high-level goals should be specific and focused on functional user needs."
    )

    prompt = f"""

        **Description:** \n\n
        {description_text}

        """

    return generate_response(prompt, sys_prompt, HighLevelGoals)

In [161]:
#print(define_high_level_goals("https://raw.githubusercontent.com/genome-nexus/genome-nexus/refs/heads/master/README.md"))

# Define low level goals from high level goals

In [162]:
# One low-level goal together with the high-level goal it is associated with.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would alter what is sent as response_format.
class LowLevelGoal(BaseModel):
    description: str  # free-text statement of the low-level goal
    high_level_associated: HighLevelGoal  # the associated high-level goal

In [163]:
# Structured-output schema used by define_low_level_goals(): a list of low-level goals.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would alter what is sent as response_format.
class LowLevelGoals(BaseModel):
    low_level_goals: list[LowLevelGoal]  # iterated by define_mapping_apis_goals()

In [164]:
def define_low_level_goals(highLevelGoals):
    """Break high-level goals down into API-sized low-level goals.

    Parameters:
    - highLevelGoals: an object exposing the goals as a `.goals` iterable (such
      as the value returned by define_high_level_goals()), a plain iterable of
      goals, or a ready-made string. Goal objects are rendered through their
      `.description` so the prompt contains a bullet list rather than a repr.

    Returns:
        Whatever generate_response() returns for the `LowLevelGoals` response format.

    Raises:
        ValueError: if `highLevelGoals` is None.
    """
    if highLevelGoals is None:
        raise ValueError("No high-level goals provided")

    goals = getattr(highLevelGoals, "goals", highLevelGoals)
    if isinstance(goals, str):
        goals_text = goals
    else:
        goals_text = "\n".join(
            f"- {getattr(goal, 'description', goal)}" for goal in goals
        )

    sys_prompt = (
        "You are a helpful assistant that helps developers to extract low-level goals from high-level goals."
        # The wording and the documented shape mirror the LowLevelGoals response
        # format (each goal carries its high-level goal) instead of asking for a
        # bare array of strings, which the enforced schema cannot represent.
        " Extract low-level goals from the given high-level goals and associate every low-level goal with the high-level goal it was derived from."
        " The low-level goals that you create MUST be structured to match against a set of API calls. Don't be too generic, for example, avoid goals like 'make the software fast', 'develop a web interface' etc."
        " MUST focus only on functional requirements and ignore non-functional requirements. Focus only on requirements that benefit the end user of the software."
        " The return outcome must be a JSON object of the form: "
        '{ "low_level_goals": [{"description": "goal 1", "high_level_associated": {"description": "high-level goal"}}]}'
        " Do not include any additional text or markdown or additional text or variables."
        " For example, given the high-level goal: 'Build an online shopping platform', a valid set of low-level goals could be:"
        ' { "low_level_goals": ['
        '{"description": "Implement user authentication", "high_level_associated": {"description": "Build an online shopping platform"}}, '
        '{"description": "Integrate payment gateway", "high_level_associated": {"description": "Build an online shopping platform"}}, '
        '{"description": "Create shopping cart functionality", "high_level_associated": {"description": "Build an online shopping platform"}}]}'
        " The returned low-level goals should be specific and focused on the user's needs."
    )

    prompt = f""" 
        Define low level goals from this High-level goals:
        {goals_text}
    """

    return generate_response(prompt, sys_prompt, LowLevelGoals)

### Get API List from Swagger

In [165]:
# One API operation. get_api_list_from_swagger() builds these from the Swagger
# document, and APIMapping uses them as the item type of its `APIs` list.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would alter what is sent as response_format.
class API(BaseModel):
    api_name: str  # filled from the operation's "operationId"
    api_path: str  # the key of the entry under "paths"
    description: str  # filled from the operation's "summary"
    request_type: str  # the method key under the path entry (e.g. 'get', 'post')

In [166]:
def get_api_list_from_swagger(
    swagger_url="https://raw.githubusercontent.com/WebFuzzing/EMB/refs/heads/master/openapi-swagger/genome-nexus.json",
):
    """Fetch a Swagger/OpenAPI JSON document and list its operations.

    Parameters:
    - swagger_url: location of the JSON document, fetched with get_markdown().
      Defaults to the genome-nexus specification that was previously hard-coded.

    Returns:
        A list of `API` objects, one per HTTP operation found under "paths",
        in document order.

    Notes:
    - Only HTTP-method keys of a path entry are treated as operations; other
      keys that may sit next to them (e.g. "parameters") are skipped.
    - "operationId" and "summary" are optional in the specification. A missing
      operationId falls back to "<METHOD> <path>"; a missing summary falls back
      to the operation's "description", then to an empty string.
    """
    http_methods = {"get", "put", "post", "delete", "options", "head", "patch", "trace"}

    spec = json.loads(get_markdown(swagger_url))

    preprocessed_api_list = []
    for api_path, path_item in spec["paths"].items():
        for method, operation in path_item.items():
            if method.lower() not in http_methods or not isinstance(operation, dict):
                continue
            preprocessed_api_list.append(
                API(
                    api_name=operation.get("operationId") or f"{method.upper()} {api_path}",
                    api_path=api_path,
                    description=operation.get("summary") or operation.get("description") or "",
                    request_type=method,
                )
            )

    return preprocessed_api_list


### Mapping goal to API

In [167]:
# Structured-output schema used by define_mapping_apis_goals(): the APIs chosen
# for one low-level goal.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would alter what is sent as response_format.
class APIMapping(BaseModel):
    APIs: list[API]  # APIs selected for the goal
    low_level_goal: LowLevelGoal  # the goal the APIs were selected for

In [181]:
# Import tabulate for nice table formatting
from tabulate import tabulate

def api_list_to_string(api_list):
    """Render the names of the given APIs as one comma-separated line.

    Parameters:
    - api_list: iterable of objects exposing an `api_name` string.

    Returns:
        The names joined with ", " and terminated by a newline; just "\\n" for
        an empty list.

    Joining (rather than appending ", " and stripping it afterwards) leaves
    names that themselves end in a comma or space intact.
    """
    return ", ".join(api.api_name for api in api_list) + "\n"

def define_mapping_apis_goals(lowLevelGoals, apiList):
    """Ask the model which APIs satisfy each low-level goal.

    One request is made per goal; the goal and its selected API names are
    printed as each reply arrives.

    Parameters:
    - lowLevelGoals: object exposing the goals as `.low_level_goals`.
    - apiList: the available APIs; interpolated into every prompt.

    Returns:
        A list with one generate_response() result (`APIMapping` response
        format) per goal, in goal order. Each result's `low_level_goal` is set
        to the goal that was actually asked about, so the stored mapping cannot
        drift from the input if the model echoes the goal back inexactly.
    """
    sys_prompt = (
        "You are a helpful assistant that helps developers to map low-level goals to APIs."
        " You will be given a low-level goal and a list of APIs. Your task is to identify which APIs best satisfies each low-level goal."
        # Leading space added: the sentences were previously fused ("goal.Respond").
        " Respond with only the API name or 'No API Found' in the api_name field"
    )

    # The API listing is the same for every goal, so render it once.
    api_listing = f"{apiList}"

    result = []

    for lowLevelgoal in lowLevelGoals.low_level_goals:

        prompt = f"""
            Given the following goal:
            {lowLevelgoal}

            And the list of APIs below:
            {api_listing}

            Identify the single API that best satisfies the goal. If no API satisfies the goal, return exactly "No API Found".
            Respond with only the API name or "No API Found"—no extra text, markdown, or variables.
        """

        response = generate_response(prompt, sys_prompt, APIMapping)
        # Trust the input goal over the copy echoed back by the model.
        response.low_level_goal = lowLevelgoal
        print("Goal: ", response.low_level_goal.description)
        print("APIs: ", api_list_to_string(response.APIs))
        result.append(response)

    return result

        

def print_api_goal_mapping(mappings):
    """
    Prints the mapping between low-level goals and APIs in a well-formatted table.

    Parameters:
    - mappings: An iterable of mapping objects (such as the list returned by
      define_mapping_apis_goals). Each object exposes:
        - low_level_goal.description: the goal text shown in the first column.
        - APIs: the APIs mapped to the goal; their names fill the second column.

    Any error raised while building or printing the table is caught and
    reported with a printed message instead of propagating.
    """
    try:
        table_data = [
            {
                "Low-Level Goal": mapping.low_level_goal.description,
                # api_list_to_string() ends with a newline, which would add an
                # empty line to every table cell; drop it here.
                "Mapped APIs": api_list_to_string(mapping.APIs).rstrip("\n"),
            }
            for mapping in mappings
        ]

        # Print table with tabulate
        print(tabulate(table_data, headers="keys", tablefmt="fancy_grid"))

    except Exception as e:
        print(f"Error while printing mapping: {e}")

In [179]:
# Generate the project description and show it.
# NOTE(review): get_description() prompts the model as if the fetched text were
# a README, but this URL is the OpenAPI/Swagger JSON — confirm this is intended.
print("Description STARTING...")
description = get_description(
    "https://raw.githubusercontent.com/WebFuzzing/EMB/refs/heads/master/openapi-swagger/genome-nexus.json"
)
print("Description DONE...", description, sep="\n")

Description STARTING...
Description DONE...
description="The Genome Nexus API (version 2.0) provides a collection of endpoints to access genetic variant annotations through HTTP requests. Its primary focus is on the '/annotation' endpoint, which offers long-term support for retrieving Variant Effect Predictor (VEP) annotations. The API supports a range of additional endpoints, though these may change over time.\n\nThe API is accessible via HTTP and HTTPS schemes with multiple endpoints categorized under controllers such as info-controller, pdb-controller, annotation-controller, ptm-controller, pfam-controller, and ensembl-controller. Each controller hosts specific endpoints for different types of genetic data accesses and manipulations:\n\n1. **Annotation Endpoints**: These endpoints allow users to retrieve genetic variant annotations based on different inputs, including genomic locations, variants, and dbSNP IDs. Users can send POST or GET requests with specific queries and receive JS

In [170]:
# Derive the high-level goals from the generated description and show them.
print("High Level Goals STARTING...")
highLevelGoals = define_high_level_goals(description)
print("High Level Goals DONE...", highLevelGoals, sep="\n")

High Level Goals STARTING...
High Level Goals DONE...
goals=[HighLevelGoal(description='Provide access to genomic variant annotation data through HTTP requests.'), HighLevelGoal(description='Retrieve VEP annotations for genetic research and interpretation.'), HighLevelGoal(description='Allow users to send variant information in multiple formats for annotation.'), HighLevelGoal(description='Offer client libraries in various programming languages for easy integration.'), HighLevelGoal(description='Provide command-line tools for annotating file formats like MAF and VCF.'), HighLevelGoal(description='Offer web-based tools to annotate genetic variants.'), HighLevelGoal(description='Provide access to header information from PDB IDs.'), HighLevelGoal(description='Retrieve post-translational modification entries using Ensembl Transcript IDs.'), HighLevelGoal(description='Access PFAM domain information using domain accession IDs.'), HighLevelGoal(description='Manage references and queries relat

In [171]:
# Break the high-level goals down into low-level goals and show them.
print("Low Level Goals STARTING...")
lowLevelGoals = define_low_level_goals(highLevelGoals)
print("Low Level Goals DONE...", lowLevelGoals, sep="\n")

Low Level Goals STARTING...
Low Level Goals DONE...
low_level_goals=[LowLevelGoal(description='Implement API endpoints to access genomic variant annotation data via HTTP.', high_level_associated=HighLevelGoal(description='Provide access to genomic variant annotation data through HTTP requests.')), LowLevelGoal(description='Develop VEP-specific API endpoints for retrieving genetic annotations.', high_level_associated=HighLevelGoal(description='Retrieve VEP annotations for genetic research and interpretation.')), LowLevelGoal(description='Design API endpoints to accept variant information in JSON, VCF, and HGVS formats for annotation.', high_level_associated=HighLevelGoal(description='Allow users to send variant information in multiple formats for annotation.')), LowLevelGoal(description='Create client libraries in Python, Java, and R for API interaction.', high_level_associated=HighLevelGoal(description='Offer client libraries in various programming languages for easy integration.')), L

In [172]:
# Load the API operations from the Swagger document and show them.
print("API List STARTING...")
apiList = get_api_list_from_swagger()
print("API List DONE...", apiList, sep="\n")

API List STARTING...
API List DONE...
[API(api_name='fetchVariantAnnotationPOST', api_path='/annotation', description='Retrieves VEP annotation for the provided list of variants', request_type='post'), API(api_name='fetchVariantAnnotationByIdPOST', api_path='/annotation/dbsnp/', description='Retrieves VEP annotation for the provided list of dbSNP ids', request_type='post'), API(api_name='fetchVariantAnnotationByIdGET', api_path='/annotation/dbsnp/{variantId}', description='Retrieves VEP annotation for the give dbSNP id', request_type='get'), API(api_name='fetchVariantAnnotationByGenomicLocationPOST', api_path='/annotation/genomic', description='Retrieves VEP annotation for the provided list of genomic locations', request_type='post'), API(api_name='fetchVariantAnnotationByGenomicLocationGET', api_path='/annotation/genomic/{genomicLocation}', description='Retrieves VEP annotation for the provided genomic location', request_type='get'), API(api_name='fetchVariantAnnotationGET', api_path=

In [183]:
# Map every low-level goal to APIs. define_mapping_apis_goals() prints each goal
# and its selected APIs between the two status lines below.
print("Mapping STARTING...")
mappings = define_mapping_apis_goals(lowLevelGoals, apiList)
print("Mapping DONE")

Mapping STARTING...
Goal:  Implement API endpoints to access genomic variant annotation data via HTTP.
APIs:  fetchVariantAnnotationPOST, fetchVariantAnnotationByIdPOST, fetchVariantAnnotationByIdGET, fetchVariantAnnotationByGenomicLocationPOST, fetchVariantAnnotationByGenomicLocationGET, fetchVariantAnnotationGET, fetchCanonicalEnsemblGeneIdByEntrezGeneIdsPOST, fetchCanonicalEnsemblGeneIdByEntrezGeneIdGET, fetchCanonicalEnsemblGeneIdByHugoSymbolsPOST, fetchCanonicalEnsemblGeneIdByHugoSymbolGET, fetchCanonicalEnsemblTranscriptsByHugoSymbolsPOST, fetchCanonicalEnsemblTranscriptByHugoSymbolGET, fetchEnsemblTranscriptsGET, fetchEnsemblTranscriptsByEnsemblFilterPOST, fetchEnsemblTranscriptByTranscriptIdGET, fetchGeneXrefsGET, fetchPdbHeaderPOST, fetchPdbHeaderGET, fetchPfamDomainsByPfamAccessionPOST, fetchPfamDomainsByAccessionGET, fetchPostTranslationalModificationsGET, fetchPostTranslationalModificationsByPtmFilterPOST, fetchVersionGET

Goal:  Develop VEP-specific API endpoints for retri

In [184]:
# Show the goal-to-API mappings as a table, preceded by a few blank lines.
print("\n" * 2)
print_api_goal_mapping(mappings)




╒════════════════════════════════════════════════════════════════════════════════════════════════════════╤═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╕
│ Low-Level Goal                                  