In [1]:
import json
import openai
import os
import time 
import argparse
# openai.api_key = os.getenv("OPENAI_API_KEY")


In [2]:

def load_log(LOG_FILE):
    """Load the usage log stored at ``LOG_FILE``, or start a fresh one.

    Args:
        LOG_FILE: Path of the JSON log file.

    Returns:
        dict: Always contains the counters ``tokens_used``, ``dollars_spent``,
        ``time_taken_last_run`` and ``time_taken_total``. Values found in the
        file override the zeroed defaults; any extra keys in the file are kept.

    Raises:
        ValueError: If the file holds JSON that is not an object.
        json.JSONDecodeError: If the file is not valid JSON. This is left to
            propagate on purpose so a damaged log is never silently reset
            (that would also reset the recorded spend).
    """
    log_data = {"tokens_used": 0, "dollars_spent": 0.0, "time_taken_last_run": 0.0, "time_taken_total": 0.0}

    # Open directly instead of checking os.path.exists first: the file could
    # disappear between the check and the open.
    try:
        with open(LOG_FILE, "r") as f:
            stored = json.load(f)
    except FileNotFoundError:
        return log_data

    if not isinstance(stored, dict):
        raise ValueError(f"Log file {LOG_FILE!r} does not contain a JSON object")

    # Merge over the defaults so a log that lacks some counters cannot cause a
    # KeyError in code that increments them.
    log_data.update(stored)
    return log_data

def save_log(LOG_FILE,log_data):
    """Overwrite ``LOG_FILE`` with ``log_data`` serialized as 4-space-indented JSON."""
    with open(LOG_FILE, "w") as handle:
        serialized = json.dumps(log_data, indent=4)
        handle.write(serialized)

def estimate_cost(tokens, rate_per_token):
    """Return the price of ``tokens`` tokens at ``rate_per_token`` each."""
    cost = tokens * rate_per_token
    return cost

def openai_chat(context, prompt, model,temperature, max_tokens, rate_per_token, LOG_FILE, DOLLAR_LIMIT):
    """Send one system+user chat request to OpenAI and record its usage in the log.

    Args:
        context: Text sent as the "system" message.
        prompt: Text sent as the "user" message.
        model: Model name passed to ``openai.ChatCompletion.create``.
        temperature: Sampling temperature passed to the API.
        max_tokens: Completion-length cap passed to the API.
        rate_per_token: Dollar price applied to every token of this call.
        LOG_FILE: Path of the JSON usage log (read with ``load_log``, written
            with ``save_log``).
        DOLLAR_LIMIT: The request is skipped when the logged spend plus the
            estimated cost of this request would exceed this amount.

    Returns:
        The message content of the first returned choice, or ``None`` when the
        request is skipped for budget reasons or when any exception is raised
        while calling the API or updating the log.
    """
    log_data = load_log(LOG_FILE)
    spent_so_far = log_data.get("dollars_spent", 0.0)

    # Pre-flight estimate: character counts stand in for token counts (no
    # tokenizer is used here). The system message is counted too, because it is
    # sent with the request.
    tokens_estimate = len(context or "") + len(prompt) + max_tokens

    # Add this call's estimated cost to the dollars already logged. Multiplying
    # the lifetime token count by the current rate would misprice earlier calls
    # that were made at a different rate.
    if spent_so_far + estimate_cost(tokens_estimate, rate_per_token) > DOLLAR_LIMIT:
        print("The API call is estimated to exceed the dollar limit. Aborting.")
        return None

    try:
        # perf_counter is a monotonic clock, so the duration cannot be skewed
        # by system clock adjustments.
        start_time = time.perf_counter()
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "system", "content": context},{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=temperature,
        )
        time_usage = time.perf_counter() - start_time

        tokens_used = response["usage"]["total_tokens"]

        # Update and save the log; only this call's tokens are priced at this
        # call's rate.
        log_data["tokens_used"] += tokens_used
        log_data["dollars_spent"] = spent_so_far + estimate_cost(tokens_used, rate_per_token)
        log_data["time_taken_last_run"] = time_usage
        log_data["time_taken_total"] += time_usage
        print(tokens_used)
        save_log(LOG_FILE,log_data)

        return response.choices[0].message.content
    except Exception as e:
        # Best-effort by design: report the failure and let the caller see None.
        print(f"An error occurred: {e}")
        return None


In [4]:
# Read the OpenAI API key from an interactive prompt (getpass does not echo
# what is typed) instead of hard-coding it in the notebook.
from getpass import getpass
key = getpass()

 ···················································


In [5]:
# Give the openai module the key that was read with getpass() in the earlier cell.
openai.api_key = key

In [6]:
# System message handed to openai_chat as `context`.
context = (
    "You are assisting a molecular biologist in the analysis of a system of proteins "
    "that are physically close to each other and/or interact with each other."
)

In [7]:
# User message handed to openai_chat as `prompt`: the analysis instructions
# followed by the summaries of two child clusters. The literal is sent as-is,
# so its whitespace and line breaks are part of the request.
prompt = """Write a critical and concise analysis of this system based on the child systems summary, describing your reasoning as you go.        
Format with Title, Summary, and References sections.        
Avoid overly general statements of how the proteins are involved in various cellular processes
        
Avoid recapitulating the goals of the analysis. 
What cellular components and complexes are involved in this system?
What mechanisms and biological processes are performed by this system?
Discuss potential names for the system. Names should reflect the function and location, and should be specific not general. Select the best name and place it as the title of your report
Provide complete paper references with pubmed link, if any
A critical goal of the analysis is to determine if this is a novel complex or if any proteins are novel members of a known complex

Here are the summaries of the child nodes directly branched from this parent system: 

Child cluster with Genes: MRPL15, MRPL39, MRPL52, MTHFD2, RHOT2
The system of proteins analyzed in this report consists of five proteins: MRPL15, MRPL39, MRPL52, MTHFD2, and RHOT2. These proteins are all localized to the mitochondrion, with three of them (MRPL15, MRPL39, and MRPL52) being part of the mitochondrial large ribosomal subunit and involved in mitochondrial translation. Additionally, MRPL15, MTHFD2, and RHOT2 are involved in protein binding, while MRPL15 and MRPL39 have RNA binding capabilities. The system appears to be a novel complex, as it includes MTHFD2 and RHOT2, which are not known to be part of any other mitochondrial ribosomal complexes. The proposed name for this system is the Mitochondrial Ribosomal Complex with MTHFD2 and RHOT2, reflecting its function and location.
Cellular Components and Complexes:
The proteins in this system are involved in the following cellular components and complexes:
1. Mitochondrial large ribosomal subunit: MRPL15, MRPL39, and MRPL52 are part of this subunit, which is responsible for mitochondrial translation (Greber et al., 2014).
2. Mitochondrial inner membrane: MRPL15, MRPL39, and MRPL52 are localized to this membrane, which is crucial for maintaining mitochondrial function (Kühlbrandt, 2015).
Mechanisms and Biological Processes:
The proteins in this system are involved in the following mechanisms and biological processes:
1. Mitochondrial translation: MRPL15, MRPL39, and MRPL52 are involved in the translation of proteins within the mitochondria (O'Brien et al., 2017).
2. Protein binding: MRPL15, MTHFD2, and RHOT2 are involved in protein-protein interactions, which are essential for various cellular processes (Stumpf et al., 2008).
3. RNA binding: MRPL15 and MRPL39 have RNA binding capabilities, which are important for ribosome assembly and function (Simsek et al., 2017).

Child cluster with Genes: COQ3, MRPL22, MRPL47, MRPL41, MRPL37, MALSU1, MRPL46, MTIF2, GADD45GIP1, MRPL19, MRPL53, MRPL28, MRPL44, PARP9, MRPL12, ELAVL2, MRPL42, HSPD1, MTFR1L, MAPK11, MRPL48, MRPL9, MTERF3, FAM120A, IREB2, MRPL10, MRPL50, RO60, MRPL58, CNNM2, CNNM3, MRPL23, MRPL51, YARS2, NGRN, MRPL14, MRPL40, GTPBP3, PMPCB, PMPCA, BCAT2, MRPL57
The system of proteins analyzed in this report is primarily involved in the mitochondrial large ribosomal subunit and its associated processes. The majority of the proteins are localized to the mitochondrion and mitochondrial inner membrane, with a significant number involved in mitochondrial translation and RNA binding. The system appears to be involved in various cellular processes, including translation, ribosome assembly, and protein targeting to the mitochondrion. The proposed name for this system is "Mitochondrial Large Ribosomal Subunit and Associated Proteins."
Cellular Components and Complexes:
The main cellular components and complexes involved in this system are the mitochondrial large ribosomal subunit, mitochondrial inner membrane, and mitochondrial matrix. Several proteins are also found in the nucleoplasm, cytosol, plasma membrane, and extracellular space.
Mechanisms and Biological Processes:
The primary biological processes performed by this system are mitochondrial translation, ribosome assembly, and protein targeting to the mitochondrion. Other processes include metal ion binding, GTPase activity, magnesium ion homeostasis, and positive regulation of interleukin-12 production."""

In [13]:
# Same call as before, spelled with keyword arguments so each value is labelled.
response_text = openai_chat(
    context=context,
    prompt=prompt,
    model="gpt-4-0314",
    temperature=0.2,
    max_tokens=1000,
    rate_per_token=6e-5,
    LOG_FILE="test.log",
    DOLLAR_LIMIT=2,
)


1621


In [14]:
# openai_chat returns None when the request is skipped for budget reasons or
# fails. Writing None would raise TypeError after the output file had already
# been truncated, so only write when there is a real response.
if response_text is not None:
    with open("test-gpt-4-0314.out", "w") as f:
        f.write(response_text)
else:
    print("No response to save; test-gpt-4-0314.out was not written.")

In [None]:

# execute the script
if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--openai_api_key", type=str, required=True)
    argparser.add_argument("--context", type=str, default=)
    argparser.add_argument("--prompt",required=True, type=str, help="input prompt to chatgpt")
    argparser.add_argument("--model", type=str, default="gpt-3.5-turbo")
    argparser.add_argument("--temperature", type=float, default=0, help="temperature for chatgpt to control randomness, 0 means deterministic, 1 means random")
    argparser.add_argument("--max_tokens", type=int, default=500, help="max tokens for chatgpt response")
    argparser.add_argument("--rate_per_token", type=float, default=0.0005, help="rate per token to estimate cost")
    argparser.add_argument("--log_file", type=str, default="./log.json", help="PATH to the log file to save tokens used and dollars spent")
    argparser.add_argument("--dollor_limit", type=float, default=5.0, help="dollor limit to abort the chatgpt query")
    argparser.add_argument("--file_path", type=str, default="./response.txt", help="PATH to the file to save the response")
    
    args = argparser.parse_args()

     = args.openai_api_key

    context = args.context
    prompt = args.prompt
    model = args.model
    temperature = args.temperature
    max_tokens = args.max_tokens
    rate_per_token = args.rate_per_token
    file_path = args.file_path

    LOG_FILE = args.log_file
    DOLLAR_LIMIT = args.dollor_limit
    if response_text:
        
        # print(response_text)