In [23]:
import os
import dotenv
import re
import requests
import logging
from openai import OpenAI
from langchain.chat_models import ChatOpenAI

# Configure the root logger at INFO level: the functions below report their
# progress and the final answer through logging.info / logging.error.
logging.basicConfig(level=logging.INFO)

In [24]:
# Location of the .env file that defines OPENAI_API_KEY. Set the
# KEGG_CHATBOT_ENV_FILE environment variable to point elsewhere so the notebook
# is not tied to one machine's directory layout; the default is the original path.
ENV_FILE = os.getenv("KEGG_CHATBOT_ENV_FILE", '/Users/asze01/Code/Hassoun-Lab/GPT.env')

# load_dotenv reports whether it loaded anything. A falsy result is only a
# warning here because the key may already be present in the process environment.
if not dotenv.load_dotenv(ENV_FILE):
    logging.warning(f"No variables loaded from {ENV_FILE}; relying on the process environment.")

# Retrieve the API key once and use it everywhere
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("API key not found. Please check your .env file.")

# Do not print or log api_key: cell output is stored in the notebook file and
# would leak the secret when the notebook is shared.

# Initialize OpenAI client and chat model with the same API key
client = OpenAI(api_key=api_key)
# NOTE(review): `chat` is not referenced by any cell visible here; kept in case
# other cells depend on it.
chat = ChatOpenAI(model="gpt-4o", temperature=0.2, openai_api_key=api_key)

In [25]:
# Matches URLs on the KEGG REST host only. The character class stops at
# whitespace and at the delimiters a chat model typically wraps a URL in
# (markdown backticks/brackets/parentheses, quotes, angle brackets).
_KEGG_URL_PATTERN = re.compile(r'https?://rest\.kegg\.jp/[^\s<>"\'`()\[\]]+', re.IGNORECASE)
# Sentence/markdown punctuation that may still cling to the end of a match.
_URL_TRAILING_PUNCTUATION = '.,;:!?*}'
# KEGG "get" operation, regardless of http/https.
_KEGG_GET_PREFIX = re.compile(r'https?://rest\.kegg\.jp/get/', re.IGNORECASE)
# The GENES field label at the start of a line in a KEGG flat-file entry.
_GENES_FIELD = re.compile(r'^GENES\b', re.MULTILINE)


def _extract_kegg_url(text):
    """Return the last KEGG REST URL found in ``text``, or None if there is none."""
    candidates = [
        match.rstrip(_URL_TRAILING_PUNCTUATION)
        for match in _KEGG_URL_PATTERN.findall(text or "")
    ]
    return candidates[-1] if candidates else None


def _drop_genes_section(entry_text):
    """Return ``entry_text`` cut off just before a line-initial GENES field, if present."""
    field = _GENES_FIELD.search(entry_text)
    return entry_text[:field.start()] if field else entry_text


def query_kegg_for_info(keywords, timeout=30):
    """Turn keywords into a KEGG REST URL via the chat model and fetch it.

    The model's reply is scanned for URLs on ``rest.kegg.jp``; the last one is
    requested. Model output is untrusted, so URLs on any other host are ignored
    rather than fetched. For ``/get/`` requests the response is truncated at
    the GENES field.

    Args:
        keywords: Keyword text to hand to the model.
        timeout: Seconds to wait for the KEGG HTTP request (default 30).

    Returns:
        The response body as a string, or None when no keywords were given, no
        KEGG URL was produced, the HTTP status was not 200, or any exception
        occurred (all failures are logged, never raised).
    """
    if not keywords:
        # Avoid spending a model call on an empty query.
        logging.error("No keywords provided for KEGG query.")
        return None

    try:
        chat_completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a KEGG REST API generator. Convert these keywords from a question into one single correct KEGG API URL following KEGG REST API documentation and output it."},
                {"role": "user", "content": f"Keywords: {keywords}"}
            ],
            temperature=0.2
        )
        response_text = chat_completion.choices[0].message.content

        kegg_url = _extract_kegg_url(response_text)
        if kegg_url is None:
            logging.error("No valid URL found.")
            return None

        logging.info(f"API Query URL: {kegg_url}")
        # A timeout keeps an unresponsive server from hanging the kernel.
        response = requests.get(kegg_url, timeout=timeout)
        if response.status_code != 200:
            logging.error(f"Failed to retrieve data: Status code {response.status_code}")
            return None

        response_content = response.text
        if _KEGG_GET_PREFIX.match(kegg_url):
            # Exclude the GENES field and everything below it.
            response_content = _drop_genes_section(response_content)
        return response_content
    except Exception as e:
        # Best-effort contract: callers test for None instead of catching.
        logging.error(f"Error querying KEGG: {e}")
        return None

def extract_keywords(question):
    """Ask the chat model for the main keywords of a question.

    Args:
        question: The user's question text, sent as the user message.

    Returns:
        The model's reply with surrounding whitespace stripped, or None if the
        request or the handling of its reply raised (the error is logged).
    """
    stripped_keywords = None
    try:
        prompt_messages = [
            {"role": "system", "content": "You are a biochemistry assistant. Identify the main keywords from this question."},
            {"role": "user", "content": question},
        ]
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=prompt_messages,
        )
        raw_reply = completion.choices[0].message.content
        # Log the reply as received, before trimming.
        logging.info(f"Extracted Keywords: {raw_reply}")
        stripped_keywords = raw_reply.strip()
    except Exception as err:
        logging.error(f"Error extracting keywords: {err}")
    return stripped_keywords

In [26]:
def interact_with_chatbot(user_input):
    """Answer a question from KEGG context and log the answer.

    Runs three stages: extract keywords from ``user_input``, fetch KEGG text
    for those keywords, then ask the chat model to answer using only that
    text. The answer is written with logging.info; the function always
    returns None. A failed stage is logged and ends the run early.

    Args:
        user_input: The user's question.
    """
    # Stage 1: keywords.
    extracted = extract_keywords(user_input)
    if not extracted:
        logging.error("No keywords extracted.")
        return

    # Stage 2: KEGG context for those keywords.
    kegg_context = query_kegg_for_info(extracted)
    if not kegg_context:
        logging.error("No information retrieved from KEGG.")
        return

    # Stage 3: grounded answer.
    try:
        system_message = {
            "role": "system",
            "content": "You are a biochemistry assistant. Answer the user question concisely using only the given context from KEGG and cite your response. Do not give any answers that can't be directly supported with the given context from KEGG.",
        }
        user_message = {
            "role": "user",
            "content": f"User Question: {user_input}. Context from KEGG: {kegg_context}",
        }
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[system_message, user_message],
        )
        answer = completion.choices[0].message.content
        logging.info(answer)
    except Exception as err:
        logging.error(f"Error during final interaction: {err}")

In [22]:
# Demo query. The answer is emitted through logging.info (see the INFO:root
# lines in the output); the call itself returns None.
interact_with_chatbot("Show me the reaction mechanism of enzyme catalase.")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Extracted Keywords: - Reaction mechanism
- Enzyme
- Catalase
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:API Query URL: http://rest.kegg.jp/find/enzyme/catalase
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:The enzyme catalase (EC 1.11.1.6) and catalase-peroxidase (EC 1.11.1.21) facilitate the conversion of hydrogen peroxide (H₂O₂) into water (H₂O) and oxygen (O₂). The general reaction for catalase can be simplified to:

\[ 2 H₂O₂ \rightarrow 2 H₂O + O₂ \]

Catalase-peroxidase, represented by the gene katG, also catalyzes this reaction.

Reference:
KEGG, EC 1.11.1.6 and EC 1.11.1.21


In [27]:
# Second demo query. As above, the answer appears in the log output rather
# than as a return value.
interact_with_chatbot("Which all substrates can be accepted by enzyme amylase?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Extracted Keywords: - Substrates
- Enzyme
- Amylase
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:API Query URL: http://rest.kegg.jp/link/compound/enzyme:3.2.1.1
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:The enzyme amylase (EC 3.2.1.1) can accept the following substrates: water (cpd:C00001), glycogen (cpd:C00208), maltotriose (cpd:C00369), starch (cpd:C00721), and amylopectin (cpd:C01935).

Cite:
KEGG 
