In [24]:
import requests
import json
import os
import openai



In [25]:
# Replace with your cluster of genes
# AKAP11 ANAPC1 ANKRD11 ANKRD31 DOCK2 HECTD4 ITPR1 LYST MYLK MYO5A PCDH15 
# PFDN6 PLXNA2 PLXNA4 PTPN13 RALGAPA2 TRRAP
cluster_genes = ["AKAP11", "ANAPC1", "ANKRD11", "ANKRD31", 
                 "DOCK2", "HECTD4", "ITPR1", "LYST", "MYLK", 
                 "MYO5A", "PCDH15", "PFDN6", "PLXNA2", "PLXNA4", 
                 "PTPN13", "RALGAPA2", "TRRAP"]

cluster_genes_text = " ".join(cluster_genes)
cluster_genes_text

'AKAP11 ANAPC1 ANKRD11 ANKRD31 DOCK2 HECTD4 ITPR1 LYST MYLK MYO5A PCDH15 PFDN6 PLXNA2 PLXNA4 PTPN13 RALGAPA2 TRRAP'

In [26]:

import requests
import json

def query_gprofiler(cluster_genes):
    url = "https://biit.cs.ut.ee/gprofiler/api/gost/profile"
    headers = {"Content-Type": "application/json"}
    payload = {
        "organism": "hsapiens",
        "query": cluster_genes,
        "sources": ["GO:BP", "KEGG", "REAC", "WP", "MIRNA", "HPA", "CORUM"],
        "user_threshold": 0.1,
        "all_results": False,
        "ordered": False,
        "no_iea": False,
        "combined": True,
        "measure_underrepresentation": False
    }
    response = requests.post(url, headers=headers, data=json.dumps(payload))
    json_response = response.json()

    filtered_results = []
    for item in json_response['result']:
        filtered_item = {
            "name": item["name"],
            "description": item["description"],
            "source": item["source"],
            "p_value": item["p_values"]
        }
        filtered_results.append(filtered_item)

    return filtered_results

def gprofiler_results_to_text(gprofiler_results):
    result_names = [result['name'] for result in gprofiler_results]
    return '\n'.join(result_names)

In [27]:
gprofiler_results = query_gprofiler(cluster_genes)
gprofiler_results

[{'name': 'SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion',
  'description': 'SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion',
  'source': 'REAC',
  'p_value': [0.02967279395498139]},
 {'name': 'MFAP5 effect on permeability and motility of endothelial cells via cytoskeleton rearrangement',
  'description': 'MFAP5 effect on permeability and motility of endothelial cells via cytoskeleton rearrangement',
  'source': 'WP',
  'p_value': [0.030285217532717647]},
 {'name': 'Sema3A PAK dependent Axon repulsion',
  'description': 'Sema3A PAK dependent Axon repulsion',
  'source': 'REAC',
  'p_value': [0.039080559405259445]},
 {'name': 'CRMPs in Sema3A signaling',
  'description': 'CRMPs in Sema3A signaling',
  'source': 'REAC',
  'p_value': [0.039080559405259445]},
 {'name': 'Other semaphorin interactions',
  'description': 'Other semaphorin interactions',
  'source': 'REAC',
  'p_value': [0.0555865049099508]},
 {'name': 'semaphorin-plexin signaling pathway

In [28]:
import requests

def query_string_api(cluster_genes):
    string_api_url = "https://string-db.org/api/json/network"
    string_params = {
        "identifiers": "%0d".join(cluster_genes),
        "species": 9606,
        "caller_identity": "myapp"
    }
    response = requests.get(string_api_url, params=string_params)
    json_response = response.json()

    nodes = set()
    edges = []
    for interaction in json_response:
        nodes.add(interaction["preferredName_A"])
        nodes.add(interaction["preferredName_B"])
        edges.append({
            "source": interaction["preferredName_A"],
            "target": interaction["preferredName_B"]
        })

    return {"nodes": list(nodes), "edges": edges}


In [29]:
string_results = query_string_api(cluster_genes)
string_results

{'nodes': ['ITPR1', 'PLXNA4', 'PLXNA2', 'MYLK', 'MYO5A', 'LYST'],
 'edges': [{'source': 'ITPR1', 'target': 'MYLK'},
  {'source': 'ITPR1', 'target': 'MYLK'},
  {'source': 'PLXNA4', 'target': 'PLXNA2'},
  {'source': 'PLXNA4', 'target': 'PLXNA2'},
  {'source': 'LYST', 'target': 'MYO5A'},
  {'source': 'LYST', 'target': 'MYO5A'}]}

In [30]:
## placeholder data for chatgtp
cluster_name = "my_cluster"
summary = "summary of the cluster"
chatgpt_analysis = "analysis by chatgpt"

# Load your API key from an environment variable or secret management service
openai.api_key = os.getenv("OPENAI_API_KEY")

gprofiler_text = gprofiler_results_to_text(gprofiler_results)

# Combine the background input and questions into a single prompt
prompt = f"write a brief analysis of these genes {cluster_genes_text} \n based on background knowledge plus these processes relevant to some of the genes \n{gprofiler_text}"
#print(prompt)
    
# Call the OpenAI API to generate answers
response = openai.Completion.create(
    engine="davinci",
    prompt=prompt,
    max_tokens=1000,
    n=1,
    stop=None,
    temperature=0,
)

#print(response)
# Parse the response to get the text of the first choice
chatgpt_analysis = response.choices[0].text
print(chatgpt_analysis)







The following is a brief description of the genes and their functions.

AKAP11 is a gene that encodes a protein called A-kinase anchor protein 11. This protein is a member of the AKAP family, which is a group of proteins that bind to the regulatory subunit of protein kinase A (PKA) and anchor it to the cytoskeleton. This protein is expressed in the brain, and is thought to be involved in the regulation of PKA activity.

ANAPC1 is a gene that encodes a protein called Anaphase-promoting complex subunit 1. This protein is a subunit of the anaphase-promoting complex (APC), which is a complex that targets proteins for degradation. This protein is thought to be involved in the regulation of the cell cycle.

ANKRD11 is a gene that encodes a protein called ankyrin repeat domain 11. This protein is a member of the ankyrin repeat protein family, which is a group of proteins that contain ankyrin repeats. This protein is thought to be involved in the regulation of the cell cycle.

ANKRD31 is a g

In [31]:
from jinja2 import Template

def generate_html_report(cluster_name, summary, gprofiler_results, string_results, chatgpt_analysis):
    template_string = '''
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <title>{{ cluster_name }} Cluster Report</title>
        <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
        <script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
        <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js"></script>
    </head>
    <body>
        <div class="container">
            <h1>{{ cluster_name }} Cluster Report</h1>
            <p>{{ summary }}</p>
            <h2>ChatGPT Analysis</h2>
            <p>{{ chatgpt_analysis }}</p>
            <h2>g:Profiler Functional Enrichment Results</h2>
            <table class="table table-striped">
                <thead>
                    <tr>
                        <th>Term Name</th>
                        <th>Description</th>
                        <th>Source</th>
                        <th>p-value</th>
                    </tr>
                </thead>
                <tbody>
                    {% for result in gprofiler_results %}
                    <tr>
                        <td>{{ result.name }}</td>
                        <td>{{ result.description }}</td>
                        <td>{{ result.source }}</td>
                        <td>{{ result.p_value }}</td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
            <h2>STRING Interaction Network</h2>
            <table class="table table-striped">
                <thead>
                    <tr>
                        <th>Source</th>
                        <th>Target</th>
                    </tr>
                </thead>
                <tbody>
                    {% for edge in string_results.edges %}
                    <tr>
                        <td>{{ edge.source }}</td>
                        <td>{{ edge.target }}</td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
        </div>
    </body>
    </html>
    '''
    template = Template(template_string)
    html_report = template.render(cluster_name=cluster_name, summary=summary, chatgpt_analysis=chatgpt_analysis, gprofiler_results=gprofiler_results, string_results=string_results)
    return html_report




In [34]:
from IPython.display import HTML
chatgpt_analysis = response_text
html_report = generate_html_report(cluster_name, summary, gprofiler_results, string_results, chatgpt_analysis)
HTML(html_report)


Term Name,Description,Source,p-value
SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion,SEMA3A-Plexin repulsion signaling by inhibiting Integrin adhesion,REAC,[0.02967279395498139]
MFAP5 effect on permeability and motility of endothelial cells via cytoskeleton rearrangement,MFAP5 effect on permeability and motility of endothelial cells via cytoskeleton rearrangement,WP,[0.030285217532717647]
Sema3A PAK dependent Axon repulsion,Sema3A PAK dependent Axon repulsion,REAC,[0.039080559405259445]
CRMPs in Sema3A signaling,CRMPs in Sema3A signaling,REAC,[0.039080559405259445]
Other semaphorin interactions,Other semaphorin interactions,REAC,[0.0555865049099508]
semaphorin-plexin signaling pathway involved in axon guidance,"""Any semaphorin-plexin signaling pathway that is involved in axon guidance."" [GOC:BHF, GOC:rl, GOC:TermGenie, PMID:22790009]",GO:BP,[0.08108453303744273]

Source,Target
ITPR1,MYLK
ITPR1,MYLK
PLXNA4,PLXNA2
PLXNA4,PLXNA2
LYST,MYO5A
LYST,MYO5A
