In [4]:
import ndex2 as nc

import os

# NDEx connection settings. Each value can be overridden through an
# environment variable so that real credentials never need to be written into
# the notebook; the original literals are kept as defaults so existing runs
# behave the same when the variables are unset.
ndexuser = os.environ.get("NDEX_USER", "examplemodel")
ndexpassword = os.environ.get("NDEX_PASSWORD", "modelx")
SERVER = os.environ.get("NDEX_SERVER", 'http://ndexbio.org')


In [12]:
import pandas as pd
# Load the network identified by model_uuid from the NDEx server as a NiceCX
# object, authenticating with the account configured in the first cell.
model_uuid = "61a58f6e-ed06-11ed-b4a3-005056ae23aa"
model = nc.create_nice_cx_from_server(
    SERVER,
    uuid=model_uuid,
    username=ndexuser,
    password=ndexpassword,
)


'hidef_on_bc_ppi_minkyu'

In [13]:
import pandas as pd
from datetime import datetime
import os

def add_row(df, type_value, text_value):
    """
    Return a copy of ``df`` with one extra ``type``/``text`` row at the end.

    ``DataFrame.append`` was deprecated in pandas 1.4 and removed in pandas 2.0,
    so the row is attached with ``pd.concat`` instead.

    :param df: existing DataFrame; it is not modified
    :param type_value: value for the new row's ``type`` column
    :param text_value: value for the new row's ``text`` column
    :return: A new DataFrame, re-indexed from 0, ending with the new row
    """
    new_row = pd.DataFrame({"type": [type_value], "text": [text_value]})

    # ignore_index=True renumbers the rows, matching the old append() call.
    return pd.concat([df, new_row], ignore_index=True)

def write_csv(dataframe, name, directory='~/csv_upload/'):
    """
    Write a DataFrame to a timestamped CSV file and return the file's path.

    :param dataframe: DataFrame to save (written without its index)
    :param name: prefix of the file name; a ``_YYYY_MM_DD-HH_MM_SS`` timestamp
        and the ``.csv`` extension are appended
    :param directory: target directory, ``~`` is expanded; created (including
        parents) when missing. Defaults to ``~/csv_upload/``
    :return: A string containing the path of the file that was written
    """
    directory = os.path.expanduser(directory)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    # The timestamp keeps successive exports from overwriting each other.
    date_time = datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
    filename = os.path.join(directory, f"{name}_{date_time}.csv")

    dataframe.to_csv(filename, index=False)

    print(f"File saved to: {filename}")
    return filename


In [70]:
import os
import json

def get_cached_dictionary(cache, cache_file):
    """
    Return the analysis cache, preferring the in-memory copy over the file.

    :param cache: cache already held in memory, or None to load from disk
    :param cache_file: path of the JSON file backing the cache
    :return: ``cache`` itself when it is not None; otherwise the parsed
        contents of ``cache_file``, or an empty dictionary if that file
        does not exist
    """
    # An in-memory cache always wins, even when it is empty.
    if cache is not None:
        return cache

    # Nothing has been saved yet: start from an empty dictionary.
    if not os.path.exists(cache_file):
        return {}

    with open(cache_file, 'r') as handle:
        return json.load(handle)

def save_cached_dictionary(cache_data, cache_file):
    """
    Save the analysis cache to ``cache_file`` as JSON.

    The data is serialized before the file is opened: opening in ``'w'`` mode
    truncates the file immediately, so serializing afterwards would wipe the
    previously saved cache whenever ``cache_data`` holds a value that JSON
    cannot encode.

    :param cache_data: JSON-serializable object to store
    :param cache_file: path of the file to (over)write
    :raises TypeError: if ``cache_data`` is not JSON serializable; the
        existing file is left untouched in that case
    """
    serialized = json.dumps(cache_data)
    with open(cache_file, 'w') as file:
        file.write(serialized)
        
def get_model_json_cache_file_name(name, directory='~/json_cache/'):
    """
    Return the path of the JSON cache file for a model, creating its directory.

    The file name depends only on ``name`` (no timestamp), so repeated calls
    for the same model resolve to the same cache file.

    :param name: model name, used as the file name without extension
    :param directory: cache directory, ``~`` is expanded; created (including
        parents) when missing. Defaults to ``~/json_cache/``
    :return: A string containing the path ``<directory>/<name>.json``
    """
    directory = os.path.expanduser(directory)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    return os.path.join(directory, f"{name}.json")
    
def dict_to_dataframe(dict_of_dicts):
    """
    Turn a dictionary of per-system dictionaries into a flat DataFrame.

    :param dict_of_dicts: mapping from an outer key to a dictionary of values
    :return: A DataFrame with one row per outer key; the outer key is stored
        in the ``system`` column and each inner key becomes its own column
    """
    return (
        pd.DataFrame(dict_of_dicts)
        .T                                    # outer keys become the row index
        .reset_index()                        # ...and then an 'index' column
        .rename(columns={'index': 'system'})
    )


In [75]:
import openai
import os

# The OpenAI API key is read from the OPENAI_API_KEY environment variable so it
# is never stored in the notebook; None is assigned when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")


def process_system(model, system_name, model_analysis):
    """
    Query GPT-4 for an analysis of one system and record it in ``model_analysis``.

    The system's genes are read from the ``CD_MemberList`` attribute of the
    node called ``system_name``; the value is split on whitespace.

    :param model: network object providing ``get_node_by_name`` and
        ``get_node_attribute_value``
    :param system_name: name of the node (system) to analyze
    :param model_analysis: dictionary updated in place;
        ``model_analysis[system_name]`` is set to ``{"analysis": <text>}``
    :raises ValueError: if the node is not found or has no member list
    """
    print(system_name)

    # NOTE(review): assumes both lookups return None when nothing matches --
    # confirm against the ndex2 version in use.
    system = model.get_node_by_name(system_name)
    if system is None:
        raise ValueError(f"No node named {system_name!r} was found in the model")

    member_list = model.get_node_attribute_value(system, "CD_MemberList")
    if not member_list:
        raise ValueError(f"Node {system_name!r} has no CD_MemberList value")

    # split() without an argument ignores repeated or leading/trailing spaces,
    # so no empty gene names end up in the prompt.
    genes = member_list.split()
    print(genes)

    user_prompt = make_user_prompt(genes)
    system_prompt = "You are assisting a molecular biologist to analyze systems of interacting proteins"
    response = gpt_4_query(system_prompt, user_prompt)

    # Keep only the text of the first completion choice.
    analysis = response['choices'][0]['message']['content']
    model_analysis[system_name] = {"analysis": analysis}


def make_user_prompt(genes):
    """
    Create the ChatGPT user prompt for a list of interacting proteins.

    The instructions are kept as one string per prompt line and joined with
    newlines, so no source-code indentation leaks into the prompt text.

    :param genes: iterable of gene symbols (strings)
    :return: A string containing the ChatGPT prompt text (plain text)
    """
    task_instructions = (
        "Let's write a critical analysis of this system of interacting proteins.",
        "Work step by step. For each important point, describe your reasoning.",
        "What mechanisms and biological processes are performed by this system?",
        "What cellular components and complexes are involved in this system?",
        "",  # blank line before the naming instruction
        "Propose a name for the system. The name should be very brief. Do not compose an acronym.",
    )

    general_analysis_instructions = (
        "Save any summary analysis of the system to the last paragraph.",
        "Avoid overly general statements of how the proteins are involved in various cellular processes",
        "Use gene symbols, not the full gene names",
        "Put the proposed system name at the beginning of the analysis",
        "Be brief, avoid using unnecessary words",
        "Avoid recapitulating the goals of the analysis.",
    )

    instructions = "\n".join(task_instructions + general_analysis_instructions)
    gene_list = ", ".join(genes)

    return (
        f"{instructions}"
        "\n\nHere are the interacting proteins:\n"
        f"\nProteins: {gene_list}.\n\n"
    )


def gpt_4_query(system_prompt, user_prompt):
    """
    Send a system prompt and a user prompt to the "gpt-4" chat model.

    The user prompt is printed before the request is made.

    :param system_prompt: content of the ``system`` message
    :param user_prompt: content of the ``user`` message
    :return: The response object returned by ``openai.ChatCompletion.create``
    """
    print(user_prompt)

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 client
    # interface -- confirm against the installed openai package version.
    return openai.ChatCompletion.create(model="gpt-4", messages=conversation)



In [76]:
# Disabled: loop over every node of the model instead of a single system.
# for system_id, system in model.get_nodes():
#    process_system(system)


# Analyze one system and persist the result. The cache file is named after the
# network's "name" attribute; whatever that file already holds is loaded first
# (None forces a read from disk), the new analysis is added under "C2368", and
# the combined dictionary is written back before being shown as a DataFrame.
model_name = model.get_network_attribute("name").get("v")
model_cache_file = get_model_json_cache_file_name(model_name)
model_analysis = get_cached_dictionary(None, model_cache_file)
process_system(model, "C2368", model_analysis)
save_cached_dictionary(model_analysis, model_cache_file)
model_df = dict_to_dataframe(model_analysis)
model_df

C2368
['CBFB', 'HSPA2', 'HSPA4', 'HSPA4L', 'HSPBP1', 'HSPH1', 'RUNX1', 'RUNX2']
Let's write a critical analysis of this system of interacting proteins.    
Work step by step. For each important point, describe your reasoning.    
What mechanisms and biological processes are performed by this system?    
What cellular components and complexes are involved in this system?
    
Propose a name for the system. The name should be very brief. Do not compose an acronym.
Save any summary analysis of the system to the last paragraph.     
Avoid overly general statements of how the proteins are involved in various cellular processes    
Use gene symbols, not the full gene names    
Put the proposed system name at the beginning of the analysis    
Be brief, avoid using unneccesary words    
Avoid recapitulating the goals of the analysis.

Here are the interacting proteins:

Proteins: CBFB, HSPA2, HSPA4, HSPA4L, HSPBP1, HSPH1, RUNX1, RUNX2.




Unnamed: 0,system,analysis
0,C2385,DNA Damage Response and Cell Cycle Regulation ...
1,C2368,Protein Interaction System: Chaperone-RUNX Com...


In [77]:
# Passing the existing model_analysis returns that same in-memory dictionary;
# the cache file is only read when model_analysis is None.
model_analysis = get_cached_dictionary(model_analysis, model_cache_file)
model_analysis

{'C2385': {'analysis': "DNA Damage Response and Cell Cycle Regulation System\n\nThe DNA Damage Response and Cell Cycle Regulation System includes CDKN2AIP, CDKN2AIPNL, KPNA1, KPNA6, NKRF, and XRN2. These proteins interact and together play significant roles in DNA damage response, cell cycle regulation, and regulation of gene expression.\n\nCDKN2AIP, also known as Cyclin-Dependent Kinase Inhibitor 2A Interacting Protein, is known to interact with CDKN2A and enhance its stability, thus leading to cell cycle arrest at the G1 checkpoint (1). CDKN2AIPNL, the long-noncoding RNA, has been reported to play a role in cellular response to DNA damage, specifically in the context of homologous recombination repair (2).\n\nKPNA1 and KPNA6, Karyopherin alpha subfamily members, are involved in the nuclear import of proteins containing a specific nuclear localization signal (3). Both are known to play essential roles in various biological processes, including cell cycle progression (4).\n\nNKRF, the 

In [67]:
# Export the analysis table; the returned path of the timestamped CSV is shown
# as the cell output.
write_csv(model_df, model_name)

File saved to: /Users/depratt/csv_upload/hidef_on_bc_ppi_minkyu_2023_05_14-16_35_44.csv


'/Users/depratt/csv_upload/hidef_on_bc_ppi_minkyu_2023_05_14-16_35_44.csv'

In [49]:
# Build a one-row table for system C2410 by hand.
# NOTE(review): `content` is only assigned in the cell that unwraps
# model_analysis["C2410"], which appears after this one -- on a fresh kernel
# this cell raises NameError unless that cell has been run first.
foo = {"system": ["C2410"], "analysis": [content]}
model_df = pd.DataFrame(foo)
model_df

Unnamed: 0,system,analysis
0,C2410,System Analysis: The system of interacting pro...


In [None]:
# One-off repair: replace the stored response object for C2410 with just the
# text of its first choice.
# NOTE(review): not idempotent -- after the second line runs, "analysis" holds
# the extracted content, so running this cell again fails on the ['choices']
# lookup (presumably content is a string; confirm).
content = model_analysis["C2410"]["analysis"]['choices'][0]['message']['content']
model_analysis["C2410"]["analysis"] = content