In [None]:
import sys
import os

# Put the parent of the current working directory on the import path so that
# packages located next to this notebook's folder can be imported by later cells.
# Note: this is based on the kernel's working directory, not on the notebook file's location.
cwd = os.getcwd()
dirname = os.path.dirname(cwd)
print(cwd)
print(dirname)

# Only append when missing, so re-running this cell does not stack duplicate entries.
if dirname not in sys.path:
    sys.path.append(dirname)

print(sys.path)


## Datasets

In [None]:

from hre.analysis.dataset import Dataset

# Inline, header-less CSV payloads: one "<name>,<value>" record per line,
# with no trailing newline after the final record.
dataset_1_csv = (
    "MECOM,0.0\n"
    "RING1,1.150034561\n"
    "RPS18,0.0\n"
    "SLC30A1,-2.114594111\n"
    "SLC39A7,0.0\n"
    "TCEB2,0.0"
)
dataset_2_csv = (
    "BAP1,0.0\n"
    "BARD1,0.0\n"
    "BRAP,0.950359927\n"
    "BRCA1,0.0\n"
    "LYAR,1.381904766\n"
    "RELA,0.0\n"
    "RING1,1.150034561\n"
    "RPS27A,0.0\n"
    "UBA52,0.0\n"
    "UBC,0.0\n"
    "UBE2O,1.145852987"
)

# Wrap each payload in a Dataset, labelled with its own variable name.
dataset_1 = Dataset(data=dataset_1_csv, experiment_description="dataset_1")
dataset_2 = Dataset(data=dataset_2_csv, experiment_description="dataset_2")

# Show what each Dataset exposes through `.data`, separated by a divider line.
print(dataset_1.data)
print("---")
print(dataset_2.data)

## Analyst Agents

In [None]:
from hre.analysis.analyst import Analyst

# Both analysts are built with the same model identifier.
ANALYST_MODEL = "gpt-3.5-turbo-1106"

# --- Contexts -------------------------------------------------------------
# The two contexts are currently identical but kept as separate variables so
# each analyst can be tuned independently.
analyst_1_context = """
You are a helpful analyst of genomic, proteomic, and other biological data. 
"""

analyst_2_context = """
You are a helpful analyst of genomic, proteomic, and other biological data. 
"""

# --- Prompt templates -----------------------------------------------------
# Each template contains a `{data}` placeholder.
# NOTE(review): presumably Analyst substitutes the dataset contents for `{data}` — confirm.

# Detailed prompt: describes the dataset layout and constrains the output format.
analyst_1_prompt_template = """ 
The attached proteomics "dataset" includes interacting proteins and the measurements of their differential abundance as a ratio between treated and non-treated samples, where the treatment is the infection of human cells with Dengue virus. 
Not all proteins in the dataset have differential abundance measurements.

The dataset has 2 columns with the following headers: name, DV3_24h-Mock_24h. 
The first column contains the protein names and the last columns contains the abundance data. 

Your task is to leverage this dataset to analyze a subset of interacting proteins that are defined as “proteins of interest".

First, determine what proteins of interest show a differential abundance recorded in the dataset. 
Then, based on this information and on the known functions of all other proteins of interest, 
I want you to generate a novel hypothesis describing the mechanisms that may contribute to the disease state 
and could potentially be targeted by drug therapies. 

When presenting your results, please adhere to the following guidelines:

- Avoid including any code.
- Do not describe the analytical steps you took.
- Do not merely list the proteins of interest, regardless whether they show a differential abundance recorded in the dataset or not.
- Build your hypotheses taking into consideration the interplay among all proteins of interest, not only those that show a differential abundance in the dataset.

- Your output should consist solely of the identified proteins of interest with changed abundance levels, the hypotheses you propose, and the reasons supporting these hypotheses.

Here is the set of proteins of interest: 
{data}
"""

# Minimal prompt: a short request for a hypothesis, with no output guidelines.
analyst_2_prompt_template = """
The differential abundance of the following proteins were measured in a dengue infection experiment. 
Propose a novel hypothesis for the mechanism of action of these proteins in the context of dengue infection, 
given the known functions of these proteins and the observed changes in abundance.
{data}
"""

# --- Analysts -------------------------------------------------------------
# Positional arguments: model, context, prompt template, then two strings.
# NOTE(review): the last two look like a display name and a description — confirm against Analyst.
analyst_1 = Analyst(
    ANALYST_MODEL,
    analyst_1_context,
    analyst_1_prompt_template,
    "Jane",
    "The first analyst",
)
analyst_2 = Analyst(
    ANALYST_MODEL,
    analyst_2_context,
    analyst_2_prompt_template,
    "John",
    "The second analyst",
)

## The TestPlan

In [None]:
from hre.analysis.test import TestPlan

# Bundle the two analysts and the two datasets into one plan.
test_plan = TestPlan(
    analysts=[analyst_1, analyst_2],
    datasets=[dataset_1, dataset_2],
)


## Run the Test

In [None]:
from hre.analysis.test import Test

# Build a Test from the plan and execute it.
# Later cells read the results from `test.hypotheses`.
# NOTE(review): presumably run() is what populates `test.hypotheses` and calls the LLM — confirm.
test = Test(test_plan)
test.run()

## dataset_1 hypotheses

In [None]:
# Keep only the hypotheses whose dataset is dataset_1.
dataset_1_hypotheses = [h for h in test.hypotheses if h.dataset == dataset_1]

# For each one, print the analyst's name, the hypothesis text, and a divider,
# each on its own line.
for hypothesis in dataset_1_hypotheses:
    print(hypothesis.analyst.name, hypothesis.description, "---", sep="\n")

## dataset_2 hypotheses

In [None]:
# Keep only the hypotheses whose dataset is dataset_2.
dataset_2_hypotheses = [h for h in test.hypotheses if h.dataset == dataset_2]

# For each one, print the analyst's name, the hypothesis text, and a divider,
# each on its own line.
for hypothesis in dataset_2_hypotheses:
    print(hypothesis.analyst.name, hypothesis.description, "---", sep="\n")

## Reviewer Agents

In [None]:
# NOTE(review): Reviewer is imported from hre.analysis.test while ReviewPlan and Review
# come from hre.analysis.review — confirm this is the intended module.
from hre.analysis.test import Reviewer

# NOTE(review): the analysts use the model id "gpt-3.5-turbo-1106" while the reviewers
# use "GPT-3.5" — confirm that Reviewer accepts this identifier.

# Reviewer 1: context and prompt template are still empty placeholders.
reviewer_1_context = ""
reviewer_1_prompt_template = ""

# Pass the variables rather than fresh "" literals, so that filling in the
# placeholders above actually changes the reviewer.
reviewer_1 = Reviewer(
    llm="GPT-3.5",
    context=reviewer_1_context,
    prompt_template=reviewer_1_prompt_template,
    name="The first reviewer",
)

# Reviewer 2: same setup, with its own placeholders.
reviewer_2_context = ""
reviewer_2_prompt_template = ""
reviewer_2 = Reviewer(
    llm="GPT-3.5",
    context=reviewer_2_context,
    prompt_template=reviewer_2_prompt_template,
    name="The second reviewer",
)



## The ReviewPlan

In [None]:
from hre.analysis.review import ReviewPlan

# Tie both reviewers to the `test` object created earlier in the notebook.
review_plan = ReviewPlan(
    reviewers=[reviewer_1, reviewer_2],
    test=test,
    description="This is a review plan",
)


## Run the Review

In [None]:
from hre.analysis.review import Review

# Build a Review from the review plan and execute it.
# NOTE(review): what run() produces and where the results are stored is not visible here — confirm.
review = Review(review_plan)    
review.run()


## The Review