In [4]:
import ipfshttpclient
import os
import json
import openai
from dotenv import load_dotenv

load_dotenv()

from enum import Enum
from pydantic import BaseModel
from typing import List, Optional

In [5]:
def add_json_ipfs(json_object, api_addr="/dns/provider.tekti.net/tcp/31861/http"):
    """Store a JSON-serialisable object on IPFS and return what the node reports.

    Args:
        json_object: Object handed to ``client.add_json``.
        api_addr: Multiaddr of the IPFS HTTP API to connect to. Defaults to
            the node this notebook was written against.

    Returns:
        The value returned by ``client.add_json`` (the notebook output shows
        a content-hash string).
    """
    # Use the client as a context manager so its HTTP session is closed as
    # soon as the upload finishes instead of lingering for the kernel lifetime.
    with ipfshttpclient.connect(api_addr) as client:
        return client.add_json(json_object)

def get_json_ipfs(ipfs_hash, api_addr="/dns/provider.tekti.net/tcp/31861/http"):
    """Fetch the content stored under ``ipfs_hash`` and decode it as JSON.

    Args:
        ipfs_hash: Hash of the object to read, as returned when it was added.
        api_addr: Multiaddr of the IPFS HTTP API to connect to. Defaults to
            the node this notebook was written against.

    Returns:
        The decoded JSON document (whatever ``json.loads`` produces for it).

    Raises:
        json.JSONDecodeError: If the stored content is not valid JSON.
    """
    # Use the client as a context manager so its HTTP session is closed as
    # soon as the download finishes.
    with ipfshttpclient.connect(api_addr) as client:
        raw_content = client.cat(ipfs_hash)
    return json.loads(raw_content)

In [6]:
class NodeType(str, Enum):
    """Kinds of node a ``NodeInfo`` can describe.

    Members are also ``str`` instances, and each member's value is identical
    to its name.
    """

    technology = "technology"
    sub_field = "sub_field"
    application = "application"
    component = "component"
    standard = "standard"
    research = "research"
    methodology = "methodology"
    tool = "tool"
    organization = "organization"
    person = "person"
    event = "event"
    concept = "concept"
    framework = "framework"
    project = "project"

class NodeInfo(BaseModel):
    """Identifying metadata of a node; every field is required."""

    # Identifier of the node, kept as a string.
    node_id: str
    # Display name of the node.
    name: str
    # Free-text description of the node.
    description: str
    # Category of the node, restricted to the NodeType members.
    node_type: NodeType

class NodeScopeSection(BaseModel):
    """Scope of the research attached to a node; every field defaults to None."""

    # List of scope entries (strings).
    research_scope: Optional[List[str]] = None
    # Free-text description of the scope.
    scope_description: Optional[str] = None
    # List of reference strings backing the scope.
    scope_references: Optional[List[str]] = None

class NodeContributionSection(BaseModel):
    """Contribution content attached to a node; every field defaults to None."""

    problem_statement: Optional[str] = None
    # NOTE: a single string here, whereas NodeScopeSection.research_scope is a list of strings.
    research_scope: Optional[str] = None
    background_information: Optional[str] = None
    objectives: Optional[List[str]] = None
    methodology: Optional[List[str]] = None
    expected_outcome: Optional[List[str]] = None
    key_findings: Optional[List[str]] = None
    conclusion: Optional[str] = None
    future_works: Optional[List[str]] = None
    references: Optional[List[str]] = None

class NodeJson(BaseModel):
    """Complete node document: info, scope and contribution sections (all required)."""

    node_info: NodeInfo
    scope_section: NodeScopeSection
    contribution_section: NodeContributionSection

### New node

In [7]:
# Build the initial CRISPR-Cas9 node: metadata, scope and the original contribution.
node_info = NodeInfo(
    node_id="1",
    name="CRISPR-Cas9",
    description="CRISPR-Cas9 is a revolutionary gene-editing technology that allows for precise modifications to DNA. It has broad applications in medicine, agriculture, and research.",
    node_type=NodeType.technology
)

# Scope of the research; later serialised and passed to the verifier as the research focus.
scope_section = NodeScopeSection(
    research_scope=["Gene editing", "Genetic therapy", "Agricultural biotechnology"],
    scope_description="CRISPR-Cas9 research covers various domains including gene editing techniques, therapeutic applications for genetic disorders, and advancements in agricultural biotechnology.",
    scope_references=["Jinek, M. et al. (2012). A Programmable Dual-RNA–Guided DNA Endonuclease in Adaptive Bacterial Immunity. Science.",
                        "Doudna, J. A., & Charpentier, E. (2014). The new frontier of genome engineering with CRISPR-Cas9. Science."]
)

# Original contribution section; serves as the baseline the new contribution is compared against.
contribution_section = NodeContributionSection(
    problem_statement="The need for precise, efficient, and affordable gene-editing tools to advance genetic research and therapy.",
    research_scope="Investigate the mechanisms, applications, and ethical considerations of CRISPR-Cas9 technology in various fields including medicine, agriculture, and biotechnology.",
    background_information="CRISPR-Cas9 was discovered as a part of the immune system in bacteria, which use it to fend off viruses. The system was adapted for gene-editing in 2012, allowing for targeted modifications to genomes.",
    objectives=[
        "1. To develop reliable CRISPR-Cas9 based tools for gene editing.", 
        "2. To explore therapeutic applications for genetic disorders.", 
        "3. To assess the ethical implications of gene editing."
    ],
    methodology=[
        "1. Isolate CRISPR-Cas9 components.", 
        "2. Design guide RNA sequences specific to target genes.", 
        "3. Deliver the CRISPR-Cas9 complex into cells.", 
        "4. Validate gene modifications through sequencing and functional assays.", 
        "5. Enhanced delivery mechanisms for CRISPR-Cas9 complexes were developed, focusing on nanoparticle-based delivery systems to improve in vivo targeting efficiency and reduce off-target effects."
    ],
    expected_outcome=[
        "Development of robust gene-editing protocols", 
        "potential treatments for genetic diseases", 
        "a better understanding of gene function and regulation."
    ],
    key_findings=[
        "CRISPR-Cas9 can effectively target and modify specific DNA sequences with high precision.", 
        "Off-target effects are a concern and are being addressed through various approaches.",
    ],
    conclusion="CRISPR-Cas9 has transformed the field of genetics, offering unprecedented control over DNA modifications. Ongoing research is addressing its limitations and expanding its applications.",
    future_works=["1. Improve the accuracy and efficiency of CRISPR-Cas9.", 
                  "2. Develop delivery systems for in vivo applications.", 
                  "3. Explore the use of CRISPR for epigenetic modifications."
    ],
    references=["Jinek, M. et al. (2012). A Programmable Dual-RNA–Guided DNA Endonuclease in Adaptive Bacterial Immunity. Science.",
                 "Doudna, J. A., & Charpentier, E. (2014). The new frontier of genome engineering with CRISPR-Cas9. Science."]
)

# Assemble the three sections into the full node document.
crispr_node = NodeJson(
    node_info=node_info,
    scope_section=scope_section,
    contribution_section=contribution_section
)

In [8]:
# Show the node as a plain dict, the same form that is passed to add_json_ipfs below.
print(crispr_node.model_dump())

{'node_info': {'node_id': '1', 'name': 'CRISPR-Cas9', 'description': 'CRISPR-Cas9 is a revolutionary gene-editing technology that allows for precise modifications to DNA. It has broad applications in medicine, agriculture, and research.', 'node_type': <NodeType.technology: 'technology'>}, 'scope_section': {'research_scope': ['Gene editing', 'Genetic therapy', 'Agricultural biotechnology'], 'scope_description': 'CRISPR-Cas9 research covers various domains including gene editing techniques, therapeutic applications for genetic disorders, and advancements in agricultural biotechnology.', 'scope_references': ['Jinek, M. et al. (2012). A Programmable Dual-RNA–Guided DNA Endonuclease in Adaptive Bacterial Immunity. Science.', 'Doudna, J. A., & Charpentier, E. (2014). The new frontier of genome engineering with CRISPR-Cas9. Science.']}, 'contribution_section': {'problem_statement': 'The need for precise, efficient, and affordable gene-editing tools to advance genetic research and therapy.', '

In [9]:
# Upload the initial node to IPFS and keep the returned hash for retrieval.
ipfs_hash_initial = add_json_ipfs(crispr_node.model_dump())
print(ipfs_hash_initial)

Qmd1SMEYuSxWZht9mKpgRJX2nZvwKqJLfJL73Wd9UEGDRt


In [10]:
# Round trip: fetch the stored JSON by hash and re-validate it against the NodeJson schema.
retrieved_node_json = get_json_ipfs(ipfs_hash_initial)
retrieved_crispr_node = NodeJson.model_validate(retrieved_node_json)
retrieved_crispr_node

NodeJson(node_info=NodeInfo(node_id='1', name='CRISPR-Cas9', description='CRISPR-Cas9 is a revolutionary gene-editing technology that allows for precise modifications to DNA. It has broad applications in medicine, agriculture, and research.', node_type=<NodeType.technology: 'technology'>), scope_section=NodeScopeSection(research_scope=['Gene editing', 'Genetic therapy', 'Agricultural biotechnology'], scope_description='CRISPR-Cas9 research covers various domains including gene editing techniques, therapeutic applications for genetic disorders, and advancements in agricultural biotechnology.', scope_references=['Jinek, M. et al. (2012). A Programmable Dual-RNA–Guided DNA Endonuclease in Adaptive Bacterial Immunity. Science.', 'Doudna, J. A., & Charpentier, E. (2014). The new frontier of genome engineering with CRISPR-Cas9. Science.']), contribution_section=NodeContributionSection(problem_statement='The need for precise, efficient, and affordable gene-editing tools to advance genetic res

### Updated node (with the new contribution)

In [12]:
# Extended contribution section: the original content plus additional objectives,
# key findings, future works and references.
new_contribution_section = NodeContributionSection(
    problem_statement="The need for precise, efficient, and affordable gene-editing tools to advance genetic research and therapy.",
    research_scope="Investigate the mechanisms, applications, and ethical considerations of CRISPR-Cas9 technology in various fields including medicine, agriculture, and biotechnology.",
    background_information="CRISPR-Cas9 was discovered as a part of the immune system in bacteria, which use it to fend off viruses. The system was adapted for gene-editing in 2012, allowing for targeted modifications to genomes.",
    objectives=[
        "1. To develop reliable CRISPR-Cas9 based tools for gene editing.", 
        "2. To explore therapeutic applications for genetic disorders.", 
        "3. To assess the ethical implications of gene editing.",
        "4. To develop a scalable manufacturing process for CRISPR components, making the technology more accessible for large-scale applications in agriculture and industry.",
        "5. To introduce advanced computational models to predict off-target effects and optimize guide RNA design, significantly improving the specificity of CRISPR-Cas9 edits."
    ],
    methodology=[
        "1. Isolate CRISPR-Cas9 components.", 
        "2. Design guide RNA sequences specific to target genes.", 
        "3. Deliver the CRISPR-Cas9 complex into cells.", 
        "4. Validate gene modifications through sequencing and functional assays.", 
        "5. Enhanced delivery mechanisms for CRISPR-Cas9 complexes were developed, focusing on nanoparticle-based delivery systems to improve in vivo targeting efficiency and reduce off-target effects."
    ],
    expected_outcome=[
        "Development of robust gene-editing protocols", 
        "potential treatments for genetic diseases", 
        "a better understanding of gene function and regulation."
    ],
    key_findings=[
        "CRISPR-Cas9 can effectively target and modify specific DNA sequences with high precision.", 
        "Off-target effects are a concern and are being addressed through various approaches.", 
        "Introduced advanced computational models to predict off-target effects and optimize guide RNA design, significantly improving the specificity of CRISPR-Cas9 edits.",
        "Developed a scalable manufacturing process for CRISPR components, making the technology more accessible for large-scale applications in agriculture and industry.",
        "Conducted extensive ethical reviews and proposed guidelines for the responsible use of CRISPR technology in human germline editing, balancing innovation with ethical considerations."
    ],
    conclusion="CRISPR-Cas9 has transformed the field of genetics, offering unprecedented control over DNA modifications. Ongoing research is addressing its limitations and expanding its applications. Conducted extensive ethical reviews and proposed guidelines for the responsible use of CRISPR technology in human germline editing, balancing innovation with ethical considerations.",
    future_works=[
        "1. Improve the accuracy and efficiency of CRISPR-Cas9.", 
        "2. Develop delivery systems for in vivo applications.", 
        "3. Explore the use of CRISPR for epigenetic modifications. Discovered a new application of CRISPR-Cas9 for epigenetic modifications, enabling precise control over gene expression without altering the DNA sequence.",
        "4. Develop nanoparticle-based delivery systems for CRISPR-Cas9.",
        # NOTE(review): apart from the number, item 5 repeats item 3 verbatim — confirm whether the duplicate is intended.
        "5. Explore the use of CRISPR for epigenetic modifications. Discovered a new application of CRISPR-Cas9 for epigenetic modifications, enabling precise control over gene expression without altering the DNA sequence."
    ],
    references=[
        "Jinek, M. et al. (2012). A Programmable Dual-RNA–Guided DNA Endonuclease in Adaptive Bacterial Immunity. Science.",
        "Doudna, J. A., & Charpentier, E. (2014). The new frontier of genome engineering with CRISPR-Cas9. Science.",
        "Smith, J. et al. (2023). Nanoparticle-based delivery systems for CRISPR-Cas9. Nature Biotechnology.",
        "Doe, J. et al. (2023). Ethical guidelines for human germline editing using CRISPR-Cas9. Bioethics Journal.",
        "Turing, A. et al. (2023). Computational models for optimizing CRISPR guide RNA specificity. Computational Biology.",
        "Curie, M. et al. (2023). Epigenetic modifications using CRISPR-Cas9. Genetic Research.",
        "Tesla, N. et al. (2023). Scalable manufacturing of CRISPR components. Industrial Biotechnology."
    ]
)

# Same node info and scope as the initial node; only the contribution section differs.
new_crispr_node = NodeJson(
    node_info=node_info,
    scope_section=scope_section,
    contribution_section=new_contribution_section
)

In [13]:
# Upload the updated node to IPFS; it is stored under its own hash.
ipfs_hash_new = add_json_ipfs(new_crispr_node.model_dump())
print(ipfs_hash_new)

QmV3Hezh8UQv8X6K3UNPWdHQe3cukBn8j9oC47qHzAja5V


In [14]:
# Round trip for the updated node: fetch by hash and re-validate against NodeJson.
retrieved_new_node_json = get_json_ipfs(ipfs_hash_new)
retrieved_new_crispr_node = NodeJson.model_validate(retrieved_new_node_json)
retrieved_new_crispr_node

NodeJson(node_info=NodeInfo(node_id='1', name='CRISPR-Cas9', description='CRISPR-Cas9 is a revolutionary gene-editing technology that allows for precise modifications to DNA. It has broad applications in medicine, agriculture, and research.', node_type=<NodeType.technology: 'technology'>), scope_section=NodeScopeSection(research_scope=['Gene editing', 'Genetic therapy', 'Agricultural biotechnology'], scope_description='CRISPR-Cas9 research covers various domains including gene editing techniques, therapeutic applications for genetic disorders, and advancements in agricultural biotechnology.', scope_references=['Jinek, M. et al. (2012). A Programmable Dual-RNA–Guided DNA Endonuclease in Adaptive Bacterial Immunity. Science.', 'Doudna, J. A., & Charpentier, E. (2014). The new frontier of genome engineering with CRISPR-Cas9. Science.']), contribution_section=NodeContributionSection(problem_statement='The need for precise, efficient, and affordable gene-editing tools to advance genetic res

### Verify contribution

In [15]:
# System message that sets the reviewer persona for the chat model.
system_prompt = """
You are a world class researcher. 
There are many researchers working under you.
For every research topic, the researchers add their contribution to a common doc.
One of your most important job is to identify, if the contribution is sufficient or not
"""

# User message template. It is filled via str.format with research_focus,
# original_contribution_section and new_contribution_section. The <ANSWER> and
# <REASON> tags define the response layout that parse_answer extracts.
user_prompt = """
Here is the research_focus the contributors are working towards: {research_focus}
Here is the original contribution_section: {original_contribution_section}
Here is the new contribution_section: {new_contribution_section}

<INSTRUCTIONS>
Please study original contribution section and the new contribution section.
Contribution should be significant
What is not significant contribution
    - correcting only grammatical errors
    - correcting formatting
    - correcting spelling
    - adding unrelated information to the topic or research focus
    - removing information from the original contribution section
What is significant contribution
    - adding new information to the original contribution section
    - addressing the research focus in the new contribution section
Your response should be a yes or no answer. "yes" if the new contribution is significant, "no" if the new contribution is not significant.
If the new contribution is significant, please provide a brief explanation of the new contribution.
</INSTRUCTIONS>

<ANSWER>
# yes or no
</ANSWER>

<REASON>
# reason for yes or no
</REASON>
"""


def verify_contribution(
    research_focus,
    original_contribution_section,
    new_contribution_section,
    model="gpt-4o",
):
    """Ask an OpenAI chat model whether a new contribution is significant.

    Args:
        research_focus: Text interpolated into the ``{research_focus}`` slot
            of ``user_prompt``.
        original_contribution_section: Text for the
            ``{original_contribution_section}`` slot.
        new_contribution_section: Text for the ``{new_contribution_section}``
            slot.
        model: Name of the chat model to query. Defaults to ``"gpt-4o"``.

    Returns:
        The ``(answer, reason)`` tuple produced by ``parse_answer``.

    Raises:
        ValueError: If the model reply carries no text content.
    """
    client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    filled_user_prompt = user_prompt.format(
        research_focus=research_focus,
        original_contribution_section=original_contribution_section,
        new_contribution_section=new_contribution_section,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": filled_user_prompt},
    ]

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.0,  # request the least random output so repeated checks are as consistent as possible
    )

    content = response.choices[0].message.content
    # The message content is optional in the API response; fail with a clear
    # message instead of an AttributeError deep inside the parser.
    if content is None:
        raise ValueError("The model returned no text content to parse.")
    return parse_answer(content)

def parse_answer(response):
    """Extract the verdict and its justification from a tagged model reply.

    The reply is expected to contain ``<ANSWER>...</ANSWER>`` and
    ``<REASON>...</REASON>`` sections. Tag matching ignores case, and a
    missing closing tag is tolerated (the section then runs to the end of
    the text).

    Args:
        response: Raw text returned by the model.

    Returns:
        ``(answer, reason)``: both stripped of surrounding whitespace, with
        ``answer`` lower-cased so that "Yes" and "yes" compare equal.

    Raises:
        ValueError: If ``response`` is not a string or lacks an opening
            ``<ANSWER>`` or ``<REASON>`` tag.
    """
    import re  # local import: only this helper needs it

    if not isinstance(response, str):
        raise ValueError(
            f"Expected the model response as a string, got {type(response).__name__}."
        )

    def _extract(tag):
        # Lazy match up to the closing tag, or to the end of the text when the
        # closing tag is absent (e.g. a truncated reply).
        found = re.search(
            rf"<{tag}>(.*?)(?:</{tag}>|\Z)",
            response,
            flags=re.DOTALL | re.IGNORECASE,
        )
        if found is None:
            raise ValueError(
                f"Model response has no <{tag}> section: {response[:200]!r}"
            )
        return found.group(1).strip()

    answer = _extract("ANSWER").lower()
    reason = _extract("REASON")
    return answer, reason


# Serialise the sections to JSON strings for interpolation into the prompt.
# All three inputs are taken from the nodes fetched back from IPFS, so the
# check runs on what was actually stored rather than on the local objects.
research_focus = retrieved_crispr_node.scope_section.model_dump_json()
original_contribution_section = retrieved_crispr_node.contribution_section.model_dump_json()
# NOTE(review): this rebinds `new_contribution_section`, which earlier held a
# NodeContributionSection instance, to a JSON string; later cells use the string.
new_contribution_section = retrieved_new_crispr_node.contribution_section.model_dump_json()

answer, reason = verify_contribution(research_focus, original_contribution_section, new_contribution_section)

In [16]:
# Display the verdict extracted from the model reply.
answer

'yes'

In [17]:
# Display the justification extracted from the model reply.
reason

'The new contribution is significant because it adds substantial new information to the original contribution section. Specifically, it introduces new objectives such as developing a scalable manufacturing process for CRISPR components and advanced computational models to predict off-target effects. Additionally, it expands the key findings with the development of scalable manufacturing processes and ethical guidelines for human germline editing. The new references also support these additions, indicating that the new contributions are well-researched and relevant to the research focus. These enhancements address the research focus and provide a more comprehensive understanding of CRISPR-Cas9 technology and its applications.'

### Using Galadriel to validate

In [18]:
# Single-string variant of the verification prompt, filled via str.format with
# research_focus, original_contribution_section and new_contribution_section.
# NOTE(review): the text appears to be system_prompt followed by user_prompt
# from the earlier cell — keep the two in sync when editing either.
galadriel_prompt_template = """
You are a world class researcher. 
There are many researchers working under you.
For every research topic, the researchers add their contribution to a common doc.
One of your most important job is to identify, if the contribution is sufficient or not

Here is the research_focus the contributors are working towards: {research_focus}
Here is the original contribution_section: {original_contribution_section}
Here is the new contribution_section: {new_contribution_section}

<INSTRUCTIONS>
Please study original contribution section and the new contribution section.
Contribution should be significant
What is not significant contribution
    - correcting only grammatical errors
    - correcting formatting
    - correcting spelling
    - adding unrelated information to the topic or research focus
    - removing information from the original contribution section
What is significant contribution
    - adding new information to the original contribution section
    - addressing the research focus in the new contribution section
Your response should be a yes or no answer. "yes" if the new contribution is significant, "no" if the new contribution is not significant.
If the new contribution is significant, please provide a brief explanation of the new contribution.
</INSTRUCTIONS>

<ANSWER>
# yes or no
</ANSWER>

<REASON>
# reason for yes or no
</REASON>
"""

In [19]:
def get_galadriel_verification(research_focus, original_contribution_section, new_contribution_section):
    """Render ``galadriel_prompt_template`` with the three given sections.

    Only the prompt text is built and returned; no request is sent from here.
    """
    placeholders = {
        "research_focus": research_focus,
        "original_contribution_section": original_contribution_section,
        "new_contribution_section": new_contribution_section,
    }
    return galadriel_prompt_template.format(**placeholders)


# Render the prompt with the JSON strings prepared in the verification cell and show it.
p = get_galadriel_verification(research_focus, original_contribution_section, new_contribution_section)
p



'\nYou are a world class researcher. \nThere are many researchers working under you.\nFor every research topic, the researchers add their contribution to a common doc.\nOne of your most important job is to identify, if the contribution is sufficient or not\n\nHere is the research_focus the contributors are working towards: {"research_scope":["Gene editing","Genetic therapy","Agricultural biotechnology"],"scope_description":"CRISPR-Cas9 research covers various domains including gene editing techniques, therapeutic applications for genetic disorders, and advancements in agricultural biotechnology.","scope_references":["Jinek, M. et al. (2012). A Programmable Dual-RNA–Guided DNA Endonuclease in Adaptive Bacterial Immunity. Science.","Doudna, J. A., & Charpentier, E. (2014). The new frontier of genome engineering with CRISPR-Cas9. Science."]}\nHere is the original contribution_section: {"problem_statement":"The need for precise, efficient, and affordable gene-editing tools to advance genet