In [215]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import getpass
from langchain_fireworks import ChatFireworks
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_huggingface import HuggingFaceEmbeddings
from pathlib import Path

In [212]:
# Reuse a key that is already exported in the environment; otherwise ask for it
# interactively (input is not echoed), so the secret never appears in the
# notebook source. Re-running this cell no longer clobbers a working key.
if not os.environ.get("FIREWORKS_API_KEY"):
    os.environ["FIREWORKS_API_KEY"] = getpass.getpass("Fireworks API key: ")

In [213]:
# Chat model used for idea generation. The identifier below selects the
# Fireworks-hosted model; sampling settings are passed straight to the client.
MODEL_ID = "accounts/fireworks/models/llama-v3p1-405b-instruct"

fireworks_llm = ChatFireworks(
    model=MODEL_ID,
    max_tokens=16384,
    temperature=0.6,
    cache=None,
    model_kwargs=dict(top_p=1),
)

In [214]:
# Embedding model configuration: which sentence-transformers checkpoint to use,
# the device it is loaded on, and the options forwarded to the encode step.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

hfEmbedding = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)



In [216]:
# Folder holding the source PDFs, resolved against the kernel's current
# working directory.
folderPath = Path.cwd() / "data"

# Fail fast with an explicit message when the folder is missing, so a wrong
# working directory is reported here rather than by a later cell.
if not folderPath.is_dir():
    raise FileNotFoundError(f"PDF folder not found: {folderPath}")

loader = PyPDFDirectoryLoader(folderPath)

In [217]:
# Chunking policy: pieces of at most 1024 characters, with 200 characters of
# overlap between neighbouring pieces.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1024,
)

# Load every document the loader finds, then cut them into chunks.
docs = loader.load()
splits = text_splitter.split_documents(docs)

In [162]:
# Give every chunk a stable, position-based id. Without explicit ids each run
# of this cell gets fresh random ids, so re-running it in the same kernel can
# append the same chunks again to the in-memory collection and the retriever
# starts returning duplicates. With fixed ids a re-run overwrites instead.
# NOTE(review): relies on Chroma treating repeated ids as an upsert — confirm
# against the installed langchain_chroma version.
chunk_ids = [f"chunk-{position}" for position in range(len(splits))]

vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=hfEmbedding,
    ids=chunk_ids,
)
retriever = vectorstore.as_retriever()

In [168]:
# System message for the idea-generation prompt: a short framing of what an
# "idea" is and the research-scientist role, followed by the {context}
# placeholder that is filled in when the prompt is rendered.
system_prompt = "".join(
    [
        "\n",
        "    Innovation is seeing what everybody has seen and thinking what nobody has thought.\n",
        "    An idea is nothing more nor less than a new combination of old elements. \n",
        "    It can be a thought or suggestion aimed at solving a problem or exploring a possibility.\n",
        "    Ideas challenge, shift paradigms, and drive innovation by synthesizing information, reflection, and imagination. \n",
        "    As a research scientist, your role is to generate new ideas and innovations based on a research paper.",
        "\n\n",
        "{context}",
    ]
)

In [178]:
# Human message for the idea-generation prompt. It walks the model through the
# Proposer -> Critic -> Summarizer roles. {input} is the only template
# variable; the doubled braces ({{...}}) render as literal {…} text and are not
# substituted.
#
# The trailing "\n\n" and "{input}" pieces are concatenated *outside* the
# triple-quoted block (mirroring system_prompt). Previously they sat inside it,
# so the rendered prompt contained stray quote characters around the topic.
human_prompt = (
    """
    You are a research scientist following the Diagram of Thought (DoT) framework to generate ideas from a research paper. Your workflow will transition through three roles: Proposer, Critic, and Summarizer.
    
    **1. <Proposer>:** 
    - **Process**: Analyze the research paper on the topic: {input}
    - **Reflect**: Examine prior knowledge, look for patterns, and break down complex concepts. Critically assess assumptions and concepts to gain insights.
    - **Imagine**: Use the insights to brainstorm new ideas and generate novel concepts or solutions beyond what is already known. Store these in a list called 'Idea list.'
    - **Output**: 'Idea list' = {{proposed_ideas}}
    - If no ideas are generated or {{proposed_ideas}} is empty, respond with: "No ideas proposed."

    **2. <Critic>**:
    - **Evaluate Relevancy**: Ensure the ideas in {{proposed_ideas}} are relevant to the research paper. An idea is not relevant if it seems unrelated to the research the paper. An idea is relevant if it aligns with the research paper. Remove irrelevant ideas from {{proposed_ideas}}, creating {{relevant_ideas}}.
    - **Evaluate Novelty**: Assess the originality/novelty of the ideas in {{relevant_ideas}}. An idea is not novel if it is generic, already exists, or has already been explored by numerous researchers. An idea is novel if it represents a good direction, highly innovative, and has been explored by only few or no researchers. Remove unnoriginal ideas from {{relevant_ideas}}, resulting in {{novel_ideas}}.
    - **Evaluate Feasiblity**: Review {{novel_ideas}} for factual correctness and practicality. An idea is not feasible if it doesn't make any sense, impractical, or not realistic. An idea is feasible if it is practical and realistic even to a minimal degree. Remove any unrealistic or impractical ideas from {{novel_ideas}}, creating {{refined_ideas}}.
    - If {{refined_ideas}} is empty after the critique process, respond with: "No ideas after critique.".
    
    **3. <Summarizer>**:
    - Synthesize the remaining ideas from {{refined_ideas}} and write a concise summary for each idea in bullet points.  Begin the summary with: "Potential top future research ideas from the paper are:"
    """
    "\n\n"
    "{input}"
)

In [179]:
# Two-message chat template: the system framing first, then the human
# instructions, each tagged with its role.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", human_prompt)]
)

In [193]:
# Chain that feeds a set of documents plus the prompt to the chat model.
# NOTE(review): per the LangChain docs the documents are inserted under the
# prompt's {context} variable by default — confirm for the installed version.
idea_generation_chain = create_stuff_documents_chain(
    llm=fireworks_llm,
    prompt=prompt,
)

In [194]:
# Put the retriever in front of the generation chain so one call both fetches
# the relevant chunks and produces the answer.
idea_retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=idea_generation_chain,
)

In [195]:
# Invoke the pipeline for one paper. The title is passed under the "input"
# key, which is the variable name the prompt templates reference as {input}.
response = idea_retriever_chain.invoke(
    dict(
        input="CyberNFTs: Conceptualizing a decentralized and reward-driven intrusion detection system with ML"
    )
)

In [204]:
# Show the generated text; print() is used so the line breaks in the answer
# are rendered rather than shown as escape sequences.
print(response["answer"])

**1. <Proposer>**

After analyzing the research paper "CyberNFTs: Conceptualizing a decentralized and reward-driven intrusion detection system with ML", I reflected on the concepts and broke down complex ideas to gain insights. Here are some potential ideas that came to mind:

* Using blockchain-based systems for decentralized threat intelligence sharing
* Developing a machine learning-powered intrusion detection system that leverages cyberNFTs for reward-driven detection
* Exploring the application of cyberNFTs in other areas of cybersecurity, such as incident response and threat hunting
* Investigating the use of decentralized autonomous organizations (DAOs) for managing and maintaining decentralized intrusion detection systems
* Designing a framework for evaluating the effectiveness of decentralized intrusion detection systems in various network environments
* Developing a system for detecting and mitigating advanced persistent threats (APTs) using a combination of machine learning 