### LLM

In [None]:
import html
import json
import os
import re
from dotenv import load_dotenv
load_dotenv()

# load_dotenv() above has already loaded any .env entries; here we only verify
# that both keys are really present. Assigning os.getenv(...) straight back into
# os.environ raises an opaque "str expected, not NoneType" TypeError when a key
# is missing, so fail with a message that names the missing variable instead.
for _key_name in ("GROQ_API_KEY", "TAVILY_API_KEY"):
    _key_value = os.getenv(_key_name)
    if not _key_value:
        raise EnvironmentError(
            f"{_key_name} is not set. Add it to your .env file or export it "
            "before running this notebook."
        )
    os.environ[_key_name] = _key_value
from langchain_groq import ChatGroq

# Shared chat model used by the report-generation nodes defined further down.
# Alternative model kept for quick switching:
# llm = ChatGroq(model="qwen/qwen3-32b")
llm = ChatGroq(model="openai/gpt-oss-120b")
# Manual smoke test of the model:
# llm.invoke("Hello, world!",reasoning_format="hidden")   

In [None]:
from typing import Annotated,List ,Optional
import operator 
from typing_extensions import Literal,TypedDict
from pydantic import BaseModel, Field
from langchain_core.messages import HumanMessage,SystemMessage 
from IPython.display import display,Image,Markdown
from langgraph.types import Send 
from tavily import TavilyClient
from langchain_community.tools import ArxivQueryRun,WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper,ArxivAPIWrapper

### Utils

In [None]:
def get_content_from_json(json_data):
    """Parse the JSON payload carried in a message's ``content`` attribute.

    The payload may be wrapped in a Markdown code fence (with or without a
    ``json`` language tag, and with any whitespace around the body) or may be
    the bare JSON text itself.

    Args:
        json_data: Any object exposing a string ``content`` attribute
            (for example an LLM response message).

    Returns:
        The decoded Python object (dict, list, ...).

    Raises:
        json.JSONDecodeError: If the extracted text is not valid JSON.
    """
    text = json_data.content
    # Tolerate "```json\n...\n```", "``` ... ```" and similar variations instead
    # of requiring one exact layout; previously a miss crashed on `.group(1)`.
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE)
    payload = fenced.group(1) if fenced else text.strip()
    return json.loads(payload)

### TOOLS

In [None]:
# methods = "Transfer learning ,finetuning of cnn, support vector machine, random forest classifier,linear dicriminant analysis,prinicpal compnent analysis,independent component analysis,genetic alforithm,binary bat optimisation,binary particle swarm optimisation "

In [None]:
# methods_list = methods.split(',')
# methods_list

In [None]:
def WikiSearchContent(query):
    """Write a long-form description of ``query`` backed by a Wikipedia lookup.

    Looks the query up on Wikipedia (top result only, at most 1000 characters),
    then asks a Groq-hosted model to produce the write-up, offering the
    Wikipedia text as optional extra context.

    Args:
        query: Name of the method/topic to describe.

    Returns:
        The model's response text.
    """
    wiki_tool = WikipediaQueryRun(
        api_wrapper=WikipediaAPIWrapper(top_k_results=1,doc_content_chars_max=1000)
    )
    wiki_excerpt = wiki_tool.run(query)

    # Dedicated writer model for this helper; named differently from the
    # notebook-level `llm` so the two are not confused.
    writer_llm = ChatGroq(model="qwen/qwen3-32b")

    instructions = f"""You are good content writer and also a researcher.
        Follow the below instructions while generatin response for the topic: {query}
         **Instructions:**
        - Each method should include:
            - A Title heading (Bold)
            • A detailed summary (~500 words)
            • Relevant equations
            • A separator (e.g., "---") at the end
        \n\n
        if found relevant use the below extra content
        \n\n
        {wiki_excerpt}
        
        Note: Dont include any Subheadings!!!! just the content as paragraph is needed."""

    response = writer_llm.invoke(
        [SystemMessage(content=instructions)],
        reasoning_format="hidden",
    )
    return response.content

In [None]:
# api_wrapper_wiki = WikipediaAPIWrapper(top_k_results=3,doc_content_chars_max=2500)
# wiki = WikipediaQueryRun(api_wrapper=api_wrapper_wiki)
# wiki.name

In [None]:
# Domains the literature search is restricted to. Every entry needs its own
# trailing comma: adjacent string literals are silently concatenated by Python,
# which previously merged the first two entries into "google.comnature.com".
SCHOLARLY_DOMAINS = (
    "google.com",
    "nature.com",
    "sciencedirect.com",
    "springer.com",
    "ieee.org",
    "mdpi.com",
    "researchgate.net",
    "pubmed.ncbi.nlm.nih.gov",
    "jamanetwork.com",
    "frontiersin.org",
    "hindawi.com",
)


def TavilySearchContent(query,top_k):
    """Search recent scholarly sources with Tavily and return the hit snippets.

    Args:
        query: Free-text search query.
        top_k: Maximum number of results to request from Tavily.

    Returns:
        A single string with one paragraph per hit, each formatted as
        "<content> with a score of <score>" and separated by blank lines.
        Empty string when the search returns no results.
    """
    client = TavilyClient()
    response = client.search(
        query=query,
        include_domains=list(SCHOLARLY_DOMAINS),
        search_depth="advanced",       # more comprehensive search
        max_results=top_k,             # caller-chosen cap on the number of hits
        time_range="year",             # only publications from the past year
        include_answer=True,           # ask for a concise answer if available
        include_images=False,          # images are not used
        include_raw_content=True,      # raw text requested (not read below)
    )

    # f-string formatting also copes with a hit whose content is not a str.
    snippets = [
        f"{hit['content']} with a score of {hit['score']}"
        for hit in response.get("results", [])
    ]
    return "\n\n".join(snippets)

In [None]:

# # Initialize the Tavily client (make sure your API key is set in environment variables or config)
# client = TavilyClient()
# # Search scientific research articles
# results = client.search(
#     query=" research paper title with author on independent component analysis",
#     include_domains=[
#         "google.com"
#         "nature.com",
#         "sciencedirect.com",
#         "springer.com",
#         "ieee.org",
#         "mdpi.com",
#         "researchgate.net",
#         "pubmed.ncbi.nlm.nih.gov",
#         "jamanetwork.com",
#         "frontiersin.org",
#         "hindawi.com",
#     ],
#     search_depth="advanced",       # Enables more comprehensive and scholarly search
#     max_results=1,                # Limit to 15 high-quality results
#     time_range="year",             # Focus on publications from the past year
#     include_answer=True,           # Return a concise summary/answer if available
#     include_images=False,          # Skip irrelevant images
#     include_raw_content=True  # Include raw text for further processing or embedding
# )

# # Print the results
# extra_info = []
# for result in results['results']:
#     extra_info.append(result['content']+" with a score of "+str(result['score']))
# extra_info = "\n\n".join(extra_info)
# Markdown(extra_info)

In [None]:
# api_wrapper_arxiv = ArxivAPIWrapper(top_k_results=1,doc_content_chars_max=250)
# arxiv = ArxivQueryRun(api_wrapper=api_wrapper_arxiv)
# arxiv.name

In [None]:
# Markdown(arxiv.run("Transfer Learning"))

### State


In [None]:
# One planned report section: a title plus a description of what it should cover.
# llm_call later reads both fields to write the section text.
# (Documented with comments rather than docstrings so the schema handed to the
# structured-output model is left exactly as it was.)
class Section(BaseModel):
    title:str=Field(description="Title of the section")
    description:str = Field(description="Description about the section based on the title given")
    
# Container model so the planner returns the whole outline as one object.
class Sections(BaseModel):
    sections:List[Section] = Field(description="A list of sections in the report")
     
# Planner built from the shared `llm`, configured to return a `Sections` object;
# invoked by the orchestrator node.
auto_planner = llm.with_structured_output(Sections)

class UserInput(TypedDict):
    """Raw material supplied by the user for the user-driven report branch."""

    # Report title; used in the abstract prompt.
    title: str
    # Problem statement; used in the abstract and introduction prompts and as
    # the Tavily search query.
    about_problem: str
    # Comma-separated method names; split by the methodology node.
    methods_used: str
    # Description of the proposed pipeline.
    proposed_workflow: str
    # Summary of the achieved results.
    results: str

class AutoState(TypedDict, total=False):
    """Data for the automatic (topic-only) report branch.

    Declared with ``total=False`` because the keys are filled in at different
    stages: the caller supplies ``topic``, the orchestrator adds ``sections``,
    each worker payload carries a single ``section``, and the synthesizer
    writes ``final_report``. No key has a default value.
    """

    # Topic the report should cover (provided in the graph input).
    topic: str
    # Outline produced by the orchestrator.
    sections: list[Section]
    # The one section handed to an llm_call worker via Send.
    section: Section
    # Joined text of all completed sections.
    final_report: str
    
class UserState(TypedDict, total=False):
    """Data for the user-driven report branch.

    Key names match what the nodes actually write: the introduction node
    stores its text under ``"introduction"`` and the final node stores the
    combined text under ``"final_report"``. Declared with ``total=False``
    because only ``user_input`` exists when the graph starts; the remaining
    keys are added one node at a time.
    """

    # Input supplied by the caller.
    user_input: UserInput
    # Generated sections, in the order the graph produces them.
    abstract: str
    introduction: str
    methodology: str
    proposed_method: str
    results: str
    conclusion: str
    references: str
    # All generated sections joined into one document.
    final_report: str
     
class State(TypedDict):
    """Top-level state schema passed to ``StateGraph`` when the graph is built."""
    # Data for the user-driven branch (abstract ... final_report nodes).
    user: UserState 
    # Data for the automatic branch (orchestrator / llm_call / synthesizer nodes).
    auto: AutoState  
    # Branch selector read by route(): truthy -> "User", falsy -> "Auto".
    is_userInput:Literal[True,False]
    # Section texts returned by llm_call workers and joined by synthesizer.
    # NOTE(review): the operator.add annotation is presumably LangGraph's
    # reducer, so lists from parallel workers are concatenated instead of
    # overwritten -- confirm against the LangGraph state documentation.
    completed_sections:Annotated[list,operator.add]
    
class WorkerState(TypedDict):
    """State shape for a single section-writing worker.

    NOTE(review): not referenced by any node in the visible cells (llm_call is
    annotated with ``State`` and reads ``state['auto']['section']``) -- confirm
    whether this class is still needed.
    """
    # The section the worker should write.
    section:Section
    # Worker output list, annotated with operator.add like State.completed_sections.
    completed_sections:Annotated[list,operator.add]

### Nodes

In [None]:
def route(state:State):
    """
    Choose which branch of the graph to run.

    Returns "User" when ``state['is_userInput']`` is truthy, otherwise "Auto".
    """
    print("------------ROUTING-------------")
    return "User" if state['is_userInput'] else "Auto"

In [None]:
def generate_abstract(state:State):
    """
    Generates the abstract for the report based on the user input.

    Reads title, problem statement, proposed workflow and results from
    ``state['user']['user_input']`` and asks the shared ``llm`` for an
    IEEE-style abstract.

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the generated text stored under ``"abstract"``.
    """
    print("------------ABSRACT-------------")
    user_input = state['user']['user_input']
    prompt = [
        SystemMessage(
            content=f"""
                        You are a good researcher and can make standard reports according to the IEEE format. 
                        You are tasked to make an abstract for the report following the IEEE format based on the below content.
                        Also use your own knowledge about neatly presenting the abstract
                        Title : {user_input['title']}
                        Problem Statement: {user_input['about_problem']}
                        Proposed Workflow : {user_input['proposed_workflow']}
                        Results : {user_input['results']}
                        ---------
                        """
        )
    ]
    abstract = llm.invoke(prompt,reasoning_format="hidden")
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "abstract": abstract.content}}

In [None]:
# prompt_abstract = [
#         SystemMessage(
#             content=f"""
#                         You are a good researcher and can make standard reports according to the IEEE format. 
#                         You are tasked to make an abstract for the report following the IEEE format based on the below content.
#                         Also use your own knwowledge about neatly presenting the abstract
#                         \n\n
#                         Title : Fusion of Texture and Deep Feature for Laryngeal Cancer Detection 
#                         \n\n
#                         Problem Statement: Laryngeal cancer is a major global health concern, with increasing incidence 
#                         primarily associated with risk factors such as tobacco use, excessive alcohol
#                         consumption, and viral infections.Using of Laryngeal cancer tissue patch images.
#                          \n\n
#                         Proposed Workflow : This study introduces a high-performance Deep Convolutional Neural Network (CNN)-based system
#                         using ResNet152V2, enhanced with Segmentation-Based Fractal Texture Analysis (SFTA) for 
#                         feature extraction and Linear Discriminant Analysis (LDA) for dimensionality reduction. 
#                         Classification is performed using Kernel Support Vector Machine (SVM), ensuring higher precision in detecting laryngeal cancer.
#                         . To evaluate the proposed framework, we implement five types of K-fold cross-validation(K = 2, 3, 4, 5, and 10).  
#                         \n\n
#                         Results :  achieving a mean training accuracy of 99.92%, mean testing accuracy of 99.92%, and
#                         mean precision, recall, and F1-scores of 99.92% under the K=10 cross-validation protocol.                                                
#                         """   
#         )
#     ]
# abstract = llm.invoke(prompt_abstract,reasoning_format="hidden")
# abstract

In [None]:
def generate_introduction(state: State):
    """
    Generates the Introduction for the report based on the user input.

    Searches Tavily with the problem statement (up to 15 results) and feeds
    the returned snippets, the problem statement and the proposed workflow to
    the shared ``llm``.

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the generated text stored under ``"introduction"``.
    """
    print("------------INTRODUCTION------------")
    user_input = state['user']['user_input']
    extra_info = TavilySearchContent(user_input['about_problem'],top_k=15)
    prompt = [
        SystemMessage(
            content=f"""
                        You are a good researcher and can make standard reports according to the IEEE format. 
                        You are tasked to draft an Introduction for the report following the IEEE format based on the below content.
                        
                        Problem Statement:{user_input['about_problem']}
                        ----------------------------------------------------------------
                          \n\n
                        Also try to include the whole below given extra information in framing the introduction like stating about the problem and then 
                        include all of the data from extra information to give a story type large introduction discussing about all
                        of the methodologies used.
                        and finally add the proposed workflow and highlight how the current proposed method would be better and can improve results
                        Extra Information:
                        \n\n
                        {extra_info} 
                        \n\n
                        --------------------------------------------------------------------
                         Proposed Workflow : {user_input['proposed_workflow']}
                         
                         **DONOT MENTION ABOUT THE REFERENCES HERE**
                        """
        )
    ]
    introduction = llm.invoke(prompt, reasoning_format="hidden")
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "introduction": introduction.content}}

In [None]:
# prompt = [
#         SystemMessage(
#             content=f"""
#                         You are a good researcher and can make standard reports according to the IEEE format. 
#                         You are tasked to draft an Introduction for the report following the IEEE format based on the below content.
                        
#                         Problem Statement: Laryngeal cancer is a major global health concern, with increasing incidence 
#                          primarily associated with risk factors such as tobacco use, excessive alcohol
#                          consumption, and viral infections.Using of Laryngeal cancer tissue patch images.
#                           \n\n
#                         Also try to include the whole below given extra information in framing the introduction like stating about the problem and then 
#                         include all of the data from extra information to give a story type large introduction discussing about all
#                         of the methodologies used.
#                         and finally add the proposed workflow and highlight how the current proposed method would be better and can improve results
#                         Extra Information:
#                         \n\n
#                         {extra_info} 
#                         \n\n
                        
#                          Proposed Workflow : This study introduces a high-performance Deep Convolutional Neural Network (CNN)-based system
#                          using ResNet152V2, enhanced with Segmentation-Based Fractal Texture Analysis (SFTA) for 
#                          feature extraction and Linear Discriminant Analysis (LDA) for dimensionality reduction. 
#                          Classification is performed using Kernel Support Vector Machine (SVM), ensuring higher precision in detecting laryngeal cancer.
#                          . To evaluate the proposed framework, we implement five types of K-fold cross-validation(K = 2, 3, 4, 5, and 10).
#                         ---------
#                         Returns : 
#                         (dict)
                        
#                         dict contains keys: 
#                         title and description
#                         title as introduction
#                         description as the content which need to be filled.
#                         """
#         )
#     ]
# introduction = user_planner.invoke(prompt,reasoning_format="hidden")
# Markdown(introduction.description)

In [None]:
def generate_methodology(state:State):
    """
    Generates the Methodology section of the report, one write-up per method
    listed in ``state['user']['user_input']['methods_used']``.

    The comma-separated list is split, each name is stripped of surrounding
    whitespace, and blank entries (e.g. from a trailing comma) are skipped so
    no lookup is made for an empty query. Write-ups are separated by blank
    lines so consecutive methods do not run together.

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the generated text stored under ``"methodology"``.
    """
    print("------------METHODOLOGY-------------")
    raw_methods = state['user']['user_input']['methods_used']
    method_names = [name.strip() for name in raw_methods.split(",")]
    write_ups = [WikiSearchContent(name) for name in method_names if name]
    methodology = ' **Methodology** \n' + "\n\n".join(write_ups)
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "methodology": methodology}}

In [None]:
# methods = """Transfer learning ,finetuning of cnn,
# support vector machine, random forest classifier,
# linear dicriminant analysis,
# prinicpal compnent analysis,
# independent component analysis,
# genetic alforithm,binary bat optimisation,
# binary particle swarm optimisation """

In [None]:
# methods = methods.split(",")
# methodology = """ """
# for method in methods:
#     methodology+= WikiSearchContent(method)
# Markdown(methodology)

In [None]:
# user_proposed = """
# Deep CNN - based model that uses:

# ResNet152V2 + SFTA for fusion feature extraction,

# Linear Discriminant Analysis (LDA) for dimensionality reduction, and

# Kernel SVM for classification.
# """

In [None]:
# prompt = [
#         SystemMessage(
#             content=f"""
#                 You are a good researcher and can make standard reports according to the IEEE format. 
#                You are tasked to draft an Proposed Method for the report following the IEEE format based on the below content.
#                 You can understand the below given method workflow and explain and Enhance more about the method
#                 \n 
#                 {user_proposed}
#                 Returns : 
#                         (dict)
                        
#                         dict contains keys: 
#                         title and description
#                         title as Proposed Method
#                         description as the content which need to be filled.
#             """
#         )
#     ]
# result = user_planner.invoke(prompt,reasoning_format="hidden")
# Markdown(result.description)
#     # return {"user":{"proposed_method":result.content}}

In [None]:
def generate_proposed_method(state:State):
    """
    Generates the proposed method Section of the report explaining how the workflow is.

    Sends ``state['user']['user_input']['proposed_workflow']`` to the shared
    ``llm`` and asks for an IEEE-style "Proposed Method" section.

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the generated text stored under ``"proposed_method"``.
    """
    print("------------PORPOSED METHOD-------------")
    user_proposed = state['user']['user_input']['proposed_workflow']
    prompt = [
        SystemMessage(
            content=f"""
                You are a good researcher and can make standard reports according to the IEEE format. 
               You are tasked to draft an Proposed Method for the report following the IEEE format based on the below content.
                You can understand the below given method workflow and explain and Enhance more about the method
                \n 
                {user_proposed}
                ** DONOT ADD REFERENCES HERE **
            """
        )
    ]
    result = llm.invoke(prompt,reasoning_format="hidden")
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "proposed_method": result.content}}

In [None]:
def generate_results(state:State):
    """
    Generates the Results section of the report.

    Sends ``state['user']['user_input']['results']`` to the shared ``llm`` and
    asks for an IEEE-style presentation of those results.

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the generated text stored under ``"results"``.
    """
    print("------------RESULTS-------------")
    user_results = state['user']['user_input']['results']
    prompt = [
        SystemMessage(
            content=f"""
                You are a good researcher and can make standard reports according to the IEEE format. 
               You are tasked to draft an  Result for the report following the IEEE format based on the below content.
                Frame the results in well mannered format.
                \n 
                {user_results}
            """
        )
    ]
    result = llm.invoke(prompt,reasoning_format="hidden")
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "results": result.content}}

In [None]:
def generate_conclusion(state:State):
    """
    Generates the conclusion section of the report.

    Uses the previously generated abstract (``state['user']['abstract']``) as
    the basis and asks the shared ``llm`` to also discuss future extensions.

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the generated text stored under ``"conclusion"``.
    """
    print("------------CONCLUSION-------------")
    abstract_text = state['user']['abstract']
    prompt = [
        SystemMessage(
            content=f"""
            You are a good researcher and can make standard reports according to the IEEE format. 
               You are tasked to draft the conclusion section for the report following the IEEE format based on the below content.
               Elaborate and Include how in future new methods can be added to this work.
               \n\n
               {abstract_text}
            """
        )
    ]
    result = llm.invoke(prompt,reasoning_format="hidden")
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "conclusion": result.content}}

In [None]:
def generate_references(state:State):
    """
    Generates the References section of the report.

    Passes the raw ``methods_used`` string to the shared ``llm`` and asks for
    two IEEE-formatted references per method.

    NOTE(review): the references come from the model alone; nothing in this
    function checks that they correspond to real publications.

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the generated text stored under ``"references"``.
    """
    print("------------REFERENCES-------------")
    methods = state['user']['user_input']['methods_used']
    prompt = [
        SystemMessage(
            content=f"""
            You are a good researcher and can make standard reports according to the IEEE format. 
               You are tasked to draft an  References for the report following the IEEE format based on the below content.
               Extract the two refernce per method information as per IEEE format from below methods.
               {methods}
            """
        )
    ]
    result = llm.invoke(prompt,reasoning_format="hidden")
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "references": result.content}}

In [None]:
# d

In [None]:
# prompt = [
#         SystemMessage(
#             content=f"""
#             You are a good researcher and can make standard reports according to the IEEE format. 
#                You are tasked to draft the conclusion section for the report following the IEEE format based on the below content.
#                Elaborate and Include how in future new methods can be added to this work.
#                \n\n
#                This study presents an advanced laryngeal cancer detection system that integrates deep learning with texture analysis. The proposed framework combines ResNet152V2 for deep feature extraction with Segmentation-Based Fractal Texture Analysis (SFTA) to capture microstructural patterns. Dimensionality reduction using Linear Discriminant Analysis (LDA) enhances feature discrimination, followed by Kernel Support Vector Machine (SVM) classification. Evaluated through 10-fold cross-validation, the system achieves a mean testing accuracy of 99.92% with precision, recall, and F1-scores of 99.92%, demonstrating its robustness for clinical application in early laryngeal cancer diagnosis.
#                 Returns : 
#                         (dict)
                        
#                         dict contains keys: 
#                         title and description
#                         title as Conclusion
#                         description as the content which need to be filled.
#             """
#         )
#     ]
# result = user_planner.invoke(prompt,reasoning_format="hidden")
# Markdown(result.description)

In [None]:
def final_report(state:State):
    """
    Combines the content of all generated sections into one document.

    Every value in ``state['user']`` is joined with blank lines, in the dict's
    insertion order, except the raw ``user_input`` and any previously stored
    ``final_report`` (so running this node again does not nest the old report
    inside the new one).

    Returns:
        ``{"user": ...}`` where the value is a copy of the current user data
        with the combined text stored under ``"final_report"``.
    """
    excluded_keys = {"user_input", "final_report"}
    combined_sections = "\n\n".join(
        str(value)
        for key, value in state['user'].items()
        if key not in excluded_keys
    )
    # Build a new dict rather than mutating the incoming state in place.
    return {"user": {**state["user"], "final_report": combined_sections}}

In [None]:
def orcehstrator(state:State):
    """Orchestrator that generates the plan (section outline) for the report.

    Asks ``auto_planner`` for an outline of at least five sections on
    ``state['auto']['topic']``.

    Returns:
        ``{"auto": ...}`` where the value is a copy of the current ``auto``
        data with the planned sections stored under ``"sections"``. The
        existing keys (notably ``topic``) are kept instead of being replaced
        by a dict that only contains ``sections``.
    """
    print("In orchestrator")
    report_sections = auto_planner.invoke(
        [
            SystemMessage(content="You are a world class research assistant,and you are great at creating outlines for reports"),
            HumanMessage(content=f"Create a detailed outline for a report on the topic:{state['auto']['topic']}.List at least 5 sections with name and description"),
        ],reasoning_format="hidden"
    )
    print("In orchestrator after llm")

    return {"auto": {**state["auto"], "sections": report_sections.sections}}


def llm_call(state:State):
    """Worker node: writes the text for the single section it was handed.

    Reads the section from ``state['auto']['section']`` and returns the
    generated text as a one-element list under ``"completed_sections"``.
    """
    assigned = state['auto']['section']
    messages = [
        SystemMessage(
            content="Write a report section following the provided name and description. Include no preamble for each section.Used markdown formatting"
        ),
        HumanMessage(
            content=f"here is the section name : {assigned.title} and description: {assigned.description}"
        ),
    ]
    response = llm.invoke(messages, reasoning_format="hidden")
    return {"completed_sections": [response.content]}


def assign_workers(state:State):
    """Create one ``Send`` to the "llm_call" node per planned section.

    Each payload has the shape ``{"auto": {"section": <section>}}``.
    """
    dispatches = []
    for planned_section in state['auto']['sections']:
        payload = {"auto": {"section": planned_section}}
        dispatches.append(Send("llm_call", payload))
    return dispatches

def synthesizer(state:State):
    """Join all completed section texts into the final report.

    Sections are separated by a horizontal rule (``---``) surrounded by blank
    lines; the result is returned under ``auto.final_report``.
    """
    print(state['auto'].keys())
    divider = "\n\n---\n\n"
    report_text = divider.join(state["completed_sections"])
    return {"auto": {"final_report": report_text}}

### Graph

In [None]:
from langgraph.graph import StateGraph,START,END 

builder = StateGraph(State)

# Register every node under the name the edges below refer to.
for node_name, node_fn in (
    ("abstract", generate_abstract),
    ("introduction", generate_introduction),
    ("methodology", generate_methodology),
    ("proposed", generate_proposed_method),
    ("results", generate_results),
    ("references", generate_references),
    ("conclusion", generate_conclusion),
    ("final_report", final_report),
    ("orchestrator", orcehstrator),
    ("llm_call", llm_call),
    ("synthesizer", synthesizer),
):
    builder.add_node(node_name, node_fn)

# Entry point: route() picks the user-driven or the automatic branch.
builder.add_conditional_edges(
    START,
    route,
    {"User": "abstract", "Auto": "orchestrator"},
)

# User branch: a straight pipeline, each step feeding the next.
user_pipeline = [
    "abstract",
    "introduction",
    "methodology",
    "proposed",
    "results",
    "conclusion",
    "references",
    "final_report",
    END,
]
for source_node, target_node in zip(user_pipeline, user_pipeline[1:]):
    builder.add_edge(source_node, target_node)

# Auto branch: the orchestrator fans out to llm_call workers, then the
# synthesizer joins their output.
builder.add_conditional_edges("orchestrator", assign_workers, ["llm_call"])
builder.add_edge("llm_call", "synthesizer")
builder.add_edge("synthesizer", END)

graph = builder.compile()

display(Image(graph.get_graph().draw_mermaid_png()))

In [None]:
# Run the compiled graph on the automatic branch: is_userInput=False makes
# route() return "Auto", so the orchestrator plans sections for the topic below.
report = graph.invoke({
    "is_userInput": False,
    "auto": {
        "topic": "An detailed report on the impact of use of Agentic AI applications in software development",
    }
})

In [None]:
# report = graph.invoke(
#     {
#         "is_userInput": True,
#         "user": {
#            "user_input": {
#         "title": "Laryngeal Cancer Detection Using Deep CNN and Feature Fusion",
#         "about_problem": "Recent researches done on Laryngeal Cancer detection",
        
#         "methods_used": "ResNet152V2 CNN, SFTA texture analysis, feature fusion, Linear Discriminant Analysis, Kernel SVM, K-fold cross-validation.",
        
#         "proposed_workflow": "Collect and preprocess laryngeal images. Extract deep features with ResNet152V2 and texture features with SFTA. Fuse features, reduce dimensionality with LDA, and classify using Kernel SVM. Evaluate with K-fold cross-validation.",
        
#         "results": "The model achieved 99.89% training and 99.85% testing accuracy, demonstrating strong generalization and robustness for automated laryngeal cancer detection."
#     }
#         },
#     }
# )

In [None]:
# Render the auto-branch report as Markdown (shown as the cell's output).
Markdown(report['auto']['final_report'])

In [None]:
# Markdown(report['user']['final_report'])

In [None]:
# Title, heading and output path live in one place so they can be changed per report.
# NOTE(review): these still mention plant leaf disease, while the invoke cell
# above requests a report on Agentic AI -- update them to match the report.
report_title = "Plant Leaf Disease Detection Report"
report_heading = "Automated Plant Leaf Disease Detection Using Deep Learning"
output_path = "plant_leaf_report.html"

content = report["auto"]["final_report"]  # generated report text from the auto branch

# Escape the text before embedding it: the report is free-form model output and
# any "<", ">" or "&" in it would otherwise be interpreted as HTML markup.
escaped_content = html.escape(content)

html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{html.escape(report_title)}</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            line-height: 1.6;
            margin: 40px;
        }}
        h1, h2, h3 {{
            color: #2E8B57;
        }}
        pre {{
            white-space: pre-wrap;
            word-wrap: break-word;
        }}
    </style>
</head>
<body>
    <h1>{html.escape(report_heading)}</h1>
    <pre>{escaped_content}</pre>
</body>
</html>
"""

# Save to HTML file
with open(output_path, "w", encoding="utf-8") as f:
    f.write(html_content)

print(f"✅ HTML file saved as {output_path}")
