In [13]:
import asyncio
import sys

# On Windows only, install the Proactor event loop policy for asyncio.
# NOTE(review): presumably required so the async crawler imported later in this
# notebook can launch subprocesses on Windows — confirm before removing.
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

from typing import TypedDict, List, Annotated
import operator



class ResearchState(TypedDict):
    """State dictionary shared by the nodes of the research workflow graph.

    Nodes return partial updates keyed by these field names. The two list fields
    carry an ``operator.add`` annotation; NOTE(review): this is presumably picked up
    by the graph library as a reducer so that list updates are concatenated onto the
    existing value instead of replacing it — confirm against the LangGraph docs.
    """
    # Subject of the report; read by both research_node and writer_node.
    topic: str
    # Search-result text contributed by research_node; writer_node reads the last 5 entries.
    sources: Annotated[List[str], operator.add]
    # Report text written by writer_node.
    draft: str
    # Revision counter; seeded with 0 by the run cell and not updated by any node visible here.
    revisions: int
    # Log entries of the form {"role": ..., "content": ...} emitted by each node.
    messages: Annotated[List[dict], operator.add]


In [14]:
from langchain_community.tools import DuckDuckGoSearchResults

def research_node(state: ResearchState):
    """Search the web for the state's topic and record the results as sources.

    Args:
        state: Workflow state; only ``state["topic"]`` is read.

    Returns:
        A partial state update containing:

        - ``sources``: a list of result strings (empty when the search
          returned nothing).
        - ``messages``: one ``researcher`` log entry summarising what was
          collected.
    """
    search = DuckDuckGoSearchResults(max_results=5)
    results = search.invoke(state["topic"])

    # The tool returns a single formatted string by default, so len(results)
    # is a character count, not a number of sources. Normalise whatever comes
    # back into a list of strings (writer_node joins them with "\n") and
    # report honest figures in the log message.
    if isinstance(results, str):
        sources = [results] if results.strip() else []
    else:
        sources = [str(hit) for hit in (results or [])]

    total_chars = sum(len(text) for text in sources)
    if sources:
        summary = (
            f"Collected {len(sources)} search result block(s) "
            f"({total_chars} characters)"
        )
    else:
        summary = "Search returned no results"

    return {
        "sources": sources,
        "messages": [{
            "role": "researcher",
            "content": summary
        }]
    }


In [15]:
from pydantic import BaseModel
from config import config
import asyncio
from pydantic import BaseModel
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from config import config

# Record shape requested from the LLM extraction step; its JSON schema is passed
# to LLMExtractionStrategy via ExtractedData.model_json_schema().
# NOTE(review): documented with comments rather than a class docstring on purpose —
# pydantic is understood to copy a model docstring into the generated schema's
# "description", which would change the schema sent to the LLM. Confirm before
# adding a docstring.
class ExtractedData(BaseModel):
    # Title extracted from the page content.
    title: str
    # URL extracted from the page content.
    url: str
    # Summary extracted from the page content.
    summary: str

# Token budget (8192). Read as a module-level global by writer_node when it
# builds its chat model, so this cell must run before writer_node is invoked.
max_tokens = (1024*8)
# Configure the LLM extraction strategy with OpenRouter
# NOTE(review): llm_strategy is not referenced by any other cell visible here,
# and the accepted keyword set depends on the installed crawl4ai version — confirm.
llm_strategy = LLMExtractionStrategy(
    provider="openrouter",
    api_token=config.OPENROUTER_API_KEY,  # key is read from the local config module
    schema=ExtractedData.model_json_schema(),  # JSON schema of the record to extract
    extraction_type="schema",
    instruction="Extract the title, URL, and summary from the content.",
    chunk_token_threshold=1200,
    apply_chunking=True,
    input_format="html",
    extra_args={
        "model": "meta-llama/llama-3.2-3b-instruct",  # Specify the model to use
        "temperature": 0.1,
        "max_tokens": 1000,  # literal value; independent of the module-level max_tokens above
    },
)

In [18]:
import requests
import json
import time

def crawl_with_openrouter(
    url: str,
    api_token: str = None,
    base_url: str = "http://localhost:11235",
    poll_interval: float = 2.0,
    max_wait: float = 300.0,
    request_timeout: float = 30.0,
):
    """Submit a crawl job to a crawl server and poll until it finishes.

    The job is POSTed to ``{base_url}/crawl`` with an LLM extraction strategy,
    then ``{base_url}/task/{task_id}`` is polled until the task reports
    ``completed`` or ``failed``, or until ``max_wait`` seconds have elapsed.

    Args:
        url: Page to crawl; sent as the ``urls`` field of the request.
        api_token: Optional bearer token. When falsy, no ``Authorization``
            header is sent.
        base_url: Root URL of the crawl server.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Upper bound, in seconds, on the total time spent polling.
        request_timeout: Per-request HTTP timeout in seconds.

    Returns:
        The task's ``extracted_content`` on success, or ``None`` when the
        server returns no task id, the task fails, or polling times out.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        requests.RequestException: On connection problems or request timeouts.
    """
    headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}
    base_url = base_url.rstrip("/")

    request_data = {
        "urls": url,
        "extraction_strategy": {
            "type": "llm",
            "provider": "openrouter",
            "model": "meta-llama/llama-3.2-3b-instruct",
            "instruction": "Extract the title, URL, and summary from the content.",
            "schema": {
                "title": "str",
                "url": "str",
                "summary": "str"
            }
        },
        "cache_mode": "bypass",
        "extra_args": {
            "temperature": 0.1,
            "max_tokens": 1000
        }
    }

    response = requests.post(
        f"{base_url}/crawl",
        headers=headers,
        json=request_data,
        timeout=request_timeout,
    )
    # Surface auth/server errors immediately instead of failing later on a
    # missing key in an error payload.
    response.raise_for_status()
    submission = response.json()
    print(submission)

    task_id = submission.get("task_id")
    if not task_id:
        # Without an id the loop below would poll ".../task/None" forever.
        print("Crawl failed:", "server response did not include a task_id")
        return None

    deadline = time.monotonic() + max_wait
    while True:
        result = requests.get(
            f"{base_url}/task/{task_id}",
            headers=headers,
            timeout=request_timeout,
        )
        result.raise_for_status()
        status = result.json()
        task_state = status.get("status")

        if task_state == "completed":
            return (status.get("result") or {}).get("extracted_content")
        if task_state == "failed":
            print("Crawl failed:", status.get("error"))
            return None

        # Bound the wait so a stuck task cannot hang the notebook cell.
        if time.monotonic() >= deadline:
            print("Crawl failed:", f"task {task_id} did not finish within {max_wait} seconds")
            return None

        time.sleep(poll_interval)

# Example usage
import os

url = "https://www.lycamobile.co.uk/en/how-to/how-to-activate-my-new-sim-and-prepay-plan/?utm_source=onboarding&utm_medium=email&utm_campaign=freesimactivation"
# Read the crawl server token from the environment instead of committing it to
# the notebook. When the variable is unset, api_token is None and the request
# is sent without an Authorization header.
api_token = os.environ.get("CRAWL4AI_API_TOKEN")
data = crawl_with_openrouter(url, api_token)
print(json.dumps(data, indent=2))

{'task_id': 'e2f04c8f-4139-4ed0-b795-f81da3d0e509'}
null


In [3]:
from langchain_openai import ChatOpenAI
from config import config
def writer_node(state: ResearchState):
    """Ask the chat model for a report on the topic, grounded in recent sources.

    Args:
        state: Workflow state; ``state["topic"]`` and the last five entries of
            ``state["sources"]`` are read.

    Returns:
        A partial state update with the model's reply as ``draft`` and a single
        ``writer`` log entry in ``messages``.
    """
    # Chat model settings; the key and base URL come from the local config
    # module and the token budget from the module-level max_tokens.
    model_settings = dict(
        model="meta-llama/llama-3.2-3b-instruct",
        openai_api_key=config.OPENROUTER_API_KEY,
        openai_api_base=config.OPENROUTER_API_BASE,
        max_retries=3,
        max_tokens=max_tokens,
    )
    writer_llm = ChatOpenAI(**model_settings)

    # Only the five most recent source entries go into the prompt.
    recent_sources = state["sources"][-5:]
    sources = "\n".join(recent_sources)

    prompt = f"""
    Write a comprehensive report on: {state['topic']}
    Using these sources:
    {sources}
    """
    reply = writer_llm.invoke(prompt)

    log_entry = {"role": "writer", "content": "Draft generated"}
    return {"draft": reply.content, "messages": [log_entry]}


In [4]:
from langgraph.graph import StateGraph, END

# Initialize graph: nodes exchange state shaped like ResearchState.
workflow = StateGraph(ResearchState)

# Add nodes: register each step function under the name used by the edges below.
workflow.add_node("research", research_node)
workflow.add_node("write", writer_node)

# Set edges: linear pipeline research -> write -> END, entered at "research".
workflow.set_entry_point("research")
workflow.add_edge("research", "write")
workflow.add_edge("write", END)

# Compile the graph definition into the object the run cell calls .invoke() on.
chain = workflow.compile()


In [5]:
# Seed every ResearchState field, run the compiled graph once, then print the
# resulting draft under a heading.
initial_state = {
    "topic": "Recent advances in AI protein folding",
    "sources": [],
    "draft": "",
    "revisions": 0,
    "messages": [],
}
result = chain.invoke(initial_state)

report = f"""
# Final Report: {result['topic']}
{result['draft']}


"""
print(report)



# Final Report: Recent advances in AI protein folding
**Comprehensive Report: Recent Advances in AI Protein Folding**

**Introduction**

The protein folding problem has been a long-standing challenge in biology, with proteins in their native state having unique three-dimensional structures that determine their function and interact with other molecules. Despite significant efforts, solving this problem remained elusive until the advent of artificial intelligence (AI) approaches. Recent advances in AI protein folding have revolutionized the field, enabling the prediction of protein structures with unprecedented accuracy and opening up new avenues for protein design and engineering.

**Background**

The protein folding problem involves predicting the three-dimensional structure of a protein from its amino acid sequence. This problem is difficult due to the vast number of possible conformations, known as conformational space, which contains millions of unique structures. The development 