In [None]:
from dotenv import load_dotenv
from typing import Annotated,TypedDict
from langgraph.graph import StateGraph,START,END
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.tools import tool
import os
from langgraph.graph.message import add_messages
from pydantic import BaseModel,Field

load_dotenv()

True

In [None]:
import datetime
from typing import Optional

def now_iso():
    """Return current datetime in ISO format including time and timezone if available."""
    return datetime.datetime.now(datetime.timezone.utc).isoformat()

class AnalysisSource(BaseModel):
    source_name:  Optional[str] = Field(..., description="Name of the data source, e.g., Google, Bing, Reddit, Google Finance")
    analysis: str = Field(..., description="Analysis result or summary from this source")
    source_link: Optional[str] = Field(..., description="Direct link to the information or post")


class LLMAnalysisResult(BaseModel):
    sources: list[AnalysisSource] = Field(..., description="List of sources with analysis and URLs")
    synthesized_answer: str = Field(..., description="LLM's synthesized answer using all sources")

In [None]:
llm=ChatGoogleGenerativeAI(model='gemini-2.5-flash',api_key=os.getenv('GEMINI_API_KEY'),async_client_running=True,verbose=True)
analysing_llm=llm.with_structured_output(AnalysisSource)

In [None]:
await llm.ainvoke('give me aa para about ethics in ai')

AIMessage(content="Ethics in AI is a critical and rapidly evolving field, grappling with the profound moral implications as intelligent systems become increasingly integrated into every facet of society. The imperative is to ensure these powerful technologies align with human values and principles, preventing potential harms while maximizing societal benefit. Key concerns revolve around algorithmic bias, where datasets can perpetuate or amplify societal inequalities, leading to discriminatory outcomes in areas like hiring, lending, or criminal justice. Equally vital are issues of transparency and explainability, as complex models often operate as 'black boxes,' making it difficult to understand *why* a decision was made, undermining trust and accountability. Furthermore, questions of privacy, data security, and the autonomous nature of some AI systems raise serious moral dilemmas regarding surveillance, control, and the erosion of human agency. Without robust ethical frameworks and gov

In [None]:
analysis=await analysing_llm.ainvoke(content)

In [None]:
analysis.source_link

'No specific link provided, this is a general statement.'

## Define state


In [None]:
class ResearchState(TypedDict):
    messages:Annotated[str,add_messages]
    user_question:str|None
    google_search_results:str|None
    google_finance_results:str|None
    bing_search_results:str|None
    reddit_search_results:str|None
    selected_reddit_urls:list[str]|None
    reddit_posts:str|None
    google_analysis:str
    bing_analysis:str
    reddit_analysis:str
    google_finance_analysis:str
    synthesized_answer:str

# NODES

In [None]:

def google_search(state:ResearchState)->ResearchState:
    return state


def bing_search(state:ResearchState)->ResearchState:
    return state


def reddit_search(state:ResearchState)->ResearchState:
    return state

def twitter_search(state: ResearchState) -> ResearchState:
    return state

def google_finance_search(state:ResearchState)->ResearchState:
    return state

def analysis_google_results(state:ResearchState)->ResearchState:
    return state


def analysis_bing_results(state:ResearchState)->ResearchState:
    return state


def analysis_reddit_results(state:ResearchState)->ResearchState:
    return state


def analyze_results_results(state:ResearchState)->ResearchState:
    return state
    

def synthesize_results(state:ResearchState)->ResearchState:
    return state


def final_results(state:ResearchState)->ResearchState:
    return state

In [4]:
from serpapi import GoogleSearch

params = {
  "engine": "google_finance",
  "q": "GOOGL:NASDAQ",
  "api_key": os.getenv('SERP_API')
}

search = GoogleSearch(params)
results = search.get_dict()

ImportError: cannot import name 'GoogleSearch' from 'serpapi' (/Users/divyyadav/miniforge3/envs/deep_env/lib/python3.11/site-packages/serpapi/__init__.py)

# GRAPH

In [None]:
graph=StateGraph(ResearchState)

In [None]:
graph.add_node("google_search", google_search)
graph.add_node("bing_search", bing_search)
graph.add_node("reddit_search", reddit_search)
graph.add_node("analysis_google_results", analysis_google_results)
graph.add_node("analysis_bing_results", analysis_bing_results)
graph.add_node("analysis_reddit_results", analysis_reddit_results)
graph.add_node("analyze_results_results", analyze_results_results)
graph.add_node("synthesize_results", synthesize_results)
graph.add_node("final_results", final_results)

In [None]:
graph.add_edge(START, "google_search")
graph.add_edge(START, "bing_search")
graph.add_edge(START, "reddit_search")

In [None]:
from typing import Any
import asyncpraw
import asyncio
import os
from aiocache import caches

def chunk_list(data, chunk_size):
    for i in range(0,len(data),chunk_size):
        yield data[i:i+chunk_size]

# Load Reddit API credentials from environment variables (with default fallback)
client_id = os.getenv('REDDIT_CLIENT_ID')
client_secret = os.getenv('REDDIT_CLIENT_SECRET')
user_agent = 'dp by /u/Temporary_Version105'

# Reset aiocache state (helpful in Jupyter notebooks)
caches._caches.clear()
caches._config.clear()

# Configure aiocache to use in-memory cache with JSON serialization
caches.set_config({
    'default': {
        'cache': 'aiocache.backends.memory.SimpleMemoryCache',
        'serializer': {'class': 'aiocache.serializers.PickleSerializer'},
        'ttl': 3600
    }
})

async def fetch_reddit_posts():
    """
    Fetch 'hot' posts and top-level comments from r/finance and store in cache.
    Skips posts if already cached. Stores post IDs for easy querying later.
    """
    cache = caches.get('default')
    print("Connecting to Reddit...")
    
    reddit = asyncpraw.Reddit(
        client_id=client_id,
        client_secret=client_secret,
        user_agent=user_agent
    )

    try:
        # Get the finance subreddit
        subreddit = await reddit.subreddit('finance')
        post_processed = 0
        post_ids = []

        # Iterate over hot posts (limit 10 for demo)
        async for post in subreddit.hot(limit=0):
            post_processed += 1
            post_id = post.id

            # Keys for post and its comments
            post_cache_key = f'posts{post_id}'
            comments_cache_key = f'comments{post_id}'

            # Check if this post and its comments are already cached
            cached_post = await cache.get(post_cache_key)
            cached_comments = await cache.get(comments_cache_key)

            if cached_post and cached_comments:
                print(f"  > Using cached: {cached_post.get('title', 'No Title')[:20]}")
                continue

            # Fetch data if not cached
            else:
                try:
                    # Fully load post and expand all comments (no 'more')
                    await post.load()
                    await post.comments.replace_more(limit=0)
                except Exception as e:
                    print(f"Error loading post {post.id}: {e}")
                    continue

                # Collect up to 50 comments with key details
                comments = []
                for com in post.comments.list()[:20]:
                    if hasattr(com, 'body') and com.body.strip():
                        comments.append(
                            {
                                "id": com.id,
                                "body": com.body,
                                "author": str(getattr(com, "author", "N/A")),
                                "score": getattr(com, "score", 0),
                                "depth": com.depth,   
                                "controversiality": com.controversiality,
                                "gilded": com.gilded,
                                "total_awards_received": com.total_awards_received,
                            }
                        )

                # Store post data with title, score, and comments
                post_data = {
                    "title": getattr(post, "title", "N/A"),
                    "score": getattr(post, "score", 0),
                    "comments": comments
                }

                # Track fetched post IDs
                post_ids.append(post.id)

                for idx, chunk in enumerate(chunk_list(comments, 200)):
                    chunk_key = f"{comments_cache_key}_chunk{idx}"
                    await cache.set(chunk_key, chunk, ttl=36000)

                await cache.set(post_cache_key, post_data, ttl=3600)
                await cache.set('all_post_ids', post_ids, ttl=3600)

                print(f' post ids {post_ids} and their length {len(post_ids)}')
                print(f"  > Fetched and cached post {post_processed}: {post.title[:50]}...")

    except asyncio.TimeoutError:
        print("Operation timed out.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Always close Reddit API connection
        await reddit.close()  

async def get_posts(postid):
    """
    Retrieve a cached Reddit post by its ID.
    Returns a dict with post and comments if found, or None.
    """
    try:
        cache_instance = caches.get('default')
        post_cache_key = f'posts{postid}'
        posts = await cache_instance.get(post_cache_key)
        if posts:
            print(f"  > Using cached data for post: {posts['title']}")
            return posts
        else:
            print('no posts found')
    except Exception as e:
        print(f"Error getting posts for postid {postid}: {e}")
        return None

async def get_all_comments():
    """
    Retrieve all cached comments from all posts currently stored in cache.
    Returns a flat list of comments (good for analysis or language models).
    """
    cache = caches.get('default')
    
    # Find all cached post IDs
    post_ids_key = 'all_post_ids'
    post_ids = await cache.get(post_ids_key) or []
    
    if not post_ids:
        print("⚠️  No cached posts found. Run fetch_reddit_posts() first!")
        return []
    
    all_comments = []
    print(f"📥 Collecting comments from {len(post_ids)} posts...\n")
    
    # Gather all comments for each post (add post_id for provenance)
    for post_id in post_ids:
        comments_cache_key = f'comments{post_id}'
        idx=0
        comments = []
        while True:
            chunk = await cache.get(f"{comments_cache_key}_chunk{idx}")
            if not chunk:
                break
            comments.extend(chunk)
            idx += 1
        
        if comments:
            for comment in comments:
                comment['post_id'] = post_id  # Track origin post
                all_comments.append(comment)
    
    print(f"✅ Total comments collected: {len(all_comments)}")
    return all_comments


In [4]:
await fetch_reddit_posts()
await get_all_comments()

Connecting to Reddit...
⚠️  No cached posts found. Run fetch_reddit_posts() first!


[]

# FUNCTIONS

In [None]:
import os
from typing import Literal
from tavily import TavilyClient
from deepagents import create_deep_agent

api=os.environ["TAVILY_API_KEY"]='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'

tavily_client = TavilyClient(api_key=api)

# Web search tool
def internet_search(
    query: str,
    max_results: int = 5,
    topic: Literal["general", "news", "finance"] = "general",
    include_raw_content: bool = False,
):
    """Run a web search"""
    return tavily_client.search(
        query,
        max_results=max_results,
        include_raw_content=include_raw_content,
        topic=topic,
    )


# System prompt to steer the agent to be an expert researcher
research_instructions = """You are an expert researcher. Your job is to conduct thorough research, and then write a polished report.

You have access to an internet search tool as your primary means of gathering information.

## `internet_search`

Use this to run an internet search for a given query. You can specify the max number of results to return, the topic, and whether raw content should be included.
"""

# Create the deep agent
agent = create_deep_agent(
    tools=[internet_search],
    system_prompt=research_instructions,
    model=llm,
    
)


In [38]:
result = agent.invoke({"messages": [{"role": "user", "content": "What is deep agents by langgraph use web search ?"}]})

In [49]:
for results in result['messages'][-1]:
    print(results[1])

"Deep Agents" with LangGraph refers to a new approach to building more sophisticated and autonomous AI agents. Unlike traditional agents that follow a simple loop of calling tools, Deep Agents are designed to handle complex, long-running tasks by incorporating several key features:

1.  **Planning:** This allows agents to strategize and stay on track for complex objectives.
2.  **File system:** Agents can use a file system to store and retrieve context, enabling them to offload information and manage longer interactions.
3.  **Sub-agents:** These are specialized agents that can be called upon to handle specific tasks, acting as focused specialists within a larger workflow.
4.  **Prompting:** Detailed instructions and careful prompting are used to guide the agents and ensure they understand and execute tasks effectively.

LangGraph is a library that helps orchestrate these long-running, multi-agent workflows. It provides the framework to combine these "Deep Agent" features, allowing dev

In [43]:
result

{'messages': [HumanMessage(content='What is deep agents by langgraph use web search ?', additional_kwargs={}, response_metadata={}, id='20b9adf6-b999-44b1-86bf-3582ac8b5e72'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'internet_search', 'arguments': '{"query": "deep agents langgraph"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--a5aa4cac-e8f7-4566-8a81-9bff7ca161bb-0', tool_calls=[{'name': 'internet_search', 'args': {'query': 'deep agents langgraph'}, 'id': '844a87fb-596f-402c-b88f-0f8288dd9f4a', 'type': 'tool_call'}], usage_metadata={'input_tokens': 5122, 'output_tokens': 87, 'total_tokens': 5209, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 69}}),
  ToolMessage(content='{"query": "deep agents langgraph", "follow_up_questions": null,