# OpenAI Deep Research

OpenAI recently released API access to its deep research models. This notebook gives a quick overview of how to use these models for research tasks such as evaluating startups, making value judgments, gathering information from the web, and making high-level comparisons.

In [None]:
import os
import json
import logging
from typing import Dict, List, Any, TypedDict
from datetime import datetime
import requests
from time import sleep
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# LangChain imports
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain.agents import create_react_agent
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# LangGraph imports
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langgraph.types import Command, interrupt

# Tavily search tool from LangChain Community
from langchain_tavily import TavilySearch

# Native OpenAI imports
from openai import OpenAI

# Load environment variables
load_dotenv()

True

In [3]:
# Configure logging
def setup_logging(level: str = "INFO", log_file: str = None):
    """Configure root, LangChain/LangGraph and application logging.

    Replaces every handler on the root logger with a console handler (plus
    an optional file handler) that share one timestamped format, and applies
    the same level to the root, ``langchain``, ``langgraph`` and
    ``research_workflow`` loggers.

    The function can be called repeatedly (for example when a notebook cell
    is re-run): handlers installed earlier are closed before being dropped,
    so a previously opened log file is not left open.

    Args:
        level: Name of a ``logging`` level constant, case-insensitive
            (e.g. ``"info"``, ``"DEBUG"``).
        log_file: Optional path. When given and non-empty, records are also
            written to this file with UTF-8 encoding.

    Returns:
        The ``research_workflow`` application logger.

    Raises:
        AttributeError: If ``level.upper()`` is not an attribute of the
            ``logging`` module.
    """
    # Resolve the level name once and reuse it for every logger below.
    log_level = getattr(logging, level.upper())

    # Shared formatter for all handlers.
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)

    # Detach AND close the existing handlers. Clearing the handler list alone
    # would drop the references but leave any FileHandler stream open.
    for old_handler in list(root_logger.handlers):
        root_logger.removeHandler(old_handler)
        old_handler.close()

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    root_logger.addHandler(console_handler)

    # File handler (optional)
    if log_file:
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
        file_handler.setFormatter(formatter)
        root_logger.addHandler(file_handler)

    # Library loggers get the same explicit level as the root logger.
    for library_name in ("langchain", "langgraph"):
        logging.getLogger(library_name).setLevel(log_level)

    # Application logger handed back to the caller.
    app_logger = logging.getLogger("research_workflow")
    app_logger.setLevel(log_level)

    return app_logger

# Setup default logging
# Module-wide application logger, configured at INFO level.
logger = setup_logging(level="INFO")
logger.info(
    "\N{WHITE HEAVY CHECK MARK} All imports and logging setup successful!"
)

2025-06-30 09:29:17 - research_workflow - INFO - ✅ All imports and logging setup successful!


In [None]:
# Deep research model identifiers used by the cells below, labelled here as
# the "smart" and "fast" options.
smart_model = "o3-deep-research"
fast_model = "o4-mini-deep-research"

In [None]:
# Client with a generous request timeout (3600, in seconds per the OpenAI
# client documentation).
client = OpenAI(timeout=3600)

# Research brief sent to the model as the request input.
input_text = """
Research the economic impact of semaglutide on global healthcare systems.
Do:
- Include specific figures, trends, statistics, and measurable outcomes.
- Prioritize reliable, up-to-date sources: peer-reviewed research, health
  organizations (e.g., WHO, CDC), regulatory agencies, or pharmaceutical
  earnings reports.
- Include inline citations and return all source metadata.

Be analytical, avoid generalities, and ensure that each section supports
data-backed reasoning that could inform healthcare policy or financial modeling.
"""

# Submit the task with background=True and poll for its status below.
# The model name comes from the `fast_model` constant defined in the cell
# above instead of repeating the literal here.
response = client.responses.create(
  model=fast_model,
  input=input_text,
  tools=[
    {"type": "web_search_preview"},
    {"type": "code_interpreter", "container": {"type": "auto"}},
  ],
  background=True
)

# `resp` always holds the most recently retrieved state of the run. Binding it
# here fixes the NameError raised on a fresh kernel, where the result was
# stored as `response` but the loop read an undefined `resp`.
resp = response

# Poll every 2 seconds until the run leaves the queued/in-progress states.
while resp.status in {"queued", "in_progress"}:
  print(f"Current status: {resp.status}")
  sleep(2)
  resp = client.responses.retrieve(resp.id)

final_output = resp.output_text

print(f"Final status: {resp.status}\nOutput:\n{final_output}")