In [None]:
!pip install langchain-nvidia-ai-endpoints langchain langchain-community schedule

In [None]:
import os
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import WebBaseLoader
import schedule
import time

# Provide the NVIDIA API key without clobbering a real one: setdefault() only
# installs the placeholder when NVIDIA_API_KEY is not already set in the
# environment (e.g. exported in the shell), so a genuine key is never
# overwritten by the sample value below.
os.environ.setdefault("NVIDIA_API_KEY", "your-nvidia-api-key")

# Initialize the NVIDIA-hosted chat model used by the summarization chain.
llm = ChatNVIDIA(
    base_url="http://your-nvidia-endpoint-url:8000/v1",  # Replace with your NVIDIA endpoint
    model="meta/llama3-8b-instruct"
)

# Prompt with a single {content} placeholder that receives the page text.
prompt = ChatPromptTemplate.from_template(
    "Summarize the following content in 2-3 sentences:\n\n{content}"
)

# Chain: fill the prompt -> call the model -> reduce the reply to a plain string.
chain = prompt | llm | StrOutputParser()

def summarize_website(url="https://example.com"):
    """Summarize the page at *url*, print the result and append it to summary.txt.

    The page is fetched with ``WebBaseLoader`` and its text is sent through
    the module-level ``chain``. Failures are reported with ``print`` instead
    of being raised, so one bad URL does not abort the caller's loop.

    Args:
        url: Address of the page to summarize.

    Returns:
        None. The summary is only printed and appended to ``summary.txt``.
    """
    try:
        # Load website content
        docs = WebBaseLoader(url).load()
        content = docs[0].page_content if docs else ""
        if not content.strip():
            # Nothing to summarize; avoid an opaque IndexError and a wasted
            # model call on an empty prompt.
            print(f"Error summarizing {url}: no content could be extracted")
            return

        # Get summary
        summary = chain.invoke({"content": content})
    except Exception as e:
        # Deliberately broad: this is the boundary of a scheduled job, and
        # network or model errors for one site must not stop the others.
        print(f"Error summarizing {url}: {str(e)}")
        return

    print(f"Summary for {url}:\n{summary}\n")

    # Optional: Save to file. UTF-8 is explicit because summaries may contain
    # non-ASCII text that the platform's default encoding cannot represent.
    try:
        with open("summary.txt", "a", encoding="utf-8") as f:
            f.write(f"{time.ctime()} - {url}:\n{summary}\n\n")
    except OSError as e:
        # The summary itself succeeded; report the storage problem separately.
        print(f"Error saving summary for {url}: {str(e)}")

# List of websites to summarize.
# daily_summary_job() iterates over this list on every run; the entries below
# use example domains and are meant to be replaced with real pages.
websites = [
    "https://example.com",
    "https://news.example.org",
    # Add more URLs as needed
]

def daily_summary_job():
    """Announce the start time, then summarize each URL listed in ``websites``."""
    started_at = time.ctime()
    print(f"Starting daily summary at {started_at}")

    # Sites are processed one after another, in list order.
    for site in websites:
        summarize_website(site)

# Schedule daily task at 8:00 AM
# NOTE(review): presumably interpreted in the machine's local time zone — confirm.
schedule.every().day.at("08:00").do(daily_summary_job)

# Keep script running
# Registered jobs only fire when run_pending() is called, so poll it forever.
# This loop never exits on its own; it blocks until the process (or notebook
# cell) is interrupted.
while True:
    schedule.run_pending()
    time.sleep(60)  # Check every minute

.env file

# --- Google AI ---
GOOGLE_API_KEY="YOUR_GOOGLE_API_KEY"
GOOGLE_MODEL_NAME="gemini-1.5-flash-latest" # Or gemini-pro, etc.

# --- NVIDIA NIM ---
# API key: obtain it from the NGC console after deploying a NIM for your model
NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY"
# The invoke URL from your NIM deployment details, without the trailing
# /chat/completions route (keep the /v1 suffix, e.g. http://host:8000/v1)
NVIDIA_BASE_URL="YOUR_NVIDIA_ENDPOINT_BASE_URL"
# The model name your NIM is serving (check NIM documentation/deployment)
NVIDIA_MODEL_NAME="meta/llama3-70b-instruct" # Example, use your deployed model

# --- Ollama ---
# Optional: If Ollama runs on a different host/port
# OLLAMA_BASE_URL="http://localhost:11434"
OLLAMA_MODEL="llama3" # Or mistral, etc. (ensure it's pulled: ollama pull llama3)


In [None]:
!pip install langchain langchain-google-genai langchain-nvidia-ai-endpoints langchain-community python-dotenv beautifulsoup4 html2text

import os
import logging
from typing import Literal, Optional
from dotenv import load_dotenv

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_community.chat_models import ChatOllama
from langchain_core.language_models.chat_models import BaseChatModel

# --- Configuration & Setup ---
# Log at INFO and above; each record is prefixed with a timestamp and its level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Load variables from a .env file into the process environment so that the
# os.getenv() lookups in get_llm() can see them.
load_dotenv()

# --- LLM Provider Selection ---

def _normalize_nim_base_url(base_url: str) -> str:
    """Reduce *base_url* to the API root that is handed to ChatNVIDIA.

    Trailing slashes are dropped first, then a trailing ``/chat/completions``
    route is removed, so a pasted invoke URL such as
    ``http://host:8000/v1/chat/completions/`` becomes ``http://host:8000/v1``.
    The ``/v1`` segment is intentionally kept: ChatNVIDIA expects the base
    path of the API and adds the route itself.
    """
    route_suffix = "/chat/completions"
    normalized = base_url.rstrip("/")
    if normalized.endswith(route_suffix):
        normalized = normalized[:-len(route_suffix)]
    return normalized.rstrip("/")


def get_llm(provider: Literal['google', 'nvidia', 'ollama']) -> Optional[BaseChatModel]:
    """Initialize and return the LangChain chat model for *provider*.

    Configuration is read from environment variables:

    * ``google`` -- ``GOOGLE_API_KEY`` (required) and ``GOOGLE_MODEL_NAME``
      (defaults to ``gemini-1.5-flash-latest``).
    * ``nvidia`` -- ``NVIDIA_API_KEY``, ``NVIDIA_BASE_URL`` and
      ``NVIDIA_MODEL_NAME`` (all required).
    * ``ollama`` -- ``OLLAMA_MODEL`` (defaults to ``llama3``) and
      ``OLLAMA_BASE_URL`` (defaults to ``http://localhost:11434``). The
      server is probed with ``GET /api/tags`` before the model is built.

    Args:
        provider: Name of the backend to use.

    Returns:
        The configured chat model, or ``None`` when a required setting is
        missing, the provider name is unknown, the Ollama server cannot be
        reached, or model construction raises. The reason is logged each time.
    """
    try:
        if provider == 'google':
            api_key = os.getenv("GOOGLE_API_KEY")
            model_name = os.getenv("GOOGLE_MODEL_NAME", "gemini-1.5-flash-latest")
            if not api_key:
                logging.error("GOOGLE_API_KEY not found in environment variables.")
                return None
            logging.info(f"Using Google AI model: {model_name}")
            return ChatGoogleGenerativeAI(model=model_name, google_api_key=api_key)

        elif provider == 'nvidia':
            api_key = os.getenv("NVIDIA_API_KEY")
            base_url = os.getenv("NVIDIA_BASE_URL")
            model_name = os.getenv("NVIDIA_MODEL_NAME")  # Required for NIM

            if not api_key:
                logging.error("NVIDIA_API_KEY not found.")
                return None
            if not base_url:
                logging.error("NVIDIA_BASE_URL (NIM endpoint invoke URL base) not found.")
                return None
            if not model_name:
                logging.error("NVIDIA_MODEL_NAME (specific model served by NIM) not found.")
                return None

            # Accept either the API root or a full invoke URL. Slashes are
            # stripped before the route check so ".../chat/completions/" is
            # handled too, and the "/v1" segment is preserved.
            base_url = _normalize_nim_base_url(base_url)
            if not base_url:
                # e.g. the variable contained only "/" characters.
                logging.error("NVIDIA_BASE_URL (NIM endpoint invoke URL base) not found.")
                return None

            logging.info(f"Using NVIDIA NIM model: {model_name} at {base_url}")
            return ChatNVIDIA(
                base_url=base_url,
                api_key=api_key,
                model=model_name  # Specify the model served by the NIM endpoint
            )

        elif provider == 'ollama':
            model_name = os.getenv("OLLAMA_MODEL", "llama3")
            base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
            logging.info(f"Using Ollama model: {model_name} at {base_url}")
            # Fail fast with a clear message if the Ollama server is not
            # reachable, instead of failing later inside the first model call.
            try:
                # Imported lazily so the other providers work without it.
                import requests
                requests.get(f"{base_url}/api/tags", timeout=5).raise_for_status()
            except Exception as e:
                logging.error(f"Could not connect to Ollama at {base_url}. Is it running? Error: {e}")
                return None
            return ChatOllama(model=model_name, base_url=base_url)

        else:
            logging.error(f"Invalid provider specified: {provider}")
            return None
    except ImportError as e:
        logging.error(f"Missing library for provider '{provider}': {e}. Please install required packages.")
        return None
    except Exception as e:
        # Boundary handler: any construction failure is logged and reported
        # to the caller as None rather than propagated.
        logging.error(f"Failed to initialize LLM for provider '{provider}': {e}")
        return None

# --- Summarization Logic ---

def summarize_website(url: str, provider: Literal['google', 'nvidia', 'ollama']) -> Optional[str]:
    """Fetch the page at *url* and summarize it with the chosen LLM provider.

    Args:
        url: Address of the page to summarize.
        provider: Backend passed to ``get_llm`` to obtain the chat model.

    Returns:
        The summary text, or ``None`` on any failure: the model could not be
        initialized, the page could not be loaded, no content was extracted,
        or the summarization call failed or produced no output. The cause is
        logged. Returning ``None`` (rather than an error string) lets callers
        tell failures from summaries with a plain truthiness check.
    """
    logging.info(f"Attempting to summarize URL: {url} using provider: {provider}")

    llm = get_llm(provider)
    if not llm:
        return None

    # 1. Load website content. WebBaseLoader fetches the page and returns a
    #    list of Document objects.
    try:
        docs = WebBaseLoader(url).load()
    except Exception as e:
        logging.error(f"Failed to load URL {url}: {e}")
        return None

    # Validate before touching docs[0]; an empty list must hit this guard
    # instead of raising IndexError.
    if not docs or not docs[0].page_content:
        logging.warning(f"No content extracted from {url}.")
        return None

    logging.info(f"Successfully loaded content from {url}. Content length: {len(docs[0].page_content)} chars.")

    # 2. Summarization chain. "map_reduce" summarizes each input document
    #    separately (map) and then combines those summaries (reduce). Other
    #    chain types are "stuff" and "refine". Custom prompts can be supplied
    #    through the map_prompt / combine_prompt arguments.
    #    NOTE(review): the documents are not split into chunks here, so a
    #    single long page is still sent as one map input — confirm this fits
    #    the model's context window.
    try:
        chain = load_summarize_chain(llm, chain_type="map_reduce")
        logging.info("Starting summarization process...")
        summary_result = chain.invoke({"input_documents": docs})
    except Exception as e:
        # Broad on purpose: provider errors (auth, rate limits, timeouts)
        # differ per backend and all mean "no summary for this URL".
        logging.error(f"Summarization failed for {url} using {provider}: {e}")
        return None

    summary = summary_result.get('output_text')
    if not summary:
        logging.error(f"Summarization for {url} returned no output text.")
        return None

    logging.info(f"Summarization complete for {url}.")
    return summary

# --- Main Execution ---

if __name__ == "__main__":
    # --- Configuration ---
    # Pages to summarize on each run.
    WEBSITES_TO_SUMMARIZE = [
        "https://blog.langchain.dev/langchain-expression-language/",
        "https://developer.nvidia.com/blog/",
        # Add more URLs here
    ]

    # Provider used for this run; switch to 'google' or 'nvidia' as needed.
    PROVIDER_CHOICE: Literal['google', 'nvidia', 'ollama'] = 'ollama'

    # --- Summarization Loop ---
    rule = "-" * 80
    all_summaries = {}
    for site_url in WEBSITES_TO_SUMMARIZE:
        summary = summarize_website(site_url, PROVIDER_CHOICE)
        succeeded = bool(summary)

        # Pick the banner text first so the framing is printed in one place.
        if succeeded:
            heading = f"Summary for: {site_url} (using {PROVIDER_CHOICE})"
        else:
            heading = f"Failed to get summary for: {site_url} (using {PROVIDER_CHOICE})"

        print(rule)
        print(heading)
        print(rule)

        # Only successful runs print a body followed by a spacer.
        if succeeded:
            print(summary)
            print("\n")

        # Record the outcome per URL; failures are stored as the marker "Failed".
        all_summaries[site_url] = summary if succeeded else "Failed"

    # --- Optional: Do something with the summaries ---
    # e.g., save to a file, send an email, etc.
    # with open("daily_summaries.txt", "w") as f:
    #     for url, summary_text in all_summaries.items():
    #         f.write(f"URL: {url}\nProvider: {PROVIDER_CHOICE}\nSummary:\n{summary_text}\n\n{'='*80}\n\n")
    # logging.info("Summaries saved to daily_summaries.txt")


# Example crontab entry: run the website summarizer every day at 7:00 AM
# 0 7 * * * /usr/bin/python3 /path/to/your/website_summarizer.py >> /path/to/your/summarizer.log 2>&1