In [None]:
# Test Reddit Incremental Scraper
print("üß™ Testing Reddit Incremental Scraper...")

# Run a small test
import subprocess
import sys

try:
    # Run the incremental scraper with small limits
    result = subprocess.run(
        [
            sys.executable,
            "-m",
            "ingest.reddit_incremental",
            "scrape",
            "--max-threads",
            "1",
            "--max-comments-per-thread",
            "2",
        ],
        capture_output=True,
        text=True,
        cwd=".",
    )

    print("‚úÖ Scraper output:")
    print(result.stdout)

    if result.stderr:
        print("‚ö†Ô∏è Warnings/Errors:")
        print(result.stderr)

except Exception as e:
    print(f"‚ùå Error running scraper: {e}")

print("\n" + "=" * 50)

# Market Pulse Component Testing Notebook

This notebook allows you to test each component of the Market Pulse system separately with real data.

## Components to Test:
1. **Database Connection & Ticker Loading**
2. **GDELT Data Ingestion**
3. **Content Scraping**
4. **Ticker Linking**
5. **Sentiment Analysis**
6. **Context Analysis**|
7. **End-to-End Pipeline**

## Setup
Make sure you have:
- Database running (PostgreSQL)
- Environment variables set (.env file)
- Tickers seeded in database


In [34]:
# Load environment variables from .env file
from dotenv import load_dotenv

load_dotenv(override=True)

print("‚úÖ Environment variables loaded from .env file")

‚úÖ Environment variables loaded from .env file


In [30]:
# Import required libraries
import logging
import os
import sys
from datetime import UTC, datetime
from typing import Any

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import HTML, display

# Add project root to path
project_root = os.path.abspath(".")
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

print("‚úÖ Imports and setup complete")

‚úÖ Imports and setup complete


## 1. Database Connection & Ticker Loading Test


In [31]:
# Test database connection and ticker loading
from sqlalchemy import func, select

from app.db.models import Article, Ticker
from app.db.session import SessionLocal


def test_database_connection():
    """Test database connection and basic queries."""
    try:
        db = SessionLocal()

        # Test basic connection
        result = db.execute(select(func.count(Ticker.symbol)))
        ticker_count = result.scalar()
        print("‚úÖ Database connected successfully")
        print(f"üìä Total tickers in database: {ticker_count}")

        # Load sample tickers
        tickers = db.execute(select(Ticker).limit(10)).scalars().all()
        print("\nüìà Sample tickers:")
        for ticker in tickers:
            print(
                f"  - {ticker.symbol}: {ticker.name} (aliases: {len(ticker.aliases)})"
            )

        # Check articles
        article_count = db.execute(select(func.count(Article.id))).scalar()
        print(f"\nüì∞ Total articles in database: {article_count}")

        db.close()
        return tickers

    except Exception as e:
        print(f"‚ùå Database connection failed: {e}")
        return None


# Run test
sample_tickers = test_database_connection()

‚úÖ Database connected successfully
üìä Total tickers in database: 58

üìà Sample tickers:
  - AAPL: Apple Inc (aliases: 4)
  - MSFT: Microsoft Corporation (aliases: 4)
  - GOOGL: Alphabet Inc (aliases: 6)
  - AMZN: Amazon.com Inc (aliases: 4)
  - TSLA: Tesla Inc (aliases: 4)
  - META: Meta Platforms Inc (aliases: 5)
  - NVDA: NVIDIA Corporation (aliases: 4)
  - BRK.B: Berkshire Hathaway Inc (aliases: 5)
  - JPM: JPMorgan Chase & Co (aliases: 4)
  - V: Visa Inc (aliases: 4)

üì∞ Total articles in database: 5


In [9]:
from sqlalchemy import inspect

from app.db.session import engine  # make sure you have engine

inspector = inspect(engine)
print(inspector.get_table_names())

['article_ticker', 'ticker', 'article']


In [13]:
from sqlalchemy import text

with engine.connect() as conn:
    result = conn.execute(text("SELECT * FROM article_ticker LIMIT 5"))
    for row in result:
        print(row)

In [15]:
from app.db.session import engine

print("DB URL:", engine.url)
print("Driver:", engine.name)  # will say "postgresql"

DB URL: postgresql+psycopg://postgres:***@localhost:5432/market_pulse
Driver: postgresql


## 7. Reddit Data Ingestion Test


In [36]:
# Test Reddit data ingestion with latest posts
import os

from ingest.reddit import get_reddit_credentials
from ingest.reddit_parser import RedditParser


def test_reddit_ingestion(subreddit: str = "wallstreetbets", limit: int = 5):
    """Test Reddit data ingestion from a specific subreddit."""
    print(f"üî¥ Testing Reddit ingestion from r/{subreddit} (limit: {limit} posts)")

    try:
        # Check for Reddit credentials
        try:
            client_id, client_secret, user_agent = get_reddit_credentials()
            print("‚úÖ Reddit credentials found")
            print(f"   Client ID: {client_id[:8]}...")
            print(f"   User Agent: {user_agent}")
        except ValueError as e:
            print(f"‚ùå Reddit credentials not configured: {e}")
            print(
                "   Please set REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET in your .env file"
            )
            return []

        # Initialize Reddit parser
        reddit_parser = RedditParser()
        reddit_parser.initialize_reddit(client_id, client_secret, user_agent)
        print("‚úÖ Reddit parser initialized")

        # Fetch posts from subreddit
        print(f"\nüì° Fetching latest posts from r/{subreddit}...")
        posts = reddit_parser.fetch_subreddit_posts(
            subreddit_name=subreddit, limit=limit, time_filter="day"
        )

        if not posts:
            print(f"‚ö†Ô∏è  No posts found in r/{subreddit}")
            return []

        print(f"‚úÖ Fetched {len(posts)} posts from r/{subreddit}")

        # Parse posts into Article objects
        print("\nüìù Parsing posts into Article objects...")
        articles = reddit_parser.parse_subreddit_posts(
            subreddit_name=subreddit, limit=limit, time_filter="day"
        )

        if not articles:
            print(f"‚ö†Ô∏è  No articles parsed from r/{subreddit}")
            return []

        print(f"‚úÖ Parsed {len(articles)} articles")

        # Display sample posts
        print(f"\nüìã Sample Reddit posts from r/{subreddit}:")
        for i, article in enumerate(articles[:3], 1):
            print(f"\n{i}. {article.title}")
            print(f"   Author: u/{article.author}")
            print(f"   Upvotes: {article.upvotes}")
            print(f"   Comments: {article.num_comments}")
            print(f"   Published: {article.published_at}")
            print(f"   Reddit URL: {article.reddit_url}")
            if article.text and len(article.text) > 100:
                print(f"   Content: {article.text[:100]}...")
            else:
                print(f"   Content: {article.text}")

        # Test ticker linking on Reddit posts
        if sample_tickers:
            print("\nüîó Testing ticker linking on Reddit posts...")
            from jobs.ingest.linker import TickerLinker

            linker = TickerLinker(sample_tickers, max_scraping_workers=2)
            linking_results = []

            for i, article in enumerate(articles[:3], 1):
                print(f"\nüì∞ Testing post {i}: {article.title[:50]}...")

                # Link article to tickers
                ticker_links = linker.link_article(article, use_title_only=True)

                print(f"   Found {len(ticker_links)} ticker matches:")
                for link in ticker_links:
                    print(f"   - {link.ticker}: {link.confidence:.2f} confidence")
                    print(f"     Matched terms: {link.matched_terms}")
                    print(f"     Reasoning: {link.reasoning}")

                linking_results.append((article, ticker_links))

            # Summary
            total_links = sum(len(links) for _, links in linking_results)
            linked_posts = sum(1 for _, links in linking_results if links)

            print("\nüìä Reddit Ticker Linking Summary:")
            print(f"   Posts processed: {len(articles[:3])}")
            print(f"   Posts with ticker links: {linked_posts}")
            print(f"   Total ticker links: {total_links}")

        return articles

    except Exception as e:
        print(f"‚ùå Reddit ingestion test failed: {e}")
        return []


# Run Reddit test
reddit_articles = test_reddit_ingestion(subreddit="wallstreetbets", limit=5)

2025-09-24 12:50:20,359 - ingest.reddit_parser - INFO - Reddit API client initialized


üî¥ Testing Reddit ingestion from r/wallstreetbets (limit: 5 posts)
‚úÖ Reddit credentials found
   Client ID: Q7UvV4ZY...
   User Agent: MarketPulse/1.0 by MarketPulseBot
‚úÖ Reddit parser initialized

üì° Fetching latest posts from r/wallstreetbets...


2025-09-24 12:50:21,100 - ingest.reddit_parser - INFO - Fetched 5 posts from r/wallstreetbets


‚úÖ Fetched 5 posts from r/wallstreetbets

üìù Parsing posts into Article objects...


2025-09-24 12:50:21,359 - ingest.reddit_parser - INFO - Fetched 5 posts from r/wallstreetbets
2025-09-24 12:50:21,368 - ingest.reddit_parser - INFO - Parsed 0 articles from r/wallstreetbets


‚ö†Ô∏è  No articles parsed from r/wallstreetbets


In [41]:
# Initialize Reddit parser
client_id, client_secret, user_agent = get_reddit_credentials()
subreddit = "wallstreetbets"
limit = 10
reddit_parser = RedditParser()
reddit_parser.initialize_reddit(client_id, client_secret, user_agent)
print("‚úÖ Reddit parser initialized")

# Fetch posts from subreddit
print(f"\nüì° Fetching latest posts from r/{subreddit}...")
posts = reddit_parser.fetch_subreddit_posts(
    subreddit_name=subreddit, limit=limit, time_filter="day"
)

2025-09-24 12:51:56,802 - ingest.reddit_parser - INFO - Reddit API client initialized


‚úÖ Reddit parser initialized

üì° Fetching latest posts from r/wallstreetbets...


2025-09-24 12:51:57,604 - ingest.reddit_parser - INFO - Fetched 10 posts from r/wallstreetbets


In [49]:
for post in posts:
    print(post.title)

üòÖ
Daddy Powell just kicked us in the balls ‚ÄúStocks are overvalued‚Äù
I went to the mall and EVERY girls was dressed like a skank - LULU to the MOON
JPow with SPY
$OPENed my butthole
10 Years ago my net worth was $0
Loaded up my grandmas retirement savings onto RIVN stock here
When you buy the dip...
Daily Discussion Thread for September 23, 2025
What Are Your Moves Tomorrow, September 24, 2025


In [None]:
# Test multiple subreddits and compare content
def test_multiple_subreddits(subreddits: list = None, limit_per_subreddit: int = 3):
    """Test Reddit ingestion from multiple subreddits and compare content."""
    if subreddits is None:
        subreddits = ["wallstreetbets", "stocks", "investing"]

    print(f"üî¥ Testing multiple subreddits: {', '.join(f'r/{s}' for s in subreddits)}")
    print(f"   Limit per subreddit: {limit_per_subreddit} posts")

    try:
        # Check credentials
        try:
            client_id, client_secret, user_agent = get_reddit_credentials()
        except ValueError as e:
            print(f"‚ùå Reddit credentials not configured: {e}")
            return {}

        # Initialize parser
        reddit_parser = RedditParser()
        reddit_parser.initialize_reddit(client_id, client_secret, user_agent)

        all_articles = {}

        for subreddit in subreddits:
            print(f"\nüì° Processing r/{subreddit}...")

            try:
                articles = reddit_parser.parse_subreddit_posts(
                    subreddit_name=subreddit,
                    limit=limit_per_subreddit,
                    time_filter="day",
                )

                if articles:
                    all_articles[subreddit] = articles
                    print(f"   ‚úÖ {len(articles)} posts from r/{subreddit}")
                else:
                    print(f"   ‚ö†Ô∏è  No posts from r/{subreddit}")

            except Exception as e:
                print(f"   ‚ùå Error processing r/{subreddit}: {e}")

        # Display comparison
        if all_articles:
            print("\nüìä Subreddit Comparison:")
            print(
                f"{'Subreddit':<15} {'Posts':<8} {'Avg Upvotes':<12} {'Avg Comments':<12} {'Ticker Mentions'}"
            )
            print("-" * 70)

            for subreddit, articles in all_articles.items():
                avg_upvotes = sum(a.upvotes or 0 for a in articles) / len(articles)
                avg_comments = sum(a.num_comments or 0 for a in articles) / len(
                    articles
                )

                # Count ticker mentions in titles
                ticker_mentions = 0
                for article in articles:
                    title_lower = article.title.lower()
                    # Look for common ticker patterns
                    if any(
                        pattern in title_lower
                        for pattern in ["$", "stock", "ticker", "earnings", "dividend"]
                    ):
                        ticker_mentions += 1

                print(
                    f"r/{subreddit:<12} {len(articles):<8} {avg_upvotes:<12.1f} {avg_comments:<12.1f} {ticker_mentions}"
                )

            # Show sample posts from each subreddit
            print("\nüìã Sample Posts by Subreddit:")
            for subreddit, articles in all_articles.items():
                print(f"\nüî¥ r/{subreddit}:")
                for i, article in enumerate(articles[:2], 1):
                    print(f"   {i}. {article.title[:60]}...")
                    print(
                        f"      ‚Üë{article.upvotes} üí¨{article.num_comments} by u/{article.author}"
                    )

        return all_articles

    except Exception as e:
        print(f"‚ùå Multiple subreddit test failed: {e}")
        return {}


# Run multiple subreddit test
multi_subreddit_results = test_multiple_subreddits(limit_per_subreddit=3)

In [None]:
# Test complete Reddit ingestion pipeline
def test_reddit_pipeline(subreddit: str = "wallstreetbets", limit: int = 5):
    """Test the complete Reddit ingestion pipeline from fetching to database storage."""
    print(f"üöÄ Testing complete Reddit pipeline for r/{subreddit}")

    try:
        # Check credentials
        try:
            client_id, client_secret, user_agent = get_reddit_credentials()
        except ValueError as e:
            print(f"‚ùå Reddit credentials not configured: {e}")
            return None

        # Step 1: Fetch Reddit posts
        print(f"\nüì° Step 1: Fetching Reddit posts from r/{subreddit}...")
        reddit_parser = RedditParser()
        reddit_parser.initialize_reddit(client_id, client_secret, user_agent)

        articles = reddit_parser.parse_subreddit_posts(
            subreddit_name=subreddit, limit=limit, time_filter="day"
        )

        if not articles:
            print(f"‚ùå No articles fetched from r/{subreddit}")
            return None

        print(f"‚úÖ Fetched {len(articles)} articles from r/{subreddit}")

        # Step 2: Load tickers
        print("\nüìà Step 2: Loading tickers...")
        db = SessionLocal()
        tickers = db.execute(select(Ticker)).scalars().all()
        db.close()

        if not tickers:
            print("‚ùå No tickers available")
            return None

        # Step 3: Initialize linker
        print("\nüîó Step 3: Initializing ticker linker...")
        linker = TickerLinker(tickers, max_scraping_workers=2)

        # Step 4: Link articles to tickers
        print("\nüîó Step 4: Linking Reddit posts to tickers...")
        linked_results = linker.link_articles_to_db(articles)

        # Step 5: Analyze sentiment
        print("\nüòä Step 5: Analyzing sentiment...")
        sentiment_service = get_sentiment_service()

        pipeline_results = []

        for article, article_tickers in linked_results:
            # Analyze sentiment
            text_for_sentiment = article.text or article.title
            if text_for_sentiment:
                try:
                    sentiment_score, sentiment_label = (
                        sentiment_service.analyze_with_label(text_for_sentiment)
                    )
                except:
                    sentiment_score, sentiment_label = 0.0, "Neutral"
            else:
                sentiment_score, sentiment_label = 0.0, "Neutral"

            pipeline_results.append(
                {
                    "article": article,
                    "article_tickers": article_tickers,
                    "sentiment_score": sentiment_score,
                    "sentiment_label": sentiment_label,
                    "num_tickers": len(article_tickers),
                    "subreddit": article.subreddit,
                    "upvotes": article.upvotes,
                    "num_comments": article.num_comments,
                }
            )

        # Summary
        print("\nüìä Reddit Pipeline Results Summary:")
        print(f"   Articles processed: {len(articles)}")
        print(
            f"   Articles with ticker links: {sum(1 for r in pipeline_results if r['num_tickers'] > 0)}"
        )
        print(
            f"   Total ticker relationships: {sum(r['num_tickers'] for r in pipeline_results)}"
        )

        if pipeline_results:
            sentiment_labels = [r["sentiment_label"] for r in pipeline_results]
            print("   Sentiment distribution:")
            print(f"     Positive: {sentiment_labels.count('Positive')}")
            print(f"     Neutral: {sentiment_labels.count('Neutral')}")
            print(f"     Negative: {sentiment_labels.count('Negative')}")

            # Show top posts by engagement
            print("\nüî• Top Posts by Engagement:")
            sorted_results = sorted(
                pipeline_results, key=lambda x: x["upvotes"] or 0, reverse=True
            )
            for i, result in enumerate(sorted_results[:3], 1):
                article = result["article"]
                print(f"   {i}. {article.title[:60]}...")
                print(
                    f"      ‚Üë{result['upvotes']} üí¨{result['num_comments']} | {result['sentiment_label']} | {result['num_tickers']} tickers"
                )

        return pipeline_results

    except Exception as e:
        print(f"‚ùå Reddit pipeline test failed: {e}")
        return None


# Run Reddit pipeline test
reddit_pipeline_results = test_reddit_pipeline(subreddit="wallstreetbets", limit=5)

## 2. GDELT Data Ingestion Test


In [26]:
# Create Reddit-specific visualizations
def create_reddit_visualizations():
    """Create visualizations specifically for Reddit data."""
    print("üìä Creating Reddit-specific visualizations")

    try:
        if not reddit_pipeline_results:
            print("‚ö†Ô∏è  No Reddit pipeline results available for visualization")
            return None

        # Set up plotting style
        plt.style.use("default")
        sns.set_palette("husl")

        # Create figure with subplots
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle("Reddit Data Analysis Results", fontsize=16, fontweight="bold")

        # 1. Sentiment Distribution for Reddit
        ax1 = axes[0, 0]
        sentiment_labels = [r["sentiment_label"] for r in reddit_pipeline_results]
        sentiment_counts = pd.Series(sentiment_labels).value_counts()

        colors = [
            "green" if label == "Positive" else "gray" if label == "Neutral" else "red"
            for label in sentiment_counts.index
        ]

        sentiment_counts.plot(kind="bar", ax=ax1, color=colors)
        ax1.set_title("Reddit Sentiment Distribution")
        ax1.set_xlabel("Sentiment")
        ax1.set_ylabel("Count")
        ax1.tick_params(axis="x", rotation=45)

        # 2. Upvotes vs Comments Scatter
        ax2 = axes[0, 1]
        upvotes = [r["upvotes"] or 0 for r in reddit_pipeline_results]
        comments = [r["num_comments"] or 0 for r in reddit_pipeline_results]

        ax2.scatter(upvotes, comments, alpha=0.7, color="red")
        ax2.set_title("Reddit Engagement: Upvotes vs Comments")
        ax2.set_xlabel("Upvotes")
        ax2.set_ylabel("Comments")

        # Add trend line
        if len(upvotes) > 1:
            z = np.polyfit(upvotes, comments, 1)
            p = np.poly1d(z)
            ax2.plot(upvotes, p(upvotes), "r--", alpha=0.8)

        # 3. Ticker Links per Post
        ax3 = axes[1, 0]
        ticker_counts = [r["num_tickers"] for r in reddit_pipeline_results]

        if ticker_counts:
            ax3.hist(
                ticker_counts,
                bins=max(1, max(ticker_counts)),
                alpha=0.7,
                color="red",
                edgecolor="black",
            )
            ax3.set_title("Ticker Links per Reddit Post")
            ax3.set_xlabel("Number of Ticker Links")
            ax3.set_ylabel("Number of Posts")
        else:
            ax3.text(
                0.5,
                0.5,
                "No ticker links found",
                ha="center",
                va="center",
                transform=ax3.transAxes,
            )
            ax3.set_title("Ticker Links per Reddit Post")

        # 4. Top Posts by Engagement
        ax4 = axes[1, 1]
        if reddit_pipeline_results:
            # Sort by upvotes and take top 5
            top_posts = sorted(
                reddit_pipeline_results, key=lambda x: x["upvotes"] or 0, reverse=True
            )[:5]

            post_titles = [
                (
                    r["article"].title[:30] + "..."
                    if len(r["article"].title) > 30
                    else r["article"].title
                )
                for r in top_posts
            ]
            upvote_counts = [r["upvotes"] or 0 for r in top_posts]

            y_pos = range(len(post_titles))
            ax4.barh(y_pos, upvote_counts, alpha=0.7, color="red")
            ax4.set_yticks(y_pos)
            ax4.set_yticklabels(post_titles, fontsize=8)
            ax4.set_title("Top 5 Posts by Upvotes")
            ax4.set_xlabel("Upvotes")
        else:
            ax4.text(
                0.5,
                0.5,
                "No Reddit data available",
                ha="center",
                va="center",
                transform=ax4.transAxes,
            )
            ax4.set_title("Top Posts by Engagement")

        plt.tight_layout()
        plt.show()

        # Create Reddit summary table
        print("\nüìã Reddit Analysis Summary:")
        reddit_summary_data = []

        # Engagement metrics
        total_upvotes = sum(r["upvotes"] or 0 for r in reddit_pipeline_results)
        total_comments = sum(r["num_comments"] or 0 for r in reddit_pipeline_results)
        avg_upvotes = (
            total_upvotes / len(reddit_pipeline_results)
            if reddit_pipeline_results
            else 0
        )
        avg_comments = (
            total_comments / len(reddit_pipeline_results)
            if reddit_pipeline_results
            else 0
        )

        reddit_summary_data.append(
            {"Metric": "Total Posts", "Value": len(reddit_pipeline_results)}
        )
        reddit_summary_data.append(
            {"Metric": "Total Upvotes", "Value": f"{total_upvotes:,}"}
        )
        reddit_summary_data.append(
            {"Metric": "Total Comments", "Value": f"{total_comments:,}"}
        )
        reddit_summary_data.append(
            {"Metric": "Avg Upvotes/Post", "Value": f"{avg_upvotes:.1f}"}
        )
        reddit_summary_data.append(
            {"Metric": "Avg Comments/Post", "Value": f"{avg_comments:.1f}"}
        )

        # Sentiment metrics
        sentiment_labels = [r["sentiment_label"] for r in reddit_pipeline_results]
        reddit_summary_data.append(
            {
                "Metric": "Positive Sentiment",
                "Value": f"{sentiment_labels.count('Positive')} ({sentiment_labels.count('Positive')/len(sentiment_labels)*100:.1f}%)",
            }
        )
        reddit_summary_data.append(
            {
                "Metric": "Negative Sentiment",
                "Value": f"{sentiment_labels.count('Negative')} ({sentiment_labels.count('Negative')/len(sentiment_labels)*100:.1f}%)",
            }
        )

        # Ticker linking metrics
        posts_with_tickers = sum(
            1 for r in reddit_pipeline_results if r["num_tickers"] > 0
        )
        total_ticker_links = sum(r["num_tickers"] for r in reddit_pipeline_results)

        reddit_summary_data.append(
            {
                "Metric": "Posts with Ticker Links",
                "Value": f"{posts_with_tickers} ({posts_with_tickers/len(reddit_pipeline_results)*100:.1f}%)",
            }
        )
        reddit_summary_data.append(
            {"Metric": "Total Ticker Links", "Value": total_ticker_links}
        )

        # Display summary table
        reddit_summary_df = pd.DataFrame(reddit_summary_data)
        display(HTML(reddit_summary_df.to_html(index=False, escape=False)))

        return reddit_summary_df

    except Exception as e:
        print(f"‚ùå Reddit visualization creation failed: {e}")
        return None


# Create Reddit visualizations
reddit_summary_df = create_reddit_visualizations()

üìä Creating Reddit-specific visualizations
‚ùå Reddit visualization creation failed: name 'reddit_pipeline_results' is not defined


In [None]:
# Test GDELT data ingestion with small sample
from ingest.gdelt import fetch_gdelt_file, get_gdelt_export_urls
from ingest.parser import parse_gdelt_export_csv


def test_gdelt_ingestion(hours_back: int = 1, max_files: int = 2):
    """Test GDELT data ingestion with limited data."""
    print(
        f"üîç Testing GDELT ingestion for last {hours_back} hours (max {max_files} files)"
    )

    try:
        # Get GDELT URLs
        urls = get_gdelt_export_urls(hours_back)[:max_files]
        print(f"üì° Found {len(urls)} GDELT files to process")

        articles = []

        for i, url in enumerate(urls, 1):
            print(f"\nüì• Processing file {i}/{len(urls)}: {url.split('/')[-1]}")

            # Fetch file content
            content = fetch_gdelt_file(url)
            if not content:
                print(f"‚ö†Ô∏è  Failed to fetch {url}")
                continue

            # Parse articles
            file_articles = parse_gdelt_export_csv(content)
            print(f"üì∞ Parsed {len(file_articles)} articles from this file")

            articles.extend(file_articles)

        print(f"\n‚úÖ Total articles parsed: {len(articles)}")

        # Display sample articles
        if articles:
            print("\nüìã Sample articles:")
            for i, article in enumerate(articles[:3], 1):
                print(f"\n{i}. {article.title}")
                print(f"   URL: {article.url}")
                print(f"   Published: {article.published_at}")
                print(f"   Source: {article.source}")

        return articles

    except Exception as e:
        print(f"‚ùå GDELT ingestion test failed: {e}")
        return []


# Run test with small sample
sample_articles = test_gdelt_ingestion(hours_back=1, max_files=2)

## 3. Content Scraping Test


In [None]:
# Test content scraping component
from app.services.content_scraper import get_content_scraper


def test_content_scraping(articles: list[Any], max_articles: int = 3):
    """Test content scraping on sample articles."""
    print(f"üï∑Ô∏è  Testing content scraping on {min(len(articles), max_articles)} articles")

    if not articles:
        print("‚ö†Ô∏è  No articles available for scraping test")
        return {}

    try:
        scraper = get_content_scraper()
        scraper.max_workers = 2  # Limit workers for testing

        # Select articles with valid URLs
        test_articles = [
            a for a in articles[:max_articles] if a.url and a.url.startswith("http")
        ]

        if not test_articles:
            print("‚ö†Ô∏è  No valid URLs found for scraping")
            return {}

        print("\nüîó Testing URLs:")
        for article in test_articles:
            print(f"  - {article.url}")

        # Test single URL scraping
        print("\nüìÑ Testing single URL scraping:")
        first_url = test_articles[0].url
        content = scraper.scrape_article_content(first_url)

        if content:
            print(f"‚úÖ Successfully scraped {len(content)} characters")
            print(f"üìù Content preview: {content[:200]}...")
        else:
            print(f"‚ùå Failed to scrape content from {first_url}")

        # Test multithreaded scraping
        print("\nüöÄ Testing multithreaded scraping:")
        urls = [article.url for article in test_articles]
        scraped_results = scraper.scrape_articles_multithreaded(urls)

        successful_scrapes = sum(
            1 for content in scraped_results.values() if content is not None
        )
        print(f"‚úÖ Successfully scraped {successful_scrapes}/{len(urls)} URLs")

        # Display results
        for url, content in scraped_results.items():
            if content:
                print(f"\nüìÑ {url}:")
                print(f"   Length: {len(content)} characters")
                print(f"   Preview: {content[:150]}...")
            else:
                print(f"\n‚ùå {url}: Failed to scrape")

        return scraped_results

    except Exception as e:
        print(f"‚ùå Content scraping test failed: {e}")
        return {}


# Run test
scraped_content = test_content_scraping(sample_articles, max_articles=3)

## 4. Ticker Linking Test


In [None]:
# Test ticker linking component
from jobs.ingest.linker import TickerLinker


def test_ticker_linking(articles: list[Any], tickers: list[Any], max_articles: int = 3):
    """Test ticker linking on sample articles."""
    print(f"üîó Testing ticker linking on {min(len(articles), max_articles)} articles")

    if not articles or not tickers:
        print("‚ö†Ô∏è  No articles or tickers available for linking test")
        return []

    try:
        # Initialize linker
        linker = TickerLinker(tickers, max_scraping_workers=2)
        print(f"‚úÖ TickerLinker initialized with {len(tickers)} tickers")

        # Test on sample articles
        test_articles = articles[:max_articles]
        linking_results = []

        for i, article in enumerate(test_articles, 1):
            print(f"\nüì∞ Testing article {i}: {article.title[:50]}...")

            # Link article to tickers
            ticker_links = linker.link_article(article)

            print(f"   Found {len(ticker_links)} ticker matches")

            for link in ticker_links:
                print(f"   - {link.ticker}: {link.confidence:.2f} confidence")
                print(f"     Matched terms: {link.matched_terms}")
                print(f"     Reasoning: {link.reasoning}")

            linking_results.append((article, ticker_links))

        # Summary
        total_links = sum(len(links) for _, links in linking_results)
        linked_articles = sum(1 for _, links in linking_results if links)

        print("\nüìä Linking Summary:")
        print(f"   Articles processed: {len(test_articles)}")
        print(f"   Articles with links: {linked_articles}")
        print(f"   Total ticker links: {total_links}")

        return linking_results

    except Exception as e:
        print(f"‚ùå Ticker linking test failed: {e}")
        return []


# Run test
linking_results = test_ticker_linking(sample_articles, sample_tickers, max_articles=3)

## 5. Sentiment Analysis Test


In [None]:
# Test sentiment analysis component
from app.services.sentiment import get_sentiment_service


def test_sentiment_analysis(articles: list[Any], scraped_content: dict[str, str]):
    """Test sentiment analysis on sample articles."""
    print(f"üòä Testing sentiment analysis on {len(articles)} articles")

    if not articles:
        print("‚ö†Ô∏è  No articles available for sentiment test")
        return []

    try:
        sentiment_service = get_sentiment_service()
        print("‚úÖ SentimentService initialized")

        sentiment_results = []

        for i, article in enumerate(articles, 1):
            print(f"\nüì∞ Analyzing sentiment for article {i}: {article.title[:50]}...")

            # Get text for analysis (prefer scraped content, fallback to title)
            text_to_analyze = None
            if article.url in scraped_content and scraped_content[article.url]:
                text_to_analyze = scraped_content[article.url]
                print(f"   Using scraped content ({len(text_to_analyze)} chars)")
            elif article.title:
                text_to_analyze = article.title
                print(f"   Using title only ({len(text_to_analyze)} chars)")

            if not text_to_analyze:
                print("   ‚ö†Ô∏è  No text available for analysis")
                continue

            # Analyze sentiment
            try:
                score, label = sentiment_service.analyze_with_label(text_to_analyze)
                print(f"   Sentiment: {label} (score: {score:.3f})")

                sentiment_results.append(
                    {
                        "article": article,
                        "text_length": len(text_to_analyze),
                        "sentiment_score": score,
                        "sentiment_label": label,
                        "text_preview": (
                            text_to_analyze[:100] + "..."
                            if len(text_to_analyze) > 100
                            else text_to_analyze
                        ),
                    }
                )

            except Exception as e:
                print(f"   ‚ùå Sentiment analysis failed: {e}")

        # Summary
        if sentiment_results:
            scores = [r["sentiment_score"] for r in sentiment_results]
            labels = [r["sentiment_label"] for r in sentiment_results]

            print("\nüìä Sentiment Analysis Summary:")
            print(f"   Articles analyzed: {len(sentiment_results)}")
            print(f"   Average score: {sum(scores)/len(scores):.3f}")
            print(f"   Positive: {labels.count('Positive')}")
            print(f"   Neutral: {labels.count('Neutral')}")
            print(f"   Negative: {labels.count('Negative')}")

        return sentiment_results

    except Exception as e:
        print(f"‚ùå Sentiment analysis test failed: {e}")
        return []


# Run test
sentiment_results = test_sentiment_analysis(sample_articles, scraped_content)

## 6. Context Analyzer Test


In [None]:
# Test context analyzer component
from app.services.context_analyzer import get_context_analyzer


def test_context_analyzer():
    """Test context analyzer with sample ticker mentions."""
    print("üß† Testing context analyzer with sample scenarios")

    try:
        analyzer = get_context_analyzer()
        print("‚úÖ ContextAnalyzer initialized")

        # Test scenarios
        test_scenarios = [
            {
                "ticker": "AAPL",
                "text": "Apple Inc reported strong quarterly earnings with revenue growth of 15%. The company's stock price rose significantly.",
                "matched_terms": ["AAPL", "Apple"],
                "description": "Positive financial context for Apple",
            },
            {
                "ticker": "V",
                "text": "I need to apply for a visa to travel to Europe next month. The visa application process is quite complex.",
                "matched_terms": ["V"],
                "description": "Negative context - visa application, not Visa Inc",
            },
            {
                "ticker": "TSLA",
                "text": "Tesla stock surged after the company announced new electric vehicle models. Investors are bullish on TSLA.",
                "matched_terms": ["TSLA", "Tesla"],
                "description": "Positive financial context for Tesla",
            },
            {
                "ticker": "CAT",
                "text": "My cat is very playful and loves to chase toys around the house.",
                "matched_terms": ["CAT"],
                "description": "Negative context - pet cat, not Caterpillar",
            },
            {
                "ticker": "MA",
                "text": "Mastercard Inc announced a new partnership with fintech companies. MA stock is performing well.",
                "matched_terms": ["MA", "Mastercard"],
                "description": "Positive financial context for Mastercard",
            },
        ]

        results = []

        for i, scenario in enumerate(test_scenarios, 1):
            print(f"\nüß™ Test {i}: {scenario['description']}")
            print(f"   Ticker: {scenario['ticker']}")
            print(f"   Text: {scenario['text'][:80]}...")

            # Analyze context
            confidence, reasoning = analyzer.analyze_ticker_relevance(
                scenario["ticker"], scenario["text"], scenario["matched_terms"]
            )

            print(f"   Confidence: {confidence:.3f}")
            print(f"   Reasoning: {reasoning}")

            # Determine if result is correct
            expected_positive = "Positive" in scenario["description"]
            is_positive = confidence >= 0.5
            is_correct = expected_positive == is_positive

            print(f"   Expected: {'Positive' if expected_positive else 'Negative'}")
            print(f"   Result: {'‚úÖ Correct' if is_correct else '‚ùå Incorrect'}")

            results.append(
                {
                    "scenario": scenario,
                    "confidence": confidence,
                    "reasoning": reasoning,
                    "is_correct": is_correct,
                }
            )

        # Summary
        correct_count = sum(1 for r in results if r["is_correct"])
        total_count = len(results)

        print("\nüìä Context Analysis Summary:")
        print(f"   Tests run: {total_count}")
        print(f"   Correct predictions: {correct_count}")
        print(f"   Accuracy: {correct_count/total_count:.1%}")

        return results

    except Exception as e:
        print(f"‚ùå Context analyzer test failed: {e}")
        return []


# Run test
context_results = test_context_analyzer()

## 7. End-to-End Pipeline Test


In [None]:
# Test complete end-to-end pipeline
def test_end_to_end_pipeline():
    """Test the complete pipeline from GDELT ingestion to database storage."""
    print("üöÄ Testing complete end-to-end pipeline")

    try:
        # Step 1: Get fresh GDELT data
        print("\nüì° Step 1: Fetching fresh GDELT data...")
        fresh_articles = test_gdelt_ingestion(hours_back=1, max_files=1)

        if not fresh_articles:
            print("‚ùå No fresh articles available for pipeline test")
            return None

        # Step 2: Load tickers
        print("\nüìà Step 2: Loading tickers...")
        db = SessionLocal()
        tickers = db.execute(select(Ticker)).scalars().all()
        db.close()

        if not tickers:
            print("‚ùå No tickers available for pipeline test")
            return None

        # Step 3: Initialize linker
        print("\nüîó Step 3: Initializing ticker linker...")
        linker = TickerLinker(tickers, max_scraping_workers=2)

        # Step 4: Link articles with multithreaded scraping
        print("\nüï∑Ô∏è  Step 4: Linking articles with content scraping...")
        linked_results = linker.link_articles_with_multithreaded_scraping(
            fresh_articles
        )

        # Step 5: Analyze sentiment for linked articles
        print("\nüòä Step 5: Analyzing sentiment...")
        sentiment_service = get_sentiment_service()

        pipeline_results = []

        for article, ticker_links in linked_results:
            if not ticker_links:  # Skip articles with no ticker links
                continue

            # Analyze sentiment
            text_for_sentiment = article.text or article.title
            if text_for_sentiment:
                try:
                    sentiment_score, sentiment_label = (
                        sentiment_service.analyze_with_label(text_for_sentiment)
                    )
                except:
                    sentiment_score, sentiment_label = 0.0, "Neutral"
            else:
                sentiment_score, sentiment_label = 0.0, "Neutral"

            pipeline_results.append(
                {
                    "article": article,
                    "ticker_links": ticker_links,
                    "sentiment_score": sentiment_score,
                    "sentiment_label": sentiment_label,
                    "num_tickers": len(ticker_links),
                }
            )

        # Summary
        print("\nüìä Pipeline Results Summary:")
        print(f"   Articles processed: {len(fresh_articles)}")
        print(f"   Articles with ticker links: {len(pipeline_results)}")
        print(
            f"   Total ticker relationships: {sum(r['num_tickers'] for r in pipeline_results)}"
        )

        if pipeline_results:
            sentiment_labels = [r["sentiment_label"] for r in pipeline_results]
            print("   Sentiment distribution:")
            print(f"     Positive: {sentiment_labels.count('Positive')}")
            print(f"     Neutral: {sentiment_labels.count('Neutral')}")
            print(f"     Negative: {sentiment_labels.count('Negative')}")

        return pipeline_results

    except Exception as e:
        print(f"‚ùå End-to-end pipeline test failed: {e}")
        return None


# Run end-to-end test
pipeline_results = test_end_to_end_pipeline()

## 8. Data Visualization & Analysis


In [None]:
# Create visualizations and analysis
def create_visualizations():
    """Create visualizations for the test results."""
    print("üìä Creating visualizations and analysis")

    try:
        # Set up plotting style
        plt.style.use("default")
        sns.set_palette("husl")

        # Create figure with subplots
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(
            "Market Pulse Component Testing Results", fontsize=16, fontweight="bold"
        )

        # 1. Sentiment Distribution
        if sentiment_results:
            ax1 = axes[0, 0]
            sentiment_labels = [r["sentiment_label"] for r in sentiment_results]
            sentiment_counts = pd.Series(sentiment_labels).value_counts()

            colors = [
                (
                    "green"
                    if label == "Positive"
                    else "gray" if label == "Neutral" else "red"
                )
                for label in sentiment_counts.index
            ]

            sentiment_counts.plot(kind="bar", ax=ax1, color=colors)
            ax1.set_title("Sentiment Distribution")
            ax1.set_xlabel("Sentiment")
            ax1.set_ylabel("Count")
            ax1.tick_params(axis="x", rotation=45)
        else:
            axes[0, 0].text(
                0.5,
                0.5,
                "No sentiment data available",
                ha="center",
                va="center",
                transform=axes[0, 0].transAxes,
            )
            axes[0, 0].set_title("Sentiment Distribution")

        # 2. Ticker Linking Confidence Distribution
        if linking_results:
            ax2 = axes[0, 1]
            all_confidences = []
            for _, ticker_links in linking_results:
                for link in ticker_links:
                    all_confidences.append(link.confidence)

            if all_confidences:
                ax2.hist(
                    all_confidences, bins=10, alpha=0.7, color="blue", edgecolor="black"
                )
                ax2.set_title("Ticker Linking Confidence Distribution")
                ax2.set_xlabel("Confidence Score")
                ax2.set_ylabel("Frequency")
                ax2.axvline(x=0.5, color="red", linestyle="--", label="Threshold (0.5)")
                ax2.legend()
            else:
                ax2.text(
                    0.5,
                    0.5,
                    "No linking data available",
                    ha="center",
                    va="center",
                    transform=ax2.transAxes,
                )
        else:
            axes[0, 1].text(
                0.5,
                0.5,
                "No linking data available",
                ha="center",
                va="center",
                transform=axes[0, 1].transAxes,
            )
            axes[0, 1].set_title("Ticker Linking Confidence Distribution")

        # 3. Context Analyzer Accuracy
        if context_results:
            ax3 = axes[1, 0]
            correct_count = sum(1 for r in context_results if r["is_correct"])
            total_count = len(context_results)
            incorrect_count = total_count - correct_count

            labels = ["Correct", "Incorrect"]
            sizes = [correct_count, incorrect_count]
            colors = ["lightgreen", "lightcoral"]

            ax3.pie(
                sizes, labels=labels, colors=colors, autopct="%1.1f%%", startangle=90
            )
            ax3.set_title(
                f"Context Analyzer Accuracy\\n({correct_count}/{total_count} correct)"
            )
        else:
            axes[1, 0].text(
                0.5,
                0.5,
                "No context analysis data available",
                ha="center",
                va="center",
                transform=axes[1, 0].transAxes,
            )
            axes[1, 0].set_title("Context Analyzer Accuracy")

        # 4. Pipeline Results Summary
        if pipeline_results:
            ax4 = axes[1, 1]
            ticker_counts = [r["num_tickers"] for r in pipeline_results]

            if ticker_counts:
                ax4.hist(
                    ticker_counts,
                    bins=max(1, max(ticker_counts)),
                    alpha=0.7,
                    color="purple",
                    edgecolor="black",
                )
                ax4.set_title("Tickers per Article Distribution")
                ax4.set_xlabel("Number of Tickers")
                ax4.set_ylabel("Number of Articles")
            else:
                ax4.text(
                    0.5,
                    0.5,
                    "No pipeline data available",
                    ha="center",
                    va="center",
                    transform=ax4.transAxes,
                )
        else:
            axes[1, 1].text(
                0.5,
                0.5,
                "No pipeline data available",
                ha="center",
                va="center",
                transform=axes[1, 1].transAxes,
            )
            axes[1, 1].set_title("Tickers per Article Distribution")

        plt.tight_layout()
        plt.show()

        # Create summary table
        print("\\nüìã Test Results Summary Table:")
        summary_data = []

        # Database test
        summary_data.append(
            {
                "Component": "Database Connection",
                "Status": "‚úÖ Pass" if sample_tickers else "‚ùå Fail",
                "Details": f"{len(sample_tickers) if sample_tickers else 0} tickers loaded",
            }
        )

        # GDELT test
        summary_data.append(
            {
                "Component": "GDELT Ingestion",
                "Status": "‚úÖ Pass" if sample_articles else "‚ùå Fail",
                "Details": f"{len(sample_articles)} articles parsed",
            }
        )

        # Content scraping test
        successful_scrapes = sum(
            1 for content in scraped_content.values() if content is not None
        )
        summary_data.append(
            {
                "Component": "Content Scraping",
                "Status": "‚úÖ Pass" if successful_scrapes > 0 else "‚ùå Fail",
                "Details": f"{successful_scrapes}/{len(scraped_content)} URLs scraped",
            }
        )

        # Ticker linking test
        total_links = sum(len(links) for _, links in linking_results)
        summary_data.append(
            {
                "Component": "Ticker Linking",
                "Status": "‚úÖ Pass" if total_links > 0 else "‚ùå Fail",
                "Details": f"{total_links} ticker links found",
            }
        )

        # Sentiment analysis test
        summary_data.append(
            {
                "Component": "Sentiment Analysis",
                "Status": "‚úÖ Pass" if sentiment_results else "‚ùå Fail",
                "Details": f"{len(sentiment_results)} articles analyzed",
            }
        )

        # Context analyzer test
        if context_results:
            correct_count = sum(1 for r in context_results if r["is_correct"])
            accuracy = correct_count / len(context_results)
            summary_data.append(
                {
                    "Component": "Context Analyzer",
                    "Status": "‚úÖ Pass" if accuracy >= 0.6 else "‚ö†Ô∏è Partial",
                    "Details": f"{accuracy:.1%} accuracy ({correct_count}/{len(context_results)})",
                }
            )
        else:
            summary_data.append(
                {
                    "Component": "Context Analyzer",
                    "Status": "‚ùå Fail",
                    "Details": "No test data",
                }
            )

        # Pipeline test
        summary_data.append(
            {
                "Component": "End-to-End Pipeline",
                "Status": "‚úÖ Pass" if pipeline_results else "‚ùå Fail",
                "Details": f"{len(pipeline_results) if pipeline_results else 0} articles processed",
            }
        )

        # Display summary table
        summary_df = pd.DataFrame(summary_data)
        display(HTML(summary_df.to_html(index=False, escape=False)))

        return summary_df

    except Exception as e:
        print(f"‚ùå Visualization creation failed: {e}")
        return None


# Create visualizations
summary_df = create_visualizations()

## 9. Custom Testing Functions

You can use these functions to test specific components with your own data:


In [None]:
# Custom testing functions for your own data
def test_custom_article(text: str, title: str = None, url: str = None):
    """Test a custom article through the complete pipeline."""
    print(f"üß™ Testing custom article: {title or 'Untitled'}")

    try:
        # Create article object
        from app.db.models import Article

        article = Article(
            source="custom",
            url=url or "https://example.com",
            published_at=datetime.now(UTC),
            title=title or "Custom Test Article",
            text=text,
            lang="en",
        )

        # Load tickers
        db = SessionLocal()
        tickers = db.execute(select(Ticker)).scalars().all()
        db.close()

        if not tickers:
            print("‚ùå No tickers available")
            return None

        # Initialize components
        linker = TickerLinker(tickers, max_scraping_workers=1)
        sentiment_service = get_sentiment_service()

        # Test ticker linking
        print("\\nüîó Testing ticker linking...")
        ticker_links = linker.link_article(article)
        print(f"   Found {len(ticker_links)} ticker matches:")
        for link in ticker_links:
            print(f"   - {link.ticker}: {link.confidence:.2f} confidence")
            print(f"     Matched terms: {link.matched_terms}")
            print(f"     Reasoning: {link.reasoning}")

        # Test sentiment analysis
        print("\\nüòä Testing sentiment analysis...")
        sentiment_score, sentiment_label = sentiment_service.analyze_with_label(text)
        print(f"   Sentiment: {sentiment_label} (score: {sentiment_score:.3f})")

        return {
            "article": article,
            "ticker_links": ticker_links,
            "sentiment_score": sentiment_score,
            "sentiment_label": sentiment_label,
        }

    except Exception as e:
        print(f"‚ùå Custom article test failed: {e}")
        return None


def test_custom_url(url: str):
    """Test a custom URL by scraping and analyzing it."""
    print(f"üåê Testing custom URL: {url}")

    try:
        # Test content scraping
        scraper = get_content_scraper()
        content = scraper.scrape_article_content(url)

        if not content:
            print("‚ùå Failed to scrape content from URL")
            return None

        print(f"‚úÖ Successfully scraped {len(content)} characters")

        # Test with scraped content
        return test_custom_article(text=content, title=f"Article from {url}", url=url)

    except Exception as e:
        print(f"‚ùå Custom URL test failed: {e}")
        return None


# Example usage:
print("\\nüìù Example usage:")
print("# Test a custom article:")
print("result = test_custom_article(")
print("    text='Apple Inc reported strong quarterly earnings. AAPL stock is up 5%.',")
print("    title='Apple Earnings Beat Expectations'")
print(")")
print("\\n# Test a custom URL:")
print("result = test_custom_url('https://example-news-site.com/article')")

## 10. Conclusion & Next Steps

### What This Notebook Tests:

1. **Database Connection** - Verifies PostgreSQL connection and ticker data
2. **GDELT Ingestion** - Tests fetching and parsing GDELT data
3. **Content Scraping** - Tests web scraping capabilities
4. **Ticker Linking** - Tests article-to-ticker matching with confidence scores
5. **Sentiment Analysis** - Tests VADER sentiment analysis
6. **Context Analysis** - Tests ticker relevance determination
7. **End-to-End Pipeline** - Tests complete workflow
8. **Visualization** - Creates charts and summary tables

### Prerequisites:

- PostgreSQL database running
- Environment variables configured (.env file)
- Tickers seeded in database (`uv run app/scripts/seed_tickers.py`)
- Required Python packages installed

### Usage Tips:

1. **Run cells sequentially** - Each test builds on previous results
2. **Adjust parameters** - Modify `hours_back`, `max_files`, `max_articles` for different data sizes
3. **Use custom functions** - Test your own articles or URLs with the provided functions
4. **Check logs** - Monitor console output for detailed component behavior
5. **Review visualizations** - Use charts to understand system performance

### Troubleshooting:

- **Database errors**: Ensure PostgreSQL is running and .env is configured
- **No GDELT data**: Try different time ranges or check GDELT server status
- **Scraping failures**: Some URLs may be blocked or require different handling
- **Import errors**: Ensure you're running from the project root directory

### Next Steps:

1. **Scale up testing** - Increase data volumes for performance testing
2. **Add more tickers** - Expand ticker universe for better coverage
3. **Fine-tune parameters** - Adjust confidence thresholds and scraping settings
4. **Monitor in production** - Use this notebook for ongoing system health checks
