# Hivedex API Testing Notebook

Test all data sources and signal calculations before deploying to Hex.

In [None]:
# Install dependencies if needed
# !pip install requests pandas numpy vaderSentiment yfinance tqdm altair

In [None]:
import sys
sys.path.insert(0, '../scripts')

import pandas as pd
import requests
from datetime import datetime, timedelta
import json

## 1. Test Arctic Shift API (Reddit Data)

In [None]:
# Smoke-test the Arctic Shift post-search endpoint: query r/wallstreetbets
# with an 'NVDA' title filter and a limit of 10.
url = 'https://arctic-shift.photon-reddit.com/api/posts/search'
params = dict(subreddit='wallstreetbets', title='NVDA', limit=10)

response = requests.get(url, params=params, timeout=30)
print(f'Status: {response.status_code}')

# Only an HTTP 200 body is parsed; any other status is just reported above.
if response.status_code == 200:
    data = response.json()
    posts = data.get('data', [])
    print(f'Posts returned: {len(posts)}')

    # Preview a few columns when at least one post came back.
    if posts:
        df = pd.DataFrame(posts)
        display(df[['title', 'score', 'num_comments', 'created_utc']].head())

## 2. Test GDELT API (News Data)

In [None]:
# Test GDELT DOC 2.0 API: request an article list (JSON, at most 10 records)
# for the query "NVIDIA earnings" between the two fixed timestamps below.
url = 'https://api.gdeltproject.org/api/v2/doc/doc'
params = {
    'query': 'NVIDIA earnings',
    'mode': 'artlist',
    'maxrecords': 10,
    'format': 'json',
    'startdatetime': '20241101000000',
    'enddatetime': '20241120235959',
}

response = requests.get(url, params=params, timeout=60)
print(f'Status: {response.status_code}')

if response.status_code == 200:
    try:
        data = response.json()
    except ValueError:
        # GDELT may answer HTTP 200 with a plain-text message instead of JSON
        # (e.g. when it rejects a query). JSON decode errors raised by
        # requests are ValueError subclasses, so show the body rather than
        # aborting the cell with a traceback.
        data = {}
        print(f'Response was not JSON: {response.text[:200]!r}')

    articles = data.get('articles', [])
    print(f'Articles returned: {len(articles)}')

    # Preview a few columns when at least one article came back.
    if articles:
        df = pd.DataFrame(articles)
        display(df[['title', 'domain', 'seendate']].head())

## 3. Test yfinance (Stock Data)

In [None]:
import yfinance as yf

# Pull NVDA price history for the test window through yfinance.
stock = yf.Ticker('NVDA')
history = stock.history(start='2024-11-01', end='2024-11-25')

print(f'Days of data: {len(history)}')
display(history.head())

# Percentage move from the first close in the window to the last one;
# skipped when no rows were returned.
if not history.empty:
    closes = history['Close']
    change = (closes.iloc[-1] / closes.iloc[0] - 1) * 100
    print(f'Price change: {change:.2f}%')

## 4. Test VADER Sentiment

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Score a handful of hand-written sample sentences with VADER.
analyzer = SentimentIntensityAnalyzer()

test_texts = [
    'NVDA to the moon! Best earnings ever!',
    'This stock is going to crash hard',
    'Just bought some shares',
    'Terrible investment, stay away',
    'Nvidia beats expectations, AI demand strong',
]

# One row per sample: the text plus its compound, positive and negative scores.
results = [
    {
        'text': sample,
        'compound': scored['compound'],
        'pos': scored['pos'],
        'neg': scored['neg'],
    }
    for sample, scored in zip(test_texts, map(analyzer.polarity_scores, test_texts))
]

# Last expression of the cell, so the notebook renders it as a table.
pd.DataFrame(results)

## 5. Test Data Fetcher Module

In [None]:
from data_fetcher import fetch_reddit_posts, fetch_gdelt_news

# Exercise the project's Reddit fetcher for the test window, with
# use_cache=False.
posts = fetch_reddit_posts(
    subreddits=['wallstreetbets'],
    keywords=['NVDA', 'nvidia'],
    start_date='2024-11-01',
    end_date='2024-11-20',
    use_cache=False,
)

print(f'Fetched {len(posts)} Reddit posts')

# Preview a few columns unless the result is empty.
if not posts.empty:
    reddit_preview = posts[['title', 'score', 'created_utc']].head()
    display(reddit_preview)

In [None]:
# Exercise the project's GDELT fetcher for the same test window, with
# use_cache=False.
news = fetch_gdelt_news(
    keywords=['NVIDIA', 'earnings'],
    start_date='2024-11-01',
    end_date='2024-11-20',
    use_cache=False,
)

print(f'Fetched {len(news)} news articles')

# Preview a few columns unless the result is empty.
if not news.empty:
    news_preview = news[['title', 'domain', 'seendate']].head()
    display(news_preview)

## 6. Test Signal Calculator

In [None]:
from signal_calculator import (
    add_sentiment_to_posts,
    calculate_reddit_signal,
    calculate_gdelt_signal,
    calculate_combined_signal,
    calculate_lead_time,
)

# Run the Reddit half of the pipeline on the fetched posts: attach sentiment,
# derive the Reddit signal, then print a short summary. Skipped entirely when
# no posts were fetched.
if not posts.empty:
    posts_with_sentiment = add_sentiment_to_posts(posts)
    avg_sentiment = posts_with_sentiment['compound'].mean()
    print(f'Average sentiment: {avg_sentiment:.3f}')

    reddit_signal = calculate_reddit_signal(posts_with_sentiment)
    print('\nReddit Signal Summary:')
    print(f'  Days analyzed: {len(reddit_signal)}')

    reddit_values = reddit_signal['reddit_signal']
    print(f'  Average signal: {reddit_values.mean():.2f}')
    print(f'  Peak signal: {reddit_values.max():.2f}')

    # First ten rows of the columns the summary is built from.
    display(reddit_signal[['date', 'volume', 'sentiment', 'reddit_signal']].head(10))

In [None]:
# Derive the GDELT signal from the fetched news and print a short summary.
# Skipped entirely when no articles were fetched.
if not news.empty:
    gdelt_signal = calculate_gdelt_signal(news)
    print('GDELT Signal Summary:')
    print(f'  Days analyzed: {len(gdelt_signal)}')

    gdelt_average = gdelt_signal['gdelt_signal'].mean()
    print(f'  Average signal: {gdelt_average:.2f}')

    # First ten rows of the signal table.
    display(gdelt_signal[['date', 'coverage', 'gdelt_signal']].head(10))

In [None]:
# Merge the Reddit and GDELT signals; needs data from both sources.
if not (posts.empty or news.empty):
    combined = calculate_combined_signal(reddit_signal, gdelt_signal)

    print('Combined Signal Summary:')
    print(f'  Days analyzed: {len(combined)}')
    print(f'  Average hivemind signal: {combined["hivemind_signal"].mean():.2f}')

    # Lead time of the combined signal relative to the 2024-11-20 date;
    # print every entry of the returned mapping.
    lead_time = calculate_lead_time(combined, '2024-11-20')
    print('\nLead Time:')
    for metric, value in lead_time.items():
        print(f'  {metric}: {value}')

## 7. Visualize Signals

In [None]:
import altair as alt

# Plot the Reddit, GDELT and combined (hivemind) signals on one timeline.
# `combined` is only assigned by the "Combine signals" cell when both sources
# returned data, so check the notebook namespace before touching it.
if 'combined' in globals() and not combined.empty:
    # Reshape to long form (one row per date/signal pair) for a multi-line chart.
    plot_data = combined[['date', 'reddit_signal', 'gdelt_signal', 'hivemind_signal']].melt(
        id_vars=['date'],
        var_name='signal_type',
        value_name='signal_value'
    )

    chart = alt.Chart(plot_data).mark_line(point=True).encode(
        x=alt.X('date:T', title='Date'),
        y=alt.Y('signal_value:Q', title='Signal (0-100)', scale=alt.Scale(domain=[0, 100])),
        color='signal_type:N',
        tooltip=['date:T', 'signal_type:N', 'signal_value:Q']
    ).properties(
        width=700,
        height=400,
        title='Hivedex Signal Timeline'
    ).interactive()

    # Jupyter auto-renders only a cell's last *top-level* expression; a bare
    # `chart` nested inside this `if` is evaluated and discarded, so the
    # chart must be displayed explicitly.
    display(chart)
else:
    print('No combined signal available to plot.')

## 8. Load and View Events Catalog

In [None]:
# Load the events catalog; lines starting with '#' in the CSV are treated as
# comments and skipped.
events = pd.read_csv('../data/events_catalog.csv', comment='#')
print(f'Total events: {len(events)}')

# Number of events per category.
category_counts = events['category'].value_counts()
print('\nBy category:')
print(category_counts)

display(events.head(10))