In [1]:
from datetime import datetime, timedelta
import openai
import sys
sys.path.append('../..') 
from openai import OpenAI
from tqdm import tqdm
import json
import os
import numpy as np
from MultimodalForcast.data_loader.data_loader import process_bitcoin_data, split_series, TimeSeriesDataset, collate_fn

In [6]:
def get_unified_prompt(start_date: str, end_date: str) -> str:
    return f"""You are an expert cryptocurrency analyst with access to real-time news, articles, and tweets.
    Your task is to retrieve and summarize all content from {start_date} to {end_date} that is relevant to Bitcoin price analysis and forecasting. 

    Proceed the task through two steps:

    Step 1: Identify Influencer Categories
    Determine all key categories that influence Bitcoin's price using your domain knowledge and any relevant articles or analyses you can find online. Examples include:
    - Regulatory changes
    - Market sentiment
    - Political events
    - Economic indicators
    - Technological developments
    - Institutional adoption
    - Supply and demand dynamics
    - Mining activities
    - Macroeconomic factors

    Step 2: Process Each Category Individually
    For each identified category:
    1. Search for all relevant news articles, posts, or tweets published between {start_date} and {end_date}.
    2. Ensure comprehensive coverage by including content with short-term or long-term, positive or negative, direct or indirect effects on Bitcoin's price.

    Output Requirement:
    For each piece of content retrieved, extract and return the following information:
    - Influencer category
    - Title
    - Source
    - Publication date (must be between {start_date} and {end_date})
    - A clear and concise summary (2-6 sentences)

    Output Format:
    Return each article ONLY as a seperate json object without any extra explanation or additional text:
    {{
        "influencer_category": "category_name",
        "title": "article title",
        "source": "source name",
        "publication_date": "YYYY-MM-DD", 
        "summary": "summary in 2-6 sentences"
    }}


    CRITICAL RULES:
    1. Output only valid JSON objects—no explanations, no markdown formatting, no surrounding text.
    2. All publication dates must be strictly between {start_date} and {end_date}.
    3. Verify dates carefully before including each item.
    4. Include as many relevant pieces of content as possible.
    5. Do not include summaries that mention specific Bitcoin price values (e.g., in USD).
    """

def retrieve_news(start_date: str, end_date: str) -> dict:
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    
    response = client.chat.completions.create(
        model="gpt-4o-search-preview", 
        messages=[
            {
                "role": "user",
                "content": get_unified_prompt(start_date, end_date)
            }
        ]
    )
    is_truncated = response.choices[0].finish_reason == "length"
    if is_truncated:
        print(f"The response is truncated for start date {start_date} and end date {end_date}")
    # Parse the response into JSON
    try:
        return json.loads(response.choices[0].message.content)
    except json.JSONDecodeError:
        # If JSON parsing fails, return the raw response
        print(f"Failed to parse JSON for one of the response for start date {start_date} and end date {end_date}")
        return response.choices[0].message.content



In [7]:
print(retrieve_news(start_date='2018-01-01', end_date='2018-01-31'))

Failed to parse JSON for one of the response for start date 2018-01-01 and end date 2018-01-31
{"influencer_category": "Regulatory changes", "title": "South Korea Regulates Bitcoin Traders' Identities", "source": "Bitbo", "publication_date": "2018-01-22", "summary": "South Korea introduced regulations mandating identity verification for all Bitcoin traders, effectively ending anonymous trading in the country. This regulatory move was part of broader efforts to curb speculative trading, impacting global sentiment and contributing to market volatility."}

{"influencer_category": "Technological developments", "title": "Stripe Phases Out Bitcoin Payment Support", "source": "Bitbo", "publication_date": "2018-01-24", "summary": "Stripe, a major online payment processor, announced it would discontinue Bitcoin payment support, citing high fees, slow transaction speeds, and reduced demand. This decision underscored Bitcoin's scalability challenges and raised concerns about its viability as a ma

In [None]:
retrieve_news(start_date='2024-01-01', end_date='2024-01-07')

ValueError: Invalid format specifier