In [49]:
from datetime import datetime, timedelta
import openai
import sys
sys.path.append('../..') 
from openai import OpenAI
from tqdm import tqdm
import json
import os
import numpy as np
from MultimodalForcast.data_loader.data_loader import process_bitcoin_data, split_series, TimeSeriesDataset, collate_fn

In [92]:
def get_unified_prompt(start_date: str, end_date: str) -> str:
    return f"""You are an expert cryptocurrency analyst with access to real-time news and articles search capabilities.
    Your task is to analyze Bitcoin price forcast influencers and retrieve relevant news articles or tweets from {start_date} to {end_date}.

    Follow these steps:
    1. First, identify and learn the key categories that influence Bitcoin price based on your own knowledge and any related articles/analyses you could find
      (e.g., regulatory changes, market sentiment, political events, economic factors, related products etc.)

    2. For each category:
        - Search for ALL news articles relevant to Bitcoin price from {start_date} to {end_date} by looking for articles from various sources (news outlets, crypto media, financial news).
        Also you need to consider both positive and negative as well as both short term and long term impacts
        - Analyze how these news items might affect Bitcoin price

    3. For each article found, provide:
        - Title
        - Source
        - Publication date (MUST be between {start_date} and {end_date})
        - A concise summary in 2-5 sentences
        - The key influencer category it belongs to
        - An analysis of its potential impact on Bitcoin price

    Return each article ONLY in this exact structure without any extra explanation or additional text:
    {{
        "influencer_category": "category_name",
        "title": "article title",
        "source": "source name",
        "publication_date": "date", 
        "summary": "concise summary in 2-5 sentences",
        "impact_analysis": "analysis of potential impact on Bitcoin price"
    }}


    CRITICAL RULES:
    1. Return ONLY the JSON object above
    2. NO explanations, NO markdown, NO json blocks
    3. NO additional text before or after the JSON
    4. Publication date MUST be between {start_date} and {end_date}, DO NOT include any articles with dates outside this range
    5. Verify each article's date before including it
    6. Include as many relevant articles as possible
    7. The news articles should not directly mention Bitcoin price equal to some dollar value
    """

def retrieve_news(start_date: str, end_date: str) -> dict:
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    
    response = client.chat.completions.create(
        model="gpt-4o-search-preview", 
        messages=[
            {
                "role": "user",
                "content": get_unified_prompt(start_date, end_date)
            }
        ]
    )
    is_truncated = response.choices[0].finish_reason == "length"
    if is_truncated:
        print(f"The response is truncated for start date {start_date} and end date {end_date}")
    # Parse the response into JSON
    try:
        return json.loads(response.choices[0].message.content)
    except json.JSONDecodeError:
        # If JSON parsing fails, return the raw response
        print(f"Failed to parse JSON for one of the response for start date {start_date} and end date {end_date}")
        return response.choices[0].message.content



In [93]:
print(retrieve_news(start_date='2018-01-01', end_date='2018-01-31'))

Failed to parse JSON for one of the response for start date 2018-01-01 and end date 2018-01-31
{"influencer_category": "Regulatory Changes", "title": "Bitcoin's January fall wipes off $44bn in value", "source": "The Guardian", "publication_date": "2018-02-01", "summary": "Bitcoin experienced a significant decline in January, losing over $44 billion in value. This drop was influenced by regulatory actions, including the Indian government's announcement to ban cryptocurrency trading and Facebook's decision to prohibit digital currency advertisements.", "impact_analysis": "The regulatory measures and advertising bans likely contributed to negative market sentiment, leading to a decrease in Bitcoin's price."}

{"influencer_category": "Regulatory Changes", "title": "Bitcoin value plunges 13% and falls below $9,000", "source": "CNN Money", "publication_date": "2018-02-01", "summary": "Bitcoin's price fell by 13%, dropping below $9,000. The decline followed the Indian finance minister's state