In [1]:
import json
import os
import ollama
from tqdm import tqdm
import time
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# ============================== GLOBAL VARIABLES ============================== #

ROOT_DIR="/home/ibrahim/stock/data"
DIRS=[os.path.join(ROOT_DIR, dir) for dir in os.listdir(ROOT_DIR) if os.path.isdir(os.path.join(ROOT_DIR, dir))]
PATHS=[os.path.join(dir, file) for dir in DIRS for file in os.listdir(dir) if file.endswith(".json")]

In [2]:
keyowrds = [
    "earnings",
    "product launch",
    "acquisition",
    "merger",
    "regulation",
    "market share",
    "innovation",
    "investment",
    "partnership",
    "new chip",
    "technology breakthrough",
    "legal",
]

In [None]:
import subprocess

ROOT_DIR = "/home/ibrahim/stock/data/events"
PATHS = [os.path.join(ROOT_DIR, file) for file in os.listdir(ROOT_DIR) if file.endswith(".json")]
YEARS = ["2020", "2021", "2022", "2023", "2024", "2025"]

for year in YEARS:
    os.makedirs(os.path.join(ROOT_DIR, year), exist_ok=True)

for path in PATHS:
    if "2020" in path:
        subprocess.run(f"mv {path} {os.path.join(ROOT_DIR, '2020')}", shell=True)
    elif "2021" in path:
        subprocess.run(f"mv {path} {os.path.join(ROOT_DIR, '2021')}", shell=True)
    elif "2022" in path:
        subprocess.run(f"mv {path} {os.path.join(ROOT_DIR, '2022')}", shell=True)
    elif "2023" in path:
        subprocess.run(f"mv {path} {os.path.join(ROOT_DIR, '2023')}", shell=True)
    elif "2024" in path:
        subprocess.run(f"mv {path} {os.path.join(ROOT_DIR, '2024')}", shell=True)
    elif "2025" in path:
        subprocess.run(f"mv {path} {os.path.join(ROOT_DIR, '2025')}", shell=True)
    else:
        logging.warning(f"Unknown year for file: {path}. Skipping.")
        continue
    

In [21]:
path = PATHS[2]

with open(path, "r") as f:
    data = json.load(f)

headlines = [headline["headline"] for headline in data["headlines"]]

In [22]:
headlines

['Nvidia Q1 2020 Paints A Mixed Picture (NASDAQ:NVDA) - Seeking Alpha',
 'NVIDIA Stock Surges to All-Time High Ahead of Earnings - Investopedia',
 "Here's How Long It Took Nvidia To Reach A $100B Market Cap - Yahoo Finance",
 'NVIDIA’s New Ampere Data Center GPU in Full Production - NVIDIA Blog',
 'Nvidia accused of reporting $1 billion worth of mining GPU sales as gaming revenue - TechSpot',
 "Nvidia Stock Hits Record High After Mellanox Acquisition - Investor's Business Daily",
 'Nvidia CEO shows off "The world\'s largest graphics card" - Graphics - News - HEXUS',
 'Charts Suggest Semiconductor Stocks Are Headed Higher - Investopedia',
 "Nvidia DGX A100 'Ampere' deep learning system trademarked - Graphics - News - HEXUS",
 'Nvidia reports quarterly revenue up 39 per cent YoY - General Business - News - HEXUS',
 'Nvidia unveils monstrous A100 AI chip with 54 billion transistors and 5 petaflops of performance - VentureBeat',
 'How NVIDIA Uses AI to Boost Digital Marketing Efficiency - 

In [40]:
prompt = """
**System Prompt:**
<|im_start|>system You are an expert financial analyst evaluating a month's worth of news headlines related to NVIDIA. 
Use your reasoning capabilities [
    [
      8
    ]
] to select the single most impactful event headline from the list, focusing on events that could significantly affect NVIDIA's stock price, reputation, or market position. 
Consider major product launches, earnings reports, regulatory actions, mergers & acquisitions, innovations, or other transformative events. <|im_end|>

**User Prompt:**

<|im_start|>user
Analyze the following news headlines and select the most impactful ones for NVIDIA.
{headlines}

Generate an analysis of the selected headline events using the following JSON format:
```json
{{
  "selected_events": {{
    "headline": "Original headline text",
    "event_type": "Type of event",
    "summary": "Brief summary of the event",
    "impact": "High/Medium/Low",
    "rationale": "Explanation for the selection"
  }}
}}
"""
start = time.time()

for path in tqdm(PATHS):

  with open(path, "r") as f:
    data = json.load(f)

  headlines = [headline["headline"] for headline in data["headlines"]]

  response = ollama.chat(
      model="qwen2.5:7b",
      messages=[
          {"role": "user", "content": prompt.format(headlines=headlines)},
      ],
      options={"temperature": 0.7},
      format="json",
  )

  json_response = json.loads(response.message.content)

  high_impact_events = [headline for headline in json_response["selected_events"] if headline["impact"].lower() == "high"]
  medium_impact_events = [headline for headline in json_response["selected_events"] if headline["impact"].lower() == "medium"]
  low_impact_events = [headline for headline in json_response["selected_events"] if headline["impact"].lower() == "low"]

  events = {
      "high_impact_events": high_impact_events,
      "medium_impact_events": medium_impact_events,
      "low_impact_events": low_impact_events
  }

  if not os.path.exists(os.path.join(ROOT_DIR, "events")):
      os.makedirs(os.path.join(ROOT_DIR, "events"))

  with open(os.path.join(ROOT_DIR, "events", os.path.basename("events" + path)), "w") as f:
      json.dump(events, f, indent=4)

elapsed_time = time.time() - start
logging.info(f"Elapsed time: {elapsed_time:.2f} seconds")

  0%|          | 0/63 [00:02<?, ?it/s]


KeyboardInterrupt: 

In [30]:
ROOT_DIR = "/home/ibrahim/stock/data"
dirs = os.listdir(ROOT_DIR)
paths = []
proxies = [
    "http://45.61.139.48:8000",
    "http://103.156.141.100:80",
    "http://190.61.88.147:8080",
]

for dir in dirs:
    if os.path.isdir(os.path.join(ROOT_DIR, dir)):
        files = os.listdir(os.path.join(ROOT_DIR, dir))
        for file in files:
            if file.endswith(".json"):
                paths.append(os.path.join(ROOT_DIR, dir, file))

In [58]:
for path in paths:
    with open(path, "r") as f:
        data = json.load(f)
    
    encoded_urls = [article["links"][0]["href"] for article in data["articles"]]
    decoded_urls = asyncio.run(decode(encoded_urls))
    links = [article["link"] for article in data["articles"]]

    wrong_links = 0

    # for i, article in enumerate(data["articles"]):
    #     if links[i] != decoded_urls[i]:
            
    #         wrong_links += 1
    print(f"Encoded URLs: {encoded_urls}")
    print(f"Decoded URLs: {decoded_urls}")
    print(f"Links: {links}")
    # print(f"Wrong links: {wrong_links} out of {len(data['articles'])}")

['https://www.fool.com/investing/2020/01/08/why-nvidia-stock-soared-76-in-2019.aspx', 'https://sg.finance.yahoo.com/news/uol-acquires-154-key-hotel-123125281.html', 'https://sg.finance.yahoo.com/news/sembcorp-acquires-veolia-apos-public-062423565.html', 'https://blogs.nvidia.com/blog/italy-ai-nvaitc/', 'https://www.tomshardware.com/features/amd-radeon-rx-5500xt-vs-nvidia-gtx-1660', 'https://wccftech.com/nvidia-geforce-rtx-2060-6-gb-graphics-card-299-usd-price-drop/', 'https://sg.finance.yahoo.com/news/3-ways-cny-fashion-more-151218893.html', 'https://sg.finance.yahoo.com/news/the-philippines-is-making-roads-and-cement-with-plastic-garbage-024413736.html', 'https://m.hexus.net/tech/news/graphics/138887-vulkan-12-specification-released-khronos/', 'https://9to5google.com/2020/01/27/nvidia-shield-tv-remote-older-models/', 'https://www.thestreet.com/investing/trading-nvidia-nvda-stock-when-to-buy', 'https://insidehpc.com/2020/01/__trashed-15/', 'https://videocardz.com/84309/nvidia-geforce-r

HTTPStatusError: Server error '503 Service Unavailable' for url 'https://news.google.com/rss/articles/CBMiywFBVV95cUxPZ1hQY3NOMVM1WDVPRF9MYUNsc3pwNm1xWlZqVlE1aXRmZWp3YXpNYXlVLXBBd1Z5MGhwTklVb3pVdUF1YTU1aHBDZEp3TUVlWlZ6UGFUaWNmaU1RNTJxTDdqRTJRUC1tOXRJMWNLYmR6VmZBSlNsSG1GQWUxajExX0ZNMXNVYmlibkt5N2pGckVobVN3eGZSRzdSa1NXVm9iMHVjQktLRE9IbW0wZXNBckRXbUY4RmVaTkwtT2RjSlVLYlcwREljZWpVRQ'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/503

In [1]:
from googlenewsdecoder import gnewsdecoder

decoded = gnewsdecoder(source_url="https://news.google.com/rss/articles/CBMiiAFBVV95cUxOT25FOFMyQ1pQdDdoR0dWVklaazJJQjBhakRaT1BQelh5NGNqWWI5cDBJeWM5QndaamJyWUhwa1AyTmh0eGRiX1NtQ0FLRGljaWViVnVjcEdabzBrU2tNSUNwUVFpc0dwX0dhTXhOZUN2b0ZvMlJDWnd0anE3U25WZkRYSVRxQklq?oc=5")

In [2]:
decoded

{'status': False,
 'message': 'Request error in decode_url: 429 Client Error: Too Many Requests for url: https://www.google.com/sorry/index?continue=https://news.google.com/_/DotsSplashUi/data/batchexecute&q=EgSQ1gAGGKG-tL8GIjCIRfzLvgi_kyEMfgoNa7dHQ-mRf7F3bAFu3a-quxnp0vGGJz4XkalOtsb6Ae7VUYsyAnJSWgFD'}

In [3]:
headlines = [headline["headline"] for headline in data["headlines"]]
analyses = [article["ai_analysis"] for article in data["articles"]]

In [5]:
from transformers import AutoTokenizer

In [9]:
prompt = """<|im_start|>system
You are an expert financial analyst specializing in technology stocks, particularly NVIDIA.
<|im_end|>

<|im_start|>user
Analyze these NVIDIA-related headlines from February 2025:

{headlines}

Extract the 2-3 most significant events that could impact NVIDIA's stock price.
For each event:
1. Identify the specific news event
2. Explain how it might affect NVIDIA's stock price
3. Note any potential market implications

Format your response as valid JSON with this exact structure:
```json
{{
  "events": [
    {{
      "event": "Brief name of event 1",
      "description": "Detailed explanation of event 1",
      "potential_impact": "Analysis of potential stock impact"
    }},
    {{
      "event": "Brief name of event 2",
      "description": "Detailed explanation of event 2",
      "potential_impact": "Analysis of potential stock impact"
    }}
  ]
}}
"""

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")
prompt = prompt.format(headlines=headlines)

tokens = tokenizer(prompt, return_tensors="pt")
print(f"Number of tokens: {len(tokens['input_ids'][0])}")

response = ollama.chat(messages=[
    {"role": "user", "content": prompt},
], model="qwen2.5:1.5b", format="json")

print(response.message.content)

Number of tokens: 3632
{
  "events": [
    {
      "event": "Nvidia earnings beat expectations and signal strong AI chip demands",
      "description": "NVIDIA reported record revenue and profit for the year, which is likely to boost investors' confidence in the company's future growth prospects. This could lead to an increase in the stock price.",
      "potential_impact": "Positive news can often attract short-term buying activity, potentially causing a short-term surge in the stock price."
    },
    {
      "event": "Nvidia unveils powerful AI system for genetic research",
      "description": "The unveiling of a new AI system by NVIDIA could indicate increased investment in AI technologies and further solidify the company's position as a leading player in this field. This development is expected to positively impact the stock price.",
      "potential_impact": "Positive news about cutting-edge technology can often lead to long-term growth, potentially boosting the stock price over