In [1]:
import pandas as pd
from tqdm import tqdm
import ollama
import requests
import json
import time
import os

In [2]:
# Date window for the run; the date-range cell below includes both endpoints.
START_DATE = "2018-01-01"
# END_DATE = "2021-12-31"  # wider window, currently disabled
END_DATE = "2020-01-01"
# Model name handed to ollama.chat() by query_ollama.
MODEL = "llama3.2:3b"

# Tickers iterated by the factor-extraction loop.
TICKERS = ["AAPL", "AMZN", "GOOGL", "MSFT", "NVDA", "TSLA"]
# NOTE(review): FUNDS, HOLDINGS and RISK are not referenced in any visible cell;
# presumably they belong to a trading simulation defined elsewhere — confirm or remove.
FUNDS = 1000
HOLDINGS = 100
RISK = "HIGH"

In [3]:
def query_ollama(prompt):
    """Send ``prompt`` to the configured Ollama model as one user message.

    Args:
        prompt: Text placed in the ``content`` field of the single user message.

    Returns:
        The ``["message"]["content"]`` entry of the chat response.
    """
    user_message = {"role": "user", "content": prompt}
    reply = ollama.chat(model=MODEL, messages=[user_message])
    return reply["message"]["content"]

In [4]:
def extract_factors(stock_ticker, news_content, k=5):
    """Ask the model for the top ``k`` factors that may move ``stock_ticker``.

    Args:
        stock_ticker: Ticker symbol interpolated into the prompt.
        news_content: News text appended to the prompt.
        k: Number of factors requested (default 5).

    Returns:
        Whatever ``query_ollama`` returns for the assembled prompt.
    """
    # The prompt literal (including its indentation) is sent to the model verbatim.
    return query_ollama(
        f"""
    Please extract the top {k} factors that may affect the stock price of {stock_ticker} from the following news.
    
    {news_content}
    """
    )

In [5]:
def was_market_open(date):
    """Return True when ``date`` is listed as an open day in the market calendar.

    The calendar is read from ``research/were-markets-open.csv`` (columns
    ``date`` and ``was_open``) on the first call only. The resulting set of
    open dates is cached on the function object, so the thousands of calls made
    by the processing loop no longer re-read and re-filter the CSV each time.
    Re-running this cell creates a fresh function and therefore a fresh cache.

    Args:
        date: Value compared verbatim against the CSV's ``date`` column
            (the notebook passes "YYYY-MM-DD" strings).

    Returns:
        bool: True if ``date`` appears among the rows whose ``was_open`` is True.
    """
    open_dates = getattr(was_market_open, "_open_dates", None)
    if open_dates is None:
        calendar = pd.read_csv("research/were-markets-open.csv")

        # Keep only the rows where 'was_open' is True; a set gives O(1) lookups.
        open_dates = set(calendar.loc[calendar["was_open"], "date"])
        was_market_open._open_dates = open_dates

    return date in open_dates

In [6]:
# Every calendar day from START_DATE through END_DATE, rendered as "YYYY-MM-DD" strings
dates = (
    pd.date_range(start=START_DATE, end=END_DATE)
    .strftime("%Y-%m-%d")
)
dates

Index(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
       '2018-01-06', '2018-01-07', '2018-01-08', '2018-01-09', '2018-01-10',
       ...
       '2019-12-23', '2019-12-24', '2019-12-25', '2019-12-26', '2019-12-27',
       '2019-12-28', '2019-12-29', '2019-12-30', '2019-12-31', '2020-01-01'],
      dtype='object', length=731)

In [7]:
# Empty frame with "date" and "factors" columns.
# NOTE(review): nothing in the visible cells reads or fills this frame (the loop
# below writes factors to files instead) — confirm whether it is still needed.
df = pd.DataFrame(columns=["date", "factors"])

In [8]:
# Extract the LLM factors for every ticker / open-market day and store each result
# as factors/<TICKER>-<DATE>.md. Days that already have a file are skipped, so the
# cell can be interrupted and re-run without repeating finished work.

# A fresh checkout has no output directory; without this the first write fails.
os.makedirs("factors", exist_ok=True)

for ticker in TICKERS:
    print(f"Processing {ticker}...")
    for date in tqdm(dates):
        factors_path = f"factors/{ticker}-{date}.md"

        # Skip if the factors file already exists
        if os.path.exists(factors_path):
            continue

        # Ensure that the market was open on the given date
        if not was_market_open(date):
            continue

        try:
            data = requests.get(f"http://localhost:8000/{ticker}/{date}")
            data = data.json()
        except (requests.RequestException, ValueError):
            # Only request failures and undecodable JSON bodies are treated as
            # "bad day, move on". A bare `except:` would also swallow
            # KeyboardInterrupt, making the loop impossible to stop cleanly.
            # Log offending ticker/date to a .txt file
            with open("error.log", "a") as f:
                f.write(f"{ticker}, {date}\n")

            # TODO: Investigate why some JSON is malformed
            # example: AAPL, 2019-04-11
            continue

        # Concatenate every article of the day into one markdown-like blob.
        news_data = ""
        for news in data["news"]:
            news_data += f"""
                ### {news["title"]}
                
                    {news["summary"]}
                """

        factors = extract_factors(ticker, news_data)
        with open(factors_path, "w") as f:
            f.write(factors)

Processing AAPL...


  0%|          | 0/731 [00:00<?, ?it/s]

 54%|█████▍    | 398/731 [00:00<00:00, 3975.71it/s]