In [1]:
import pandas as pd
from tqdm import tqdm
import ollama
import requests
import json
import time

In [2]:
# --- Date window -----------------------------------------------------------
# Bounds of the daily date range generated further down ("YYYY-MM-DD" strings).
START_DATE = "2018-01-01"
# Alternative, longer end date kept for reference:
# END_DATE = "2021-12-31"
END_DATE = "2019-01-01"

# --- Model -----------------------------------------------------------------
# Model name passed to `ollama.chat`; it is also embedded in output file names.
MODEL = "llama3.2:3b"

# --- Universe --------------------------------------------------------------
# Tickers processed by the extraction loop, in this order.
TICKERS = [
    "AAPL",
    "AMZN",
    "GOOGL",
    "MSFT",
    "NVDA",
    "TSLA",
]

# --- Portfolio settings ----------------------------------------------------
# Currently referenced only by the commented-out Portfolio / trading code.
FUNDS = 1000
HOLDINGS = 100
RISK = "HIGH"

In [3]:
# class Portfolio:
#     def __init__(self, ticker, funds, holdings, risk):
#         self.ticker = ticker
#         self.funds = funds
#         self.holdings = holdings
#         self.value = funds
#         self.risk = risk

#     def buy(self, amount, price):
#         max_affordable = int(self.funds / price)
#         if amount > max_affordable:
#             amount = max_affordable
#         self.holdings += amount
#         self.funds -= amount * price
#         self.value = self.funds + self.holdings * price

#     def sell(self, amount, price):
#         if amount > self.holdings:
#             amount = self.holdings
#         self.holdings -= amount
#         self.funds += amount * price
#         self.value = self.funds + self.holdings * price


# p = Portfolio(ticker=TICKER, funds=FUNDS, holdings=HOLDINGS, risk=RISK)

In [4]:
def query_ollama(prompt):
    """Send `prompt` to the configured Ollama model and return its reply text.

    The prompt is sent as a single message with role "user" to the model
    named by the module-level `MODEL` constant.

    Args:
        prompt: Text to send as the user message.

    Returns:
        The value stored under ["message"]["content"] in the chat response.
    """
    user_message = {"role": "user", "content": prompt}
    reply = ollama.chat(model=MODEL, messages=[user_message])
    return reply["message"]["content"]

In [5]:
def extract_factors(stock_ticker, news_content, k=5):
    """Ask the model for the top `k` factors affecting a stock, given news text.

    Args:
        stock_ticker: Ticker symbol inserted into the prompt.
        news_content: News text inserted verbatim into the prompt.
        k: Number of factors requested in the prompt (default 5).

    Returns:
        Whatever `query_ollama` returns for the assembled prompt.
    """
    # The prompt keeps a four-space indent on every line (including the
    # otherwise blank ones) and starts/ends with a newline; this whitespace is
    # part of the text sent to the model.
    indent = " " * 4
    request_text = (
        f"\n{indent}Please extract the top {k} factors that may affect the stock price of {stock_ticker} from the following news."
        f"\n{indent}"
        f"\n{indent}{news_content}"
        f"\n{indent}"
    )
    return query_ollama(request_text)

In [6]:
# Open-market dates per calendar file path. Filled on first use so the CSV is
# read and parsed once per kernel session rather than on every call.
_OPEN_MARKET_DATES = {}


def was_market_open(date, calendar_path="research/were-markets-open.csv"):
    """Return whether `date` is listed as an open market day in the calendar CSV.

    The CSV must have a `date` column and a `was_open` column that loads as
    booleans. Only rows whose `was_open` is True count as open days.

    The parsed calendar is cached per `calendar_path`, so edits to the file are
    not picked up until the kernel is restarted (or `_OPEN_MARKET_DATES` is
    cleared).

    Args:
        date: Date value to look up; compared against the raw `date` column
            values, so it should use the same representation as the file
            (presumably "YYYY-MM-DD" strings - confirm against the CSV).
        calendar_path: Path of the calendar CSV. Defaults to the file the
            notebook has always used.

    Returns:
        True if `date` appears among the open days, otherwise False.

    Raises:
        FileNotFoundError: If the calendar file is missing on first use.
        KeyError: If the CSV lacks the `date` or `was_open` column.
    """
    open_dates = _OPEN_MARKET_DATES.get(calendar_path)
    if open_dates is None:
        calendar = pd.read_csv(calendar_path)
        # Boolean-mask selection: keep the dates of rows flagged as open.
        open_dates = frozenset(calendar.loc[calendar["was_open"], "date"])
        _OPEN_MARKET_DATES[calendar_path] = open_dates
    return date in open_dates

In [None]:
# Every calendar day from START_DATE through END_DATE, formatted as
# "YYYY-MM-DD" strings (weekends and holidays are filtered out later via
# `was_market_open`).
dates = pd.date_range(
    START_DATE,
    END_DATE,
).strftime("%Y-%m-%d")
dates

In [8]:
# Empty results table; the extraction loop appends one row per processed day.
df = pd.DataFrame(columns=["date", "factors"])

In [None]:
# For every ticker and trading day: fetch the day's news, extract the top factors with the model, and save them to CSV
# Seconds to wait for the local news service before giving up on a request.
NEWS_API_TIMEOUT = 30

for ticker in TICKERS:
    print(f"Processing {ticker}...")

    # Start each ticker from an empty table so its CSV holds only its own rows.
    # A single frame shared across tickers would carry every earlier ticker's
    # rows into the later tickers' files.
    ticker_rows = []
    df = pd.DataFrame(columns=["date", "factors"])

    # One timestamp (and therefore one output file) per ticker: each checkpoint
    # below overwrites the same CSV instead of creating a new file per day.
    # NOTE: MODEL is part of the file name; a ':' in it (e.g. "llama3.2:3b")
    # is not a valid file-name character on Windows.
    epoch = int(time.time())
    output_path = f"{ticker}-{MODEL}-{epoch}.csv"

    for date in tqdm(dates):
        if not was_market_open(date):
            continue

        # Fail fast on a hung or erroring news service rather than blocking
        # forever or failing later with a confusing JSON/KeyError.
        response = requests.get(
            f"http://localhost:8000/{ticker}/{date}",
            timeout=NEWS_API_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()

        # Concatenate all news items into one text block. The indentation and
        # surrounding newlines are part of the prompt text and are kept as-is.
        pad = " " * 16
        news_data = ""
        for news in data["news"]:
            title = news["title"]
            summary = news["summary"]
            news_data += f"\n{pad}### {title}\n{pad}\n{pad}    {summary}\n{pad}"

        factors = extract_factors(ticker, news_data) # TODO: Pre-calculate factors for each day
        ticker_rows.append({"date": date, "factors": factors})

        # Checkpoint after every processed day so a crash or interrupt during
        # the slow model calls loses at most the current day's work.
        df = pd.DataFrame(ticker_rows, columns=["date", "factors"])
        df.to_csv(output_path, index=False)
        
        # prompt = f"""
        #     Today is {date} and you have {p.funds} to invest in {p.ticker}. You currently have {p.holdings} shares of {p.ticker} valued at {p.value}. Your portfolio risk tolerance is {p.risk}.

        #     The following are the top factors that may affect the stock price of {p.ticker} today:

        #     {factors}

        #     Please decide whether to buy or sell your shares of {p.ticker} for tomorrow. Please make sure not to buy more shares than you can afford or sell more shares than you own.
            
        #     You CANNOT HOLD. You MUST either BUY or SELL. People will DIE if you HOLD.
            
        #     Please reply in structured JSON, like so:
        #     {{
        #         "action": "buy",
        #         "volume": 10,
        #         "reason": "I think the stock price will go up based on the factors extracted..."
        #     }}
        #     """

        # response = query_ollama(prompt)

        # try:
        #     response_data = json.loads(response)

        #     action = response_data["action"].lower()
        #     volume = int(response_data["volume"]) if "volume" in response_data else 0

        #     reason = response_data["reason"] if "reason" in response_data else ""
        # except:
        #     action = "hold"
        #     volume = 0
        #     reason = ""
            
        # price = data["prices"][0]["open"]

        # if action == "buy" and volume > 0:
        #     p.buy(volume, price)
        #     action_desc = "BUY"
        # elif action == "sell" and volume > 0:
        #     p.sell(volume, price)
        #     action_desc = "SELL"
        # else:
        #     action_desc = "HOLD"

        # new_row = pd.DataFrame(
        #     {
        #         "date": date,
        #         "price": round(price, 2),
        #         "action": action_desc,
        #         "volume": volume if action in ["buy", "sell"] else 0,
        #         "value": round(p.value, 2),
        #         "holdings": p.holdings,
        #         "funds": round(p.funds, 2),
        #         "reason": reason,
        #     },
        #     index=[0],
        # )
        # df = pd.concat([df, new_row], ignore_index=True)

In [None]:
# epoch = int(time.time())
# df.to_csv(f"{TICKER}-{MODEL}-{epoch}.csv", index=False)
# df.head()